diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,134433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8003001125422033, + "eval_steps": 500, + "global_step": 19200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 4.1682297528239755e-05, + "grad_norm": 64.0, + "learning_rate": 1.388888888888889e-07, + "loss": 8.6274, + "step": 1 + }, + { + "epoch": 8.336459505647951e-05, + "grad_norm": 167.0, + "learning_rate": 2.777777777777778e-07, + "loss": 13.5009, + "step": 2 + }, + { + "epoch": 0.00012504689258471927, + "grad_norm": 210.0, + "learning_rate": 4.1666666666666667e-07, + "loss": 15.8188, + "step": 3 + }, + { + "epoch": 0.00016672919011295902, + "grad_norm": 80.5, + "learning_rate": 5.555555555555556e-07, + "loss": 13.8132, + "step": 4 + }, + { + "epoch": 0.00020841148764119878, + "grad_norm": 90.0, + "learning_rate": 6.944444444444445e-07, + "loss": 18.0005, + "step": 5 + }, + { + "epoch": 0.00025009378516943853, + "grad_norm": 66.0, + "learning_rate": 8.333333333333333e-07, + "loss": 13.815, + "step": 6 + }, + { + "epoch": 0.0002917760826976783, + "grad_norm": 73.5, + "learning_rate": 9.722222222222222e-07, + "loss": 12.3135, + "step": 7 + }, + { + "epoch": 0.00033345838022591804, + "grad_norm": 138.0, + "learning_rate": 1.1111111111111112e-06, + "loss": 16.5008, + "step": 8 + }, + { + "epoch": 0.0003751406777541578, + "grad_norm": 308.0, + "learning_rate": 1.25e-06, + "loss": 34.756, + "step": 9 + }, + { + "epoch": 0.00041682297528239755, + "grad_norm": 80.0, + "learning_rate": 1.388888888888889e-06, + "loss": 13.4411, + "step": 10 + }, + { + "epoch": 0.0004585052728106373, + "grad_norm": 156.0, + "learning_rate": 1.5277777777777778e-06, + "loss": 24.751, + "step": 11 + }, + { + "epoch": 0.0005001875703388771, + "grad_norm": 81.5, + "learning_rate": 1.6666666666666667e-06, + "loss": 17.0024, + "step": 12 + }, + { + "epoch": 0.0005418698678671169, + "grad_norm": 139.0, + "learning_rate": 1.8055555555555555e-06, + "loss": 28.7525, + "step": 13 + }, + { + "epoch": 0.0005835521653953566, + "grad_norm": 78.0, + "learning_rate": 1.9444444444444444e-06, + "loss": 13.9386, + "step": 14 + }, + { + "epoch": 0.0006252344629235964, + "grad_norm": 59.5, + "learning_rate": 2.0833333333333334e-06, + "loss": 10.6903, + "step": 15 + }, + { + "epoch": 0.0006669167604518361, + "grad_norm": 88.0, + "learning_rate": 2.2222222222222225e-06, + "loss": 17.1271, + "step": 16 + }, + { + "epoch": 0.0007085990579800759, + "grad_norm": 124.0, + "learning_rate": 2.361111111111111e-06, + "loss": 20.5015, + "step": 17 + }, + { + "epoch": 0.0007502813555083156, + "grad_norm": 72.0, + "learning_rate": 2.5e-06, + "loss": 13.4388, + "step": 18 + }, + { + "epoch": 0.0007919636530365554, + "grad_norm": 83.5, + "learning_rate": 2.638888888888889e-06, + "loss": 16.3777, + "step": 19 + }, + { + "epoch": 0.0008336459505647951, + "grad_norm": 164.0, + "learning_rate": 2.777777777777778e-06, + "loss": 19.3761, + "step": 20 + }, + { + "epoch": 0.0008753282480930349, + "grad_norm": 52.25, + "learning_rate": 2.916666666666667e-06, + "loss": 10.1263, + "step": 21 + }, + { + "epoch": 0.0009170105456212746, + "grad_norm": 103.0, + "learning_rate": 3.0555555555555556e-06, + "loss": 11.8139, + "step": 22 + }, + { + "epoch": 0.0009586928431495144, + "grad_norm": 132.0, + "learning_rate": 3.1944444444444443e-06, + "loss": 24.6261, + "step": 23 + }, + { + "epoch": 0.0010003751406777541, + "grad_norm": 98.0, + "learning_rate": 3.3333333333333333e-06, + "loss": 16.5008, + "step": 24 + }, + { + "epoch": 0.001042057438205994, + "grad_norm": 144.0, + "learning_rate": 3.4722222222222224e-06, + "loss": 21.126, + "step": 25 + }, + { + "epoch": 0.0010837397357342337, + "grad_norm": 123.5, + "learning_rate": 3.611111111111111e-06, + "loss": 21.0022, + "step": 26 + }, + { + "epoch": 0.0011254220332624733, + "grad_norm": 75.0, + "learning_rate": 3.75e-06, + "loss": 13.439, + "step": 27 + }, + { + "epoch": 0.0011671043307907132, + "grad_norm": 117.5, + "learning_rate": 3.888888888888889e-06, + "loss": 19.8756, + "step": 28 + }, + { + "epoch": 0.001208786628318953, + "grad_norm": 224.0, + "learning_rate": 4.027777777777779e-06, + "loss": 24.5091, + "step": 29 + }, + { + "epoch": 0.0012504689258471928, + "grad_norm": 59.75, + "learning_rate": 4.166666666666667e-06, + "loss": 11.6892, + "step": 30 + }, + { + "epoch": 0.0012921512233754324, + "grad_norm": 160.0, + "learning_rate": 4.305555555555556e-06, + "loss": 15.5017, + "step": 31 + }, + { + "epoch": 0.0013338335209036722, + "grad_norm": 70.0, + "learning_rate": 4.444444444444445e-06, + "loss": 10.565, + "step": 32 + }, + { + "epoch": 0.001375515818431912, + "grad_norm": 102.5, + "learning_rate": 4.583333333333333e-06, + "loss": 19.2535, + "step": 33 + }, + { + "epoch": 0.0014171981159601518, + "grad_norm": 150.0, + "learning_rate": 4.722222222222222e-06, + "loss": 20.6273, + "step": 34 + }, + { + "epoch": 0.0014588804134883914, + "grad_norm": 104.0, + "learning_rate": 4.861111111111111e-06, + "loss": 7.721, + "step": 35 + }, + { + "epoch": 0.0015005627110166312, + "grad_norm": 308.0, + "learning_rate": 5e-06, + "loss": 19.5012, + "step": 36 + }, + { + "epoch": 0.001542245008544871, + "grad_norm": 112.0, + "learning_rate": 5.138888888888889e-06, + "loss": 17.5013, + "step": 37 + }, + { + "epoch": 0.0015839273060731108, + "grad_norm": 211.0, + "learning_rate": 5.277777777777778e-06, + "loss": 19.0028, + "step": 38 + }, + { + "epoch": 0.0016256096036013504, + "grad_norm": 111.0, + "learning_rate": 5.416666666666667e-06, + "loss": 20.0022, + "step": 39 + }, + { + "epoch": 0.0016672919011295902, + "grad_norm": 374.0, + "learning_rate": 5.555555555555556e-06, + "loss": 36.5015, + "step": 40 + }, + { + "epoch": 0.00170897419865783, + "grad_norm": 93.0, + "learning_rate": 5.694444444444445e-06, + "loss": 15.6892, + "step": 41 + }, + { + "epoch": 0.0017506564961860698, + "grad_norm": 239.0, + "learning_rate": 5.833333333333334e-06, + "loss": 17.8794, + "step": 42 + }, + { + "epoch": 0.0017923387937143094, + "grad_norm": 200.0, + "learning_rate": 5.972222222222223e-06, + "loss": 15.065, + "step": 43 + }, + { + "epoch": 0.0018340210912425492, + "grad_norm": 231.0, + "learning_rate": 6.111111111111111e-06, + "loss": 13.1886, + "step": 44 + }, + { + "epoch": 0.001875703388770789, + "grad_norm": 91.0, + "learning_rate": 6.25e-06, + "loss": 17.001, + "step": 45 + }, + { + "epoch": 0.0019173856862990289, + "grad_norm": 129.0, + "learning_rate": 6.3888888888888885e-06, + "loss": 16.6274, + "step": 46 + }, + { + "epoch": 0.0019590679838272685, + "grad_norm": 134.0, + "learning_rate": 6.5277777777777784e-06, + "loss": 24.1271, + "step": 47 + }, + { + "epoch": 0.0020007502813555083, + "grad_norm": 176.0, + "learning_rate": 6.666666666666667e-06, + "loss": 21.5012, + "step": 48 + }, + { + "epoch": 0.002042432578883748, + "grad_norm": 71.5, + "learning_rate": 6.805555555555556e-06, + "loss": 11.0633, + "step": 49 + }, + { + "epoch": 0.002084114876411988, + "grad_norm": 90.5, + "learning_rate": 6.944444444444445e-06, + "loss": 14.4389, + "step": 50 + }, + { + "epoch": 0.0021257971739402277, + "grad_norm": 128.0, + "learning_rate": 7.083333333333334e-06, + "loss": 11.4404, + "step": 51 + }, + { + "epoch": 0.0021674794714684675, + "grad_norm": 48.0, + "learning_rate": 7.222222222222222e-06, + "loss": 8.3156, + "step": 52 + }, + { + "epoch": 0.0022091617689967073, + "grad_norm": 100.0, + "learning_rate": 7.361111111111112e-06, + "loss": 15.564, + "step": 53 + }, + { + "epoch": 0.0022508440665249467, + "grad_norm": 123.5, + "learning_rate": 7.5e-06, + "loss": 10.6887, + "step": 54 + }, + { + "epoch": 0.0022925263640531865, + "grad_norm": 242.0, + "learning_rate": 7.63888888888889e-06, + "loss": 18.8776, + "step": 55 + }, + { + "epoch": 0.0023342086615814263, + "grad_norm": 98.0, + "learning_rate": 7.777777777777777e-06, + "loss": 16.2522, + "step": 56 + }, + { + "epoch": 0.002375890959109666, + "grad_norm": 234.0, + "learning_rate": 7.916666666666667e-06, + "loss": 18.0009, + "step": 57 + }, + { + "epoch": 0.002417573256637906, + "grad_norm": 76.5, + "learning_rate": 8.055555555555557e-06, + "loss": 12.1889, + "step": 58 + }, + { + "epoch": 0.0024592555541661457, + "grad_norm": 149.0, + "learning_rate": 8.194444444444445e-06, + "loss": 17.1265, + "step": 59 + }, + { + "epoch": 0.0025009378516943855, + "grad_norm": 61.75, + "learning_rate": 8.333333333333334e-06, + "loss": 8.689, + "step": 60 + }, + { + "epoch": 0.0025426201492226254, + "grad_norm": 228.0, + "learning_rate": 8.472222222222223e-06, + "loss": 16.1302, + "step": 61 + }, + { + "epoch": 0.0025843024467508647, + "grad_norm": 74.5, + "learning_rate": 8.611111111111112e-06, + "loss": 12.0014, + "step": 62 + }, + { + "epoch": 0.0026259847442791045, + "grad_norm": 146.0, + "learning_rate": 8.75e-06, + "loss": 19.7507, + "step": 63 + }, + { + "epoch": 0.0026676670418073443, + "grad_norm": 296.0, + "learning_rate": 8.88888888888889e-06, + "loss": 21.3808, + "step": 64 + }, + { + "epoch": 0.002709349339335584, + "grad_norm": 172.0, + "learning_rate": 9.027777777777777e-06, + "loss": 14.8174, + "step": 65 + }, + { + "epoch": 0.002751031636863824, + "grad_norm": 119.0, + "learning_rate": 9.166666666666666e-06, + "loss": 5.5344, + "step": 66 + }, + { + "epoch": 0.0027927139343920638, + "grad_norm": 114.0, + "learning_rate": 9.305555555555555e-06, + "loss": 16.6262, + "step": 67 + }, + { + "epoch": 0.0028343962319203036, + "grad_norm": 80.5, + "learning_rate": 9.444444444444445e-06, + "loss": 10.0658, + "step": 68 + }, + { + "epoch": 0.0028760785294485434, + "grad_norm": 190.0, + "learning_rate": 9.583333333333334e-06, + "loss": 12.2508, + "step": 69 + }, + { + "epoch": 0.0029177608269767828, + "grad_norm": 88.5, + "learning_rate": 9.722222222222223e-06, + "loss": 13.44, + "step": 70 + }, + { + "epoch": 0.0029594431245050226, + "grad_norm": 210.0, + "learning_rate": 9.861111111111112e-06, + "loss": 6.9399, + "step": 71 + }, + { + "epoch": 0.0030011254220332624, + "grad_norm": 127.0, + "learning_rate": 1e-05, + "loss": 17.0012, + "step": 72 + }, + { + "epoch": 0.003042807719561502, + "grad_norm": 322.0, + "learning_rate": 1.013888888888889e-05, + "loss": 10.1906, + "step": 73 + }, + { + "epoch": 0.003084490017089742, + "grad_norm": 233.0, + "learning_rate": 1.0277777777777777e-05, + "loss": 27.6306, + "step": 74 + }, + { + "epoch": 0.003126172314617982, + "grad_norm": 158.0, + "learning_rate": 1.0416666666666668e-05, + "loss": 24.5016, + "step": 75 + }, + { + "epoch": 0.0031678546121462216, + "grad_norm": 99.0, + "learning_rate": 1.0555555555555555e-05, + "loss": 12.003, + "step": 76 + }, + { + "epoch": 0.0032095369096744614, + "grad_norm": 181.0, + "learning_rate": 1.0694444444444444e-05, + "loss": 15.8187, + "step": 77 + }, + { + "epoch": 0.003251219207202701, + "grad_norm": 144.0, + "learning_rate": 1.0833333333333334e-05, + "loss": 10.1267, + "step": 78 + }, + { + "epoch": 0.0032929015047309406, + "grad_norm": 138.0, + "learning_rate": 1.0972222222222223e-05, + "loss": 19.5013, + "step": 79 + }, + { + "epoch": 0.0033345838022591804, + "grad_norm": 282.0, + "learning_rate": 1.1111111111111112e-05, + "loss": 29.2517, + "step": 80 + }, + { + "epoch": 0.0033762660997874202, + "grad_norm": 68.5, + "learning_rate": 1.125e-05, + "loss": 9.6898, + "step": 81 + }, + { + "epoch": 0.00341794839731566, + "grad_norm": 132.0, + "learning_rate": 1.138888888888889e-05, + "loss": 16.1259, + "step": 82 + }, + { + "epoch": 0.0034596306948439, + "grad_norm": 202.0, + "learning_rate": 1.1527777777777779e-05, + "loss": 17.5011, + "step": 83 + }, + { + "epoch": 0.0035013129923721397, + "grad_norm": 87.5, + "learning_rate": 1.1666666666666668e-05, + "loss": 9.8779, + "step": 84 + }, + { + "epoch": 0.0035429952899003795, + "grad_norm": 115.0, + "learning_rate": 1.1805555555555555e-05, + "loss": 16.1263, + "step": 85 + }, + { + "epoch": 0.003584677587428619, + "grad_norm": 180.0, + "learning_rate": 1.1944444444444446e-05, + "loss": 7.9394, + "step": 86 + }, + { + "epoch": 0.0036263598849568587, + "grad_norm": 79.5, + "learning_rate": 1.2083333333333333e-05, + "loss": 10.2515, + "step": 87 + }, + { + "epoch": 0.0036680421824850985, + "grad_norm": 410.0, + "learning_rate": 1.2222222222222222e-05, + "loss": 27.6259, + "step": 88 + }, + { + "epoch": 0.0037097244800133383, + "grad_norm": 241.0, + "learning_rate": 1.2361111111111112e-05, + "loss": 21.6272, + "step": 89 + }, + { + "epoch": 0.003751406777541578, + "grad_norm": 131.0, + "learning_rate": 1.25e-05, + "loss": 13.9388, + "step": 90 + }, + { + "epoch": 0.003793089075069818, + "grad_norm": 163.0, + "learning_rate": 1.263888888888889e-05, + "loss": 19.3761, + "step": 91 + }, + { + "epoch": 0.0038347713725980577, + "grad_norm": 146.0, + "learning_rate": 1.2777777777777777e-05, + "loss": 17.3761, + "step": 92 + }, + { + "epoch": 0.0038764536701262975, + "grad_norm": 136.0, + "learning_rate": 1.2916666666666668e-05, + "loss": 15.5679, + "step": 93 + }, + { + "epoch": 0.003918135967654537, + "grad_norm": 164.0, + "learning_rate": 1.3055555555555557e-05, + "loss": 15.5639, + "step": 94 + }, + { + "epoch": 0.003959818265182777, + "grad_norm": 360.0, + "learning_rate": 1.3194444444444446e-05, + "loss": 27.88, + "step": 95 + }, + { + "epoch": 0.0040015005627110165, + "grad_norm": 320.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 28.1302, + "step": 96 + }, + { + "epoch": 0.004043182860239257, + "grad_norm": 119.5, + "learning_rate": 1.3472222222222222e-05, + "loss": 14.3772, + "step": 97 + }, + { + "epoch": 0.004084865157767496, + "grad_norm": 101.5, + "learning_rate": 1.3611111111111111e-05, + "loss": 12.0642, + "step": 98 + }, + { + "epoch": 0.0041265474552957355, + "grad_norm": 128.0, + "learning_rate": 1.3750000000000002e-05, + "loss": 15.5011, + "step": 99 + }, + { + "epoch": 0.004168229752823976, + "grad_norm": 131.0, + "learning_rate": 1.388888888888889e-05, + "loss": 13.4389, + "step": 100 + }, + { + "epoch": 0.004209912050352215, + "grad_norm": 129.0, + "learning_rate": 1.4027777777777779e-05, + "loss": 11.8762, + "step": 101 + }, + { + "epoch": 0.004251594347880455, + "grad_norm": 191.0, + "learning_rate": 1.4166666666666668e-05, + "loss": 16.3772, + "step": 102 + }, + { + "epoch": 0.004293276645408695, + "grad_norm": 126.5, + "learning_rate": 1.4305555555555555e-05, + "loss": 14.3142, + "step": 103 + }, + { + "epoch": 0.004334958942936935, + "grad_norm": 119.0, + "learning_rate": 1.4444444444444444e-05, + "loss": 13.128, + "step": 104 + }, + { + "epoch": 0.004376641240465174, + "grad_norm": 178.0, + "learning_rate": 1.4583333333333335e-05, + "loss": 14.1896, + "step": 105 + }, + { + "epoch": 0.004418323537993415, + "grad_norm": 224.0, + "learning_rate": 1.4722222222222224e-05, + "loss": 10.8161, + "step": 106 + }, + { + "epoch": 0.004460005835521654, + "grad_norm": 255.0, + "learning_rate": 1.4861111111111111e-05, + "loss": 18.3769, + "step": 107 + }, + { + "epoch": 0.004501688133049893, + "grad_norm": 149.0, + "learning_rate": 1.5e-05, + "loss": 12.3134, + "step": 108 + }, + { + "epoch": 0.004543370430578134, + "grad_norm": 133.0, + "learning_rate": 1.5138888888888888e-05, + "loss": 13.3171, + "step": 109 + }, + { + "epoch": 0.004585052728106373, + "grad_norm": 202.0, + "learning_rate": 1.527777777777778e-05, + "loss": 13.5007, + "step": 110 + }, + { + "epoch": 0.004626735025634613, + "grad_norm": 194.0, + "learning_rate": 1.5416666666666668e-05, + "loss": 13.0014, + "step": 111 + }, + { + "epoch": 0.004668417323162853, + "grad_norm": 121.5, + "learning_rate": 1.5555555555555555e-05, + "loss": 13.1264, + "step": 112 + }, + { + "epoch": 0.004710099620691093, + "grad_norm": 468.0, + "learning_rate": 1.5694444444444446e-05, + "loss": 15.3138, + "step": 113 + }, + { + "epoch": 0.004751781918219332, + "grad_norm": 256.0, + "learning_rate": 1.5833333333333333e-05, + "loss": 23.1267, + "step": 114 + }, + { + "epoch": 0.004793464215747572, + "grad_norm": 206.0, + "learning_rate": 1.597222222222222e-05, + "loss": 12.6886, + "step": 115 + }, + { + "epoch": 0.004835146513275812, + "grad_norm": 171.0, + "learning_rate": 1.6111111111111115e-05, + "loss": 15.1892, + "step": 116 + }, + { + "epoch": 0.004876828810804051, + "grad_norm": 112.5, + "learning_rate": 1.6250000000000002e-05, + "loss": 12.3759, + "step": 117 + }, + { + "epoch": 0.0049185111083322915, + "grad_norm": 252.0, + "learning_rate": 1.638888888888889e-05, + "loss": 14.3134, + "step": 118 + }, + { + "epoch": 0.004960193405860531, + "grad_norm": 304.0, + "learning_rate": 1.6527777777777777e-05, + "loss": 26.2532, + "step": 119 + }, + { + "epoch": 0.005001875703388771, + "grad_norm": 372.0, + "learning_rate": 1.6666666666666667e-05, + "loss": 20.7521, + "step": 120 + }, + { + "epoch": 0.0050435580009170105, + "grad_norm": 211.0, + "learning_rate": 1.6805555555555558e-05, + "loss": 20.627, + "step": 121 + }, + { + "epoch": 0.005085240298445251, + "grad_norm": 264.0, + "learning_rate": 1.6944444444444446e-05, + "loss": 15.0682, + "step": 122 + }, + { + "epoch": 0.00512692259597349, + "grad_norm": 125.0, + "learning_rate": 1.7083333333333333e-05, + "loss": 9.6259, + "step": 123 + }, + { + "epoch": 0.0051686048935017295, + "grad_norm": 159.0, + "learning_rate": 1.7222222222222224e-05, + "loss": 16.6276, + "step": 124 + }, + { + "epoch": 0.00521028719102997, + "grad_norm": 143.0, + "learning_rate": 1.736111111111111e-05, + "loss": 11.5641, + "step": 125 + }, + { + "epoch": 0.005251969488558209, + "grad_norm": 85.5, + "learning_rate": 1.75e-05, + "loss": 8.751, + "step": 126 + }, + { + "epoch": 0.005293651786086449, + "grad_norm": 150.0, + "learning_rate": 1.763888888888889e-05, + "loss": 14.3139, + "step": 127 + }, + { + "epoch": 0.005335334083614689, + "grad_norm": 188.0, + "learning_rate": 1.777777777777778e-05, + "loss": 15.94, + "step": 128 + }, + { + "epoch": 0.005377016381142929, + "grad_norm": 219.0, + "learning_rate": 1.7916666666666667e-05, + "loss": 16.376, + "step": 129 + }, + { + "epoch": 0.005418698678671168, + "grad_norm": 119.5, + "learning_rate": 1.8055555555555555e-05, + "loss": 11.8145, + "step": 130 + }, + { + "epoch": 0.005460380976199408, + "grad_norm": 254.0, + "learning_rate": 1.8194444444444445e-05, + "loss": 14.3138, + "step": 131 + }, + { + "epoch": 0.005502063273727648, + "grad_norm": 156.0, + "learning_rate": 1.8333333333333333e-05, + "loss": 9.3155, + "step": 132 + }, + { + "epoch": 0.005543745571255887, + "grad_norm": 147.0, + "learning_rate": 1.8472222222222224e-05, + "loss": 12.8133, + "step": 133 + }, + { + "epoch": 0.0055854278687841276, + "grad_norm": 160.0, + "learning_rate": 1.861111111111111e-05, + "loss": 14.8763, + "step": 134 + }, + { + "epoch": 0.005627110166312367, + "grad_norm": 258.0, + "learning_rate": 1.8750000000000002e-05, + "loss": 23.6275, + "step": 135 + }, + { + "epoch": 0.005668792463840607, + "grad_norm": 241.0, + "learning_rate": 1.888888888888889e-05, + "loss": 19.0009, + "step": 136 + }, + { + "epoch": 0.0057104747613688465, + "grad_norm": 218.0, + "learning_rate": 1.9027777777777776e-05, + "loss": 18.6265, + "step": 137 + }, + { + "epoch": 0.005752157058897087, + "grad_norm": 165.0, + "learning_rate": 1.9166666666666667e-05, + "loss": 13.8765, + "step": 138 + }, + { + "epoch": 0.005793839356425326, + "grad_norm": 284.0, + "learning_rate": 1.9305555555555558e-05, + "loss": 10.3143, + "step": 139 + }, + { + "epoch": 0.0058355216539535655, + "grad_norm": 224.0, + "learning_rate": 1.9444444444444445e-05, + "loss": 13.3759, + "step": 140 + }, + { + "epoch": 0.005877203951481806, + "grad_norm": 226.0, + "learning_rate": 1.9583333333333333e-05, + "loss": 17.6269, + "step": 141 + }, + { + "epoch": 0.005918886249010045, + "grad_norm": 284.0, + "learning_rate": 1.9722222222222224e-05, + "loss": 11.1262, + "step": 142 + }, + { + "epoch": 0.005960568546538285, + "grad_norm": 344.0, + "learning_rate": 1.986111111111111e-05, + "loss": 25.2527, + "step": 143 + }, + { + "epoch": 0.006002250844066525, + "grad_norm": 171.0, + "learning_rate": 2e-05, + "loss": 15.3764, + "step": 144 + }, + { + "epoch": 0.006043933141594765, + "grad_norm": 280.0, + "learning_rate": 2.013888888888889e-05, + "loss": 19.8826, + "step": 145 + }, + { + "epoch": 0.006085615439123004, + "grad_norm": 229.0, + "learning_rate": 2.027777777777778e-05, + "loss": 18.378, + "step": 146 + }, + { + "epoch": 0.006127297736651245, + "grad_norm": 108.5, + "learning_rate": 2.0416666666666667e-05, + "loss": 9.4384, + "step": 147 + }, + { + "epoch": 0.006168980034179484, + "grad_norm": 166.0, + "learning_rate": 2.0555555555555555e-05, + "loss": 14.127, + "step": 148 + }, + { + "epoch": 0.006210662331707723, + "grad_norm": 177.0, + "learning_rate": 2.0694444444444445e-05, + "loss": 13.5015, + "step": 149 + }, + { + "epoch": 0.006252344629235964, + "grad_norm": 200.0, + "learning_rate": 2.0833333333333336e-05, + "loss": 14.3765, + "step": 150 + }, + { + "epoch": 0.006294026926764203, + "grad_norm": 412.0, + "learning_rate": 2.0972222222222223e-05, + "loss": 22.5064, + "step": 151 + }, + { + "epoch": 0.006335709224292443, + "grad_norm": 193.0, + "learning_rate": 2.111111111111111e-05, + "loss": 11.6886, + "step": 152 + }, + { + "epoch": 0.006377391521820683, + "grad_norm": 141.0, + "learning_rate": 2.125e-05, + "loss": 11.3758, + "step": 153 + }, + { + "epoch": 0.006419073819348923, + "grad_norm": 270.0, + "learning_rate": 2.138888888888889e-05, + "loss": 18.2512, + "step": 154 + }, + { + "epoch": 0.006460756116877162, + "grad_norm": 122.0, + "learning_rate": 2.152777777777778e-05, + "loss": 8.1271, + "step": 155 + }, + { + "epoch": 0.006502438414405402, + "grad_norm": 288.0, + "learning_rate": 2.1666666666666667e-05, + "loss": 20.5031, + "step": 156 + }, + { + "epoch": 0.006544120711933642, + "grad_norm": 418.0, + "learning_rate": 2.1805555555555558e-05, + "loss": 19.7526, + "step": 157 + }, + { + "epoch": 0.006585803009461881, + "grad_norm": 194.0, + "learning_rate": 2.1944444444444445e-05, + "loss": 14.2517, + "step": 158 + }, + { + "epoch": 0.0066274853069901215, + "grad_norm": 328.0, + "learning_rate": 2.2083333333333333e-05, + "loss": 20.2507, + "step": 159 + }, + { + "epoch": 0.006669167604518361, + "grad_norm": 191.0, + "learning_rate": 2.2222222222222223e-05, + "loss": 12.5019, + "step": 160 + }, + { + "epoch": 0.006710849902046601, + "grad_norm": 344.0, + "learning_rate": 2.2361111111111114e-05, + "loss": 22.0053, + "step": 161 + }, + { + "epoch": 0.0067525321995748405, + "grad_norm": 147.0, + "learning_rate": 2.25e-05, + "loss": 10.689, + "step": 162 + }, + { + "epoch": 0.006794214497103081, + "grad_norm": 171.0, + "learning_rate": 2.263888888888889e-05, + "loss": 12.5013, + "step": 163 + }, + { + "epoch": 0.00683589679463132, + "grad_norm": 364.0, + "learning_rate": 2.277777777777778e-05, + "loss": 24.3815, + "step": 164 + }, + { + "epoch": 0.0068775790921595595, + "grad_norm": 205.0, + "learning_rate": 2.2916666666666667e-05, + "loss": 11.7527, + "step": 165 + }, + { + "epoch": 0.0069192613896878, + "grad_norm": 232.0, + "learning_rate": 2.3055555555555558e-05, + "loss": 12.6897, + "step": 166 + }, + { + "epoch": 0.006960943687216039, + "grad_norm": 204.0, + "learning_rate": 2.3194444444444445e-05, + "loss": 14.5019, + "step": 167 + }, + { + "epoch": 0.007002625984744279, + "grad_norm": 268.0, + "learning_rate": 2.3333333333333336e-05, + "loss": 13.0013, + "step": 168 + }, + { + "epoch": 0.007044308282272519, + "grad_norm": 241.0, + "learning_rate": 2.3472222222222223e-05, + "loss": 11.8761, + "step": 169 + }, + { + "epoch": 0.007085990579800759, + "grad_norm": 268.0, + "learning_rate": 2.361111111111111e-05, + "loss": 18.501, + "step": 170 + }, + { + "epoch": 0.007127672877328998, + "grad_norm": 209.0, + "learning_rate": 2.375e-05, + "loss": 9.0014, + "step": 171 + }, + { + "epoch": 0.007169355174857238, + "grad_norm": 164.0, + "learning_rate": 2.3888888888888892e-05, + "loss": 12.1882, + "step": 172 + }, + { + "epoch": 0.007211037472385478, + "grad_norm": 332.0, + "learning_rate": 2.402777777777778e-05, + "loss": 17.751, + "step": 173 + }, + { + "epoch": 0.007252719769913717, + "grad_norm": 288.0, + "learning_rate": 2.4166666666666667e-05, + "loss": 19.001, + "step": 174 + }, + { + "epoch": 0.007294402067441958, + "grad_norm": 226.0, + "learning_rate": 2.4305555555555558e-05, + "loss": 14.191, + "step": 175 + }, + { + "epoch": 0.007336084364970197, + "grad_norm": 154.0, + "learning_rate": 2.4444444444444445e-05, + "loss": 10.6287, + "step": 176 + }, + { + "epoch": 0.007377766662498437, + "grad_norm": 162.0, + "learning_rate": 2.4583333333333332e-05, + "loss": 11.9385, + "step": 177 + }, + { + "epoch": 0.007419448960026677, + "grad_norm": 158.0, + "learning_rate": 2.4722222222222223e-05, + "loss": 11.0025, + "step": 178 + }, + { + "epoch": 0.007461131257554917, + "grad_norm": 191.0, + "learning_rate": 2.4861111111111114e-05, + "loss": 12.1264, + "step": 179 + }, + { + "epoch": 0.007502813555083156, + "grad_norm": 170.0, + "learning_rate": 2.5e-05, + "loss": 10.8136, + "step": 180 + }, + { + "epoch": 0.007544495852611396, + "grad_norm": 322.0, + "learning_rate": 2.513888888888889e-05, + "loss": 15.3767, + "step": 181 + }, + { + "epoch": 0.007586178150139636, + "grad_norm": 262.0, + "learning_rate": 2.527777777777778e-05, + "loss": 16.6263, + "step": 182 + }, + { + "epoch": 0.007627860447667875, + "grad_norm": 141.0, + "learning_rate": 2.5416666666666667e-05, + "loss": 10.4384, + "step": 183 + }, + { + "epoch": 0.007669542745196115, + "grad_norm": 223.0, + "learning_rate": 2.5555555555555554e-05, + "loss": 14.3758, + "step": 184 + }, + { + "epoch": 0.007711225042724355, + "grad_norm": 284.0, + "learning_rate": 2.5694444444444445e-05, + "loss": 19.001, + "step": 185 + }, + { + "epoch": 0.007752907340252595, + "grad_norm": 436.0, + "learning_rate": 2.5833333333333336e-05, + "loss": 25.5035, + "step": 186 + }, + { + "epoch": 0.007794589637780834, + "grad_norm": 218.0, + "learning_rate": 2.5972222222222226e-05, + "loss": 14.6892, + "step": 187 + }, + { + "epoch": 0.007836271935309074, + "grad_norm": 209.0, + "learning_rate": 2.6111111111111114e-05, + "loss": 15.0012, + "step": 188 + }, + { + "epoch": 0.007877954232837313, + "grad_norm": 264.0, + "learning_rate": 2.625e-05, + "loss": 18.1277, + "step": 189 + }, + { + "epoch": 0.007919636530365554, + "grad_norm": 148.0, + "learning_rate": 2.6388888888888892e-05, + "loss": 9.752, + "step": 190 + }, + { + "epoch": 0.007961318827893794, + "grad_norm": 202.0, + "learning_rate": 2.652777777777778e-05, + "loss": 11.5638, + "step": 191 + }, + { + "epoch": 0.008003001125422033, + "grad_norm": 274.0, + "learning_rate": 2.6666666666666667e-05, + "loss": 14.876, + "step": 192 + }, + { + "epoch": 0.008044683422950272, + "grad_norm": 246.0, + "learning_rate": 2.6805555555555557e-05, + "loss": 14.4382, + "step": 193 + }, + { + "epoch": 0.008086365720478514, + "grad_norm": 197.0, + "learning_rate": 2.6944444444444445e-05, + "loss": 10.6888, + "step": 194 + }, + { + "epoch": 0.008128048018006753, + "grad_norm": 396.0, + "learning_rate": 2.7083333333333332e-05, + "loss": 18.1294, + "step": 195 + }, + { + "epoch": 0.008169730315534992, + "grad_norm": 274.0, + "learning_rate": 2.7222222222222223e-05, + "loss": 17.1265, + "step": 196 + }, + { + "epoch": 0.008211412613063232, + "grad_norm": 155.0, + "learning_rate": 2.7361111111111114e-05, + "loss": 10.6907, + "step": 197 + }, + { + "epoch": 0.008253094910591471, + "grad_norm": 206.0, + "learning_rate": 2.7500000000000004e-05, + "loss": 13.6887, + "step": 198 + }, + { + "epoch": 0.008294777208119712, + "grad_norm": 246.0, + "learning_rate": 2.7638888888888892e-05, + "loss": 12.2511, + "step": 199 + }, + { + "epoch": 0.008336459505647952, + "grad_norm": 300.0, + "learning_rate": 2.777777777777778e-05, + "loss": 14.0012, + "step": 200 + }, + { + "epoch": 0.008378141803176191, + "grad_norm": 288.0, + "learning_rate": 2.791666666666667e-05, + "loss": 14.815, + "step": 201 + }, + { + "epoch": 0.00841982410070443, + "grad_norm": 314.0, + "learning_rate": 2.8055555555555557e-05, + "loss": 12.8143, + "step": 202 + }, + { + "epoch": 0.008461506398232671, + "grad_norm": 524.0, + "learning_rate": 2.8194444444444445e-05, + "loss": 28.1259, + "step": 203 + }, + { + "epoch": 0.00850318869576091, + "grad_norm": 142.0, + "learning_rate": 2.8333333333333335e-05, + "loss": 10.0023, + "step": 204 + }, + { + "epoch": 0.00854487099328915, + "grad_norm": 238.0, + "learning_rate": 2.8472222222222223e-05, + "loss": 13.3758, + "step": 205 + }, + { + "epoch": 0.00858655329081739, + "grad_norm": 322.0, + "learning_rate": 2.861111111111111e-05, + "loss": 18.6291, + "step": 206 + }, + { + "epoch": 0.008628235588345629, + "grad_norm": 320.0, + "learning_rate": 2.8749999999999997e-05, + "loss": 8.503, + "step": 207 + }, + { + "epoch": 0.00866991788587387, + "grad_norm": 310.0, + "learning_rate": 2.8888888888888888e-05, + "loss": 18.8761, + "step": 208 + }, + { + "epoch": 0.00871160018340211, + "grad_norm": 165.0, + "learning_rate": 2.9027777777777782e-05, + "loss": 10.1892, + "step": 209 + }, + { + "epoch": 0.008753282480930349, + "grad_norm": 286.0, + "learning_rate": 2.916666666666667e-05, + "loss": 15.6889, + "step": 210 + }, + { + "epoch": 0.008794964778458588, + "grad_norm": 141.0, + "learning_rate": 2.9305555555555557e-05, + "loss": 9.4395, + "step": 211 + }, + { + "epoch": 0.00883664707598683, + "grad_norm": 336.0, + "learning_rate": 2.9444444444444448e-05, + "loss": 16.3779, + "step": 212 + }, + { + "epoch": 0.008878329373515069, + "grad_norm": 452.0, + "learning_rate": 2.9583333333333335e-05, + "loss": 25.7515, + "step": 213 + }, + { + "epoch": 0.008920011671043308, + "grad_norm": 91.0, + "learning_rate": 2.9722222222222223e-05, + "loss": 5.0645, + "step": 214 + }, + { + "epoch": 0.008961693968571547, + "grad_norm": 222.0, + "learning_rate": 2.9861111111111113e-05, + "loss": 9.3767, + "step": 215 + }, + { + "epoch": 0.009003376266099787, + "grad_norm": 256.0, + "learning_rate": 3e-05, + "loss": 14.0634, + "step": 216 + }, + { + "epoch": 0.009045058563628028, + "grad_norm": 184.0, + "learning_rate": 3.0138888888888888e-05, + "loss": 11.6898, + "step": 217 + }, + { + "epoch": 0.009086740861156267, + "grad_norm": 474.0, + "learning_rate": 3.0277777777777776e-05, + "loss": 23.3761, + "step": 218 + }, + { + "epoch": 0.009128423158684507, + "grad_norm": 160.0, + "learning_rate": 3.0416666666666666e-05, + "loss": 9.815, + "step": 219 + }, + { + "epoch": 0.009170105456212746, + "grad_norm": 540.0, + "learning_rate": 3.055555555555556e-05, + "loss": 25.0059, + "step": 220 + }, + { + "epoch": 0.009211787753740985, + "grad_norm": 556.0, + "learning_rate": 3.069444444444445e-05, + "loss": 22.2527, + "step": 221 + }, + { + "epoch": 0.009253470051269226, + "grad_norm": 211.0, + "learning_rate": 3.0833333333333335e-05, + "loss": 11.7514, + "step": 222 + }, + { + "epoch": 0.009295152348797466, + "grad_norm": 344.0, + "learning_rate": 3.0972222222222226e-05, + "loss": 17.5021, + "step": 223 + }, + { + "epoch": 0.009336834646325705, + "grad_norm": 185.0, + "learning_rate": 3.111111111111111e-05, + "loss": 11.5019, + "step": 224 + }, + { + "epoch": 0.009378516943853945, + "grad_norm": 500.0, + "learning_rate": 3.125e-05, + "loss": 26.1286, + "step": 225 + }, + { + "epoch": 0.009420199241382186, + "grad_norm": 166.0, + "learning_rate": 3.138888888888889e-05, + "loss": 9.8135, + "step": 226 + }, + { + "epoch": 0.009461881538910425, + "grad_norm": 165.0, + "learning_rate": 3.1527777777777775e-05, + "loss": 9.7557, + "step": 227 + }, + { + "epoch": 0.009503563836438664, + "grad_norm": 860.0, + "learning_rate": 3.1666666666666666e-05, + "loss": 35.0053, + "step": 228 + }, + { + "epoch": 0.009545246133966904, + "grad_norm": 292.0, + "learning_rate": 3.180555555555556e-05, + "loss": 15.8763, + "step": 229 + }, + { + "epoch": 0.009586928431495143, + "grad_norm": 532.0, + "learning_rate": 3.194444444444444e-05, + "loss": 24.7514, + "step": 230 + }, + { + "epoch": 0.009628610729023384, + "grad_norm": 237.0, + "learning_rate": 3.208333333333334e-05, + "loss": 13.6887, + "step": 231 + }, + { + "epoch": 0.009670293026551624, + "grad_norm": 255.0, + "learning_rate": 3.222222222222223e-05, + "loss": 13.8142, + "step": 232 + }, + { + "epoch": 0.009711975324079863, + "grad_norm": 322.0, + "learning_rate": 3.236111111111111e-05, + "loss": 16.5009, + "step": 233 + }, + { + "epoch": 0.009753657621608102, + "grad_norm": 556.0, + "learning_rate": 3.2500000000000004e-05, + "loss": 29.3764, + "step": 234 + }, + { + "epoch": 0.009795339919136344, + "grad_norm": 432.0, + "learning_rate": 3.263888888888889e-05, + "loss": 18.1268, + "step": 235 + }, + { + "epoch": 0.009837022216664583, + "grad_norm": 384.0, + "learning_rate": 3.277777777777778e-05, + "loss": 19.0027, + "step": 236 + }, + { + "epoch": 0.009878704514192822, + "grad_norm": 300.0, + "learning_rate": 3.291666666666667e-05, + "loss": 14.8811, + "step": 237 + }, + { + "epoch": 0.009920386811721062, + "grad_norm": 360.0, + "learning_rate": 3.3055555555555553e-05, + "loss": 18.002, + "step": 238 + }, + { + "epoch": 0.009962069109249301, + "grad_norm": 207.0, + "learning_rate": 3.3194444444444444e-05, + "loss": 11.5636, + "step": 239 + }, + { + "epoch": 0.010003751406777542, + "grad_norm": 384.0, + "learning_rate": 3.3333333333333335e-05, + "loss": 18.3788, + "step": 240 + }, + { + "epoch": 0.010045433704305782, + "grad_norm": 446.0, + "learning_rate": 3.347222222222222e-05, + "loss": 20.752, + "step": 241 + }, + { + "epoch": 0.010087116001834021, + "grad_norm": 236.0, + "learning_rate": 3.3611111111111116e-05, + "loss": 9.6283, + "step": 242 + }, + { + "epoch": 0.01012879829936226, + "grad_norm": 372.0, + "learning_rate": 3.375000000000001e-05, + "loss": 18.3779, + "step": 243 + }, + { + "epoch": 0.010170480596890501, + "grad_norm": 201.0, + "learning_rate": 3.388888888888889e-05, + "loss": 10.3767, + "step": 244 + }, + { + "epoch": 0.01021216289441874, + "grad_norm": 380.0, + "learning_rate": 3.402777777777778e-05, + "loss": 14.7528, + "step": 245 + }, + { + "epoch": 0.01025384519194698, + "grad_norm": 230.0, + "learning_rate": 3.4166666666666666e-05, + "loss": 11.8766, + "step": 246 + }, + { + "epoch": 0.01029552748947522, + "grad_norm": 170.0, + "learning_rate": 3.430555555555556e-05, + "loss": 8.3163, + "step": 247 + }, + { + "epoch": 0.010337209787003459, + "grad_norm": 308.0, + "learning_rate": 3.444444444444445e-05, + "loss": 13.6312, + "step": 248 + }, + { + "epoch": 0.0103788920845317, + "grad_norm": 636.0, + "learning_rate": 3.458333333333333e-05, + "loss": 26.6308, + "step": 249 + }, + { + "epoch": 0.01042057438205994, + "grad_norm": 237.0, + "learning_rate": 3.472222222222222e-05, + "loss": 12.1259, + "step": 250 + }, + { + "epoch": 0.010462256679588179, + "grad_norm": 446.0, + "learning_rate": 3.486111111111111e-05, + "loss": 19.002, + "step": 251 + }, + { + "epoch": 0.010503938977116418, + "grad_norm": 231.0, + "learning_rate": 3.5e-05, + "loss": 11.6901, + "step": 252 + }, + { + "epoch": 0.01054562127464466, + "grad_norm": 498.0, + "learning_rate": 3.513888888888889e-05, + "loss": 22.6262, + "step": 253 + }, + { + "epoch": 0.010587303572172899, + "grad_norm": 294.0, + "learning_rate": 3.527777777777778e-05, + "loss": 13.3141, + "step": 254 + }, + { + "epoch": 0.010628985869701138, + "grad_norm": 207.0, + "learning_rate": 3.541666666666667e-05, + "loss": 11.0634, + "step": 255 + }, + { + "epoch": 0.010670668167229377, + "grad_norm": 432.0, + "learning_rate": 3.555555555555556e-05, + "loss": 15.3772, + "step": 256 + }, + { + "epoch": 0.010712350464757617, + "grad_norm": 112.5, + "learning_rate": 3.5694444444444444e-05, + "loss": 6.4409, + "step": 257 + }, + { + "epoch": 0.010754032762285858, + "grad_norm": 232.0, + "learning_rate": 3.5833333333333335e-05, + "loss": 11.4391, + "step": 258 + }, + { + "epoch": 0.010795715059814097, + "grad_norm": 284.0, + "learning_rate": 3.5972222222222225e-05, + "loss": 14.0653, + "step": 259 + }, + { + "epoch": 0.010837397357342337, + "grad_norm": 191.0, + "learning_rate": 3.611111111111111e-05, + "loss": 10.8762, + "step": 260 + }, + { + "epoch": 0.010879079654870576, + "grad_norm": 302.0, + "learning_rate": 3.625e-05, + "loss": 13.3143, + "step": 261 + }, + { + "epoch": 0.010920761952398815, + "grad_norm": 237.0, + "learning_rate": 3.638888888888889e-05, + "loss": 11.692, + "step": 262 + }, + { + "epoch": 0.010962444249927056, + "grad_norm": 708.0, + "learning_rate": 3.6527777777777775e-05, + "loss": 28.6281, + "step": 263 + }, + { + "epoch": 0.011004126547455296, + "grad_norm": 171.0, + "learning_rate": 3.6666666666666666e-05, + "loss": 6.9384, + "step": 264 + }, + { + "epoch": 0.011045808844983535, + "grad_norm": 376.0, + "learning_rate": 3.6805555555555556e-05, + "loss": 17.251, + "step": 265 + }, + { + "epoch": 0.011087491142511775, + "grad_norm": 308.0, + "learning_rate": 3.694444444444445e-05, + "loss": 14.2521, + "step": 266 + }, + { + "epoch": 0.011129173440040016, + "grad_norm": 294.0, + "learning_rate": 3.708333333333334e-05, + "loss": 12.5647, + "step": 267 + }, + { + "epoch": 0.011170855737568255, + "grad_norm": 233.0, + "learning_rate": 3.722222222222222e-05, + "loss": 11.6916, + "step": 268 + }, + { + "epoch": 0.011212538035096494, + "grad_norm": 249.0, + "learning_rate": 3.736111111111111e-05, + "loss": 11.7572, + "step": 269 + }, + { + "epoch": 0.011254220332624734, + "grad_norm": 314.0, + "learning_rate": 3.7500000000000003e-05, + "loss": 15.6262, + "step": 270 + }, + { + "epoch": 0.011295902630152973, + "grad_norm": 378.0, + "learning_rate": 3.763888888888889e-05, + "loss": 14.7516, + "step": 271 + }, + { + "epoch": 0.011337584927681214, + "grad_norm": 304.0, + "learning_rate": 3.777777777777778e-05, + "loss": 13.9387, + "step": 272 + }, + { + "epoch": 0.011379267225209454, + "grad_norm": 190.0, + "learning_rate": 3.791666666666667e-05, + "loss": 10.4393, + "step": 273 + }, + { + "epoch": 0.011420949522737693, + "grad_norm": 243.0, + "learning_rate": 3.805555555555555e-05, + "loss": 12.7512, + "step": 274 + }, + { + "epoch": 0.011462631820265932, + "grad_norm": 340.0, + "learning_rate": 3.8194444444444444e-05, + "loss": 16.002, + "step": 275 + }, + { + "epoch": 0.011504314117794174, + "grad_norm": 426.0, + "learning_rate": 3.8333333333333334e-05, + "loss": 18.7518, + "step": 276 + }, + { + "epoch": 0.011545996415322413, + "grad_norm": 316.0, + "learning_rate": 3.8472222222222225e-05, + "loss": 14.1893, + "step": 277 + }, + { + "epoch": 0.011587678712850652, + "grad_norm": 135.0, + "learning_rate": 3.8611111111111116e-05, + "loss": 7.7848, + "step": 278 + }, + { + "epoch": 0.011629361010378892, + "grad_norm": 218.0, + "learning_rate": 3.875e-05, + "loss": 10.94, + "step": 279 + }, + { + "epoch": 0.011671043307907131, + "grad_norm": 412.0, + "learning_rate": 3.888888888888889e-05, + "loss": 18.8758, + "step": 280 + }, + { + "epoch": 0.011712725605435372, + "grad_norm": 129.0, + "learning_rate": 3.902777777777778e-05, + "loss": 7.0642, + "step": 281 + }, + { + "epoch": 0.011754407902963612, + "grad_norm": 228.0, + "learning_rate": 3.9166666666666665e-05, + "loss": 10.0638, + "step": 282 + }, + { + "epoch": 0.011796090200491851, + "grad_norm": 192.0, + "learning_rate": 3.9305555555555556e-05, + "loss": 10.2527, + "step": 283 + }, + { + "epoch": 0.01183777249802009, + "grad_norm": 344.0, + "learning_rate": 3.944444444444445e-05, + "loss": 15.5049, + "step": 284 + }, + { + "epoch": 0.011879454795548331, + "grad_norm": 239.0, + "learning_rate": 3.958333333333333e-05, + "loss": 11.6889, + "step": 285 + }, + { + "epoch": 0.01192113709307657, + "grad_norm": 402.0, + "learning_rate": 3.972222222222222e-05, + "loss": 15.0027, + "step": 286 + }, + { + "epoch": 0.01196281939060481, + "grad_norm": 426.0, + "learning_rate": 3.986111111111111e-05, + "loss": 19.376, + "step": 287 + }, + { + "epoch": 0.01200450168813305, + "grad_norm": 254.0, + "learning_rate": 4e-05, + "loss": 11.9404, + "step": 288 + }, + { + "epoch": 0.012046183985661289, + "grad_norm": 402.0, + "learning_rate": 4.0138888888888894e-05, + "loss": 17.3774, + "step": 289 + }, + { + "epoch": 0.01208786628318953, + "grad_norm": 434.0, + "learning_rate": 4.027777777777778e-05, + "loss": 16.6274, + "step": 290 + }, + { + "epoch": 0.01212954858071777, + "grad_norm": 418.0, + "learning_rate": 4.041666666666667e-05, + "loss": 16.1263, + "step": 291 + }, + { + "epoch": 0.012171230878246009, + "grad_norm": 255.0, + "learning_rate": 4.055555555555556e-05, + "loss": 9.3773, + "step": 292 + }, + { + "epoch": 0.012212913175774248, + "grad_norm": 298.0, + "learning_rate": 4.0694444444444444e-05, + "loss": 11.8143, + "step": 293 + }, + { + "epoch": 0.01225459547330249, + "grad_norm": 396.0, + "learning_rate": 4.0833333333333334e-05, + "loss": 17.127, + "step": 294 + }, + { + "epoch": 0.012296277770830729, + "grad_norm": 270.0, + "learning_rate": 4.0972222222222225e-05, + "loss": 12.6888, + "step": 295 + }, + { + "epoch": 0.012337960068358968, + "grad_norm": 255.0, + "learning_rate": 4.111111111111111e-05, + "loss": 11.1893, + "step": 296 + }, + { + "epoch": 0.012379642365887207, + "grad_norm": 1040.0, + "learning_rate": 4.125e-05, + "loss": 36.7561, + "step": 297 + }, + { + "epoch": 0.012421324663415447, + "grad_norm": 402.0, + "learning_rate": 4.138888888888889e-05, + "loss": 13.6943, + "step": 298 + }, + { + "epoch": 0.012463006960943688, + "grad_norm": 253.0, + "learning_rate": 4.152777777777778e-05, + "loss": 12.1269, + "step": 299 + }, + { + "epoch": 0.012504689258471927, + "grad_norm": 600.0, + "learning_rate": 4.166666666666667e-05, + "loss": 20.3756, + "step": 300 + }, + { + "epoch": 0.012546371556000167, + "grad_norm": 360.0, + "learning_rate": 4.1805555555555556e-05, + "loss": 15.6265, + "step": 301 + }, + { + "epoch": 0.012588053853528406, + "grad_norm": 334.0, + "learning_rate": 4.194444444444445e-05, + "loss": 14.1899, + "step": 302 + }, + { + "epoch": 0.012629736151056645, + "grad_norm": 207.0, + "learning_rate": 4.208333333333334e-05, + "loss": 10.3152, + "step": 303 + }, + { + "epoch": 0.012671418448584887, + "grad_norm": 376.0, + "learning_rate": 4.222222222222222e-05, + "loss": 15.0636, + "step": 304 + }, + { + "epoch": 0.012713100746113126, + "grad_norm": 282.0, + "learning_rate": 4.236111111111111e-05, + "loss": 12.8765, + "step": 305 + }, + { + "epoch": 0.012754783043641365, + "grad_norm": 478.0, + "learning_rate": 4.25e-05, + "loss": 18.6258, + "step": 306 + }, + { + "epoch": 0.012796465341169605, + "grad_norm": 312.0, + "learning_rate": 4.263888888888889e-05, + "loss": 14.5663, + "step": 307 + }, + { + "epoch": 0.012838147638697846, + "grad_norm": 326.0, + "learning_rate": 4.277777777777778e-05, + "loss": 15.3764, + "step": 308 + }, + { + "epoch": 0.012879829936226085, + "grad_norm": 251.0, + "learning_rate": 4.291666666666667e-05, + "loss": 11.5634, + "step": 309 + }, + { + "epoch": 0.012921512233754325, + "grad_norm": 292.0, + "learning_rate": 4.305555555555556e-05, + "loss": 12.6264, + "step": 310 + }, + { + "epoch": 0.012963194531282564, + "grad_norm": 296.0, + "learning_rate": 4.319444444444445e-05, + "loss": 13.1255, + "step": 311 + }, + { + "epoch": 0.013004876828810803, + "grad_norm": 676.0, + "learning_rate": 4.3333333333333334e-05, + "loss": 21.8778, + "step": 312 + }, + { + "epoch": 0.013046559126339044, + "grad_norm": 488.0, + "learning_rate": 4.3472222222222225e-05, + "loss": 17.877, + "step": 313 + }, + { + "epoch": 0.013088241423867284, + "grad_norm": 344.0, + "learning_rate": 4.3611111111111116e-05, + "loss": 14.0636, + "step": 314 + }, + { + "epoch": 0.013129923721395523, + "grad_norm": 466.0, + "learning_rate": 4.375e-05, + "loss": 17.6263, + "step": 315 + }, + { + "epoch": 0.013171606018923762, + "grad_norm": 366.0, + "learning_rate": 4.388888888888889e-05, + "loss": 15.6883, + "step": 316 + }, + { + "epoch": 0.013213288316452004, + "grad_norm": 348.0, + "learning_rate": 4.402777777777778e-05, + "loss": 16.2511, + "step": 317 + }, + { + "epoch": 0.013254970613980243, + "grad_norm": 394.0, + "learning_rate": 4.4166666666666665e-05, + "loss": 16.7506, + "step": 318 + }, + { + "epoch": 0.013296652911508482, + "grad_norm": 668.0, + "learning_rate": 4.4305555555555556e-05, + "loss": 25.0008, + "step": 319 + }, + { + "epoch": 0.013338335209036722, + "grad_norm": 238.0, + "learning_rate": 4.4444444444444447e-05, + "loss": 10.9398, + "step": 320 + }, + { + "epoch": 0.013380017506564961, + "grad_norm": 384.0, + "learning_rate": 4.458333333333334e-05, + "loss": 16.6257, + "step": 321 + }, + { + "epoch": 0.013421699804093202, + "grad_norm": 280.0, + "learning_rate": 4.472222222222223e-05, + "loss": 12.0635, + "step": 322 + }, + { + "epoch": 0.013463382101621442, + "grad_norm": 223.0, + "learning_rate": 4.486111111111111e-05, + "loss": 10.5013, + "step": 323 + }, + { + "epoch": 0.013505064399149681, + "grad_norm": 326.0, + "learning_rate": 4.5e-05, + "loss": 14.002, + "step": 324 + }, + { + "epoch": 0.01354674669667792, + "grad_norm": 800.0, + "learning_rate": 4.5138888888888894e-05, + "loss": 30.1258, + "step": 325 + }, + { + "epoch": 0.013588428994206161, + "grad_norm": 214.0, + "learning_rate": 4.527777777777778e-05, + "loss": 10.1891, + "step": 326 + }, + { + "epoch": 0.0136301112917344, + "grad_norm": 354.0, + "learning_rate": 4.541666666666667e-05, + "loss": 14.9387, + "step": 327 + }, + { + "epoch": 0.01367179358926264, + "grad_norm": 608.0, + "learning_rate": 4.555555555555556e-05, + "loss": 21.5015, + "step": 328 + }, + { + "epoch": 0.01371347588679088, + "grad_norm": 596.0, + "learning_rate": 4.569444444444444e-05, + "loss": 20.8817, + "step": 329 + }, + { + "epoch": 0.013755158184319119, + "grad_norm": 394.0, + "learning_rate": 4.5833333333333334e-05, + "loss": 15.1888, + "step": 330 + }, + { + "epoch": 0.01379684048184736, + "grad_norm": 572.0, + "learning_rate": 4.5972222222222225e-05, + "loss": 19.8788, + "step": 331 + }, + { + "epoch": 0.0138385227793756, + "grad_norm": 520.0, + "learning_rate": 4.6111111111111115e-05, + "loss": 19.8771, + "step": 332 + }, + { + "epoch": 0.013880205076903839, + "grad_norm": 247.0, + "learning_rate": 4.6250000000000006e-05, + "loss": 11.5039, + "step": 333 + }, + { + "epoch": 0.013921887374432078, + "grad_norm": 262.0, + "learning_rate": 4.638888888888889e-05, + "loss": 12.0652, + "step": 334 + }, + { + "epoch": 0.01396356967196032, + "grad_norm": 300.0, + "learning_rate": 4.652777777777778e-05, + "loss": 11.1885, + "step": 335 + }, + { + "epoch": 0.014005251969488559, + "grad_norm": 254.0, + "learning_rate": 4.666666666666667e-05, + "loss": 12.0649, + "step": 336 + }, + { + "epoch": 0.014046934267016798, + "grad_norm": 322.0, + "learning_rate": 4.6805555555555556e-05, + "loss": 14.5639, + "step": 337 + }, + { + "epoch": 0.014088616564545037, + "grad_norm": 180.0, + "learning_rate": 4.6944444444444446e-05, + "loss": 7.6892, + "step": 338 + }, + { + "epoch": 0.014130298862073277, + "grad_norm": 332.0, + "learning_rate": 4.708333333333334e-05, + "loss": 14.7508, + "step": 339 + }, + { + "epoch": 0.014171981159601518, + "grad_norm": 322.0, + "learning_rate": 4.722222222222222e-05, + "loss": 12.5636, + "step": 340 + }, + { + "epoch": 0.014213663457129757, + "grad_norm": 406.0, + "learning_rate": 4.736111111111111e-05, + "loss": 15.1924, + "step": 341 + }, + { + "epoch": 0.014255345754657997, + "grad_norm": 512.0, + "learning_rate": 4.75e-05, + "loss": 17.131, + "step": 342 + }, + { + "epoch": 0.014297028052186236, + "grad_norm": 260.0, + "learning_rate": 4.7638888888888887e-05, + "loss": 10.5006, + "step": 343 + }, + { + "epoch": 0.014338710349714475, + "grad_norm": 278.0, + "learning_rate": 4.7777777777777784e-05, + "loss": 12.0663, + "step": 344 + }, + { + "epoch": 0.014380392647242717, + "grad_norm": 808.0, + "learning_rate": 4.791666666666667e-05, + "loss": 29.2507, + "step": 345 + }, + { + "epoch": 0.014422074944770956, + "grad_norm": 232.0, + "learning_rate": 4.805555555555556e-05, + "loss": 8.5638, + "step": 346 + }, + { + "epoch": 0.014463757242299195, + "grad_norm": 364.0, + "learning_rate": 4.819444444444445e-05, + "loss": 11.3759, + "step": 347 + }, + { + "epoch": 0.014505439539827435, + "grad_norm": 572.0, + "learning_rate": 4.8333333333333334e-05, + "loss": 18.3781, + "step": 348 + }, + { + "epoch": 0.014547121837355676, + "grad_norm": 420.0, + "learning_rate": 4.8472222222222224e-05, + "loss": 16.1259, + "step": 349 + }, + { + "epoch": 0.014588804134883915, + "grad_norm": 406.0, + "learning_rate": 4.8611111111111115e-05, + "loss": 15.001, + "step": 350 + }, + { + "epoch": 0.014630486432412155, + "grad_norm": 684.0, + "learning_rate": 4.875e-05, + "loss": 23.7515, + "step": 351 + }, + { + "epoch": 0.014672168729940394, + "grad_norm": 438.0, + "learning_rate": 4.888888888888889e-05, + "loss": 15.2517, + "step": 352 + }, + { + "epoch": 0.014713851027468633, + "grad_norm": 246.0, + "learning_rate": 4.902777777777778e-05, + "loss": 9.5636, + "step": 353 + }, + { + "epoch": 0.014755533324996874, + "grad_norm": 332.0, + "learning_rate": 4.9166666666666665e-05, + "loss": 12.7509, + "step": 354 + }, + { + "epoch": 0.014797215622525114, + "grad_norm": 202.0, + "learning_rate": 4.930555555555556e-05, + "loss": 10.5659, + "step": 355 + }, + { + "epoch": 0.014838897920053353, + "grad_norm": 197.0, + "learning_rate": 4.9444444444444446e-05, + "loss": 9.3782, + "step": 356 + }, + { + "epoch": 0.014880580217581593, + "grad_norm": 306.0, + "learning_rate": 4.958333333333334e-05, + "loss": 13.3147, + "step": 357 + }, + { + "epoch": 0.014922262515109834, + "grad_norm": 217.0, + "learning_rate": 4.972222222222223e-05, + "loss": 10.6889, + "step": 358 + }, + { + "epoch": 0.014963944812638073, + "grad_norm": 304.0, + "learning_rate": 4.986111111111111e-05, + "loss": 13.4383, + "step": 359 + }, + { + "epoch": 0.015005627110166312, + "grad_norm": 442.0, + "learning_rate": 5e-05, + "loss": 16.7517, + "step": 360 + }, + { + "epoch": 0.015047309407694552, + "grad_norm": 354.0, + "learning_rate": 5.013888888888889e-05, + "loss": 14.8143, + "step": 361 + }, + { + "epoch": 0.015088991705222791, + "grad_norm": 408.0, + "learning_rate": 5.027777777777778e-05, + "loss": 16.6269, + "step": 362 + }, + { + "epoch": 0.015130674002751032, + "grad_norm": 1184.0, + "learning_rate": 5.041666666666667e-05, + "loss": 33.0067, + "step": 363 + }, + { + "epoch": 0.015172356300279272, + "grad_norm": 374.0, + "learning_rate": 5.055555555555556e-05, + "loss": 15.6888, + "step": 364 + }, + { + "epoch": 0.015214038597807511, + "grad_norm": 181.0, + "learning_rate": 5.069444444444444e-05, + "loss": 9.0024, + "step": 365 + }, + { + "epoch": 0.01525572089533575, + "grad_norm": 456.0, + "learning_rate": 5.0833333333333333e-05, + "loss": 18.7506, + "step": 366 + }, + { + "epoch": 0.015297403192863991, + "grad_norm": 326.0, + "learning_rate": 5.0972222222222224e-05, + "loss": 13.8777, + "step": 367 + }, + { + "epoch": 0.01533908549039223, + "grad_norm": 330.0, + "learning_rate": 5.111111111111111e-05, + "loss": 13.3765, + "step": 368 + }, + { + "epoch": 0.01538076778792047, + "grad_norm": 328.0, + "learning_rate": 5.125e-05, + "loss": 12.3162, + "step": 369 + }, + { + "epoch": 0.01542245008544871, + "grad_norm": 398.0, + "learning_rate": 5.138888888888889e-05, + "loss": 16.1265, + "step": 370 + }, + { + "epoch": 0.015464132382976949, + "grad_norm": 1352.0, + "learning_rate": 5.1527777777777774e-05, + "loss": 36.7557, + "step": 371 + }, + { + "epoch": 0.01550581468050519, + "grad_norm": 274.0, + "learning_rate": 5.166666666666667e-05, + "loss": 12.0641, + "step": 372 + }, + { + "epoch": 0.01554749697803343, + "grad_norm": 424.0, + "learning_rate": 5.180555555555556e-05, + "loss": 18.3762, + "step": 373 + }, + { + "epoch": 0.015589179275561669, + "grad_norm": 472.0, + "learning_rate": 5.194444444444445e-05, + "loss": 18.3777, + "step": 374 + }, + { + "epoch": 0.015630861573089908, + "grad_norm": 326.0, + "learning_rate": 5.208333333333334e-05, + "loss": 14.8144, + "step": 375 + }, + { + "epoch": 0.015672543870618148, + "grad_norm": 249.0, + "learning_rate": 5.222222222222223e-05, + "loss": 11.6277, + "step": 376 + }, + { + "epoch": 0.015714226168146387, + "grad_norm": 243.0, + "learning_rate": 5.236111111111112e-05, + "loss": 10.3148, + "step": 377 + }, + { + "epoch": 0.015755908465674626, + "grad_norm": 768.0, + "learning_rate": 5.25e-05, + "loss": 23.6287, + "step": 378 + }, + { + "epoch": 0.01579759076320287, + "grad_norm": 412.0, + "learning_rate": 5.263888888888889e-05, + "loss": 17.2519, + "step": 379 + }, + { + "epoch": 0.01583927306073111, + "grad_norm": 208.0, + "learning_rate": 5.2777777777777784e-05, + "loss": 9.6261, + "step": 380 + }, + { + "epoch": 0.015880955358259348, + "grad_norm": 248.0, + "learning_rate": 5.291666666666667e-05, + "loss": 11.1901, + "step": 381 + }, + { + "epoch": 0.015922637655787587, + "grad_norm": 398.0, + "learning_rate": 5.305555555555556e-05, + "loss": 14.9392, + "step": 382 + }, + { + "epoch": 0.015964319953315827, + "grad_norm": 255.0, + "learning_rate": 5.319444444444445e-05, + "loss": 9.5632, + "step": 383 + }, + { + "epoch": 0.016006002250844066, + "grad_norm": 209.0, + "learning_rate": 5.333333333333333e-05, + "loss": 11.1273, + "step": 384 + }, + { + "epoch": 0.016047684548372305, + "grad_norm": 386.0, + "learning_rate": 5.3472222222222224e-05, + "loss": 15.003, + "step": 385 + }, + { + "epoch": 0.016089366845900545, + "grad_norm": 276.0, + "learning_rate": 5.3611111111111115e-05, + "loss": 10.4406, + "step": 386 + }, + { + "epoch": 0.016131049143428784, + "grad_norm": 191.0, + "learning_rate": 5.375e-05, + "loss": 8.0641, + "step": 387 + }, + { + "epoch": 0.016172731440957027, + "grad_norm": 528.0, + "learning_rate": 5.388888888888889e-05, + "loss": 18.3764, + "step": 388 + }, + { + "epoch": 0.016214413738485266, + "grad_norm": 976.0, + "learning_rate": 5.402777777777778e-05, + "loss": 26.1317, + "step": 389 + }, + { + "epoch": 0.016256096036013506, + "grad_norm": 480.0, + "learning_rate": 5.4166666666666664e-05, + "loss": 16.7522, + "step": 390 + }, + { + "epoch": 0.016297778333541745, + "grad_norm": 181.0, + "learning_rate": 5.4305555555555555e-05, + "loss": 9.6897, + "step": 391 + }, + { + "epoch": 0.016339460631069985, + "grad_norm": 580.0, + "learning_rate": 5.4444444444444446e-05, + "loss": 22.5013, + "step": 392 + }, + { + "epoch": 0.016381142928598224, + "grad_norm": 334.0, + "learning_rate": 5.458333333333333e-05, + "loss": 13.6272, + "step": 393 + }, + { + "epoch": 0.016422825226126463, + "grad_norm": 382.0, + "learning_rate": 5.472222222222223e-05, + "loss": 15.5636, + "step": 394 + }, + { + "epoch": 0.016464507523654703, + "grad_norm": 700.0, + "learning_rate": 5.486111111111112e-05, + "loss": 24.6256, + "step": 395 + }, + { + "epoch": 0.016506189821182942, + "grad_norm": 484.0, + "learning_rate": 5.500000000000001e-05, + "loss": 18.3771, + "step": 396 + }, + { + "epoch": 0.016547872118711185, + "grad_norm": 249.0, + "learning_rate": 5.513888888888889e-05, + "loss": 10.1887, + "step": 397 + }, + { + "epoch": 0.016589554416239424, + "grad_norm": 408.0, + "learning_rate": 5.5277777777777783e-05, + "loss": 14.9418, + "step": 398 + }, + { + "epoch": 0.016631236713767664, + "grad_norm": 136.0, + "learning_rate": 5.5416666666666674e-05, + "loss": 8.0015, + "step": 399 + }, + { + "epoch": 0.016672919011295903, + "grad_norm": 224.0, + "learning_rate": 5.555555555555556e-05, + "loss": 9.9386, + "step": 400 + }, + { + "epoch": 0.016714601308824142, + "grad_norm": 364.0, + "learning_rate": 5.569444444444445e-05, + "loss": 14.8761, + "step": 401 + }, + { + "epoch": 0.016756283606352382, + "grad_norm": 382.0, + "learning_rate": 5.583333333333334e-05, + "loss": 15.0014, + "step": 402 + }, + { + "epoch": 0.01679796590388062, + "grad_norm": 696.0, + "learning_rate": 5.5972222222222224e-05, + "loss": 23.2518, + "step": 403 + }, + { + "epoch": 0.01683964820140886, + "grad_norm": 474.0, + "learning_rate": 5.6111111111111114e-05, + "loss": 16.376, + "step": 404 + }, + { + "epoch": 0.0168813304989371, + "grad_norm": 364.0, + "learning_rate": 5.6250000000000005e-05, + "loss": 14.9388, + "step": 405 + }, + { + "epoch": 0.016923012796465343, + "grad_norm": 864.0, + "learning_rate": 5.638888888888889e-05, + "loss": 26.252, + "step": 406 + }, + { + "epoch": 0.016964695093993582, + "grad_norm": 388.0, + "learning_rate": 5.652777777777778e-05, + "loss": 15.0022, + "step": 407 + }, + { + "epoch": 0.01700637739152182, + "grad_norm": 418.0, + "learning_rate": 5.666666666666667e-05, + "loss": 16.3761, + "step": 408 + }, + { + "epoch": 0.01704805968905006, + "grad_norm": 350.0, + "learning_rate": 5.6805555555555555e-05, + "loss": 15.2511, + "step": 409 + }, + { + "epoch": 0.0170897419865783, + "grad_norm": 374.0, + "learning_rate": 5.6944444444444445e-05, + "loss": 14.127, + "step": 410 + }, + { + "epoch": 0.01713142428410654, + "grad_norm": 186.0, + "learning_rate": 5.7083333333333336e-05, + "loss": 9.1889, + "step": 411 + }, + { + "epoch": 0.01717310658163478, + "grad_norm": 370.0, + "learning_rate": 5.722222222222222e-05, + "loss": 14.1263, + "step": 412 + }, + { + "epoch": 0.01721478887916302, + "grad_norm": 828.0, + "learning_rate": 5.736111111111111e-05, + "loss": 25.2509, + "step": 413 + }, + { + "epoch": 0.017256471176691258, + "grad_norm": 608.0, + "learning_rate": 5.7499999999999995e-05, + "loss": 19.7516, + "step": 414 + }, + { + "epoch": 0.0172981534742195, + "grad_norm": 368.0, + "learning_rate": 5.7638888888888886e-05, + "loss": 15.1261, + "step": 415 + }, + { + "epoch": 0.01733983577174774, + "grad_norm": 248.0, + "learning_rate": 5.7777777777777776e-05, + "loss": 11.0052, + "step": 416 + }, + { + "epoch": 0.01738151806927598, + "grad_norm": 288.0, + "learning_rate": 5.7916666666666674e-05, + "loss": 11.7511, + "step": 417 + }, + { + "epoch": 0.01742320036680422, + "grad_norm": 468.0, + "learning_rate": 5.8055555555555565e-05, + "loss": 17.5006, + "step": 418 + }, + { + "epoch": 0.017464882664332458, + "grad_norm": 350.0, + "learning_rate": 5.819444444444445e-05, + "loss": 13.4405, + "step": 419 + }, + { + "epoch": 0.017506564961860697, + "grad_norm": 404.0, + "learning_rate": 5.833333333333334e-05, + "loss": 15.4385, + "step": 420 + }, + { + "epoch": 0.017548247259388937, + "grad_norm": 960.0, + "learning_rate": 5.847222222222223e-05, + "loss": 26.632, + "step": 421 + }, + { + "epoch": 0.017589929556917176, + "grad_norm": 468.0, + "learning_rate": 5.8611111111111114e-05, + "loss": 17.751, + "step": 422 + }, + { + "epoch": 0.017631611854445416, + "grad_norm": 696.0, + "learning_rate": 5.8750000000000005e-05, + "loss": 25.752, + "step": 423 + }, + { + "epoch": 0.01767329415197366, + "grad_norm": 276.0, + "learning_rate": 5.8888888888888896e-05, + "loss": 11.564, + "step": 424 + }, + { + "epoch": 0.017714976449501898, + "grad_norm": 504.0, + "learning_rate": 5.902777777777778e-05, + "loss": 15.8804, + "step": 425 + }, + { + "epoch": 0.017756658747030137, + "grad_norm": 464.0, + "learning_rate": 5.916666666666667e-05, + "loss": 14.4402, + "step": 426 + }, + { + "epoch": 0.017798341044558377, + "grad_norm": 684.0, + "learning_rate": 5.930555555555556e-05, + "loss": 22.1258, + "step": 427 + }, + { + "epoch": 0.017840023342086616, + "grad_norm": 462.0, + "learning_rate": 5.9444444444444445e-05, + "loss": 16.3761, + "step": 428 + }, + { + "epoch": 0.017881705639614855, + "grad_norm": 320.0, + "learning_rate": 5.9583333333333336e-05, + "loss": 12.7511, + "step": 429 + }, + { + "epoch": 0.017923387937143095, + "grad_norm": 462.0, + "learning_rate": 5.972222222222223e-05, + "loss": 15.4393, + "step": 430 + }, + { + "epoch": 0.017965070234671334, + "grad_norm": 326.0, + "learning_rate": 5.986111111111111e-05, + "loss": 13.0014, + "step": 431 + }, + { + "epoch": 0.018006752532199573, + "grad_norm": 278.0, + "learning_rate": 6e-05, + "loss": 12.4392, + "step": 432 + }, + { + "epoch": 0.018048434829727816, + "grad_norm": 404.0, + "learning_rate": 6.013888888888889e-05, + "loss": 13.3139, + "step": 433 + }, + { + "epoch": 0.018090117127256056, + "grad_norm": 584.0, + "learning_rate": 6.0277777777777776e-05, + "loss": 19.6259, + "step": 434 + }, + { + "epoch": 0.018131799424784295, + "grad_norm": 328.0, + "learning_rate": 6.041666666666667e-05, + "loss": 12.6272, + "step": 435 + }, + { + "epoch": 0.018173481722312534, + "grad_norm": 142.0, + "learning_rate": 6.055555555555555e-05, + "loss": 8.3776, + "step": 436 + }, + { + "epoch": 0.018215164019840774, + "grad_norm": 600.0, + "learning_rate": 6.069444444444444e-05, + "loss": 20.0029, + "step": 437 + }, + { + "epoch": 0.018256846317369013, + "grad_norm": 560.0, + "learning_rate": 6.083333333333333e-05, + "loss": 19.001, + "step": 438 + }, + { + "epoch": 0.018298528614897253, + "grad_norm": 494.0, + "learning_rate": 6.097222222222223e-05, + "loss": 15.2508, + "step": 439 + }, + { + "epoch": 0.018340210912425492, + "grad_norm": 147.0, + "learning_rate": 6.111111111111112e-05, + "loss": 7.6587, + "step": 440 + }, + { + "epoch": 0.01838189320995373, + "grad_norm": 332.0, + "learning_rate": 6.125000000000001e-05, + "loss": 13.1881, + "step": 441 + }, + { + "epoch": 0.01842357550748197, + "grad_norm": 652.0, + "learning_rate": 6.13888888888889e-05, + "loss": 19.3818, + "step": 442 + }, + { + "epoch": 0.018465257805010214, + "grad_norm": 740.0, + "learning_rate": 6.152777777777778e-05, + "loss": 24.0007, + "step": 443 + }, + { + "epoch": 0.018506940102538453, + "grad_norm": 404.0, + "learning_rate": 6.166666666666667e-05, + "loss": 14.9387, + "step": 444 + }, + { + "epoch": 0.018548622400066692, + "grad_norm": 334.0, + "learning_rate": 6.180555555555556e-05, + "loss": 12.4396, + "step": 445 + }, + { + "epoch": 0.01859030469759493, + "grad_norm": 412.0, + "learning_rate": 6.194444444444445e-05, + "loss": 15.4396, + "step": 446 + }, + { + "epoch": 0.01863198699512317, + "grad_norm": 410.0, + "learning_rate": 6.208333333333334e-05, + "loss": 14.2508, + "step": 447 + }, + { + "epoch": 0.01867366929265141, + "grad_norm": 336.0, + "learning_rate": 6.222222222222222e-05, + "loss": 12.5053, + "step": 448 + }, + { + "epoch": 0.01871535159017965, + "grad_norm": 580.0, + "learning_rate": 6.236111111111111e-05, + "loss": 18.5006, + "step": 449 + }, + { + "epoch": 0.01875703388770789, + "grad_norm": 980.0, + "learning_rate": 6.25e-05, + "loss": 25.8824, + "step": 450 + }, + { + "epoch": 0.01879871618523613, + "grad_norm": 478.0, + "learning_rate": 6.263888888888889e-05, + "loss": 15.3763, + "step": 451 + }, + { + "epoch": 0.01884039848276437, + "grad_norm": 696.0, + "learning_rate": 6.277777777777778e-05, + "loss": 20.8807, + "step": 452 + }, + { + "epoch": 0.01888208078029261, + "grad_norm": 512.0, + "learning_rate": 6.291666666666667e-05, + "loss": 17.7519, + "step": 453 + }, + { + "epoch": 0.01892376307782085, + "grad_norm": 185.0, + "learning_rate": 6.305555555555555e-05, + "loss": 9.4392, + "step": 454 + }, + { + "epoch": 0.01896544537534909, + "grad_norm": 102.0, + "learning_rate": 6.319444444444444e-05, + "loss": 6.7823, + "step": 455 + }, + { + "epoch": 0.01900712767287733, + "grad_norm": 828.0, + "learning_rate": 6.333333333333333e-05, + "loss": 19.1357, + "step": 456 + }, + { + "epoch": 0.01904880997040557, + "grad_norm": 342.0, + "learning_rate": 6.347222222222222e-05, + "loss": 13.9397, + "step": 457 + }, + { + "epoch": 0.019090492267933808, + "grad_norm": 197.0, + "learning_rate": 6.361111111111111e-05, + "loss": 8.8762, + "step": 458 + }, + { + "epoch": 0.019132174565462047, + "grad_norm": 1256.0, + "learning_rate": 6.375e-05, + "loss": 29.8807, + "step": 459 + }, + { + "epoch": 0.019173856862990286, + "grad_norm": 660.0, + "learning_rate": 6.388888888888888e-05, + "loss": 23.3768, + "step": 460 + }, + { + "epoch": 0.01921553916051853, + "grad_norm": 249.0, + "learning_rate": 6.402777777777777e-05, + "loss": 11.0638, + "step": 461 + }, + { + "epoch": 0.01925722145804677, + "grad_norm": 290.0, + "learning_rate": 6.416666666666668e-05, + "loss": 10.9386, + "step": 462 + }, + { + "epoch": 0.019298903755575008, + "grad_norm": 596.0, + "learning_rate": 6.430555555555557e-05, + "loss": 22.1296, + "step": 463 + }, + { + "epoch": 0.019340586053103247, + "grad_norm": 472.0, + "learning_rate": 6.444444444444446e-05, + "loss": 17.0024, + "step": 464 + }, + { + "epoch": 0.019382268350631487, + "grad_norm": 133.0, + "learning_rate": 6.458333333333334e-05, + "loss": 8.4396, + "step": 465 + }, + { + "epoch": 0.019423950648159726, + "grad_norm": 191.0, + "learning_rate": 6.472222222222223e-05, + "loss": 9.6265, + "step": 466 + }, + { + "epoch": 0.019465632945687966, + "grad_norm": 712.0, + "learning_rate": 6.486111111111112e-05, + "loss": 20.7546, + "step": 467 + }, + { + "epoch": 0.019507315243216205, + "grad_norm": 470.0, + "learning_rate": 6.500000000000001e-05, + "loss": 16.1264, + "step": 468 + }, + { + "epoch": 0.019548997540744444, + "grad_norm": 278.0, + "learning_rate": 6.51388888888889e-05, + "loss": 10.4403, + "step": 469 + }, + { + "epoch": 0.019590679838272687, + "grad_norm": 740.0, + "learning_rate": 6.527777777777778e-05, + "loss": 24.2509, + "step": 470 + }, + { + "epoch": 0.019632362135800926, + "grad_norm": 350.0, + "learning_rate": 6.541666666666667e-05, + "loss": 15.0019, + "step": 471 + }, + { + "epoch": 0.019674044433329166, + "grad_norm": 149.0, + "learning_rate": 6.555555555555556e-05, + "loss": 8.3143, + "step": 472 + }, + { + "epoch": 0.019715726730857405, + "grad_norm": 382.0, + "learning_rate": 6.569444444444445e-05, + "loss": 15.5007, + "step": 473 + }, + { + "epoch": 0.019757409028385645, + "grad_norm": 344.0, + "learning_rate": 6.583333333333334e-05, + "loss": 14.6895, + "step": 474 + }, + { + "epoch": 0.019799091325913884, + "grad_norm": 712.0, + "learning_rate": 6.597222222222223e-05, + "loss": 20.3764, + "step": 475 + }, + { + "epoch": 0.019840773623442123, + "grad_norm": 221.0, + "learning_rate": 6.611111111111111e-05, + "loss": 10.3132, + "step": 476 + }, + { + "epoch": 0.019882455920970363, + "grad_norm": 175.0, + "learning_rate": 6.625e-05, + "loss": 8.8771, + "step": 477 + }, + { + "epoch": 0.019924138218498602, + "grad_norm": 544.0, + "learning_rate": 6.638888888888889e-05, + "loss": 20.1266, + "step": 478 + }, + { + "epoch": 0.019965820516026845, + "grad_norm": 358.0, + "learning_rate": 6.652777777777778e-05, + "loss": 9.9387, + "step": 479 + }, + { + "epoch": 0.020007502813555084, + "grad_norm": 478.0, + "learning_rate": 6.666666666666667e-05, + "loss": 16.5014, + "step": 480 + }, + { + "epoch": 0.020049185111083324, + "grad_norm": 292.0, + "learning_rate": 6.680555555555556e-05, + "loss": 11.8145, + "step": 481 + }, + { + "epoch": 0.020090867408611563, + "grad_norm": 414.0, + "learning_rate": 6.694444444444444e-05, + "loss": 14.6884, + "step": 482 + }, + { + "epoch": 0.020132549706139802, + "grad_norm": 648.0, + "learning_rate": 6.708333333333333e-05, + "loss": 20.0029, + "step": 483 + }, + { + "epoch": 0.020174232003668042, + "grad_norm": 392.0, + "learning_rate": 6.722222222222223e-05, + "loss": 14.0636, + "step": 484 + }, + { + "epoch": 0.02021591430119628, + "grad_norm": 868.0, + "learning_rate": 6.736111111111112e-05, + "loss": 23.007, + "step": 485 + }, + { + "epoch": 0.02025759659872452, + "grad_norm": 360.0, + "learning_rate": 6.750000000000001e-05, + "loss": 14.0636, + "step": 486 + }, + { + "epoch": 0.02029927889625276, + "grad_norm": 242.0, + "learning_rate": 6.763888888888889e-05, + "loss": 10.6888, + "step": 487 + }, + { + "epoch": 0.020340961193781003, + "grad_norm": 113.5, + "learning_rate": 6.777777777777778e-05, + "loss": 7.0635, + "step": 488 + }, + { + "epoch": 0.020382643491309242, + "grad_norm": 490.0, + "learning_rate": 6.791666666666667e-05, + "loss": 17.3784, + "step": 489 + }, + { + "epoch": 0.02042432578883748, + "grad_norm": 338.0, + "learning_rate": 6.805555555555556e-05, + "loss": 12.8758, + "step": 490 + }, + { + "epoch": 0.02046600808636572, + "grad_norm": 466.0, + "learning_rate": 6.819444444444445e-05, + "loss": 17.0006, + "step": 491 + }, + { + "epoch": 0.02050769038389396, + "grad_norm": 159.0, + "learning_rate": 6.833333333333333e-05, + "loss": 8.9394, + "step": 492 + }, + { + "epoch": 0.0205493726814222, + "grad_norm": 173.0, + "learning_rate": 6.847222222222222e-05, + "loss": 8.3766, + "step": 493 + }, + { + "epoch": 0.02059105497895044, + "grad_norm": 308.0, + "learning_rate": 6.861111111111111e-05, + "loss": 12.3132, + "step": 494 + }, + { + "epoch": 0.02063273727647868, + "grad_norm": 524.0, + "learning_rate": 6.875e-05, + "loss": 18.5009, + "step": 495 + }, + { + "epoch": 0.020674419574006918, + "grad_norm": 235.0, + "learning_rate": 6.88888888888889e-05, + "loss": 10.19, + "step": 496 + }, + { + "epoch": 0.02071610187153516, + "grad_norm": 338.0, + "learning_rate": 6.902777777777779e-05, + "loss": 14.3773, + "step": 497 + }, + { + "epoch": 0.0207577841690634, + "grad_norm": 1320.0, + "learning_rate": 6.916666666666666e-05, + "loss": 40.5038, + "step": 498 + }, + { + "epoch": 0.02079946646659164, + "grad_norm": 334.0, + "learning_rate": 6.930555555555555e-05, + "loss": 13.8758, + "step": 499 + }, + { + "epoch": 0.02084114876411988, + "grad_norm": 340.0, + "learning_rate": 6.944444444444444e-05, + "loss": 14.0018, + "step": 500 + }, + { + "epoch": 0.020882831061648118, + "grad_norm": 358.0, + "learning_rate": 6.958333333333334e-05, + "loss": 14.1279, + "step": 501 + }, + { + "epoch": 0.020924513359176358, + "grad_norm": 292.0, + "learning_rate": 6.972222222222223e-05, + "loss": 12.2565, + "step": 502 + }, + { + "epoch": 0.020966195656704597, + "grad_norm": 376.0, + "learning_rate": 6.986111111111112e-05, + "loss": 13.0012, + "step": 503 + }, + { + "epoch": 0.021007877954232836, + "grad_norm": 117.0, + "learning_rate": 7e-05, + "loss": 7.4702, + "step": 504 + }, + { + "epoch": 0.021049560251761076, + "grad_norm": 756.0, + "learning_rate": 7.013888888888888e-05, + "loss": 22.8783, + "step": 505 + }, + { + "epoch": 0.02109124254928932, + "grad_norm": 219.0, + "learning_rate": 7.027777777777778e-05, + "loss": 10.1882, + "step": 506 + }, + { + "epoch": 0.021132924846817558, + "grad_norm": 512.0, + "learning_rate": 7.041666666666668e-05, + "loss": 15.3778, + "step": 507 + }, + { + "epoch": 0.021174607144345797, + "grad_norm": 124.0, + "learning_rate": 7.055555555555556e-05, + "loss": 8.0642, + "step": 508 + }, + { + "epoch": 0.021216289441874037, + "grad_norm": 500.0, + "learning_rate": 7.069444444444445e-05, + "loss": 17.0018, + "step": 509 + }, + { + "epoch": 0.021257971739402276, + "grad_norm": 302.0, + "learning_rate": 7.083333333333334e-05, + "loss": 12.7509, + "step": 510 + }, + { + "epoch": 0.021299654036930515, + "grad_norm": 488.0, + "learning_rate": 7.097222222222223e-05, + "loss": 17.5034, + "step": 511 + }, + { + "epoch": 0.021341336334458755, + "grad_norm": 520.0, + "learning_rate": 7.111111111111112e-05, + "loss": 19.6272, + "step": 512 + }, + { + "epoch": 0.021383018631986994, + "grad_norm": 286.0, + "learning_rate": 7.125000000000001e-05, + "loss": 12.3759, + "step": 513 + }, + { + "epoch": 0.021424700929515234, + "grad_norm": 612.0, + "learning_rate": 7.138888888888889e-05, + "loss": 20.626, + "step": 514 + }, + { + "epoch": 0.021466383227043473, + "grad_norm": 322.0, + "learning_rate": 7.152777777777778e-05, + "loss": 13.0032, + "step": 515 + }, + { + "epoch": 0.021508065524571716, + "grad_norm": 470.0, + "learning_rate": 7.166666666666667e-05, + "loss": 16.8779, + "step": 516 + }, + { + "epoch": 0.021549747822099955, + "grad_norm": 512.0, + "learning_rate": 7.180555555555556e-05, + "loss": 17.0019, + "step": 517 + }, + { + "epoch": 0.021591430119628195, + "grad_norm": 348.0, + "learning_rate": 7.194444444444445e-05, + "loss": 15.0633, + "step": 518 + }, + { + "epoch": 0.021633112417156434, + "grad_norm": 1064.0, + "learning_rate": 7.208333333333334e-05, + "loss": 31.3757, + "step": 519 + }, + { + "epoch": 0.021674794714684673, + "grad_norm": 1128.0, + "learning_rate": 7.222222222222222e-05, + "loss": 28.7583, + "step": 520 + }, + { + "epoch": 0.021716477012212913, + "grad_norm": 225.0, + "learning_rate": 7.236111111111111e-05, + "loss": 11.3761, + "step": 521 + }, + { + "epoch": 0.021758159309741152, + "grad_norm": 151.0, + "learning_rate": 7.25e-05, + "loss": 8.4412, + "step": 522 + }, + { + "epoch": 0.02179984160726939, + "grad_norm": 464.0, + "learning_rate": 7.263888888888889e-05, + "loss": 15.1891, + "step": 523 + }, + { + "epoch": 0.02184152390479763, + "grad_norm": 174.0, + "learning_rate": 7.277777777777778e-05, + "loss": 8.9412, + "step": 524 + }, + { + "epoch": 0.021883206202325874, + "grad_norm": 700.0, + "learning_rate": 7.291666666666667e-05, + "loss": 22.7512, + "step": 525 + }, + { + "epoch": 0.021924888499854113, + "grad_norm": 354.0, + "learning_rate": 7.305555555555555e-05, + "loss": 13.1892, + "step": 526 + }, + { + "epoch": 0.021966570797382352, + "grad_norm": 280.0, + "learning_rate": 7.319444444444444e-05, + "loss": 11.1885, + "step": 527 + }, + { + "epoch": 0.02200825309491059, + "grad_norm": 300.0, + "learning_rate": 7.333333333333333e-05, + "loss": 13.3133, + "step": 528 + }, + { + "epoch": 0.02204993539243883, + "grad_norm": 201.0, + "learning_rate": 7.347222222222224e-05, + "loss": 9.5011, + "step": 529 + }, + { + "epoch": 0.02209161768996707, + "grad_norm": 448.0, + "learning_rate": 7.361111111111111e-05, + "loss": 17.5013, + "step": 530 + }, + { + "epoch": 0.02213329998749531, + "grad_norm": 446.0, + "learning_rate": 7.375e-05, + "loss": 15.813, + "step": 531 + }, + { + "epoch": 0.02217498228502355, + "grad_norm": 712.0, + "learning_rate": 7.38888888888889e-05, + "loss": 20.7578, + "step": 532 + }, + { + "epoch": 0.02221666458255179, + "grad_norm": 328.0, + "learning_rate": 7.402777777777779e-05, + "loss": 11.9392, + "step": 533 + }, + { + "epoch": 0.02225834688008003, + "grad_norm": 227.0, + "learning_rate": 7.416666666666668e-05, + "loss": 10.8758, + "step": 534 + }, + { + "epoch": 0.02230002917760827, + "grad_norm": 240.0, + "learning_rate": 7.430555555555557e-05, + "loss": 11.8762, + "step": 535 + }, + { + "epoch": 0.02234171147513651, + "grad_norm": 576.0, + "learning_rate": 7.444444444444444e-05, + "loss": 18.0022, + "step": 536 + }, + { + "epoch": 0.02238339377266475, + "grad_norm": 696.0, + "learning_rate": 7.458333333333333e-05, + "loss": 20.1263, + "step": 537 + }, + { + "epoch": 0.02242507607019299, + "grad_norm": 386.0, + "learning_rate": 7.472222222222223e-05, + "loss": 13.814, + "step": 538 + }, + { + "epoch": 0.02246675836772123, + "grad_norm": 334.0, + "learning_rate": 7.486111111111112e-05, + "loss": 10.8776, + "step": 539 + }, + { + "epoch": 0.022508440665249468, + "grad_norm": 266.0, + "learning_rate": 7.500000000000001e-05, + "loss": 10.6257, + "step": 540 + }, + { + "epoch": 0.022550122962777707, + "grad_norm": 358.0, + "learning_rate": 7.51388888888889e-05, + "loss": 12.0647, + "step": 541 + }, + { + "epoch": 0.022591805260305946, + "grad_norm": 346.0, + "learning_rate": 7.527777777777777e-05, + "loss": 13.6896, + "step": 542 + }, + { + "epoch": 0.02263348755783419, + "grad_norm": 372.0, + "learning_rate": 7.541666666666667e-05, + "loss": 14.8763, + "step": 543 + }, + { + "epoch": 0.02267516985536243, + "grad_norm": 380.0, + "learning_rate": 7.555555555555556e-05, + "loss": 13.5009, + "step": 544 + }, + { + "epoch": 0.022716852152890668, + "grad_norm": 302.0, + "learning_rate": 7.569444444444445e-05, + "loss": 9.688, + "step": 545 + }, + { + "epoch": 0.022758534450418907, + "grad_norm": 382.0, + "learning_rate": 7.583333333333334e-05, + "loss": 14.3131, + "step": 546 + }, + { + "epoch": 0.022800216747947147, + "grad_norm": 436.0, + "learning_rate": 7.597222222222223e-05, + "loss": 12.6943, + "step": 547 + }, + { + "epoch": 0.022841899045475386, + "grad_norm": 1160.0, + "learning_rate": 7.61111111111111e-05, + "loss": 30.0016, + "step": 548 + }, + { + "epoch": 0.022883581343003626, + "grad_norm": 528.0, + "learning_rate": 7.625e-05, + "loss": 16.0058, + "step": 549 + }, + { + "epoch": 0.022925263640531865, + "grad_norm": 436.0, + "learning_rate": 7.638888888888889e-05, + "loss": 15.0634, + "step": 550 + }, + { + "epoch": 0.022966945938060104, + "grad_norm": 404.0, + "learning_rate": 7.652777777777778e-05, + "loss": 15.3143, + "step": 551 + }, + { + "epoch": 0.023008628235588347, + "grad_norm": 260.0, + "learning_rate": 7.666666666666667e-05, + "loss": 11.2511, + "step": 552 + }, + { + "epoch": 0.023050310533116587, + "grad_norm": 264.0, + "learning_rate": 7.680555555555556e-05, + "loss": 10.8138, + "step": 553 + }, + { + "epoch": 0.023091992830644826, + "grad_norm": 137.0, + "learning_rate": 7.694444444444445e-05, + "loss": 8.3136, + "step": 554 + }, + { + "epoch": 0.023133675128173065, + "grad_norm": 510.0, + "learning_rate": 7.708333333333334e-05, + "loss": 15.4389, + "step": 555 + }, + { + "epoch": 0.023175357425701305, + "grad_norm": 548.0, + "learning_rate": 7.722222222222223e-05, + "loss": 14.8196, + "step": 556 + }, + { + "epoch": 0.023217039723229544, + "grad_norm": 516.0, + "learning_rate": 7.736111111111112e-05, + "loss": 16.5012, + "step": 557 + }, + { + "epoch": 0.023258722020757783, + "grad_norm": 936.0, + "learning_rate": 7.75e-05, + "loss": 23.0024, + "step": 558 + }, + { + "epoch": 0.023300404318286023, + "grad_norm": 330.0, + "learning_rate": 7.763888888888889e-05, + "loss": 12.9384, + "step": 559 + }, + { + "epoch": 0.023342086615814262, + "grad_norm": 394.0, + "learning_rate": 7.777777777777778e-05, + "loss": 14.0006, + "step": 560 + }, + { + "epoch": 0.023383768913342505, + "grad_norm": 288.0, + "learning_rate": 7.791666666666667e-05, + "loss": 12.3769, + "step": 561 + }, + { + "epoch": 0.023425451210870744, + "grad_norm": 724.0, + "learning_rate": 7.805555555555556e-05, + "loss": 18.5063, + "step": 562 + }, + { + "epoch": 0.023467133508398984, + "grad_norm": 358.0, + "learning_rate": 7.819444444444445e-05, + "loss": 13.7513, + "step": 563 + }, + { + "epoch": 0.023508815805927223, + "grad_norm": 270.0, + "learning_rate": 7.833333333333333e-05, + "loss": 11.6893, + "step": 564 + }, + { + "epoch": 0.023550498103455463, + "grad_norm": 340.0, + "learning_rate": 7.847222222222222e-05, + "loss": 13.6261, + "step": 565 + }, + { + "epoch": 0.023592180400983702, + "grad_norm": 340.0, + "learning_rate": 7.861111111111111e-05, + "loss": 14.1261, + "step": 566 + }, + { + "epoch": 0.02363386269851194, + "grad_norm": 438.0, + "learning_rate": 7.875e-05, + "loss": 15.3754, + "step": 567 + }, + { + "epoch": 0.02367554499604018, + "grad_norm": 848.0, + "learning_rate": 7.88888888888889e-05, + "loss": 24.3771, + "step": 568 + }, + { + "epoch": 0.02371722729356842, + "grad_norm": 516.0, + "learning_rate": 7.902777777777778e-05, + "loss": 16.0022, + "step": 569 + }, + { + "epoch": 0.023758909591096663, + "grad_norm": 164.0, + "learning_rate": 7.916666666666666e-05, + "loss": 7.5028, + "step": 570 + }, + { + "epoch": 0.023800591888624902, + "grad_norm": 75.0, + "learning_rate": 7.930555555555555e-05, + "loss": 6.4694, + "step": 571 + }, + { + "epoch": 0.02384227418615314, + "grad_norm": 428.0, + "learning_rate": 7.944444444444444e-05, + "loss": 14.6284, + "step": 572 + }, + { + "epoch": 0.02388395648368138, + "grad_norm": 864.0, + "learning_rate": 7.958333333333333e-05, + "loss": 21.631, + "step": 573 + }, + { + "epoch": 0.02392563878120962, + "grad_norm": 1056.0, + "learning_rate": 7.972222222222223e-05, + "loss": 27.8822, + "step": 574 + }, + { + "epoch": 0.02396732107873786, + "grad_norm": 396.0, + "learning_rate": 7.986111111111112e-05, + "loss": 12.0009, + "step": 575 + }, + { + "epoch": 0.0240090033762661, + "grad_norm": 1048.0, + "learning_rate": 8e-05, + "loss": 26.0028, + "step": 576 + }, + { + "epoch": 0.02405068567379434, + "grad_norm": 220.0, + "learning_rate": 8.01388888888889e-05, + "loss": 10.6882, + "step": 577 + }, + { + "epoch": 0.024092367971322578, + "grad_norm": 268.0, + "learning_rate": 8.027777777777779e-05, + "loss": 11.3773, + "step": 578 + }, + { + "epoch": 0.02413405026885082, + "grad_norm": 454.0, + "learning_rate": 8.041666666666668e-05, + "loss": 15.4401, + "step": 579 + }, + { + "epoch": 0.02417573256637906, + "grad_norm": 1088.0, + "learning_rate": 8.055555555555556e-05, + "loss": 28.5076, + "step": 580 + }, + { + "epoch": 0.0242174148639073, + "grad_norm": 243.0, + "learning_rate": 8.069444444444445e-05, + "loss": 9.7525, + "step": 581 + }, + { + "epoch": 0.02425909716143554, + "grad_norm": 276.0, + "learning_rate": 8.083333333333334e-05, + "loss": 12.1272, + "step": 582 + }, + { + "epoch": 0.024300779458963778, + "grad_norm": 382.0, + "learning_rate": 8.097222222222223e-05, + "loss": 13.3832, + "step": 583 + }, + { + "epoch": 0.024342461756492018, + "grad_norm": 564.0, + "learning_rate": 8.111111111111112e-05, + "loss": 18.0014, + "step": 584 + }, + { + "epoch": 0.024384144054020257, + "grad_norm": 284.0, + "learning_rate": 8.125000000000001e-05, + "loss": 12.5022, + "step": 585 + }, + { + "epoch": 0.024425826351548496, + "grad_norm": 568.0, + "learning_rate": 8.138888888888889e-05, + "loss": 19.3811, + "step": 586 + }, + { + "epoch": 0.024467508649076736, + "grad_norm": 438.0, + "learning_rate": 8.152777777777778e-05, + "loss": 16.8755, + "step": 587 + }, + { + "epoch": 0.02450919094660498, + "grad_norm": 184.0, + "learning_rate": 8.166666666666667e-05, + "loss": 9.876, + "step": 588 + }, + { + "epoch": 0.024550873244133218, + "grad_norm": 568.0, + "learning_rate": 8.180555555555556e-05, + "loss": 13.319, + "step": 589 + }, + { + "epoch": 0.024592555541661457, + "grad_norm": 536.0, + "learning_rate": 8.194444444444445e-05, + "loss": 17.3758, + "step": 590 + }, + { + "epoch": 0.024634237839189697, + "grad_norm": 394.0, + "learning_rate": 8.208333333333334e-05, + "loss": 15.001, + "step": 591 + }, + { + "epoch": 0.024675920136717936, + "grad_norm": 544.0, + "learning_rate": 8.222222222222222e-05, + "loss": 19.1259, + "step": 592 + }, + { + "epoch": 0.024717602434246175, + "grad_norm": 306.0, + "learning_rate": 8.236111111111111e-05, + "loss": 13.9392, + "step": 593 + }, + { + "epoch": 0.024759284731774415, + "grad_norm": 240.0, + "learning_rate": 8.25e-05, + "loss": 9.9397, + "step": 594 + }, + { + "epoch": 0.024800967029302654, + "grad_norm": 352.0, + "learning_rate": 8.263888888888889e-05, + "loss": 13.5015, + "step": 595 + }, + { + "epoch": 0.024842649326830894, + "grad_norm": 384.0, + "learning_rate": 8.277777777777778e-05, + "loss": 14.7507, + "step": 596 + }, + { + "epoch": 0.024884331624359133, + "grad_norm": 402.0, + "learning_rate": 8.291666666666667e-05, + "loss": 13.8771, + "step": 597 + }, + { + "epoch": 0.024926013921887376, + "grad_norm": 328.0, + "learning_rate": 8.305555555555556e-05, + "loss": 14.4387, + "step": 598 + }, + { + "epoch": 0.024967696219415615, + "grad_norm": 560.0, + "learning_rate": 8.319444444444445e-05, + "loss": 18.8759, + "step": 599 + }, + { + "epoch": 0.025009378516943855, + "grad_norm": 552.0, + "learning_rate": 8.333333333333334e-05, + "loss": 20.0009, + "step": 600 + }, + { + "epoch": 0.025051060814472094, + "grad_norm": 406.0, + "learning_rate": 8.347222222222223e-05, + "loss": 14.5008, + "step": 601 + }, + { + "epoch": 0.025092743112000333, + "grad_norm": 290.0, + "learning_rate": 8.361111111111111e-05, + "loss": 8.6279, + "step": 602 + }, + { + "epoch": 0.025134425409528573, + "grad_norm": 134.0, + "learning_rate": 8.375e-05, + "loss": 8.5026, + "step": 603 + }, + { + "epoch": 0.025176107707056812, + "grad_norm": 158.0, + "learning_rate": 8.38888888888889e-05, + "loss": 9.3763, + "step": 604 + }, + { + "epoch": 0.02521779000458505, + "grad_norm": 672.0, + "learning_rate": 8.402777777777778e-05, + "loss": 18.752, + "step": 605 + }, + { + "epoch": 0.02525947230211329, + "grad_norm": 284.0, + "learning_rate": 8.416666666666668e-05, + "loss": 12.0025, + "step": 606 + }, + { + "epoch": 0.025301154599641534, + "grad_norm": 210.0, + "learning_rate": 8.430555555555557e-05, + "loss": 10.6885, + "step": 607 + }, + { + "epoch": 0.025342836897169773, + "grad_norm": 564.0, + "learning_rate": 8.444444444444444e-05, + "loss": 17.126, + "step": 608 + }, + { + "epoch": 0.025384519194698012, + "grad_norm": 660.0, + "learning_rate": 8.458333333333333e-05, + "loss": 18.5065, + "step": 609 + }, + { + "epoch": 0.025426201492226252, + "grad_norm": 334.0, + "learning_rate": 8.472222222222222e-05, + "loss": 12.9397, + "step": 610 + }, + { + "epoch": 0.02546788378975449, + "grad_norm": 544.0, + "learning_rate": 8.486111111111112e-05, + "loss": 17.3766, + "step": 611 + }, + { + "epoch": 0.02550956608728273, + "grad_norm": 141.0, + "learning_rate": 8.5e-05, + "loss": 8.2517, + "step": 612 + }, + { + "epoch": 0.02555124838481097, + "grad_norm": 510.0, + "learning_rate": 8.51388888888889e-05, + "loss": 17.1257, + "step": 613 + }, + { + "epoch": 0.02559293068233921, + "grad_norm": 314.0, + "learning_rate": 8.527777777777777e-05, + "loss": 13.3133, + "step": 614 + }, + { + "epoch": 0.02563461297986745, + "grad_norm": 360.0, + "learning_rate": 8.541666666666666e-05, + "loss": 12.0648, + "step": 615 + }, + { + "epoch": 0.02567629527739569, + "grad_norm": 600.0, + "learning_rate": 8.555555555555556e-05, + "loss": 19.5007, + "step": 616 + }, + { + "epoch": 0.02571797757492393, + "grad_norm": 248.0, + "learning_rate": 8.569444444444445e-05, + "loss": 11.0019, + "step": 617 + }, + { + "epoch": 0.02575965987245217, + "grad_norm": 386.0, + "learning_rate": 8.583333333333334e-05, + "loss": 14.063, + "step": 618 + }, + { + "epoch": 0.02580134216998041, + "grad_norm": 156.0, + "learning_rate": 8.597222222222223e-05, + "loss": 9.3151, + "step": 619 + }, + { + "epoch": 0.02584302446750865, + "grad_norm": 124.0, + "learning_rate": 8.611111111111112e-05, + "loss": 7.9702, + "step": 620 + }, + { + "epoch": 0.02588470676503689, + "grad_norm": 478.0, + "learning_rate": 8.625000000000001e-05, + "loss": 16.7516, + "step": 621 + }, + { + "epoch": 0.025926389062565128, + "grad_norm": 532.0, + "learning_rate": 8.63888888888889e-05, + "loss": 16.8758, + "step": 622 + }, + { + "epoch": 0.025968071360093367, + "grad_norm": 370.0, + "learning_rate": 8.652777777777779e-05, + "loss": 14.7508, + "step": 623 + }, + { + "epoch": 0.026009753657621607, + "grad_norm": 220.0, + "learning_rate": 8.666666666666667e-05, + "loss": 10.0032, + "step": 624 + }, + { + "epoch": 0.02605143595514985, + "grad_norm": 440.0, + "learning_rate": 8.680555555555556e-05, + "loss": 16.7514, + "step": 625 + }, + { + "epoch": 0.02609311825267809, + "grad_norm": 302.0, + "learning_rate": 8.694444444444445e-05, + "loss": 12.5652, + "step": 626 + }, + { + "epoch": 0.026134800550206328, + "grad_norm": 210.0, + "learning_rate": 8.708333333333334e-05, + "loss": 7.1896, + "step": 627 + }, + { + "epoch": 0.026176482847734567, + "grad_norm": 612.0, + "learning_rate": 8.722222222222223e-05, + "loss": 20.1299, + "step": 628 + }, + { + "epoch": 0.026218165145262807, + "grad_norm": 430.0, + "learning_rate": 8.736111111111112e-05, + "loss": 14.5636, + "step": 629 + }, + { + "epoch": 0.026259847442791046, + "grad_norm": 274.0, + "learning_rate": 8.75e-05, + "loss": 11.439, + "step": 630 + }, + { + "epoch": 0.026301529740319286, + "grad_norm": 504.0, + "learning_rate": 8.763888888888889e-05, + "loss": 18.1258, + "step": 631 + }, + { + "epoch": 0.026343212037847525, + "grad_norm": 292.0, + "learning_rate": 8.777777777777778e-05, + "loss": 11.5013, + "step": 632 + }, + { + "epoch": 0.026384894335375764, + "grad_norm": 344.0, + "learning_rate": 8.791666666666667e-05, + "loss": 12.6887, + "step": 633 + }, + { + "epoch": 0.026426576632904007, + "grad_norm": 106.0, + "learning_rate": 8.805555555555556e-05, + "loss": 6.5018, + "step": 634 + }, + { + "epoch": 0.026468258930432247, + "grad_norm": 516.0, + "learning_rate": 8.819444444444445e-05, + "loss": 18.3762, + "step": 635 + }, + { + "epoch": 0.026509941227960486, + "grad_norm": 968.0, + "learning_rate": 8.833333333333333e-05, + "loss": 28.1274, + "step": 636 + }, + { + "epoch": 0.026551623525488725, + "grad_norm": 448.0, + "learning_rate": 8.847222222222222e-05, + "loss": 14.5021, + "step": 637 + }, + { + "epoch": 0.026593305823016965, + "grad_norm": 145.0, + "learning_rate": 8.861111111111111e-05, + "loss": 8.8142, + "step": 638 + }, + { + "epoch": 0.026634988120545204, + "grad_norm": 179.0, + "learning_rate": 8.875e-05, + "loss": 8.5013, + "step": 639 + }, + { + "epoch": 0.026676670418073443, + "grad_norm": 680.0, + "learning_rate": 8.888888888888889e-05, + "loss": 20.6267, + "step": 640 + }, + { + "epoch": 0.026718352715601683, + "grad_norm": 446.0, + "learning_rate": 8.902777777777777e-05, + "loss": 16.5019, + "step": 641 + }, + { + "epoch": 0.026760035013129922, + "grad_norm": 406.0, + "learning_rate": 8.916666666666667e-05, + "loss": 14.3757, + "step": 642 + }, + { + "epoch": 0.026801717310658165, + "grad_norm": 452.0, + "learning_rate": 8.930555555555557e-05, + "loss": 16.0006, + "step": 643 + }, + { + "epoch": 0.026843399608186404, + "grad_norm": 466.0, + "learning_rate": 8.944444444444446e-05, + "loss": 15.8758, + "step": 644 + }, + { + "epoch": 0.026885081905714644, + "grad_norm": 215.0, + "learning_rate": 8.958333333333335e-05, + "loss": 10.189, + "step": 645 + }, + { + "epoch": 0.026926764203242883, + "grad_norm": 1072.0, + "learning_rate": 8.972222222222222e-05, + "loss": 29.2557, + "step": 646 + }, + { + "epoch": 0.026968446500771123, + "grad_norm": 426.0, + "learning_rate": 8.986111111111111e-05, + "loss": 14.1885, + "step": 647 + }, + { + "epoch": 0.027010128798299362, + "grad_norm": 556.0, + "learning_rate": 9e-05, + "loss": 17.7512, + "step": 648 + }, + { + "epoch": 0.0270518110958276, + "grad_norm": 72.5, + "learning_rate": 9.01388888888889e-05, + "loss": 5.222, + "step": 649 + }, + { + "epoch": 0.02709349339335584, + "grad_norm": 446.0, + "learning_rate": 9.027777777777779e-05, + "loss": 15.8765, + "step": 650 + }, + { + "epoch": 0.02713517569088408, + "grad_norm": 302.0, + "learning_rate": 9.041666666666668e-05, + "loss": 11.1886, + "step": 651 + }, + { + "epoch": 0.027176857988412323, + "grad_norm": 318.0, + "learning_rate": 9.055555555555556e-05, + "loss": 13.1883, + "step": 652 + }, + { + "epoch": 0.027218540285940562, + "grad_norm": 704.0, + "learning_rate": 9.069444444444445e-05, + "loss": 18.5043, + "step": 653 + }, + { + "epoch": 0.0272602225834688, + "grad_norm": 266.0, + "learning_rate": 9.083333333333334e-05, + "loss": 11.8152, + "step": 654 + }, + { + "epoch": 0.02730190488099704, + "grad_norm": 298.0, + "learning_rate": 9.097222222222223e-05, + "loss": 12.5008, + "step": 655 + }, + { + "epoch": 0.02734358717852528, + "grad_norm": 229.0, + "learning_rate": 9.111111111111112e-05, + "loss": 10.1888, + "step": 656 + }, + { + "epoch": 0.02738526947605352, + "grad_norm": 128.0, + "learning_rate": 9.125e-05, + "loss": 7.972, + "step": 657 + }, + { + "epoch": 0.02742695177358176, + "grad_norm": 362.0, + "learning_rate": 9.138888888888889e-05, + "loss": 14.0667, + "step": 658 + }, + { + "epoch": 0.02746863407111, + "grad_norm": 406.0, + "learning_rate": 9.152777777777778e-05, + "loss": 14.0646, + "step": 659 + }, + { + "epoch": 0.027510316368638238, + "grad_norm": 676.0, + "learning_rate": 9.166666666666667e-05, + "loss": 21.8777, + "step": 660 + }, + { + "epoch": 0.02755199866616648, + "grad_norm": 420.0, + "learning_rate": 9.180555555555556e-05, + "loss": 14.6275, + "step": 661 + }, + { + "epoch": 0.02759368096369472, + "grad_norm": 109.0, + "learning_rate": 9.194444444444445e-05, + "loss": 8.6901, + "step": 662 + }, + { + "epoch": 0.02763536326122296, + "grad_norm": 428.0, + "learning_rate": 9.208333333333333e-05, + "loss": 15.0668, + "step": 663 + }, + { + "epoch": 0.0276770455587512, + "grad_norm": 596.0, + "learning_rate": 9.222222222222223e-05, + "loss": 20.5007, + "step": 664 + }, + { + "epoch": 0.02771872785627944, + "grad_norm": 326.0, + "learning_rate": 9.236111111111112e-05, + "loss": 13.3777, + "step": 665 + }, + { + "epoch": 0.027760410153807678, + "grad_norm": 384.0, + "learning_rate": 9.250000000000001e-05, + "loss": 14.9382, + "step": 666 + }, + { + "epoch": 0.027802092451335917, + "grad_norm": 656.0, + "learning_rate": 9.26388888888889e-05, + "loss": 20.501, + "step": 667 + }, + { + "epoch": 0.027843774748864156, + "grad_norm": 328.0, + "learning_rate": 9.277777777777778e-05, + "loss": 13.8136, + "step": 668 + }, + { + "epoch": 0.027885457046392396, + "grad_norm": 540.0, + "learning_rate": 9.291666666666667e-05, + "loss": 19.2518, + "step": 669 + }, + { + "epoch": 0.02792713934392064, + "grad_norm": 412.0, + "learning_rate": 9.305555555555556e-05, + "loss": 14.9384, + "step": 670 + }, + { + "epoch": 0.027968821641448878, + "grad_norm": 338.0, + "learning_rate": 9.319444444444445e-05, + "loss": 11.7509, + "step": 671 + }, + { + "epoch": 0.028010503938977117, + "grad_norm": 568.0, + "learning_rate": 9.333333333333334e-05, + "loss": 15.8841, + "step": 672 + }, + { + "epoch": 0.028052186236505357, + "grad_norm": 272.0, + "learning_rate": 9.347222222222223e-05, + "loss": 11.6261, + "step": 673 + }, + { + "epoch": 0.028093868534033596, + "grad_norm": 304.0, + "learning_rate": 9.361111111111111e-05, + "loss": 12.6911, + "step": 674 + }, + { + "epoch": 0.028135550831561836, + "grad_norm": 322.0, + "learning_rate": 9.375e-05, + "loss": 13.5632, + "step": 675 + }, + { + "epoch": 0.028177233129090075, + "grad_norm": 378.0, + "learning_rate": 9.388888888888889e-05, + "loss": 14.5025, + "step": 676 + }, + { + "epoch": 0.028218915426618314, + "grad_norm": 510.0, + "learning_rate": 9.402777777777778e-05, + "loss": 17.7532, + "step": 677 + }, + { + "epoch": 0.028260597724146554, + "grad_norm": 482.0, + "learning_rate": 9.416666666666667e-05, + "loss": 17.2516, + "step": 678 + }, + { + "epoch": 0.028302280021674793, + "grad_norm": 800.0, + "learning_rate": 9.430555555555555e-05, + "loss": 24.6265, + "step": 679 + }, + { + "epoch": 0.028343962319203036, + "grad_norm": 728.0, + "learning_rate": 9.444444444444444e-05, + "loss": 22.7545, + "step": 680 + }, + { + "epoch": 0.028385644616731275, + "grad_norm": 616.0, + "learning_rate": 9.458333333333333e-05, + "loss": 19.5023, + "step": 681 + }, + { + "epoch": 0.028427326914259515, + "grad_norm": 404.0, + "learning_rate": 9.472222222222222e-05, + "loss": 13.2513, + "step": 682 + }, + { + "epoch": 0.028469009211787754, + "grad_norm": 672.0, + "learning_rate": 9.486111111111111e-05, + "loss": 21.5008, + "step": 683 + }, + { + "epoch": 0.028510691509315993, + "grad_norm": 127.5, + "learning_rate": 9.5e-05, + "loss": 7.2831, + "step": 684 + }, + { + "epoch": 0.028552373806844233, + "grad_norm": 1192.0, + "learning_rate": 9.513888888888888e-05, + "loss": 32.0055, + "step": 685 + }, + { + "epoch": 0.028594056104372472, + "grad_norm": 195.0, + "learning_rate": 9.527777777777777e-05, + "loss": 8.6261, + "step": 686 + }, + { + "epoch": 0.02863573840190071, + "grad_norm": 306.0, + "learning_rate": 9.541666666666668e-05, + "loss": 13.0637, + "step": 687 + }, + { + "epoch": 0.02867742069942895, + "grad_norm": 576.0, + "learning_rate": 9.555555555555557e-05, + "loss": 16.6327, + "step": 688 + }, + { + "epoch": 0.028719102996957194, + "grad_norm": 330.0, + "learning_rate": 9.569444444444446e-05, + "loss": 13.4384, + "step": 689 + }, + { + "epoch": 0.028760785294485433, + "grad_norm": 724.0, + "learning_rate": 9.583333333333334e-05, + "loss": 22.8765, + "step": 690 + }, + { + "epoch": 0.028802467592013672, + "grad_norm": 324.0, + "learning_rate": 9.597222222222223e-05, + "loss": 14.4389, + "step": 691 + }, + { + "epoch": 0.028844149889541912, + "grad_norm": 688.0, + "learning_rate": 9.611111111111112e-05, + "loss": 22.2514, + "step": 692 + }, + { + "epoch": 0.02888583218707015, + "grad_norm": 498.0, + "learning_rate": 9.625000000000001e-05, + "loss": 15.8134, + "step": 693 + }, + { + "epoch": 0.02892751448459839, + "grad_norm": 448.0, + "learning_rate": 9.63888888888889e-05, + "loss": 15.8765, + "step": 694 + }, + { + "epoch": 0.02896919678212663, + "grad_norm": 390.0, + "learning_rate": 9.652777777777779e-05, + "loss": 14.3755, + "step": 695 + }, + { + "epoch": 0.02901087907965487, + "grad_norm": 410.0, + "learning_rate": 9.666666666666667e-05, + "loss": 15.3755, + "step": 696 + }, + { + "epoch": 0.02905256137718311, + "grad_norm": 388.0, + "learning_rate": 9.680555555555556e-05, + "loss": 15.0006, + "step": 697 + }, + { + "epoch": 0.02909424367471135, + "grad_norm": 470.0, + "learning_rate": 9.694444444444445e-05, + "loss": 16.7509, + "step": 698 + }, + { + "epoch": 0.02913592597223959, + "grad_norm": 484.0, + "learning_rate": 9.708333333333334e-05, + "loss": 17.5025, + "step": 699 + }, + { + "epoch": 0.02917760826976783, + "grad_norm": 388.0, + "learning_rate": 9.722222222222223e-05, + "loss": 14.5013, + "step": 700 + }, + { + "epoch": 0.02921929056729607, + "grad_norm": 412.0, + "learning_rate": 9.736111111111111e-05, + "loss": 13.3132, + "step": 701 + }, + { + "epoch": 0.02926097286482431, + "grad_norm": 466.0, + "learning_rate": 9.75e-05, + "loss": 17.1264, + "step": 702 + }, + { + "epoch": 0.02930265516235255, + "grad_norm": 1272.0, + "learning_rate": 9.763888888888889e-05, + "loss": 29.1321, + "step": 703 + }, + { + "epoch": 0.029344337459880788, + "grad_norm": 404.0, + "learning_rate": 9.777777777777778e-05, + "loss": 13.6267, + "step": 704 + }, + { + "epoch": 0.029386019757409027, + "grad_norm": 239.0, + "learning_rate": 9.791666666666667e-05, + "loss": 11.2509, + "step": 705 + }, + { + "epoch": 0.029427702054937267, + "grad_norm": 1056.0, + "learning_rate": 9.805555555555556e-05, + "loss": 27.3768, + "step": 706 + }, + { + "epoch": 0.02946938435246551, + "grad_norm": 274.0, + "learning_rate": 9.819444444444444e-05, + "loss": 12.0005, + "step": 707 + }, + { + "epoch": 0.02951106664999375, + "grad_norm": 410.0, + "learning_rate": 9.833333333333333e-05, + "loss": 14.5634, + "step": 708 + }, + { + "epoch": 0.029552748947521988, + "grad_norm": 233.0, + "learning_rate": 9.847222222222223e-05, + "loss": 10.626, + "step": 709 + }, + { + "epoch": 0.029594431245050228, + "grad_norm": 816.0, + "learning_rate": 9.861111111111112e-05, + "loss": 20.3805, + "step": 710 + }, + { + "epoch": 0.029636113542578467, + "grad_norm": 306.0, + "learning_rate": 9.875000000000002e-05, + "loss": 12.1894, + "step": 711 + }, + { + "epoch": 0.029677795840106706, + "grad_norm": 668.0, + "learning_rate": 9.888888888888889e-05, + "loss": 20.1256, + "step": 712 + }, + { + "epoch": 0.029719478137634946, + "grad_norm": 396.0, + "learning_rate": 9.902777777777778e-05, + "loss": 14.7511, + "step": 713 + }, + { + "epoch": 0.029761160435163185, + "grad_norm": 226.0, + "learning_rate": 9.916666666666667e-05, + "loss": 11.0036, + "step": 714 + }, + { + "epoch": 0.029802842732691424, + "grad_norm": 416.0, + "learning_rate": 9.930555555555556e-05, + "loss": 15.0012, + "step": 715 + }, + { + "epoch": 0.029844525030219667, + "grad_norm": 231.0, + "learning_rate": 9.944444444444446e-05, + "loss": 11.2522, + "step": 716 + }, + { + "epoch": 0.029886207327747907, + "grad_norm": 486.0, + "learning_rate": 9.958333333333335e-05, + "loss": 16.876, + "step": 717 + }, + { + "epoch": 0.029927889625276146, + "grad_norm": 394.0, + "learning_rate": 9.972222222222222e-05, + "loss": 15.6258, + "step": 718 + }, + { + "epoch": 0.029969571922804385, + "grad_norm": 174.0, + "learning_rate": 9.986111111111111e-05, + "loss": 9.5011, + "step": 719 + }, + { + "epoch": 0.030011254220332625, + "grad_norm": 294.0, + "learning_rate": 0.0001, + "loss": 12.4381, + "step": 720 + }, + { + "epoch": 0.030052936517860864, + "grad_norm": 199.0, + "learning_rate": 9.999999954437278e-05, + "loss": 9.1268, + "step": 721 + }, + { + "epoch": 0.030094618815389104, + "grad_norm": 516.0, + "learning_rate": 9.999999817749113e-05, + "loss": 18.5007, + "step": 722 + }, + { + "epoch": 0.030136301112917343, + "grad_norm": 354.0, + "learning_rate": 9.999999589935508e-05, + "loss": 13.5634, + "step": 723 + }, + { + "epoch": 0.030177983410445582, + "grad_norm": 384.0, + "learning_rate": 9.999999270996466e-05, + "loss": 14.0636, + "step": 724 + }, + { + "epoch": 0.030219665707973825, + "grad_norm": 480.0, + "learning_rate": 9.999998860931994e-05, + "loss": 14.0019, + "step": 725 + }, + { + "epoch": 0.030261348005502064, + "grad_norm": 340.0, + "learning_rate": 9.999998359742098e-05, + "loss": 12.1883, + "step": 726 + }, + { + "epoch": 0.030303030303030304, + "grad_norm": 390.0, + "learning_rate": 9.999997767426788e-05, + "loss": 13.6886, + "step": 727 + }, + { + "epoch": 0.030344712600558543, + "grad_norm": 282.0, + "learning_rate": 9.999997083986076e-05, + "loss": 11.3756, + "step": 728 + }, + { + "epoch": 0.030386394898086783, + "grad_norm": 390.0, + "learning_rate": 9.999996309419972e-05, + "loss": 13.8757, + "step": 729 + }, + { + "epoch": 0.030428077195615022, + "grad_norm": 692.0, + "learning_rate": 9.999995443728493e-05, + "loss": 19.8759, + "step": 730 + }, + { + "epoch": 0.03046975949314326, + "grad_norm": 229.0, + "learning_rate": 9.999994486911652e-05, + "loss": 10.7511, + "step": 731 + }, + { + "epoch": 0.0305114417906715, + "grad_norm": 580.0, + "learning_rate": 9.999993438969468e-05, + "loss": 19.0005, + "step": 732 + }, + { + "epoch": 0.03055312408819974, + "grad_norm": 512.0, + "learning_rate": 9.99999229990196e-05, + "loss": 18.2511, + "step": 733 + }, + { + "epoch": 0.030594806385727983, + "grad_norm": 175.0, + "learning_rate": 9.999991069709147e-05, + "loss": 9.4384, + "step": 734 + }, + { + "epoch": 0.030636488683256222, + "grad_norm": 374.0, + "learning_rate": 9.999989748391054e-05, + "loss": 14.0011, + "step": 735 + }, + { + "epoch": 0.03067817098078446, + "grad_norm": 448.0, + "learning_rate": 9.999988335947703e-05, + "loss": 15.0638, + "step": 736 + }, + { + "epoch": 0.0307198532783127, + "grad_norm": 322.0, + "learning_rate": 9.999986832379122e-05, + "loss": 12.3139, + "step": 737 + }, + { + "epoch": 0.03076153557584094, + "grad_norm": 330.0, + "learning_rate": 9.999985237685336e-05, + "loss": 12.3776, + "step": 738 + }, + { + "epoch": 0.03080321787336918, + "grad_norm": 268.0, + "learning_rate": 9.999983551866377e-05, + "loss": 11.5637, + "step": 739 + }, + { + "epoch": 0.03084490017089742, + "grad_norm": 362.0, + "learning_rate": 9.999981774922271e-05, + "loss": 12.9388, + "step": 740 + }, + { + "epoch": 0.03088658246842566, + "grad_norm": 556.0, + "learning_rate": 9.999979906853056e-05, + "loss": 18.7512, + "step": 741 + }, + { + "epoch": 0.030928264765953898, + "grad_norm": 556.0, + "learning_rate": 9.999977947658763e-05, + "loss": 16.1272, + "step": 742 + }, + { + "epoch": 0.03096994706348214, + "grad_norm": 440.0, + "learning_rate": 9.999975897339427e-05, + "loss": 15.2576, + "step": 743 + }, + { + "epoch": 0.03101162936101038, + "grad_norm": 458.0, + "learning_rate": 9.999973755895087e-05, + "loss": 11.632, + "step": 744 + }, + { + "epoch": 0.03105331165853862, + "grad_norm": 356.0, + "learning_rate": 9.999971523325781e-05, + "loss": 13.0006, + "step": 745 + }, + { + "epoch": 0.03109499395606686, + "grad_norm": 450.0, + "learning_rate": 9.99996919963155e-05, + "loss": 16.6265, + "step": 746 + }, + { + "epoch": 0.0311366762535951, + "grad_norm": 864.0, + "learning_rate": 9.999966784812437e-05, + "loss": 24.3754, + "step": 747 + }, + { + "epoch": 0.031178358551123338, + "grad_norm": 724.0, + "learning_rate": 9.999964278868486e-05, + "loss": 22.5017, + "step": 748 + }, + { + "epoch": 0.031220040848651577, + "grad_norm": 692.0, + "learning_rate": 9.999961681799741e-05, + "loss": 23.2506, + "step": 749 + }, + { + "epoch": 0.031261723146179816, + "grad_norm": 732.0, + "learning_rate": 9.999958993606251e-05, + "loss": 21.8768, + "step": 750 + }, + { + "epoch": 0.031303405443708056, + "grad_norm": 508.0, + "learning_rate": 9.999956214288064e-05, + "loss": 15.5026, + "step": 751 + }, + { + "epoch": 0.031345087741236295, + "grad_norm": 326.0, + "learning_rate": 9.999953343845232e-05, + "loss": 12.3133, + "step": 752 + }, + { + "epoch": 0.031386770038764535, + "grad_norm": 210.0, + "learning_rate": 9.999950382277806e-05, + "loss": 9.6261, + "step": 753 + }, + { + "epoch": 0.031428452336292774, + "grad_norm": 368.0, + "learning_rate": 9.99994732958584e-05, + "loss": 13.0008, + "step": 754 + }, + { + "epoch": 0.03147013463382101, + "grad_norm": 684.0, + "learning_rate": 9.999944185769391e-05, + "loss": 20.5006, + "step": 755 + }, + { + "epoch": 0.03151181693134925, + "grad_norm": 262.0, + "learning_rate": 9.999940950828514e-05, + "loss": 11.6257, + "step": 756 + }, + { + "epoch": 0.0315534992288775, + "grad_norm": 396.0, + "learning_rate": 9.999937624763272e-05, + "loss": 15.3765, + "step": 757 + }, + { + "epoch": 0.03159518152640574, + "grad_norm": 458.0, + "learning_rate": 9.99993420757372e-05, + "loss": 13.7553, + "step": 758 + }, + { + "epoch": 0.03163686382393398, + "grad_norm": 208.0, + "learning_rate": 9.999930699259925e-05, + "loss": 8.6297, + "step": 759 + }, + { + "epoch": 0.03167854612146222, + "grad_norm": 180.0, + "learning_rate": 9.999927099821949e-05, + "loss": 10.0635, + "step": 760 + }, + { + "epoch": 0.03172022841899046, + "grad_norm": 434.0, + "learning_rate": 9.999923409259857e-05, + "loss": 16.0009, + "step": 761 + }, + { + "epoch": 0.031761910716518696, + "grad_norm": 336.0, + "learning_rate": 9.999919627573716e-05, + "loss": 12.0648, + "step": 762 + }, + { + "epoch": 0.031803593014046935, + "grad_norm": 318.0, + "learning_rate": 9.999915754763598e-05, + "loss": 13.3756, + "step": 763 + }, + { + "epoch": 0.031845275311575175, + "grad_norm": 304.0, + "learning_rate": 9.99991179082957e-05, + "loss": 13.8175, + "step": 764 + }, + { + "epoch": 0.031886957609103414, + "grad_norm": 398.0, + "learning_rate": 9.999907735771706e-05, + "loss": 13.5052, + "step": 765 + }, + { + "epoch": 0.03192863990663165, + "grad_norm": 676.0, + "learning_rate": 9.999903589590081e-05, + "loss": 19.379, + "step": 766 + }, + { + "epoch": 0.03197032220415989, + "grad_norm": 468.0, + "learning_rate": 9.999899352284768e-05, + "loss": 14.1886, + "step": 767 + }, + { + "epoch": 0.03201200450168813, + "grad_norm": 792.0, + "learning_rate": 9.999895023855845e-05, + "loss": 23.0009, + "step": 768 + }, + { + "epoch": 0.03205368679921637, + "grad_norm": 640.0, + "learning_rate": 9.999890604303392e-05, + "loss": 21.3758, + "step": 769 + }, + { + "epoch": 0.03209536909674461, + "grad_norm": 418.0, + "learning_rate": 9.999886093627491e-05, + "loss": 14.8757, + "step": 770 + }, + { + "epoch": 0.03213705139427285, + "grad_norm": 580.0, + "learning_rate": 9.999881491828219e-05, + "loss": 15.4394, + "step": 771 + }, + { + "epoch": 0.03217873369180109, + "grad_norm": 161.0, + "learning_rate": 9.999876798905664e-05, + "loss": 9.1889, + "step": 772 + }, + { + "epoch": 0.03222041598932933, + "grad_norm": 780.0, + "learning_rate": 9.99987201485991e-05, + "loss": 23.7507, + "step": 773 + }, + { + "epoch": 0.03226209828685757, + "grad_norm": 290.0, + "learning_rate": 9.999867139691045e-05, + "loss": 12.0658, + "step": 774 + }, + { + "epoch": 0.032303780584385815, + "grad_norm": 296.0, + "learning_rate": 9.999862173399159e-05, + "loss": 12.8139, + "step": 775 + }, + { + "epoch": 0.032345462881914054, + "grad_norm": 370.0, + "learning_rate": 9.99985711598434e-05, + "loss": 14.1255, + "step": 776 + }, + { + "epoch": 0.032387145179442293, + "grad_norm": 350.0, + "learning_rate": 9.99985196744668e-05, + "loss": 13.7512, + "step": 777 + }, + { + "epoch": 0.03242882747697053, + "grad_norm": 502.0, + "learning_rate": 9.999846727786275e-05, + "loss": 17.6266, + "step": 778 + }, + { + "epoch": 0.03247050977449877, + "grad_norm": 584.0, + "learning_rate": 9.99984139700322e-05, + "loss": 21.5016, + "step": 779 + }, + { + "epoch": 0.03251219207202701, + "grad_norm": 414.0, + "learning_rate": 9.99983597509761e-05, + "loss": 15.2512, + "step": 780 + }, + { + "epoch": 0.03255387436955525, + "grad_norm": 460.0, + "learning_rate": 9.999830462069548e-05, + "loss": 14.7552, + "step": 781 + }, + { + "epoch": 0.03259555666708349, + "grad_norm": 454.0, + "learning_rate": 9.999824857919132e-05, + "loss": 15.6891, + "step": 782 + }, + { + "epoch": 0.03263723896461173, + "grad_norm": 588.0, + "learning_rate": 9.999819162646462e-05, + "loss": 21.0011, + "step": 783 + }, + { + "epoch": 0.03267892126213997, + "grad_norm": 368.0, + "learning_rate": 9.999813376251644e-05, + "loss": 14.9381, + "step": 784 + }, + { + "epoch": 0.03272060355966821, + "grad_norm": 364.0, + "learning_rate": 9.999807498734785e-05, + "loss": 13.8758, + "step": 785 + }, + { + "epoch": 0.03276228585719645, + "grad_norm": 438.0, + "learning_rate": 9.99980153009599e-05, + "loss": 12.8765, + "step": 786 + }, + { + "epoch": 0.03280396815472469, + "grad_norm": 672.0, + "learning_rate": 9.999795470335367e-05, + "loss": 20.0038, + "step": 787 + }, + { + "epoch": 0.03284565045225293, + "grad_norm": 244.0, + "learning_rate": 9.999789319453029e-05, + "loss": 11.1256, + "step": 788 + }, + { + "epoch": 0.032887332749781166, + "grad_norm": 364.0, + "learning_rate": 9.999783077449087e-05, + "loss": 14.876, + "step": 789 + }, + { + "epoch": 0.032929015047309405, + "grad_norm": 382.0, + "learning_rate": 9.999776744323654e-05, + "loss": 14.8761, + "step": 790 + }, + { + "epoch": 0.032970697344837645, + "grad_norm": 932.0, + "learning_rate": 9.999770320076845e-05, + "loss": 26.1268, + "step": 791 + }, + { + "epoch": 0.033012379642365884, + "grad_norm": 388.0, + "learning_rate": 9.999763804708779e-05, + "loss": 14.0632, + "step": 792 + }, + { + "epoch": 0.033054061939894124, + "grad_norm": 492.0, + "learning_rate": 9.999757198219575e-05, + "loss": 16.8761, + "step": 793 + }, + { + "epoch": 0.03309574423742237, + "grad_norm": 2256.0, + "learning_rate": 9.99975050060935e-05, + "loss": 50.0052, + "step": 794 + }, + { + "epoch": 0.03313742653495061, + "grad_norm": 231.0, + "learning_rate": 9.999743711878229e-05, + "loss": 12.314, + "step": 795 + }, + { + "epoch": 0.03317910883247885, + "grad_norm": 940.0, + "learning_rate": 9.999736832026337e-05, + "loss": 25.5047, + "step": 796 + }, + { + "epoch": 0.03322079113000709, + "grad_norm": 450.0, + "learning_rate": 9.999729861053795e-05, + "loss": 15.751, + "step": 797 + }, + { + "epoch": 0.03326247342753533, + "grad_norm": 167.0, + "learning_rate": 9.999722798960733e-05, + "loss": 9.3765, + "step": 798 + }, + { + "epoch": 0.03330415572506357, + "grad_norm": 336.0, + "learning_rate": 9.999715645747279e-05, + "loss": 12.0636, + "step": 799 + }, + { + "epoch": 0.033345838022591806, + "grad_norm": 608.0, + "learning_rate": 9.999708401413564e-05, + "loss": 18.0014, + "step": 800 + }, + { + "epoch": 0.033387520320120045, + "grad_norm": 388.0, + "learning_rate": 9.999701065959719e-05, + "loss": 13.5008, + "step": 801 + }, + { + "epoch": 0.033429202617648285, + "grad_norm": 572.0, + "learning_rate": 9.999693639385878e-05, + "loss": 19.8758, + "step": 802 + }, + { + "epoch": 0.033470884915176524, + "grad_norm": 217.0, + "learning_rate": 9.999686121692179e-05, + "loss": 10.6258, + "step": 803 + }, + { + "epoch": 0.033512567212704764, + "grad_norm": 548.0, + "learning_rate": 9.999678512878754e-05, + "loss": 16.501, + "step": 804 + }, + { + "epoch": 0.033554249510233, + "grad_norm": 414.0, + "learning_rate": 9.999670812945745e-05, + "loss": 15.752, + "step": 805 + }, + { + "epoch": 0.03359593180776124, + "grad_norm": 360.0, + "learning_rate": 9.999663021893293e-05, + "loss": 13.6257, + "step": 806 + }, + { + "epoch": 0.03363761410528948, + "grad_norm": 332.0, + "learning_rate": 9.999655139721537e-05, + "loss": 13.1887, + "step": 807 + }, + { + "epoch": 0.03367929640281772, + "grad_norm": 1600.0, + "learning_rate": 9.999647166430623e-05, + "loss": 39.0029, + "step": 808 + }, + { + "epoch": 0.03372097870034596, + "grad_norm": 512.0, + "learning_rate": 9.999639102020695e-05, + "loss": 15.8765, + "step": 809 + }, + { + "epoch": 0.0337626609978742, + "grad_norm": 284.0, + "learning_rate": 9.9996309464919e-05, + "loss": 12.001, + "step": 810 + }, + { + "epoch": 0.03380434329540244, + "grad_norm": 133.0, + "learning_rate": 9.999622699844388e-05, + "loss": 8.4394, + "step": 811 + }, + { + "epoch": 0.033846025592930686, + "grad_norm": 238.0, + "learning_rate": 9.99961436207831e-05, + "loss": 10.3755, + "step": 812 + }, + { + "epoch": 0.033887707890458925, + "grad_norm": 968.0, + "learning_rate": 9.999605933193814e-05, + "loss": 23.7567, + "step": 813 + }, + { + "epoch": 0.033929390187987164, + "grad_norm": 386.0, + "learning_rate": 9.999597413191055e-05, + "loss": 13.3758, + "step": 814 + }, + { + "epoch": 0.033971072485515404, + "grad_norm": 226.0, + "learning_rate": 9.999588802070193e-05, + "loss": 11.0006, + "step": 815 + }, + { + "epoch": 0.03401275478304364, + "grad_norm": 179.0, + "learning_rate": 9.999580099831379e-05, + "loss": 9.4383, + "step": 816 + }, + { + "epoch": 0.03405443708057188, + "grad_norm": 768.0, + "learning_rate": 9.999571306474773e-05, + "loss": 22.6259, + "step": 817 + }, + { + "epoch": 0.03409611937810012, + "grad_norm": 320.0, + "learning_rate": 9.999562422000538e-05, + "loss": 11.1896, + "step": 818 + }, + { + "epoch": 0.03413780167562836, + "grad_norm": 422.0, + "learning_rate": 9.999553446408834e-05, + "loss": 12.8142, + "step": 819 + }, + { + "epoch": 0.0341794839731566, + "grad_norm": 244.0, + "learning_rate": 9.999544379699824e-05, + "loss": 10.6274, + "step": 820 + }, + { + "epoch": 0.03422116627068484, + "grad_norm": 454.0, + "learning_rate": 9.999535221873673e-05, + "loss": 15.3753, + "step": 821 + }, + { + "epoch": 0.03426284856821308, + "grad_norm": 290.0, + "learning_rate": 9.999525972930551e-05, + "loss": 12.6904, + "step": 822 + }, + { + "epoch": 0.03430453086574132, + "grad_norm": 1480.0, + "learning_rate": 9.999516632870621e-05, + "loss": 35.5058, + "step": 823 + }, + { + "epoch": 0.03434621316326956, + "grad_norm": 628.0, + "learning_rate": 9.999507201694058e-05, + "loss": 19.2508, + "step": 824 + }, + { + "epoch": 0.0343878954607978, + "grad_norm": 740.0, + "learning_rate": 9.999497679401033e-05, + "loss": 24.376, + "step": 825 + }, + { + "epoch": 0.03442957775832604, + "grad_norm": 314.0, + "learning_rate": 9.99948806599172e-05, + "loss": 11.5631, + "step": 826 + }, + { + "epoch": 0.034471260055854276, + "grad_norm": 656.0, + "learning_rate": 9.999478361466292e-05, + "loss": 18.0008, + "step": 827 + }, + { + "epoch": 0.034512942353382516, + "grad_norm": 127.5, + "learning_rate": 9.999468565824927e-05, + "loss": 8.1255, + "step": 828 + }, + { + "epoch": 0.034554624650910755, + "grad_norm": 632.0, + "learning_rate": 9.999458679067804e-05, + "loss": 18.2527, + "step": 829 + }, + { + "epoch": 0.034596306948439, + "grad_norm": 217.0, + "learning_rate": 9.999448701195102e-05, + "loss": 10.8755, + "step": 830 + }, + { + "epoch": 0.03463798924596724, + "grad_norm": 404.0, + "learning_rate": 9.999438632207003e-05, + "loss": 16.1255, + "step": 831 + }, + { + "epoch": 0.03467967154349548, + "grad_norm": 386.0, + "learning_rate": 9.999428472103694e-05, + "loss": 12.2511, + "step": 832 + }, + { + "epoch": 0.03472135384102372, + "grad_norm": 324.0, + "learning_rate": 9.999418220885355e-05, + "loss": 9.1261, + "step": 833 + }, + { + "epoch": 0.03476303613855196, + "grad_norm": 67.0, + "learning_rate": 9.999407878552175e-05, + "loss": 7.3448, + "step": 834 + }, + { + "epoch": 0.0348047184360802, + "grad_norm": 844.0, + "learning_rate": 9.999397445104343e-05, + "loss": 23.6261, + "step": 835 + }, + { + "epoch": 0.03484640073360844, + "grad_norm": 470.0, + "learning_rate": 9.999386920542049e-05, + "loss": 16.2512, + "step": 836 + }, + { + "epoch": 0.03488808303113668, + "grad_norm": 1020.0, + "learning_rate": 9.999376304865484e-05, + "loss": 25.506, + "step": 837 + }, + { + "epoch": 0.034929765328664916, + "grad_norm": 302.0, + "learning_rate": 9.999365598074843e-05, + "loss": 12.1897, + "step": 838 + }, + { + "epoch": 0.034971447626193156, + "grad_norm": 736.0, + "learning_rate": 9.99935480017032e-05, + "loss": 23.255, + "step": 839 + }, + { + "epoch": 0.035013129923721395, + "grad_norm": 458.0, + "learning_rate": 9.99934391115211e-05, + "loss": 11.6306, + "step": 840 + }, + { + "epoch": 0.035054812221249634, + "grad_norm": 732.0, + "learning_rate": 9.999332931020415e-05, + "loss": 17.2571, + "step": 841 + }, + { + "epoch": 0.035096494518777874, + "grad_norm": 960.0, + "learning_rate": 9.999321859775435e-05, + "loss": 23.5005, + "step": 842 + }, + { + "epoch": 0.03513817681630611, + "grad_norm": 868.0, + "learning_rate": 9.999310697417368e-05, + "loss": 25.7506, + "step": 843 + }, + { + "epoch": 0.03517985911383435, + "grad_norm": 880.0, + "learning_rate": 9.999299443946422e-05, + "loss": 25.5009, + "step": 844 + }, + { + "epoch": 0.03522154141136259, + "grad_norm": 280.0, + "learning_rate": 9.999288099362799e-05, + "loss": 10.6885, + "step": 845 + }, + { + "epoch": 0.03526322370889083, + "grad_norm": 135.0, + "learning_rate": 9.999276663666705e-05, + "loss": 6.9086, + "step": 846 + }, + { + "epoch": 0.03530490600641907, + "grad_norm": 358.0, + "learning_rate": 9.999265136858352e-05, + "loss": 13.5631, + "step": 847 + }, + { + "epoch": 0.03534658830394732, + "grad_norm": 528.0, + "learning_rate": 9.999253518937945e-05, + "loss": 18.6265, + "step": 848 + }, + { + "epoch": 0.035388270601475556, + "grad_norm": 332.0, + "learning_rate": 9.999241809905702e-05, + "loss": 13.7504, + "step": 849 + }, + { + "epoch": 0.035429952899003796, + "grad_norm": 314.0, + "learning_rate": 9.999230009761832e-05, + "loss": 10.4389, + "step": 850 + }, + { + "epoch": 0.035471635196532035, + "grad_norm": 484.0, + "learning_rate": 9.99921811850655e-05, + "loss": 17.3757, + "step": 851 + }, + { + "epoch": 0.035513317494060274, + "grad_norm": 478.0, + "learning_rate": 9.999206136140076e-05, + "loss": 17.0011, + "step": 852 + }, + { + "epoch": 0.035554999791588514, + "grad_norm": 164.0, + "learning_rate": 9.999194062662627e-05, + "loss": 9.0004, + "step": 853 + }, + { + "epoch": 0.03559668208911675, + "grad_norm": 960.0, + "learning_rate": 9.999181898074421e-05, + "loss": 25.5015, + "step": 854 + }, + { + "epoch": 0.03563836438664499, + "grad_norm": 338.0, + "learning_rate": 9.999169642375681e-05, + "loss": 12.2506, + "step": 855 + }, + { + "epoch": 0.03568004668417323, + "grad_norm": 1672.0, + "learning_rate": 9.99915729556663e-05, + "loss": 42.0003, + "step": 856 + }, + { + "epoch": 0.03572172898170147, + "grad_norm": 250.0, + "learning_rate": 9.999144857647495e-05, + "loss": 12.3758, + "step": 857 + }, + { + "epoch": 0.03576341127922971, + "grad_norm": 462.0, + "learning_rate": 9.999132328618501e-05, + "loss": 16.6257, + "step": 858 + }, + { + "epoch": 0.03580509357675795, + "grad_norm": 430.0, + "learning_rate": 9.999119708479876e-05, + "loss": 16.5003, + "step": 859 + }, + { + "epoch": 0.03584677587428619, + "grad_norm": 560.0, + "learning_rate": 9.99910699723185e-05, + "loss": 18.1279, + "step": 860 + }, + { + "epoch": 0.03588845817181443, + "grad_norm": 1224.0, + "learning_rate": 9.999094194874656e-05, + "loss": 31.6274, + "step": 861 + }, + { + "epoch": 0.03593014046934267, + "grad_norm": 235.0, + "learning_rate": 9.999081301408526e-05, + "loss": 11.6892, + "step": 862 + }, + { + "epoch": 0.03597182276687091, + "grad_norm": 350.0, + "learning_rate": 9.999068316833695e-05, + "loss": 12.7529, + "step": 863 + }, + { + "epoch": 0.03601350506439915, + "grad_norm": 338.0, + "learning_rate": 9.999055241150401e-05, + "loss": 13.1898, + "step": 864 + }, + { + "epoch": 0.036055187361927386, + "grad_norm": 446.0, + "learning_rate": 9.999042074358882e-05, + "loss": 15.376, + "step": 865 + }, + { + "epoch": 0.03609686965945563, + "grad_norm": 167.0, + "learning_rate": 9.999028816459377e-05, + "loss": 8.4379, + "step": 866 + }, + { + "epoch": 0.03613855195698387, + "grad_norm": 288.0, + "learning_rate": 9.999015467452128e-05, + "loss": 12.8773, + "step": 867 + }, + { + "epoch": 0.03618023425451211, + "grad_norm": 454.0, + "learning_rate": 9.999002027337377e-05, + "loss": 18.2532, + "step": 868 + }, + { + "epoch": 0.03622191655204035, + "grad_norm": 157.0, + "learning_rate": 9.998988496115372e-05, + "loss": 8.3767, + "step": 869 + }, + { + "epoch": 0.03626359884956859, + "grad_norm": 334.0, + "learning_rate": 9.998974873786357e-05, + "loss": 13.4382, + "step": 870 + }, + { + "epoch": 0.03630528114709683, + "grad_norm": 444.0, + "learning_rate": 9.998961160350582e-05, + "loss": 14.9379, + "step": 871 + }, + { + "epoch": 0.03634696344462507, + "grad_norm": 2040.0, + "learning_rate": 9.998947355808295e-05, + "loss": 46.0064, + "step": 872 + }, + { + "epoch": 0.03638864574215331, + "grad_norm": 176.0, + "learning_rate": 9.99893346015975e-05, + "loss": 8.5009, + "step": 873 + }, + { + "epoch": 0.03643032803968155, + "grad_norm": 75.5, + "learning_rate": 9.998919473405197e-05, + "loss": 6.3773, + "step": 874 + }, + { + "epoch": 0.03647201033720979, + "grad_norm": 588.0, + "learning_rate": 9.998905395544895e-05, + "loss": 18.1263, + "step": 875 + }, + { + "epoch": 0.036513692634738026, + "grad_norm": 314.0, + "learning_rate": 9.998891226579096e-05, + "loss": 11.1282, + "step": 876 + }, + { + "epoch": 0.036555374932266266, + "grad_norm": 304.0, + "learning_rate": 9.998876966508063e-05, + "loss": 13.6259, + "step": 877 + }, + { + "epoch": 0.036597057229794505, + "grad_norm": 420.0, + "learning_rate": 9.998862615332052e-05, + "loss": 14.501, + "step": 878 + }, + { + "epoch": 0.036638739527322745, + "grad_norm": 418.0, + "learning_rate": 9.998848173051327e-05, + "loss": 13.814, + "step": 879 + }, + { + "epoch": 0.036680421824850984, + "grad_norm": 640.0, + "learning_rate": 9.99883363966615e-05, + "loss": 19.0008, + "step": 880 + }, + { + "epoch": 0.03672210412237922, + "grad_norm": 262.0, + "learning_rate": 9.998819015176786e-05, + "loss": 10.1266, + "step": 881 + }, + { + "epoch": 0.03676378641990746, + "grad_norm": 1448.0, + "learning_rate": 9.998804299583502e-05, + "loss": 33.7555, + "step": 882 + }, + { + "epoch": 0.0368054687174357, + "grad_norm": 580.0, + "learning_rate": 9.998789492886565e-05, + "loss": 17.5041, + "step": 883 + }, + { + "epoch": 0.03684715101496394, + "grad_norm": 416.0, + "learning_rate": 9.998774595086247e-05, + "loss": 14.2525, + "step": 884 + }, + { + "epoch": 0.03688883331249219, + "grad_norm": 312.0, + "learning_rate": 9.998759606182817e-05, + "loss": 12.0008, + "step": 885 + }, + { + "epoch": 0.03693051561002043, + "grad_norm": 358.0, + "learning_rate": 9.99874452617655e-05, + "loss": 13.6904, + "step": 886 + }, + { + "epoch": 0.036972197907548666, + "grad_norm": 980.0, + "learning_rate": 9.998729355067719e-05, + "loss": 22.7557, + "step": 887 + }, + { + "epoch": 0.037013880205076906, + "grad_norm": 488.0, + "learning_rate": 9.998714092856605e-05, + "loss": 17.1271, + "step": 888 + }, + { + "epoch": 0.037055562502605145, + "grad_norm": 294.0, + "learning_rate": 9.998698739543481e-05, + "loss": 13.0632, + "step": 889 + }, + { + "epoch": 0.037097244800133385, + "grad_norm": 107.5, + "learning_rate": 9.998683295128627e-05, + "loss": 8.5631, + "step": 890 + }, + { + "epoch": 0.037138927097661624, + "grad_norm": 225.0, + "learning_rate": 9.99866775961233e-05, + "loss": 11.3136, + "step": 891 + }, + { + "epoch": 0.03718060939518986, + "grad_norm": 596.0, + "learning_rate": 9.998652132994865e-05, + "loss": 18.0006, + "step": 892 + }, + { + "epoch": 0.0372222916927181, + "grad_norm": 386.0, + "learning_rate": 9.998636415276525e-05, + "loss": 8.5636, + "step": 893 + }, + { + "epoch": 0.03726397399024634, + "grad_norm": 180.0, + "learning_rate": 9.99862060645759e-05, + "loss": 9.9392, + "step": 894 + }, + { + "epoch": 0.03730565628777458, + "grad_norm": 378.0, + "learning_rate": 9.998604706538352e-05, + "loss": 14.5011, + "step": 895 + }, + { + "epoch": 0.03734733858530282, + "grad_norm": 426.0, + "learning_rate": 9.9985887155191e-05, + "loss": 14.7508, + "step": 896 + }, + { + "epoch": 0.03738902088283106, + "grad_norm": 472.0, + "learning_rate": 9.998572633400123e-05, + "loss": 17.3772, + "step": 897 + }, + { + "epoch": 0.0374307031803593, + "grad_norm": 221.0, + "learning_rate": 9.998556460181718e-05, + "loss": 10.1887, + "step": 898 + }, + { + "epoch": 0.03747238547788754, + "grad_norm": 596.0, + "learning_rate": 9.998540195864177e-05, + "loss": 16.5007, + "step": 899 + }, + { + "epoch": 0.03751406777541578, + "grad_norm": 454.0, + "learning_rate": 9.998523840447795e-05, + "loss": 13.7517, + "step": 900 + }, + { + "epoch": 0.03755575007294402, + "grad_norm": 852.0, + "learning_rate": 9.998507393932875e-05, + "loss": 23.0018, + "step": 901 + }, + { + "epoch": 0.03759743237047226, + "grad_norm": 472.0, + "learning_rate": 9.998490856319713e-05, + "loss": 17.0005, + "step": 902 + }, + { + "epoch": 0.0376391146680005, + "grad_norm": 338.0, + "learning_rate": 9.998474227608612e-05, + "loss": 14.3758, + "step": 903 + }, + { + "epoch": 0.03768079696552874, + "grad_norm": 292.0, + "learning_rate": 9.998457507799874e-05, + "loss": 12.1884, + "step": 904 + }, + { + "epoch": 0.03772247926305698, + "grad_norm": 356.0, + "learning_rate": 9.998440696893805e-05, + "loss": 12.7504, + "step": 905 + }, + { + "epoch": 0.03776416156058522, + "grad_norm": 245.0, + "learning_rate": 9.99842379489071e-05, + "loss": 10.5017, + "step": 906 + }, + { + "epoch": 0.03780584385811346, + "grad_norm": 324.0, + "learning_rate": 9.998406801790898e-05, + "loss": 13.6258, + "step": 907 + }, + { + "epoch": 0.0378475261556417, + "grad_norm": 212.0, + "learning_rate": 9.998389717594677e-05, + "loss": 10.8761, + "step": 908 + }, + { + "epoch": 0.03788920845316994, + "grad_norm": 258.0, + "learning_rate": 9.998372542302361e-05, + "loss": 9.5011, + "step": 909 + }, + { + "epoch": 0.03793089075069818, + "grad_norm": 436.0, + "learning_rate": 9.998355275914262e-05, + "loss": 15.6256, + "step": 910 + }, + { + "epoch": 0.03797257304822642, + "grad_norm": 372.0, + "learning_rate": 9.998337918430694e-05, + "loss": 13.8766, + "step": 911 + }, + { + "epoch": 0.03801425534575466, + "grad_norm": 764.0, + "learning_rate": 9.998320469851971e-05, + "loss": 21.5054, + "step": 912 + }, + { + "epoch": 0.0380559376432829, + "grad_norm": 227.0, + "learning_rate": 9.998302930178418e-05, + "loss": 11.1265, + "step": 913 + }, + { + "epoch": 0.03809761994081114, + "grad_norm": 330.0, + "learning_rate": 9.998285299410348e-05, + "loss": 13.001, + "step": 914 + }, + { + "epoch": 0.038139302238339376, + "grad_norm": 828.0, + "learning_rate": 9.998267577548085e-05, + "loss": 23.254, + "step": 915 + }, + { + "epoch": 0.038180984535867615, + "grad_norm": 616.0, + "learning_rate": 9.998249764591951e-05, + "loss": 19.2538, + "step": 916 + }, + { + "epoch": 0.038222666833395855, + "grad_norm": 462.0, + "learning_rate": 9.998231860542273e-05, + "loss": 14.6256, + "step": 917 + }, + { + "epoch": 0.038264349130924094, + "grad_norm": 348.0, + "learning_rate": 9.998213865399376e-05, + "loss": 12.5006, + "step": 918 + }, + { + "epoch": 0.03830603142845233, + "grad_norm": 832.0, + "learning_rate": 9.998195779163586e-05, + "loss": 21.6261, + "step": 919 + }, + { + "epoch": 0.03834771372598057, + "grad_norm": 572.0, + "learning_rate": 9.998177601835235e-05, + "loss": 15.0014, + "step": 920 + }, + { + "epoch": 0.03838939602350882, + "grad_norm": 306.0, + "learning_rate": 9.998159333414652e-05, + "loss": 12.6893, + "step": 921 + }, + { + "epoch": 0.03843107832103706, + "grad_norm": 470.0, + "learning_rate": 9.998140973902173e-05, + "loss": 16.1254, + "step": 922 + }, + { + "epoch": 0.0384727606185653, + "grad_norm": 318.0, + "learning_rate": 9.99812252329813e-05, + "loss": 10.3154, + "step": 923 + }, + { + "epoch": 0.03851444291609354, + "grad_norm": 628.0, + "learning_rate": 9.998103981602862e-05, + "loss": 20.3757, + "step": 924 + }, + { + "epoch": 0.03855612521362178, + "grad_norm": 452.0, + "learning_rate": 9.998085348816704e-05, + "loss": 11.814, + "step": 925 + }, + { + "epoch": 0.038597807511150016, + "grad_norm": 454.0, + "learning_rate": 9.998066624939997e-05, + "loss": 15.0011, + "step": 926 + }, + { + "epoch": 0.038639489808678255, + "grad_norm": 1208.0, + "learning_rate": 9.998047809973081e-05, + "loss": 26.2559, + "step": 927 + }, + { + "epoch": 0.038681172106206495, + "grad_norm": 632.0, + "learning_rate": 9.998028903916302e-05, + "loss": 18.8786, + "step": 928 + }, + { + "epoch": 0.038722854403734734, + "grad_norm": 203.0, + "learning_rate": 9.998009906770002e-05, + "loss": 10.1881, + "step": 929 + }, + { + "epoch": 0.038764536701262974, + "grad_norm": 564.0, + "learning_rate": 9.997990818534527e-05, + "loss": 17.2507, + "step": 930 + }, + { + "epoch": 0.03880621899879121, + "grad_norm": 270.0, + "learning_rate": 9.997971639210227e-05, + "loss": 12.9397, + "step": 931 + }, + { + "epoch": 0.03884790129631945, + "grad_norm": 416.0, + "learning_rate": 9.997952368797448e-05, + "loss": 12.5006, + "step": 932 + }, + { + "epoch": 0.03888958359384769, + "grad_norm": 608.0, + "learning_rate": 9.997933007296545e-05, + "loss": 18.2514, + "step": 933 + }, + { + "epoch": 0.03893126589137593, + "grad_norm": 338.0, + "learning_rate": 9.99791355470787e-05, + "loss": 14.3764, + "step": 934 + }, + { + "epoch": 0.03897294818890417, + "grad_norm": 456.0, + "learning_rate": 9.997894011031774e-05, + "loss": 16.7517, + "step": 935 + }, + { + "epoch": 0.03901463048643241, + "grad_norm": 318.0, + "learning_rate": 9.997874376268619e-05, + "loss": 13.5633, + "step": 936 + }, + { + "epoch": 0.03905631278396065, + "grad_norm": 316.0, + "learning_rate": 9.997854650418758e-05, + "loss": 13.4384, + "step": 937 + }, + { + "epoch": 0.03909799508148889, + "grad_norm": 229.0, + "learning_rate": 9.997834833482553e-05, + "loss": 10.5628, + "step": 938 + }, + { + "epoch": 0.039139677379017135, + "grad_norm": 480.0, + "learning_rate": 9.997814925460364e-05, + "loss": 17.1255, + "step": 939 + }, + { + "epoch": 0.039181359676545374, + "grad_norm": 202.0, + "learning_rate": 9.997794926352555e-05, + "loss": 9.8754, + "step": 940 + }, + { + "epoch": 0.039223041974073614, + "grad_norm": 904.0, + "learning_rate": 9.997774836159488e-05, + "loss": 27.376, + "step": 941 + }, + { + "epoch": 0.03926472427160185, + "grad_norm": 298.0, + "learning_rate": 9.997754654881533e-05, + "loss": 12.3758, + "step": 942 + }, + { + "epoch": 0.03930640656913009, + "grad_norm": 900.0, + "learning_rate": 9.997734382519055e-05, + "loss": 24.3757, + "step": 943 + }, + { + "epoch": 0.03934808886665833, + "grad_norm": 796.0, + "learning_rate": 9.997714019072425e-05, + "loss": 19.506, + "step": 944 + }, + { + "epoch": 0.03938977116418657, + "grad_norm": 123.0, + "learning_rate": 9.997693564542012e-05, + "loss": 7.0963, + "step": 945 + }, + { + "epoch": 0.03943145346171481, + "grad_norm": 192.0, + "learning_rate": 9.99767301892819e-05, + "loss": 10.3131, + "step": 946 + }, + { + "epoch": 0.03947313575924305, + "grad_norm": 478.0, + "learning_rate": 9.997652382231334e-05, + "loss": 15.9384, + "step": 947 + }, + { + "epoch": 0.03951481805677129, + "grad_norm": 482.0, + "learning_rate": 9.997631654451821e-05, + "loss": 14.8132, + "step": 948 + }, + { + "epoch": 0.03955650035429953, + "grad_norm": 470.0, + "learning_rate": 9.997610835590027e-05, + "loss": 16.5008, + "step": 949 + }, + { + "epoch": 0.03959818265182777, + "grad_norm": 320.0, + "learning_rate": 9.997589925646331e-05, + "loss": 14.2507, + "step": 950 + }, + { + "epoch": 0.03963986494935601, + "grad_norm": 620.0, + "learning_rate": 9.997568924621116e-05, + "loss": 18.7543, + "step": 951 + }, + { + "epoch": 0.03968154724688425, + "grad_norm": 422.0, + "learning_rate": 9.997547832514762e-05, + "loss": 14.4384, + "step": 952 + }, + { + "epoch": 0.039723229544412486, + "grad_norm": 344.0, + "learning_rate": 9.997526649327656e-05, + "loss": 14.1881, + "step": 953 + }, + { + "epoch": 0.039764911841940725, + "grad_norm": 233.0, + "learning_rate": 9.997505375060183e-05, + "loss": 10.3145, + "step": 954 + }, + { + "epoch": 0.039806594139468965, + "grad_norm": 418.0, + "learning_rate": 9.997484009712732e-05, + "loss": 15.2524, + "step": 955 + }, + { + "epoch": 0.039848276436997204, + "grad_norm": 420.0, + "learning_rate": 9.99746255328569e-05, + "loss": 13.8129, + "step": 956 + }, + { + "epoch": 0.039889958734525444, + "grad_norm": 316.0, + "learning_rate": 9.997441005779451e-05, + "loss": 12.9382, + "step": 957 + }, + { + "epoch": 0.03993164103205369, + "grad_norm": 648.0, + "learning_rate": 9.997419367194404e-05, + "loss": 20.5028, + "step": 958 + }, + { + "epoch": 0.03997332332958193, + "grad_norm": 744.0, + "learning_rate": 9.997397637530946e-05, + "loss": 20.7527, + "step": 959 + }, + { + "epoch": 0.04001500562711017, + "grad_norm": 344.0, + "learning_rate": 9.997375816789474e-05, + "loss": 12.3763, + "step": 960 + }, + { + "epoch": 0.04005668792463841, + "grad_norm": 172.0, + "learning_rate": 9.997353904970381e-05, + "loss": 10.2515, + "step": 961 + }, + { + "epoch": 0.04009837022216665, + "grad_norm": 1048.0, + "learning_rate": 9.997331902074072e-05, + "loss": 29.6254, + "step": 962 + }, + { + "epoch": 0.04014005251969489, + "grad_norm": 272.0, + "learning_rate": 9.997309808100946e-05, + "loss": 11.1908, + "step": 963 + }, + { + "epoch": 0.040181734817223126, + "grad_norm": 692.0, + "learning_rate": 9.997287623051403e-05, + "loss": 20.3759, + "step": 964 + }, + { + "epoch": 0.040223417114751366, + "grad_norm": 556.0, + "learning_rate": 9.99726534692585e-05, + "loss": 18.3762, + "step": 965 + }, + { + "epoch": 0.040265099412279605, + "grad_norm": 364.0, + "learning_rate": 9.997242979724693e-05, + "loss": 13.938, + "step": 966 + }, + { + "epoch": 0.040306781709807844, + "grad_norm": 432.0, + "learning_rate": 9.997220521448338e-05, + "loss": 15.8128, + "step": 967 + }, + { + "epoch": 0.040348464007336084, + "grad_norm": 180.0, + "learning_rate": 9.997197972097196e-05, + "loss": 9.7509, + "step": 968 + }, + { + "epoch": 0.04039014630486432, + "grad_norm": 245.0, + "learning_rate": 9.997175331671678e-05, + "loss": 11.3753, + "step": 969 + }, + { + "epoch": 0.04043182860239256, + "grad_norm": 500.0, + "learning_rate": 9.997152600172195e-05, + "loss": 16.5009, + "step": 970 + }, + { + "epoch": 0.0404735108999208, + "grad_norm": 398.0, + "learning_rate": 9.997129777599163e-05, + "loss": 15.626, + "step": 971 + }, + { + "epoch": 0.04051519319744904, + "grad_norm": 828.0, + "learning_rate": 9.997106863952997e-05, + "loss": 21.0057, + "step": 972 + }, + { + "epoch": 0.04055687549497728, + "grad_norm": 1296.0, + "learning_rate": 9.997083859234115e-05, + "loss": 29.1304, + "step": 973 + }, + { + "epoch": 0.04059855779250552, + "grad_norm": 197.0, + "learning_rate": 9.997060763442937e-05, + "loss": 10.252, + "step": 974 + }, + { + "epoch": 0.04064024009003376, + "grad_norm": 402.0, + "learning_rate": 9.99703757657988e-05, + "loss": 15.0011, + "step": 975 + }, + { + "epoch": 0.040681922387562006, + "grad_norm": 470.0, + "learning_rate": 9.997014298645371e-05, + "loss": 15.0666, + "step": 976 + }, + { + "epoch": 0.040723604685090245, + "grad_norm": 258.0, + "learning_rate": 9.996990929639834e-05, + "loss": 12.3758, + "step": 977 + }, + { + "epoch": 0.040765286982618484, + "grad_norm": 230.0, + "learning_rate": 9.996967469563692e-05, + "loss": 11.6254, + "step": 978 + }, + { + "epoch": 0.040806969280146724, + "grad_norm": 400.0, + "learning_rate": 9.996943918417376e-05, + "loss": 12.2514, + "step": 979 + }, + { + "epoch": 0.04084865157767496, + "grad_norm": 420.0, + "learning_rate": 9.996920276201312e-05, + "loss": 16.0019, + "step": 980 + }, + { + "epoch": 0.0408903338752032, + "grad_norm": 392.0, + "learning_rate": 9.996896542915932e-05, + "loss": 14.8757, + "step": 981 + }, + { + "epoch": 0.04093201617273144, + "grad_norm": 462.0, + "learning_rate": 9.996872718561671e-05, + "loss": 16.2516, + "step": 982 + }, + { + "epoch": 0.04097369847025968, + "grad_norm": 336.0, + "learning_rate": 9.996848803138961e-05, + "loss": 12.8767, + "step": 983 + }, + { + "epoch": 0.04101538076778792, + "grad_norm": 408.0, + "learning_rate": 9.996824796648236e-05, + "loss": 15.1884, + "step": 984 + }, + { + "epoch": 0.04105706306531616, + "grad_norm": 528.0, + "learning_rate": 9.996800699089937e-05, + "loss": 16.7518, + "step": 985 + }, + { + "epoch": 0.0410987453628444, + "grad_norm": 462.0, + "learning_rate": 9.9967765104645e-05, + "loss": 16.2506, + "step": 986 + }, + { + "epoch": 0.04114042766037264, + "grad_norm": 436.0, + "learning_rate": 9.99675223077237e-05, + "loss": 15.877, + "step": 987 + }, + { + "epoch": 0.04118210995790088, + "grad_norm": 872.0, + "learning_rate": 9.996727860013985e-05, + "loss": 21.7509, + "step": 988 + }, + { + "epoch": 0.04122379225542912, + "grad_norm": 876.0, + "learning_rate": 9.996703398189792e-05, + "loss": 25.1263, + "step": 989 + }, + { + "epoch": 0.04126547455295736, + "grad_norm": 237.0, + "learning_rate": 9.996678845300236e-05, + "loss": 10.1264, + "step": 990 + }, + { + "epoch": 0.041307156850485596, + "grad_norm": 1288.0, + "learning_rate": 9.996654201345765e-05, + "loss": 29.6307, + "step": 991 + }, + { + "epoch": 0.041348839148013836, + "grad_norm": 596.0, + "learning_rate": 9.996629466326826e-05, + "loss": 17.6274, + "step": 992 + }, + { + "epoch": 0.041390521445542075, + "grad_norm": 142.0, + "learning_rate": 9.996604640243872e-05, + "loss": 9.5631, + "step": 993 + }, + { + "epoch": 0.04143220374307032, + "grad_norm": 608.0, + "learning_rate": 9.996579723097356e-05, + "loss": 17.6262, + "step": 994 + }, + { + "epoch": 0.04147388604059856, + "grad_norm": 380.0, + "learning_rate": 9.99655471488773e-05, + "loss": 14.5007, + "step": 995 + }, + { + "epoch": 0.0415155683381268, + "grad_norm": 482.0, + "learning_rate": 9.996529615615451e-05, + "loss": 16.8762, + "step": 996 + }, + { + "epoch": 0.04155725063565504, + "grad_norm": 668.0, + "learning_rate": 9.996504425280977e-05, + "loss": 21.626, + "step": 997 + }, + { + "epoch": 0.04159893293318328, + "grad_norm": 238.0, + "learning_rate": 9.996479143884765e-05, + "loss": 11.188, + "step": 998 + }, + { + "epoch": 0.04164061523071152, + "grad_norm": 226.0, + "learning_rate": 9.996453771427276e-05, + "loss": 10.8774, + "step": 999 + }, + { + "epoch": 0.04168229752823976, + "grad_norm": 145.0, + "learning_rate": 9.996428307908976e-05, + "loss": 8.2511, + "step": 1000 + }, + { + "epoch": 0.041723979825768, + "grad_norm": 584.0, + "learning_rate": 9.996402753330325e-05, + "loss": 18.5011, + "step": 1001 + }, + { + "epoch": 0.041765662123296236, + "grad_norm": 211.0, + "learning_rate": 9.996377107691792e-05, + "loss": 10.1261, + "step": 1002 + }, + { + "epoch": 0.041807344420824476, + "grad_norm": 420.0, + "learning_rate": 9.996351370993842e-05, + "loss": 15.4381, + "step": 1003 + }, + { + "epoch": 0.041849026718352715, + "grad_norm": 330.0, + "learning_rate": 9.996325543236943e-05, + "loss": 13.4379, + "step": 1004 + }, + { + "epoch": 0.041890709015880954, + "grad_norm": 424.0, + "learning_rate": 9.996299624421569e-05, + "loss": 16.6264, + "step": 1005 + }, + { + "epoch": 0.041932391313409194, + "grad_norm": 174.0, + "learning_rate": 9.996273614548191e-05, + "loss": 9.6882, + "step": 1006 + }, + { + "epoch": 0.04197407361093743, + "grad_norm": 330.0, + "learning_rate": 9.996247513617281e-05, + "loss": 12.565, + "step": 1007 + }, + { + "epoch": 0.04201575590846567, + "grad_norm": 506.0, + "learning_rate": 9.996221321629319e-05, + "loss": 15.6882, + "step": 1008 + }, + { + "epoch": 0.04205743820599391, + "grad_norm": 340.0, + "learning_rate": 9.996195038584779e-05, + "loss": 13.9389, + "step": 1009 + }, + { + "epoch": 0.04209912050352215, + "grad_norm": 612.0, + "learning_rate": 9.99616866448414e-05, + "loss": 17.7544, + "step": 1010 + }, + { + "epoch": 0.04214080280105039, + "grad_norm": 320.0, + "learning_rate": 9.996142199327885e-05, + "loss": 12.5006, + "step": 1011 + }, + { + "epoch": 0.04218248509857864, + "grad_norm": 286.0, + "learning_rate": 9.996115643116494e-05, + "loss": 11.6882, + "step": 1012 + }, + { + "epoch": 0.042224167396106876, + "grad_norm": 346.0, + "learning_rate": 9.996088995850453e-05, + "loss": 13.4386, + "step": 1013 + }, + { + "epoch": 0.042265849693635116, + "grad_norm": 796.0, + "learning_rate": 9.996062257530243e-05, + "loss": 26.0005, + "step": 1014 + }, + { + "epoch": 0.042307531991163355, + "grad_norm": 720.0, + "learning_rate": 9.996035428156358e-05, + "loss": 20.3755, + "step": 1015 + }, + { + "epoch": 0.042349214288691595, + "grad_norm": 668.0, + "learning_rate": 9.996008507729284e-05, + "loss": 21.8756, + "step": 1016 + }, + { + "epoch": 0.042390896586219834, + "grad_norm": 326.0, + "learning_rate": 9.995981496249511e-05, + "loss": 13.6882, + "step": 1017 + }, + { + "epoch": 0.04243257888374807, + "grad_norm": 520.0, + "learning_rate": 9.99595439371753e-05, + "loss": 16.0022, + "step": 1018 + }, + { + "epoch": 0.04247426118127631, + "grad_norm": 432.0, + "learning_rate": 9.995927200133839e-05, + "loss": 15.1301, + "step": 1019 + }, + { + "epoch": 0.04251594347880455, + "grad_norm": 784.0, + "learning_rate": 9.99589991549893e-05, + "loss": 22.3757, + "step": 1020 + }, + { + "epoch": 0.04255762577633279, + "grad_norm": 800.0, + "learning_rate": 9.995872539813302e-05, + "loss": 24.2507, + "step": 1021 + }, + { + "epoch": 0.04259930807386103, + "grad_norm": 556.0, + "learning_rate": 9.995845073077452e-05, + "loss": 18.0006, + "step": 1022 + }, + { + "epoch": 0.04264099037138927, + "grad_norm": 274.0, + "learning_rate": 9.995817515291884e-05, + "loss": 12.1267, + "step": 1023 + }, + { + "epoch": 0.04268267266891751, + "grad_norm": 992.0, + "learning_rate": 9.995789866457099e-05, + "loss": 24.8808, + "step": 1024 + }, + { + "epoch": 0.04272435496644575, + "grad_norm": 312.0, + "learning_rate": 9.995762126573598e-05, + "loss": 12.813, + "step": 1025 + }, + { + "epoch": 0.04276603726397399, + "grad_norm": 318.0, + "learning_rate": 9.99573429564189e-05, + "loss": 13.9378, + "step": 1026 + }, + { + "epoch": 0.04280771956150223, + "grad_norm": 176.0, + "learning_rate": 9.99570637366248e-05, + "loss": 9.3765, + "step": 1027 + }, + { + "epoch": 0.04284940185903047, + "grad_norm": 346.0, + "learning_rate": 9.995678360635879e-05, + "loss": 13.1267, + "step": 1028 + }, + { + "epoch": 0.042891084156558706, + "grad_norm": 636.0, + "learning_rate": 9.995650256562596e-05, + "loss": 19.3753, + "step": 1029 + }, + { + "epoch": 0.042932766454086946, + "grad_norm": 286.0, + "learning_rate": 9.995622061443143e-05, + "loss": 12.6259, + "step": 1030 + }, + { + "epoch": 0.04297444875161519, + "grad_norm": 266.0, + "learning_rate": 9.995593775278034e-05, + "loss": 11.1882, + "step": 1031 + }, + { + "epoch": 0.04301613104914343, + "grad_norm": 430.0, + "learning_rate": 9.995565398067785e-05, + "loss": 15.751, + "step": 1032 + }, + { + "epoch": 0.04305781334667167, + "grad_norm": 242.0, + "learning_rate": 9.995536929812915e-05, + "loss": 10.0629, + "step": 1033 + }, + { + "epoch": 0.04309949564419991, + "grad_norm": 328.0, + "learning_rate": 9.995508370513939e-05, + "loss": 8.881, + "step": 1034 + }, + { + "epoch": 0.04314117794172815, + "grad_norm": 167.0, + "learning_rate": 9.995479720171381e-05, + "loss": 8.6886, + "step": 1035 + }, + { + "epoch": 0.04318286023925639, + "grad_norm": 366.0, + "learning_rate": 9.995450978785762e-05, + "loss": 15.44, + "step": 1036 + }, + { + "epoch": 0.04322454253678463, + "grad_norm": 716.0, + "learning_rate": 9.995422146357605e-05, + "loss": 20.3754, + "step": 1037 + }, + { + "epoch": 0.04326622483431287, + "grad_norm": 728.0, + "learning_rate": 9.995393222887435e-05, + "loss": 20.2509, + "step": 1038 + }, + { + "epoch": 0.04330790713184111, + "grad_norm": 334.0, + "learning_rate": 9.995364208375781e-05, + "loss": 13.6878, + "step": 1039 + }, + { + "epoch": 0.043349589429369347, + "grad_norm": 552.0, + "learning_rate": 9.99533510282317e-05, + "loss": 18.0021, + "step": 1040 + }, + { + "epoch": 0.043391271726897586, + "grad_norm": 206.0, + "learning_rate": 9.995305906230134e-05, + "loss": 10.8758, + "step": 1041 + }, + { + "epoch": 0.043432954024425825, + "grad_norm": 201.0, + "learning_rate": 9.995276618597203e-05, + "loss": 10.5634, + "step": 1042 + }, + { + "epoch": 0.043474636321954065, + "grad_norm": 324.0, + "learning_rate": 9.995247239924915e-05, + "loss": 13.0005, + "step": 1043 + }, + { + "epoch": 0.043516318619482304, + "grad_norm": 211.0, + "learning_rate": 9.9952177702138e-05, + "loss": 9.8137, + "step": 1044 + }, + { + "epoch": 0.04355800091701054, + "grad_norm": 304.0, + "learning_rate": 9.995188209464398e-05, + "loss": 12.313, + "step": 1045 + }, + { + "epoch": 0.04359968321453878, + "grad_norm": 346.0, + "learning_rate": 9.995158557677249e-05, + "loss": 13.7512, + "step": 1046 + }, + { + "epoch": 0.04364136551206702, + "grad_norm": 164.0, + "learning_rate": 9.99512881485289e-05, + "loss": 9.1256, + "step": 1047 + }, + { + "epoch": 0.04368304780959526, + "grad_norm": 924.0, + "learning_rate": 9.995098980991866e-05, + "loss": 26.126, + "step": 1048 + }, + { + "epoch": 0.04372473010712351, + "grad_norm": 218.0, + "learning_rate": 9.995069056094719e-05, + "loss": 10.0647, + "step": 1049 + }, + { + "epoch": 0.04376641240465175, + "grad_norm": 600.0, + "learning_rate": 9.995039040161997e-05, + "loss": 19.7514, + "step": 1050 + }, + { + "epoch": 0.04380809470217999, + "grad_norm": 256.0, + "learning_rate": 9.995008933194243e-05, + "loss": 11.4387, + "step": 1051 + }, + { + "epoch": 0.043849776999708226, + "grad_norm": 286.0, + "learning_rate": 9.994978735192009e-05, + "loss": 12.1268, + "step": 1052 + }, + { + "epoch": 0.043891459297236465, + "grad_norm": 96.0, + "learning_rate": 9.994948446155842e-05, + "loss": 6.6886, + "step": 1053 + }, + { + "epoch": 0.043933141594764705, + "grad_norm": 472.0, + "learning_rate": 9.994918066086298e-05, + "loss": 15.628, + "step": 1054 + }, + { + "epoch": 0.043974823892292944, + "grad_norm": 219.0, + "learning_rate": 9.994887594983929e-05, + "loss": 9.6261, + "step": 1055 + }, + { + "epoch": 0.04401650618982118, + "grad_norm": 1144.0, + "learning_rate": 9.99485703284929e-05, + "loss": 31.3754, + "step": 1056 + }, + { + "epoch": 0.04405818848734942, + "grad_norm": 520.0, + "learning_rate": 9.994826379682938e-05, + "loss": 14.6323, + "step": 1057 + }, + { + "epoch": 0.04409987078487766, + "grad_norm": 498.0, + "learning_rate": 9.99479563548543e-05, + "loss": 17.5026, + "step": 1058 + }, + { + "epoch": 0.0441415530824059, + "grad_norm": 264.0, + "learning_rate": 9.99476480025733e-05, + "loss": 13.2519, + "step": 1059 + }, + { + "epoch": 0.04418323537993414, + "grad_norm": 382.0, + "learning_rate": 9.994733873999199e-05, + "loss": 15.3142, + "step": 1060 + }, + { + "epoch": 0.04422491767746238, + "grad_norm": 326.0, + "learning_rate": 9.994702856711597e-05, + "loss": 13.5013, + "step": 1061 + }, + { + "epoch": 0.04426659997499062, + "grad_norm": 516.0, + "learning_rate": 9.994671748395095e-05, + "loss": 17.626, + "step": 1062 + }, + { + "epoch": 0.04430828227251886, + "grad_norm": 584.0, + "learning_rate": 9.994640549050257e-05, + "loss": 17.501, + "step": 1063 + }, + { + "epoch": 0.0443499645700471, + "grad_norm": 916.0, + "learning_rate": 9.994609258677648e-05, + "loss": 24.5013, + "step": 1064 + }, + { + "epoch": 0.04439164686757534, + "grad_norm": 512.0, + "learning_rate": 9.994577877277845e-05, + "loss": 17.3754, + "step": 1065 + }, + { + "epoch": 0.04443332916510358, + "grad_norm": 380.0, + "learning_rate": 9.994546404851415e-05, + "loss": 14.7506, + "step": 1066 + }, + { + "epoch": 0.044475011462631824, + "grad_norm": 564.0, + "learning_rate": 9.994514841398934e-05, + "loss": 18.001, + "step": 1067 + }, + { + "epoch": 0.04451669376016006, + "grad_norm": 1040.0, + "learning_rate": 9.994483186920978e-05, + "loss": 25.8794, + "step": 1068 + }, + { + "epoch": 0.0445583760576883, + "grad_norm": 302.0, + "learning_rate": 9.994451441418122e-05, + "loss": 12.3757, + "step": 1069 + }, + { + "epoch": 0.04460005835521654, + "grad_norm": 512.0, + "learning_rate": 9.994419604890944e-05, + "loss": 17.5005, + "step": 1070 + }, + { + "epoch": 0.04464174065274478, + "grad_norm": 360.0, + "learning_rate": 9.994387677340026e-05, + "loss": 13.8136, + "step": 1071 + }, + { + "epoch": 0.04468342295027302, + "grad_norm": 149.0, + "learning_rate": 9.99435565876595e-05, + "loss": 9.1259, + "step": 1072 + }, + { + "epoch": 0.04472510524780126, + "grad_norm": 502.0, + "learning_rate": 9.994323549169297e-05, + "loss": 19.6265, + "step": 1073 + }, + { + "epoch": 0.0447667875453295, + "grad_norm": 334.0, + "learning_rate": 9.994291348550656e-05, + "loss": 13.5646, + "step": 1074 + }, + { + "epoch": 0.04480846984285774, + "grad_norm": 278.0, + "learning_rate": 9.99425905691061e-05, + "loss": 12.063, + "step": 1075 + }, + { + "epoch": 0.04485015214038598, + "grad_norm": 700.0, + "learning_rate": 9.994226674249749e-05, + "loss": 18.8761, + "step": 1076 + }, + { + "epoch": 0.04489183443791422, + "grad_norm": 334.0, + "learning_rate": 9.994194200568665e-05, + "loss": 13.876, + "step": 1077 + }, + { + "epoch": 0.04493351673544246, + "grad_norm": 1048.0, + "learning_rate": 9.994161635867949e-05, + "loss": 31.6254, + "step": 1078 + }, + { + "epoch": 0.044975199032970696, + "grad_norm": 668.0, + "learning_rate": 9.994128980148192e-05, + "loss": 19.7511, + "step": 1079 + }, + { + "epoch": 0.045016881330498935, + "grad_norm": 494.0, + "learning_rate": 9.994096233409992e-05, + "loss": 15.5044, + "step": 1080 + }, + { + "epoch": 0.045058563628027175, + "grad_norm": 512.0, + "learning_rate": 9.994063395653945e-05, + "loss": 17.63, + "step": 1081 + }, + { + "epoch": 0.045100245925555414, + "grad_norm": 118.0, + "learning_rate": 9.994030466880648e-05, + "loss": 8.3133, + "step": 1082 + }, + { + "epoch": 0.045141928223083654, + "grad_norm": 408.0, + "learning_rate": 9.993997447090704e-05, + "loss": 14.8131, + "step": 1083 + }, + { + "epoch": 0.04518361052061189, + "grad_norm": 464.0, + "learning_rate": 9.993964336284712e-05, + "loss": 17.2505, + "step": 1084 + }, + { + "epoch": 0.04522529281814014, + "grad_norm": 392.0, + "learning_rate": 9.993931134463277e-05, + "loss": 13.7515, + "step": 1085 + }, + { + "epoch": 0.04526697511566838, + "grad_norm": 540.0, + "learning_rate": 9.993897841627005e-05, + "loss": 18.5006, + "step": 1086 + }, + { + "epoch": 0.04530865741319662, + "grad_norm": 330.0, + "learning_rate": 9.9938644577765e-05, + "loss": 14.0632, + "step": 1087 + }, + { + "epoch": 0.04535033971072486, + "grad_norm": 366.0, + "learning_rate": 9.99383098291237e-05, + "loss": 13.8146, + "step": 1088 + }, + { + "epoch": 0.0453920220082531, + "grad_norm": 422.0, + "learning_rate": 9.993797417035231e-05, + "loss": 15.0007, + "step": 1089 + }, + { + "epoch": 0.045433704305781336, + "grad_norm": 432.0, + "learning_rate": 9.993763760145689e-05, + "loss": 16.0007, + "step": 1090 + }, + { + "epoch": 0.045475386603309575, + "grad_norm": 382.0, + "learning_rate": 9.99373001224436e-05, + "loss": 14.1257, + "step": 1091 + }, + { + "epoch": 0.045517068900837815, + "grad_norm": 608.0, + "learning_rate": 9.993696173331857e-05, + "loss": 18.8757, + "step": 1092 + }, + { + "epoch": 0.045558751198366054, + "grad_norm": 860.0, + "learning_rate": 9.9936622434088e-05, + "loss": 24.8768, + "step": 1093 + }, + { + "epoch": 0.045600433495894294, + "grad_norm": 247.0, + "learning_rate": 9.993628222475802e-05, + "loss": 11.7519, + "step": 1094 + }, + { + "epoch": 0.04564211579342253, + "grad_norm": 256.0, + "learning_rate": 9.993594110533488e-05, + "loss": 10.8759, + "step": 1095 + }, + { + "epoch": 0.04568379809095077, + "grad_norm": 296.0, + "learning_rate": 9.993559907582478e-05, + "loss": 13.376, + "step": 1096 + }, + { + "epoch": 0.04572548038847901, + "grad_norm": 304.0, + "learning_rate": 9.993525613623395e-05, + "loss": 12.6255, + "step": 1097 + }, + { + "epoch": 0.04576716268600725, + "grad_norm": 440.0, + "learning_rate": 9.993491228656866e-05, + "loss": 16.0007, + "step": 1098 + }, + { + "epoch": 0.04580884498353549, + "grad_norm": 370.0, + "learning_rate": 9.993456752683515e-05, + "loss": 11.4382, + "step": 1099 + }, + { + "epoch": 0.04585052728106373, + "grad_norm": 205.0, + "learning_rate": 9.99342218570397e-05, + "loss": 10.4382, + "step": 1100 + }, + { + "epoch": 0.04589220957859197, + "grad_norm": 253.0, + "learning_rate": 9.993387527718865e-05, + "loss": 10.8135, + "step": 1101 + }, + { + "epoch": 0.04593389187612021, + "grad_norm": 404.0, + "learning_rate": 9.993352778728827e-05, + "loss": 12.6269, + "step": 1102 + }, + { + "epoch": 0.045975574173648455, + "grad_norm": 294.0, + "learning_rate": 9.993317938734492e-05, + "loss": 11.8139, + "step": 1103 + }, + { + "epoch": 0.046017256471176694, + "grad_norm": 193.0, + "learning_rate": 9.993283007736495e-05, + "loss": 10.188, + "step": 1104 + }, + { + "epoch": 0.046058938768704934, + "grad_norm": 672.0, + "learning_rate": 9.993247985735472e-05, + "loss": 18.5038, + "step": 1105 + }, + { + "epoch": 0.04610062106623317, + "grad_norm": 378.0, + "learning_rate": 9.99321287273206e-05, + "loss": 13.4378, + "step": 1106 + }, + { + "epoch": 0.04614230336376141, + "grad_norm": 86.5, + "learning_rate": 9.993177668726901e-05, + "loss": 5.8762, + "step": 1107 + }, + { + "epoch": 0.04618398566128965, + "grad_norm": 688.0, + "learning_rate": 9.993142373720634e-05, + "loss": 23.2511, + "step": 1108 + }, + { + "epoch": 0.04622566795881789, + "grad_norm": 242.0, + "learning_rate": 9.993106987713906e-05, + "loss": 12.001, + "step": 1109 + }, + { + "epoch": 0.04626735025634613, + "grad_norm": 420.0, + "learning_rate": 9.993071510707359e-05, + "loss": 15.3141, + "step": 1110 + }, + { + "epoch": 0.04630903255387437, + "grad_norm": 704.0, + "learning_rate": 9.99303594270164e-05, + "loss": 19.7575, + "step": 1111 + }, + { + "epoch": 0.04635071485140261, + "grad_norm": 218.0, + "learning_rate": 9.9930002836974e-05, + "loss": 11.5632, + "step": 1112 + }, + { + "epoch": 0.04639239714893085, + "grad_norm": 242.0, + "learning_rate": 9.992964533695285e-05, + "loss": 11.5004, + "step": 1113 + }, + { + "epoch": 0.04643407944645909, + "grad_norm": 69.0, + "learning_rate": 9.992928692695947e-05, + "loss": 6.8757, + "step": 1114 + }, + { + "epoch": 0.04647576174398733, + "grad_norm": 644.0, + "learning_rate": 9.992892760700042e-05, + "loss": 19.0007, + "step": 1115 + }, + { + "epoch": 0.04651744404151557, + "grad_norm": 1200.0, + "learning_rate": 9.992856737708223e-05, + "loss": 28.0055, + "step": 1116 + }, + { + "epoch": 0.046559126339043806, + "grad_norm": 156.0, + "learning_rate": 9.992820623721147e-05, + "loss": 6.9381, + "step": 1117 + }, + { + "epoch": 0.046600808636572046, + "grad_norm": 252.0, + "learning_rate": 9.992784418739472e-05, + "loss": 12.001, + "step": 1118 + }, + { + "epoch": 0.046642490934100285, + "grad_norm": 196.0, + "learning_rate": 9.992748122763856e-05, + "loss": 10.2519, + "step": 1119 + }, + { + "epoch": 0.046684173231628524, + "grad_norm": 272.0, + "learning_rate": 9.992711735794965e-05, + "loss": 11.7509, + "step": 1120 + }, + { + "epoch": 0.046725855529156764, + "grad_norm": 322.0, + "learning_rate": 9.992675257833456e-05, + "loss": 12.6257, + "step": 1121 + }, + { + "epoch": 0.04676753782668501, + "grad_norm": 584.0, + "learning_rate": 9.992638688879999e-05, + "loss": 17.1255, + "step": 1122 + }, + { + "epoch": 0.04680922012421325, + "grad_norm": 262.0, + "learning_rate": 9.992602028935259e-05, + "loss": 12.4391, + "step": 1123 + }, + { + "epoch": 0.04685090242174149, + "grad_norm": 169.0, + "learning_rate": 9.992565277999903e-05, + "loss": 10.3175, + "step": 1124 + }, + { + "epoch": 0.04689258471926973, + "grad_norm": 182.0, + "learning_rate": 9.992528436074601e-05, + "loss": 10.4383, + "step": 1125 + }, + { + "epoch": 0.04693426701679797, + "grad_norm": 464.0, + "learning_rate": 9.992491503160027e-05, + "loss": 15.3138, + "step": 1126 + }, + { + "epoch": 0.04697594931432621, + "grad_norm": 326.0, + "learning_rate": 9.992454479256852e-05, + "loss": 14.3129, + "step": 1127 + }, + { + "epoch": 0.047017631611854446, + "grad_norm": 386.0, + "learning_rate": 9.992417364365749e-05, + "loss": 13.1259, + "step": 1128 + }, + { + "epoch": 0.047059313909382686, + "grad_norm": 656.0, + "learning_rate": 9.992380158487398e-05, + "loss": 20.8753, + "step": 1129 + }, + { + "epoch": 0.047100996206910925, + "grad_norm": 380.0, + "learning_rate": 9.992342861622475e-05, + "loss": 13.5008, + "step": 1130 + }, + { + "epoch": 0.047142678504439164, + "grad_norm": 224.0, + "learning_rate": 9.992305473771661e-05, + "loss": 11.0634, + "step": 1131 + }, + { + "epoch": 0.047184360801967404, + "grad_norm": 280.0, + "learning_rate": 9.992267994935635e-05, + "loss": 10.5632, + "step": 1132 + }, + { + "epoch": 0.04722604309949564, + "grad_norm": 508.0, + "learning_rate": 9.992230425115083e-05, + "loss": 16.3773, + "step": 1133 + }, + { + "epoch": 0.04726772539702388, + "grad_norm": 324.0, + "learning_rate": 9.992192764310685e-05, + "loss": 11.0645, + "step": 1134 + }, + { + "epoch": 0.04730940769455212, + "grad_norm": 624.0, + "learning_rate": 9.992155012523135e-05, + "loss": 19.1277, + "step": 1135 + }, + { + "epoch": 0.04735108999208036, + "grad_norm": 324.0, + "learning_rate": 9.992117169753115e-05, + "loss": 12.6254, + "step": 1136 + }, + { + "epoch": 0.0473927722896086, + "grad_norm": 188.0, + "learning_rate": 9.992079236001317e-05, + "loss": 10.6258, + "step": 1137 + }, + { + "epoch": 0.04743445458713684, + "grad_norm": 342.0, + "learning_rate": 9.99204121126843e-05, + "loss": 13.8762, + "step": 1138 + }, + { + "epoch": 0.04747613688466508, + "grad_norm": 174.0, + "learning_rate": 9.992003095555151e-05, + "loss": 9.7513, + "step": 1139 + }, + { + "epoch": 0.047517819182193326, + "grad_norm": 480.0, + "learning_rate": 9.991964888862171e-05, + "loss": 15.5637, + "step": 1140 + }, + { + "epoch": 0.047559501479721565, + "grad_norm": 604.0, + "learning_rate": 9.991926591190188e-05, + "loss": 20.8756, + "step": 1141 + }, + { + "epoch": 0.047601183777249804, + "grad_norm": 117.0, + "learning_rate": 9.991888202539901e-05, + "loss": 8.4385, + "step": 1142 + }, + { + "epoch": 0.047642866074778044, + "grad_norm": 452.0, + "learning_rate": 9.991849722912006e-05, + "loss": 16.2507, + "step": 1143 + }, + { + "epoch": 0.04768454837230628, + "grad_norm": 584.0, + "learning_rate": 9.991811152307208e-05, + "loss": 18.0005, + "step": 1144 + }, + { + "epoch": 0.04772623066983452, + "grad_norm": 474.0, + "learning_rate": 9.991772490726209e-05, + "loss": 14.7535, + "step": 1145 + }, + { + "epoch": 0.04776791296736276, + "grad_norm": 328.0, + "learning_rate": 9.991733738169712e-05, + "loss": 13.0013, + "step": 1146 + }, + { + "epoch": 0.047809595264891, + "grad_norm": 212.0, + "learning_rate": 9.991694894638426e-05, + "loss": 10.4385, + "step": 1147 + }, + { + "epoch": 0.04785127756241924, + "grad_norm": 704.0, + "learning_rate": 9.991655960133058e-05, + "loss": 20.256, + "step": 1148 + }, + { + "epoch": 0.04789295985994748, + "grad_norm": 852.0, + "learning_rate": 9.991616934654316e-05, + "loss": 25.5003, + "step": 1149 + }, + { + "epoch": 0.04793464215747572, + "grad_norm": 640.0, + "learning_rate": 9.99157781820291e-05, + "loss": 18.6252, + "step": 1150 + }, + { + "epoch": 0.04797632445500396, + "grad_norm": 282.0, + "learning_rate": 9.991538610779558e-05, + "loss": 12.7514, + "step": 1151 + }, + { + "epoch": 0.0480180067525322, + "grad_norm": 456.0, + "learning_rate": 9.991499312384971e-05, + "loss": 14.0643, + "step": 1152 + }, + { + "epoch": 0.04805968905006044, + "grad_norm": 394.0, + "learning_rate": 9.991459923019866e-05, + "loss": 14.9397, + "step": 1153 + }, + { + "epoch": 0.04810137134758868, + "grad_norm": 434.0, + "learning_rate": 9.99142044268496e-05, + "loss": 16.1256, + "step": 1154 + }, + { + "epoch": 0.048143053645116916, + "grad_norm": 330.0, + "learning_rate": 9.991380871380974e-05, + "loss": 14.1879, + "step": 1155 + }, + { + "epoch": 0.048184735942645156, + "grad_norm": 294.0, + "learning_rate": 9.991341209108627e-05, + "loss": 13.1882, + "step": 1156 + }, + { + "epoch": 0.048226418240173395, + "grad_norm": 448.0, + "learning_rate": 9.991301455868645e-05, + "loss": 14.6878, + "step": 1157 + }, + { + "epoch": 0.04826810053770164, + "grad_norm": 460.0, + "learning_rate": 9.991261611661751e-05, + "loss": 16.0016, + "step": 1158 + }, + { + "epoch": 0.04830978283522988, + "grad_norm": 628.0, + "learning_rate": 9.991221676488671e-05, + "loss": 22.1255, + "step": 1159 + }, + { + "epoch": 0.04835146513275812, + "grad_norm": 254.0, + "learning_rate": 9.991181650350133e-05, + "loss": 11.1258, + "step": 1160 + }, + { + "epoch": 0.04839314743028636, + "grad_norm": 308.0, + "learning_rate": 9.991141533246865e-05, + "loss": 12.6885, + "step": 1161 + }, + { + "epoch": 0.0484348297278146, + "grad_norm": 560.0, + "learning_rate": 9.991101325179601e-05, + "loss": 17.8754, + "step": 1162 + }, + { + "epoch": 0.04847651202534284, + "grad_norm": 592.0, + "learning_rate": 9.991061026149071e-05, + "loss": 18.8755, + "step": 1163 + }, + { + "epoch": 0.04851819432287108, + "grad_norm": 520.0, + "learning_rate": 9.991020636156012e-05, + "loss": 17.2506, + "step": 1164 + }, + { + "epoch": 0.04855987662039932, + "grad_norm": 424.0, + "learning_rate": 9.990980155201157e-05, + "loss": 14.814, + "step": 1165 + }, + { + "epoch": 0.048601558917927556, + "grad_norm": 298.0, + "learning_rate": 9.990939583285248e-05, + "loss": 11.6264, + "step": 1166 + }, + { + "epoch": 0.048643241215455796, + "grad_norm": 189.0, + "learning_rate": 9.99089892040902e-05, + "loss": 9.3136, + "step": 1167 + }, + { + "epoch": 0.048684923512984035, + "grad_norm": 440.0, + "learning_rate": 9.990858166573217e-05, + "loss": 15.0629, + "step": 1168 + }, + { + "epoch": 0.048726605810512275, + "grad_norm": 268.0, + "learning_rate": 9.990817321778581e-05, + "loss": 11.5635, + "step": 1169 + }, + { + "epoch": 0.048768288108040514, + "grad_norm": 438.0, + "learning_rate": 9.990776386025857e-05, + "loss": 15.5634, + "step": 1170 + }, + { + "epoch": 0.04880997040556875, + "grad_norm": 652.0, + "learning_rate": 9.990735359315788e-05, + "loss": 19.1261, + "step": 1171 + }, + { + "epoch": 0.04885165270309699, + "grad_norm": 128.0, + "learning_rate": 9.990694241649126e-05, + "loss": 6.3761, + "step": 1172 + }, + { + "epoch": 0.04889333500062523, + "grad_norm": 474.0, + "learning_rate": 9.990653033026618e-05, + "loss": 18.0012, + "step": 1173 + }, + { + "epoch": 0.04893501729815347, + "grad_norm": 148.0, + "learning_rate": 9.990611733449016e-05, + "loss": 9.0009, + "step": 1174 + }, + { + "epoch": 0.04897669959568171, + "grad_norm": 245.0, + "learning_rate": 9.99057034291707e-05, + "loss": 12.0005, + "step": 1175 + }, + { + "epoch": 0.04901838189320996, + "grad_norm": 290.0, + "learning_rate": 9.990528861431539e-05, + "loss": 12.7505, + "step": 1176 + }, + { + "epoch": 0.049060064190738197, + "grad_norm": 1408.0, + "learning_rate": 9.990487288993176e-05, + "loss": 30.005, + "step": 1177 + }, + { + "epoch": 0.049101746488266436, + "grad_norm": 274.0, + "learning_rate": 9.990445625602738e-05, + "loss": 12.3761, + "step": 1178 + }, + { + "epoch": 0.049143428785794675, + "grad_norm": 216.0, + "learning_rate": 9.990403871260984e-05, + "loss": 11.2507, + "step": 1179 + }, + { + "epoch": 0.049185111083322915, + "grad_norm": 732.0, + "learning_rate": 9.99036202596868e-05, + "loss": 22.2509, + "step": 1180 + }, + { + "epoch": 0.049226793380851154, + "grad_norm": 266.0, + "learning_rate": 9.990320089726583e-05, + "loss": 11.6264, + "step": 1181 + }, + { + "epoch": 0.04926847567837939, + "grad_norm": 804.0, + "learning_rate": 9.990278062535459e-05, + "loss": 26.0014, + "step": 1182 + }, + { + "epoch": 0.04931015797590763, + "grad_norm": 290.0, + "learning_rate": 9.990235944396075e-05, + "loss": 12.8753, + "step": 1183 + }, + { + "epoch": 0.04935184027343587, + "grad_norm": 544.0, + "learning_rate": 9.990193735309199e-05, + "loss": 19.501, + "step": 1184 + }, + { + "epoch": 0.04939352257096411, + "grad_norm": 464.0, + "learning_rate": 9.990151435275599e-05, + "loss": 19.2507, + "step": 1185 + }, + { + "epoch": 0.04943520486849235, + "grad_norm": 300.0, + "learning_rate": 9.990109044296044e-05, + "loss": 13.564, + "step": 1186 + }, + { + "epoch": 0.04947688716602059, + "grad_norm": 278.0, + "learning_rate": 9.99006656237131e-05, + "loss": 12.4456, + "step": 1187 + }, + { + "epoch": 0.04951856946354883, + "grad_norm": 322.0, + "learning_rate": 9.990023989502171e-05, + "loss": 13.8758, + "step": 1188 + }, + { + "epoch": 0.04956025176107707, + "grad_norm": 179.0, + "learning_rate": 9.9899813256894e-05, + "loss": 8.7505, + "step": 1189 + }, + { + "epoch": 0.04960193405860531, + "grad_norm": 244.0, + "learning_rate": 9.989938570933777e-05, + "loss": 12.2513, + "step": 1190 + }, + { + "epoch": 0.04964361635613355, + "grad_norm": 444.0, + "learning_rate": 9.98989572523608e-05, + "loss": 15.7528, + "step": 1191 + }, + { + "epoch": 0.04968529865366179, + "grad_norm": 466.0, + "learning_rate": 9.989852788597092e-05, + "loss": 16.8761, + "step": 1192 + }, + { + "epoch": 0.04972698095119003, + "grad_norm": 292.0, + "learning_rate": 9.989809761017593e-05, + "loss": 13.6262, + "step": 1193 + }, + { + "epoch": 0.049768663248718266, + "grad_norm": 744.0, + "learning_rate": 9.989766642498369e-05, + "loss": 18.3815, + "step": 1194 + }, + { + "epoch": 0.04981034554624651, + "grad_norm": 308.0, + "learning_rate": 9.989723433040203e-05, + "loss": 12.7516, + "step": 1195 + }, + { + "epoch": 0.04985202784377475, + "grad_norm": 428.0, + "learning_rate": 9.989680132643886e-05, + "loss": 15.7512, + "step": 1196 + }, + { + "epoch": 0.04989371014130299, + "grad_norm": 80.0, + "learning_rate": 9.989636741310207e-05, + "loss": 7.8128, + "step": 1197 + }, + { + "epoch": 0.04993539243883123, + "grad_norm": 720.0, + "learning_rate": 9.989593259039954e-05, + "loss": 21.626, + "step": 1198 + }, + { + "epoch": 0.04997707473635947, + "grad_norm": 241.0, + "learning_rate": 9.989549685833919e-05, + "loss": 11.313, + "step": 1199 + }, + { + "epoch": 0.05001875703388771, + "grad_norm": 282.0, + "learning_rate": 9.989506021692901e-05, + "loss": 12.5004, + "step": 1200 + }, + { + "epoch": 0.05006043933141595, + "grad_norm": 253.0, + "learning_rate": 9.989462266617691e-05, + "loss": 12.44, + "step": 1201 + }, + { + "epoch": 0.05010212162894419, + "grad_norm": 300.0, + "learning_rate": 9.98941842060909e-05, + "loss": 12.7508, + "step": 1202 + }, + { + "epoch": 0.05014380392647243, + "grad_norm": 664.0, + "learning_rate": 9.989374483667892e-05, + "loss": 16.6261, + "step": 1203 + }, + { + "epoch": 0.05018548622400067, + "grad_norm": 194.0, + "learning_rate": 9.989330455794904e-05, + "loss": 9.8132, + "step": 1204 + }, + { + "epoch": 0.050227168521528906, + "grad_norm": 502.0, + "learning_rate": 9.989286336990926e-05, + "loss": 16.1262, + "step": 1205 + }, + { + "epoch": 0.050268850819057145, + "grad_norm": 296.0, + "learning_rate": 9.98924212725676e-05, + "loss": 12.4394, + "step": 1206 + }, + { + "epoch": 0.050310533116585385, + "grad_norm": 422.0, + "learning_rate": 9.989197826593212e-05, + "loss": 15.1254, + "step": 1207 + }, + { + "epoch": 0.050352215414113624, + "grad_norm": 592.0, + "learning_rate": 9.989153435001093e-05, + "loss": 20.7513, + "step": 1208 + }, + { + "epoch": 0.050393897711641863, + "grad_norm": 468.0, + "learning_rate": 9.989108952481209e-05, + "loss": 15.8133, + "step": 1209 + }, + { + "epoch": 0.0504355800091701, + "grad_norm": 396.0, + "learning_rate": 9.989064379034372e-05, + "loss": 14.6257, + "step": 1210 + }, + { + "epoch": 0.05047726230669834, + "grad_norm": 584.0, + "learning_rate": 9.989019714661394e-05, + "loss": 18.1259, + "step": 1211 + }, + { + "epoch": 0.05051894460422658, + "grad_norm": 418.0, + "learning_rate": 9.988974959363089e-05, + "loss": 14.7511, + "step": 1212 + }, + { + "epoch": 0.05056062690175483, + "grad_norm": 199.0, + "learning_rate": 9.988930113140271e-05, + "loss": 10.6883, + "step": 1213 + }, + { + "epoch": 0.05060230919928307, + "grad_norm": 1272.0, + "learning_rate": 9.98888517599376e-05, + "loss": 30.0004, + "step": 1214 + }, + { + "epoch": 0.05064399149681131, + "grad_norm": 360.0, + "learning_rate": 9.988840147924372e-05, + "loss": 13.8138, + "step": 1215 + }, + { + "epoch": 0.050685673794339546, + "grad_norm": 179.0, + "learning_rate": 9.988795028932931e-05, + "loss": 9.7507, + "step": 1216 + }, + { + "epoch": 0.050727356091867785, + "grad_norm": 356.0, + "learning_rate": 9.988749819020258e-05, + "loss": 12.6266, + "step": 1217 + }, + { + "epoch": 0.050769038389396025, + "grad_norm": 420.0, + "learning_rate": 9.988704518187177e-05, + "loss": 12.8165, + "step": 1218 + }, + { + "epoch": 0.050810720686924264, + "grad_norm": 1232.0, + "learning_rate": 9.988659126434511e-05, + "loss": 26.1308, + "step": 1219 + }, + { + "epoch": 0.050852402984452504, + "grad_norm": 254.0, + "learning_rate": 9.988613643763091e-05, + "loss": 11.3135, + "step": 1220 + }, + { + "epoch": 0.05089408528198074, + "grad_norm": 244.0, + "learning_rate": 9.988568070173745e-05, + "loss": 11.4395, + "step": 1221 + }, + { + "epoch": 0.05093576757950898, + "grad_norm": 344.0, + "learning_rate": 9.988522405667302e-05, + "loss": 13.5014, + "step": 1222 + }, + { + "epoch": 0.05097744987703722, + "grad_norm": 192.0, + "learning_rate": 9.988476650244597e-05, + "loss": 9.8755, + "step": 1223 + }, + { + "epoch": 0.05101913217456546, + "grad_norm": 516.0, + "learning_rate": 9.988430803906461e-05, + "loss": 17.7504, + "step": 1224 + }, + { + "epoch": 0.0510608144720937, + "grad_norm": 188.0, + "learning_rate": 9.98838486665373e-05, + "loss": 10.8758, + "step": 1225 + }, + { + "epoch": 0.05110249676962194, + "grad_norm": 188.0, + "learning_rate": 9.988338838487243e-05, + "loss": 8.501, + "step": 1226 + }, + { + "epoch": 0.05114417906715018, + "grad_norm": 612.0, + "learning_rate": 9.98829271940784e-05, + "loss": 17.6262, + "step": 1227 + }, + { + "epoch": 0.05118586136467842, + "grad_norm": 226.0, + "learning_rate": 9.988246509416356e-05, + "loss": 11.3139, + "step": 1228 + }, + { + "epoch": 0.05122754366220666, + "grad_norm": 258.0, + "learning_rate": 9.988200208513637e-05, + "loss": 11.0628, + "step": 1229 + }, + { + "epoch": 0.0512692259597349, + "grad_norm": 276.0, + "learning_rate": 9.988153816700528e-05, + "loss": 11.501, + "step": 1230 + }, + { + "epoch": 0.051310908257263144, + "grad_norm": 418.0, + "learning_rate": 9.988107333977871e-05, + "loss": 15.9382, + "step": 1231 + }, + { + "epoch": 0.05135259055479138, + "grad_norm": 724.0, + "learning_rate": 9.988060760346515e-05, + "loss": 22.0066, + "step": 1232 + }, + { + "epoch": 0.05139427285231962, + "grad_norm": 536.0, + "learning_rate": 9.988014095807311e-05, + "loss": 19.1256, + "step": 1233 + }, + { + "epoch": 0.05143595514984786, + "grad_norm": 210.0, + "learning_rate": 9.987967340361107e-05, + "loss": 10.5012, + "step": 1234 + }, + { + "epoch": 0.0514776374473761, + "grad_norm": 266.0, + "learning_rate": 9.987920494008753e-05, + "loss": 11.7513, + "step": 1235 + }, + { + "epoch": 0.05151931974490434, + "grad_norm": 290.0, + "learning_rate": 9.987873556751107e-05, + "loss": 11.7513, + "step": 1236 + }, + { + "epoch": 0.05156100204243258, + "grad_norm": 584.0, + "learning_rate": 9.987826528589021e-05, + "loss": 18.2521, + "step": 1237 + }, + { + "epoch": 0.05160268433996082, + "grad_norm": 322.0, + "learning_rate": 9.987779409523357e-05, + "loss": 12.4382, + "step": 1238 + }, + { + "epoch": 0.05164436663748906, + "grad_norm": 588.0, + "learning_rate": 9.987732199554968e-05, + "loss": 15.7524, + "step": 1239 + }, + { + "epoch": 0.0516860489350173, + "grad_norm": 232.0, + "learning_rate": 9.987684898684718e-05, + "loss": 10.9381, + "step": 1240 + }, + { + "epoch": 0.05172773123254554, + "grad_norm": 384.0, + "learning_rate": 9.987637506913468e-05, + "loss": 15.4382, + "step": 1241 + }, + { + "epoch": 0.05176941353007378, + "grad_norm": 131.0, + "learning_rate": 9.98759002424208e-05, + "loss": 7.6882, + "step": 1242 + }, + { + "epoch": 0.051811095827602016, + "grad_norm": 92.0, + "learning_rate": 9.987542450671422e-05, + "loss": 7.0948, + "step": 1243 + }, + { + "epoch": 0.051852778125130256, + "grad_norm": 326.0, + "learning_rate": 9.987494786202361e-05, + "loss": 13.563, + "step": 1244 + }, + { + "epoch": 0.051894460422658495, + "grad_norm": 354.0, + "learning_rate": 9.987447030835763e-05, + "loss": 12.5009, + "step": 1245 + }, + { + "epoch": 0.051936142720186734, + "grad_norm": 516.0, + "learning_rate": 9.987399184572501e-05, + "loss": 17.5004, + "step": 1246 + }, + { + "epoch": 0.051977825017714974, + "grad_norm": 137.0, + "learning_rate": 9.987351247413446e-05, + "loss": 9.313, + "step": 1247 + }, + { + "epoch": 0.05201950731524321, + "grad_norm": 193.0, + "learning_rate": 9.987303219359471e-05, + "loss": 9.7518, + "step": 1248 + }, + { + "epoch": 0.05206118961277146, + "grad_norm": 366.0, + "learning_rate": 9.987255100411455e-05, + "loss": 14.6255, + "step": 1249 + }, + { + "epoch": 0.0521028719102997, + "grad_norm": 306.0, + "learning_rate": 9.987206890570269e-05, + "loss": 12.4382, + "step": 1250 + }, + { + "epoch": 0.05214455420782794, + "grad_norm": 1168.0, + "learning_rate": 9.987158589836796e-05, + "loss": 25.6307, + "step": 1251 + }, + { + "epoch": 0.05218623650535618, + "grad_norm": 484.0, + "learning_rate": 9.987110198211915e-05, + "loss": 15.1878, + "step": 1252 + }, + { + "epoch": 0.05222791880288442, + "grad_norm": 191.0, + "learning_rate": 9.987061715696509e-05, + "loss": 8.9379, + "step": 1253 + }, + { + "epoch": 0.052269601100412656, + "grad_norm": 580.0, + "learning_rate": 9.987013142291457e-05, + "loss": 17.3766, + "step": 1254 + }, + { + "epoch": 0.052311283397940896, + "grad_norm": 290.0, + "learning_rate": 9.986964477997651e-05, + "loss": 12.3755, + "step": 1255 + }, + { + "epoch": 0.052352965695469135, + "grad_norm": 944.0, + "learning_rate": 9.986915722815973e-05, + "loss": 22.6303, + "step": 1256 + }, + { + "epoch": 0.052394647992997374, + "grad_norm": 93.5, + "learning_rate": 9.986866876747314e-05, + "loss": 8.1888, + "step": 1257 + }, + { + "epoch": 0.052436330290525614, + "grad_norm": 260.0, + "learning_rate": 9.986817939792565e-05, + "loss": 11.1881, + "step": 1258 + }, + { + "epoch": 0.05247801258805385, + "grad_norm": 252.0, + "learning_rate": 9.986768911952613e-05, + "loss": 11.1259, + "step": 1259 + }, + { + "epoch": 0.05251969488558209, + "grad_norm": 1088.0, + "learning_rate": 9.986719793228357e-05, + "loss": 25.3838, + "step": 1260 + }, + { + "epoch": 0.05256137718311033, + "grad_norm": 656.0, + "learning_rate": 9.98667058362069e-05, + "loss": 19.0048, + "step": 1261 + }, + { + "epoch": 0.05260305948063857, + "grad_norm": 186.0, + "learning_rate": 9.986621283130508e-05, + "loss": 9.2518, + "step": 1262 + }, + { + "epoch": 0.05264474177816681, + "grad_norm": 414.0, + "learning_rate": 9.986571891758712e-05, + "loss": 15.6259, + "step": 1263 + }, + { + "epoch": 0.05268642407569505, + "grad_norm": 113.5, + "learning_rate": 9.9865224095062e-05, + "loss": 6.9709, + "step": 1264 + }, + { + "epoch": 0.05272810637322329, + "grad_norm": 1320.0, + "learning_rate": 9.986472836373875e-05, + "loss": 26.506, + "step": 1265 + }, + { + "epoch": 0.05276978867075153, + "grad_norm": 284.0, + "learning_rate": 9.986423172362638e-05, + "loss": 12.5661, + "step": 1266 + }, + { + "epoch": 0.05281147096827977, + "grad_norm": 396.0, + "learning_rate": 9.986373417473396e-05, + "loss": 15.1884, + "step": 1267 + }, + { + "epoch": 0.052853153265808014, + "grad_norm": 768.0, + "learning_rate": 9.986323571707058e-05, + "loss": 21.6278, + "step": 1268 + }, + { + "epoch": 0.052894835563336254, + "grad_norm": 366.0, + "learning_rate": 9.98627363506453e-05, + "loss": 13.9383, + "step": 1269 + }, + { + "epoch": 0.05293651786086449, + "grad_norm": 520.0, + "learning_rate": 9.986223607546721e-05, + "loss": 15.8154, + "step": 1270 + }, + { + "epoch": 0.05297820015839273, + "grad_norm": 108.0, + "learning_rate": 9.986173489154544e-05, + "loss": 7.7824, + "step": 1271 + }, + { + "epoch": 0.05301988245592097, + "grad_norm": 310.0, + "learning_rate": 9.986123279888913e-05, + "loss": 12.3759, + "step": 1272 + }, + { + "epoch": 0.05306156475344921, + "grad_norm": 219.0, + "learning_rate": 9.986072979750743e-05, + "loss": 9.2502, + "step": 1273 + }, + { + "epoch": 0.05310324705097745, + "grad_norm": 948.0, + "learning_rate": 9.98602258874095e-05, + "loss": 24.5049, + "step": 1274 + }, + { + "epoch": 0.05314492934850569, + "grad_norm": 812.0, + "learning_rate": 9.985972106860453e-05, + "loss": 26.2508, + "step": 1275 + }, + { + "epoch": 0.05318661164603393, + "grad_norm": 284.0, + "learning_rate": 9.985921534110171e-05, + "loss": 11.7515, + "step": 1276 + }, + { + "epoch": 0.05322829394356217, + "grad_norm": 472.0, + "learning_rate": 9.985870870491027e-05, + "loss": 15.6879, + "step": 1277 + }, + { + "epoch": 0.05326997624109041, + "grad_norm": 470.0, + "learning_rate": 9.985820116003944e-05, + "loss": 15.6256, + "step": 1278 + }, + { + "epoch": 0.05331165853861865, + "grad_norm": 270.0, + "learning_rate": 9.985769270649845e-05, + "loss": 12.2512, + "step": 1279 + }, + { + "epoch": 0.05335334083614689, + "grad_norm": 338.0, + "learning_rate": 9.985718334429662e-05, + "loss": 14.1888, + "step": 1280 + }, + { + "epoch": 0.053395023133675126, + "grad_norm": 260.0, + "learning_rate": 9.985667307344316e-05, + "loss": 12.0053, + "step": 1281 + }, + { + "epoch": 0.053436705431203366, + "grad_norm": 314.0, + "learning_rate": 9.985616189394743e-05, + "loss": 13.5635, + "step": 1282 + }, + { + "epoch": 0.053478387728731605, + "grad_norm": 640.0, + "learning_rate": 9.985564980581872e-05, + "loss": 18.6267, + "step": 1283 + }, + { + "epoch": 0.053520070026259844, + "grad_norm": 224.0, + "learning_rate": 9.985513680906635e-05, + "loss": 10.8759, + "step": 1284 + }, + { + "epoch": 0.053561752323788084, + "grad_norm": 212.0, + "learning_rate": 9.985462290369971e-05, + "loss": 10.5637, + "step": 1285 + }, + { + "epoch": 0.05360343462131633, + "grad_norm": 576.0, + "learning_rate": 9.985410808972812e-05, + "loss": 17.7537, + "step": 1286 + }, + { + "epoch": 0.05364511691884457, + "grad_norm": 560.0, + "learning_rate": 9.985359236716101e-05, + "loss": 18.8755, + "step": 1287 + }, + { + "epoch": 0.05368679921637281, + "grad_norm": 458.0, + "learning_rate": 9.985307573600772e-05, + "loss": 16.8777, + "step": 1288 + }, + { + "epoch": 0.05372848151390105, + "grad_norm": 776.0, + "learning_rate": 9.985255819627774e-05, + "loss": 20.1259, + "step": 1289 + }, + { + "epoch": 0.05377016381142929, + "grad_norm": 199.0, + "learning_rate": 9.985203974798043e-05, + "loss": 10.063, + "step": 1290 + }, + { + "epoch": 0.05381184610895753, + "grad_norm": 404.0, + "learning_rate": 9.985152039112528e-05, + "loss": 15.4384, + "step": 1291 + }, + { + "epoch": 0.053853528406485766, + "grad_norm": 169.0, + "learning_rate": 9.985100012572176e-05, + "loss": 9.7507, + "step": 1292 + }, + { + "epoch": 0.053895210704014006, + "grad_norm": 716.0, + "learning_rate": 9.985047895177932e-05, + "loss": 18.2515, + "step": 1293 + }, + { + "epoch": 0.053936893001542245, + "grad_norm": 248.0, + "learning_rate": 9.984995686930748e-05, + "loss": 11.6255, + "step": 1294 + }, + { + "epoch": 0.053978575299070485, + "grad_norm": 266.0, + "learning_rate": 9.984943387831573e-05, + "loss": 10.8133, + "step": 1295 + }, + { + "epoch": 0.054020257596598724, + "grad_norm": 174.0, + "learning_rate": 9.984890997881365e-05, + "loss": 7.8449, + "step": 1296 + }, + { + "epoch": 0.05406193989412696, + "grad_norm": 416.0, + "learning_rate": 9.984838517081076e-05, + "loss": 14.4381, + "step": 1297 + }, + { + "epoch": 0.0541036221916552, + "grad_norm": 183.0, + "learning_rate": 9.98478594543166e-05, + "loss": 9.9388, + "step": 1298 + }, + { + "epoch": 0.05414530448918344, + "grad_norm": 376.0, + "learning_rate": 9.98473328293408e-05, + "loss": 14.0633, + "step": 1299 + }, + { + "epoch": 0.05418698678671168, + "grad_norm": 150.0, + "learning_rate": 9.984680529589294e-05, + "loss": 9.6888, + "step": 1300 + }, + { + "epoch": 0.05422866908423992, + "grad_norm": 362.0, + "learning_rate": 9.98462768539826e-05, + "loss": 13.001, + "step": 1301 + }, + { + "epoch": 0.05427035138176816, + "grad_norm": 280.0, + "learning_rate": 9.984574750361946e-05, + "loss": 12.9386, + "step": 1302 + }, + { + "epoch": 0.0543120336792964, + "grad_norm": 217.0, + "learning_rate": 9.984521724481315e-05, + "loss": 10.688, + "step": 1303 + }, + { + "epoch": 0.054353715976824646, + "grad_norm": 350.0, + "learning_rate": 9.984468607757331e-05, + "loss": 14.9387, + "step": 1304 + }, + { + "epoch": 0.054395398274352885, + "grad_norm": 294.0, + "learning_rate": 9.984415400190966e-05, + "loss": 13.1253, + "step": 1305 + }, + { + "epoch": 0.054437080571881125, + "grad_norm": 434.0, + "learning_rate": 9.984362101783187e-05, + "loss": 15.5629, + "step": 1306 + }, + { + "epoch": 0.054478762869409364, + "grad_norm": 186.0, + "learning_rate": 9.984308712534967e-05, + "loss": 10.5029, + "step": 1307 + }, + { + "epoch": 0.0545204451669376, + "grad_norm": 308.0, + "learning_rate": 9.984255232447278e-05, + "loss": 13.3135, + "step": 1308 + }, + { + "epoch": 0.05456212746446584, + "grad_norm": 253.0, + "learning_rate": 9.984201661521094e-05, + "loss": 11.5005, + "step": 1309 + }, + { + "epoch": 0.05460380976199408, + "grad_norm": 552.0, + "learning_rate": 9.984147999757394e-05, + "loss": 17.2505, + "step": 1310 + }, + { + "epoch": 0.05464549205952232, + "grad_norm": 556.0, + "learning_rate": 9.984094247157152e-05, + "loss": 15.314, + "step": 1311 + }, + { + "epoch": 0.05468717435705056, + "grad_norm": 294.0, + "learning_rate": 9.984040403721351e-05, + "loss": 12.3131, + "step": 1312 + }, + { + "epoch": 0.0547288566545788, + "grad_norm": 488.0, + "learning_rate": 9.983986469450972e-05, + "loss": 17.2505, + "step": 1313 + }, + { + "epoch": 0.05477053895210704, + "grad_norm": 470.0, + "learning_rate": 9.983932444346996e-05, + "loss": 16.1256, + "step": 1314 + }, + { + "epoch": 0.05481222124963528, + "grad_norm": 648.0, + "learning_rate": 9.98387832841041e-05, + "loss": 19.5039, + "step": 1315 + }, + { + "epoch": 0.05485390354716352, + "grad_norm": 1152.0, + "learning_rate": 9.983824121642197e-05, + "loss": 31.3756, + "step": 1316 + }, + { + "epoch": 0.05489558584469176, + "grad_norm": 298.0, + "learning_rate": 9.983769824043349e-05, + "loss": 13.127, + "step": 1317 + }, + { + "epoch": 0.05493726814222, + "grad_norm": 540.0, + "learning_rate": 9.983715435614854e-05, + "loss": 18.7507, + "step": 1318 + }, + { + "epoch": 0.054978950439748236, + "grad_norm": 482.0, + "learning_rate": 9.9836609563577e-05, + "loss": 15.9387, + "step": 1319 + }, + { + "epoch": 0.055020632737276476, + "grad_norm": 660.0, + "learning_rate": 9.983606386272884e-05, + "loss": 20.1254, + "step": 1320 + }, + { + "epoch": 0.055062315034804715, + "grad_norm": 510.0, + "learning_rate": 9.9835517253614e-05, + "loss": 17.2505, + "step": 1321 + }, + { + "epoch": 0.05510399733233296, + "grad_norm": 310.0, + "learning_rate": 9.983496973624242e-05, + "loss": 12.063, + "step": 1322 + }, + { + "epoch": 0.0551456796298612, + "grad_norm": 576.0, + "learning_rate": 9.98344213106241e-05, + "loss": 18.8756, + "step": 1323 + }, + { + "epoch": 0.05518736192738944, + "grad_norm": 344.0, + "learning_rate": 9.983387197676903e-05, + "loss": 13.8759, + "step": 1324 + }, + { + "epoch": 0.05522904422491768, + "grad_norm": 241.0, + "learning_rate": 9.983332173468722e-05, + "loss": 12.688, + "step": 1325 + }, + { + "epoch": 0.05527072652244592, + "grad_norm": 432.0, + "learning_rate": 9.983277058438869e-05, + "loss": 15.6887, + "step": 1326 + }, + { + "epoch": 0.05531240881997416, + "grad_norm": 588.0, + "learning_rate": 9.98322185258835e-05, + "loss": 19.3754, + "step": 1327 + }, + { + "epoch": 0.0553540911175024, + "grad_norm": 428.0, + "learning_rate": 9.98316655591817e-05, + "loss": 13.8135, + "step": 1328 + }, + { + "epoch": 0.05539577341503064, + "grad_norm": 466.0, + "learning_rate": 9.983111168429338e-05, + "loss": 14.5014, + "step": 1329 + }, + { + "epoch": 0.05543745571255888, + "grad_norm": 492.0, + "learning_rate": 9.98305569012286e-05, + "loss": 16.8759, + "step": 1330 + }, + { + "epoch": 0.055479138010087116, + "grad_norm": 780.0, + "learning_rate": 9.983000120999753e-05, + "loss": 22.7507, + "step": 1331 + }, + { + "epoch": 0.055520820307615355, + "grad_norm": 556.0, + "learning_rate": 9.982944461061023e-05, + "loss": 19.0004, + "step": 1332 + }, + { + "epoch": 0.055562502605143595, + "grad_norm": 768.0, + "learning_rate": 9.982888710307692e-05, + "loss": 21.5016, + "step": 1333 + }, + { + "epoch": 0.055604184902671834, + "grad_norm": 446.0, + "learning_rate": 9.982832868740767e-05, + "loss": 14.8775, + "step": 1334 + }, + { + "epoch": 0.05564586720020007, + "grad_norm": 516.0, + "learning_rate": 9.982776936361275e-05, + "loss": 19.8765, + "step": 1335 + }, + { + "epoch": 0.05568754949772831, + "grad_norm": 628.0, + "learning_rate": 9.982720913170229e-05, + "loss": 19.1256, + "step": 1336 + }, + { + "epoch": 0.05572923179525655, + "grad_norm": 648.0, + "learning_rate": 9.982664799168653e-05, + "loss": 20.6265, + "step": 1337 + }, + { + "epoch": 0.05577091409278479, + "grad_norm": 748.0, + "learning_rate": 9.982608594357568e-05, + "loss": 21.7509, + "step": 1338 + }, + { + "epoch": 0.05581259639031303, + "grad_norm": 288.0, + "learning_rate": 9.982552298737999e-05, + "loss": 12.1882, + "step": 1339 + }, + { + "epoch": 0.05585427868784128, + "grad_norm": 196.0, + "learning_rate": 9.982495912310972e-05, + "loss": 6.8752, + "step": 1340 + }, + { + "epoch": 0.05589596098536952, + "grad_norm": 241.0, + "learning_rate": 9.982439435077515e-05, + "loss": 11.3753, + "step": 1341 + }, + { + "epoch": 0.055937643282897756, + "grad_norm": 358.0, + "learning_rate": 9.982382867038657e-05, + "loss": 14.6255, + "step": 1342 + }, + { + "epoch": 0.055979325580425995, + "grad_norm": 418.0, + "learning_rate": 9.982326208195428e-05, + "loss": 14.9385, + "step": 1343 + }, + { + "epoch": 0.056021007877954235, + "grad_norm": 183.0, + "learning_rate": 9.982269458548863e-05, + "loss": 10.3776, + "step": 1344 + }, + { + "epoch": 0.056062690175482474, + "grad_norm": 528.0, + "learning_rate": 9.982212618099993e-05, + "loss": 16.6259, + "step": 1345 + }, + { + "epoch": 0.056104372473010714, + "grad_norm": 1120.0, + "learning_rate": 9.982155686849858e-05, + "loss": 27.8762, + "step": 1346 + }, + { + "epoch": 0.05614605477053895, + "grad_norm": 234.0, + "learning_rate": 9.982098664799492e-05, + "loss": 12.1881, + "step": 1347 + }, + { + "epoch": 0.05618773706806719, + "grad_norm": 372.0, + "learning_rate": 9.982041551949936e-05, + "loss": 14.2512, + "step": 1348 + }, + { + "epoch": 0.05622941936559543, + "grad_norm": 628.0, + "learning_rate": 9.981984348302231e-05, + "loss": 18.6258, + "step": 1349 + }, + { + "epoch": 0.05627110166312367, + "grad_norm": 394.0, + "learning_rate": 9.981927053857417e-05, + "loss": 13.2505, + "step": 1350 + }, + { + "epoch": 0.05631278396065191, + "grad_norm": 624.0, + "learning_rate": 9.981869668616541e-05, + "loss": 18.7505, + "step": 1351 + }, + { + "epoch": 0.05635446625818015, + "grad_norm": 444.0, + "learning_rate": 9.981812192580649e-05, + "loss": 16.0006, + "step": 1352 + }, + { + "epoch": 0.05639614855570839, + "grad_norm": 1288.0, + "learning_rate": 9.981754625750788e-05, + "loss": 29.0066, + "step": 1353 + }, + { + "epoch": 0.05643783085323663, + "grad_norm": 408.0, + "learning_rate": 9.981696968128005e-05, + "loss": 14.5633, + "step": 1354 + }, + { + "epoch": 0.05647951315076487, + "grad_norm": 692.0, + "learning_rate": 9.981639219713353e-05, + "loss": 23.2515, + "step": 1355 + }, + { + "epoch": 0.05652119544829311, + "grad_norm": 432.0, + "learning_rate": 9.981581380507885e-05, + "loss": 16.3755, + "step": 1356 + }, + { + "epoch": 0.05656287774582135, + "grad_norm": 250.0, + "learning_rate": 9.981523450512653e-05, + "loss": 12.6883, + "step": 1357 + }, + { + "epoch": 0.056604560043349586, + "grad_norm": 462.0, + "learning_rate": 9.981465429728713e-05, + "loss": 12.8755, + "step": 1358 + }, + { + "epoch": 0.05664624234087783, + "grad_norm": 1384.0, + "learning_rate": 9.981407318157125e-05, + "loss": 31.2508, + "step": 1359 + }, + { + "epoch": 0.05668792463840607, + "grad_norm": 398.0, + "learning_rate": 9.981349115798946e-05, + "loss": 14.313, + "step": 1360 + }, + { + "epoch": 0.05672960693593431, + "grad_norm": 298.0, + "learning_rate": 9.981290822655237e-05, + "loss": 10.6882, + "step": 1361 + }, + { + "epoch": 0.05677128923346255, + "grad_norm": 374.0, + "learning_rate": 9.981232438727061e-05, + "loss": 14.0673, + "step": 1362 + }, + { + "epoch": 0.05681297153099079, + "grad_norm": 716.0, + "learning_rate": 9.981173964015481e-05, + "loss": 21.2512, + "step": 1363 + }, + { + "epoch": 0.05685465382851903, + "grad_norm": 1528.0, + "learning_rate": 9.981115398521563e-05, + "loss": 37.0009, + "step": 1364 + }, + { + "epoch": 0.05689633612604727, + "grad_norm": 356.0, + "learning_rate": 9.981056742246375e-05, + "loss": 12.6262, + "step": 1365 + }, + { + "epoch": 0.05693801842357551, + "grad_norm": 422.0, + "learning_rate": 9.980997995190987e-05, + "loss": 15.9392, + "step": 1366 + }, + { + "epoch": 0.05697970072110375, + "grad_norm": 284.0, + "learning_rate": 9.980939157356468e-05, + "loss": 13.0629, + "step": 1367 + }, + { + "epoch": 0.05702138301863199, + "grad_norm": 262.0, + "learning_rate": 9.98088022874389e-05, + "loss": 11.688, + "step": 1368 + }, + { + "epoch": 0.057063065316160226, + "grad_norm": 1024.0, + "learning_rate": 9.980821209354328e-05, + "loss": 25.381, + "step": 1369 + }, + { + "epoch": 0.057104747613688465, + "grad_norm": 664.0, + "learning_rate": 9.980762099188856e-05, + "loss": 19.8758, + "step": 1370 + }, + { + "epoch": 0.057146429911216705, + "grad_norm": 222.0, + "learning_rate": 9.980702898248553e-05, + "loss": 11.1256, + "step": 1371 + }, + { + "epoch": 0.057188112208744944, + "grad_norm": 616.0, + "learning_rate": 9.980643606534499e-05, + "loss": 20.5012, + "step": 1372 + }, + { + "epoch": 0.057229794506273184, + "grad_norm": 386.0, + "learning_rate": 9.98058422404777e-05, + "loss": 14.6899, + "step": 1373 + }, + { + "epoch": 0.05727147680380142, + "grad_norm": 362.0, + "learning_rate": 9.980524750789455e-05, + "loss": 12.3131, + "step": 1374 + }, + { + "epoch": 0.05731315910132966, + "grad_norm": 202.0, + "learning_rate": 9.980465186760633e-05, + "loss": 10.0632, + "step": 1375 + }, + { + "epoch": 0.0573548413988579, + "grad_norm": 205.0, + "learning_rate": 9.980405531962392e-05, + "loss": 10.7513, + "step": 1376 + }, + { + "epoch": 0.05739652369638615, + "grad_norm": 181.0, + "learning_rate": 9.980345786395815e-05, + "loss": 9.7512, + "step": 1377 + }, + { + "epoch": 0.05743820599391439, + "grad_norm": 580.0, + "learning_rate": 9.980285950061996e-05, + "loss": 20.8755, + "step": 1378 + }, + { + "epoch": 0.05747988829144263, + "grad_norm": 174.0, + "learning_rate": 9.980226022962022e-05, + "loss": 9.0007, + "step": 1379 + }, + { + "epoch": 0.057521570588970866, + "grad_norm": 153.0, + "learning_rate": 9.98016600509699e-05, + "loss": 9.0632, + "step": 1380 + }, + { + "epoch": 0.057563252886499106, + "grad_norm": 140.0, + "learning_rate": 9.980105896467987e-05, + "loss": 9.9385, + "step": 1381 + }, + { + "epoch": 0.057604935184027345, + "grad_norm": 568.0, + "learning_rate": 9.980045697076113e-05, + "loss": 16.3799, + "step": 1382 + }, + { + "epoch": 0.057646617481555584, + "grad_norm": 290.0, + "learning_rate": 9.979985406922466e-05, + "loss": 12.4381, + "step": 1383 + }, + { + "epoch": 0.057688299779083824, + "grad_norm": 342.0, + "learning_rate": 9.979925026008139e-05, + "loss": 13.4389, + "step": 1384 + }, + { + "epoch": 0.05772998207661206, + "grad_norm": 282.0, + "learning_rate": 9.97986455433424e-05, + "loss": 11.6254, + "step": 1385 + }, + { + "epoch": 0.0577716643741403, + "grad_norm": 274.0, + "learning_rate": 9.979803991901865e-05, + "loss": 12.2506, + "step": 1386 + }, + { + "epoch": 0.05781334667166854, + "grad_norm": 342.0, + "learning_rate": 9.979743338712121e-05, + "loss": 12.7504, + "step": 1387 + }, + { + "epoch": 0.05785502896919678, + "grad_norm": 234.0, + "learning_rate": 9.979682594766113e-05, + "loss": 11.4398, + "step": 1388 + }, + { + "epoch": 0.05789671126672502, + "grad_norm": 224.0, + "learning_rate": 9.979621760064947e-05, + "loss": 9.8147, + "step": 1389 + }, + { + "epoch": 0.05793839356425326, + "grad_norm": 156.0, + "learning_rate": 9.979560834609732e-05, + "loss": 8.3139, + "step": 1390 + }, + { + "epoch": 0.0579800758617815, + "grad_norm": 92.5, + "learning_rate": 9.97949981840158e-05, + "loss": 6.9071, + "step": 1391 + }, + { + "epoch": 0.05802175815930974, + "grad_norm": 252.0, + "learning_rate": 9.979438711441602e-05, + "loss": 11.8754, + "step": 1392 + }, + { + "epoch": 0.05806344045683798, + "grad_norm": 540.0, + "learning_rate": 9.97937751373091e-05, + "loss": 16.8754, + "step": 1393 + }, + { + "epoch": 0.05810512275436622, + "grad_norm": 432.0, + "learning_rate": 9.979316225270621e-05, + "loss": 14.9388, + "step": 1394 + }, + { + "epoch": 0.058146805051894464, + "grad_norm": 290.0, + "learning_rate": 9.979254846061852e-05, + "loss": 13.0632, + "step": 1395 + }, + { + "epoch": 0.0581884873494227, + "grad_norm": 344.0, + "learning_rate": 9.979193376105723e-05, + "loss": 13.7507, + "step": 1396 + }, + { + "epoch": 0.05823016964695094, + "grad_norm": 352.0, + "learning_rate": 9.97913181540335e-05, + "loss": 14.6261, + "step": 1397 + }, + { + "epoch": 0.05827185194447918, + "grad_norm": 272.0, + "learning_rate": 9.979070163955859e-05, + "loss": 12.0635, + "step": 1398 + }, + { + "epoch": 0.05831353424200742, + "grad_norm": 249.0, + "learning_rate": 9.979008421764371e-05, + "loss": 10.8805, + "step": 1399 + }, + { + "epoch": 0.05835521653953566, + "grad_norm": 358.0, + "learning_rate": 9.978946588830014e-05, + "loss": 14.314, + "step": 1400 + }, + { + "epoch": 0.0583968988370639, + "grad_norm": 167.0, + "learning_rate": 9.978884665153913e-05, + "loss": 10.0004, + "step": 1401 + }, + { + "epoch": 0.05843858113459214, + "grad_norm": 584.0, + "learning_rate": 9.978822650737197e-05, + "loss": 18.3755, + "step": 1402 + }, + { + "epoch": 0.05848026343212038, + "grad_norm": 218.0, + "learning_rate": 9.978760545580996e-05, + "loss": 11.0009, + "step": 1403 + }, + { + "epoch": 0.05852194572964862, + "grad_norm": 486.0, + "learning_rate": 9.978698349686444e-05, + "loss": 16.1288, + "step": 1404 + }, + { + "epoch": 0.05856362802717686, + "grad_norm": 588.0, + "learning_rate": 9.978636063054669e-05, + "loss": 17.2509, + "step": 1405 + }, + { + "epoch": 0.0586053103247051, + "grad_norm": 258.0, + "learning_rate": 9.978573685686813e-05, + "loss": 12.376, + "step": 1406 + }, + { + "epoch": 0.058646992622233336, + "grad_norm": 394.0, + "learning_rate": 9.978511217584008e-05, + "loss": 14.6255, + "step": 1407 + }, + { + "epoch": 0.058688674919761576, + "grad_norm": 219.0, + "learning_rate": 9.978448658747395e-05, + "loss": 10.0006, + "step": 1408 + }, + { + "epoch": 0.058730357217289815, + "grad_norm": 708.0, + "learning_rate": 9.978386009178112e-05, + "loss": 20.5003, + "step": 1409 + }, + { + "epoch": 0.058772039514818054, + "grad_norm": 676.0, + "learning_rate": 9.978323268877304e-05, + "loss": 19.753, + "step": 1410 + }, + { + "epoch": 0.058813721812346294, + "grad_norm": 462.0, + "learning_rate": 9.97826043784611e-05, + "loss": 15.1304, + "step": 1411 + }, + { + "epoch": 0.05885540410987453, + "grad_norm": 132.0, + "learning_rate": 9.97819751608568e-05, + "loss": 10.2525, + "step": 1412 + }, + { + "epoch": 0.05889708640740278, + "grad_norm": 292.0, + "learning_rate": 9.978134503597157e-05, + "loss": 12.3142, + "step": 1413 + }, + { + "epoch": 0.05893876870493102, + "grad_norm": 288.0, + "learning_rate": 9.97807140038169e-05, + "loss": 13.563, + "step": 1414 + }, + { + "epoch": 0.05898045100245926, + "grad_norm": 596.0, + "learning_rate": 9.978008206440431e-05, + "loss": 18.8767, + "step": 1415 + }, + { + "epoch": 0.0590221332999875, + "grad_norm": 298.0, + "learning_rate": 9.97794492177453e-05, + "loss": 13.5005, + "step": 1416 + }, + { + "epoch": 0.05906381559751574, + "grad_norm": 272.0, + "learning_rate": 9.977881546385141e-05, + "loss": 11.6277, + "step": 1417 + }, + { + "epoch": 0.059105497895043976, + "grad_norm": 155.0, + "learning_rate": 9.97781808027342e-05, + "loss": 9.565, + "step": 1418 + }, + { + "epoch": 0.059147180192572216, + "grad_norm": 720.0, + "learning_rate": 9.977754523440521e-05, + "loss": 20.5043, + "step": 1419 + }, + { + "epoch": 0.059188862490100455, + "grad_norm": 226.0, + "learning_rate": 9.977690875887604e-05, + "loss": 11.6274, + "step": 1420 + }, + { + "epoch": 0.059230544787628694, + "grad_norm": 154.0, + "learning_rate": 9.977627137615831e-05, + "loss": 7.751, + "step": 1421 + }, + { + "epoch": 0.059272227085156934, + "grad_norm": 624.0, + "learning_rate": 9.977563308626359e-05, + "loss": 16.751, + "step": 1422 + }, + { + "epoch": 0.05931390938268517, + "grad_norm": 229.0, + "learning_rate": 9.977499388920355e-05, + "loss": 10.3139, + "step": 1423 + }, + { + "epoch": 0.05935559168021341, + "grad_norm": 612.0, + "learning_rate": 9.977435378498983e-05, + "loss": 20.2508, + "step": 1424 + }, + { + "epoch": 0.05939727397774165, + "grad_norm": 470.0, + "learning_rate": 9.977371277363408e-05, + "loss": 16.7513, + "step": 1425 + }, + { + "epoch": 0.05943895627526989, + "grad_norm": 306.0, + "learning_rate": 9.977307085514802e-05, + "loss": 13.6894, + "step": 1426 + }, + { + "epoch": 0.05948063857279813, + "grad_norm": 376.0, + "learning_rate": 9.977242802954329e-05, + "loss": 14.4389, + "step": 1427 + }, + { + "epoch": 0.05952232087032637, + "grad_norm": 334.0, + "learning_rate": 9.977178429683167e-05, + "loss": 13.1902, + "step": 1428 + }, + { + "epoch": 0.05956400316785461, + "grad_norm": 362.0, + "learning_rate": 9.977113965702485e-05, + "loss": 14.4385, + "step": 1429 + }, + { + "epoch": 0.05960568546538285, + "grad_norm": 426.0, + "learning_rate": 9.97704941101346e-05, + "loss": 16.0014, + "step": 1430 + }, + { + "epoch": 0.05964736776291109, + "grad_norm": 864.0, + "learning_rate": 9.976984765617268e-05, + "loss": 24.1261, + "step": 1431 + }, + { + "epoch": 0.059689050060439335, + "grad_norm": 342.0, + "learning_rate": 9.976920029515087e-05, + "loss": 12.1881, + "step": 1432 + }, + { + "epoch": 0.059730732357967574, + "grad_norm": 422.0, + "learning_rate": 9.976855202708096e-05, + "loss": 13.7511, + "step": 1433 + }, + { + "epoch": 0.05977241465549581, + "grad_norm": 564.0, + "learning_rate": 9.976790285197476e-05, + "loss": 18.001, + "step": 1434 + }, + { + "epoch": 0.05981409695302405, + "grad_norm": 440.0, + "learning_rate": 9.976725276984413e-05, + "loss": 16.6264, + "step": 1435 + }, + { + "epoch": 0.05985577925055229, + "grad_norm": 224.0, + "learning_rate": 9.976660178070088e-05, + "loss": 11.3136, + "step": 1436 + }, + { + "epoch": 0.05989746154808053, + "grad_norm": 382.0, + "learning_rate": 9.976594988455691e-05, + "loss": 14.0631, + "step": 1437 + }, + { + "epoch": 0.05993914384560877, + "grad_norm": 438.0, + "learning_rate": 9.976529708142408e-05, + "loss": 15.1256, + "step": 1438 + }, + { + "epoch": 0.05998082614313701, + "grad_norm": 178.0, + "learning_rate": 9.976464337131429e-05, + "loss": 9.1257, + "step": 1439 + }, + { + "epoch": 0.06002250844066525, + "grad_norm": 344.0, + "learning_rate": 9.976398875423947e-05, + "loss": 11.8756, + "step": 1440 + }, + { + "epoch": 0.06006419073819349, + "grad_norm": 576.0, + "learning_rate": 9.976333323021152e-05, + "loss": 18.8762, + "step": 1441 + }, + { + "epoch": 0.06010587303572173, + "grad_norm": 91.0, + "learning_rate": 9.976267679924242e-05, + "loss": 8.251, + "step": 1442 + }, + { + "epoch": 0.06014755533324997, + "grad_norm": 488.0, + "learning_rate": 9.976201946134411e-05, + "loss": 16.8755, + "step": 1443 + }, + { + "epoch": 0.06018923763077821, + "grad_norm": 464.0, + "learning_rate": 9.976136121652857e-05, + "loss": 15.6877, + "step": 1444 + }, + { + "epoch": 0.060230919928306446, + "grad_norm": 252.0, + "learning_rate": 9.976070206480783e-05, + "loss": 11.6878, + "step": 1445 + }, + { + "epoch": 0.060272602225834686, + "grad_norm": 237.0, + "learning_rate": 9.976004200619385e-05, + "loss": 11.1882, + "step": 1446 + }, + { + "epoch": 0.060314284523362925, + "grad_norm": 388.0, + "learning_rate": 9.97593810406987e-05, + "loss": 14.3755, + "step": 1447 + }, + { + "epoch": 0.060355966820891165, + "grad_norm": 496.0, + "learning_rate": 9.975871916833441e-05, + "loss": 16.3756, + "step": 1448 + }, + { + "epoch": 0.060397649118419404, + "grad_norm": 452.0, + "learning_rate": 9.975805638911304e-05, + "loss": 14.1254, + "step": 1449 + }, + { + "epoch": 0.06043933141594765, + "grad_norm": 376.0, + "learning_rate": 9.975739270304669e-05, + "loss": 14.5013, + "step": 1450 + }, + { + "epoch": 0.06048101371347589, + "grad_norm": 302.0, + "learning_rate": 9.975672811014742e-05, + "loss": 12.8128, + "step": 1451 + }, + { + "epoch": 0.06052269601100413, + "grad_norm": 298.0, + "learning_rate": 9.975606261042738e-05, + "loss": 11.0629, + "step": 1452 + }, + { + "epoch": 0.06056437830853237, + "grad_norm": 466.0, + "learning_rate": 9.975539620389869e-05, + "loss": 15.6929, + "step": 1453 + }, + { + "epoch": 0.06060606060606061, + "grad_norm": 744.0, + "learning_rate": 9.975472889057346e-05, + "loss": 21.2514, + "step": 1454 + }, + { + "epoch": 0.06064774290358885, + "grad_norm": 580.0, + "learning_rate": 9.97540606704639e-05, + "loss": 17.3775, + "step": 1455 + }, + { + "epoch": 0.060689425201117086, + "grad_norm": 254.0, + "learning_rate": 9.975339154358216e-05, + "loss": 12.3759, + "step": 1456 + }, + { + "epoch": 0.060731107498645326, + "grad_norm": 428.0, + "learning_rate": 9.975272150994045e-05, + "loss": 15.0669, + "step": 1457 + }, + { + "epoch": 0.060772789796173565, + "grad_norm": 378.0, + "learning_rate": 9.975205056955096e-05, + "loss": 14.7504, + "step": 1458 + }, + { + "epoch": 0.060814472093701805, + "grad_norm": 239.0, + "learning_rate": 9.975137872242595e-05, + "loss": 12.1882, + "step": 1459 + }, + { + "epoch": 0.060856154391230044, + "grad_norm": 354.0, + "learning_rate": 9.975070596857764e-05, + "loss": 14.7503, + "step": 1460 + }, + { + "epoch": 0.06089783668875828, + "grad_norm": 876.0, + "learning_rate": 9.975003230801829e-05, + "loss": 22.6258, + "step": 1461 + }, + { + "epoch": 0.06093951898628652, + "grad_norm": 484.0, + "learning_rate": 9.974935774076019e-05, + "loss": 15.8127, + "step": 1462 + }, + { + "epoch": 0.06098120128381476, + "grad_norm": 241.0, + "learning_rate": 9.974868226681562e-05, + "loss": 11.5675, + "step": 1463 + }, + { + "epoch": 0.061022883581343, + "grad_norm": 596.0, + "learning_rate": 9.97480058861969e-05, + "loss": 17.0006, + "step": 1464 + }, + { + "epoch": 0.06106456587887124, + "grad_norm": 402.0, + "learning_rate": 9.974732859891637e-05, + "loss": 14.188, + "step": 1465 + }, + { + "epoch": 0.06110624817639948, + "grad_norm": 520.0, + "learning_rate": 9.974665040498636e-05, + "loss": 16.6257, + "step": 1466 + }, + { + "epoch": 0.06114793047392772, + "grad_norm": 316.0, + "learning_rate": 9.974597130441921e-05, + "loss": 12.8756, + "step": 1467 + }, + { + "epoch": 0.061189612771455966, + "grad_norm": 244.0, + "learning_rate": 9.974529129722733e-05, + "loss": 11.5004, + "step": 1468 + }, + { + "epoch": 0.061231295068984205, + "grad_norm": 344.0, + "learning_rate": 9.974461038342311e-05, + "loss": 12.8128, + "step": 1469 + }, + { + "epoch": 0.061272977366512445, + "grad_norm": 1040.0, + "learning_rate": 9.974392856301893e-05, + "loss": 33.2519, + "step": 1470 + }, + { + "epoch": 0.061314659664040684, + "grad_norm": 560.0, + "learning_rate": 9.974324583602726e-05, + "loss": 16.8756, + "step": 1471 + }, + { + "epoch": 0.06135634196156892, + "grad_norm": 388.0, + "learning_rate": 9.97425622024605e-05, + "loss": 14.3755, + "step": 1472 + }, + { + "epoch": 0.06139802425909716, + "grad_norm": 412.0, + "learning_rate": 9.974187766233112e-05, + "loss": 14.3756, + "step": 1473 + }, + { + "epoch": 0.0614397065566254, + "grad_norm": 368.0, + "learning_rate": 9.974119221565162e-05, + "loss": 14.5007, + "step": 1474 + }, + { + "epoch": 0.06148138885415364, + "grad_norm": 162.0, + "learning_rate": 9.974050586243448e-05, + "loss": 9.4381, + "step": 1475 + }, + { + "epoch": 0.06152307115168188, + "grad_norm": 104.5, + "learning_rate": 9.97398186026922e-05, + "loss": 8.9378, + "step": 1476 + }, + { + "epoch": 0.06156475344921012, + "grad_norm": 486.0, + "learning_rate": 9.97391304364373e-05, + "loss": 16.5004, + "step": 1477 + }, + { + "epoch": 0.06160643574673836, + "grad_norm": 129.0, + "learning_rate": 9.973844136368234e-05, + "loss": 8.9384, + "step": 1478 + }, + { + "epoch": 0.0616481180442666, + "grad_norm": 76.0, + "learning_rate": 9.973775138443987e-05, + "loss": 7.3758, + "step": 1479 + }, + { + "epoch": 0.06168980034179484, + "grad_norm": 191.0, + "learning_rate": 9.973706049872247e-05, + "loss": 9.0627, + "step": 1480 + }, + { + "epoch": 0.06173148263932308, + "grad_norm": 548.0, + "learning_rate": 9.973636870654272e-05, + "loss": 18.5009, + "step": 1481 + }, + { + "epoch": 0.06177316493685132, + "grad_norm": 512.0, + "learning_rate": 9.973567600791324e-05, + "loss": 17.3755, + "step": 1482 + }, + { + "epoch": 0.06181484723437956, + "grad_norm": 362.0, + "learning_rate": 9.973498240284664e-05, + "loss": 14.1879, + "step": 1483 + }, + { + "epoch": 0.061856529531907796, + "grad_norm": 252.0, + "learning_rate": 9.973428789135559e-05, + "loss": 10.5009, + "step": 1484 + }, + { + "epoch": 0.061898211829436035, + "grad_norm": 244.0, + "learning_rate": 9.973359247345272e-05, + "loss": 11.5636, + "step": 1485 + }, + { + "epoch": 0.06193989412696428, + "grad_norm": 596.0, + "learning_rate": 9.973289614915071e-05, + "loss": 15.8757, + "step": 1486 + }, + { + "epoch": 0.06198157642449252, + "grad_norm": 508.0, + "learning_rate": 9.973219891846225e-05, + "loss": 16.5009, + "step": 1487 + }, + { + "epoch": 0.06202325872202076, + "grad_norm": 436.0, + "learning_rate": 9.973150078140006e-05, + "loss": 12.5664, + "step": 1488 + }, + { + "epoch": 0.062064941019549, + "grad_norm": 382.0, + "learning_rate": 9.973080173797684e-05, + "loss": 15.4381, + "step": 1489 + }, + { + "epoch": 0.06210662331707724, + "grad_norm": 304.0, + "learning_rate": 9.973010178820534e-05, + "loss": 13.2509, + "step": 1490 + }, + { + "epoch": 0.06214830561460548, + "grad_norm": 1020.0, + "learning_rate": 9.972940093209833e-05, + "loss": 24.7517, + "step": 1491 + }, + { + "epoch": 0.06218998791213372, + "grad_norm": 356.0, + "learning_rate": 9.972869916966858e-05, + "loss": 14.5631, + "step": 1492 + }, + { + "epoch": 0.06223167020966196, + "grad_norm": 1056.0, + "learning_rate": 9.972799650092887e-05, + "loss": 27.1265, + "step": 1493 + }, + { + "epoch": 0.0622733525071902, + "grad_norm": 580.0, + "learning_rate": 9.9727292925892e-05, + "loss": 18.5005, + "step": 1494 + }, + { + "epoch": 0.062315034804718436, + "grad_norm": 498.0, + "learning_rate": 9.972658844457081e-05, + "loss": 14.692, + "step": 1495 + }, + { + "epoch": 0.062356717102246675, + "grad_norm": 422.0, + "learning_rate": 9.972588305697812e-05, + "loss": 17.0014, + "step": 1496 + }, + { + "epoch": 0.062398399399774915, + "grad_norm": 186.0, + "learning_rate": 9.97251767631268e-05, + "loss": 10.8755, + "step": 1497 + }, + { + "epoch": 0.062440081697303154, + "grad_norm": 322.0, + "learning_rate": 9.972446956302974e-05, + "loss": 13.6254, + "step": 1498 + }, + { + "epoch": 0.062481763994831394, + "grad_norm": 348.0, + "learning_rate": 9.97237614566998e-05, + "loss": 12.314, + "step": 1499 + }, + { + "epoch": 0.06252344629235963, + "grad_norm": 374.0, + "learning_rate": 9.972305244414987e-05, + "loss": 14.0008, + "step": 1500 + }, + { + "epoch": 0.06256512858988787, + "grad_norm": 187.0, + "learning_rate": 9.972234252539291e-05, + "loss": 9.4381, + "step": 1501 + }, + { + "epoch": 0.06260681088741611, + "grad_norm": 1096.0, + "learning_rate": 9.972163170044185e-05, + "loss": 25.0055, + "step": 1502 + }, + { + "epoch": 0.06264849318494435, + "grad_norm": 346.0, + "learning_rate": 9.972091996930964e-05, + "loss": 13.626, + "step": 1503 + }, + { + "epoch": 0.06269017548247259, + "grad_norm": 266.0, + "learning_rate": 9.972020733200924e-05, + "loss": 12.6255, + "step": 1504 + }, + { + "epoch": 0.06273185778000083, + "grad_norm": 336.0, + "learning_rate": 9.971949378855365e-05, + "loss": 13.1878, + "step": 1505 + }, + { + "epoch": 0.06277354007752907, + "grad_norm": 724.0, + "learning_rate": 9.971877933895587e-05, + "loss": 19.2505, + "step": 1506 + }, + { + "epoch": 0.06281522237505731, + "grad_norm": 162.0, + "learning_rate": 9.971806398322892e-05, + "loss": 9.0632, + "step": 1507 + }, + { + "epoch": 0.06285690467258555, + "grad_norm": 201.0, + "learning_rate": 9.971734772138586e-05, + "loss": 10.6889, + "step": 1508 + }, + { + "epoch": 0.06289858697011379, + "grad_norm": 354.0, + "learning_rate": 9.97166305534397e-05, + "loss": 14.7507, + "step": 1509 + }, + { + "epoch": 0.06294026926764203, + "grad_norm": 202.0, + "learning_rate": 9.971591247940355e-05, + "loss": 10.0009, + "step": 1510 + }, + { + "epoch": 0.06298195156517027, + "grad_norm": 868.0, + "learning_rate": 9.971519349929047e-05, + "loss": 25.0061, + "step": 1511 + }, + { + "epoch": 0.0630236338626985, + "grad_norm": 274.0, + "learning_rate": 9.971447361311359e-05, + "loss": 11.1257, + "step": 1512 + }, + { + "epoch": 0.06306531616022674, + "grad_norm": 398.0, + "learning_rate": 9.971375282088599e-05, + "loss": 16.0005, + "step": 1513 + }, + { + "epoch": 0.063106998457755, + "grad_norm": 444.0, + "learning_rate": 9.971303112262086e-05, + "loss": 15.5009, + "step": 1514 + }, + { + "epoch": 0.06314868075528324, + "grad_norm": 320.0, + "learning_rate": 9.971230851833131e-05, + "loss": 11.3757, + "step": 1515 + }, + { + "epoch": 0.06319036305281148, + "grad_norm": 350.0, + "learning_rate": 9.971158500803052e-05, + "loss": 10.3756, + "step": 1516 + }, + { + "epoch": 0.06323204535033972, + "grad_norm": 378.0, + "learning_rate": 9.971086059173169e-05, + "loss": 14.7504, + "step": 1517 + }, + { + "epoch": 0.06327372764786796, + "grad_norm": 760.0, + "learning_rate": 9.971013526944802e-05, + "loss": 20.7504, + "step": 1518 + }, + { + "epoch": 0.0633154099453962, + "grad_norm": 197.0, + "learning_rate": 9.97094090411927e-05, + "loss": 9.6261, + "step": 1519 + }, + { + "epoch": 0.06335709224292443, + "grad_norm": 238.0, + "learning_rate": 9.970868190697899e-05, + "loss": 11.0631, + "step": 1520 + }, + { + "epoch": 0.06339877454045267, + "grad_norm": 276.0, + "learning_rate": 9.970795386682017e-05, + "loss": 12.126, + "step": 1521 + }, + { + "epoch": 0.06344045683798091, + "grad_norm": 362.0, + "learning_rate": 9.970722492072945e-05, + "loss": 14.188, + "step": 1522 + }, + { + "epoch": 0.06348213913550915, + "grad_norm": 183.0, + "learning_rate": 9.970649506872015e-05, + "loss": 9.9394, + "step": 1523 + }, + { + "epoch": 0.06352382143303739, + "grad_norm": 105.5, + "learning_rate": 9.970576431080556e-05, + "loss": 7.8439, + "step": 1524 + }, + { + "epoch": 0.06356550373056563, + "grad_norm": 262.0, + "learning_rate": 9.9705032646999e-05, + "loss": 9.6273, + "step": 1525 + }, + { + "epoch": 0.06360718602809387, + "grad_norm": 516.0, + "learning_rate": 9.970430007731382e-05, + "loss": 18.0004, + "step": 1526 + }, + { + "epoch": 0.06364886832562211, + "grad_norm": 848.0, + "learning_rate": 9.970356660176337e-05, + "loss": 22.7504, + "step": 1527 + }, + { + "epoch": 0.06369055062315035, + "grad_norm": 448.0, + "learning_rate": 9.970283222036099e-05, + "loss": 14.7505, + "step": 1528 + }, + { + "epoch": 0.06373223292067859, + "grad_norm": 278.0, + "learning_rate": 9.970209693312007e-05, + "loss": 11.4392, + "step": 1529 + }, + { + "epoch": 0.06377391521820683, + "grad_norm": 280.0, + "learning_rate": 9.970136074005403e-05, + "loss": 11.6261, + "step": 1530 + }, + { + "epoch": 0.06381559751573507, + "grad_norm": 1328.0, + "learning_rate": 9.970062364117628e-05, + "loss": 31.0055, + "step": 1531 + }, + { + "epoch": 0.0638572798132633, + "grad_norm": 382.0, + "learning_rate": 9.969988563650026e-05, + "loss": 16.0036, + "step": 1532 + }, + { + "epoch": 0.06389896211079155, + "grad_norm": 322.0, + "learning_rate": 9.96991467260394e-05, + "loss": 13.5006, + "step": 1533 + }, + { + "epoch": 0.06394064440831979, + "grad_norm": 556.0, + "learning_rate": 9.969840690980718e-05, + "loss": 17.126, + "step": 1534 + }, + { + "epoch": 0.06398232670584802, + "grad_norm": 191.0, + "learning_rate": 9.969766618781709e-05, + "loss": 10.9389, + "step": 1535 + }, + { + "epoch": 0.06402400900337626, + "grad_norm": 360.0, + "learning_rate": 9.969692456008262e-05, + "loss": 13.4379, + "step": 1536 + }, + { + "epoch": 0.0640656913009045, + "grad_norm": 201.0, + "learning_rate": 9.969618202661728e-05, + "loss": 10.5005, + "step": 1537 + }, + { + "epoch": 0.06410737359843274, + "grad_norm": 384.0, + "learning_rate": 9.969543858743461e-05, + "loss": 15.1257, + "step": 1538 + }, + { + "epoch": 0.06414905589596098, + "grad_norm": 512.0, + "learning_rate": 9.969469424254819e-05, + "loss": 17.0003, + "step": 1539 + }, + { + "epoch": 0.06419073819348922, + "grad_norm": 233.0, + "learning_rate": 9.969394899197152e-05, + "loss": 10.7532, + "step": 1540 + }, + { + "epoch": 0.06423242049101746, + "grad_norm": 230.0, + "learning_rate": 9.969320283571824e-05, + "loss": 10.0629, + "step": 1541 + }, + { + "epoch": 0.0642741027885457, + "grad_norm": 408.0, + "learning_rate": 9.969245577380191e-05, + "loss": 13.7505, + "step": 1542 + }, + { + "epoch": 0.06431578508607394, + "grad_norm": 253.0, + "learning_rate": 9.969170780623617e-05, + "loss": 11.688, + "step": 1543 + }, + { + "epoch": 0.06435746738360218, + "grad_norm": 420.0, + "learning_rate": 9.969095893303464e-05, + "loss": 15.1889, + "step": 1544 + }, + { + "epoch": 0.06439914968113042, + "grad_norm": 1008.0, + "learning_rate": 9.969020915421098e-05, + "loss": 25.1308, + "step": 1545 + }, + { + "epoch": 0.06444083197865866, + "grad_norm": 173.0, + "learning_rate": 9.968945846977884e-05, + "loss": 9.7509, + "step": 1546 + }, + { + "epoch": 0.0644825142761869, + "grad_norm": 231.0, + "learning_rate": 9.968870687975192e-05, + "loss": 11.3136, + "step": 1547 + }, + { + "epoch": 0.06452419657371514, + "grad_norm": 342.0, + "learning_rate": 9.96879543841439e-05, + "loss": 13.9379, + "step": 1548 + }, + { + "epoch": 0.06456587887124338, + "grad_norm": 254.0, + "learning_rate": 9.968720098296849e-05, + "loss": 11.5628, + "step": 1549 + }, + { + "epoch": 0.06460756116877163, + "grad_norm": 416.0, + "learning_rate": 9.968644667623943e-05, + "loss": 15.3754, + "step": 1550 + }, + { + "epoch": 0.06464924346629987, + "grad_norm": 568.0, + "learning_rate": 9.968569146397049e-05, + "loss": 17.7504, + "step": 1551 + }, + { + "epoch": 0.06469092576382811, + "grad_norm": 584.0, + "learning_rate": 9.968493534617541e-05, + "loss": 18.876, + "step": 1552 + }, + { + "epoch": 0.06473260806135635, + "grad_norm": 300.0, + "learning_rate": 9.968417832286795e-05, + "loss": 11.5637, + "step": 1553 + }, + { + "epoch": 0.06477429035888459, + "grad_norm": 1136.0, + "learning_rate": 9.968342039406194e-05, + "loss": 24.5064, + "step": 1554 + }, + { + "epoch": 0.06481597265641283, + "grad_norm": 648.0, + "learning_rate": 9.968266155977118e-05, + "loss": 20.5006, + "step": 1555 + }, + { + "epoch": 0.06485765495394107, + "grad_norm": 159.0, + "learning_rate": 9.968190182000952e-05, + "loss": 9.8145, + "step": 1556 + }, + { + "epoch": 0.0648993372514693, + "grad_norm": 512.0, + "learning_rate": 9.968114117479077e-05, + "loss": 16.5004, + "step": 1557 + }, + { + "epoch": 0.06494101954899754, + "grad_norm": 416.0, + "learning_rate": 9.968037962412881e-05, + "loss": 15.5627, + "step": 1558 + }, + { + "epoch": 0.06498270184652578, + "grad_norm": 720.0, + "learning_rate": 9.967961716803755e-05, + "loss": 23.0007, + "step": 1559 + }, + { + "epoch": 0.06502438414405402, + "grad_norm": 712.0, + "learning_rate": 9.967885380653082e-05, + "loss": 19.5065, + "step": 1560 + }, + { + "epoch": 0.06506606644158226, + "grad_norm": 1144.0, + "learning_rate": 9.967808953962259e-05, + "loss": 26.6256, + "step": 1561 + }, + { + "epoch": 0.0651077487391105, + "grad_norm": 452.0, + "learning_rate": 9.967732436732677e-05, + "loss": 16.8752, + "step": 1562 + }, + { + "epoch": 0.06514943103663874, + "grad_norm": 384.0, + "learning_rate": 9.96765582896573e-05, + "loss": 12.2507, + "step": 1563 + }, + { + "epoch": 0.06519111333416698, + "grad_norm": 272.0, + "learning_rate": 9.967579130662814e-05, + "loss": 12.1261, + "step": 1564 + }, + { + "epoch": 0.06523279563169522, + "grad_norm": 494.0, + "learning_rate": 9.967502341825328e-05, + "loss": 17.5004, + "step": 1565 + }, + { + "epoch": 0.06527447792922346, + "grad_norm": 140.0, + "learning_rate": 9.967425462454669e-05, + "loss": 10.2512, + "step": 1566 + }, + { + "epoch": 0.0653161602267517, + "grad_norm": 412.0, + "learning_rate": 9.967348492552242e-05, + "loss": 13.5049, + "step": 1567 + }, + { + "epoch": 0.06535784252427994, + "grad_norm": 396.0, + "learning_rate": 9.967271432119447e-05, + "loss": 15.3134, + "step": 1568 + }, + { + "epoch": 0.06539952482180818, + "grad_norm": 130.0, + "learning_rate": 9.96719428115769e-05, + "loss": 9.1256, + "step": 1569 + }, + { + "epoch": 0.06544120711933642, + "grad_norm": 372.0, + "learning_rate": 9.967117039668376e-05, + "loss": 15.063, + "step": 1570 + }, + { + "epoch": 0.06548288941686466, + "grad_norm": 258.0, + "learning_rate": 9.967039707652911e-05, + "loss": 10.0032, + "step": 1571 + }, + { + "epoch": 0.0655245717143929, + "grad_norm": 154.0, + "learning_rate": 9.966962285112709e-05, + "loss": 8.9394, + "step": 1572 + }, + { + "epoch": 0.06556625401192114, + "grad_norm": 222.0, + "learning_rate": 9.966884772049178e-05, + "loss": 11.5012, + "step": 1573 + }, + { + "epoch": 0.06560793630944937, + "grad_norm": 428.0, + "learning_rate": 9.966807168463729e-05, + "loss": 14.6881, + "step": 1574 + }, + { + "epoch": 0.06564961860697761, + "grad_norm": 510.0, + "learning_rate": 9.96672947435778e-05, + "loss": 16.3755, + "step": 1575 + }, + { + "epoch": 0.06569130090450585, + "grad_norm": 1728.0, + "learning_rate": 9.966651689732746e-05, + "loss": 35.7554, + "step": 1576 + }, + { + "epoch": 0.06573298320203409, + "grad_norm": 1064.0, + "learning_rate": 9.966573814590043e-05, + "loss": 23.7557, + "step": 1577 + }, + { + "epoch": 0.06577466549956233, + "grad_norm": 380.0, + "learning_rate": 9.966495848931092e-05, + "loss": 13.7512, + "step": 1578 + }, + { + "epoch": 0.06581634779709057, + "grad_norm": 310.0, + "learning_rate": 9.966417792757315e-05, + "loss": 13.3772, + "step": 1579 + }, + { + "epoch": 0.06585803009461881, + "grad_norm": 364.0, + "learning_rate": 9.96633964607013e-05, + "loss": 15.0635, + "step": 1580 + }, + { + "epoch": 0.06589971239214705, + "grad_norm": 1168.0, + "learning_rate": 9.966261408870965e-05, + "loss": 26.5017, + "step": 1581 + }, + { + "epoch": 0.06594139468967529, + "grad_norm": 278.0, + "learning_rate": 9.966183081161244e-05, + "loss": 12.0635, + "step": 1582 + }, + { + "epoch": 0.06598307698720353, + "grad_norm": 81.5, + "learning_rate": 9.966104662942398e-05, + "loss": 8.3758, + "step": 1583 + }, + { + "epoch": 0.06602475928473177, + "grad_norm": 133.0, + "learning_rate": 9.966026154215851e-05, + "loss": 8.3757, + "step": 1584 + }, + { + "epoch": 0.06606644158226001, + "grad_norm": 378.0, + "learning_rate": 9.965947554983038e-05, + "loss": 15.6881, + "step": 1585 + }, + { + "epoch": 0.06610812387978825, + "grad_norm": 468.0, + "learning_rate": 9.96586886524539e-05, + "loss": 15.4385, + "step": 1586 + }, + { + "epoch": 0.0661498061773165, + "grad_norm": 141.0, + "learning_rate": 9.96579008500434e-05, + "loss": 9.2503, + "step": 1587 + }, + { + "epoch": 0.06619148847484474, + "grad_norm": 370.0, + "learning_rate": 9.965711214261327e-05, + "loss": 15.7513, + "step": 1588 + }, + { + "epoch": 0.06623317077237298, + "grad_norm": 620.0, + "learning_rate": 9.965632253017784e-05, + "loss": 19.8755, + "step": 1589 + }, + { + "epoch": 0.06627485306990122, + "grad_norm": 540.0, + "learning_rate": 9.965553201275153e-05, + "loss": 16.7516, + "step": 1590 + }, + { + "epoch": 0.06631653536742946, + "grad_norm": 912.0, + "learning_rate": 9.965474059034874e-05, + "loss": 23.3762, + "step": 1591 + }, + { + "epoch": 0.0663582176649577, + "grad_norm": 500.0, + "learning_rate": 9.965394826298391e-05, + "loss": 17.6254, + "step": 1592 + }, + { + "epoch": 0.06639989996248594, + "grad_norm": 912.0, + "learning_rate": 9.965315503067145e-05, + "loss": 23.8754, + "step": 1593 + }, + { + "epoch": 0.06644158226001418, + "grad_norm": 320.0, + "learning_rate": 9.965236089342582e-05, + "loss": 12.0006, + "step": 1594 + }, + { + "epoch": 0.06648326455754242, + "grad_norm": 167.0, + "learning_rate": 9.965156585126153e-05, + "loss": 10.0635, + "step": 1595 + }, + { + "epoch": 0.06652494685507065, + "grad_norm": 688.0, + "learning_rate": 9.965076990419305e-05, + "loss": 18.1265, + "step": 1596 + }, + { + "epoch": 0.0665666291525989, + "grad_norm": 221.0, + "learning_rate": 9.964997305223485e-05, + "loss": 10.8764, + "step": 1597 + }, + { + "epoch": 0.06660831145012713, + "grad_norm": 444.0, + "learning_rate": 9.964917529540149e-05, + "loss": 16.0013, + "step": 1598 + }, + { + "epoch": 0.06664999374765537, + "grad_norm": 296.0, + "learning_rate": 9.964837663370752e-05, + "loss": 13.3129, + "step": 1599 + }, + { + "epoch": 0.06669167604518361, + "grad_norm": 284.0, + "learning_rate": 9.964757706716748e-05, + "loss": 12.6255, + "step": 1600 + }, + { + "epoch": 0.06673335834271185, + "grad_norm": 278.0, + "learning_rate": 9.964677659579592e-05, + "loss": 12.8756, + "step": 1601 + }, + { + "epoch": 0.06677504064024009, + "grad_norm": 129.0, + "learning_rate": 9.964597521960746e-05, + "loss": 7.5956, + "step": 1602 + }, + { + "epoch": 0.06681672293776833, + "grad_norm": 468.0, + "learning_rate": 9.964517293861669e-05, + "loss": 15.7503, + "step": 1603 + }, + { + "epoch": 0.06685840523529657, + "grad_norm": 330.0, + "learning_rate": 9.964436975283823e-05, + "loss": 12.938, + "step": 1604 + }, + { + "epoch": 0.06690008753282481, + "grad_norm": 237.0, + "learning_rate": 9.964356566228674e-05, + "loss": 11.188, + "step": 1605 + }, + { + "epoch": 0.06694176983035305, + "grad_norm": 584.0, + "learning_rate": 9.964276066697687e-05, + "loss": 18.5005, + "step": 1606 + }, + { + "epoch": 0.06698345212788129, + "grad_norm": 294.0, + "learning_rate": 9.964195476692327e-05, + "loss": 14.0012, + "step": 1607 + }, + { + "epoch": 0.06702513442540953, + "grad_norm": 394.0, + "learning_rate": 9.964114796214062e-05, + "loss": 15.2507, + "step": 1608 + }, + { + "epoch": 0.06706681672293777, + "grad_norm": 356.0, + "learning_rate": 9.964034025264365e-05, + "loss": 12.6256, + "step": 1609 + }, + { + "epoch": 0.067108499020466, + "grad_norm": 206.0, + "learning_rate": 9.963953163844708e-05, + "loss": 11.5008, + "step": 1610 + }, + { + "epoch": 0.06715018131799425, + "grad_norm": 160.0, + "learning_rate": 9.963872211956562e-05, + "loss": 9.8757, + "step": 1611 + }, + { + "epoch": 0.06719186361552248, + "grad_norm": 648.0, + "learning_rate": 9.963791169601406e-05, + "loss": 20.3759, + "step": 1612 + }, + { + "epoch": 0.06723354591305072, + "grad_norm": 205.0, + "learning_rate": 9.963710036780716e-05, + "loss": 10.2506, + "step": 1613 + }, + { + "epoch": 0.06727522821057896, + "grad_norm": 304.0, + "learning_rate": 9.963628813495969e-05, + "loss": 13.1256, + "step": 1614 + }, + { + "epoch": 0.0673169105081072, + "grad_norm": 632.0, + "learning_rate": 9.963547499748646e-05, + "loss": 14.8801, + "step": 1615 + }, + { + "epoch": 0.06735859280563544, + "grad_norm": 95.0, + "learning_rate": 9.963466095540228e-05, + "loss": 6.4381, + "step": 1616 + }, + { + "epoch": 0.06740027510316368, + "grad_norm": 111.5, + "learning_rate": 9.963384600872202e-05, + "loss": 9.3138, + "step": 1617 + }, + { + "epoch": 0.06744195740069192, + "grad_norm": 396.0, + "learning_rate": 9.96330301574605e-05, + "loss": 13.6889, + "step": 1618 + }, + { + "epoch": 0.06748363969822016, + "grad_norm": 173.0, + "learning_rate": 9.96322134016326e-05, + "loss": 8.6882, + "step": 1619 + }, + { + "epoch": 0.0675253219957484, + "grad_norm": 410.0, + "learning_rate": 9.963139574125321e-05, + "loss": 15.0004, + "step": 1620 + }, + { + "epoch": 0.06756700429327664, + "grad_norm": 378.0, + "learning_rate": 9.963057717633721e-05, + "loss": 14.8127, + "step": 1621 + }, + { + "epoch": 0.06760868659080488, + "grad_norm": 348.0, + "learning_rate": 9.962975770689955e-05, + "loss": 12.5629, + "step": 1622 + }, + { + "epoch": 0.06765036888833313, + "grad_norm": 290.0, + "learning_rate": 9.962893733295515e-05, + "loss": 13.1266, + "step": 1623 + }, + { + "epoch": 0.06769205118586137, + "grad_norm": 217.0, + "learning_rate": 9.962811605451896e-05, + "loss": 10.8129, + "step": 1624 + }, + { + "epoch": 0.06773373348338961, + "grad_norm": 262.0, + "learning_rate": 9.962729387160595e-05, + "loss": 12.5004, + "step": 1625 + }, + { + "epoch": 0.06777541578091785, + "grad_norm": 620.0, + "learning_rate": 9.962647078423111e-05, + "loss": 19.7505, + "step": 1626 + }, + { + "epoch": 0.06781709807844609, + "grad_norm": 113.0, + "learning_rate": 9.962564679240942e-05, + "loss": 8.8767, + "step": 1627 + }, + { + "epoch": 0.06785878037597433, + "grad_norm": 195.0, + "learning_rate": 9.962482189615592e-05, + "loss": 10.627, + "step": 1628 + }, + { + "epoch": 0.06790046267350257, + "grad_norm": 390.0, + "learning_rate": 9.962399609548563e-05, + "loss": 14.2504, + "step": 1629 + }, + { + "epoch": 0.06794214497103081, + "grad_norm": 260.0, + "learning_rate": 9.96231693904136e-05, + "loss": 12.0004, + "step": 1630 + }, + { + "epoch": 0.06798382726855905, + "grad_norm": 205.0, + "learning_rate": 9.962234178095493e-05, + "loss": 11.3129, + "step": 1631 + }, + { + "epoch": 0.06802550956608729, + "grad_norm": 232.0, + "learning_rate": 9.962151326712466e-05, + "loss": 7.6884, + "step": 1632 + }, + { + "epoch": 0.06806719186361553, + "grad_norm": 540.0, + "learning_rate": 9.96206838489379e-05, + "loss": 18.6263, + "step": 1633 + }, + { + "epoch": 0.06810887416114376, + "grad_norm": 316.0, + "learning_rate": 9.961985352640977e-05, + "loss": 12.9379, + "step": 1634 + }, + { + "epoch": 0.068150556458672, + "grad_norm": 122.5, + "learning_rate": 9.961902229955541e-05, + "loss": 8.8759, + "step": 1635 + }, + { + "epoch": 0.06819223875620024, + "grad_norm": 368.0, + "learning_rate": 9.961819016838997e-05, + "loss": 14.1884, + "step": 1636 + }, + { + "epoch": 0.06823392105372848, + "grad_norm": 352.0, + "learning_rate": 9.96173571329286e-05, + "loss": 14.3132, + "step": 1637 + }, + { + "epoch": 0.06827560335125672, + "grad_norm": 328.0, + "learning_rate": 9.961652319318649e-05, + "loss": 14.0012, + "step": 1638 + }, + { + "epoch": 0.06831728564878496, + "grad_norm": 390.0, + "learning_rate": 9.961568834917885e-05, + "loss": 14.2508, + "step": 1639 + }, + { + "epoch": 0.0683589679463132, + "grad_norm": 258.0, + "learning_rate": 9.961485260092088e-05, + "loss": 11.9381, + "step": 1640 + }, + { + "epoch": 0.06840065024384144, + "grad_norm": 976.0, + "learning_rate": 9.961401594842783e-05, + "loss": 23.0051, + "step": 1641 + }, + { + "epoch": 0.06844233254136968, + "grad_norm": 245.0, + "learning_rate": 9.961317839171492e-05, + "loss": 11.6878, + "step": 1642 + }, + { + "epoch": 0.06848401483889792, + "grad_norm": 346.0, + "learning_rate": 9.961233993079743e-05, + "loss": 14.1262, + "step": 1643 + }, + { + "epoch": 0.06852569713642616, + "grad_norm": 1056.0, + "learning_rate": 9.961150056569064e-05, + "loss": 28.2503, + "step": 1644 + }, + { + "epoch": 0.0685673794339544, + "grad_norm": 736.0, + "learning_rate": 9.961066029640984e-05, + "loss": 20.7503, + "step": 1645 + }, + { + "epoch": 0.06860906173148264, + "grad_norm": 366.0, + "learning_rate": 9.960981912297037e-05, + "loss": 13.3131, + "step": 1646 + }, + { + "epoch": 0.06865074402901088, + "grad_norm": 284.0, + "learning_rate": 9.960897704538755e-05, + "loss": 12.6257, + "step": 1647 + }, + { + "epoch": 0.06869242632653912, + "grad_norm": 248.0, + "learning_rate": 9.960813406367669e-05, + "loss": 12.1887, + "step": 1648 + }, + { + "epoch": 0.06873410862406736, + "grad_norm": 696.0, + "learning_rate": 9.960729017785319e-05, + "loss": 17.5005, + "step": 1649 + }, + { + "epoch": 0.0687757909215956, + "grad_norm": 221.0, + "learning_rate": 9.960644538793245e-05, + "loss": 11.1259, + "step": 1650 + }, + { + "epoch": 0.06881747321912383, + "grad_norm": 216.0, + "learning_rate": 9.96055996939298e-05, + "loss": 11.752, + "step": 1651 + }, + { + "epoch": 0.06885915551665207, + "grad_norm": 792.0, + "learning_rate": 9.960475309586073e-05, + "loss": 21.6256, + "step": 1652 + }, + { + "epoch": 0.06890083781418031, + "grad_norm": 556.0, + "learning_rate": 9.96039055937406e-05, + "loss": 17.8784, + "step": 1653 + }, + { + "epoch": 0.06894252011170855, + "grad_norm": 246.0, + "learning_rate": 9.96030571875849e-05, + "loss": 10.0008, + "step": 1654 + }, + { + "epoch": 0.06898420240923679, + "grad_norm": 504.0, + "learning_rate": 9.960220787740908e-05, + "loss": 17.1253, + "step": 1655 + }, + { + "epoch": 0.06902588470676503, + "grad_norm": 494.0, + "learning_rate": 9.960135766322862e-05, + "loss": 18.5007, + "step": 1656 + }, + { + "epoch": 0.06906756700429327, + "grad_norm": 362.0, + "learning_rate": 9.960050654505901e-05, + "loss": 13.8761, + "step": 1657 + }, + { + "epoch": 0.06910924930182151, + "grad_norm": 616.0, + "learning_rate": 9.959965452291576e-05, + "loss": 20.8755, + "step": 1658 + }, + { + "epoch": 0.06915093159934976, + "grad_norm": 362.0, + "learning_rate": 9.95988015968144e-05, + "loss": 14.1884, + "step": 1659 + }, + { + "epoch": 0.069192613896878, + "grad_norm": 176.0, + "learning_rate": 9.959794776677049e-05, + "loss": 9.5014, + "step": 1660 + }, + { + "epoch": 0.06923429619440624, + "grad_norm": 800.0, + "learning_rate": 9.959709303279958e-05, + "loss": 23.5003, + "step": 1661 + }, + { + "epoch": 0.06927597849193448, + "grad_norm": 424.0, + "learning_rate": 9.959623739491724e-05, + "loss": 16.0031, + "step": 1662 + }, + { + "epoch": 0.06931766078946272, + "grad_norm": 338.0, + "learning_rate": 9.959538085313909e-05, + "loss": 13.2538, + "step": 1663 + }, + { + "epoch": 0.06935934308699096, + "grad_norm": 532.0, + "learning_rate": 9.95945234074807e-05, + "loss": 16.2518, + "step": 1664 + }, + { + "epoch": 0.0694010253845192, + "grad_norm": 344.0, + "learning_rate": 9.959366505795771e-05, + "loss": 13.9383, + "step": 1665 + }, + { + "epoch": 0.06944270768204744, + "grad_norm": 348.0, + "learning_rate": 9.959280580458578e-05, + "loss": 12.7512, + "step": 1666 + }, + { + "epoch": 0.06948438997957568, + "grad_norm": 472.0, + "learning_rate": 9.959194564738058e-05, + "loss": 15.8144, + "step": 1667 + }, + { + "epoch": 0.06952607227710392, + "grad_norm": 314.0, + "learning_rate": 9.959108458635775e-05, + "loss": 12.4418, + "step": 1668 + }, + { + "epoch": 0.06956775457463216, + "grad_norm": 472.0, + "learning_rate": 9.959022262153301e-05, + "loss": 17.0005, + "step": 1669 + }, + { + "epoch": 0.0696094368721604, + "grad_norm": 506.0, + "learning_rate": 9.958935975292206e-05, + "loss": 15.8135, + "step": 1670 + }, + { + "epoch": 0.06965111916968864, + "grad_norm": 1328.0, + "learning_rate": 9.958849598054062e-05, + "loss": 29.5081, + "step": 1671 + }, + { + "epoch": 0.06969280146721687, + "grad_norm": 1048.0, + "learning_rate": 9.958763130440444e-05, + "loss": 28.2502, + "step": 1672 + }, + { + "epoch": 0.06973448376474511, + "grad_norm": 244.0, + "learning_rate": 9.958676572452928e-05, + "loss": 11.8133, + "step": 1673 + }, + { + "epoch": 0.06977616606227335, + "grad_norm": 644.0, + "learning_rate": 9.958589924093091e-05, + "loss": 19.1267, + "step": 1674 + }, + { + "epoch": 0.0698178483598016, + "grad_norm": 242.0, + "learning_rate": 9.958503185362513e-05, + "loss": 13.3767, + "step": 1675 + }, + { + "epoch": 0.06985953065732983, + "grad_norm": 378.0, + "learning_rate": 9.958416356262773e-05, + "loss": 13.6256, + "step": 1676 + }, + { + "epoch": 0.06990121295485807, + "grad_norm": 372.0, + "learning_rate": 9.958329436795454e-05, + "loss": 13.9384, + "step": 1677 + }, + { + "epoch": 0.06994289525238631, + "grad_norm": 197.0, + "learning_rate": 9.958242426962144e-05, + "loss": 8.8758, + "step": 1678 + }, + { + "epoch": 0.06998457754991455, + "grad_norm": 296.0, + "learning_rate": 9.958155326764424e-05, + "loss": 12.3755, + "step": 1679 + }, + { + "epoch": 0.07002625984744279, + "grad_norm": 340.0, + "learning_rate": 9.958068136203883e-05, + "loss": 13.6897, + "step": 1680 + }, + { + "epoch": 0.07006794214497103, + "grad_norm": 572.0, + "learning_rate": 9.95798085528211e-05, + "loss": 18.8752, + "step": 1681 + }, + { + "epoch": 0.07010962444249927, + "grad_norm": 272.0, + "learning_rate": 9.957893484000696e-05, + "loss": 12.8759, + "step": 1682 + }, + { + "epoch": 0.07015130674002751, + "grad_norm": 476.0, + "learning_rate": 9.957806022361234e-05, + "loss": 14.1257, + "step": 1683 + }, + { + "epoch": 0.07019298903755575, + "grad_norm": 812.0, + "learning_rate": 9.957718470365315e-05, + "loss": 21.251, + "step": 1684 + }, + { + "epoch": 0.07023467133508399, + "grad_norm": 270.0, + "learning_rate": 9.957630828014539e-05, + "loss": 12.5016, + "step": 1685 + }, + { + "epoch": 0.07027635363261223, + "grad_norm": 284.0, + "learning_rate": 9.9575430953105e-05, + "loss": 12.3757, + "step": 1686 + }, + { + "epoch": 0.07031803593014047, + "grad_norm": 528.0, + "learning_rate": 9.957455272254797e-05, + "loss": 18.6259, + "step": 1687 + }, + { + "epoch": 0.0703597182276687, + "grad_norm": 152.0, + "learning_rate": 9.957367358849033e-05, + "loss": 9.1882, + "step": 1688 + }, + { + "epoch": 0.07040140052519694, + "grad_norm": 396.0, + "learning_rate": 9.957279355094809e-05, + "loss": 14.5005, + "step": 1689 + }, + { + "epoch": 0.07044308282272518, + "grad_norm": 304.0, + "learning_rate": 9.957191260993727e-05, + "loss": 13.0007, + "step": 1690 + }, + { + "epoch": 0.07048476512025342, + "grad_norm": 680.0, + "learning_rate": 9.957103076547395e-05, + "loss": 17.1256, + "step": 1691 + }, + { + "epoch": 0.07052644741778166, + "grad_norm": 196.0, + "learning_rate": 9.957014801757419e-05, + "loss": 11.3131, + "step": 1692 + }, + { + "epoch": 0.0705681297153099, + "grad_norm": 382.0, + "learning_rate": 9.956926436625409e-05, + "loss": 14.5007, + "step": 1693 + }, + { + "epoch": 0.07060981201283814, + "grad_norm": 816.0, + "learning_rate": 9.956837981152975e-05, + "loss": 23.8755, + "step": 1694 + }, + { + "epoch": 0.07065149431036638, + "grad_norm": 296.0, + "learning_rate": 9.956749435341728e-05, + "loss": 12.6883, + "step": 1695 + }, + { + "epoch": 0.07069317660789463, + "grad_norm": 488.0, + "learning_rate": 9.956660799193283e-05, + "loss": 19.6255, + "step": 1696 + }, + { + "epoch": 0.07073485890542287, + "grad_norm": 192.0, + "learning_rate": 9.956572072709254e-05, + "loss": 11.1254, + "step": 1697 + }, + { + "epoch": 0.07077654120295111, + "grad_norm": 210.0, + "learning_rate": 9.95648325589126e-05, + "loss": 12.064, + "step": 1698 + }, + { + "epoch": 0.07081822350047935, + "grad_norm": 288.0, + "learning_rate": 9.956394348740918e-05, + "loss": 12.8755, + "step": 1699 + }, + { + "epoch": 0.07085990579800759, + "grad_norm": 420.0, + "learning_rate": 9.95630535125985e-05, + "loss": 15.1883, + "step": 1700 + }, + { + "epoch": 0.07090158809553583, + "grad_norm": 348.0, + "learning_rate": 9.956216263449676e-05, + "loss": 12.1254, + "step": 1701 + }, + { + "epoch": 0.07094327039306407, + "grad_norm": 274.0, + "learning_rate": 9.956127085312021e-05, + "loss": 11.6259, + "step": 1702 + }, + { + "epoch": 0.07098495269059231, + "grad_norm": 464.0, + "learning_rate": 9.95603781684851e-05, + "loss": 16.5009, + "step": 1703 + }, + { + "epoch": 0.07102663498812055, + "grad_norm": 544.0, + "learning_rate": 9.955948458060768e-05, + "loss": 17.0007, + "step": 1704 + }, + { + "epoch": 0.07106831728564879, + "grad_norm": 76.5, + "learning_rate": 9.955859008950428e-05, + "loss": 7.1879, + "step": 1705 + }, + { + "epoch": 0.07110999958317703, + "grad_norm": 384.0, + "learning_rate": 9.955769469519117e-05, + "loss": 15.5629, + "step": 1706 + }, + { + "epoch": 0.07115168188070527, + "grad_norm": 76.0, + "learning_rate": 9.955679839768467e-05, + "loss": 7.1888, + "step": 1707 + }, + { + "epoch": 0.0711933641782335, + "grad_norm": 968.0, + "learning_rate": 9.955590119700112e-05, + "loss": 28.3755, + "step": 1708 + }, + { + "epoch": 0.07123504647576175, + "grad_norm": 712.0, + "learning_rate": 9.955500309315688e-05, + "loss": 21.3757, + "step": 1709 + }, + { + "epoch": 0.07127672877328999, + "grad_norm": 170.0, + "learning_rate": 9.95541040861683e-05, + "loss": 10.1886, + "step": 1710 + }, + { + "epoch": 0.07131841107081822, + "grad_norm": 370.0, + "learning_rate": 9.955320417605177e-05, + "loss": 15.4381, + "step": 1711 + }, + { + "epoch": 0.07136009336834646, + "grad_norm": 688.0, + "learning_rate": 9.955230336282371e-05, + "loss": 19.5004, + "step": 1712 + }, + { + "epoch": 0.0714017756658747, + "grad_norm": 434.0, + "learning_rate": 9.955140164650049e-05, + "loss": 14.8132, + "step": 1713 + }, + { + "epoch": 0.07144345796340294, + "grad_norm": 286.0, + "learning_rate": 9.955049902709861e-05, + "loss": 13.688, + "step": 1714 + }, + { + "epoch": 0.07148514026093118, + "grad_norm": 392.0, + "learning_rate": 9.954959550463447e-05, + "loss": 11.5632, + "step": 1715 + }, + { + "epoch": 0.07152682255845942, + "grad_norm": 352.0, + "learning_rate": 9.954869107912457e-05, + "loss": 12.8133, + "step": 1716 + }, + { + "epoch": 0.07156850485598766, + "grad_norm": 169.0, + "learning_rate": 9.954778575058537e-05, + "loss": 8.6254, + "step": 1717 + }, + { + "epoch": 0.0716101871535159, + "grad_norm": 354.0, + "learning_rate": 9.954687951903337e-05, + "loss": 14.0003, + "step": 1718 + }, + { + "epoch": 0.07165186945104414, + "grad_norm": 1200.0, + "learning_rate": 9.954597238448509e-05, + "loss": 30.1255, + "step": 1719 + }, + { + "epoch": 0.07169355174857238, + "grad_norm": 232.0, + "learning_rate": 9.954506434695707e-05, + "loss": 10.8756, + "step": 1720 + }, + { + "epoch": 0.07173523404610062, + "grad_norm": 520.0, + "learning_rate": 9.954415540646586e-05, + "loss": 16.1256, + "step": 1721 + }, + { + "epoch": 0.07177691634362886, + "grad_norm": 528.0, + "learning_rate": 9.9543245563028e-05, + "loss": 16.0012, + "step": 1722 + }, + { + "epoch": 0.0718185986411571, + "grad_norm": 508.0, + "learning_rate": 9.95423348166601e-05, + "loss": 16.6255, + "step": 1723 + }, + { + "epoch": 0.07186028093868534, + "grad_norm": 516.0, + "learning_rate": 9.954142316737877e-05, + "loss": 14.6906, + "step": 1724 + }, + { + "epoch": 0.07190196323621358, + "grad_norm": 624.0, + "learning_rate": 9.954051061520058e-05, + "loss": 18.7504, + "step": 1725 + }, + { + "epoch": 0.07194364553374182, + "grad_norm": 139.0, + "learning_rate": 9.95395971601422e-05, + "loss": 9.564, + "step": 1726 + }, + { + "epoch": 0.07198532783127005, + "grad_norm": 96.0, + "learning_rate": 9.953868280222026e-05, + "loss": 7.4696, + "step": 1727 + }, + { + "epoch": 0.0720270101287983, + "grad_norm": 510.0, + "learning_rate": 9.953776754145144e-05, + "loss": 17.3758, + "step": 1728 + }, + { + "epoch": 0.07206869242632653, + "grad_norm": 472.0, + "learning_rate": 9.953685137785238e-05, + "loss": 17.1258, + "step": 1729 + }, + { + "epoch": 0.07211037472385477, + "grad_norm": 254.0, + "learning_rate": 9.953593431143982e-05, + "loss": 10.8759, + "step": 1730 + }, + { + "epoch": 0.07215205702138301, + "grad_norm": 688.0, + "learning_rate": 9.953501634223047e-05, + "loss": 20.3755, + "step": 1731 + }, + { + "epoch": 0.07219373931891127, + "grad_norm": 676.0, + "learning_rate": 9.953409747024105e-05, + "loss": 21.8753, + "step": 1732 + }, + { + "epoch": 0.0722354216164395, + "grad_norm": 392.0, + "learning_rate": 9.953317769548829e-05, + "loss": 15.438, + "step": 1733 + }, + { + "epoch": 0.07227710391396774, + "grad_norm": 194.0, + "learning_rate": 9.953225701798899e-05, + "loss": 11.9384, + "step": 1734 + }, + { + "epoch": 0.07231878621149598, + "grad_norm": 286.0, + "learning_rate": 9.953133543775989e-05, + "loss": 12.7506, + "step": 1735 + }, + { + "epoch": 0.07236046850902422, + "grad_norm": 648.0, + "learning_rate": 9.95304129548178e-05, + "loss": 20.0026, + "step": 1736 + }, + { + "epoch": 0.07240215080655246, + "grad_norm": 752.0, + "learning_rate": 9.952948956917956e-05, + "loss": 22.1262, + "step": 1737 + }, + { + "epoch": 0.0724438331040807, + "grad_norm": 544.0, + "learning_rate": 9.952856528086197e-05, + "loss": 17.7514, + "step": 1738 + }, + { + "epoch": 0.07248551540160894, + "grad_norm": 256.0, + "learning_rate": 9.952764008988187e-05, + "loss": 11.5643, + "step": 1739 + }, + { + "epoch": 0.07252719769913718, + "grad_norm": 210.0, + "learning_rate": 9.952671399625613e-05, + "loss": 10.3756, + "step": 1740 + }, + { + "epoch": 0.07256887999666542, + "grad_norm": 660.0, + "learning_rate": 9.952578700000163e-05, + "loss": 19.6281, + "step": 1741 + }, + { + "epoch": 0.07261056229419366, + "grad_norm": 484.0, + "learning_rate": 9.952485910113529e-05, + "loss": 16.7509, + "step": 1742 + }, + { + "epoch": 0.0726522445917219, + "grad_norm": 340.0, + "learning_rate": 9.952393029967397e-05, + "loss": 13.8756, + "step": 1743 + }, + { + "epoch": 0.07269392688925014, + "grad_norm": 528.0, + "learning_rate": 9.952300059563464e-05, + "loss": 17.6255, + "step": 1744 + }, + { + "epoch": 0.07273560918677838, + "grad_norm": 392.0, + "learning_rate": 9.952206998903422e-05, + "loss": 16.3761, + "step": 1745 + }, + { + "epoch": 0.07277729148430662, + "grad_norm": 296.0, + "learning_rate": 9.952113847988969e-05, + "loss": 11.7507, + "step": 1746 + }, + { + "epoch": 0.07281897378183486, + "grad_norm": 236.0, + "learning_rate": 9.952020606821799e-05, + "loss": 10.6256, + "step": 1747 + }, + { + "epoch": 0.0728606560793631, + "grad_norm": 408.0, + "learning_rate": 9.951927275403616e-05, + "loss": 12.313, + "step": 1748 + }, + { + "epoch": 0.07290233837689133, + "grad_norm": 600.0, + "learning_rate": 9.95183385373612e-05, + "loss": 18.2505, + "step": 1749 + }, + { + "epoch": 0.07294402067441957, + "grad_norm": 290.0, + "learning_rate": 9.951740341821008e-05, + "loss": 11.5637, + "step": 1750 + }, + { + "epoch": 0.07298570297194781, + "grad_norm": 158.0, + "learning_rate": 9.951646739659993e-05, + "loss": 8.6276, + "step": 1751 + }, + { + "epoch": 0.07302738526947605, + "grad_norm": 286.0, + "learning_rate": 9.951553047254774e-05, + "loss": 12.376, + "step": 1752 + }, + { + "epoch": 0.07306906756700429, + "grad_norm": 150.0, + "learning_rate": 9.951459264607062e-05, + "loss": 8.0632, + "step": 1753 + }, + { + "epoch": 0.07311074986453253, + "grad_norm": 199.0, + "learning_rate": 9.951365391718565e-05, + "loss": 11.5004, + "step": 1754 + }, + { + "epoch": 0.07315243216206077, + "grad_norm": 278.0, + "learning_rate": 9.951271428590995e-05, + "loss": 13.0632, + "step": 1755 + }, + { + "epoch": 0.07319411445958901, + "grad_norm": 62.5, + "learning_rate": 9.951177375226064e-05, + "loss": 7.5637, + "step": 1756 + }, + { + "epoch": 0.07323579675711725, + "grad_norm": 460.0, + "learning_rate": 9.951083231625485e-05, + "loss": 16.6268, + "step": 1757 + }, + { + "epoch": 0.07327747905464549, + "grad_norm": 96.5, + "learning_rate": 9.950988997790974e-05, + "loss": 6.0321, + "step": 1758 + }, + { + "epoch": 0.07331916135217373, + "grad_norm": 632.0, + "learning_rate": 9.950894673724249e-05, + "loss": 19.5011, + "step": 1759 + }, + { + "epoch": 0.07336084364970197, + "grad_norm": 284.0, + "learning_rate": 9.950800259427031e-05, + "loss": 12.5005, + "step": 1760 + }, + { + "epoch": 0.07340252594723021, + "grad_norm": 386.0, + "learning_rate": 9.950705754901038e-05, + "loss": 14.2509, + "step": 1761 + }, + { + "epoch": 0.07344420824475845, + "grad_norm": 390.0, + "learning_rate": 9.950611160147991e-05, + "loss": 14.4384, + "step": 1762 + }, + { + "epoch": 0.07348589054228669, + "grad_norm": 112.0, + "learning_rate": 9.950516475169618e-05, + "loss": 8.9396, + "step": 1763 + }, + { + "epoch": 0.07352757283981493, + "grad_norm": 260.0, + "learning_rate": 9.950421699967642e-05, + "loss": 12.7507, + "step": 1764 + }, + { + "epoch": 0.07356925513734316, + "grad_norm": 225.0, + "learning_rate": 9.950326834543792e-05, + "loss": 10.9381, + "step": 1765 + }, + { + "epoch": 0.0736109374348714, + "grad_norm": 442.0, + "learning_rate": 9.950231878899796e-05, + "loss": 16.626, + "step": 1766 + }, + { + "epoch": 0.07365261973239964, + "grad_norm": 494.0, + "learning_rate": 9.950136833037385e-05, + "loss": 15.3763, + "step": 1767 + }, + { + "epoch": 0.07369430202992788, + "grad_norm": 374.0, + "learning_rate": 9.950041696958289e-05, + "loss": 15.3129, + "step": 1768 + }, + { + "epoch": 0.07373598432745614, + "grad_norm": 236.0, + "learning_rate": 9.949946470664245e-05, + "loss": 9.2506, + "step": 1769 + }, + { + "epoch": 0.07377766662498438, + "grad_norm": 231.0, + "learning_rate": 9.949851154156986e-05, + "loss": 10.1886, + "step": 1770 + }, + { + "epoch": 0.07381934892251261, + "grad_norm": 245.0, + "learning_rate": 9.949755747438252e-05, + "loss": 12.5004, + "step": 1771 + }, + { + "epoch": 0.07386103122004085, + "grad_norm": 560.0, + "learning_rate": 9.949660250509779e-05, + "loss": 18.001, + "step": 1772 + }, + { + "epoch": 0.0739027135175691, + "grad_norm": 404.0, + "learning_rate": 9.949564663373307e-05, + "loss": 15.126, + "step": 1773 + }, + { + "epoch": 0.07394439581509733, + "grad_norm": 482.0, + "learning_rate": 9.949468986030582e-05, + "loss": 15.3759, + "step": 1774 + }, + { + "epoch": 0.07398607811262557, + "grad_norm": 384.0, + "learning_rate": 9.949373218483344e-05, + "loss": 13.3145, + "step": 1775 + }, + { + "epoch": 0.07402776041015381, + "grad_norm": 368.0, + "learning_rate": 9.949277360733341e-05, + "loss": 14.2505, + "step": 1776 + }, + { + "epoch": 0.07406944270768205, + "grad_norm": 366.0, + "learning_rate": 9.949181412782318e-05, + "loss": 13.6881, + "step": 1777 + }, + { + "epoch": 0.07411112500521029, + "grad_norm": 334.0, + "learning_rate": 9.949085374632026e-05, + "loss": 13.189, + "step": 1778 + }, + { + "epoch": 0.07415280730273853, + "grad_norm": 520.0, + "learning_rate": 9.948989246284211e-05, + "loss": 17.751, + "step": 1779 + }, + { + "epoch": 0.07419448960026677, + "grad_norm": 460.0, + "learning_rate": 9.948893027740629e-05, + "loss": 15.5628, + "step": 1780 + }, + { + "epoch": 0.07423617189779501, + "grad_norm": 498.0, + "learning_rate": 9.948796719003033e-05, + "loss": 17.1265, + "step": 1781 + }, + { + "epoch": 0.07427785419532325, + "grad_norm": 155.0, + "learning_rate": 9.948700320073177e-05, + "loss": 10.8134, + "step": 1782 + }, + { + "epoch": 0.07431953649285149, + "grad_norm": 99.5, + "learning_rate": 9.948603830952816e-05, + "loss": 7.1576, + "step": 1783 + }, + { + "epoch": 0.07436121879037973, + "grad_norm": 310.0, + "learning_rate": 9.948507251643712e-05, + "loss": 13.4383, + "step": 1784 + }, + { + "epoch": 0.07440290108790797, + "grad_norm": 564.0, + "learning_rate": 9.948410582147627e-05, + "loss": 19.8755, + "step": 1785 + }, + { + "epoch": 0.0744445833854362, + "grad_norm": 470.0, + "learning_rate": 9.948313822466317e-05, + "loss": 16.3754, + "step": 1786 + }, + { + "epoch": 0.07448626568296444, + "grad_norm": 568.0, + "learning_rate": 9.948216972601549e-05, + "loss": 18.0007, + "step": 1787 + }, + { + "epoch": 0.07452794798049268, + "grad_norm": 95.5, + "learning_rate": 9.948120032555088e-05, + "loss": 7.1879, + "step": 1788 + }, + { + "epoch": 0.07456963027802092, + "grad_norm": 125.5, + "learning_rate": 9.948023002328699e-05, + "loss": 9.5632, + "step": 1789 + }, + { + "epoch": 0.07461131257554916, + "grad_norm": 800.0, + "learning_rate": 9.947925881924151e-05, + "loss": 22.2535, + "step": 1790 + }, + { + "epoch": 0.0746529948730774, + "grad_norm": 474.0, + "learning_rate": 9.947828671343217e-05, + "loss": 16.0006, + "step": 1791 + }, + { + "epoch": 0.07469467717060564, + "grad_norm": 260.0, + "learning_rate": 9.947731370587665e-05, + "loss": 10.1254, + "step": 1792 + }, + { + "epoch": 0.07473635946813388, + "grad_norm": 760.0, + "learning_rate": 9.94763397965927e-05, + "loss": 18.3799, + "step": 1793 + }, + { + "epoch": 0.07477804176566212, + "grad_norm": 520.0, + "learning_rate": 9.947536498559805e-05, + "loss": 17.7508, + "step": 1794 + }, + { + "epoch": 0.07481972406319036, + "grad_norm": 458.0, + "learning_rate": 9.94743892729105e-05, + "loss": 17.5002, + "step": 1795 + }, + { + "epoch": 0.0748614063607186, + "grad_norm": 314.0, + "learning_rate": 9.94734126585478e-05, + "loss": 13.754, + "step": 1796 + }, + { + "epoch": 0.07490308865824684, + "grad_norm": 428.0, + "learning_rate": 9.947243514252776e-05, + "loss": 17.0005, + "step": 1797 + }, + { + "epoch": 0.07494477095577508, + "grad_norm": 584.0, + "learning_rate": 9.947145672486822e-05, + "loss": 19.1256, + "step": 1798 + }, + { + "epoch": 0.07498645325330332, + "grad_norm": 932.0, + "learning_rate": 9.947047740558697e-05, + "loss": 24.6258, + "step": 1799 + }, + { + "epoch": 0.07502813555083156, + "grad_norm": 252.0, + "learning_rate": 9.946949718470188e-05, + "loss": 12.1256, + "step": 1800 + }, + { + "epoch": 0.0750698178483598, + "grad_norm": 312.0, + "learning_rate": 9.946851606223081e-05, + "loss": 10.5005, + "step": 1801 + }, + { + "epoch": 0.07511150014588804, + "grad_norm": 732.0, + "learning_rate": 9.946753403819164e-05, + "loss": 23.0007, + "step": 1802 + }, + { + "epoch": 0.07515318244341627, + "grad_norm": 392.0, + "learning_rate": 9.946655111260228e-05, + "loss": 13.8143, + "step": 1803 + }, + { + "epoch": 0.07519486474094451, + "grad_norm": 528.0, + "learning_rate": 9.946556728548065e-05, + "loss": 16.8795, + "step": 1804 + }, + { + "epoch": 0.07523654703847277, + "grad_norm": 1800.0, + "learning_rate": 9.946458255684464e-05, + "loss": 37.7548, + "step": 1805 + }, + { + "epoch": 0.075278229336001, + "grad_norm": 416.0, + "learning_rate": 9.946359692671222e-05, + "loss": 15.9383, + "step": 1806 + }, + { + "epoch": 0.07531991163352925, + "grad_norm": 486.0, + "learning_rate": 9.946261039510136e-05, + "loss": 17.3755, + "step": 1807 + }, + { + "epoch": 0.07536159393105749, + "grad_norm": 253.0, + "learning_rate": 9.946162296203005e-05, + "loss": 12.2506, + "step": 1808 + }, + { + "epoch": 0.07540327622858572, + "grad_norm": 244.0, + "learning_rate": 9.946063462751626e-05, + "loss": 8.6878, + "step": 1809 + }, + { + "epoch": 0.07544495852611396, + "grad_norm": 223.0, + "learning_rate": 9.945964539157801e-05, + "loss": 11.2503, + "step": 1810 + }, + { + "epoch": 0.0754866408236422, + "grad_norm": 274.0, + "learning_rate": 9.945865525423334e-05, + "loss": 12.5006, + "step": 1811 + }, + { + "epoch": 0.07552832312117044, + "grad_norm": 258.0, + "learning_rate": 9.945766421550028e-05, + "loss": 11.4382, + "step": 1812 + }, + { + "epoch": 0.07557000541869868, + "grad_norm": 268.0, + "learning_rate": 9.94566722753969e-05, + "loss": 12.7507, + "step": 1813 + }, + { + "epoch": 0.07561168771622692, + "grad_norm": 704.0, + "learning_rate": 9.945567943394127e-05, + "loss": 21.7505, + "step": 1814 + }, + { + "epoch": 0.07565337001375516, + "grad_norm": 416.0, + "learning_rate": 9.945468569115151e-05, + "loss": 15.3128, + "step": 1815 + }, + { + "epoch": 0.0756950523112834, + "grad_norm": 668.0, + "learning_rate": 9.94536910470457e-05, + "loss": 18.5006, + "step": 1816 + }, + { + "epoch": 0.07573673460881164, + "grad_norm": 1104.0, + "learning_rate": 9.945269550164199e-05, + "loss": 28.6256, + "step": 1817 + }, + { + "epoch": 0.07577841690633988, + "grad_norm": 187.0, + "learning_rate": 9.94516990549585e-05, + "loss": 10.1924, + "step": 1818 + }, + { + "epoch": 0.07582009920386812, + "grad_norm": 326.0, + "learning_rate": 9.945070170701342e-05, + "loss": 13.751, + "step": 1819 + }, + { + "epoch": 0.07586178150139636, + "grad_norm": 304.0, + "learning_rate": 9.944970345782491e-05, + "loss": 12.6279, + "step": 1820 + }, + { + "epoch": 0.0759034637989246, + "grad_norm": 476.0, + "learning_rate": 9.944870430741115e-05, + "loss": 17.0016, + "step": 1821 + }, + { + "epoch": 0.07594514609645284, + "grad_norm": 322.0, + "learning_rate": 9.944770425579037e-05, + "loss": 14.0634, + "step": 1822 + }, + { + "epoch": 0.07598682839398108, + "grad_norm": 396.0, + "learning_rate": 9.94467033029808e-05, + "loss": 15.3132, + "step": 1823 + }, + { + "epoch": 0.07602851069150932, + "grad_norm": 115.5, + "learning_rate": 9.944570144900067e-05, + "loss": 7.9384, + "step": 1824 + }, + { + "epoch": 0.07607019298903755, + "grad_norm": 600.0, + "learning_rate": 9.944469869386824e-05, + "loss": 18.3754, + "step": 1825 + }, + { + "epoch": 0.0761118752865658, + "grad_norm": 266.0, + "learning_rate": 9.944369503760179e-05, + "loss": 10.938, + "step": 1826 + }, + { + "epoch": 0.07615355758409403, + "grad_norm": 364.0, + "learning_rate": 9.94426904802196e-05, + "loss": 13.5632, + "step": 1827 + }, + { + "epoch": 0.07619523988162227, + "grad_norm": 452.0, + "learning_rate": 9.944168502173999e-05, + "loss": 15.7513, + "step": 1828 + }, + { + "epoch": 0.07623692217915051, + "grad_norm": 272.0, + "learning_rate": 9.94406786621813e-05, + "loss": 12.3129, + "step": 1829 + }, + { + "epoch": 0.07627860447667875, + "grad_norm": 81.0, + "learning_rate": 9.943967140156182e-05, + "loss": 6.9693, + "step": 1830 + }, + { + "epoch": 0.07632028677420699, + "grad_norm": 1392.0, + "learning_rate": 9.943866323989996e-05, + "loss": 37.2507, + "step": 1831 + }, + { + "epoch": 0.07636196907173523, + "grad_norm": 482.0, + "learning_rate": 9.943765417721407e-05, + "loss": 17.3759, + "step": 1832 + }, + { + "epoch": 0.07640365136926347, + "grad_norm": 199.0, + "learning_rate": 9.943664421352255e-05, + "loss": 10.2516, + "step": 1833 + }, + { + "epoch": 0.07644533366679171, + "grad_norm": 446.0, + "learning_rate": 9.943563334884379e-05, + "loss": 14.5009, + "step": 1834 + }, + { + "epoch": 0.07648701596431995, + "grad_norm": 280.0, + "learning_rate": 9.943462158319622e-05, + "loss": 12.9389, + "step": 1835 + }, + { + "epoch": 0.07652869826184819, + "grad_norm": 390.0, + "learning_rate": 9.94336089165983e-05, + "loss": 14.2504, + "step": 1836 + }, + { + "epoch": 0.07657038055937643, + "grad_norm": 71.5, + "learning_rate": 9.943259534906846e-05, + "loss": 5.6878, + "step": 1837 + }, + { + "epoch": 0.07661206285690467, + "grad_norm": 230.0, + "learning_rate": 9.943158088062518e-05, + "loss": 12.3135, + "step": 1838 + }, + { + "epoch": 0.0766537451544329, + "grad_norm": 185.0, + "learning_rate": 9.943056551128694e-05, + "loss": 10.1254, + "step": 1839 + }, + { + "epoch": 0.07669542745196115, + "grad_norm": 84.5, + "learning_rate": 9.942954924107227e-05, + "loss": 7.6259, + "step": 1840 + }, + { + "epoch": 0.07673710974948939, + "grad_norm": 304.0, + "learning_rate": 9.942853206999967e-05, + "loss": 12.8754, + "step": 1841 + }, + { + "epoch": 0.07677879204701764, + "grad_norm": 412.0, + "learning_rate": 9.94275139980877e-05, + "loss": 14.0675, + "step": 1842 + }, + { + "epoch": 0.07682047434454588, + "grad_norm": 133.0, + "learning_rate": 9.942649502535489e-05, + "loss": 6.5323, + "step": 1843 + }, + { + "epoch": 0.07686215664207412, + "grad_norm": 456.0, + "learning_rate": 9.942547515181982e-05, + "loss": 17.1265, + "step": 1844 + }, + { + "epoch": 0.07690383893960236, + "grad_norm": 632.0, + "learning_rate": 9.942445437750108e-05, + "loss": 20.1258, + "step": 1845 + }, + { + "epoch": 0.0769455212371306, + "grad_norm": 260.0, + "learning_rate": 9.942343270241725e-05, + "loss": 11.0005, + "step": 1846 + }, + { + "epoch": 0.07698720353465884, + "grad_norm": 66.5, + "learning_rate": 9.9422410126587e-05, + "loss": 5.6883, + "step": 1847 + }, + { + "epoch": 0.07702888583218707, + "grad_norm": 402.0, + "learning_rate": 9.942138665002892e-05, + "loss": 14.5014, + "step": 1848 + }, + { + "epoch": 0.07707056812971531, + "grad_norm": 211.0, + "learning_rate": 9.94203622727617e-05, + "loss": 10.8754, + "step": 1849 + }, + { + "epoch": 0.07711225042724355, + "grad_norm": 512.0, + "learning_rate": 9.941933699480397e-05, + "loss": 17.5016, + "step": 1850 + }, + { + "epoch": 0.07715393272477179, + "grad_norm": 248.0, + "learning_rate": 9.941831081617445e-05, + "loss": 9.5005, + "step": 1851 + }, + { + "epoch": 0.07719561502230003, + "grad_norm": 255.0, + "learning_rate": 9.941728373689182e-05, + "loss": 10.376, + "step": 1852 + }, + { + "epoch": 0.07723729731982827, + "grad_norm": 121.0, + "learning_rate": 9.941625575697481e-05, + "loss": 6.0022, + "step": 1853 + }, + { + "epoch": 0.07727897961735651, + "grad_norm": 532.0, + "learning_rate": 9.941522687644216e-05, + "loss": 15.2555, + "step": 1854 + }, + { + "epoch": 0.07732066191488475, + "grad_norm": 500.0, + "learning_rate": 9.94141970953126e-05, + "loss": 16.8766, + "step": 1855 + }, + { + "epoch": 0.07736234421241299, + "grad_norm": 528.0, + "learning_rate": 9.941316641360492e-05, + "loss": 17.6259, + "step": 1856 + }, + { + "epoch": 0.07740402650994123, + "grad_norm": 74.5, + "learning_rate": 9.941213483133788e-05, + "loss": 8.2506, + "step": 1857 + }, + { + "epoch": 0.07744570880746947, + "grad_norm": 253.0, + "learning_rate": 9.941110234853033e-05, + "loss": 11.9383, + "step": 1858 + }, + { + "epoch": 0.07748739110499771, + "grad_norm": 374.0, + "learning_rate": 9.941006896520102e-05, + "loss": 13.5631, + "step": 1859 + }, + { + "epoch": 0.07752907340252595, + "grad_norm": 536.0, + "learning_rate": 9.940903468136884e-05, + "loss": 18.7504, + "step": 1860 + }, + { + "epoch": 0.07757075570005419, + "grad_norm": 396.0, + "learning_rate": 9.940799949705259e-05, + "loss": 14.438, + "step": 1861 + }, + { + "epoch": 0.07761243799758243, + "grad_norm": 220.0, + "learning_rate": 9.940696341227119e-05, + "loss": 11.0017, + "step": 1862 + }, + { + "epoch": 0.07765412029511067, + "grad_norm": 788.0, + "learning_rate": 9.940592642704348e-05, + "loss": 20.1262, + "step": 1863 + }, + { + "epoch": 0.0776958025926389, + "grad_norm": 316.0, + "learning_rate": 9.940488854138839e-05, + "loss": 11.9385, + "step": 1864 + }, + { + "epoch": 0.07773748489016714, + "grad_norm": 147.0, + "learning_rate": 9.94038497553248e-05, + "loss": 11.0005, + "step": 1865 + }, + { + "epoch": 0.07777916718769538, + "grad_norm": 176.0, + "learning_rate": 9.940281006887168e-05, + "loss": 9.3141, + "step": 1866 + }, + { + "epoch": 0.07782084948522362, + "grad_norm": 135.0, + "learning_rate": 9.940176948204795e-05, + "loss": 9.3759, + "step": 1867 + }, + { + "epoch": 0.07786253178275186, + "grad_norm": 336.0, + "learning_rate": 9.940072799487259e-05, + "loss": 14.0007, + "step": 1868 + }, + { + "epoch": 0.0779042140802801, + "grad_norm": 360.0, + "learning_rate": 9.939968560736458e-05, + "loss": 13.2508, + "step": 1869 + }, + { + "epoch": 0.07794589637780834, + "grad_norm": 266.0, + "learning_rate": 9.939864231954292e-05, + "loss": 11.5639, + "step": 1870 + }, + { + "epoch": 0.07798757867533658, + "grad_norm": 780.0, + "learning_rate": 9.93975981314266e-05, + "loss": 19.7529, + "step": 1871 + }, + { + "epoch": 0.07802926097286482, + "grad_norm": 378.0, + "learning_rate": 9.939655304303468e-05, + "loss": 14.754, + "step": 1872 + }, + { + "epoch": 0.07807094327039306, + "grad_norm": 516.0, + "learning_rate": 9.93955070543862e-05, + "loss": 16.7504, + "step": 1873 + }, + { + "epoch": 0.0781126255679213, + "grad_norm": 384.0, + "learning_rate": 9.93944601655002e-05, + "loss": 14.3132, + "step": 1874 + }, + { + "epoch": 0.07815430786544954, + "grad_norm": 266.0, + "learning_rate": 9.93934123763958e-05, + "loss": 11.7512, + "step": 1875 + }, + { + "epoch": 0.07819599016297778, + "grad_norm": 280.0, + "learning_rate": 9.939236368709207e-05, + "loss": 13.251, + "step": 1876 + }, + { + "epoch": 0.07823767246050602, + "grad_norm": 380.0, + "learning_rate": 9.939131409760811e-05, + "loss": 14.2507, + "step": 1877 + }, + { + "epoch": 0.07827935475803427, + "grad_norm": 312.0, + "learning_rate": 9.939026360796309e-05, + "loss": 12.6889, + "step": 1878 + }, + { + "epoch": 0.07832103705556251, + "grad_norm": 276.0, + "learning_rate": 9.938921221817612e-05, + "loss": 13.1285, + "step": 1879 + }, + { + "epoch": 0.07836271935309075, + "grad_norm": 312.0, + "learning_rate": 9.938815992826638e-05, + "loss": 13.0001, + "step": 1880 + }, + { + "epoch": 0.07840440165061899, + "grad_norm": 472.0, + "learning_rate": 9.938710673825302e-05, + "loss": 17.7505, + "step": 1881 + }, + { + "epoch": 0.07844608394814723, + "grad_norm": 896.0, + "learning_rate": 9.938605264815529e-05, + "loss": 25.2529, + "step": 1882 + }, + { + "epoch": 0.07848776624567547, + "grad_norm": 258.0, + "learning_rate": 9.938499765799233e-05, + "loss": 12.5028, + "step": 1883 + }, + { + "epoch": 0.0785294485432037, + "grad_norm": 568.0, + "learning_rate": 9.938394176778343e-05, + "loss": 18.3758, + "step": 1884 + }, + { + "epoch": 0.07857113084073195, + "grad_norm": 214.0, + "learning_rate": 9.938288497754779e-05, + "loss": 11.8134, + "step": 1885 + }, + { + "epoch": 0.07861281313826018, + "grad_norm": 434.0, + "learning_rate": 9.938182728730469e-05, + "loss": 10.0634, + "step": 1886 + }, + { + "epoch": 0.07865449543578842, + "grad_norm": 684.0, + "learning_rate": 9.938076869707343e-05, + "loss": 21.876, + "step": 1887 + }, + { + "epoch": 0.07869617773331666, + "grad_norm": 376.0, + "learning_rate": 9.937970920687324e-05, + "loss": 11.9406, + "step": 1888 + }, + { + "epoch": 0.0787378600308449, + "grad_norm": 1120.0, + "learning_rate": 9.937864881672347e-05, + "loss": 26.2564, + "step": 1889 + }, + { + "epoch": 0.07877954232837314, + "grad_norm": 272.0, + "learning_rate": 9.937758752664347e-05, + "loss": 13.5628, + "step": 1890 + }, + { + "epoch": 0.07882122462590138, + "grad_norm": 540.0, + "learning_rate": 9.937652533665253e-05, + "loss": 17.8768, + "step": 1891 + }, + { + "epoch": 0.07886290692342962, + "grad_norm": 540.0, + "learning_rate": 9.937546224677005e-05, + "loss": 16.5004, + "step": 1892 + }, + { + "epoch": 0.07890458922095786, + "grad_norm": 1384.0, + "learning_rate": 9.937439825701538e-05, + "loss": 26.3812, + "step": 1893 + }, + { + "epoch": 0.0789462715184861, + "grad_norm": 139.0, + "learning_rate": 9.937333336740791e-05, + "loss": 10.3761, + "step": 1894 + }, + { + "epoch": 0.07898795381601434, + "grad_norm": 326.0, + "learning_rate": 9.937226757796706e-05, + "loss": 11.938, + "step": 1895 + }, + { + "epoch": 0.07902963611354258, + "grad_norm": 368.0, + "learning_rate": 9.937120088871226e-05, + "loss": 14.1254, + "step": 1896 + }, + { + "epoch": 0.07907131841107082, + "grad_norm": 426.0, + "learning_rate": 9.937013329966293e-05, + "loss": 15.188, + "step": 1897 + }, + { + "epoch": 0.07911300070859906, + "grad_norm": 334.0, + "learning_rate": 9.936906481083854e-05, + "loss": 13.1257, + "step": 1898 + }, + { + "epoch": 0.0791546830061273, + "grad_norm": 438.0, + "learning_rate": 9.936799542225856e-05, + "loss": 16.3755, + "step": 1899 + }, + { + "epoch": 0.07919636530365554, + "grad_norm": 482.0, + "learning_rate": 9.936692513394247e-05, + "loss": 16.5003, + "step": 1900 + }, + { + "epoch": 0.07923804760118378, + "grad_norm": 178.0, + "learning_rate": 9.936585394590982e-05, + "loss": 7.0017, + "step": 1901 + }, + { + "epoch": 0.07927972989871201, + "grad_norm": 476.0, + "learning_rate": 9.936478185818008e-05, + "loss": 16.7506, + "step": 1902 + }, + { + "epoch": 0.07932141219624025, + "grad_norm": 150.0, + "learning_rate": 9.936370887077281e-05, + "loss": 9.4395, + "step": 1903 + }, + { + "epoch": 0.0793630944937685, + "grad_norm": 187.0, + "learning_rate": 9.936263498370756e-05, + "loss": 10.5004, + "step": 1904 + }, + { + "epoch": 0.07940477679129673, + "grad_norm": 548.0, + "learning_rate": 9.936156019700391e-05, + "loss": 17.2506, + "step": 1905 + }, + { + "epoch": 0.07944645908882497, + "grad_norm": 326.0, + "learning_rate": 9.936048451068144e-05, + "loss": 14.8775, + "step": 1906 + }, + { + "epoch": 0.07948814138635321, + "grad_norm": 724.0, + "learning_rate": 9.935940792475975e-05, + "loss": 19.6262, + "step": 1907 + }, + { + "epoch": 0.07952982368388145, + "grad_norm": 298.0, + "learning_rate": 9.935833043925848e-05, + "loss": 12.5636, + "step": 1908 + }, + { + "epoch": 0.07957150598140969, + "grad_norm": 228.0, + "learning_rate": 9.935725205419726e-05, + "loss": 10.0006, + "step": 1909 + }, + { + "epoch": 0.07961318827893793, + "grad_norm": 824.0, + "learning_rate": 9.935617276959574e-05, + "loss": 21.5066, + "step": 1910 + }, + { + "epoch": 0.07965487057646617, + "grad_norm": 1680.0, + "learning_rate": 9.935509258547358e-05, + "loss": 32.0062, + "step": 1911 + }, + { + "epoch": 0.07969655287399441, + "grad_norm": 364.0, + "learning_rate": 9.935401150185048e-05, + "loss": 16.1274, + "step": 1912 + }, + { + "epoch": 0.07973823517152265, + "grad_norm": 1248.0, + "learning_rate": 9.935292951874613e-05, + "loss": 32.001, + "step": 1913 + }, + { + "epoch": 0.07977991746905089, + "grad_norm": 312.0, + "learning_rate": 9.935184663618026e-05, + "loss": 13.8754, + "step": 1914 + }, + { + "epoch": 0.07982159976657914, + "grad_norm": 213.0, + "learning_rate": 9.935076285417262e-05, + "loss": 10.5628, + "step": 1915 + }, + { + "epoch": 0.07986328206410738, + "grad_norm": 548.0, + "learning_rate": 9.934967817274294e-05, + "loss": 17.7504, + "step": 1916 + }, + { + "epoch": 0.07990496436163562, + "grad_norm": 242.0, + "learning_rate": 9.934859259191099e-05, + "loss": 11.7502, + "step": 1917 + }, + { + "epoch": 0.07994664665916386, + "grad_norm": 190.0, + "learning_rate": 9.934750611169656e-05, + "loss": 9.938, + "step": 1918 + }, + { + "epoch": 0.0799883289566921, + "grad_norm": 824.0, + "learning_rate": 9.934641873211945e-05, + "loss": 20.6296, + "step": 1919 + }, + { + "epoch": 0.08003001125422034, + "grad_norm": 324.0, + "learning_rate": 9.934533045319949e-05, + "loss": 13.3755, + "step": 1920 + }, + { + "epoch": 0.08007169355174858, + "grad_norm": 356.0, + "learning_rate": 9.934424127495649e-05, + "loss": 14.1258, + "step": 1921 + }, + { + "epoch": 0.08011337584927682, + "grad_norm": 310.0, + "learning_rate": 9.93431511974103e-05, + "loss": 10.2508, + "step": 1922 + }, + { + "epoch": 0.08015505814680506, + "grad_norm": 122.0, + "learning_rate": 9.934206022058083e-05, + "loss": 9.2523, + "step": 1923 + }, + { + "epoch": 0.0801967404443333, + "grad_norm": 380.0, + "learning_rate": 9.934096834448792e-05, + "loss": 15.0634, + "step": 1924 + }, + { + "epoch": 0.08023842274186153, + "grad_norm": 1064.0, + "learning_rate": 9.933987556915148e-05, + "loss": 28.3763, + "step": 1925 + }, + { + "epoch": 0.08028010503938977, + "grad_norm": 344.0, + "learning_rate": 9.933878189459142e-05, + "loss": 13.4378, + "step": 1926 + }, + { + "epoch": 0.08032178733691801, + "grad_norm": 430.0, + "learning_rate": 9.933768732082768e-05, + "loss": 16.2506, + "step": 1927 + }, + { + "epoch": 0.08036346963444625, + "grad_norm": 856.0, + "learning_rate": 9.93365918478802e-05, + "loss": 21.2554, + "step": 1928 + }, + { + "epoch": 0.08040515193197449, + "grad_norm": 113.0, + "learning_rate": 9.933549547576898e-05, + "loss": 8.3756, + "step": 1929 + }, + { + "epoch": 0.08044683422950273, + "grad_norm": 147.0, + "learning_rate": 9.933439820451395e-05, + "loss": 11.3758, + "step": 1930 + }, + { + "epoch": 0.08048851652703097, + "grad_norm": 137.0, + "learning_rate": 9.933330003413516e-05, + "loss": 10.2506, + "step": 1931 + }, + { + "epoch": 0.08053019882455921, + "grad_norm": 302.0, + "learning_rate": 9.933220096465258e-05, + "loss": 11.877, + "step": 1932 + }, + { + "epoch": 0.08057188112208745, + "grad_norm": 326.0, + "learning_rate": 9.933110099608627e-05, + "loss": 12.7503, + "step": 1933 + }, + { + "epoch": 0.08061356341961569, + "grad_norm": 189.0, + "learning_rate": 9.933000012845625e-05, + "loss": 10.9394, + "step": 1934 + }, + { + "epoch": 0.08065524571714393, + "grad_norm": 472.0, + "learning_rate": 9.932889836178261e-05, + "loss": 16.0003, + "step": 1935 + }, + { + "epoch": 0.08069692801467217, + "grad_norm": 784.0, + "learning_rate": 9.932779569608542e-05, + "loss": 23.126, + "step": 1936 + }, + { + "epoch": 0.0807386103122004, + "grad_norm": 1256.0, + "learning_rate": 9.932669213138475e-05, + "loss": 28.131, + "step": 1937 + }, + { + "epoch": 0.08078029260972865, + "grad_norm": 572.0, + "learning_rate": 9.932558766770076e-05, + "loss": 16.6261, + "step": 1938 + }, + { + "epoch": 0.08082197490725689, + "grad_norm": 240.0, + "learning_rate": 9.932448230505355e-05, + "loss": 11.8779, + "step": 1939 + }, + { + "epoch": 0.08086365720478512, + "grad_norm": 244.0, + "learning_rate": 9.932337604346327e-05, + "loss": 10.3132, + "step": 1940 + }, + { + "epoch": 0.08090533950231336, + "grad_norm": 848.0, + "learning_rate": 9.932226888295008e-05, + "loss": 23.0043, + "step": 1941 + }, + { + "epoch": 0.0809470217998416, + "grad_norm": 184.0, + "learning_rate": 9.932116082353417e-05, + "loss": 9.0652, + "step": 1942 + }, + { + "epoch": 0.08098870409736984, + "grad_norm": 502.0, + "learning_rate": 9.932005186523572e-05, + "loss": 15.6272, + "step": 1943 + }, + { + "epoch": 0.08103038639489808, + "grad_norm": 432.0, + "learning_rate": 9.931894200807494e-05, + "loss": 16.5011, + "step": 1944 + }, + { + "epoch": 0.08107206869242632, + "grad_norm": 592.0, + "learning_rate": 9.931783125207208e-05, + "loss": 17.7541, + "step": 1945 + }, + { + "epoch": 0.08111375098995456, + "grad_norm": 932.0, + "learning_rate": 9.931671959724736e-05, + "loss": 23.5049, + "step": 1946 + }, + { + "epoch": 0.0811554332874828, + "grad_norm": 374.0, + "learning_rate": 9.931560704362105e-05, + "loss": 13.3141, + "step": 1947 + }, + { + "epoch": 0.08119711558501104, + "grad_norm": 552.0, + "learning_rate": 9.931449359121343e-05, + "loss": 16.1286, + "step": 1948 + }, + { + "epoch": 0.08123879788253928, + "grad_norm": 284.0, + "learning_rate": 9.931337924004477e-05, + "loss": 12.7516, + "step": 1949 + }, + { + "epoch": 0.08128048018006752, + "grad_norm": 652.0, + "learning_rate": 9.93122639901354e-05, + "loss": 20.1254, + "step": 1950 + }, + { + "epoch": 0.08132216247759577, + "grad_norm": 408.0, + "learning_rate": 9.931114784150564e-05, + "loss": 13.8762, + "step": 1951 + }, + { + "epoch": 0.08136384477512401, + "grad_norm": 130.0, + "learning_rate": 9.931003079417584e-05, + "loss": 9.3761, + "step": 1952 + }, + { + "epoch": 0.08140552707265225, + "grad_norm": 392.0, + "learning_rate": 9.930891284816635e-05, + "loss": 14.5633, + "step": 1953 + }, + { + "epoch": 0.08144720937018049, + "grad_norm": 302.0, + "learning_rate": 9.930779400349754e-05, + "loss": 12.5011, + "step": 1954 + }, + { + "epoch": 0.08148889166770873, + "grad_norm": 296.0, + "learning_rate": 9.930667426018981e-05, + "loss": 13.3131, + "step": 1955 + }, + { + "epoch": 0.08153057396523697, + "grad_norm": 284.0, + "learning_rate": 9.930555361826356e-05, + "loss": 11.8756, + "step": 1956 + }, + { + "epoch": 0.08157225626276521, + "grad_norm": 752.0, + "learning_rate": 9.930443207773923e-05, + "loss": 21.3758, + "step": 1957 + }, + { + "epoch": 0.08161393856029345, + "grad_norm": 876.0, + "learning_rate": 9.930330963863725e-05, + "loss": 23.8756, + "step": 1958 + }, + { + "epoch": 0.08165562085782169, + "grad_norm": 360.0, + "learning_rate": 9.930218630097807e-05, + "loss": 14.9419, + "step": 1959 + }, + { + "epoch": 0.08169730315534993, + "grad_norm": 140.0, + "learning_rate": 9.930106206478216e-05, + "loss": 7.8441, + "step": 1960 + }, + { + "epoch": 0.08173898545287817, + "grad_norm": 636.0, + "learning_rate": 9.929993693007003e-05, + "loss": 16.8819, + "step": 1961 + }, + { + "epoch": 0.0817806677504064, + "grad_norm": 1208.0, + "learning_rate": 9.929881089686216e-05, + "loss": 27.5055, + "step": 1962 + }, + { + "epoch": 0.08182235004793464, + "grad_norm": 210.0, + "learning_rate": 9.929768396517908e-05, + "loss": 12.0633, + "step": 1963 + }, + { + "epoch": 0.08186403234546288, + "grad_norm": 179.0, + "learning_rate": 9.929655613504136e-05, + "loss": 10.3131, + "step": 1964 + }, + { + "epoch": 0.08190571464299112, + "grad_norm": 612.0, + "learning_rate": 9.929542740646951e-05, + "loss": 19.2502, + "step": 1965 + }, + { + "epoch": 0.08194739694051936, + "grad_norm": 392.0, + "learning_rate": 9.929429777948412e-05, + "loss": 14.3129, + "step": 1966 + }, + { + "epoch": 0.0819890792380476, + "grad_norm": 720.0, + "learning_rate": 9.929316725410577e-05, + "loss": 18.8763, + "step": 1967 + }, + { + "epoch": 0.08203076153557584, + "grad_norm": 402.0, + "learning_rate": 9.929203583035509e-05, + "loss": 14.9439, + "step": 1968 + }, + { + "epoch": 0.08207244383310408, + "grad_norm": 1384.0, + "learning_rate": 9.929090350825268e-05, + "loss": 33.0003, + "step": 1969 + }, + { + "epoch": 0.08211412613063232, + "grad_norm": 270.0, + "learning_rate": 9.928977028781916e-05, + "loss": 12.7514, + "step": 1970 + }, + { + "epoch": 0.08215580842816056, + "grad_norm": 446.0, + "learning_rate": 9.928863616907521e-05, + "loss": 16.0016, + "step": 1971 + }, + { + "epoch": 0.0821974907256888, + "grad_norm": 440.0, + "learning_rate": 9.92875011520415e-05, + "loss": 16.2505, + "step": 1972 + }, + { + "epoch": 0.08223917302321704, + "grad_norm": 208.0, + "learning_rate": 9.928636523673869e-05, + "loss": 9.0629, + "step": 1973 + }, + { + "epoch": 0.08228085532074528, + "grad_norm": 322.0, + "learning_rate": 9.92852284231875e-05, + "loss": 13.5629, + "step": 1974 + }, + { + "epoch": 0.08232253761827352, + "grad_norm": 404.0, + "learning_rate": 9.928409071140865e-05, + "loss": 15.5636, + "step": 1975 + }, + { + "epoch": 0.08236421991580176, + "grad_norm": 378.0, + "learning_rate": 9.928295210142289e-05, + "loss": 14.8787, + "step": 1976 + }, + { + "epoch": 0.08240590221333, + "grad_norm": 231.0, + "learning_rate": 9.928181259325093e-05, + "loss": 11.3139, + "step": 1977 + }, + { + "epoch": 0.08244758451085824, + "grad_norm": 180.0, + "learning_rate": 9.928067218691356e-05, + "loss": 9.8756, + "step": 1978 + }, + { + "epoch": 0.08248926680838647, + "grad_norm": 240.0, + "learning_rate": 9.927953088243158e-05, + "loss": 10.692, + "step": 1979 + }, + { + "epoch": 0.08253094910591471, + "grad_norm": 272.0, + "learning_rate": 9.927838867982576e-05, + "loss": 12.6878, + "step": 1980 + }, + { + "epoch": 0.08257263140344295, + "grad_norm": 386.0, + "learning_rate": 9.927724557911694e-05, + "loss": 14.3761, + "step": 1981 + }, + { + "epoch": 0.08261431370097119, + "grad_norm": 65.0, + "learning_rate": 9.927610158032594e-05, + "loss": 6.1569, + "step": 1982 + }, + { + "epoch": 0.08265599599849943, + "grad_norm": 456.0, + "learning_rate": 9.927495668347362e-05, + "loss": 14.6284, + "step": 1983 + }, + { + "epoch": 0.08269767829602767, + "grad_norm": 2176.0, + "learning_rate": 9.927381088858083e-05, + "loss": 43.2514, + "step": 1984 + }, + { + "epoch": 0.08273936059355591, + "grad_norm": 394.0, + "learning_rate": 9.927266419566847e-05, + "loss": 15.0631, + "step": 1985 + }, + { + "epoch": 0.08278104289108415, + "grad_norm": 768.0, + "learning_rate": 9.927151660475745e-05, + "loss": 19.628, + "step": 1986 + }, + { + "epoch": 0.08282272518861239, + "grad_norm": 294.0, + "learning_rate": 9.927036811586864e-05, + "loss": 13.4393, + "step": 1987 + }, + { + "epoch": 0.08286440748614064, + "grad_norm": 241.0, + "learning_rate": 9.9269218729023e-05, + "loss": 11.126, + "step": 1988 + }, + { + "epoch": 0.08290608978366888, + "grad_norm": 392.0, + "learning_rate": 9.926806844424148e-05, + "loss": 15.1296, + "step": 1989 + }, + { + "epoch": 0.08294777208119712, + "grad_norm": 378.0, + "learning_rate": 9.926691726154505e-05, + "loss": 15.0007, + "step": 1990 + }, + { + "epoch": 0.08298945437872536, + "grad_norm": 236.0, + "learning_rate": 9.926576518095466e-05, + "loss": 11.3131, + "step": 1991 + }, + { + "epoch": 0.0830311366762536, + "grad_norm": 205.0, + "learning_rate": 9.926461220249133e-05, + "loss": 11.188, + "step": 1992 + }, + { + "epoch": 0.08307281897378184, + "grad_norm": 156.0, + "learning_rate": 9.926345832617607e-05, + "loss": 10.1884, + "step": 1993 + }, + { + "epoch": 0.08311450127131008, + "grad_norm": 664.0, + "learning_rate": 9.926230355202992e-05, + "loss": 20.3756, + "step": 1994 + }, + { + "epoch": 0.08315618356883832, + "grad_norm": 1064.0, + "learning_rate": 9.92611478800739e-05, + "loss": 21.6311, + "step": 1995 + }, + { + "epoch": 0.08319786586636656, + "grad_norm": 632.0, + "learning_rate": 9.925999131032909e-05, + "loss": 19.6288, + "step": 1996 + }, + { + "epoch": 0.0832395481638948, + "grad_norm": 312.0, + "learning_rate": 9.925883384281658e-05, + "loss": 13.1888, + "step": 1997 + }, + { + "epoch": 0.08328123046142304, + "grad_norm": 290.0, + "learning_rate": 9.925767547755743e-05, + "loss": 12.063, + "step": 1998 + }, + { + "epoch": 0.08332291275895128, + "grad_norm": 490.0, + "learning_rate": 9.925651621457278e-05, + "loss": 16.8756, + "step": 1999 + }, + { + "epoch": 0.08336459505647952, + "grad_norm": 318.0, + "learning_rate": 9.925535605388375e-05, + "loss": 12.4379, + "step": 2000 + }, + { + "epoch": 0.08340627735400775, + "grad_norm": 740.0, + "learning_rate": 9.925419499551149e-05, + "loss": 17.8796, + "step": 2001 + }, + { + "epoch": 0.083447959651536, + "grad_norm": 187.0, + "learning_rate": 9.925303303947715e-05, + "loss": 11.0634, + "step": 2002 + }, + { + "epoch": 0.08348964194906423, + "grad_norm": 406.0, + "learning_rate": 9.92518701858019e-05, + "loss": 14.0631, + "step": 2003 + }, + { + "epoch": 0.08353132424659247, + "grad_norm": 486.0, + "learning_rate": 9.925070643450696e-05, + "loss": 16.8761, + "step": 2004 + }, + { + "epoch": 0.08357300654412071, + "grad_norm": 75.5, + "learning_rate": 9.924954178561351e-05, + "loss": 5.9386, + "step": 2005 + }, + { + "epoch": 0.08361468884164895, + "grad_norm": 314.0, + "learning_rate": 9.924837623914278e-05, + "loss": 11.3755, + "step": 2006 + }, + { + "epoch": 0.08365637113917719, + "grad_norm": 664.0, + "learning_rate": 9.924720979511605e-05, + "loss": 21.5007, + "step": 2007 + }, + { + "epoch": 0.08369805343670543, + "grad_norm": 132.0, + "learning_rate": 9.924604245355454e-05, + "loss": 8.6881, + "step": 2008 + }, + { + "epoch": 0.08373973573423367, + "grad_norm": 266.0, + "learning_rate": 9.924487421447952e-05, + "loss": 11.7506, + "step": 2009 + }, + { + "epoch": 0.08378141803176191, + "grad_norm": 1016.0, + "learning_rate": 9.92437050779123e-05, + "loss": 27.5006, + "step": 2010 + }, + { + "epoch": 0.08382310032929015, + "grad_norm": 316.0, + "learning_rate": 9.924253504387419e-05, + "loss": 13.0004, + "step": 2011 + }, + { + "epoch": 0.08386478262681839, + "grad_norm": 340.0, + "learning_rate": 9.92413641123865e-05, + "loss": 13.752, + "step": 2012 + }, + { + "epoch": 0.08390646492434663, + "grad_norm": 468.0, + "learning_rate": 9.924019228347059e-05, + "loss": 16.2505, + "step": 2013 + }, + { + "epoch": 0.08394814722187487, + "grad_norm": 356.0, + "learning_rate": 9.92390195571478e-05, + "loss": 12.8128, + "step": 2014 + }, + { + "epoch": 0.0839898295194031, + "grad_norm": 284.0, + "learning_rate": 9.92378459334395e-05, + "loss": 13.2515, + "step": 2015 + }, + { + "epoch": 0.08403151181693135, + "grad_norm": 396.0, + "learning_rate": 9.923667141236709e-05, + "loss": 14.5011, + "step": 2016 + }, + { + "epoch": 0.08407319411445958, + "grad_norm": 217.0, + "learning_rate": 9.923549599395197e-05, + "loss": 11.8758, + "step": 2017 + }, + { + "epoch": 0.08411487641198782, + "grad_norm": 296.0, + "learning_rate": 9.923431967821559e-05, + "loss": 9.0012, + "step": 2018 + }, + { + "epoch": 0.08415655870951606, + "grad_norm": 1152.0, + "learning_rate": 9.923314246517933e-05, + "loss": 31.5007, + "step": 2019 + }, + { + "epoch": 0.0841982410070443, + "grad_norm": 490.0, + "learning_rate": 9.92319643548647e-05, + "loss": 18.1256, + "step": 2020 + }, + { + "epoch": 0.08423992330457254, + "grad_norm": 235.0, + "learning_rate": 9.923078534729314e-05, + "loss": 11.5006, + "step": 2021 + }, + { + "epoch": 0.08428160560210078, + "grad_norm": 532.0, + "learning_rate": 9.922960544248614e-05, + "loss": 16.2513, + "step": 2022 + }, + { + "epoch": 0.08432328789962902, + "grad_norm": 156.0, + "learning_rate": 9.922842464046523e-05, + "loss": 10.1258, + "step": 2023 + }, + { + "epoch": 0.08436497019715727, + "grad_norm": 1408.0, + "learning_rate": 9.922724294125189e-05, + "loss": 29.1299, + "step": 2024 + }, + { + "epoch": 0.08440665249468551, + "grad_norm": 1176.0, + "learning_rate": 9.922606034486768e-05, + "loss": 27.1271, + "step": 2025 + }, + { + "epoch": 0.08444833479221375, + "grad_norm": 173.0, + "learning_rate": 9.922487685133415e-05, + "loss": 10.2506, + "step": 2026 + }, + { + "epoch": 0.08449001708974199, + "grad_norm": 280.0, + "learning_rate": 9.922369246067288e-05, + "loss": 13.0628, + "step": 2027 + }, + { + "epoch": 0.08453169938727023, + "grad_norm": 356.0, + "learning_rate": 9.922250717290545e-05, + "loss": 14.6902, + "step": 2028 + }, + { + "epoch": 0.08457338168479847, + "grad_norm": 322.0, + "learning_rate": 9.922132098805343e-05, + "loss": 12.6881, + "step": 2029 + }, + { + "epoch": 0.08461506398232671, + "grad_norm": 256.0, + "learning_rate": 9.922013390613849e-05, + "loss": 12.8761, + "step": 2030 + }, + { + "epoch": 0.08465674627985495, + "grad_norm": 205.0, + "learning_rate": 9.921894592718224e-05, + "loss": 12.1255, + "step": 2031 + }, + { + "epoch": 0.08469842857738319, + "grad_norm": 210.0, + "learning_rate": 9.921775705120632e-05, + "loss": 11.6258, + "step": 2032 + }, + { + "epoch": 0.08474011087491143, + "grad_norm": 173.0, + "learning_rate": 9.921656727823241e-05, + "loss": 9.1884, + "step": 2033 + }, + { + "epoch": 0.08478179317243967, + "grad_norm": 612.0, + "learning_rate": 9.921537660828219e-05, + "loss": 17.5003, + "step": 2034 + }, + { + "epoch": 0.08482347546996791, + "grad_norm": 676.0, + "learning_rate": 9.921418504137738e-05, + "loss": 20.6255, + "step": 2035 + }, + { + "epoch": 0.08486515776749615, + "grad_norm": 540.0, + "learning_rate": 9.921299257753966e-05, + "loss": 18.6257, + "step": 2036 + }, + { + "epoch": 0.08490684006502439, + "grad_norm": 112.0, + "learning_rate": 9.92117992167908e-05, + "loss": 4.938, + "step": 2037 + }, + { + "epoch": 0.08494852236255263, + "grad_norm": 374.0, + "learning_rate": 9.921060495915251e-05, + "loss": 13.5635, + "step": 2038 + }, + { + "epoch": 0.08499020466008086, + "grad_norm": 516.0, + "learning_rate": 9.920940980464658e-05, + "loss": 17.5018, + "step": 2039 + }, + { + "epoch": 0.0850318869576091, + "grad_norm": 988.0, + "learning_rate": 9.920821375329478e-05, + "loss": 25.2507, + "step": 2040 + }, + { + "epoch": 0.08507356925513734, + "grad_norm": 251.0, + "learning_rate": 9.920701680511894e-05, + "loss": 11.0628, + "step": 2041 + }, + { + "epoch": 0.08511525155266558, + "grad_norm": 470.0, + "learning_rate": 9.920581896014084e-05, + "loss": 16.8752, + "step": 2042 + }, + { + "epoch": 0.08515693385019382, + "grad_norm": 201.0, + "learning_rate": 9.920462021838233e-05, + "loss": 11.8764, + "step": 2043 + }, + { + "epoch": 0.08519861614772206, + "grad_norm": 177.0, + "learning_rate": 9.920342057986522e-05, + "loss": 10.1256, + "step": 2044 + }, + { + "epoch": 0.0852402984452503, + "grad_norm": 310.0, + "learning_rate": 9.920222004461144e-05, + "loss": 13.6255, + "step": 2045 + }, + { + "epoch": 0.08528198074277854, + "grad_norm": 302.0, + "learning_rate": 9.92010186126428e-05, + "loss": 14.1883, + "step": 2046 + }, + { + "epoch": 0.08532366304030678, + "grad_norm": 252.0, + "learning_rate": 9.919981628398126e-05, + "loss": 11.8756, + "step": 2047 + }, + { + "epoch": 0.08536534533783502, + "grad_norm": 290.0, + "learning_rate": 9.919861305864865e-05, + "loss": 11.626, + "step": 2048 + }, + { + "epoch": 0.08540702763536326, + "grad_norm": 716.0, + "learning_rate": 9.9197408936667e-05, + "loss": 20.6276, + "step": 2049 + }, + { + "epoch": 0.0854487099328915, + "grad_norm": 332.0, + "learning_rate": 9.919620391805818e-05, + "loss": 13.9381, + "step": 2050 + }, + { + "epoch": 0.08549039223041974, + "grad_norm": 225.0, + "learning_rate": 9.919499800284418e-05, + "loss": 12.0006, + "step": 2051 + }, + { + "epoch": 0.08553207452794798, + "grad_norm": 88.0, + "learning_rate": 9.919379119104697e-05, + "loss": 9.0007, + "step": 2052 + }, + { + "epoch": 0.08557375682547622, + "grad_norm": 372.0, + "learning_rate": 9.919258348268857e-05, + "loss": 14.5665, + "step": 2053 + }, + { + "epoch": 0.08561543912300446, + "grad_norm": 462.0, + "learning_rate": 9.919137487779095e-05, + "loss": 15.8143, + "step": 2054 + }, + { + "epoch": 0.0856571214205327, + "grad_norm": 83.0, + "learning_rate": 9.919016537637616e-05, + "loss": 9.7511, + "step": 2055 + }, + { + "epoch": 0.08569880371806093, + "grad_norm": 1144.0, + "learning_rate": 9.918895497846623e-05, + "loss": 29.8756, + "step": 2056 + }, + { + "epoch": 0.08574048601558917, + "grad_norm": 177.0, + "learning_rate": 9.918774368408324e-05, + "loss": 10.5628, + "step": 2057 + }, + { + "epoch": 0.08578216831311741, + "grad_norm": 316.0, + "learning_rate": 9.918653149324926e-05, + "loss": 14.9379, + "step": 2058 + }, + { + "epoch": 0.08582385061064565, + "grad_norm": 616.0, + "learning_rate": 9.918531840598637e-05, + "loss": 19.8755, + "step": 2059 + }, + { + "epoch": 0.08586553290817389, + "grad_norm": 270.0, + "learning_rate": 9.918410442231668e-05, + "loss": 12.7516, + "step": 2060 + }, + { + "epoch": 0.08590721520570214, + "grad_norm": 484.0, + "learning_rate": 9.918288954226233e-05, + "loss": 16.3755, + "step": 2061 + }, + { + "epoch": 0.08594889750323038, + "grad_norm": 191.0, + "learning_rate": 9.918167376584544e-05, + "loss": 11.0006, + "step": 2062 + }, + { + "epoch": 0.08599057980075862, + "grad_norm": 354.0, + "learning_rate": 9.91804570930882e-05, + "loss": 14.0004, + "step": 2063 + }, + { + "epoch": 0.08603226209828686, + "grad_norm": 484.0, + "learning_rate": 9.917923952401275e-05, + "loss": 15.3142, + "step": 2064 + }, + { + "epoch": 0.0860739443958151, + "grad_norm": 1080.0, + "learning_rate": 9.917802105864129e-05, + "loss": 25.2507, + "step": 2065 + }, + { + "epoch": 0.08611562669334334, + "grad_norm": 187.0, + "learning_rate": 9.917680169699603e-05, + "loss": 11.5006, + "step": 2066 + }, + { + "epoch": 0.08615730899087158, + "grad_norm": 200.0, + "learning_rate": 9.91755814390992e-05, + "loss": 12.0007, + "step": 2067 + }, + { + "epoch": 0.08619899128839982, + "grad_norm": 1176.0, + "learning_rate": 9.917436028497305e-05, + "loss": 34.5008, + "step": 2068 + }, + { + "epoch": 0.08624067358592806, + "grad_norm": 430.0, + "learning_rate": 9.917313823463978e-05, + "loss": 15.5634, + "step": 2069 + }, + { + "epoch": 0.0862823558834563, + "grad_norm": 496.0, + "learning_rate": 9.917191528812173e-05, + "loss": 16.0008, + "step": 2070 + }, + { + "epoch": 0.08632403818098454, + "grad_norm": 144.0, + "learning_rate": 9.917069144544116e-05, + "loss": 8.8131, + "step": 2071 + }, + { + "epoch": 0.08636572047851278, + "grad_norm": 772.0, + "learning_rate": 9.916946670662036e-05, + "loss": 21.2512, + "step": 2072 + }, + { + "epoch": 0.08640740277604102, + "grad_norm": 266.0, + "learning_rate": 9.916824107168166e-05, + "loss": 13.0011, + "step": 2073 + }, + { + "epoch": 0.08644908507356926, + "grad_norm": 225.0, + "learning_rate": 9.916701454064741e-05, + "loss": 11.8137, + "step": 2074 + }, + { + "epoch": 0.0864907673710975, + "grad_norm": 396.0, + "learning_rate": 9.916578711353996e-05, + "loss": 13.8764, + "step": 2075 + }, + { + "epoch": 0.08653244966862574, + "grad_norm": 272.0, + "learning_rate": 9.916455879038167e-05, + "loss": 13.0629, + "step": 2076 + }, + { + "epoch": 0.08657413196615397, + "grad_norm": 173.0, + "learning_rate": 9.916332957119492e-05, + "loss": 9.5005, + "step": 2077 + }, + { + "epoch": 0.08661581426368221, + "grad_norm": 56.25, + "learning_rate": 9.916209945600215e-05, + "loss": 8.4404, + "step": 2078 + }, + { + "epoch": 0.08665749656121045, + "grad_norm": 470.0, + "learning_rate": 9.916086844482573e-05, + "loss": 15.065, + "step": 2079 + }, + { + "epoch": 0.08669917885873869, + "grad_norm": 270.0, + "learning_rate": 9.915963653768812e-05, + "loss": 13.1255, + "step": 2080 + }, + { + "epoch": 0.08674086115626693, + "grad_norm": 420.0, + "learning_rate": 9.915840373461178e-05, + "loss": 16.0004, + "step": 2081 + }, + { + "epoch": 0.08678254345379517, + "grad_norm": 1696.0, + "learning_rate": 9.915717003561917e-05, + "loss": 38.2506, + "step": 2082 + }, + { + "epoch": 0.08682422575132341, + "grad_norm": 400.0, + "learning_rate": 9.915593544073276e-05, + "loss": 12.942, + "step": 2083 + }, + { + "epoch": 0.08686590804885165, + "grad_norm": 171.0, + "learning_rate": 9.915469994997509e-05, + "loss": 11.0636, + "step": 2084 + }, + { + "epoch": 0.08690759034637989, + "grad_norm": 422.0, + "learning_rate": 9.915346356336862e-05, + "loss": 15.8767, + "step": 2085 + }, + { + "epoch": 0.08694927264390813, + "grad_norm": 290.0, + "learning_rate": 9.915222628093593e-05, + "loss": 13.0629, + "step": 2086 + }, + { + "epoch": 0.08699095494143637, + "grad_norm": 352.0, + "learning_rate": 9.915098810269954e-05, + "loss": 13.4382, + "step": 2087 + }, + { + "epoch": 0.08703263723896461, + "grad_norm": 644.0, + "learning_rate": 9.914974902868204e-05, + "loss": 19.7543, + "step": 2088 + }, + { + "epoch": 0.08707431953649285, + "grad_norm": 182.0, + "learning_rate": 9.9148509058906e-05, + "loss": 10.5639, + "step": 2089 + }, + { + "epoch": 0.08711600183402109, + "grad_norm": 708.0, + "learning_rate": 9.914726819339401e-05, + "loss": 18.5045, + "step": 2090 + }, + { + "epoch": 0.08715768413154933, + "grad_norm": 137.0, + "learning_rate": 9.91460264321687e-05, + "loss": 9.6257, + "step": 2091 + }, + { + "epoch": 0.08719936642907757, + "grad_norm": 130.0, + "learning_rate": 9.914478377525269e-05, + "loss": 9.7529, + "step": 2092 + }, + { + "epoch": 0.0872410487266058, + "grad_norm": 768.0, + "learning_rate": 9.914354022266862e-05, + "loss": 23.2519, + "step": 2093 + }, + { + "epoch": 0.08728273102413404, + "grad_norm": 332.0, + "learning_rate": 9.91422957744392e-05, + "loss": 13.1265, + "step": 2094 + }, + { + "epoch": 0.08732441332166228, + "grad_norm": 159.0, + "learning_rate": 9.914105043058705e-05, + "loss": 9.0007, + "step": 2095 + }, + { + "epoch": 0.08736609561919052, + "grad_norm": 50.5, + "learning_rate": 9.913980419113491e-05, + "loss": 7.6597, + "step": 2096 + }, + { + "epoch": 0.08740777791671878, + "grad_norm": 728.0, + "learning_rate": 9.913855705610548e-05, + "loss": 20.5006, + "step": 2097 + }, + { + "epoch": 0.08744946021424702, + "grad_norm": 234.0, + "learning_rate": 9.913730902552148e-05, + "loss": 11.3135, + "step": 2098 + }, + { + "epoch": 0.08749114251177526, + "grad_norm": 904.0, + "learning_rate": 9.913606009940566e-05, + "loss": 22.7588, + "step": 2099 + }, + { + "epoch": 0.0875328248093035, + "grad_norm": 245.0, + "learning_rate": 9.913481027778077e-05, + "loss": 10.9382, + "step": 2100 + }, + { + "epoch": 0.08757450710683173, + "grad_norm": 236.0, + "learning_rate": 9.913355956066961e-05, + "loss": 11.8133, + "step": 2101 + }, + { + "epoch": 0.08761618940435997, + "grad_norm": 536.0, + "learning_rate": 9.913230794809497e-05, + "loss": 18.252, + "step": 2102 + }, + { + "epoch": 0.08765787170188821, + "grad_norm": 1016.0, + "learning_rate": 9.913105544007966e-05, + "loss": 23.2512, + "step": 2103 + }, + { + "epoch": 0.08769955399941645, + "grad_norm": 414.0, + "learning_rate": 9.91298020366465e-05, + "loss": 13.6897, + "step": 2104 + }, + { + "epoch": 0.08774123629694469, + "grad_norm": 556.0, + "learning_rate": 9.912854773781832e-05, + "loss": 16.411, + "step": 2105 + }, + { + "epoch": 0.08778291859447293, + "grad_norm": 250.0, + "learning_rate": 9.912729254361801e-05, + "loss": 11.1912, + "step": 2106 + }, + { + "epoch": 0.08782460089200117, + "grad_norm": 336.0, + "learning_rate": 9.912603645406844e-05, + "loss": 13.6297, + "step": 2107 + }, + { + "epoch": 0.08786628318952941, + "grad_norm": 306.0, + "learning_rate": 9.912477946919247e-05, + "loss": 11.6901, + "step": 2108 + }, + { + "epoch": 0.08790796548705765, + "grad_norm": 390.0, + "learning_rate": 9.912352158901306e-05, + "loss": 15.6901, + "step": 2109 + }, + { + "epoch": 0.08794964778458589, + "grad_norm": 227.0, + "learning_rate": 9.91222628135531e-05, + "loss": 10.9441, + "step": 2110 + }, + { + "epoch": 0.08799133008211413, + "grad_norm": 540.0, + "learning_rate": 9.912100314283552e-05, + "loss": 15.84, + "step": 2111 + }, + { + "epoch": 0.08803301237964237, + "grad_norm": 364.0, + "learning_rate": 9.911974257688332e-05, + "loss": 13.2522, + "step": 2112 + }, + { + "epoch": 0.0880746946771706, + "grad_norm": 422.0, + "learning_rate": 9.911848111571944e-05, + "loss": 16.128, + "step": 2113 + }, + { + "epoch": 0.08811637697469885, + "grad_norm": 322.0, + "learning_rate": 9.91172187593669e-05, + "loss": 14.1253, + "step": 2114 + }, + { + "epoch": 0.08815805927222709, + "grad_norm": 560.0, + "learning_rate": 9.911595550784867e-05, + "loss": 18.5096, + "step": 2115 + }, + { + "epoch": 0.08819974156975532, + "grad_norm": 488.0, + "learning_rate": 9.911469136118778e-05, + "loss": 15.4378, + "step": 2116 + }, + { + "epoch": 0.08824142386728356, + "grad_norm": 520.0, + "learning_rate": 9.91134263194073e-05, + "loss": 14.688, + "step": 2117 + }, + { + "epoch": 0.0882831061648118, + "grad_norm": 544.0, + "learning_rate": 9.911216038253026e-05, + "loss": 17.5086, + "step": 2118 + }, + { + "epoch": 0.08832478846234004, + "grad_norm": 346.0, + "learning_rate": 9.911089355057974e-05, + "loss": 15.188, + "step": 2119 + }, + { + "epoch": 0.08836647075986828, + "grad_norm": 488.0, + "learning_rate": 9.910962582357882e-05, + "loss": 17.2516, + "step": 2120 + }, + { + "epoch": 0.08840815305739652, + "grad_norm": 322.0, + "learning_rate": 9.910835720155062e-05, + "loss": 13.5635, + "step": 2121 + }, + { + "epoch": 0.08844983535492476, + "grad_norm": 157.0, + "learning_rate": 9.910708768451824e-05, + "loss": 10.1267, + "step": 2122 + }, + { + "epoch": 0.088491517652453, + "grad_norm": 142.0, + "learning_rate": 9.910581727250483e-05, + "loss": 9.0009, + "step": 2123 + }, + { + "epoch": 0.08853319994998124, + "grad_norm": 228.0, + "learning_rate": 9.910454596553353e-05, + "loss": 11.5638, + "step": 2124 + }, + { + "epoch": 0.08857488224750948, + "grad_norm": 414.0, + "learning_rate": 9.910327376362753e-05, + "loss": 15.0011, + "step": 2125 + }, + { + "epoch": 0.08861656454503772, + "grad_norm": 356.0, + "learning_rate": 9.910200066681002e-05, + "loss": 12.1889, + "step": 2126 + }, + { + "epoch": 0.08865824684256596, + "grad_norm": 872.0, + "learning_rate": 9.910072667510417e-05, + "loss": 24.0008, + "step": 2127 + }, + { + "epoch": 0.0886999291400942, + "grad_norm": 524.0, + "learning_rate": 9.909945178853324e-05, + "loss": 15.502, + "step": 2128 + }, + { + "epoch": 0.08874161143762244, + "grad_norm": 316.0, + "learning_rate": 9.909817600712041e-05, + "loss": 12.5007, + "step": 2129 + }, + { + "epoch": 0.08878329373515068, + "grad_norm": 237.0, + "learning_rate": 9.909689933088899e-05, + "loss": 11.5632, + "step": 2130 + }, + { + "epoch": 0.08882497603267892, + "grad_norm": 596.0, + "learning_rate": 9.909562175986221e-05, + "loss": 16.7517, + "step": 2131 + }, + { + "epoch": 0.08886665833020715, + "grad_norm": 165.0, + "learning_rate": 9.909434329406338e-05, + "loss": 10.5044, + "step": 2132 + }, + { + "epoch": 0.08890834062773541, + "grad_norm": 536.0, + "learning_rate": 9.909306393351576e-05, + "loss": 17.7505, + "step": 2133 + }, + { + "epoch": 0.08895002292526365, + "grad_norm": 200.0, + "learning_rate": 9.90917836782427e-05, + "loss": 11.1258, + "step": 2134 + }, + { + "epoch": 0.08899170522279189, + "grad_norm": 86.0, + "learning_rate": 9.909050252826752e-05, + "loss": 9.0007, + "step": 2135 + }, + { + "epoch": 0.08903338752032013, + "grad_norm": 928.0, + "learning_rate": 9.90892204836136e-05, + "loss": 22.6315, + "step": 2136 + }, + { + "epoch": 0.08907506981784837, + "grad_norm": 189.0, + "learning_rate": 9.908793754430426e-05, + "loss": 10.6254, + "step": 2137 + }, + { + "epoch": 0.0891167521153766, + "grad_norm": 250.0, + "learning_rate": 9.908665371036289e-05, + "loss": 12.2512, + "step": 2138 + }, + { + "epoch": 0.08915843441290484, + "grad_norm": 225.0, + "learning_rate": 9.908536898181292e-05, + "loss": 11.8756, + "step": 2139 + }, + { + "epoch": 0.08920011671043308, + "grad_norm": 608.0, + "learning_rate": 9.908408335867774e-05, + "loss": 19.7506, + "step": 2140 + }, + { + "epoch": 0.08924179900796132, + "grad_norm": 264.0, + "learning_rate": 9.908279684098076e-05, + "loss": 12.377, + "step": 2141 + }, + { + "epoch": 0.08928348130548956, + "grad_norm": 110.5, + "learning_rate": 9.908150942874548e-05, + "loss": 5.8132, + "step": 2142 + }, + { + "epoch": 0.0893251636030178, + "grad_norm": 748.0, + "learning_rate": 9.908022112199531e-05, + "loss": 20.5048, + "step": 2143 + }, + { + "epoch": 0.08936684590054604, + "grad_norm": 672.0, + "learning_rate": 9.907893192075377e-05, + "loss": 19.5004, + "step": 2144 + }, + { + "epoch": 0.08940852819807428, + "grad_norm": 384.0, + "learning_rate": 9.907764182504434e-05, + "loss": 13.938, + "step": 2145 + }, + { + "epoch": 0.08945021049560252, + "grad_norm": 444.0, + "learning_rate": 9.90763508348905e-05, + "loss": 15.7504, + "step": 2146 + }, + { + "epoch": 0.08949189279313076, + "grad_norm": 218.0, + "learning_rate": 9.907505895031584e-05, + "loss": 11.063, + "step": 2147 + }, + { + "epoch": 0.089533575090659, + "grad_norm": 304.0, + "learning_rate": 9.907376617134388e-05, + "loss": 12.8754, + "step": 2148 + }, + { + "epoch": 0.08957525738818724, + "grad_norm": 600.0, + "learning_rate": 9.907247249799815e-05, + "loss": 18.8758, + "step": 2149 + }, + { + "epoch": 0.08961693968571548, + "grad_norm": 292.0, + "learning_rate": 9.907117793030227e-05, + "loss": 13.3753, + "step": 2150 + }, + { + "epoch": 0.08965862198324372, + "grad_norm": 266.0, + "learning_rate": 9.906988246827982e-05, + "loss": 13.6879, + "step": 2151 + }, + { + "epoch": 0.08970030428077196, + "grad_norm": 210.0, + "learning_rate": 9.906858611195439e-05, + "loss": 11.8142, + "step": 2152 + }, + { + "epoch": 0.0897419865783002, + "grad_norm": 241.0, + "learning_rate": 9.906728886134963e-05, + "loss": 11.5629, + "step": 2153 + }, + { + "epoch": 0.08978366887582843, + "grad_norm": 133.0, + "learning_rate": 9.906599071648917e-05, + "loss": 8.8131, + "step": 2154 + }, + { + "epoch": 0.08982535117335667, + "grad_norm": 392.0, + "learning_rate": 9.906469167739667e-05, + "loss": 15.1257, + "step": 2155 + }, + { + "epoch": 0.08986703347088491, + "grad_norm": 494.0, + "learning_rate": 9.906339174409582e-05, + "loss": 16.7502, + "step": 2156 + }, + { + "epoch": 0.08990871576841315, + "grad_norm": 255.0, + "learning_rate": 9.90620909166103e-05, + "loss": 12.9379, + "step": 2157 + }, + { + "epoch": 0.08995039806594139, + "grad_norm": 624.0, + "learning_rate": 9.90607891949638e-05, + "loss": 18.6255, + "step": 2158 + }, + { + "epoch": 0.08999208036346963, + "grad_norm": 408.0, + "learning_rate": 9.905948657918008e-05, + "loss": 14.5671, + "step": 2159 + }, + { + "epoch": 0.09003376266099787, + "grad_norm": 318.0, + "learning_rate": 9.905818306928286e-05, + "loss": 9.6878, + "step": 2160 + }, + { + "epoch": 0.09007544495852611, + "grad_norm": 338.0, + "learning_rate": 9.90568786652959e-05, + "loss": 13.7514, + "step": 2161 + }, + { + "epoch": 0.09011712725605435, + "grad_norm": 123.5, + "learning_rate": 9.905557336724296e-05, + "loss": 9.5005, + "step": 2162 + }, + { + "epoch": 0.09015880955358259, + "grad_norm": 456.0, + "learning_rate": 9.905426717514785e-05, + "loss": 15.3788, + "step": 2163 + }, + { + "epoch": 0.09020049185111083, + "grad_norm": 494.0, + "learning_rate": 9.905296008903437e-05, + "loss": 16.7504, + "step": 2164 + }, + { + "epoch": 0.09024217414863907, + "grad_norm": 528.0, + "learning_rate": 9.905165210892633e-05, + "loss": 18.1254, + "step": 2165 + }, + { + "epoch": 0.09028385644616731, + "grad_norm": 123.5, + "learning_rate": 9.905034323484756e-05, + "loss": 9.9381, + "step": 2166 + }, + { + "epoch": 0.09032553874369555, + "grad_norm": 346.0, + "learning_rate": 9.904903346682196e-05, + "loss": 15.0005, + "step": 2167 + }, + { + "epoch": 0.09036722104122379, + "grad_norm": 564.0, + "learning_rate": 9.904772280487336e-05, + "loss": 18.6262, + "step": 2168 + }, + { + "epoch": 0.09040890333875203, + "grad_norm": 616.0, + "learning_rate": 9.904641124902565e-05, + "loss": 19.7507, + "step": 2169 + }, + { + "epoch": 0.09045058563628028, + "grad_norm": 193.0, + "learning_rate": 9.904509879930275e-05, + "loss": 8.8137, + "step": 2170 + }, + { + "epoch": 0.09049226793380852, + "grad_norm": 304.0, + "learning_rate": 9.904378545572857e-05, + "loss": 13.0005, + "step": 2171 + }, + { + "epoch": 0.09053395023133676, + "grad_norm": 540.0, + "learning_rate": 9.904247121832703e-05, + "loss": 17.2509, + "step": 2172 + }, + { + "epoch": 0.090575632528865, + "grad_norm": 384.0, + "learning_rate": 9.904115608712213e-05, + "loss": 15.0629, + "step": 2173 + }, + { + "epoch": 0.09061731482639324, + "grad_norm": 540.0, + "learning_rate": 9.903984006213778e-05, + "loss": 17.8754, + "step": 2174 + }, + { + "epoch": 0.09065899712392148, + "grad_norm": 210.0, + "learning_rate": 9.903852314339801e-05, + "loss": 11.0014, + "step": 2175 + }, + { + "epoch": 0.09070067942144971, + "grad_norm": 844.0, + "learning_rate": 9.903720533092679e-05, + "loss": 19.7558, + "step": 2176 + }, + { + "epoch": 0.09074236171897795, + "grad_norm": 470.0, + "learning_rate": 9.903588662474814e-05, + "loss": 15.9379, + "step": 2177 + }, + { + "epoch": 0.0907840440165062, + "grad_norm": 1020.0, + "learning_rate": 9.903456702488611e-05, + "loss": 24.7523, + "step": 2178 + }, + { + "epoch": 0.09082572631403443, + "grad_norm": 272.0, + "learning_rate": 9.903324653136477e-05, + "loss": 11.688, + "step": 2179 + }, + { + "epoch": 0.09086740861156267, + "grad_norm": 404.0, + "learning_rate": 9.903192514420814e-05, + "loss": 14.7516, + "step": 2180 + }, + { + "epoch": 0.09090909090909091, + "grad_norm": 348.0, + "learning_rate": 9.90306028634403e-05, + "loss": 14.0629, + "step": 2181 + }, + { + "epoch": 0.09095077320661915, + "grad_norm": 302.0, + "learning_rate": 9.90292796890854e-05, + "loss": 11.2513, + "step": 2182 + }, + { + "epoch": 0.09099245550414739, + "grad_norm": 442.0, + "learning_rate": 9.902795562116751e-05, + "loss": 16.2506, + "step": 2183 + }, + { + "epoch": 0.09103413780167563, + "grad_norm": 221.0, + "learning_rate": 9.902663065971078e-05, + "loss": 11.8129, + "step": 2184 + }, + { + "epoch": 0.09107582009920387, + "grad_norm": 306.0, + "learning_rate": 9.902530480473936e-05, + "loss": 13.2504, + "step": 2185 + }, + { + "epoch": 0.09111750239673211, + "grad_norm": 560.0, + "learning_rate": 9.902397805627741e-05, + "loss": 18.3752, + "step": 2186 + }, + { + "epoch": 0.09115918469426035, + "grad_norm": 211.0, + "learning_rate": 9.902265041434909e-05, + "loss": 11.1255, + "step": 2187 + }, + { + "epoch": 0.09120086699178859, + "grad_norm": 247.0, + "learning_rate": 9.902132187897863e-05, + "loss": 11.0629, + "step": 2188 + }, + { + "epoch": 0.09124254928931683, + "grad_norm": 186.0, + "learning_rate": 9.901999245019022e-05, + "loss": 11.0641, + "step": 2189 + }, + { + "epoch": 0.09128423158684507, + "grad_norm": 528.0, + "learning_rate": 9.90186621280081e-05, + "loss": 18.5007, + "step": 2190 + }, + { + "epoch": 0.0913259138843733, + "grad_norm": 632.0, + "learning_rate": 9.901733091245651e-05, + "loss": 18.7502, + "step": 2191 + }, + { + "epoch": 0.09136759618190154, + "grad_norm": 388.0, + "learning_rate": 9.901599880355972e-05, + "loss": 16.5006, + "step": 2192 + }, + { + "epoch": 0.09140927847942978, + "grad_norm": 1248.0, + "learning_rate": 9.9014665801342e-05, + "loss": 25.7547, + "step": 2193 + }, + { + "epoch": 0.09145096077695802, + "grad_norm": 394.0, + "learning_rate": 9.901333190582762e-05, + "loss": 14.8753, + "step": 2194 + }, + { + "epoch": 0.09149264307448626, + "grad_norm": 728.0, + "learning_rate": 9.901199711704093e-05, + "loss": 19.8762, + "step": 2195 + }, + { + "epoch": 0.0915343253720145, + "grad_norm": 476.0, + "learning_rate": 9.901066143500626e-05, + "loss": 17.0025, + "step": 2196 + }, + { + "epoch": 0.09157600766954274, + "grad_norm": 193.0, + "learning_rate": 9.900932485974791e-05, + "loss": 10.8755, + "step": 2197 + }, + { + "epoch": 0.09161768996707098, + "grad_norm": 354.0, + "learning_rate": 9.900798739129027e-05, + "loss": 14.3754, + "step": 2198 + }, + { + "epoch": 0.09165937226459922, + "grad_norm": 350.0, + "learning_rate": 9.90066490296577e-05, + "loss": 13.8157, + "step": 2199 + }, + { + "epoch": 0.09170105456212746, + "grad_norm": 312.0, + "learning_rate": 9.900530977487463e-05, + "loss": 13.8757, + "step": 2200 + }, + { + "epoch": 0.0917427368596557, + "grad_norm": 704.0, + "learning_rate": 9.90039696269654e-05, + "loss": 21.5003, + "step": 2201 + }, + { + "epoch": 0.09178441915718394, + "grad_norm": 448.0, + "learning_rate": 9.90026285859545e-05, + "loss": 14.0634, + "step": 2202 + }, + { + "epoch": 0.09182610145471218, + "grad_norm": 456.0, + "learning_rate": 9.900128665186634e-05, + "loss": 16.8755, + "step": 2203 + }, + { + "epoch": 0.09186778375224042, + "grad_norm": 576.0, + "learning_rate": 9.89999438247254e-05, + "loss": 16.379, + "step": 2204 + }, + { + "epoch": 0.09190946604976866, + "grad_norm": 294.0, + "learning_rate": 9.899860010455611e-05, + "loss": 13.8754, + "step": 2205 + }, + { + "epoch": 0.09195114834729691, + "grad_norm": 237.0, + "learning_rate": 9.899725549138297e-05, + "loss": 12.8141, + "step": 2206 + }, + { + "epoch": 0.09199283064482515, + "grad_norm": 640.0, + "learning_rate": 9.899590998523053e-05, + "loss": 18.6254, + "step": 2207 + }, + { + "epoch": 0.09203451294235339, + "grad_norm": 632.0, + "learning_rate": 9.899456358612327e-05, + "loss": 20.7513, + "step": 2208 + }, + { + "epoch": 0.09207619523988163, + "grad_norm": 380.0, + "learning_rate": 9.899321629408573e-05, + "loss": 14.1252, + "step": 2209 + }, + { + "epoch": 0.09211787753740987, + "grad_norm": 424.0, + "learning_rate": 9.899186810914249e-05, + "loss": 14.313, + "step": 2210 + }, + { + "epoch": 0.0921595598349381, + "grad_norm": 300.0, + "learning_rate": 9.899051903131809e-05, + "loss": 14.1254, + "step": 2211 + }, + { + "epoch": 0.09220124213246635, + "grad_norm": 952.0, + "learning_rate": 9.898916906063714e-05, + "loss": 26.6256, + "step": 2212 + }, + { + "epoch": 0.09224292442999459, + "grad_norm": 386.0, + "learning_rate": 9.898781819712422e-05, + "loss": 15.1881, + "step": 2213 + }, + { + "epoch": 0.09228460672752282, + "grad_norm": 256.0, + "learning_rate": 9.898646644080398e-05, + "loss": 12.0627, + "step": 2214 + }, + { + "epoch": 0.09232628902505106, + "grad_norm": 237.0, + "learning_rate": 9.898511379170104e-05, + "loss": 11.3133, + "step": 2215 + }, + { + "epoch": 0.0923679713225793, + "grad_norm": 280.0, + "learning_rate": 9.898376024984003e-05, + "loss": 12.4379, + "step": 2216 + }, + { + "epoch": 0.09240965362010754, + "grad_norm": 450.0, + "learning_rate": 9.898240581524567e-05, + "loss": 12.8757, + "step": 2217 + }, + { + "epoch": 0.09245133591763578, + "grad_norm": 262.0, + "learning_rate": 9.898105048794262e-05, + "loss": 12.8131, + "step": 2218 + }, + { + "epoch": 0.09249301821516402, + "grad_norm": 454.0, + "learning_rate": 9.897969426795555e-05, + "loss": 15.6879, + "step": 2219 + }, + { + "epoch": 0.09253470051269226, + "grad_norm": 444.0, + "learning_rate": 9.897833715530922e-05, + "loss": 14.8755, + "step": 2220 + }, + { + "epoch": 0.0925763828102205, + "grad_norm": 672.0, + "learning_rate": 9.897697915002835e-05, + "loss": 20.877, + "step": 2221 + }, + { + "epoch": 0.09261806510774874, + "grad_norm": 126.5, + "learning_rate": 9.897562025213766e-05, + "loss": 9.6258, + "step": 2222 + }, + { + "epoch": 0.09265974740527698, + "grad_norm": 544.0, + "learning_rate": 9.897426046166198e-05, + "loss": 18.0003, + "step": 2223 + }, + { + "epoch": 0.09270142970280522, + "grad_norm": 272.0, + "learning_rate": 9.897289977862604e-05, + "loss": 11.6259, + "step": 2224 + }, + { + "epoch": 0.09274311200033346, + "grad_norm": 332.0, + "learning_rate": 9.897153820305464e-05, + "loss": 13.7506, + "step": 2225 + }, + { + "epoch": 0.0927847942978617, + "grad_norm": 344.0, + "learning_rate": 9.897017573497263e-05, + "loss": 13.7507, + "step": 2226 + }, + { + "epoch": 0.09282647659538994, + "grad_norm": 608.0, + "learning_rate": 9.896881237440481e-05, + "loss": 17.1259, + "step": 2227 + }, + { + "epoch": 0.09286815889291818, + "grad_norm": 916.0, + "learning_rate": 9.896744812137604e-05, + "loss": 21.754, + "step": 2228 + }, + { + "epoch": 0.09290984119044642, + "grad_norm": 214.0, + "learning_rate": 9.896608297591119e-05, + "loss": 10.5629, + "step": 2229 + }, + { + "epoch": 0.09295152348797465, + "grad_norm": 1240.0, + "learning_rate": 9.896471693803511e-05, + "loss": 32.0004, + "step": 2230 + }, + { + "epoch": 0.0929932057855029, + "grad_norm": 181.0, + "learning_rate": 9.896335000777273e-05, + "loss": 9.938, + "step": 2231 + }, + { + "epoch": 0.09303488808303113, + "grad_norm": 372.0, + "learning_rate": 9.896198218514896e-05, + "loss": 10.6894, + "step": 2232 + }, + { + "epoch": 0.09307657038055937, + "grad_norm": 100.0, + "learning_rate": 9.89606134701887e-05, + "loss": 10.002, + "step": 2233 + }, + { + "epoch": 0.09311825267808761, + "grad_norm": 640.0, + "learning_rate": 9.895924386291693e-05, + "loss": 21.2506, + "step": 2234 + }, + { + "epoch": 0.09315993497561585, + "grad_norm": 1256.0, + "learning_rate": 9.895787336335858e-05, + "loss": 27.7564, + "step": 2235 + }, + { + "epoch": 0.09320161727314409, + "grad_norm": 380.0, + "learning_rate": 9.895650197153864e-05, + "loss": 14.3754, + "step": 2236 + }, + { + "epoch": 0.09324329957067233, + "grad_norm": 314.0, + "learning_rate": 9.895512968748211e-05, + "loss": 14.1253, + "step": 2237 + }, + { + "epoch": 0.09328498186820057, + "grad_norm": 245.0, + "learning_rate": 9.8953756511214e-05, + "loss": 12.688, + "step": 2238 + }, + { + "epoch": 0.09332666416572881, + "grad_norm": 548.0, + "learning_rate": 9.895238244275933e-05, + "loss": 15.1878, + "step": 2239 + }, + { + "epoch": 0.09336834646325705, + "grad_norm": 338.0, + "learning_rate": 9.895100748214314e-05, + "loss": 11.8756, + "step": 2240 + }, + { + "epoch": 0.09341002876078529, + "grad_norm": 404.0, + "learning_rate": 9.89496316293905e-05, + "loss": 14.5627, + "step": 2241 + }, + { + "epoch": 0.09345171105831353, + "grad_norm": 168.0, + "learning_rate": 9.894825488452648e-05, + "loss": 10.4379, + "step": 2242 + }, + { + "epoch": 0.09349339335584178, + "grad_norm": 904.0, + "learning_rate": 9.894687724757616e-05, + "loss": 23.8755, + "step": 2243 + }, + { + "epoch": 0.09353507565337002, + "grad_norm": 151.0, + "learning_rate": 9.894549871856466e-05, + "loss": 9.5629, + "step": 2244 + }, + { + "epoch": 0.09357675795089826, + "grad_norm": 164.0, + "learning_rate": 9.89441192975171e-05, + "loss": 10.5005, + "step": 2245 + }, + { + "epoch": 0.0936184402484265, + "grad_norm": 820.0, + "learning_rate": 9.894273898445863e-05, + "loss": 22.2504, + "step": 2246 + }, + { + "epoch": 0.09366012254595474, + "grad_norm": 189.0, + "learning_rate": 9.894135777941439e-05, + "loss": 8.9381, + "step": 2247 + }, + { + "epoch": 0.09370180484348298, + "grad_norm": 356.0, + "learning_rate": 9.893997568240956e-05, + "loss": 15.0005, + "step": 2248 + }, + { + "epoch": 0.09374348714101122, + "grad_norm": 124.5, + "learning_rate": 9.893859269346933e-05, + "loss": 8.6884, + "step": 2249 + }, + { + "epoch": 0.09378516943853946, + "grad_norm": 1176.0, + "learning_rate": 9.89372088126189e-05, + "loss": 27.256, + "step": 2250 + }, + { + "epoch": 0.0938268517360677, + "grad_norm": 470.0, + "learning_rate": 9.893582403988349e-05, + "loss": 16.1254, + "step": 2251 + }, + { + "epoch": 0.09386853403359594, + "grad_norm": 440.0, + "learning_rate": 9.893443837528835e-05, + "loss": 15.3127, + "step": 2252 + }, + { + "epoch": 0.09391021633112417, + "grad_norm": 356.0, + "learning_rate": 9.893305181885873e-05, + "loss": 14.0019, + "step": 2253 + }, + { + "epoch": 0.09395189862865241, + "grad_norm": 748.0, + "learning_rate": 9.89316643706199e-05, + "loss": 21.6261, + "step": 2254 + }, + { + "epoch": 0.09399358092618065, + "grad_norm": 498.0, + "learning_rate": 9.893027603059712e-05, + "loss": 20.0011, + "step": 2255 + }, + { + "epoch": 0.09403526322370889, + "grad_norm": 608.0, + "learning_rate": 9.892888679881573e-05, + "loss": 18.126, + "step": 2256 + }, + { + "epoch": 0.09407694552123713, + "grad_norm": 402.0, + "learning_rate": 9.892749667530105e-05, + "loss": 14.7507, + "step": 2257 + }, + { + "epoch": 0.09411862781876537, + "grad_norm": 250.0, + "learning_rate": 9.892610566007838e-05, + "loss": 12.9379, + "step": 2258 + }, + { + "epoch": 0.09416031011629361, + "grad_norm": 252.0, + "learning_rate": 9.89247137531731e-05, + "loss": 12.2505, + "step": 2259 + }, + { + "epoch": 0.09420199241382185, + "grad_norm": 306.0, + "learning_rate": 9.892332095461056e-05, + "loss": 13.3129, + "step": 2260 + }, + { + "epoch": 0.09424367471135009, + "grad_norm": 1504.0, + "learning_rate": 9.892192726441615e-05, + "loss": 28.7547, + "step": 2261 + }, + { + "epoch": 0.09428535700887833, + "grad_norm": 324.0, + "learning_rate": 9.89205326826153e-05, + "loss": 13.3753, + "step": 2262 + }, + { + "epoch": 0.09432703930640657, + "grad_norm": 424.0, + "learning_rate": 9.891913720923337e-05, + "loss": 15.5636, + "step": 2263 + }, + { + "epoch": 0.09436872160393481, + "grad_norm": 432.0, + "learning_rate": 9.891774084429584e-05, + "loss": 12.505, + "step": 2264 + }, + { + "epoch": 0.09441040390146305, + "grad_norm": 141.0, + "learning_rate": 9.891634358782814e-05, + "loss": 10.3755, + "step": 2265 + }, + { + "epoch": 0.09445208619899129, + "grad_norm": 516.0, + "learning_rate": 9.891494543985573e-05, + "loss": 18.0012, + "step": 2266 + }, + { + "epoch": 0.09449376849651953, + "grad_norm": 548.0, + "learning_rate": 9.89135464004041e-05, + "loss": 18.0003, + "step": 2267 + }, + { + "epoch": 0.09453545079404777, + "grad_norm": 176.0, + "learning_rate": 9.891214646949874e-05, + "loss": 10.0004, + "step": 2268 + }, + { + "epoch": 0.094577133091576, + "grad_norm": 122.5, + "learning_rate": 9.891074564716516e-05, + "loss": 7.9704, + "step": 2269 + }, + { + "epoch": 0.09461881538910424, + "grad_norm": 254.0, + "learning_rate": 9.89093439334289e-05, + "loss": 11.3764, + "step": 2270 + }, + { + "epoch": 0.09466049768663248, + "grad_norm": 528.0, + "learning_rate": 9.890794132831551e-05, + "loss": 16.2513, + "step": 2271 + }, + { + "epoch": 0.09470217998416072, + "grad_norm": 504.0, + "learning_rate": 9.890653783185055e-05, + "loss": 17.8755, + "step": 2272 + }, + { + "epoch": 0.09474386228168896, + "grad_norm": 732.0, + "learning_rate": 9.890513344405961e-05, + "loss": 20.1284, + "step": 2273 + }, + { + "epoch": 0.0947855445792172, + "grad_norm": 260.0, + "learning_rate": 9.890372816496825e-05, + "loss": 12.3756, + "step": 2274 + }, + { + "epoch": 0.09482722687674544, + "grad_norm": 740.0, + "learning_rate": 9.89023219946021e-05, + "loss": 19.8799, + "step": 2275 + }, + { + "epoch": 0.09486890917427368, + "grad_norm": 548.0, + "learning_rate": 9.890091493298681e-05, + "loss": 17.0005, + "step": 2276 + }, + { + "epoch": 0.09491059147180192, + "grad_norm": 486.0, + "learning_rate": 9.8899506980148e-05, + "loss": 14.813, + "step": 2277 + }, + { + "epoch": 0.09495227376933016, + "grad_norm": 68.5, + "learning_rate": 9.889809813611133e-05, + "loss": 7.9382, + "step": 2278 + }, + { + "epoch": 0.09499395606685841, + "grad_norm": 298.0, + "learning_rate": 9.889668840090247e-05, + "loss": 11.5002, + "step": 2279 + }, + { + "epoch": 0.09503563836438665, + "grad_norm": 268.0, + "learning_rate": 9.889527777454715e-05, + "loss": 12.4383, + "step": 2280 + }, + { + "epoch": 0.09507732066191489, + "grad_norm": 384.0, + "learning_rate": 9.889386625707102e-05, + "loss": 14.1893, + "step": 2281 + }, + { + "epoch": 0.09511900295944313, + "grad_norm": 312.0, + "learning_rate": 9.889245384849985e-05, + "loss": 13.8765, + "step": 2282 + }, + { + "epoch": 0.09516068525697137, + "grad_norm": 400.0, + "learning_rate": 9.889104054885938e-05, + "loss": 15.3758, + "step": 2283 + }, + { + "epoch": 0.09520236755449961, + "grad_norm": 358.0, + "learning_rate": 9.888962635817535e-05, + "loss": 14.001, + "step": 2284 + }, + { + "epoch": 0.09524404985202785, + "grad_norm": 320.0, + "learning_rate": 9.888821127647354e-05, + "loss": 12.0628, + "step": 2285 + }, + { + "epoch": 0.09528573214955609, + "grad_norm": 592.0, + "learning_rate": 9.888679530377973e-05, + "loss": 17.6254, + "step": 2286 + }, + { + "epoch": 0.09532741444708433, + "grad_norm": 336.0, + "learning_rate": 9.888537844011974e-05, + "loss": 13.938, + "step": 2287 + }, + { + "epoch": 0.09536909674461257, + "grad_norm": 278.0, + "learning_rate": 9.88839606855194e-05, + "loss": 14.0012, + "step": 2288 + }, + { + "epoch": 0.0954107790421408, + "grad_norm": 436.0, + "learning_rate": 9.888254204000451e-05, + "loss": 15.4378, + "step": 2289 + }, + { + "epoch": 0.09545246133966905, + "grad_norm": 103.0, + "learning_rate": 9.888112250360098e-05, + "loss": 8.1892, + "step": 2290 + }, + { + "epoch": 0.09549414363719728, + "grad_norm": 344.0, + "learning_rate": 9.887970207633464e-05, + "loss": 14.0636, + "step": 2291 + }, + { + "epoch": 0.09553582593472552, + "grad_norm": 1288.0, + "learning_rate": 9.887828075823139e-05, + "loss": 33.5003, + "step": 2292 + }, + { + "epoch": 0.09557750823225376, + "grad_norm": 892.0, + "learning_rate": 9.887685854931714e-05, + "loss": 23.8754, + "step": 2293 + }, + { + "epoch": 0.095619190529782, + "grad_norm": 232.0, + "learning_rate": 9.887543544961779e-05, + "loss": 11.6883, + "step": 2294 + }, + { + "epoch": 0.09566087282731024, + "grad_norm": 684.0, + "learning_rate": 9.887401145915931e-05, + "loss": 22.6255, + "step": 2295 + }, + { + "epoch": 0.09570255512483848, + "grad_norm": 394.0, + "learning_rate": 9.887258657796762e-05, + "loss": 14.1878, + "step": 2296 + }, + { + "epoch": 0.09574423742236672, + "grad_norm": 430.0, + "learning_rate": 9.88711608060687e-05, + "loss": 14.8131, + "step": 2297 + }, + { + "epoch": 0.09578591971989496, + "grad_norm": 448.0, + "learning_rate": 9.886973414348855e-05, + "loss": 17.1254, + "step": 2298 + }, + { + "epoch": 0.0958276020174232, + "grad_norm": 246.0, + "learning_rate": 9.886830659025315e-05, + "loss": 12.5632, + "step": 2299 + }, + { + "epoch": 0.09586928431495144, + "grad_norm": 175.0, + "learning_rate": 9.88668781463885e-05, + "loss": 10.3755, + "step": 2300 + }, + { + "epoch": 0.09591096661247968, + "grad_norm": 576.0, + "learning_rate": 9.886544881192069e-05, + "loss": 18.2502, + "step": 2301 + }, + { + "epoch": 0.09595264891000792, + "grad_norm": 159.0, + "learning_rate": 9.886401858687573e-05, + "loss": 6.9068, + "step": 2302 + }, + { + "epoch": 0.09599433120753616, + "grad_norm": 332.0, + "learning_rate": 9.886258747127969e-05, + "loss": 14.0009, + "step": 2303 + }, + { + "epoch": 0.0960360135050644, + "grad_norm": 478.0, + "learning_rate": 9.886115546515865e-05, + "loss": 16.2502, + "step": 2304 + }, + { + "epoch": 0.09607769580259264, + "grad_norm": 151.0, + "learning_rate": 9.885972256853873e-05, + "loss": 8.6256, + "step": 2305 + }, + { + "epoch": 0.09611937810012088, + "grad_norm": 1440.0, + "learning_rate": 9.8858288781446e-05, + "loss": 36.7506, + "step": 2306 + }, + { + "epoch": 0.09616106039764911, + "grad_norm": 872.0, + "learning_rate": 9.885685410390665e-05, + "loss": 22.5049, + "step": 2307 + }, + { + "epoch": 0.09620274269517735, + "grad_norm": 182.0, + "learning_rate": 9.885541853594677e-05, + "loss": 9.8137, + "step": 2308 + }, + { + "epoch": 0.0962444249927056, + "grad_norm": 620.0, + "learning_rate": 9.885398207759257e-05, + "loss": 18.126, + "step": 2309 + }, + { + "epoch": 0.09628610729023383, + "grad_norm": 207.0, + "learning_rate": 9.885254472887021e-05, + "loss": 11.813, + "step": 2310 + }, + { + "epoch": 0.09632778958776207, + "grad_norm": 348.0, + "learning_rate": 9.885110648980588e-05, + "loss": 15.3754, + "step": 2311 + }, + { + "epoch": 0.09636947188529031, + "grad_norm": 390.0, + "learning_rate": 9.884966736042581e-05, + "loss": 15.9385, + "step": 2312 + }, + { + "epoch": 0.09641115418281855, + "grad_norm": 278.0, + "learning_rate": 9.884822734075619e-05, + "loss": 12.938, + "step": 2313 + }, + { + "epoch": 0.09645283648034679, + "grad_norm": 528.0, + "learning_rate": 9.88467864308233e-05, + "loss": 17.6289, + "step": 2314 + }, + { + "epoch": 0.09649451877787503, + "grad_norm": 604.0, + "learning_rate": 9.884534463065341e-05, + "loss": 17.7503, + "step": 2315 + }, + { + "epoch": 0.09653620107540328, + "grad_norm": 588.0, + "learning_rate": 9.884390194027276e-05, + "loss": 19.5022, + "step": 2316 + }, + { + "epoch": 0.09657788337293152, + "grad_norm": 175.0, + "learning_rate": 9.884245835970767e-05, + "loss": 10.6255, + "step": 2317 + }, + { + "epoch": 0.09661956567045976, + "grad_norm": 436.0, + "learning_rate": 9.884101388898443e-05, + "loss": 14.9384, + "step": 2318 + }, + { + "epoch": 0.096661247967988, + "grad_norm": 147.0, + "learning_rate": 9.883956852812938e-05, + "loss": 9.5628, + "step": 2319 + }, + { + "epoch": 0.09670293026551624, + "grad_norm": 86.0, + "learning_rate": 9.883812227716885e-05, + "loss": 5.5944, + "step": 2320 + }, + { + "epoch": 0.09674461256304448, + "grad_norm": 270.0, + "learning_rate": 9.883667513612923e-05, + "loss": 12.8755, + "step": 2321 + }, + { + "epoch": 0.09678629486057272, + "grad_norm": 700.0, + "learning_rate": 9.883522710503686e-05, + "loss": 20.5009, + "step": 2322 + }, + { + "epoch": 0.09682797715810096, + "grad_norm": 230.0, + "learning_rate": 9.883377818391812e-05, + "loss": 12.313, + "step": 2323 + }, + { + "epoch": 0.0968696594556292, + "grad_norm": 169.0, + "learning_rate": 9.883232837279946e-05, + "loss": 10.3753, + "step": 2324 + }, + { + "epoch": 0.09691134175315744, + "grad_norm": 494.0, + "learning_rate": 9.883087767170727e-05, + "loss": 16.6253, + "step": 2325 + }, + { + "epoch": 0.09695302405068568, + "grad_norm": 692.0, + "learning_rate": 9.8829426080668e-05, + "loss": 20.8755, + "step": 2326 + }, + { + "epoch": 0.09699470634821392, + "grad_norm": 412.0, + "learning_rate": 9.882797359970812e-05, + "loss": 13.1264, + "step": 2327 + }, + { + "epoch": 0.09703638864574216, + "grad_norm": 342.0, + "learning_rate": 9.882652022885406e-05, + "loss": 13.8133, + "step": 2328 + }, + { + "epoch": 0.0970780709432704, + "grad_norm": 276.0, + "learning_rate": 9.882506596813235e-05, + "loss": 12.6266, + "step": 2329 + }, + { + "epoch": 0.09711975324079863, + "grad_norm": 724.0, + "learning_rate": 9.882361081756948e-05, + "loss": 21.3754, + "step": 2330 + }, + { + "epoch": 0.09716143553832687, + "grad_norm": 428.0, + "learning_rate": 9.882215477719197e-05, + "loss": 13.8754, + "step": 2331 + }, + { + "epoch": 0.09720311783585511, + "grad_norm": 268.0, + "learning_rate": 9.882069784702635e-05, + "loss": 11.8131, + "step": 2332 + }, + { + "epoch": 0.09724480013338335, + "grad_norm": 362.0, + "learning_rate": 9.881924002709918e-05, + "loss": 13.9407, + "step": 2333 + }, + { + "epoch": 0.09728648243091159, + "grad_norm": 142.0, + "learning_rate": 9.881778131743702e-05, + "loss": 8.5633, + "step": 2334 + }, + { + "epoch": 0.09732816472843983, + "grad_norm": 250.0, + "learning_rate": 9.881632171806648e-05, + "loss": 11.6255, + "step": 2335 + }, + { + "epoch": 0.09736984702596807, + "grad_norm": 215.0, + "learning_rate": 9.881486122901414e-05, + "loss": 9.6268, + "step": 2336 + }, + { + "epoch": 0.09741152932349631, + "grad_norm": 236.0, + "learning_rate": 9.88133998503066e-05, + "loss": 11.2504, + "step": 2337 + }, + { + "epoch": 0.09745321162102455, + "grad_norm": 130.0, + "learning_rate": 9.881193758197052e-05, + "loss": 9.6879, + "step": 2338 + }, + { + "epoch": 0.09749489391855279, + "grad_norm": 304.0, + "learning_rate": 9.881047442403255e-05, + "loss": 10.0665, + "step": 2339 + }, + { + "epoch": 0.09753657621608103, + "grad_norm": 348.0, + "learning_rate": 9.880901037651935e-05, + "loss": 13.8755, + "step": 2340 + }, + { + "epoch": 0.09757825851360927, + "grad_norm": 500.0, + "learning_rate": 9.88075454394576e-05, + "loss": 16.3759, + "step": 2341 + }, + { + "epoch": 0.0976199408111375, + "grad_norm": 1520.0, + "learning_rate": 9.880607961287401e-05, + "loss": 29.1306, + "step": 2342 + }, + { + "epoch": 0.09766162310866575, + "grad_norm": 350.0, + "learning_rate": 9.880461289679528e-05, + "loss": 13.1883, + "step": 2343 + }, + { + "epoch": 0.09770330540619399, + "grad_norm": 262.0, + "learning_rate": 9.880314529124816e-05, + "loss": 13.0004, + "step": 2344 + }, + { + "epoch": 0.09774498770372222, + "grad_norm": 366.0, + "learning_rate": 9.880167679625937e-05, + "loss": 13.6878, + "step": 2345 + }, + { + "epoch": 0.09778667000125046, + "grad_norm": 284.0, + "learning_rate": 9.880020741185569e-05, + "loss": 11.6255, + "step": 2346 + }, + { + "epoch": 0.0978283522987787, + "grad_norm": 356.0, + "learning_rate": 9.87987371380639e-05, + "loss": 13.0003, + "step": 2347 + }, + { + "epoch": 0.09787003459630694, + "grad_norm": 290.0, + "learning_rate": 9.879726597491079e-05, + "loss": 13.2505, + "step": 2348 + }, + { + "epoch": 0.09791171689383518, + "grad_norm": 230.0, + "learning_rate": 9.879579392242318e-05, + "loss": 11.1255, + "step": 2349 + }, + { + "epoch": 0.09795339919136342, + "grad_norm": 344.0, + "learning_rate": 9.879432098062789e-05, + "loss": 14.6879, + "step": 2350 + }, + { + "epoch": 0.09799508148889166, + "grad_norm": 131.0, + "learning_rate": 9.879284714955179e-05, + "loss": 8.5011, + "step": 2351 + }, + { + "epoch": 0.09803676378641991, + "grad_norm": 568.0, + "learning_rate": 9.87913724292217e-05, + "loss": 18.626, + "step": 2352 + }, + { + "epoch": 0.09807844608394815, + "grad_norm": 368.0, + "learning_rate": 9.87898968196645e-05, + "loss": 14.0632, + "step": 2353 + }, + { + "epoch": 0.09812012838147639, + "grad_norm": 450.0, + "learning_rate": 9.878842032090713e-05, + "loss": 16.2504, + "step": 2354 + }, + { + "epoch": 0.09816181067900463, + "grad_norm": 221.0, + "learning_rate": 9.878694293297645e-05, + "loss": 11.1878, + "step": 2355 + }, + { + "epoch": 0.09820349297653287, + "grad_norm": 178.0, + "learning_rate": 9.878546465589942e-05, + "loss": 9.5628, + "step": 2356 + }, + { + "epoch": 0.09824517527406111, + "grad_norm": 153.0, + "learning_rate": 9.878398548970295e-05, + "loss": 10.1877, + "step": 2357 + }, + { + "epoch": 0.09828685757158935, + "grad_norm": 404.0, + "learning_rate": 9.878250543441401e-05, + "loss": 16.0008, + "step": 2358 + }, + { + "epoch": 0.09832853986911759, + "grad_norm": 888.0, + "learning_rate": 9.878102449005959e-05, + "loss": 26.0008, + "step": 2359 + }, + { + "epoch": 0.09837022216664583, + "grad_norm": 159.0, + "learning_rate": 9.877954265666667e-05, + "loss": 10.5633, + "step": 2360 + }, + { + "epoch": 0.09841190446417407, + "grad_norm": 203.0, + "learning_rate": 9.877805993426225e-05, + "loss": 10.813, + "step": 2361 + }, + { + "epoch": 0.09845358676170231, + "grad_norm": 524.0, + "learning_rate": 9.877657632287335e-05, + "loss": 17.0005, + "step": 2362 + }, + { + "epoch": 0.09849526905923055, + "grad_norm": 232.0, + "learning_rate": 9.877509182252703e-05, + "loss": 12.063, + "step": 2363 + }, + { + "epoch": 0.09853695135675879, + "grad_norm": 700.0, + "learning_rate": 9.877360643325033e-05, + "loss": 21.6256, + "step": 2364 + }, + { + "epoch": 0.09857863365428703, + "grad_norm": 532.0, + "learning_rate": 9.877212015507031e-05, + "loss": 17.5014, + "step": 2365 + }, + { + "epoch": 0.09862031595181527, + "grad_norm": 304.0, + "learning_rate": 9.877063298801407e-05, + "loss": 13.0003, + "step": 2366 + }, + { + "epoch": 0.0986619982493435, + "grad_norm": 318.0, + "learning_rate": 9.876914493210874e-05, + "loss": 14.0007, + "step": 2367 + }, + { + "epoch": 0.09870368054687174, + "grad_norm": 326.0, + "learning_rate": 9.87676559873814e-05, + "loss": 13.3133, + "step": 2368 + }, + { + "epoch": 0.09874536284439998, + "grad_norm": 71.0, + "learning_rate": 9.876616615385918e-05, + "loss": 7.8128, + "step": 2369 + }, + { + "epoch": 0.09878704514192822, + "grad_norm": 1012.0, + "learning_rate": 9.876467543156928e-05, + "loss": 23.8805, + "step": 2370 + }, + { + "epoch": 0.09882872743945646, + "grad_norm": 416.0, + "learning_rate": 9.876318382053884e-05, + "loss": 15.3769, + "step": 2371 + }, + { + "epoch": 0.0988704097369847, + "grad_norm": 454.0, + "learning_rate": 9.876169132079503e-05, + "loss": 16.6257, + "step": 2372 + }, + { + "epoch": 0.09891209203451294, + "grad_norm": 141.0, + "learning_rate": 9.876019793236509e-05, + "loss": 8.5005, + "step": 2373 + }, + { + "epoch": 0.09895377433204118, + "grad_norm": 189.0, + "learning_rate": 9.875870365527618e-05, + "loss": 11.563, + "step": 2374 + }, + { + "epoch": 0.09899545662956942, + "grad_norm": 556.0, + "learning_rate": 9.875720848955559e-05, + "loss": 15.7545, + "step": 2375 + }, + { + "epoch": 0.09903713892709766, + "grad_norm": 183.0, + "learning_rate": 9.875571243523055e-05, + "loss": 11.5631, + "step": 2376 + }, + { + "epoch": 0.0990788212246259, + "grad_norm": 205.0, + "learning_rate": 9.875421549232831e-05, + "loss": 9.001, + "step": 2377 + }, + { + "epoch": 0.09912050352215414, + "grad_norm": 1416.0, + "learning_rate": 9.875271766087617e-05, + "loss": 33.0048, + "step": 2378 + }, + { + "epoch": 0.09916218581968238, + "grad_norm": 233.0, + "learning_rate": 9.875121894090142e-05, + "loss": 11.6255, + "step": 2379 + }, + { + "epoch": 0.09920386811721062, + "grad_norm": 186.0, + "learning_rate": 9.874971933243139e-05, + "loss": 11.1253, + "step": 2380 + }, + { + "epoch": 0.09924555041473886, + "grad_norm": 138.0, + "learning_rate": 9.874821883549338e-05, + "loss": 9.3762, + "step": 2381 + }, + { + "epoch": 0.0992872327122671, + "grad_norm": 624.0, + "learning_rate": 9.874671745011477e-05, + "loss": 21.0012, + "step": 2382 + }, + { + "epoch": 0.09932891500979533, + "grad_norm": 300.0, + "learning_rate": 9.874521517632289e-05, + "loss": 12.9384, + "step": 2383 + }, + { + "epoch": 0.09937059730732357, + "grad_norm": 296.0, + "learning_rate": 9.874371201414517e-05, + "loss": 12.8754, + "step": 2384 + }, + { + "epoch": 0.09941227960485181, + "grad_norm": 544.0, + "learning_rate": 9.874220796360894e-05, + "loss": 15.4383, + "step": 2385 + }, + { + "epoch": 0.09945396190238005, + "grad_norm": 192.0, + "learning_rate": 9.874070302474165e-05, + "loss": 10.3131, + "step": 2386 + }, + { + "epoch": 0.09949564419990829, + "grad_norm": 372.0, + "learning_rate": 9.873919719757072e-05, + "loss": 11.6286, + "step": 2387 + }, + { + "epoch": 0.09953732649743653, + "grad_norm": 147.0, + "learning_rate": 9.873769048212359e-05, + "loss": 9.1883, + "step": 2388 + }, + { + "epoch": 0.09957900879496479, + "grad_norm": 253.0, + "learning_rate": 9.873618287842773e-05, + "loss": 10.8138, + "step": 2389 + }, + { + "epoch": 0.09962069109249302, + "grad_norm": 408.0, + "learning_rate": 9.87346743865106e-05, + "loss": 16.3755, + "step": 2390 + }, + { + "epoch": 0.09966237339002126, + "grad_norm": 576.0, + "learning_rate": 9.873316500639972e-05, + "loss": 19.2504, + "step": 2391 + }, + { + "epoch": 0.0997040556875495, + "grad_norm": 692.0, + "learning_rate": 9.873165473812258e-05, + "loss": 20.6255, + "step": 2392 + }, + { + "epoch": 0.09974573798507774, + "grad_norm": 187.0, + "learning_rate": 9.873014358170669e-05, + "loss": 8.188, + "step": 2393 + }, + { + "epoch": 0.09978742028260598, + "grad_norm": 115.5, + "learning_rate": 9.872863153717961e-05, + "loss": 9.0632, + "step": 2394 + }, + { + "epoch": 0.09982910258013422, + "grad_norm": 472.0, + "learning_rate": 9.872711860456891e-05, + "loss": 15.8161, + "step": 2395 + }, + { + "epoch": 0.09987078487766246, + "grad_norm": 288.0, + "learning_rate": 9.872560478390214e-05, + "loss": 10.378, + "step": 2396 + }, + { + "epoch": 0.0999124671751907, + "grad_norm": 223.0, + "learning_rate": 9.872409007520691e-05, + "loss": 12.1888, + "step": 2397 + }, + { + "epoch": 0.09995414947271894, + "grad_norm": 194.0, + "learning_rate": 9.87225744785108e-05, + "loss": 10.2507, + "step": 2398 + }, + { + "epoch": 0.09999583177024718, + "grad_norm": 440.0, + "learning_rate": 9.872105799384144e-05, + "loss": 15.0627, + "step": 2399 + }, + { + "epoch": 0.10003751406777542, + "grad_norm": 278.0, + "learning_rate": 9.871954062122648e-05, + "loss": 13.1254, + "step": 2400 + }, + { + "epoch": 0.10007919636530366, + "grad_norm": 81.5, + "learning_rate": 9.871802236069356e-05, + "loss": 8.188, + "step": 2401 + }, + { + "epoch": 0.1001208786628319, + "grad_norm": 252.0, + "learning_rate": 9.871650321227038e-05, + "loss": 12.1254, + "step": 2402 + }, + { + "epoch": 0.10016256096036014, + "grad_norm": 592.0, + "learning_rate": 9.871498317598457e-05, + "loss": 18.2504, + "step": 2403 + }, + { + "epoch": 0.10020424325788838, + "grad_norm": 616.0, + "learning_rate": 9.871346225186389e-05, + "loss": 20.1257, + "step": 2404 + }, + { + "epoch": 0.10024592555541662, + "grad_norm": 696.0, + "learning_rate": 9.871194043993603e-05, + "loss": 21.1252, + "step": 2405 + }, + { + "epoch": 0.10028760785294485, + "grad_norm": 724.0, + "learning_rate": 9.871041774022873e-05, + "loss": 21.0003, + "step": 2406 + }, + { + "epoch": 0.1003292901504731, + "grad_norm": 238.0, + "learning_rate": 9.870889415276975e-05, + "loss": 11.8753, + "step": 2407 + }, + { + "epoch": 0.10037097244800133, + "grad_norm": 326.0, + "learning_rate": 9.870736967758684e-05, + "loss": 13.5634, + "step": 2408 + }, + { + "epoch": 0.10041265474552957, + "grad_norm": 324.0, + "learning_rate": 9.87058443147078e-05, + "loss": 13.6879, + "step": 2409 + }, + { + "epoch": 0.10045433704305781, + "grad_norm": 1216.0, + "learning_rate": 9.870431806416043e-05, + "loss": 28.1303, + "step": 2410 + }, + { + "epoch": 0.10049601934058605, + "grad_norm": 288.0, + "learning_rate": 9.870279092597252e-05, + "loss": 13.3756, + "step": 2411 + }, + { + "epoch": 0.10053770163811429, + "grad_norm": 222.0, + "learning_rate": 9.870126290017194e-05, + "loss": 11.8757, + "step": 2412 + }, + { + "epoch": 0.10057938393564253, + "grad_norm": 852.0, + "learning_rate": 9.869973398678651e-05, + "loss": 22.2509, + "step": 2413 + }, + { + "epoch": 0.10062106623317077, + "grad_norm": 288.0, + "learning_rate": 9.869820418584412e-05, + "loss": 11.4428, + "step": 2414 + }, + { + "epoch": 0.10066274853069901, + "grad_norm": 260.0, + "learning_rate": 9.869667349737261e-05, + "loss": 12.9383, + "step": 2415 + }, + { + "epoch": 0.10070443082822725, + "grad_norm": 532.0, + "learning_rate": 9.869514192139993e-05, + "loss": 16.2543, + "step": 2416 + }, + { + "epoch": 0.10074611312575549, + "grad_norm": 446.0, + "learning_rate": 9.869360945795395e-05, + "loss": 15.2502, + "step": 2417 + }, + { + "epoch": 0.10078779542328373, + "grad_norm": 63.25, + "learning_rate": 9.869207610706261e-05, + "loss": 7.3134, + "step": 2418 + }, + { + "epoch": 0.10082947772081197, + "grad_norm": 580.0, + "learning_rate": 9.869054186875387e-05, + "loss": 18.3759, + "step": 2419 + }, + { + "epoch": 0.1008711600183402, + "grad_norm": 241.0, + "learning_rate": 9.868900674305567e-05, + "loss": 13.5007, + "step": 2420 + }, + { + "epoch": 0.10091284231586845, + "grad_norm": 244.0, + "learning_rate": 9.868747072999602e-05, + "loss": 11.9383, + "step": 2421 + }, + { + "epoch": 0.10095452461339668, + "grad_norm": 270.0, + "learning_rate": 9.86859338296029e-05, + "loss": 12.5628, + "step": 2422 + }, + { + "epoch": 0.10099620691092492, + "grad_norm": 258.0, + "learning_rate": 9.868439604190429e-05, + "loss": 12.3131, + "step": 2423 + }, + { + "epoch": 0.10103788920845316, + "grad_norm": 768.0, + "learning_rate": 9.868285736692824e-05, + "loss": 21.0011, + "step": 2424 + }, + { + "epoch": 0.10107957150598142, + "grad_norm": 322.0, + "learning_rate": 9.86813178047028e-05, + "loss": 10.1889, + "step": 2425 + }, + { + "epoch": 0.10112125380350966, + "grad_norm": 442.0, + "learning_rate": 9.867977735525602e-05, + "loss": 15.5013, + "step": 2426 + }, + { + "epoch": 0.1011629361010379, + "grad_norm": 338.0, + "learning_rate": 9.8678236018616e-05, + "loss": 11.439, + "step": 2427 + }, + { + "epoch": 0.10120461839856613, + "grad_norm": 264.0, + "learning_rate": 9.867669379481078e-05, + "loss": 12.8757, + "step": 2428 + }, + { + "epoch": 0.10124630069609437, + "grad_norm": 462.0, + "learning_rate": 9.86751506838685e-05, + "loss": 14.6888, + "step": 2429 + }, + { + "epoch": 0.10128798299362261, + "grad_norm": 528.0, + "learning_rate": 9.867360668581726e-05, + "loss": 17.6259, + "step": 2430 + }, + { + "epoch": 0.10132966529115085, + "grad_norm": 1192.0, + "learning_rate": 9.867206180068525e-05, + "loss": 31.0002, + "step": 2431 + }, + { + "epoch": 0.10137134758867909, + "grad_norm": 470.0, + "learning_rate": 9.867051602850057e-05, + "loss": 16.2511, + "step": 2432 + }, + { + "epoch": 0.10141302988620733, + "grad_norm": 414.0, + "learning_rate": 9.866896936929142e-05, + "loss": 14.938, + "step": 2433 + }, + { + "epoch": 0.10145471218373557, + "grad_norm": 223.0, + "learning_rate": 9.866742182308599e-05, + "loss": 12.0629, + "step": 2434 + }, + { + "epoch": 0.10149639448126381, + "grad_norm": 322.0, + "learning_rate": 9.866587338991248e-05, + "loss": 12.8135, + "step": 2435 + }, + { + "epoch": 0.10153807677879205, + "grad_norm": 344.0, + "learning_rate": 9.86643240697991e-05, + "loss": 14.0003, + "step": 2436 + }, + { + "epoch": 0.10157975907632029, + "grad_norm": 222.0, + "learning_rate": 9.86627738627741e-05, + "loss": 13.1256, + "step": 2437 + }, + { + "epoch": 0.10162144137384853, + "grad_norm": 123.5, + "learning_rate": 9.866122276886571e-05, + "loss": 9.6257, + "step": 2438 + }, + { + "epoch": 0.10166312367137677, + "grad_norm": 386.0, + "learning_rate": 9.865967078810223e-05, + "loss": 14.0639, + "step": 2439 + }, + { + "epoch": 0.10170480596890501, + "grad_norm": 286.0, + "learning_rate": 9.865811792051191e-05, + "loss": 11.8759, + "step": 2440 + }, + { + "epoch": 0.10174648826643325, + "grad_norm": 580.0, + "learning_rate": 9.86565641661231e-05, + "loss": 18.5021, + "step": 2441 + }, + { + "epoch": 0.10178817056396149, + "grad_norm": 474.0, + "learning_rate": 9.865500952496407e-05, + "loss": 17.2511, + "step": 2442 + }, + { + "epoch": 0.10182985286148973, + "grad_norm": 286.0, + "learning_rate": 9.865345399706319e-05, + "loss": 13.314, + "step": 2443 + }, + { + "epoch": 0.10187153515901796, + "grad_norm": 1104.0, + "learning_rate": 9.865189758244877e-05, + "loss": 27.3753, + "step": 2444 + }, + { + "epoch": 0.1019132174565462, + "grad_norm": 450.0, + "learning_rate": 9.865034028114922e-05, + "loss": 15.5012, + "step": 2445 + }, + { + "epoch": 0.10195489975407444, + "grad_norm": 400.0, + "learning_rate": 9.864878209319288e-05, + "loss": 15.0633, + "step": 2446 + }, + { + "epoch": 0.10199658205160268, + "grad_norm": 800.0, + "learning_rate": 9.864722301860817e-05, + "loss": 23.1258, + "step": 2447 + }, + { + "epoch": 0.10203826434913092, + "grad_norm": 218.0, + "learning_rate": 9.864566305742352e-05, + "loss": 12.3133, + "step": 2448 + }, + { + "epoch": 0.10207994664665916, + "grad_norm": 520.0, + "learning_rate": 9.864410220966731e-05, + "loss": 18.8754, + "step": 2449 + }, + { + "epoch": 0.1021216289441874, + "grad_norm": 245.0, + "learning_rate": 9.864254047536806e-05, + "loss": 12.0002, + "step": 2450 + }, + { + "epoch": 0.10216331124171564, + "grad_norm": 348.0, + "learning_rate": 9.864097785455416e-05, + "loss": 14.5635, + "step": 2451 + }, + { + "epoch": 0.10220499353924388, + "grad_norm": 860.0, + "learning_rate": 9.863941434725413e-05, + "loss": 21.7561, + "step": 2452 + }, + { + "epoch": 0.10224667583677212, + "grad_norm": 430.0, + "learning_rate": 9.863784995349646e-05, + "loss": 16.2516, + "step": 2453 + }, + { + "epoch": 0.10228835813430036, + "grad_norm": 229.0, + "learning_rate": 9.863628467330965e-05, + "loss": 11.3132, + "step": 2454 + }, + { + "epoch": 0.1023300404318286, + "grad_norm": 74.0, + "learning_rate": 9.863471850672224e-05, + "loss": 6.9069, + "step": 2455 + }, + { + "epoch": 0.10237172272935684, + "grad_norm": 218.0, + "learning_rate": 9.863315145376276e-05, + "loss": 11.0628, + "step": 2456 + }, + { + "epoch": 0.10241340502688508, + "grad_norm": 204.0, + "learning_rate": 9.863158351445979e-05, + "loss": 10.0011, + "step": 2457 + }, + { + "epoch": 0.10245508732441332, + "grad_norm": 250.0, + "learning_rate": 9.863001468884188e-05, + "loss": 11.8131, + "step": 2458 + }, + { + "epoch": 0.10249676962194156, + "grad_norm": 213.0, + "learning_rate": 9.862844497693764e-05, + "loss": 12.0009, + "step": 2459 + }, + { + "epoch": 0.1025384519194698, + "grad_norm": 808.0, + "learning_rate": 9.862687437877567e-05, + "loss": 21.6254, + "step": 2460 + }, + { + "epoch": 0.10258013421699803, + "grad_norm": 440.0, + "learning_rate": 9.862530289438461e-05, + "loss": 15.8142, + "step": 2461 + }, + { + "epoch": 0.10262181651452629, + "grad_norm": 2736.0, + "learning_rate": 9.862373052379308e-05, + "loss": 51.5051, + "step": 2462 + }, + { + "epoch": 0.10266349881205453, + "grad_norm": 396.0, + "learning_rate": 9.862215726702974e-05, + "loss": 14.6881, + "step": 2463 + }, + { + "epoch": 0.10270518110958277, + "grad_norm": 125.5, + "learning_rate": 9.862058312412326e-05, + "loss": 9.3754, + "step": 2464 + }, + { + "epoch": 0.102746863407111, + "grad_norm": 326.0, + "learning_rate": 9.861900809510236e-05, + "loss": 13.5632, + "step": 2465 + }, + { + "epoch": 0.10278854570463924, + "grad_norm": 284.0, + "learning_rate": 9.861743217999571e-05, + "loss": 11.063, + "step": 2466 + }, + { + "epoch": 0.10283022800216748, + "grad_norm": 147.0, + "learning_rate": 9.861585537883205e-05, + "loss": 10.0631, + "step": 2467 + }, + { + "epoch": 0.10287191029969572, + "grad_norm": 224.0, + "learning_rate": 9.861427769164008e-05, + "loss": 11.3136, + "step": 2468 + }, + { + "epoch": 0.10291359259722396, + "grad_norm": 664.0, + "learning_rate": 9.861269911844861e-05, + "loss": 20.5003, + "step": 2469 + }, + { + "epoch": 0.1029552748947522, + "grad_norm": 312.0, + "learning_rate": 9.86111196592864e-05, + "loss": 13.3127, + "step": 2470 + }, + { + "epoch": 0.10299695719228044, + "grad_norm": 324.0, + "learning_rate": 9.860953931418218e-05, + "loss": 13.1252, + "step": 2471 + }, + { + "epoch": 0.10303863948980868, + "grad_norm": 416.0, + "learning_rate": 9.86079580831648e-05, + "loss": 15.8134, + "step": 2472 + }, + { + "epoch": 0.10308032178733692, + "grad_norm": 612.0, + "learning_rate": 9.86063759662631e-05, + "loss": 19.6267, + "step": 2473 + }, + { + "epoch": 0.10312200408486516, + "grad_norm": 668.0, + "learning_rate": 9.860479296350586e-05, + "loss": 21.1253, + "step": 2474 + }, + { + "epoch": 0.1031636863823934, + "grad_norm": 244.0, + "learning_rate": 9.860320907492196e-05, + "loss": 11.9379, + "step": 2475 + }, + { + "epoch": 0.10320536867992164, + "grad_norm": 220.0, + "learning_rate": 9.860162430054025e-05, + "loss": 10.0006, + "step": 2476 + }, + { + "epoch": 0.10324705097744988, + "grad_norm": 1360.0, + "learning_rate": 9.860003864038962e-05, + "loss": 34.5018, + "step": 2477 + }, + { + "epoch": 0.10328873327497812, + "grad_norm": 732.0, + "learning_rate": 9.859845209449898e-05, + "loss": 19.7504, + "step": 2478 + }, + { + "epoch": 0.10333041557250636, + "grad_norm": 488.0, + "learning_rate": 9.859686466289723e-05, + "loss": 18.1254, + "step": 2479 + }, + { + "epoch": 0.1033720978700346, + "grad_norm": 512.0, + "learning_rate": 9.859527634561332e-05, + "loss": 16.6257, + "step": 2480 + }, + { + "epoch": 0.10341378016756284, + "grad_norm": 248.0, + "learning_rate": 9.859368714267617e-05, + "loss": 11.2505, + "step": 2481 + }, + { + "epoch": 0.10345546246509107, + "grad_norm": 260.0, + "learning_rate": 9.859209705411477e-05, + "loss": 11.9381, + "step": 2482 + }, + { + "epoch": 0.10349714476261931, + "grad_norm": 936.0, + "learning_rate": 9.859050607995808e-05, + "loss": 23.0061, + "step": 2483 + }, + { + "epoch": 0.10353882706014755, + "grad_norm": 61.5, + "learning_rate": 9.85889142202351e-05, + "loss": 7.3142, + "step": 2484 + }, + { + "epoch": 0.10358050935767579, + "grad_norm": 394.0, + "learning_rate": 9.858732147497486e-05, + "loss": 14.2505, + "step": 2485 + }, + { + "epoch": 0.10362219165520403, + "grad_norm": 344.0, + "learning_rate": 9.858572784420637e-05, + "loss": 13.5635, + "step": 2486 + }, + { + "epoch": 0.10366387395273227, + "grad_norm": 400.0, + "learning_rate": 9.858413332795866e-05, + "loss": 15.8128, + "step": 2487 + }, + { + "epoch": 0.10370555625026051, + "grad_norm": 1184.0, + "learning_rate": 9.858253792626083e-05, + "loss": 25.5054, + "step": 2488 + }, + { + "epoch": 0.10374723854778875, + "grad_norm": 520.0, + "learning_rate": 9.858094163914191e-05, + "loss": 16.0003, + "step": 2489 + }, + { + "epoch": 0.10378892084531699, + "grad_norm": 402.0, + "learning_rate": 9.857934446663103e-05, + "loss": 14.4382, + "step": 2490 + }, + { + "epoch": 0.10383060314284523, + "grad_norm": 282.0, + "learning_rate": 9.857774640875727e-05, + "loss": 13.0629, + "step": 2491 + }, + { + "epoch": 0.10387228544037347, + "grad_norm": 386.0, + "learning_rate": 9.85761474655498e-05, + "loss": 14.0009, + "step": 2492 + }, + { + "epoch": 0.10391396773790171, + "grad_norm": 348.0, + "learning_rate": 9.857454763703771e-05, + "loss": 14.6257, + "step": 2493 + }, + { + "epoch": 0.10395565003542995, + "grad_norm": 169.0, + "learning_rate": 9.857294692325017e-05, + "loss": 10.2504, + "step": 2494 + }, + { + "epoch": 0.10399733233295819, + "grad_norm": 410.0, + "learning_rate": 9.857134532421637e-05, + "loss": 17.5003, + "step": 2495 + }, + { + "epoch": 0.10403901463048643, + "grad_norm": 884.0, + "learning_rate": 9.85697428399655e-05, + "loss": 20.1301, + "step": 2496 + }, + { + "epoch": 0.10408069692801467, + "grad_norm": 360.0, + "learning_rate": 9.856813947052673e-05, + "loss": 14.0005, + "step": 2497 + }, + { + "epoch": 0.10412237922554292, + "grad_norm": 278.0, + "learning_rate": 9.856653521592932e-05, + "loss": 12.8754, + "step": 2498 + }, + { + "epoch": 0.10416406152307116, + "grad_norm": 332.0, + "learning_rate": 9.85649300762025e-05, + "loss": 13.5631, + "step": 2499 + }, + { + "epoch": 0.1042057438205994, + "grad_norm": 544.0, + "learning_rate": 9.856332405137552e-05, + "loss": 18.5003, + "step": 2500 + }, + { + "epoch": 0.10424742611812764, + "grad_norm": 145.0, + "learning_rate": 9.856171714147764e-05, + "loss": 9.4378, + "step": 2501 + }, + { + "epoch": 0.10428910841565588, + "grad_norm": 125.0, + "learning_rate": 9.856010934653815e-05, + "loss": 8.129, + "step": 2502 + }, + { + "epoch": 0.10433079071318412, + "grad_norm": 210.0, + "learning_rate": 9.855850066658636e-05, + "loss": 12.4378, + "step": 2503 + }, + { + "epoch": 0.10437247301071235, + "grad_norm": 468.0, + "learning_rate": 9.855689110165158e-05, + "loss": 15.5627, + "step": 2504 + }, + { + "epoch": 0.1044141553082406, + "grad_norm": 494.0, + "learning_rate": 9.855528065176316e-05, + "loss": 17.0004, + "step": 2505 + }, + { + "epoch": 0.10445583760576883, + "grad_norm": 143.0, + "learning_rate": 9.855366931695043e-05, + "loss": 10.3763, + "step": 2506 + }, + { + "epoch": 0.10449751990329707, + "grad_norm": 212.0, + "learning_rate": 9.855205709724277e-05, + "loss": 10.5051, + "step": 2507 + }, + { + "epoch": 0.10453920220082531, + "grad_norm": 398.0, + "learning_rate": 9.855044399266957e-05, + "loss": 16.0005, + "step": 2508 + }, + { + "epoch": 0.10458088449835355, + "grad_norm": 354.0, + "learning_rate": 9.85488300032602e-05, + "loss": 14.0639, + "step": 2509 + }, + { + "epoch": 0.10462256679588179, + "grad_norm": 932.0, + "learning_rate": 9.85472151290441e-05, + "loss": 27.2504, + "step": 2510 + }, + { + "epoch": 0.10466424909341003, + "grad_norm": 1472.0, + "learning_rate": 9.85455993700507e-05, + "loss": 28.507, + "step": 2511 + }, + { + "epoch": 0.10470593139093827, + "grad_norm": 406.0, + "learning_rate": 9.854398272630945e-05, + "loss": 13.4392, + "step": 2512 + }, + { + "epoch": 0.10474761368846651, + "grad_norm": 235.0, + "learning_rate": 9.854236519784978e-05, + "loss": 12.6255, + "step": 2513 + }, + { + "epoch": 0.10478929598599475, + "grad_norm": 308.0, + "learning_rate": 9.854074678470122e-05, + "loss": 13.252, + "step": 2514 + }, + { + "epoch": 0.10483097828352299, + "grad_norm": 956.0, + "learning_rate": 9.853912748689322e-05, + "loss": 23.126, + "step": 2515 + }, + { + "epoch": 0.10487266058105123, + "grad_norm": 716.0, + "learning_rate": 9.853750730445533e-05, + "loss": 21.2508, + "step": 2516 + }, + { + "epoch": 0.10491434287857947, + "grad_norm": 244.0, + "learning_rate": 9.853588623741705e-05, + "loss": 12.2504, + "step": 2517 + }, + { + "epoch": 0.1049560251761077, + "grad_norm": 454.0, + "learning_rate": 9.853426428580795e-05, + "loss": 16.5002, + "step": 2518 + }, + { + "epoch": 0.10499770747363595, + "grad_norm": 808.0, + "learning_rate": 9.853264144965755e-05, + "loss": 20.0048, + "step": 2519 + }, + { + "epoch": 0.10503938977116418, + "grad_norm": 218.0, + "learning_rate": 9.853101772899547e-05, + "loss": 11.5003, + "step": 2520 + }, + { + "epoch": 0.10508107206869242, + "grad_norm": 398.0, + "learning_rate": 9.852939312385128e-05, + "loss": 15.8132, + "step": 2521 + }, + { + "epoch": 0.10512275436622066, + "grad_norm": 540.0, + "learning_rate": 9.852776763425458e-05, + "loss": 16.507, + "step": 2522 + }, + { + "epoch": 0.1051644366637489, + "grad_norm": 1004.0, + "learning_rate": 9.852614126023503e-05, + "loss": 24.381, + "step": 2523 + }, + { + "epoch": 0.10520611896127714, + "grad_norm": 330.0, + "learning_rate": 9.852451400182223e-05, + "loss": 11.6254, + "step": 2524 + }, + { + "epoch": 0.10524780125880538, + "grad_norm": 370.0, + "learning_rate": 9.852288585904586e-05, + "loss": 14.7507, + "step": 2525 + }, + { + "epoch": 0.10528948355633362, + "grad_norm": 298.0, + "learning_rate": 9.852125683193559e-05, + "loss": 13.8133, + "step": 2526 + }, + { + "epoch": 0.10533116585386186, + "grad_norm": 338.0, + "learning_rate": 9.851962692052111e-05, + "loss": 13.8133, + "step": 2527 + }, + { + "epoch": 0.1053728481513901, + "grad_norm": 354.0, + "learning_rate": 9.851799612483211e-05, + "loss": 14.3132, + "step": 2528 + }, + { + "epoch": 0.10541453044891834, + "grad_norm": 712.0, + "learning_rate": 9.851636444489832e-05, + "loss": 20.3758, + "step": 2529 + }, + { + "epoch": 0.10545621274644658, + "grad_norm": 346.0, + "learning_rate": 9.851473188074949e-05, + "loss": 15.3128, + "step": 2530 + }, + { + "epoch": 0.10549789504397482, + "grad_norm": 253.0, + "learning_rate": 9.851309843241536e-05, + "loss": 12.5629, + "step": 2531 + }, + { + "epoch": 0.10553957734150306, + "grad_norm": 171.0, + "learning_rate": 9.851146409992572e-05, + "loss": 9.6254, + "step": 2532 + }, + { + "epoch": 0.1055812596390313, + "grad_norm": 828.0, + "learning_rate": 9.850982888331032e-05, + "loss": 23.3752, + "step": 2533 + }, + { + "epoch": 0.10562294193655954, + "grad_norm": 296.0, + "learning_rate": 9.850819278259899e-05, + "loss": 8.5013, + "step": 2534 + }, + { + "epoch": 0.10566462423408779, + "grad_norm": 266.0, + "learning_rate": 9.850655579782155e-05, + "loss": 12.6879, + "step": 2535 + }, + { + "epoch": 0.10570630653161603, + "grad_norm": 498.0, + "learning_rate": 9.850491792900782e-05, + "loss": 16.3755, + "step": 2536 + }, + { + "epoch": 0.10574798882914427, + "grad_norm": 404.0, + "learning_rate": 9.850327917618766e-05, + "loss": 15.1888, + "step": 2537 + }, + { + "epoch": 0.10578967112667251, + "grad_norm": 352.0, + "learning_rate": 9.850163953939091e-05, + "loss": 15.314, + "step": 2538 + }, + { + "epoch": 0.10583135342420075, + "grad_norm": 868.0, + "learning_rate": 9.849999901864749e-05, + "loss": 21.2546, + "step": 2539 + }, + { + "epoch": 0.10587303572172899, + "grad_norm": 171.0, + "learning_rate": 9.84983576139873e-05, + "loss": 10.2516, + "step": 2540 + }, + { + "epoch": 0.10591471801925723, + "grad_norm": 616.0, + "learning_rate": 9.84967153254402e-05, + "loss": 20.2508, + "step": 2541 + }, + { + "epoch": 0.10595640031678547, + "grad_norm": 240.0, + "learning_rate": 9.849507215303618e-05, + "loss": 13.0629, + "step": 2542 + }, + { + "epoch": 0.1059980826143137, + "grad_norm": 219.0, + "learning_rate": 9.849342809680515e-05, + "loss": 11.438, + "step": 2543 + }, + { + "epoch": 0.10603976491184194, + "grad_norm": 256.0, + "learning_rate": 9.849178315677709e-05, + "loss": 13.1887, + "step": 2544 + }, + { + "epoch": 0.10608144720937018, + "grad_norm": 572.0, + "learning_rate": 9.8490137332982e-05, + "loss": 16.2504, + "step": 2545 + }, + { + "epoch": 0.10612312950689842, + "grad_norm": 282.0, + "learning_rate": 9.848849062544984e-05, + "loss": 12.8755, + "step": 2546 + }, + { + "epoch": 0.10616481180442666, + "grad_norm": 328.0, + "learning_rate": 9.848684303421063e-05, + "loss": 13.6877, + "step": 2547 + }, + { + "epoch": 0.1062064941019549, + "grad_norm": 206.0, + "learning_rate": 9.84851945592944e-05, + "loss": 9.5629, + "step": 2548 + }, + { + "epoch": 0.10624817639948314, + "grad_norm": 356.0, + "learning_rate": 9.84835452007312e-05, + "loss": 14.0006, + "step": 2549 + }, + { + "epoch": 0.10628985869701138, + "grad_norm": 312.0, + "learning_rate": 9.848189495855108e-05, + "loss": 13.1257, + "step": 2550 + }, + { + "epoch": 0.10633154099453962, + "grad_norm": 448.0, + "learning_rate": 9.848024383278413e-05, + "loss": 17.2504, + "step": 2551 + }, + { + "epoch": 0.10637322329206786, + "grad_norm": 294.0, + "learning_rate": 9.847859182346042e-05, + "loss": 12.4384, + "step": 2552 + }, + { + "epoch": 0.1064149055895961, + "grad_norm": 209.0, + "learning_rate": 9.847693893061007e-05, + "loss": 10.6256, + "step": 2553 + }, + { + "epoch": 0.10645658788712434, + "grad_norm": 202.0, + "learning_rate": 9.847528515426321e-05, + "loss": 9.1263, + "step": 2554 + }, + { + "epoch": 0.10649827018465258, + "grad_norm": 552.0, + "learning_rate": 9.847363049444997e-05, + "loss": 18.5013, + "step": 2555 + }, + { + "epoch": 0.10653995248218082, + "grad_norm": 79.0, + "learning_rate": 9.847197495120053e-05, + "loss": 8.1877, + "step": 2556 + }, + { + "epoch": 0.10658163477970906, + "grad_norm": 314.0, + "learning_rate": 9.847031852454502e-05, + "loss": 13.0006, + "step": 2557 + }, + { + "epoch": 0.1066233170772373, + "grad_norm": 300.0, + "learning_rate": 9.846866121451366e-05, + "loss": 13.0628, + "step": 2558 + }, + { + "epoch": 0.10666499937476553, + "grad_norm": 358.0, + "learning_rate": 9.846700302113665e-05, + "loss": 14.4378, + "step": 2559 + }, + { + "epoch": 0.10670668167229377, + "grad_norm": 67.5, + "learning_rate": 9.846534394444421e-05, + "loss": 9.1258, + "step": 2560 + }, + { + "epoch": 0.10674836396982201, + "grad_norm": 532.0, + "learning_rate": 9.846368398446657e-05, + "loss": 16.8753, + "step": 2561 + }, + { + "epoch": 0.10679004626735025, + "grad_norm": 780.0, + "learning_rate": 9.846202314123399e-05, + "loss": 21.754, + "step": 2562 + }, + { + "epoch": 0.10683172856487849, + "grad_norm": 392.0, + "learning_rate": 9.846036141477673e-05, + "loss": 14.563, + "step": 2563 + }, + { + "epoch": 0.10687341086240673, + "grad_norm": 408.0, + "learning_rate": 9.845869880512508e-05, + "loss": 14.8133, + "step": 2564 + }, + { + "epoch": 0.10691509315993497, + "grad_norm": 660.0, + "learning_rate": 9.845703531230935e-05, + "loss": 20.5003, + "step": 2565 + }, + { + "epoch": 0.10695677545746321, + "grad_norm": 472.0, + "learning_rate": 9.845537093635985e-05, + "loss": 15.6307, + "step": 2566 + }, + { + "epoch": 0.10699845775499145, + "grad_norm": 344.0, + "learning_rate": 9.845370567730691e-05, + "loss": 14.7507, + "step": 2567 + }, + { + "epoch": 0.10704014005251969, + "grad_norm": 143.0, + "learning_rate": 9.845203953518089e-05, + "loss": 9.1257, + "step": 2568 + }, + { + "epoch": 0.10708182235004793, + "grad_norm": 338.0, + "learning_rate": 9.845037251001213e-05, + "loss": 15.2511, + "step": 2569 + }, + { + "epoch": 0.10712350464757617, + "grad_norm": 402.0, + "learning_rate": 9.844870460183104e-05, + "loss": 15.0008, + "step": 2570 + }, + { + "epoch": 0.10716518694510442, + "grad_norm": 228.0, + "learning_rate": 9.8447035810668e-05, + "loss": 11.5633, + "step": 2571 + }, + { + "epoch": 0.10720686924263266, + "grad_norm": 410.0, + "learning_rate": 9.844536613655346e-05, + "loss": 13.4383, + "step": 2572 + }, + { + "epoch": 0.1072485515401609, + "grad_norm": 498.0, + "learning_rate": 9.84436955795178e-05, + "loss": 17.2503, + "step": 2573 + }, + { + "epoch": 0.10729023383768914, + "grad_norm": 324.0, + "learning_rate": 9.84420241395915e-05, + "loss": 14.3128, + "step": 2574 + }, + { + "epoch": 0.10733191613521738, + "grad_norm": 140.0, + "learning_rate": 9.844035181680499e-05, + "loss": 8.688, + "step": 2575 + }, + { + "epoch": 0.10737359843274562, + "grad_norm": 1544.0, + "learning_rate": 9.843867861118878e-05, + "loss": 31.8788, + "step": 2576 + }, + { + "epoch": 0.10741528073027386, + "grad_norm": 636.0, + "learning_rate": 9.843700452277333e-05, + "loss": 15.63, + "step": 2577 + }, + { + "epoch": 0.1074569630278021, + "grad_norm": 1200.0, + "learning_rate": 9.843532955158921e-05, + "loss": 28.1272, + "step": 2578 + }, + { + "epoch": 0.10749864532533034, + "grad_norm": 952.0, + "learning_rate": 9.843365369766688e-05, + "loss": 22.2505, + "step": 2579 + }, + { + "epoch": 0.10754032762285858, + "grad_norm": 304.0, + "learning_rate": 9.843197696103694e-05, + "loss": 13.688, + "step": 2580 + }, + { + "epoch": 0.10758200992038681, + "grad_norm": 460.0, + "learning_rate": 9.843029934172989e-05, + "loss": 15.7502, + "step": 2581 + }, + { + "epoch": 0.10762369221791505, + "grad_norm": 134.0, + "learning_rate": 9.842862083977636e-05, + "loss": 9.188, + "step": 2582 + }, + { + "epoch": 0.1076653745154433, + "grad_norm": 84.5, + "learning_rate": 9.842694145520691e-05, + "loss": 9.7506, + "step": 2583 + }, + { + "epoch": 0.10770705681297153, + "grad_norm": 408.0, + "learning_rate": 9.842526118805214e-05, + "loss": 15.0628, + "step": 2584 + }, + { + "epoch": 0.10774873911049977, + "grad_norm": 191.0, + "learning_rate": 9.842358003834269e-05, + "loss": 11.1877, + "step": 2585 + }, + { + "epoch": 0.10779042140802801, + "grad_norm": 438.0, + "learning_rate": 9.84218980061092e-05, + "loss": 14.4418, + "step": 2586 + }, + { + "epoch": 0.10783210370555625, + "grad_norm": 442.0, + "learning_rate": 9.842021509138232e-05, + "loss": 16.3759, + "step": 2587 + }, + { + "epoch": 0.10787378600308449, + "grad_norm": 532.0, + "learning_rate": 9.841853129419271e-05, + "loss": 16.8753, + "step": 2588 + }, + { + "epoch": 0.10791546830061273, + "grad_norm": 304.0, + "learning_rate": 9.841684661457109e-05, + "loss": 12.3131, + "step": 2589 + }, + { + "epoch": 0.10795715059814097, + "grad_norm": 372.0, + "learning_rate": 9.841516105254813e-05, + "loss": 14.1253, + "step": 2590 + }, + { + "epoch": 0.10799883289566921, + "grad_norm": 780.0, + "learning_rate": 9.841347460815456e-05, + "loss": 18.6334, + "step": 2591 + }, + { + "epoch": 0.10804051519319745, + "grad_norm": 620.0, + "learning_rate": 9.841178728142113e-05, + "loss": 18.6255, + "step": 2592 + }, + { + "epoch": 0.10808219749072569, + "grad_norm": 298.0, + "learning_rate": 9.841009907237857e-05, + "loss": 11.7508, + "step": 2593 + }, + { + "epoch": 0.10812387978825393, + "grad_norm": 464.0, + "learning_rate": 9.840840998105764e-05, + "loss": 16.3752, + "step": 2594 + }, + { + "epoch": 0.10816556208578217, + "grad_norm": 292.0, + "learning_rate": 9.840672000748916e-05, + "loss": 12.376, + "step": 2595 + }, + { + "epoch": 0.1082072443833104, + "grad_norm": 440.0, + "learning_rate": 9.840502915170392e-05, + "loss": 16.2506, + "step": 2596 + }, + { + "epoch": 0.10824892668083864, + "grad_norm": 280.0, + "learning_rate": 9.840333741373271e-05, + "loss": 11.5025, + "step": 2597 + }, + { + "epoch": 0.10829060897836688, + "grad_norm": 460.0, + "learning_rate": 9.840164479360639e-05, + "loss": 14.5637, + "step": 2598 + }, + { + "epoch": 0.10833229127589512, + "grad_norm": 170.0, + "learning_rate": 9.839995129135579e-05, + "loss": 10.5002, + "step": 2599 + }, + { + "epoch": 0.10837397357342336, + "grad_norm": 436.0, + "learning_rate": 9.839825690701179e-05, + "loss": 16.1257, + "step": 2600 + }, + { + "epoch": 0.1084156558709516, + "grad_norm": 81.5, + "learning_rate": 9.839656164060525e-05, + "loss": 7.5942, + "step": 2601 + }, + { + "epoch": 0.10845733816847984, + "grad_norm": 174.0, + "learning_rate": 9.839486549216708e-05, + "loss": 8.5626, + "step": 2602 + }, + { + "epoch": 0.10849902046600808, + "grad_norm": 254.0, + "learning_rate": 9.839316846172819e-05, + "loss": 11.8128, + "step": 2603 + }, + { + "epoch": 0.10854070276353632, + "grad_norm": 294.0, + "learning_rate": 9.839147054931951e-05, + "loss": 12.1253, + "step": 2604 + }, + { + "epoch": 0.10858238506106456, + "grad_norm": 146.0, + "learning_rate": 9.838977175497198e-05, + "loss": 10.1254, + "step": 2605 + }, + { + "epoch": 0.1086240673585928, + "grad_norm": 134.0, + "learning_rate": 9.838807207871656e-05, + "loss": 8.6884, + "step": 2606 + }, + { + "epoch": 0.10866574965612105, + "grad_norm": 354.0, + "learning_rate": 9.838637152058425e-05, + "loss": 14.3754, + "step": 2607 + }, + { + "epoch": 0.10870743195364929, + "grad_norm": 704.0, + "learning_rate": 9.838467008060602e-05, + "loss": 22.1254, + "step": 2608 + }, + { + "epoch": 0.10874911425117753, + "grad_norm": 382.0, + "learning_rate": 9.838296775881287e-05, + "loss": 14.4396, + "step": 2609 + }, + { + "epoch": 0.10879079654870577, + "grad_norm": 368.0, + "learning_rate": 9.838126455523584e-05, + "loss": 14.5003, + "step": 2610 + }, + { + "epoch": 0.10883247884623401, + "grad_norm": 458.0, + "learning_rate": 9.837956046990597e-05, + "loss": 14.57, + "step": 2611 + }, + { + "epoch": 0.10887416114376225, + "grad_norm": 95.5, + "learning_rate": 9.837785550285432e-05, + "loss": 9.7508, + "step": 2612 + }, + { + "epoch": 0.10891584344129049, + "grad_norm": 478.0, + "learning_rate": 9.837614965411195e-05, + "loss": 18.0011, + "step": 2613 + }, + { + "epoch": 0.10895752573881873, + "grad_norm": 502.0, + "learning_rate": 9.837444292370996e-05, + "loss": 16.6257, + "step": 2614 + }, + { + "epoch": 0.10899920803634697, + "grad_norm": 1232.0, + "learning_rate": 9.837273531167946e-05, + "loss": 26.0054, + "step": 2615 + }, + { + "epoch": 0.1090408903338752, + "grad_norm": 648.0, + "learning_rate": 9.837102681805157e-05, + "loss": 18.0008, + "step": 2616 + }, + { + "epoch": 0.10908257263140345, + "grad_norm": 348.0, + "learning_rate": 9.836931744285741e-05, + "loss": 13.8129, + "step": 2617 + }, + { + "epoch": 0.10912425492893169, + "grad_norm": 342.0, + "learning_rate": 9.836760718612815e-05, + "loss": 13.938, + "step": 2618 + }, + { + "epoch": 0.10916593722645992, + "grad_norm": 576.0, + "learning_rate": 9.836589604789495e-05, + "loss": 19.0003, + "step": 2619 + }, + { + "epoch": 0.10920761952398816, + "grad_norm": 486.0, + "learning_rate": 9.8364184028189e-05, + "loss": 16.7505, + "step": 2620 + }, + { + "epoch": 0.1092493018215164, + "grad_norm": 101.5, + "learning_rate": 9.836247112704152e-05, + "loss": 7.2192, + "step": 2621 + }, + { + "epoch": 0.10929098411904464, + "grad_norm": 1432.0, + "learning_rate": 9.83607573444837e-05, + "loss": 30.3808, + "step": 2622 + }, + { + "epoch": 0.10933266641657288, + "grad_norm": 228.0, + "learning_rate": 9.835904268054678e-05, + "loss": 10.6255, + "step": 2623 + }, + { + "epoch": 0.10937434871410112, + "grad_norm": 844.0, + "learning_rate": 9.835732713526203e-05, + "loss": 20.8805, + "step": 2624 + }, + { + "epoch": 0.10941603101162936, + "grad_norm": 194.0, + "learning_rate": 9.835561070866069e-05, + "loss": 9.1881, + "step": 2625 + }, + { + "epoch": 0.1094577133091576, + "grad_norm": 424.0, + "learning_rate": 9.835389340077403e-05, + "loss": 15.2508, + "step": 2626 + }, + { + "epoch": 0.10949939560668584, + "grad_norm": 264.0, + "learning_rate": 9.83521752116334e-05, + "loss": 12.3753, + "step": 2627 + }, + { + "epoch": 0.10954107790421408, + "grad_norm": 568.0, + "learning_rate": 9.835045614127008e-05, + "loss": 16.6252, + "step": 2628 + }, + { + "epoch": 0.10958276020174232, + "grad_norm": 136.0, + "learning_rate": 9.834873618971539e-05, + "loss": 8.938, + "step": 2629 + }, + { + "epoch": 0.10962444249927056, + "grad_norm": 240.0, + "learning_rate": 9.83470153570007e-05, + "loss": 11.0003, + "step": 2630 + }, + { + "epoch": 0.1096661247967988, + "grad_norm": 215.0, + "learning_rate": 9.834529364315736e-05, + "loss": 10.7518, + "step": 2631 + }, + { + "epoch": 0.10970780709432704, + "grad_norm": 464.0, + "learning_rate": 9.834357104821676e-05, + "loss": 16.7505, + "step": 2632 + }, + { + "epoch": 0.10974948939185528, + "grad_norm": 512.0, + "learning_rate": 9.834184757221028e-05, + "loss": 18.752, + "step": 2633 + }, + { + "epoch": 0.10979117168938352, + "grad_norm": 764.0, + "learning_rate": 9.834012321516935e-05, + "loss": 22.2504, + "step": 2634 + }, + { + "epoch": 0.10983285398691175, + "grad_norm": 251.0, + "learning_rate": 9.833839797712537e-05, + "loss": 12.6892, + "step": 2635 + }, + { + "epoch": 0.10987453628444, + "grad_norm": 244.0, + "learning_rate": 9.83366718581098e-05, + "loss": 12.5631, + "step": 2636 + }, + { + "epoch": 0.10991621858196823, + "grad_norm": 568.0, + "learning_rate": 9.833494485815409e-05, + "loss": 18.1254, + "step": 2637 + }, + { + "epoch": 0.10995790087949647, + "grad_norm": 338.0, + "learning_rate": 9.833321697728971e-05, + "loss": 14.4379, + "step": 2638 + }, + { + "epoch": 0.10999958317702471, + "grad_norm": 206.0, + "learning_rate": 9.833148821554818e-05, + "loss": 10.876, + "step": 2639 + }, + { + "epoch": 0.11004126547455295, + "grad_norm": 67.5, + "learning_rate": 9.832975857296096e-05, + "loss": 8.5022, + "step": 2640 + }, + { + "epoch": 0.11008294777208119, + "grad_norm": 1768.0, + "learning_rate": 9.832802804955963e-05, + "loss": 37.0005, + "step": 2641 + }, + { + "epoch": 0.11012463006960943, + "grad_norm": 380.0, + "learning_rate": 9.832629664537568e-05, + "loss": 14.8763, + "step": 2642 + }, + { + "epoch": 0.11016631236713767, + "grad_norm": 536.0, + "learning_rate": 9.832456436044068e-05, + "loss": 14.8129, + "step": 2643 + }, + { + "epoch": 0.11020799466466592, + "grad_norm": 250.0, + "learning_rate": 9.832283119478623e-05, + "loss": 11.626, + "step": 2644 + }, + { + "epoch": 0.11024967696219416, + "grad_norm": 836.0, + "learning_rate": 9.832109714844387e-05, + "loss": 24.2511, + "step": 2645 + }, + { + "epoch": 0.1102913592597224, + "grad_norm": 592.0, + "learning_rate": 9.831936222144523e-05, + "loss": 19.751, + "step": 2646 + }, + { + "epoch": 0.11033304155725064, + "grad_norm": 452.0, + "learning_rate": 9.831762641382192e-05, + "loss": 16.0007, + "step": 2647 + }, + { + "epoch": 0.11037472385477888, + "grad_norm": 234.0, + "learning_rate": 9.831588972560559e-05, + "loss": 11.9392, + "step": 2648 + }, + { + "epoch": 0.11041640615230712, + "grad_norm": 230.0, + "learning_rate": 9.831415215682786e-05, + "loss": 10.5004, + "step": 2649 + }, + { + "epoch": 0.11045808844983536, + "grad_norm": 268.0, + "learning_rate": 9.831241370752045e-05, + "loss": 13.4392, + "step": 2650 + }, + { + "epoch": 0.1104997707473636, + "grad_norm": 127.5, + "learning_rate": 9.831067437771498e-05, + "loss": 7.9698, + "step": 2651 + }, + { + "epoch": 0.11054145304489184, + "grad_norm": 468.0, + "learning_rate": 9.83089341674432e-05, + "loss": 17.7502, + "step": 2652 + }, + { + "epoch": 0.11058313534242008, + "grad_norm": 199.0, + "learning_rate": 9.830719307673679e-05, + "loss": 12.0629, + "step": 2653 + }, + { + "epoch": 0.11062481763994832, + "grad_norm": 524.0, + "learning_rate": 9.830545110562752e-05, + "loss": 19.2503, + "step": 2654 + }, + { + "epoch": 0.11066649993747656, + "grad_norm": 644.0, + "learning_rate": 9.83037082541471e-05, + "loss": 18.5008, + "step": 2655 + }, + { + "epoch": 0.1107081822350048, + "grad_norm": 438.0, + "learning_rate": 9.830196452232732e-05, + "loss": 15.6891, + "step": 2656 + }, + { + "epoch": 0.11074986453253303, + "grad_norm": 544.0, + "learning_rate": 9.830021991019993e-05, + "loss": 15.8752, + "step": 2657 + }, + { + "epoch": 0.11079154683006127, + "grad_norm": 972.0, + "learning_rate": 9.829847441779675e-05, + "loss": 22.6305, + "step": 2658 + }, + { + "epoch": 0.11083322912758951, + "grad_norm": 266.0, + "learning_rate": 9.82967280451496e-05, + "loss": 12.8763, + "step": 2659 + }, + { + "epoch": 0.11087491142511775, + "grad_norm": 268.0, + "learning_rate": 9.82949807922903e-05, + "loss": 13.0002, + "step": 2660 + }, + { + "epoch": 0.11091659372264599, + "grad_norm": 101.0, + "learning_rate": 9.829323265925066e-05, + "loss": 8.8759, + "step": 2661 + }, + { + "epoch": 0.11095827602017423, + "grad_norm": 184.0, + "learning_rate": 9.829148364606258e-05, + "loss": 9.938, + "step": 2662 + }, + { + "epoch": 0.11099995831770247, + "grad_norm": 1280.0, + "learning_rate": 9.828973375275793e-05, + "loss": 27.8796, + "step": 2663 + }, + { + "epoch": 0.11104164061523071, + "grad_norm": 142.0, + "learning_rate": 9.828798297936859e-05, + "loss": 8.5636, + "step": 2664 + }, + { + "epoch": 0.11108332291275895, + "grad_norm": 268.0, + "learning_rate": 9.828623132592647e-05, + "loss": 12.5005, + "step": 2665 + }, + { + "epoch": 0.11112500521028719, + "grad_norm": 382.0, + "learning_rate": 9.828447879246349e-05, + "loss": 15.3753, + "step": 2666 + }, + { + "epoch": 0.11116668750781543, + "grad_norm": 420.0, + "learning_rate": 9.828272537901162e-05, + "loss": 16.5003, + "step": 2667 + }, + { + "epoch": 0.11120836980534367, + "grad_norm": 468.0, + "learning_rate": 9.828097108560279e-05, + "loss": 16.5014, + "step": 2668 + }, + { + "epoch": 0.11125005210287191, + "grad_norm": 956.0, + "learning_rate": 9.827921591226897e-05, + "loss": 24.3754, + "step": 2669 + }, + { + "epoch": 0.11129173440040015, + "grad_norm": 378.0, + "learning_rate": 9.827745985904216e-05, + "loss": 14.6892, + "step": 2670 + }, + { + "epoch": 0.11133341669792839, + "grad_norm": 410.0, + "learning_rate": 9.827570292595434e-05, + "loss": 14.6256, + "step": 2671 + }, + { + "epoch": 0.11137509899545663, + "grad_norm": 296.0, + "learning_rate": 9.827394511303755e-05, + "loss": 12.1253, + "step": 2672 + }, + { + "epoch": 0.11141678129298486, + "grad_norm": 952.0, + "learning_rate": 9.827218642032384e-05, + "loss": 23.8808, + "step": 2673 + }, + { + "epoch": 0.1114584635905131, + "grad_norm": 378.0, + "learning_rate": 9.827042684784524e-05, + "loss": 14.1255, + "step": 2674 + }, + { + "epoch": 0.11150014588804134, + "grad_norm": 326.0, + "learning_rate": 9.826866639563384e-05, + "loss": 12.8128, + "step": 2675 + }, + { + "epoch": 0.11154182818556958, + "grad_norm": 506.0, + "learning_rate": 9.826690506372169e-05, + "loss": 16.3759, + "step": 2676 + }, + { + "epoch": 0.11158351048309782, + "grad_norm": 438.0, + "learning_rate": 9.826514285214092e-05, + "loss": 14.3759, + "step": 2677 + }, + { + "epoch": 0.11162519278062606, + "grad_norm": 183.0, + "learning_rate": 9.826337976092364e-05, + "loss": 10.877, + "step": 2678 + }, + { + "epoch": 0.1116668750781543, + "grad_norm": 95.0, + "learning_rate": 9.826161579010197e-05, + "loss": 7.6886, + "step": 2679 + }, + { + "epoch": 0.11170855737568255, + "grad_norm": 324.0, + "learning_rate": 9.825985093970807e-05, + "loss": 13.6262, + "step": 2680 + }, + { + "epoch": 0.1117502396732108, + "grad_norm": 376.0, + "learning_rate": 9.825808520977411e-05, + "loss": 14.3753, + "step": 2681 + }, + { + "epoch": 0.11179192197073903, + "grad_norm": 374.0, + "learning_rate": 9.825631860033225e-05, + "loss": 13.2507, + "step": 2682 + }, + { + "epoch": 0.11183360426826727, + "grad_norm": 193.0, + "learning_rate": 9.825455111141471e-05, + "loss": 10.0629, + "step": 2683 + }, + { + "epoch": 0.11187528656579551, + "grad_norm": 324.0, + "learning_rate": 9.825278274305369e-05, + "loss": 13.7503, + "step": 2684 + }, + { + "epoch": 0.11191696886332375, + "grad_norm": 192.0, + "learning_rate": 9.825101349528144e-05, + "loss": 10.4385, + "step": 2685 + }, + { + "epoch": 0.11195865116085199, + "grad_norm": 294.0, + "learning_rate": 9.824924336813015e-05, + "loss": 12.688, + "step": 2686 + }, + { + "epoch": 0.11200033345838023, + "grad_norm": 133.0, + "learning_rate": 9.824747236163213e-05, + "loss": 8.2505, + "step": 2687 + }, + { + "epoch": 0.11204201575590847, + "grad_norm": 454.0, + "learning_rate": 9.824570047581965e-05, + "loss": 15.5002, + "step": 2688 + }, + { + "epoch": 0.11208369805343671, + "grad_norm": 218.0, + "learning_rate": 9.824392771072499e-05, + "loss": 11.1883, + "step": 2689 + }, + { + "epoch": 0.11212538035096495, + "grad_norm": 1208.0, + "learning_rate": 9.824215406638046e-05, + "loss": 26.5058, + "step": 2690 + }, + { + "epoch": 0.11216706264849319, + "grad_norm": 246.0, + "learning_rate": 9.82403795428184e-05, + "loss": 11.9377, + "step": 2691 + }, + { + "epoch": 0.11220874494602143, + "grad_norm": 404.0, + "learning_rate": 9.823860414007113e-05, + "loss": 15.0006, + "step": 2692 + }, + { + "epoch": 0.11225042724354967, + "grad_norm": 428.0, + "learning_rate": 9.823682785817103e-05, + "loss": 16.2503, + "step": 2693 + }, + { + "epoch": 0.1122921095410779, + "grad_norm": 396.0, + "learning_rate": 9.823505069715047e-05, + "loss": 14.7504, + "step": 2694 + }, + { + "epoch": 0.11233379183860615, + "grad_norm": 402.0, + "learning_rate": 9.823327265704181e-05, + "loss": 14.5011, + "step": 2695 + }, + { + "epoch": 0.11237547413613438, + "grad_norm": 354.0, + "learning_rate": 9.823149373787746e-05, + "loss": 12.6252, + "step": 2696 + }, + { + "epoch": 0.11241715643366262, + "grad_norm": 536.0, + "learning_rate": 9.822971393968988e-05, + "loss": 13.1278, + "step": 2697 + }, + { + "epoch": 0.11245883873119086, + "grad_norm": 306.0, + "learning_rate": 9.822793326251147e-05, + "loss": 11.4379, + "step": 2698 + }, + { + "epoch": 0.1125005210287191, + "grad_norm": 398.0, + "learning_rate": 9.822615170637471e-05, + "loss": 16.1252, + "step": 2699 + }, + { + "epoch": 0.11254220332624734, + "grad_norm": 340.0, + "learning_rate": 9.822436927131204e-05, + "loss": 13.4377, + "step": 2700 + }, + { + "epoch": 0.11258388562377558, + "grad_norm": 488.0, + "learning_rate": 9.822258595735596e-05, + "loss": 17.1256, + "step": 2701 + }, + { + "epoch": 0.11262556792130382, + "grad_norm": 416.0, + "learning_rate": 9.822080176453897e-05, + "loss": 14.8131, + "step": 2702 + }, + { + "epoch": 0.11266725021883206, + "grad_norm": 406.0, + "learning_rate": 9.82190166928936e-05, + "loss": 13.9378, + "step": 2703 + }, + { + "epoch": 0.1127089325163603, + "grad_norm": 258.0, + "learning_rate": 9.821723074245235e-05, + "loss": 11.6877, + "step": 2704 + }, + { + "epoch": 0.11275061481388854, + "grad_norm": 356.0, + "learning_rate": 9.821544391324779e-05, + "loss": 14.5628, + "step": 2705 + }, + { + "epoch": 0.11279229711141678, + "grad_norm": 209.0, + "learning_rate": 9.82136562053125e-05, + "loss": 9.8754, + "step": 2706 + }, + { + "epoch": 0.11283397940894502, + "grad_norm": 382.0, + "learning_rate": 9.821186761867905e-05, + "loss": 14.688, + "step": 2707 + }, + { + "epoch": 0.11287566170647326, + "grad_norm": 186.0, + "learning_rate": 9.821007815338002e-05, + "loss": 12.2505, + "step": 2708 + }, + { + "epoch": 0.1129173440040015, + "grad_norm": 1448.0, + "learning_rate": 9.820828780944805e-05, + "loss": 29.5045, + "step": 2709 + }, + { + "epoch": 0.11295902630152974, + "grad_norm": 235.0, + "learning_rate": 9.820649658691573e-05, + "loss": 11.7502, + "step": 2710 + }, + { + "epoch": 0.11300070859905798, + "grad_norm": 420.0, + "learning_rate": 9.820470448581574e-05, + "loss": 16.1261, + "step": 2711 + }, + { + "epoch": 0.11304239089658621, + "grad_norm": 588.0, + "learning_rate": 9.820291150618073e-05, + "loss": 19.1254, + "step": 2712 + }, + { + "epoch": 0.11308407319411445, + "grad_norm": 253.0, + "learning_rate": 9.820111764804338e-05, + "loss": 10.8754, + "step": 2713 + }, + { + "epoch": 0.1131257554916427, + "grad_norm": 186.0, + "learning_rate": 9.81993229114364e-05, + "loss": 9.5004, + "step": 2714 + }, + { + "epoch": 0.11316743778917093, + "grad_norm": 163.0, + "learning_rate": 9.819752729639247e-05, + "loss": 12.2507, + "step": 2715 + }, + { + "epoch": 0.11320912008669917, + "grad_norm": 310.0, + "learning_rate": 9.819573080294431e-05, + "loss": 13.1879, + "step": 2716 + }, + { + "epoch": 0.11325080238422743, + "grad_norm": 1704.0, + "learning_rate": 9.81939334311247e-05, + "loss": 43.2532, + "step": 2717 + }, + { + "epoch": 0.11329248468175566, + "grad_norm": 442.0, + "learning_rate": 9.819213518096637e-05, + "loss": 16.0031, + "step": 2718 + }, + { + "epoch": 0.1133341669792839, + "grad_norm": 217.0, + "learning_rate": 9.819033605250209e-05, + "loss": 12.3129, + "step": 2719 + }, + { + "epoch": 0.11337584927681214, + "grad_norm": 256.0, + "learning_rate": 9.818853604576465e-05, + "loss": 12.5003, + "step": 2720 + }, + { + "epoch": 0.11341753157434038, + "grad_norm": 528.0, + "learning_rate": 9.818673516078689e-05, + "loss": 16.7507, + "step": 2721 + }, + { + "epoch": 0.11345921387186862, + "grad_norm": 466.0, + "learning_rate": 9.818493339760158e-05, + "loss": 15.8763, + "step": 2722 + }, + { + "epoch": 0.11350089616939686, + "grad_norm": 684.0, + "learning_rate": 9.818313075624159e-05, + "loss": 19.2509, + "step": 2723 + }, + { + "epoch": 0.1135425784669251, + "grad_norm": 239.0, + "learning_rate": 9.818132723673977e-05, + "loss": 10.8754, + "step": 2724 + }, + { + "epoch": 0.11358426076445334, + "grad_norm": 408.0, + "learning_rate": 9.817952283912896e-05, + "loss": 16.2509, + "step": 2725 + }, + { + "epoch": 0.11362594306198158, + "grad_norm": 556.0, + "learning_rate": 9.81777175634421e-05, + "loss": 17.7515, + "step": 2726 + }, + { + "epoch": 0.11366762535950982, + "grad_norm": 161.0, + "learning_rate": 9.817591140971204e-05, + "loss": 9.9377, + "step": 2727 + }, + { + "epoch": 0.11370930765703806, + "grad_norm": 246.0, + "learning_rate": 9.817410437797172e-05, + "loss": 11.9378, + "step": 2728 + }, + { + "epoch": 0.1137509899545663, + "grad_norm": 400.0, + "learning_rate": 9.817229646825407e-05, + "loss": 14.9381, + "step": 2729 + }, + { + "epoch": 0.11379267225209454, + "grad_norm": 302.0, + "learning_rate": 9.817048768059207e-05, + "loss": 8.4393, + "step": 2730 + }, + { + "epoch": 0.11383435454962278, + "grad_norm": 336.0, + "learning_rate": 9.816867801501863e-05, + "loss": 12.691, + "step": 2731 + }, + { + "epoch": 0.11387603684715102, + "grad_norm": 724.0, + "learning_rate": 9.816686747156676e-05, + "loss": 17.63, + "step": 2732 + }, + { + "epoch": 0.11391771914467926, + "grad_norm": 282.0, + "learning_rate": 9.816505605026944e-05, + "loss": 13.1881, + "step": 2733 + }, + { + "epoch": 0.1139594014422075, + "grad_norm": 402.0, + "learning_rate": 9.816324375115973e-05, + "loss": 16.3755, + "step": 2734 + }, + { + "epoch": 0.11400108373973573, + "grad_norm": 247.0, + "learning_rate": 9.816143057427061e-05, + "loss": 11.3756, + "step": 2735 + }, + { + "epoch": 0.11404276603726397, + "grad_norm": 288.0, + "learning_rate": 9.815961651963513e-05, + "loss": 12.8129, + "step": 2736 + }, + { + "epoch": 0.11408444833479221, + "grad_norm": 272.0, + "learning_rate": 9.815780158728638e-05, + "loss": 12.2503, + "step": 2737 + }, + { + "epoch": 0.11412613063232045, + "grad_norm": 274.0, + "learning_rate": 9.815598577725741e-05, + "loss": 13.9383, + "step": 2738 + }, + { + "epoch": 0.11416781292984869, + "grad_norm": 215.0, + "learning_rate": 9.815416908958132e-05, + "loss": 11.1254, + "step": 2739 + }, + { + "epoch": 0.11420949522737693, + "grad_norm": 298.0, + "learning_rate": 9.815235152429125e-05, + "loss": 14.3132, + "step": 2740 + }, + { + "epoch": 0.11425117752490517, + "grad_norm": 134.0, + "learning_rate": 9.815053308142029e-05, + "loss": 9.6259, + "step": 2741 + }, + { + "epoch": 0.11429285982243341, + "grad_norm": 398.0, + "learning_rate": 9.814871376100158e-05, + "loss": 14.5627, + "step": 2742 + }, + { + "epoch": 0.11433454211996165, + "grad_norm": 304.0, + "learning_rate": 9.814689356306828e-05, + "loss": 12.1253, + "step": 2743 + }, + { + "epoch": 0.11437622441748989, + "grad_norm": 1088.0, + "learning_rate": 9.814507248765359e-05, + "loss": 31.7522, + "step": 2744 + }, + { + "epoch": 0.11441790671501813, + "grad_norm": 438.0, + "learning_rate": 9.814325053479067e-05, + "loss": 16.2503, + "step": 2745 + }, + { + "epoch": 0.11445958901254637, + "grad_norm": 80.0, + "learning_rate": 9.814142770451274e-05, + "loss": 8.3753, + "step": 2746 + }, + { + "epoch": 0.1145012713100746, + "grad_norm": 572.0, + "learning_rate": 9.8139603996853e-05, + "loss": 18.2509, + "step": 2747 + }, + { + "epoch": 0.11454295360760285, + "grad_norm": 540.0, + "learning_rate": 9.813777941184472e-05, + "loss": 17.6256, + "step": 2748 + }, + { + "epoch": 0.11458463590513109, + "grad_norm": 248.0, + "learning_rate": 9.813595394952114e-05, + "loss": 12.0005, + "step": 2749 + }, + { + "epoch": 0.11462631820265932, + "grad_norm": 312.0, + "learning_rate": 9.813412760991552e-05, + "loss": 12.689, + "step": 2750 + }, + { + "epoch": 0.11466800050018756, + "grad_norm": 502.0, + "learning_rate": 9.813230039306114e-05, + "loss": 17.0008, + "step": 2751 + }, + { + "epoch": 0.1147096827977158, + "grad_norm": 274.0, + "learning_rate": 9.813047229899132e-05, + "loss": 12.2503, + "step": 2752 + }, + { + "epoch": 0.11475136509524406, + "grad_norm": 191.0, + "learning_rate": 9.81286433277394e-05, + "loss": 10.0627, + "step": 2753 + }, + { + "epoch": 0.1147930473927723, + "grad_norm": 292.0, + "learning_rate": 9.812681347933863e-05, + "loss": 13.0637, + "step": 2754 + }, + { + "epoch": 0.11483472969030054, + "grad_norm": 448.0, + "learning_rate": 9.812498275382245e-05, + "loss": 15.3128, + "step": 2755 + }, + { + "epoch": 0.11487641198782877, + "grad_norm": 180.0, + "learning_rate": 9.812315115122417e-05, + "loss": 9.8756, + "step": 2756 + }, + { + "epoch": 0.11491809428535701, + "grad_norm": 416.0, + "learning_rate": 9.81213186715772e-05, + "loss": 15.8758, + "step": 2757 + }, + { + "epoch": 0.11495977658288525, + "grad_norm": 360.0, + "learning_rate": 9.811948531491491e-05, + "loss": 14.5002, + "step": 2758 + }, + { + "epoch": 0.11500145888041349, + "grad_norm": 220.0, + "learning_rate": 9.811765108127073e-05, + "loss": 10.8754, + "step": 2759 + }, + { + "epoch": 0.11504314117794173, + "grad_norm": 290.0, + "learning_rate": 9.81158159706781e-05, + "loss": 12.8127, + "step": 2760 + }, + { + "epoch": 0.11508482347546997, + "grad_norm": 330.0, + "learning_rate": 9.811397998317045e-05, + "loss": 13.8755, + "step": 2761 + }, + { + "epoch": 0.11512650577299821, + "grad_norm": 728.0, + "learning_rate": 9.811214311878124e-05, + "loss": 21.2503, + "step": 2762 + }, + { + "epoch": 0.11516818807052645, + "grad_norm": 404.0, + "learning_rate": 9.811030537754395e-05, + "loss": 16.3757, + "step": 2763 + }, + { + "epoch": 0.11520987036805469, + "grad_norm": 167.0, + "learning_rate": 9.810846675949208e-05, + "loss": 9.5628, + "step": 2764 + }, + { + "epoch": 0.11525155266558293, + "grad_norm": 120.5, + "learning_rate": 9.810662726465913e-05, + "loss": 9.5011, + "step": 2765 + }, + { + "epoch": 0.11529323496311117, + "grad_norm": 272.0, + "learning_rate": 9.810478689307863e-05, + "loss": 12.2503, + "step": 2766 + }, + { + "epoch": 0.11533491726063941, + "grad_norm": 568.0, + "learning_rate": 9.810294564478411e-05, + "loss": 19.1257, + "step": 2767 + }, + { + "epoch": 0.11537659955816765, + "grad_norm": 158.0, + "learning_rate": 9.810110351980914e-05, + "loss": 8.7506, + "step": 2768 + }, + { + "epoch": 0.11541828185569589, + "grad_norm": 233.0, + "learning_rate": 9.809926051818727e-05, + "loss": 12.3129, + "step": 2769 + }, + { + "epoch": 0.11545996415322413, + "grad_norm": 316.0, + "learning_rate": 9.809741663995213e-05, + "loss": 14.1256, + "step": 2770 + }, + { + "epoch": 0.11550164645075237, + "grad_norm": 302.0, + "learning_rate": 9.809557188513731e-05, + "loss": 13.1886, + "step": 2771 + }, + { + "epoch": 0.1155433287482806, + "grad_norm": 320.0, + "learning_rate": 9.80937262537764e-05, + "loss": 13.6256, + "step": 2772 + }, + { + "epoch": 0.11558501104580884, + "grad_norm": 348.0, + "learning_rate": 9.809187974590307e-05, + "loss": 11.5632, + "step": 2773 + }, + { + "epoch": 0.11562669334333708, + "grad_norm": 498.0, + "learning_rate": 9.809003236155097e-05, + "loss": 16.6253, + "step": 2774 + }, + { + "epoch": 0.11566837564086532, + "grad_norm": 406.0, + "learning_rate": 9.808818410075374e-05, + "loss": 15.8752, + "step": 2775 + }, + { + "epoch": 0.11571005793839356, + "grad_norm": 178.0, + "learning_rate": 9.80863349635451e-05, + "loss": 9.501, + "step": 2776 + }, + { + "epoch": 0.1157517402359218, + "grad_norm": 284.0, + "learning_rate": 9.808448494995875e-05, + "loss": 13.5633, + "step": 2777 + }, + { + "epoch": 0.11579342253345004, + "grad_norm": 476.0, + "learning_rate": 9.808263406002837e-05, + "loss": 15.5635, + "step": 2778 + }, + { + "epoch": 0.11583510483097828, + "grad_norm": 520.0, + "learning_rate": 9.808078229378771e-05, + "loss": 16.5003, + "step": 2779 + }, + { + "epoch": 0.11587678712850652, + "grad_norm": 145.0, + "learning_rate": 9.807892965127055e-05, + "loss": 9.8131, + "step": 2780 + }, + { + "epoch": 0.11591846942603476, + "grad_norm": 988.0, + "learning_rate": 9.807707613251062e-05, + "loss": 21.38, + "step": 2781 + }, + { + "epoch": 0.115960151723563, + "grad_norm": 450.0, + "learning_rate": 9.80752217375417e-05, + "loss": 15.1264, + "step": 2782 + }, + { + "epoch": 0.11600183402109124, + "grad_norm": 178.0, + "learning_rate": 9.80733664663976e-05, + "loss": 10.6878, + "step": 2783 + }, + { + "epoch": 0.11604351631861948, + "grad_norm": 322.0, + "learning_rate": 9.807151031911214e-05, + "loss": 13.8137, + "step": 2784 + }, + { + "epoch": 0.11608519861614772, + "grad_norm": 360.0, + "learning_rate": 9.806965329571912e-05, + "loss": 14.6255, + "step": 2785 + }, + { + "epoch": 0.11612688091367596, + "grad_norm": 258.0, + "learning_rate": 9.806779539625241e-05, + "loss": 11.631, + "step": 2786 + }, + { + "epoch": 0.1161685632112042, + "grad_norm": 536.0, + "learning_rate": 9.806593662074586e-05, + "loss": 19.1257, + "step": 2787 + }, + { + "epoch": 0.11621024550873243, + "grad_norm": 211.0, + "learning_rate": 9.806407696923336e-05, + "loss": 8.5003, + "step": 2788 + }, + { + "epoch": 0.11625192780626067, + "grad_norm": 892.0, + "learning_rate": 9.806221644174877e-05, + "loss": 23.2503, + "step": 2789 + }, + { + "epoch": 0.11629361010378893, + "grad_norm": 256.0, + "learning_rate": 9.806035503832603e-05, + "loss": 12.0627, + "step": 2790 + }, + { + "epoch": 0.11633529240131717, + "grad_norm": 408.0, + "learning_rate": 9.805849275899905e-05, + "loss": 14.8754, + "step": 2791 + }, + { + "epoch": 0.1163769746988454, + "grad_norm": 177.0, + "learning_rate": 9.805662960380178e-05, + "loss": 10.8129, + "step": 2792 + }, + { + "epoch": 0.11641865699637365, + "grad_norm": 772.0, + "learning_rate": 9.805476557276816e-05, + "loss": 22.6252, + "step": 2793 + }, + { + "epoch": 0.11646033929390188, + "grad_norm": 260.0, + "learning_rate": 9.805290066593218e-05, + "loss": 11.813, + "step": 2794 + }, + { + "epoch": 0.11650202159143012, + "grad_norm": 268.0, + "learning_rate": 9.805103488332782e-05, + "loss": 12.5005, + "step": 2795 + }, + { + "epoch": 0.11654370388895836, + "grad_norm": 788.0, + "learning_rate": 9.804916822498908e-05, + "loss": 24.1253, + "step": 2796 + }, + { + "epoch": 0.1165853861864866, + "grad_norm": 328.0, + "learning_rate": 9.804730069094998e-05, + "loss": 13.8752, + "step": 2797 + }, + { + "epoch": 0.11662706848401484, + "grad_norm": 516.0, + "learning_rate": 9.804543228124456e-05, + "loss": 18.0009, + "step": 2798 + }, + { + "epoch": 0.11666875078154308, + "grad_norm": 222.0, + "learning_rate": 9.804356299590688e-05, + "loss": 11.6268, + "step": 2799 + }, + { + "epoch": 0.11671043307907132, + "grad_norm": 432.0, + "learning_rate": 9.804169283497099e-05, + "loss": 15.6881, + "step": 2800 + }, + { + "epoch": 0.11675211537659956, + "grad_norm": 1224.0, + "learning_rate": 9.803982179847099e-05, + "loss": 24.8795, + "step": 2801 + }, + { + "epoch": 0.1167937976741278, + "grad_norm": 500.0, + "learning_rate": 9.803794988644097e-05, + "loss": 16.5002, + "step": 2802 + }, + { + "epoch": 0.11683547997165604, + "grad_norm": 177.0, + "learning_rate": 9.803607709891504e-05, + "loss": 12.0633, + "step": 2803 + }, + { + "epoch": 0.11687716226918428, + "grad_norm": 179.0, + "learning_rate": 9.803420343592736e-05, + "loss": 7.6278, + "step": 2804 + }, + { + "epoch": 0.11691884456671252, + "grad_norm": 408.0, + "learning_rate": 9.803232889751203e-05, + "loss": 14.2504, + "step": 2805 + }, + { + "epoch": 0.11696052686424076, + "grad_norm": 314.0, + "learning_rate": 9.803045348370327e-05, + "loss": 13.2505, + "step": 2806 + }, + { + "epoch": 0.117002209161769, + "grad_norm": 304.0, + "learning_rate": 9.802857719453523e-05, + "loss": 12.3754, + "step": 2807 + }, + { + "epoch": 0.11704389145929724, + "grad_norm": 972.0, + "learning_rate": 9.802670003004208e-05, + "loss": 24.8803, + "step": 2808 + }, + { + "epoch": 0.11708557375682548, + "grad_norm": 612.0, + "learning_rate": 9.802482199025808e-05, + "loss": 18.8756, + "step": 2809 + }, + { + "epoch": 0.11712725605435372, + "grad_norm": 159.0, + "learning_rate": 9.802294307521744e-05, + "loss": 9.2504, + "step": 2810 + }, + { + "epoch": 0.11716893835188195, + "grad_norm": 356.0, + "learning_rate": 9.80210632849544e-05, + "loss": 12.6896, + "step": 2811 + }, + { + "epoch": 0.1172106206494102, + "grad_norm": 492.0, + "learning_rate": 9.80191826195032e-05, + "loss": 16.8753, + "step": 2812 + }, + { + "epoch": 0.11725230294693843, + "grad_norm": 632.0, + "learning_rate": 9.801730107889815e-05, + "loss": 19.3753, + "step": 2813 + }, + { + "epoch": 0.11729398524446667, + "grad_norm": 266.0, + "learning_rate": 9.801541866317352e-05, + "loss": 13.4378, + "step": 2814 + }, + { + "epoch": 0.11733566754199491, + "grad_norm": 330.0, + "learning_rate": 9.801353537236361e-05, + "loss": 13.8755, + "step": 2815 + }, + { + "epoch": 0.11737734983952315, + "grad_norm": 250.0, + "learning_rate": 9.801165120650278e-05, + "loss": 11.3133, + "step": 2816 + }, + { + "epoch": 0.11741903213705139, + "grad_norm": 264.0, + "learning_rate": 9.800976616562533e-05, + "loss": 11.2503, + "step": 2817 + }, + { + "epoch": 0.11746071443457963, + "grad_norm": 270.0, + "learning_rate": 9.800788024976564e-05, + "loss": 12.3753, + "step": 2818 + }, + { + "epoch": 0.11750239673210787, + "grad_norm": 350.0, + "learning_rate": 9.800599345895805e-05, + "loss": 12.5007, + "step": 2819 + }, + { + "epoch": 0.11754407902963611, + "grad_norm": 612.0, + "learning_rate": 9.800410579323698e-05, + "loss": 19.3791, + "step": 2820 + }, + { + "epoch": 0.11758576132716435, + "grad_norm": 239.0, + "learning_rate": 9.800221725263683e-05, + "loss": 11.3758, + "step": 2821 + }, + { + "epoch": 0.11762744362469259, + "grad_norm": 155.0, + "learning_rate": 9.8000327837192e-05, + "loss": 4.1879, + "step": 2822 + }, + { + "epoch": 0.11766912592222083, + "grad_norm": 229.0, + "learning_rate": 9.799843754693693e-05, + "loss": 12.252, + "step": 2823 + }, + { + "epoch": 0.11771080821974907, + "grad_norm": 940.0, + "learning_rate": 9.799654638190607e-05, + "loss": 21.8805, + "step": 2824 + }, + { + "epoch": 0.1177524905172773, + "grad_norm": 384.0, + "learning_rate": 9.799465434213391e-05, + "loss": 14.6892, + "step": 2825 + }, + { + "epoch": 0.11779417281480556, + "grad_norm": 528.0, + "learning_rate": 9.79927614276549e-05, + "loss": 16.8769, + "step": 2826 + }, + { + "epoch": 0.1178358551123338, + "grad_norm": 394.0, + "learning_rate": 9.799086763850355e-05, + "loss": 11.6263, + "step": 2827 + }, + { + "epoch": 0.11787753740986204, + "grad_norm": 398.0, + "learning_rate": 9.798897297471439e-05, + "loss": 13.876, + "step": 2828 + }, + { + "epoch": 0.11791921970739028, + "grad_norm": 346.0, + "learning_rate": 9.798707743632194e-05, + "loss": 14.4385, + "step": 2829 + }, + { + "epoch": 0.11796090200491852, + "grad_norm": 644.0, + "learning_rate": 9.798518102336073e-05, + "loss": 19.3764, + "step": 2830 + }, + { + "epoch": 0.11800258430244676, + "grad_norm": 292.0, + "learning_rate": 9.798328373586534e-05, + "loss": 13.1252, + "step": 2831 + }, + { + "epoch": 0.118044266599975, + "grad_norm": 224.0, + "learning_rate": 9.798138557387034e-05, + "loss": 11.4379, + "step": 2832 + }, + { + "epoch": 0.11808594889750323, + "grad_norm": 560.0, + "learning_rate": 9.797948653741034e-05, + "loss": 18.6252, + "step": 2833 + }, + { + "epoch": 0.11812763119503147, + "grad_norm": 660.0, + "learning_rate": 9.797758662651992e-05, + "loss": 17.3758, + "step": 2834 + }, + { + "epoch": 0.11816931349255971, + "grad_norm": 386.0, + "learning_rate": 9.797568584123375e-05, + "loss": 14.6261, + "step": 2835 + }, + { + "epoch": 0.11821099579008795, + "grad_norm": 368.0, + "learning_rate": 9.797378418158643e-05, + "loss": 14.0012, + "step": 2836 + }, + { + "epoch": 0.11825267808761619, + "grad_norm": 620.0, + "learning_rate": 9.797188164761264e-05, + "loss": 19.2508, + "step": 2837 + }, + { + "epoch": 0.11829436038514443, + "grad_norm": 209.0, + "learning_rate": 9.796997823934704e-05, + "loss": 11.751, + "step": 2838 + }, + { + "epoch": 0.11833604268267267, + "grad_norm": 121.5, + "learning_rate": 9.796807395682434e-05, + "loss": 8.1882, + "step": 2839 + }, + { + "epoch": 0.11837772498020091, + "grad_norm": 266.0, + "learning_rate": 9.796616880007922e-05, + "loss": 11.6254, + "step": 2840 + }, + { + "epoch": 0.11841940727772915, + "grad_norm": 756.0, + "learning_rate": 9.796426276914643e-05, + "loss": 21.7506, + "step": 2841 + }, + { + "epoch": 0.11846108957525739, + "grad_norm": 324.0, + "learning_rate": 9.796235586406068e-05, + "loss": 13.6254, + "step": 2842 + }, + { + "epoch": 0.11850277187278563, + "grad_norm": 1480.0, + "learning_rate": 9.796044808485677e-05, + "loss": 27.3802, + "step": 2843 + }, + { + "epoch": 0.11854445417031387, + "grad_norm": 138.0, + "learning_rate": 9.79585394315694e-05, + "loss": 7.094, + "step": 2844 + }, + { + "epoch": 0.11858613646784211, + "grad_norm": 876.0, + "learning_rate": 9.79566299042334e-05, + "loss": 26.0013, + "step": 2845 + }, + { + "epoch": 0.11862781876537035, + "grad_norm": 141.0, + "learning_rate": 9.795471950288355e-05, + "loss": 8.8755, + "step": 2846 + }, + { + "epoch": 0.11866950106289859, + "grad_norm": 247.0, + "learning_rate": 9.795280822755471e-05, + "loss": 12.5628, + "step": 2847 + }, + { + "epoch": 0.11871118336042683, + "grad_norm": 278.0, + "learning_rate": 9.795089607828167e-05, + "loss": 12.7503, + "step": 2848 + }, + { + "epoch": 0.11875286565795506, + "grad_norm": 444.0, + "learning_rate": 9.794898305509927e-05, + "loss": 14.688, + "step": 2849 + }, + { + "epoch": 0.1187945479554833, + "grad_norm": 320.0, + "learning_rate": 9.794706915804243e-05, + "loss": 13.5012, + "step": 2850 + }, + { + "epoch": 0.11883623025301154, + "grad_norm": 100.0, + "learning_rate": 9.794515438714598e-05, + "loss": 8.4384, + "step": 2851 + }, + { + "epoch": 0.11887791255053978, + "grad_norm": 366.0, + "learning_rate": 9.794323874244485e-05, + "loss": 14.5634, + "step": 2852 + }, + { + "epoch": 0.11891959484806802, + "grad_norm": 438.0, + "learning_rate": 9.794132222397392e-05, + "loss": 16.376, + "step": 2853 + }, + { + "epoch": 0.11896127714559626, + "grad_norm": 224.0, + "learning_rate": 9.793940483176815e-05, + "loss": 12.0628, + "step": 2854 + }, + { + "epoch": 0.1190029594431245, + "grad_norm": 238.0, + "learning_rate": 9.793748656586245e-05, + "loss": 7.6889, + "step": 2855 + }, + { + "epoch": 0.11904464174065274, + "grad_norm": 464.0, + "learning_rate": 9.793556742629183e-05, + "loss": 15.8777, + "step": 2856 + }, + { + "epoch": 0.11908632403818098, + "grad_norm": 454.0, + "learning_rate": 9.793364741309122e-05, + "loss": 17.1257, + "step": 2857 + }, + { + "epoch": 0.11912800633570922, + "grad_norm": 171.0, + "learning_rate": 9.793172652629564e-05, + "loss": 9.6894, + "step": 2858 + }, + { + "epoch": 0.11916968863323746, + "grad_norm": 322.0, + "learning_rate": 9.792980476594009e-05, + "loss": 13.1255, + "step": 2859 + }, + { + "epoch": 0.1192113709307657, + "grad_norm": 334.0, + "learning_rate": 9.792788213205959e-05, + "loss": 13.939, + "step": 2860 + }, + { + "epoch": 0.11925305322829394, + "grad_norm": 217.0, + "learning_rate": 9.792595862468919e-05, + "loss": 10.6882, + "step": 2861 + }, + { + "epoch": 0.11929473552582218, + "grad_norm": 424.0, + "learning_rate": 9.792403424386392e-05, + "loss": 15.8134, + "step": 2862 + }, + { + "epoch": 0.11933641782335043, + "grad_norm": 162.0, + "learning_rate": 9.792210898961889e-05, + "loss": 10.0641, + "step": 2863 + }, + { + "epoch": 0.11937810012087867, + "grad_norm": 132.0, + "learning_rate": 9.792018286198917e-05, + "loss": 7.1573, + "step": 2864 + }, + { + "epoch": 0.11941978241840691, + "grad_norm": 360.0, + "learning_rate": 9.791825586100985e-05, + "loss": 14.6253, + "step": 2865 + }, + { + "epoch": 0.11946146471593515, + "grad_norm": 71.0, + "learning_rate": 9.791632798671606e-05, + "loss": 7.7504, + "step": 2866 + }, + { + "epoch": 0.11950314701346339, + "grad_norm": 282.0, + "learning_rate": 9.791439923914295e-05, + "loss": 13.4378, + "step": 2867 + }, + { + "epoch": 0.11954482931099163, + "grad_norm": 640.0, + "learning_rate": 9.791246961832565e-05, + "loss": 17.1303, + "step": 2868 + }, + { + "epoch": 0.11958651160851987, + "grad_norm": 225.0, + "learning_rate": 9.791053912429935e-05, + "loss": 11.1883, + "step": 2869 + }, + { + "epoch": 0.1196281939060481, + "grad_norm": 466.0, + "learning_rate": 9.790860775709923e-05, + "loss": 16.0005, + "step": 2870 + }, + { + "epoch": 0.11966987620357634, + "grad_norm": 1216.0, + "learning_rate": 9.790667551676046e-05, + "loss": 26.6323, + "step": 2871 + }, + { + "epoch": 0.11971155850110458, + "grad_norm": 1128.0, + "learning_rate": 9.790474240331828e-05, + "loss": 24.0044, + "step": 2872 + }, + { + "epoch": 0.11975324079863282, + "grad_norm": 231.0, + "learning_rate": 9.790280841680793e-05, + "loss": 11.689, + "step": 2873 + }, + { + "epoch": 0.11979492309616106, + "grad_norm": 141.0, + "learning_rate": 9.790087355726463e-05, + "loss": 8.4377, + "step": 2874 + }, + { + "epoch": 0.1198366053936893, + "grad_norm": 101.5, + "learning_rate": 9.789893782472367e-05, + "loss": 8.6877, + "step": 2875 + }, + { + "epoch": 0.11987828769121754, + "grad_norm": 760.0, + "learning_rate": 9.789700121922031e-05, + "loss": 26.2503, + "step": 2876 + }, + { + "epoch": 0.11991996998874578, + "grad_norm": 388.0, + "learning_rate": 9.789506374078985e-05, + "loss": 13.6257, + "step": 2877 + }, + { + "epoch": 0.11996165228627402, + "grad_norm": 139.0, + "learning_rate": 9.78931253894676e-05, + "loss": 9.2502, + "step": 2878 + }, + { + "epoch": 0.12000333458380226, + "grad_norm": 1416.0, + "learning_rate": 9.789118616528889e-05, + "loss": 29.0035, + "step": 2879 + }, + { + "epoch": 0.1200450168813305, + "grad_norm": 644.0, + "learning_rate": 9.788924606828905e-05, + "loss": 18.3757, + "step": 2880 + }, + { + "epoch": 0.12008669917885874, + "grad_norm": 486.0, + "learning_rate": 9.788730509850346e-05, + "loss": 16.2525, + "step": 2881 + }, + { + "epoch": 0.12012838147638698, + "grad_norm": 588.0, + "learning_rate": 9.788536325596749e-05, + "loss": 19.1254, + "step": 2882 + }, + { + "epoch": 0.12017006377391522, + "grad_norm": 229.0, + "learning_rate": 9.78834205407165e-05, + "loss": 12.0627, + "step": 2883 + }, + { + "epoch": 0.12021174607144346, + "grad_norm": 101.0, + "learning_rate": 9.788147695278596e-05, + "loss": 9.0627, + "step": 2884 + }, + { + "epoch": 0.1202534283689717, + "grad_norm": 300.0, + "learning_rate": 9.787953249221123e-05, + "loss": 13.6257, + "step": 2885 + }, + { + "epoch": 0.12029511066649994, + "grad_norm": 696.0, + "learning_rate": 9.787758715902775e-05, + "loss": 20.5006, + "step": 2886 + }, + { + "epoch": 0.12033679296402817, + "grad_norm": 330.0, + "learning_rate": 9.787564095327102e-05, + "loss": 14.3754, + "step": 2887 + }, + { + "epoch": 0.12037847526155641, + "grad_norm": 776.0, + "learning_rate": 9.787369387497647e-05, + "loss": 21.1254, + "step": 2888 + }, + { + "epoch": 0.12042015755908465, + "grad_norm": 146.0, + "learning_rate": 9.78717459241796e-05, + "loss": 9.8137, + "step": 2889 + }, + { + "epoch": 0.12046183985661289, + "grad_norm": 300.0, + "learning_rate": 9.786979710091593e-05, + "loss": 13.1877, + "step": 2890 + }, + { + "epoch": 0.12050352215414113, + "grad_norm": 400.0, + "learning_rate": 9.786784740522095e-05, + "loss": 14.2506, + "step": 2891 + }, + { + "epoch": 0.12054520445166937, + "grad_norm": 302.0, + "learning_rate": 9.78658968371302e-05, + "loss": 11.5021, + "step": 2892 + }, + { + "epoch": 0.12058688674919761, + "grad_norm": 286.0, + "learning_rate": 9.786394539667922e-05, + "loss": 14.1257, + "step": 2893 + }, + { + "epoch": 0.12062856904672585, + "grad_norm": 388.0, + "learning_rate": 9.786199308390358e-05, + "loss": 15.5635, + "step": 2894 + }, + { + "epoch": 0.12067025134425409, + "grad_norm": 77.5, + "learning_rate": 9.786003989883889e-05, + "loss": 7.8127, + "step": 2895 + }, + { + "epoch": 0.12071193364178233, + "grad_norm": 390.0, + "learning_rate": 9.785808584152071e-05, + "loss": 14.3128, + "step": 2896 + }, + { + "epoch": 0.12075361593931057, + "grad_norm": 238.0, + "learning_rate": 9.785613091198467e-05, + "loss": 11.1878, + "step": 2897 + }, + { + "epoch": 0.12079529823683881, + "grad_norm": 158.0, + "learning_rate": 9.78541751102664e-05, + "loss": 9.3128, + "step": 2898 + }, + { + "epoch": 0.12083698053436706, + "grad_norm": 244.0, + "learning_rate": 9.785221843640153e-05, + "loss": 11.688, + "step": 2899 + }, + { + "epoch": 0.1208786628318953, + "grad_norm": 384.0, + "learning_rate": 9.785026089042575e-05, + "loss": 14.4378, + "step": 2900 + }, + { + "epoch": 0.12092034512942354, + "grad_norm": 460.0, + "learning_rate": 9.784830247237469e-05, + "loss": 17.6269, + "step": 2901 + }, + { + "epoch": 0.12096202742695178, + "grad_norm": 197.0, + "learning_rate": 9.784634318228409e-05, + "loss": 10.8754, + "step": 2902 + }, + { + "epoch": 0.12100370972448002, + "grad_norm": 458.0, + "learning_rate": 9.784438302018963e-05, + "loss": 16.126, + "step": 2903 + }, + { + "epoch": 0.12104539202200826, + "grad_norm": 336.0, + "learning_rate": 9.784242198612705e-05, + "loss": 10.7514, + "step": 2904 + }, + { + "epoch": 0.1210870743195365, + "grad_norm": 294.0, + "learning_rate": 9.784046008013208e-05, + "loss": 13.3752, + "step": 2905 + }, + { + "epoch": 0.12112875661706474, + "grad_norm": 344.0, + "learning_rate": 9.783849730224048e-05, + "loss": 12.7521, + "step": 2906 + }, + { + "epoch": 0.12117043891459298, + "grad_norm": 540.0, + "learning_rate": 9.783653365248802e-05, + "loss": 18.0005, + "step": 2907 + }, + { + "epoch": 0.12121212121212122, + "grad_norm": 318.0, + "learning_rate": 9.783456913091048e-05, + "loss": 13.6878, + "step": 2908 + }, + { + "epoch": 0.12125380350964945, + "grad_norm": 64.5, + "learning_rate": 9.783260373754368e-05, + "loss": 6.8445, + "step": 2909 + }, + { + "epoch": 0.1212954858071777, + "grad_norm": 256.0, + "learning_rate": 9.783063747242343e-05, + "loss": 13.0042, + "step": 2910 + }, + { + "epoch": 0.12133716810470593, + "grad_norm": 510.0, + "learning_rate": 9.782867033558556e-05, + "loss": 19.6255, + "step": 2911 + }, + { + "epoch": 0.12137885040223417, + "grad_norm": 402.0, + "learning_rate": 9.782670232706592e-05, + "loss": 15.0003, + "step": 2912 + }, + { + "epoch": 0.12142053269976241, + "grad_norm": 468.0, + "learning_rate": 9.78247334469004e-05, + "loss": 15.6297, + "step": 2913 + }, + { + "epoch": 0.12146221499729065, + "grad_norm": 384.0, + "learning_rate": 9.782276369512487e-05, + "loss": 14.5024, + "step": 2914 + }, + { + "epoch": 0.12150389729481889, + "grad_norm": 322.0, + "learning_rate": 9.782079307177521e-05, + "loss": 13.3758, + "step": 2915 + }, + { + "epoch": 0.12154557959234713, + "grad_norm": 410.0, + "learning_rate": 9.781882157688735e-05, + "loss": 15.6264, + "step": 2916 + }, + { + "epoch": 0.12158726188987537, + "grad_norm": 256.0, + "learning_rate": 9.781684921049722e-05, + "loss": 11.688, + "step": 2917 + }, + { + "epoch": 0.12162894418740361, + "grad_norm": 237.0, + "learning_rate": 9.781487597264079e-05, + "loss": 12.6261, + "step": 2918 + }, + { + "epoch": 0.12167062648493185, + "grad_norm": 326.0, + "learning_rate": 9.7812901863354e-05, + "loss": 13.688, + "step": 2919 + }, + { + "epoch": 0.12171230878246009, + "grad_norm": 532.0, + "learning_rate": 9.781092688267281e-05, + "loss": 18.3755, + "step": 2920 + }, + { + "epoch": 0.12175399107998833, + "grad_norm": 410.0, + "learning_rate": 9.780895103063323e-05, + "loss": 14.6254, + "step": 2921 + }, + { + "epoch": 0.12179567337751657, + "grad_norm": 255.0, + "learning_rate": 9.780697430727129e-05, + "loss": 9.1882, + "step": 2922 + }, + { + "epoch": 0.1218373556750448, + "grad_norm": 139.0, + "learning_rate": 9.7804996712623e-05, + "loss": 9.4379, + "step": 2923 + }, + { + "epoch": 0.12187903797257305, + "grad_norm": 260.0, + "learning_rate": 9.78030182467244e-05, + "loss": 9.5637, + "step": 2924 + }, + { + "epoch": 0.12192072027010128, + "grad_norm": 748.0, + "learning_rate": 9.780103890961154e-05, + "loss": 21.5004, + "step": 2925 + }, + { + "epoch": 0.12196240256762952, + "grad_norm": 302.0, + "learning_rate": 9.779905870132051e-05, + "loss": 12.6254, + "step": 2926 + }, + { + "epoch": 0.12200408486515776, + "grad_norm": 556.0, + "learning_rate": 9.779707762188739e-05, + "loss": 17.377, + "step": 2927 + }, + { + "epoch": 0.122045767162686, + "grad_norm": 676.0, + "learning_rate": 9.779509567134828e-05, + "loss": 20.2515, + "step": 2928 + }, + { + "epoch": 0.12208744946021424, + "grad_norm": 166.0, + "learning_rate": 9.779311284973931e-05, + "loss": 9.1257, + "step": 2929 + }, + { + "epoch": 0.12212913175774248, + "grad_norm": 320.0, + "learning_rate": 9.779112915709662e-05, + "loss": 13.0011, + "step": 2930 + }, + { + "epoch": 0.12217081405527072, + "grad_norm": 414.0, + "learning_rate": 9.778914459345636e-05, + "loss": 15.6269, + "step": 2931 + }, + { + "epoch": 0.12221249635279896, + "grad_norm": 118.0, + "learning_rate": 9.77871591588547e-05, + "loss": 9.001, + "step": 2932 + }, + { + "epoch": 0.1222541786503272, + "grad_norm": 408.0, + "learning_rate": 9.778517285332783e-05, + "loss": 14.5631, + "step": 2933 + }, + { + "epoch": 0.12229586094785544, + "grad_norm": 374.0, + "learning_rate": 9.778318567691191e-05, + "loss": 13.7518, + "step": 2934 + }, + { + "epoch": 0.12233754324538368, + "grad_norm": 784.0, + "learning_rate": 9.778119762964322e-05, + "loss": 21.7509, + "step": 2935 + }, + { + "epoch": 0.12237922554291193, + "grad_norm": 366.0, + "learning_rate": 9.777920871155795e-05, + "loss": 15.3161, + "step": 2936 + }, + { + "epoch": 0.12242090784044017, + "grad_norm": 580.0, + "learning_rate": 9.777721892269236e-05, + "loss": 16.6297, + "step": 2937 + }, + { + "epoch": 0.12246259013796841, + "grad_norm": 364.0, + "learning_rate": 9.777522826308272e-05, + "loss": 14.0628, + "step": 2938 + }, + { + "epoch": 0.12250427243549665, + "grad_norm": 227.0, + "learning_rate": 9.777323673276528e-05, + "loss": 10.9378, + "step": 2939 + }, + { + "epoch": 0.12254595473302489, + "grad_norm": 304.0, + "learning_rate": 9.777124433177639e-05, + "loss": 12.6877, + "step": 2940 + }, + { + "epoch": 0.12258763703055313, + "grad_norm": 496.0, + "learning_rate": 9.776925106015231e-05, + "loss": 16.1255, + "step": 2941 + }, + { + "epoch": 0.12262931932808137, + "grad_norm": 812.0, + "learning_rate": 9.776725691792941e-05, + "loss": 21.7554, + "step": 2942 + }, + { + "epoch": 0.12267100162560961, + "grad_norm": 328.0, + "learning_rate": 9.776526190514399e-05, + "loss": 13.9379, + "step": 2943 + }, + { + "epoch": 0.12271268392313785, + "grad_norm": 278.0, + "learning_rate": 9.776326602183246e-05, + "loss": 13.5014, + "step": 2944 + }, + { + "epoch": 0.12275436622066609, + "grad_norm": 696.0, + "learning_rate": 9.776126926803115e-05, + "loss": 21.6267, + "step": 2945 + }, + { + "epoch": 0.12279604851819433, + "grad_norm": 290.0, + "learning_rate": 9.775927164377645e-05, + "loss": 11.8753, + "step": 2946 + }, + { + "epoch": 0.12283773081572257, + "grad_norm": 386.0, + "learning_rate": 9.775727314910481e-05, + "loss": 15.4378, + "step": 2947 + }, + { + "epoch": 0.1228794131132508, + "grad_norm": 246.0, + "learning_rate": 9.775527378405261e-05, + "loss": 12.0004, + "step": 2948 + }, + { + "epoch": 0.12292109541077904, + "grad_norm": 620.0, + "learning_rate": 9.775327354865633e-05, + "loss": 20.251, + "step": 2949 + }, + { + "epoch": 0.12296277770830728, + "grad_norm": 302.0, + "learning_rate": 9.775127244295237e-05, + "loss": 13.8129, + "step": 2950 + }, + { + "epoch": 0.12300446000583552, + "grad_norm": 452.0, + "learning_rate": 9.774927046697725e-05, + "loss": 15.6254, + "step": 2951 + }, + { + "epoch": 0.12304614230336376, + "grad_norm": 528.0, + "learning_rate": 9.774726762076742e-05, + "loss": 16.8762, + "step": 2952 + }, + { + "epoch": 0.123087824600892, + "grad_norm": 556.0, + "learning_rate": 9.774526390435943e-05, + "loss": 18.2513, + "step": 2953 + }, + { + "epoch": 0.12312950689842024, + "grad_norm": 1016.0, + "learning_rate": 9.774325931778974e-05, + "loss": 27.2503, + "step": 2954 + }, + { + "epoch": 0.12317118919594848, + "grad_norm": 131.0, + "learning_rate": 9.774125386109492e-05, + "loss": 9.6255, + "step": 2955 + }, + { + "epoch": 0.12321287149347672, + "grad_norm": 720.0, + "learning_rate": 9.773924753431152e-05, + "loss": 19.877, + "step": 2956 + }, + { + "epoch": 0.12325455379100496, + "grad_norm": 199.0, + "learning_rate": 9.773724033747608e-05, + "loss": 8.8756, + "step": 2957 + }, + { + "epoch": 0.1232962360885332, + "grad_norm": 482.0, + "learning_rate": 9.77352322706252e-05, + "loss": 16.2505, + "step": 2958 + }, + { + "epoch": 0.12333791838606144, + "grad_norm": 868.0, + "learning_rate": 9.773322333379548e-05, + "loss": 22.5047, + "step": 2959 + }, + { + "epoch": 0.12337960068358968, + "grad_norm": 380.0, + "learning_rate": 9.773121352702353e-05, + "loss": 15.4393, + "step": 2960 + }, + { + "epoch": 0.12342128298111792, + "grad_norm": 840.0, + "learning_rate": 9.772920285034596e-05, + "loss": 25.502, + "step": 2961 + }, + { + "epoch": 0.12346296527864616, + "grad_norm": 134.0, + "learning_rate": 9.772719130379944e-05, + "loss": 9.7506, + "step": 2962 + }, + { + "epoch": 0.1235046475761744, + "grad_norm": 596.0, + "learning_rate": 9.772517888742063e-05, + "loss": 17.3794, + "step": 2963 + }, + { + "epoch": 0.12354632987370263, + "grad_norm": 1032.0, + "learning_rate": 9.772316560124618e-05, + "loss": 25.3765, + "step": 2964 + }, + { + "epoch": 0.12358801217123087, + "grad_norm": 318.0, + "learning_rate": 9.772115144531281e-05, + "loss": 13.2527, + "step": 2965 + }, + { + "epoch": 0.12362969446875911, + "grad_norm": 524.0, + "learning_rate": 9.771913641965722e-05, + "loss": 16.2509, + "step": 2966 + }, + { + "epoch": 0.12367137676628735, + "grad_norm": 266.0, + "learning_rate": 9.771712052431614e-05, + "loss": 13.1268, + "step": 2967 + }, + { + "epoch": 0.12371305906381559, + "grad_norm": 300.0, + "learning_rate": 9.771510375932628e-05, + "loss": 13.5639, + "step": 2968 + }, + { + "epoch": 0.12375474136134383, + "grad_norm": 212.0, + "learning_rate": 9.771308612472444e-05, + "loss": 10.3755, + "step": 2969 + }, + { + "epoch": 0.12379642365887207, + "grad_norm": 404.0, + "learning_rate": 9.771106762054736e-05, + "loss": 15.6258, + "step": 2970 + }, + { + "epoch": 0.12383810595640031, + "grad_norm": 812.0, + "learning_rate": 9.770904824683185e-05, + "loss": 21.2505, + "step": 2971 + }, + { + "epoch": 0.12387978825392856, + "grad_norm": 247.0, + "learning_rate": 9.770702800361469e-05, + "loss": 10.6881, + "step": 2972 + }, + { + "epoch": 0.1239214705514568, + "grad_norm": 676.0, + "learning_rate": 9.77050068909327e-05, + "loss": 22.0019, + "step": 2973 + }, + { + "epoch": 0.12396315284898504, + "grad_norm": 400.0, + "learning_rate": 9.770298490882273e-05, + "loss": 16.2503, + "step": 2974 + }, + { + "epoch": 0.12400483514651328, + "grad_norm": 54.75, + "learning_rate": 9.770096205732164e-05, + "loss": 7.6884, + "step": 2975 + }, + { + "epoch": 0.12404651744404152, + "grad_norm": 418.0, + "learning_rate": 9.769893833646627e-05, + "loss": 16.6252, + "step": 2976 + }, + { + "epoch": 0.12408819974156976, + "grad_norm": 1384.0, + "learning_rate": 9.769691374629352e-05, + "loss": 31.0061, + "step": 2977 + }, + { + "epoch": 0.124129882039098, + "grad_norm": 141.0, + "learning_rate": 9.769488828684029e-05, + "loss": 9.0008, + "step": 2978 + }, + { + "epoch": 0.12417156433662624, + "grad_norm": 300.0, + "learning_rate": 9.769286195814346e-05, + "loss": 12.5637, + "step": 2979 + }, + { + "epoch": 0.12421324663415448, + "grad_norm": 170.0, + "learning_rate": 9.769083476024e-05, + "loss": 8.8128, + "step": 2980 + }, + { + "epoch": 0.12425492893168272, + "grad_norm": 139.0, + "learning_rate": 9.768880669316685e-05, + "loss": 10.1254, + "step": 2981 + }, + { + "epoch": 0.12429661122921096, + "grad_norm": 552.0, + "learning_rate": 9.768677775696095e-05, + "loss": 18.7506, + "step": 2982 + }, + { + "epoch": 0.1243382935267392, + "grad_norm": 52.0, + "learning_rate": 9.768474795165932e-05, + "loss": 7.3762, + "step": 2983 + }, + { + "epoch": 0.12437997582426744, + "grad_norm": 188.0, + "learning_rate": 9.76827172772989e-05, + "loss": 11.1887, + "step": 2984 + }, + { + "epoch": 0.12442165812179568, + "grad_norm": 452.0, + "learning_rate": 9.768068573391674e-05, + "loss": 15.189, + "step": 2985 + }, + { + "epoch": 0.12446334041932391, + "grad_norm": 360.0, + "learning_rate": 9.767865332154984e-05, + "loss": 13.8755, + "step": 2986 + }, + { + "epoch": 0.12450502271685215, + "grad_norm": 225.0, + "learning_rate": 9.767662004023525e-05, + "loss": 12.251, + "step": 2987 + }, + { + "epoch": 0.1245467050143804, + "grad_norm": 804.0, + "learning_rate": 9.767458589001002e-05, + "loss": 23.5019, + "step": 2988 + }, + { + "epoch": 0.12458838731190863, + "grad_norm": 616.0, + "learning_rate": 9.767255087091125e-05, + "loss": 17.6255, + "step": 2989 + }, + { + "epoch": 0.12463006960943687, + "grad_norm": 256.0, + "learning_rate": 9.767051498297599e-05, + "loss": 13.2521, + "step": 2990 + }, + { + "epoch": 0.12467175190696511, + "grad_norm": 226.0, + "learning_rate": 9.766847822624138e-05, + "loss": 12.0002, + "step": 2991 + }, + { + "epoch": 0.12471343420449335, + "grad_norm": 454.0, + "learning_rate": 9.76664406007445e-05, + "loss": 15.2546, + "step": 2992 + }, + { + "epoch": 0.12475511650202159, + "grad_norm": 152.0, + "learning_rate": 9.766440210652254e-05, + "loss": 8.8755, + "step": 2993 + }, + { + "epoch": 0.12479679879954983, + "grad_norm": 238.0, + "learning_rate": 9.76623627436126e-05, + "loss": 12.813, + "step": 2994 + }, + { + "epoch": 0.12483848109707807, + "grad_norm": 420.0, + "learning_rate": 9.766032251205186e-05, + "loss": 15.1883, + "step": 2995 + }, + { + "epoch": 0.12488016339460631, + "grad_norm": 238.0, + "learning_rate": 9.765828141187753e-05, + "loss": 10.6879, + "step": 2996 + }, + { + "epoch": 0.12492184569213455, + "grad_norm": 360.0, + "learning_rate": 9.765623944312679e-05, + "loss": 14.7503, + "step": 2997 + }, + { + "epoch": 0.12496352798966279, + "grad_norm": 724.0, + "learning_rate": 9.765419660583683e-05, + "loss": 21.6258, + "step": 2998 + }, + { + "epoch": 0.12500521028719103, + "grad_norm": 340.0, + "learning_rate": 9.765215290004494e-05, + "loss": 14.438, + "step": 2999 + }, + { + "epoch": 0.12504689258471927, + "grad_norm": 296.0, + "learning_rate": 9.765010832578831e-05, + "loss": 12.6253, + "step": 3000 + }, + { + "epoch": 0.1250885748822475, + "grad_norm": 194.0, + "learning_rate": 9.764806288310424e-05, + "loss": 10.0005, + "step": 3001 + }, + { + "epoch": 0.12513025717977574, + "grad_norm": 78.0, + "learning_rate": 9.764601657202998e-05, + "loss": 9.7519, + "step": 3002 + }, + { + "epoch": 0.12517193947730398, + "grad_norm": 290.0, + "learning_rate": 9.764396939260285e-05, + "loss": 13.1254, + "step": 3003 + }, + { + "epoch": 0.12521362177483222, + "grad_norm": 392.0, + "learning_rate": 9.764192134486014e-05, + "loss": 14.1878, + "step": 3004 + }, + { + "epoch": 0.12525530407236046, + "grad_norm": 201.0, + "learning_rate": 9.763987242883919e-05, + "loss": 12.1882, + "step": 3005 + }, + { + "epoch": 0.1252969863698887, + "grad_norm": 276.0, + "learning_rate": 9.763782264457734e-05, + "loss": 12.6879, + "step": 3006 + }, + { + "epoch": 0.12533866866741694, + "grad_norm": 796.0, + "learning_rate": 9.763577199211193e-05, + "loss": 22.8754, + "step": 3007 + }, + { + "epoch": 0.12538035096494518, + "grad_norm": 182.0, + "learning_rate": 9.763372047148036e-05, + "loss": 10.6256, + "step": 3008 + }, + { + "epoch": 0.12542203326247342, + "grad_norm": 222.0, + "learning_rate": 9.763166808271999e-05, + "loss": 11.9381, + "step": 3009 + }, + { + "epoch": 0.12546371556000166, + "grad_norm": 508.0, + "learning_rate": 9.762961482586826e-05, + "loss": 17.0008, + "step": 3010 + }, + { + "epoch": 0.1255053978575299, + "grad_norm": 304.0, + "learning_rate": 9.762756070096257e-05, + "loss": 11.7506, + "step": 3011 + }, + { + "epoch": 0.12554708015505814, + "grad_norm": 119.5, + "learning_rate": 9.762550570804035e-05, + "loss": 8.2518, + "step": 3012 + }, + { + "epoch": 0.12558876245258638, + "grad_norm": 344.0, + "learning_rate": 9.762344984713904e-05, + "loss": 14.1255, + "step": 3013 + }, + { + "epoch": 0.12563044475011462, + "grad_norm": 442.0, + "learning_rate": 9.762139311829617e-05, + "loss": 16.626, + "step": 3014 + }, + { + "epoch": 0.12567212704764286, + "grad_norm": 149.0, + "learning_rate": 9.761933552154916e-05, + "loss": 11.1274, + "step": 3015 + }, + { + "epoch": 0.1257138093451711, + "grad_norm": 290.0, + "learning_rate": 9.761727705693552e-05, + "loss": 12.2502, + "step": 3016 + }, + { + "epoch": 0.12575549164269934, + "grad_norm": 372.0, + "learning_rate": 9.76152177244928e-05, + "loss": 15.2505, + "step": 3017 + }, + { + "epoch": 0.12579717394022757, + "grad_norm": 608.0, + "learning_rate": 9.76131575242585e-05, + "loss": 19.7511, + "step": 3018 + }, + { + "epoch": 0.12583885623775581, + "grad_norm": 266.0, + "learning_rate": 9.761109645627019e-05, + "loss": 11.8753, + "step": 3019 + }, + { + "epoch": 0.12588053853528405, + "grad_norm": 588.0, + "learning_rate": 9.760903452056542e-05, + "loss": 18.6257, + "step": 3020 + }, + { + "epoch": 0.1259222208328123, + "grad_norm": 268.0, + "learning_rate": 9.760697171718176e-05, + "loss": 11.8754, + "step": 3021 + }, + { + "epoch": 0.12596390313034053, + "grad_norm": 1368.0, + "learning_rate": 9.76049080461568e-05, + "loss": 32.0006, + "step": 3022 + }, + { + "epoch": 0.12600558542786877, + "grad_norm": 504.0, + "learning_rate": 9.76028435075282e-05, + "loss": 16.8771, + "step": 3023 + }, + { + "epoch": 0.126047267725397, + "grad_norm": 230.0, + "learning_rate": 9.760077810133353e-05, + "loss": 11.7504, + "step": 3024 + }, + { + "epoch": 0.12608895002292525, + "grad_norm": 346.0, + "learning_rate": 9.759871182761044e-05, + "loss": 14.3129, + "step": 3025 + }, + { + "epoch": 0.1261306323204535, + "grad_norm": 58.25, + "learning_rate": 9.759664468639664e-05, + "loss": 6.5346, + "step": 3026 + }, + { + "epoch": 0.12617231461798176, + "grad_norm": 198.0, + "learning_rate": 9.759457667772973e-05, + "loss": 11.0628, + "step": 3027 + }, + { + "epoch": 0.12621399691551, + "grad_norm": 452.0, + "learning_rate": 9.759250780164745e-05, + "loss": 15.5005, + "step": 3028 + }, + { + "epoch": 0.12625567921303824, + "grad_norm": 266.0, + "learning_rate": 9.759043805818748e-05, + "loss": 10.8753, + "step": 3029 + }, + { + "epoch": 0.12629736151056647, + "grad_norm": 168.0, + "learning_rate": 9.758836744738757e-05, + "loss": 10.8757, + "step": 3030 + }, + { + "epoch": 0.12633904380809471, + "grad_norm": 648.0, + "learning_rate": 9.758629596928543e-05, + "loss": 18.1309, + "step": 3031 + }, + { + "epoch": 0.12638072610562295, + "grad_norm": 884.0, + "learning_rate": 9.758422362391881e-05, + "loss": 24.3772, + "step": 3032 + }, + { + "epoch": 0.1264224084031512, + "grad_norm": 482.0, + "learning_rate": 9.75821504113255e-05, + "loss": 15.3131, + "step": 3033 + }, + { + "epoch": 0.12646409070067943, + "grad_norm": 240.0, + "learning_rate": 9.758007633154328e-05, + "loss": 12.1255, + "step": 3034 + }, + { + "epoch": 0.12650577299820767, + "grad_norm": 256.0, + "learning_rate": 9.757800138460994e-05, + "loss": 14.1892, + "step": 3035 + }, + { + "epoch": 0.1265474552957359, + "grad_norm": 346.0, + "learning_rate": 9.75759255705633e-05, + "loss": 14.7508, + "step": 3036 + }, + { + "epoch": 0.12658913759326415, + "grad_norm": 360.0, + "learning_rate": 9.757384888944119e-05, + "loss": 14.7515, + "step": 3037 + }, + { + "epoch": 0.1266308198907924, + "grad_norm": 458.0, + "learning_rate": 9.757177134128147e-05, + "loss": 15.1885, + "step": 3038 + }, + { + "epoch": 0.12667250218832063, + "grad_norm": 312.0, + "learning_rate": 9.756969292612199e-05, + "loss": 10.0641, + "step": 3039 + }, + { + "epoch": 0.12671418448584887, + "grad_norm": 430.0, + "learning_rate": 9.756761364400063e-05, + "loss": 17.3773, + "step": 3040 + }, + { + "epoch": 0.1267558667833771, + "grad_norm": 524.0, + "learning_rate": 9.75655334949553e-05, + "loss": 14.5006, + "step": 3041 + }, + { + "epoch": 0.12679754908090535, + "grad_norm": 103.0, + "learning_rate": 9.756345247902388e-05, + "loss": 6.2203, + "step": 3042 + }, + { + "epoch": 0.1268392313784336, + "grad_norm": 276.0, + "learning_rate": 9.756137059624432e-05, + "loss": 12.5009, + "step": 3043 + }, + { + "epoch": 0.12688091367596183, + "grad_norm": 248.0, + "learning_rate": 9.755928784665459e-05, + "loss": 12.5634, + "step": 3044 + }, + { + "epoch": 0.12692259597349007, + "grad_norm": 306.0, + "learning_rate": 9.755720423029258e-05, + "loss": 13.0005, + "step": 3045 + }, + { + "epoch": 0.1269642782710183, + "grad_norm": 97.0, + "learning_rate": 9.755511974719631e-05, + "loss": 8.6257, + "step": 3046 + }, + { + "epoch": 0.12700596056854654, + "grad_norm": 174.0, + "learning_rate": 9.755303439740378e-05, + "loss": 10.6256, + "step": 3047 + }, + { + "epoch": 0.12704764286607478, + "grad_norm": 472.0, + "learning_rate": 9.755094818095296e-05, + "loss": 17.2508, + "step": 3048 + }, + { + "epoch": 0.12708932516360302, + "grad_norm": 266.0, + "learning_rate": 9.754886109788188e-05, + "loss": 10.6879, + "step": 3049 + }, + { + "epoch": 0.12713100746113126, + "grad_norm": 632.0, + "learning_rate": 9.75467731482286e-05, + "loss": 18.3757, + "step": 3050 + }, + { + "epoch": 0.1271726897586595, + "grad_norm": 380.0, + "learning_rate": 9.754468433203115e-05, + "loss": 16.0008, + "step": 3051 + }, + { + "epoch": 0.12721437205618774, + "grad_norm": 454.0, + "learning_rate": 9.754259464932762e-05, + "loss": 15.753, + "step": 3052 + }, + { + "epoch": 0.12725605435371598, + "grad_norm": 1040.0, + "learning_rate": 9.754050410015607e-05, + "loss": 25.1263, + "step": 3053 + }, + { + "epoch": 0.12729773665124422, + "grad_norm": 466.0, + "learning_rate": 9.753841268455462e-05, + "loss": 16.8794, + "step": 3054 + }, + { + "epoch": 0.12733941894877246, + "grad_norm": 656.0, + "learning_rate": 9.753632040256137e-05, + "loss": 19.0004, + "step": 3055 + }, + { + "epoch": 0.1273811012463007, + "grad_norm": 370.0, + "learning_rate": 9.753422725421446e-05, + "loss": 14.4385, + "step": 3056 + }, + { + "epoch": 0.12742278354382894, + "grad_norm": 186.0, + "learning_rate": 9.753213323955204e-05, + "loss": 11.0003, + "step": 3057 + }, + { + "epoch": 0.12746446584135718, + "grad_norm": 220.0, + "learning_rate": 9.753003835861228e-05, + "loss": 11.9391, + "step": 3058 + }, + { + "epoch": 0.12750614813888542, + "grad_norm": 628.0, + "learning_rate": 9.752794261143334e-05, + "loss": 19.1301, + "step": 3059 + }, + { + "epoch": 0.12754783043641366, + "grad_norm": 132.0, + "learning_rate": 9.752584599805344e-05, + "loss": 9.3765, + "step": 3060 + }, + { + "epoch": 0.1275895127339419, + "grad_norm": 940.0, + "learning_rate": 9.752374851851079e-05, + "loss": 25.1254, + "step": 3061 + }, + { + "epoch": 0.12763119503147013, + "grad_norm": 308.0, + "learning_rate": 9.752165017284357e-05, + "loss": 12.0662, + "step": 3062 + }, + { + "epoch": 0.12767287732899837, + "grad_norm": 115.0, + "learning_rate": 9.751955096109006e-05, + "loss": 8.8761, + "step": 3063 + }, + { + "epoch": 0.1277145596265266, + "grad_norm": 444.0, + "learning_rate": 9.751745088328855e-05, + "loss": 16.1255, + "step": 3064 + }, + { + "epoch": 0.12775624192405485, + "grad_norm": 378.0, + "learning_rate": 9.751534993947725e-05, + "loss": 14.1879, + "step": 3065 + }, + { + "epoch": 0.1277979242215831, + "grad_norm": 640.0, + "learning_rate": 9.751324812969448e-05, + "loss": 20.5004, + "step": 3066 + }, + { + "epoch": 0.12783960651911133, + "grad_norm": 236.0, + "learning_rate": 9.751114545397856e-05, + "loss": 10.3129, + "step": 3067 + }, + { + "epoch": 0.12788128881663957, + "grad_norm": 314.0, + "learning_rate": 9.750904191236779e-05, + "loss": 10.8131, + "step": 3068 + }, + { + "epoch": 0.1279229711141678, + "grad_norm": 164.0, + "learning_rate": 9.750693750490052e-05, + "loss": 10.6255, + "step": 3069 + }, + { + "epoch": 0.12796465341169605, + "grad_norm": 382.0, + "learning_rate": 9.750483223161509e-05, + "loss": 15.5629, + "step": 3070 + }, + { + "epoch": 0.1280063357092243, + "grad_norm": 476.0, + "learning_rate": 9.750272609254987e-05, + "loss": 15.3173, + "step": 3071 + }, + { + "epoch": 0.12804801800675253, + "grad_norm": 552.0, + "learning_rate": 9.750061908774325e-05, + "loss": 17.5017, + "step": 3072 + }, + { + "epoch": 0.12808970030428077, + "grad_norm": 494.0, + "learning_rate": 9.749851121723363e-05, + "loss": 15.6255, + "step": 3073 + }, + { + "epoch": 0.128131382601809, + "grad_norm": 418.0, + "learning_rate": 9.749640248105943e-05, + "loss": 13.3136, + "step": 3074 + }, + { + "epoch": 0.12817306489933725, + "grad_norm": 227.0, + "learning_rate": 9.749429287925909e-05, + "loss": 10.5004, + "step": 3075 + }, + { + "epoch": 0.1282147471968655, + "grad_norm": 73.5, + "learning_rate": 9.749218241187103e-05, + "loss": 8.3754, + "step": 3076 + }, + { + "epoch": 0.12825642949439373, + "grad_norm": 544.0, + "learning_rate": 9.749007107893373e-05, + "loss": 16.2508, + "step": 3077 + }, + { + "epoch": 0.12829811179192196, + "grad_norm": 398.0, + "learning_rate": 9.748795888048567e-05, + "loss": 15.0006, + "step": 3078 + }, + { + "epoch": 0.1283397940894502, + "grad_norm": 524.0, + "learning_rate": 9.748584581656535e-05, + "loss": 16.1252, + "step": 3079 + }, + { + "epoch": 0.12838147638697844, + "grad_norm": 338.0, + "learning_rate": 9.748373188721128e-05, + "loss": 12.8781, + "step": 3080 + }, + { + "epoch": 0.12842315868450668, + "grad_norm": 179.0, + "learning_rate": 9.748161709246198e-05, + "loss": 11.2504, + "step": 3081 + }, + { + "epoch": 0.12846484098203492, + "grad_norm": 660.0, + "learning_rate": 9.747950143235598e-05, + "loss": 18.3788, + "step": 3082 + }, + { + "epoch": 0.12850652327956316, + "grad_norm": 454.0, + "learning_rate": 9.747738490693185e-05, + "loss": 16.3754, + "step": 3083 + }, + { + "epoch": 0.1285482055770914, + "grad_norm": 676.0, + "learning_rate": 9.747526751622819e-05, + "loss": 19.6277, + "step": 3084 + }, + { + "epoch": 0.12858988787461964, + "grad_norm": 462.0, + "learning_rate": 9.747314926028354e-05, + "loss": 16.3755, + "step": 3085 + }, + { + "epoch": 0.12863157017214788, + "grad_norm": 280.0, + "learning_rate": 9.747103013913654e-05, + "loss": 12.3759, + "step": 3086 + }, + { + "epoch": 0.12867325246967612, + "grad_norm": 191.0, + "learning_rate": 9.74689101528258e-05, + "loss": 9.0635, + "step": 3087 + }, + { + "epoch": 0.12871493476720436, + "grad_norm": 92.5, + "learning_rate": 9.746678930138996e-05, + "loss": 9.3755, + "step": 3088 + }, + { + "epoch": 0.1287566170647326, + "grad_norm": 350.0, + "learning_rate": 9.746466758486768e-05, + "loss": 14.189, + "step": 3089 + }, + { + "epoch": 0.12879829936226084, + "grad_norm": 628.0, + "learning_rate": 9.74625450032976e-05, + "loss": 20.1252, + "step": 3090 + }, + { + "epoch": 0.12883998165978908, + "grad_norm": 109.5, + "learning_rate": 9.746042155671844e-05, + "loss": 9.4378, + "step": 3091 + }, + { + "epoch": 0.12888166395731732, + "grad_norm": 128.0, + "learning_rate": 9.745829724516888e-05, + "loss": 7.0319, + "step": 3092 + }, + { + "epoch": 0.12892334625484556, + "grad_norm": 1024.0, + "learning_rate": 9.745617206868764e-05, + "loss": 26.1252, + "step": 3093 + }, + { + "epoch": 0.1289650285523738, + "grad_norm": 100.0, + "learning_rate": 9.745404602731345e-05, + "loss": 8.0632, + "step": 3094 + }, + { + "epoch": 0.12900671084990203, + "grad_norm": 158.0, + "learning_rate": 9.745191912108504e-05, + "loss": 9.3755, + "step": 3095 + }, + { + "epoch": 0.12904839314743027, + "grad_norm": 167.0, + "learning_rate": 9.744979135004122e-05, + "loss": 9.7505, + "step": 3096 + }, + { + "epoch": 0.1290900754449585, + "grad_norm": 784.0, + "learning_rate": 9.744766271422072e-05, + "loss": 22.7503, + "step": 3097 + }, + { + "epoch": 0.12913175774248675, + "grad_norm": 356.0, + "learning_rate": 9.744553321366238e-05, + "loss": 15.0003, + "step": 3098 + }, + { + "epoch": 0.129173440040015, + "grad_norm": 314.0, + "learning_rate": 9.744340284840497e-05, + "loss": 13.3752, + "step": 3099 + }, + { + "epoch": 0.12921512233754326, + "grad_norm": 288.0, + "learning_rate": 9.744127161848732e-05, + "loss": 12.6286, + "step": 3100 + }, + { + "epoch": 0.1292568046350715, + "grad_norm": 239.0, + "learning_rate": 9.74391395239483e-05, + "loss": 11.1879, + "step": 3101 + }, + { + "epoch": 0.12929848693259974, + "grad_norm": 412.0, + "learning_rate": 9.743700656482675e-05, + "loss": 15.4378, + "step": 3102 + }, + { + "epoch": 0.12934016923012798, + "grad_norm": 500.0, + "learning_rate": 9.743487274116154e-05, + "loss": 17.6253, + "step": 3103 + }, + { + "epoch": 0.12938185152765622, + "grad_norm": 1208.0, + "learning_rate": 9.743273805299155e-05, + "loss": 30.8753, + "step": 3104 + }, + { + "epoch": 0.12942353382518446, + "grad_norm": 780.0, + "learning_rate": 9.743060250035571e-05, + "loss": 20.8793, + "step": 3105 + }, + { + "epoch": 0.1294652161227127, + "grad_norm": 908.0, + "learning_rate": 9.742846608329295e-05, + "loss": 22.6261, + "step": 3106 + }, + { + "epoch": 0.12950689842024093, + "grad_norm": 101.0, + "learning_rate": 9.742632880184214e-05, + "loss": 9.0633, + "step": 3107 + }, + { + "epoch": 0.12954858071776917, + "grad_norm": 132.0, + "learning_rate": 9.742419065604231e-05, + "loss": 10.6268, + "step": 3108 + }, + { + "epoch": 0.1295902630152974, + "grad_norm": 312.0, + "learning_rate": 9.74220516459324e-05, + "loss": 13.3753, + "step": 3109 + }, + { + "epoch": 0.12963194531282565, + "grad_norm": 520.0, + "learning_rate": 9.741991177155138e-05, + "loss": 20.0004, + "step": 3110 + }, + { + "epoch": 0.1296736276103539, + "grad_norm": 264.0, + "learning_rate": 9.741777103293825e-05, + "loss": 13.8129, + "step": 3111 + }, + { + "epoch": 0.12971530990788213, + "grad_norm": 540.0, + "learning_rate": 9.741562943013204e-05, + "loss": 18.2549, + "step": 3112 + }, + { + "epoch": 0.12975699220541037, + "grad_norm": 438.0, + "learning_rate": 9.741348696317177e-05, + "loss": 17.0007, + "step": 3113 + }, + { + "epoch": 0.1297986745029386, + "grad_norm": 1104.0, + "learning_rate": 9.74113436320965e-05, + "loss": 25.8801, + "step": 3114 + }, + { + "epoch": 0.12984035680046685, + "grad_norm": 668.0, + "learning_rate": 9.740919943694527e-05, + "loss": 19.2503, + "step": 3115 + }, + { + "epoch": 0.1298820390979951, + "grad_norm": 348.0, + "learning_rate": 9.740705437775719e-05, + "loss": 12.5633, + "step": 3116 + }, + { + "epoch": 0.12992372139552333, + "grad_norm": 266.0, + "learning_rate": 9.740490845457133e-05, + "loss": 11.8754, + "step": 3117 + }, + { + "epoch": 0.12996540369305157, + "grad_norm": 50.5, + "learning_rate": 9.740276166742679e-05, + "loss": 7.3755, + "step": 3118 + }, + { + "epoch": 0.1300070859905798, + "grad_norm": 458.0, + "learning_rate": 9.740061401636272e-05, + "loss": 16.1269, + "step": 3119 + }, + { + "epoch": 0.13004876828810805, + "grad_norm": 175.0, + "learning_rate": 9.739846550141826e-05, + "loss": 11.063, + "step": 3120 + }, + { + "epoch": 0.13009045058563629, + "grad_norm": 370.0, + "learning_rate": 9.739631612263255e-05, + "loss": 14.0638, + "step": 3121 + }, + { + "epoch": 0.13013213288316453, + "grad_norm": 504.0, + "learning_rate": 9.739416588004478e-05, + "loss": 14.8158, + "step": 3122 + }, + { + "epoch": 0.13017381518069276, + "grad_norm": 215.0, + "learning_rate": 9.73920147736941e-05, + "loss": 11.2503, + "step": 3123 + }, + { + "epoch": 0.130215497478221, + "grad_norm": 524.0, + "learning_rate": 9.738986280361978e-05, + "loss": 17.5023, + "step": 3124 + }, + { + "epoch": 0.13025717977574924, + "grad_norm": 330.0, + "learning_rate": 9.738770996986099e-05, + "loss": 12.563, + "step": 3125 + }, + { + "epoch": 0.13029886207327748, + "grad_norm": 147.0, + "learning_rate": 9.738555627245697e-05, + "loss": 8.7504, + "step": 3126 + }, + { + "epoch": 0.13034054437080572, + "grad_norm": 496.0, + "learning_rate": 9.7383401711447e-05, + "loss": 18.7523, + "step": 3127 + }, + { + "epoch": 0.13038222666833396, + "grad_norm": 354.0, + "learning_rate": 9.738124628687031e-05, + "loss": 14.4378, + "step": 3128 + }, + { + "epoch": 0.1304239089658622, + "grad_norm": 544.0, + "learning_rate": 9.73790899987662e-05, + "loss": 18.2503, + "step": 3129 + }, + { + "epoch": 0.13046559126339044, + "grad_norm": 180.0, + "learning_rate": 9.737693284717398e-05, + "loss": 10.6254, + "step": 3130 + }, + { + "epoch": 0.13050727356091868, + "grad_norm": 532.0, + "learning_rate": 9.737477483213295e-05, + "loss": 17.2503, + "step": 3131 + }, + { + "epoch": 0.13054895585844692, + "grad_norm": 154.0, + "learning_rate": 9.737261595368243e-05, + "loss": 9.8761, + "step": 3132 + }, + { + "epoch": 0.13059063815597516, + "grad_norm": 234.0, + "learning_rate": 9.737045621186181e-05, + "loss": 12.8757, + "step": 3133 + }, + { + "epoch": 0.1306323204535034, + "grad_norm": 1496.0, + "learning_rate": 9.73682956067104e-05, + "loss": 38.2542, + "step": 3134 + }, + { + "epoch": 0.13067400275103164, + "grad_norm": 157.0, + "learning_rate": 9.736613413826758e-05, + "loss": 9.7504, + "step": 3135 + }, + { + "epoch": 0.13071568504855988, + "grad_norm": 384.0, + "learning_rate": 9.736397180657279e-05, + "loss": 14.2505, + "step": 3136 + }, + { + "epoch": 0.13075736734608812, + "grad_norm": 140.0, + "learning_rate": 9.73618086116654e-05, + "loss": 10.1253, + "step": 3137 + }, + { + "epoch": 0.13079904964361636, + "grad_norm": 478.0, + "learning_rate": 9.735964455358484e-05, + "loss": 16.7524, + "step": 3138 + }, + { + "epoch": 0.1308407319411446, + "grad_norm": 71.5, + "learning_rate": 9.735747963237055e-05, + "loss": 7.0326, + "step": 3139 + }, + { + "epoch": 0.13088241423867283, + "grad_norm": 904.0, + "learning_rate": 9.7355313848062e-05, + "loss": 22.1253, + "step": 3140 + }, + { + "epoch": 0.13092409653620107, + "grad_norm": 370.0, + "learning_rate": 9.735314720069864e-05, + "loss": 12.6881, + "step": 3141 + }, + { + "epoch": 0.1309657788337293, + "grad_norm": 356.0, + "learning_rate": 9.735097969031998e-05, + "loss": 13.3773, + "step": 3142 + }, + { + "epoch": 0.13100746113125755, + "grad_norm": 171.0, + "learning_rate": 9.73488113169655e-05, + "loss": 10.7515, + "step": 3143 + }, + { + "epoch": 0.1310491434287858, + "grad_norm": 197.0, + "learning_rate": 9.734664208067475e-05, + "loss": 11.7518, + "step": 3144 + }, + { + "epoch": 0.13109082572631403, + "grad_norm": 352.0, + "learning_rate": 9.734447198148721e-05, + "loss": 14.5636, + "step": 3145 + }, + { + "epoch": 0.13113250802384227, + "grad_norm": 556.0, + "learning_rate": 9.73423010194425e-05, + "loss": 16.0004, + "step": 3146 + }, + { + "epoch": 0.1311741903213705, + "grad_norm": 314.0, + "learning_rate": 9.734012919458014e-05, + "loss": 14.1881, + "step": 3147 + }, + { + "epoch": 0.13121587261889875, + "grad_norm": 109.0, + "learning_rate": 9.733795650693971e-05, + "loss": 8.5005, + "step": 3148 + }, + { + "epoch": 0.131257554916427, + "grad_norm": 75.5, + "learning_rate": 9.733578295656083e-05, + "loss": 8.6885, + "step": 3149 + }, + { + "epoch": 0.13129923721395523, + "grad_norm": 338.0, + "learning_rate": 9.733360854348311e-05, + "loss": 14.3752, + "step": 3150 + }, + { + "epoch": 0.13134091951148347, + "grad_norm": 544.0, + "learning_rate": 9.733143326774618e-05, + "loss": 17.3793, + "step": 3151 + }, + { + "epoch": 0.1313826018090117, + "grad_norm": 532.0, + "learning_rate": 9.732925712938966e-05, + "loss": 17.5006, + "step": 3152 + }, + { + "epoch": 0.13142428410653995, + "grad_norm": 344.0, + "learning_rate": 9.732708012845323e-05, + "loss": 14.6904, + "step": 3153 + }, + { + "epoch": 0.13146596640406819, + "grad_norm": 1240.0, + "learning_rate": 9.732490226497656e-05, + "loss": 24.1311, + "step": 3154 + }, + { + "epoch": 0.13150764870159642, + "grad_norm": 468.0, + "learning_rate": 9.732272353899936e-05, + "loss": 15.6263, + "step": 3155 + }, + { + "epoch": 0.13154933099912466, + "grad_norm": 256.0, + "learning_rate": 9.732054395056131e-05, + "loss": 10.0628, + "step": 3156 + }, + { + "epoch": 0.1315910132966529, + "grad_norm": 200.0, + "learning_rate": 9.731836349970213e-05, + "loss": 11.2507, + "step": 3157 + }, + { + "epoch": 0.13163269559418114, + "grad_norm": 184.0, + "learning_rate": 9.731618218646161e-05, + "loss": 10.5002, + "step": 3158 + }, + { + "epoch": 0.13167437789170938, + "grad_norm": 260.0, + "learning_rate": 9.731400001087945e-05, + "loss": 13.2513, + "step": 3159 + }, + { + "epoch": 0.13171606018923762, + "grad_norm": 404.0, + "learning_rate": 9.731181697299544e-05, + "loss": 16.0011, + "step": 3160 + }, + { + "epoch": 0.13175774248676586, + "grad_norm": 182.0, + "learning_rate": 9.730963307284936e-05, + "loss": 10.9391, + "step": 3161 + }, + { + "epoch": 0.1317994247842941, + "grad_norm": 340.0, + "learning_rate": 9.730744831048103e-05, + "loss": 14.3134, + "step": 3162 + }, + { + "epoch": 0.13184110708182234, + "grad_norm": 236.0, + "learning_rate": 9.730526268593025e-05, + "loss": 12.5006, + "step": 3163 + }, + { + "epoch": 0.13188278937935058, + "grad_norm": 173.0, + "learning_rate": 9.730307619923686e-05, + "loss": 10.1257, + "step": 3164 + }, + { + "epoch": 0.13192447167687882, + "grad_norm": 294.0, + "learning_rate": 9.730088885044071e-05, + "loss": 12.6881, + "step": 3165 + }, + { + "epoch": 0.13196615397440706, + "grad_norm": 272.0, + "learning_rate": 9.729870063958165e-05, + "loss": 13.0007, + "step": 3166 + }, + { + "epoch": 0.1320078362719353, + "grad_norm": 197.0, + "learning_rate": 9.729651156669959e-05, + "loss": 11.188, + "step": 3167 + }, + { + "epoch": 0.13204951856946354, + "grad_norm": 149.0, + "learning_rate": 9.729432163183439e-05, + "loss": 8.8757, + "step": 3168 + }, + { + "epoch": 0.13209120086699178, + "grad_norm": 218.0, + "learning_rate": 9.729213083502599e-05, + "loss": 10.6257, + "step": 3169 + }, + { + "epoch": 0.13213288316452002, + "grad_norm": 644.0, + "learning_rate": 9.728993917631431e-05, + "loss": 20.8781, + "step": 3170 + }, + { + "epoch": 0.13217456546204825, + "grad_norm": 484.0, + "learning_rate": 9.728774665573928e-05, + "loss": 17.1262, + "step": 3171 + }, + { + "epoch": 0.1322162477595765, + "grad_norm": 164.0, + "learning_rate": 9.728555327334087e-05, + "loss": 8.9377, + "step": 3172 + }, + { + "epoch": 0.13225793005710476, + "grad_norm": 532.0, + "learning_rate": 9.728335902915906e-05, + "loss": 17.8757, + "step": 3173 + }, + { + "epoch": 0.132299612354633, + "grad_norm": 536.0, + "learning_rate": 9.728116392323383e-05, + "loss": 16.8752, + "step": 3174 + }, + { + "epoch": 0.13234129465216124, + "grad_norm": 242.0, + "learning_rate": 9.727896795560518e-05, + "loss": 10.5629, + "step": 3175 + }, + { + "epoch": 0.13238297694968948, + "grad_norm": 808.0, + "learning_rate": 9.727677112631318e-05, + "loss": 22.7521, + "step": 3176 + }, + { + "epoch": 0.13242465924721772, + "grad_norm": 246.0, + "learning_rate": 9.727457343539779e-05, + "loss": 10.3751, + "step": 3177 + }, + { + "epoch": 0.13246634154474596, + "grad_norm": 500.0, + "learning_rate": 9.727237488289911e-05, + "loss": 15.4379, + "step": 3178 + }, + { + "epoch": 0.1325080238422742, + "grad_norm": 178.0, + "learning_rate": 9.727017546885721e-05, + "loss": 10.0661, + "step": 3179 + }, + { + "epoch": 0.13254970613980244, + "grad_norm": 256.0, + "learning_rate": 9.726797519331217e-05, + "loss": 13.0628, + "step": 3180 + }, + { + "epoch": 0.13259138843733068, + "grad_norm": 476.0, + "learning_rate": 9.726577405630408e-05, + "loss": 16.1252, + "step": 3181 + }, + { + "epoch": 0.13263307073485892, + "grad_norm": 206.0, + "learning_rate": 9.726357205787304e-05, + "loss": 11.5629, + "step": 3182 + }, + { + "epoch": 0.13267475303238715, + "grad_norm": 316.0, + "learning_rate": 9.726136919805924e-05, + "loss": 13.0006, + "step": 3183 + }, + { + "epoch": 0.1327164353299154, + "grad_norm": 234.0, + "learning_rate": 9.725916547690277e-05, + "loss": 10.5003, + "step": 3184 + }, + { + "epoch": 0.13275811762744363, + "grad_norm": 416.0, + "learning_rate": 9.725696089444383e-05, + "loss": 13.5005, + "step": 3185 + }, + { + "epoch": 0.13279979992497187, + "grad_norm": 113.0, + "learning_rate": 9.725475545072255e-05, + "loss": 8.1253, + "step": 3186 + }, + { + "epoch": 0.1328414822225001, + "grad_norm": 310.0, + "learning_rate": 9.72525491457792e-05, + "loss": 12.8757, + "step": 3187 + }, + { + "epoch": 0.13288316452002835, + "grad_norm": 264.0, + "learning_rate": 9.725034197965391e-05, + "loss": 12.3756, + "step": 3188 + }, + { + "epoch": 0.1329248468175566, + "grad_norm": 1112.0, + "learning_rate": 9.724813395238697e-05, + "loss": 24.3755, + "step": 3189 + }, + { + "epoch": 0.13296652911508483, + "grad_norm": 306.0, + "learning_rate": 9.724592506401857e-05, + "loss": 12.9388, + "step": 3190 + }, + { + "epoch": 0.13300821141261307, + "grad_norm": 442.0, + "learning_rate": 9.724371531458902e-05, + "loss": 14.5634, + "step": 3191 + }, + { + "epoch": 0.1330498937101413, + "grad_norm": 344.0, + "learning_rate": 9.724150470413855e-05, + "loss": 14.2504, + "step": 3192 + }, + { + "epoch": 0.13309157600766955, + "grad_norm": 472.0, + "learning_rate": 9.723929323270745e-05, + "loss": 17.0041, + "step": 3193 + }, + { + "epoch": 0.1331332583051978, + "grad_norm": 400.0, + "learning_rate": 9.723708090033605e-05, + "loss": 14.4377, + "step": 3194 + }, + { + "epoch": 0.13317494060272603, + "grad_norm": 358.0, + "learning_rate": 9.723486770706466e-05, + "loss": 13.4395, + "step": 3195 + }, + { + "epoch": 0.13321662290025427, + "grad_norm": 904.0, + "learning_rate": 9.723265365293361e-05, + "loss": 24.6255, + "step": 3196 + }, + { + "epoch": 0.1332583051977825, + "grad_norm": 418.0, + "learning_rate": 9.723043873798326e-05, + "loss": 16.6254, + "step": 3197 + }, + { + "epoch": 0.13329998749531075, + "grad_norm": 496.0, + "learning_rate": 9.722822296225395e-05, + "loss": 18.13, + "step": 3198 + }, + { + "epoch": 0.13334166979283898, + "grad_norm": 364.0, + "learning_rate": 9.722600632578611e-05, + "loss": 13.2502, + "step": 3199 + }, + { + "epoch": 0.13338335209036722, + "grad_norm": 334.0, + "learning_rate": 9.722378882862009e-05, + "loss": 14.3131, + "step": 3200 + }, + { + "epoch": 0.13342503438789546, + "grad_norm": 390.0, + "learning_rate": 9.722157047079634e-05, + "loss": 14.5628, + "step": 3201 + }, + { + "epoch": 0.1334667166854237, + "grad_norm": 552.0, + "learning_rate": 9.721935125235528e-05, + "loss": 17.2501, + "step": 3202 + }, + { + "epoch": 0.13350839898295194, + "grad_norm": 544.0, + "learning_rate": 9.721713117333734e-05, + "loss": 17.6253, + "step": 3203 + }, + { + "epoch": 0.13355008128048018, + "grad_norm": 1024.0, + "learning_rate": 9.7214910233783e-05, + "loss": 26.2506, + "step": 3204 + }, + { + "epoch": 0.13359176357800842, + "grad_norm": 258.0, + "learning_rate": 9.721268843373273e-05, + "loss": 12.3146, + "step": 3205 + }, + { + "epoch": 0.13363344587553666, + "grad_norm": 134.0, + "learning_rate": 9.721046577322701e-05, + "loss": 8.4385, + "step": 3206 + }, + { + "epoch": 0.1336751281730649, + "grad_norm": 143.0, + "learning_rate": 9.720824225230639e-05, + "loss": 7.8753, + "step": 3207 + }, + { + "epoch": 0.13371681047059314, + "grad_norm": 1064.0, + "learning_rate": 9.720601787101133e-05, + "loss": 27.7503, + "step": 3208 + }, + { + "epoch": 0.13375849276812138, + "grad_norm": 366.0, + "learning_rate": 9.720379262938241e-05, + "loss": 13.5644, + "step": 3209 + }, + { + "epoch": 0.13380017506564962, + "grad_norm": 510.0, + "learning_rate": 9.720156652746019e-05, + "loss": 17.0004, + "step": 3210 + }, + { + "epoch": 0.13384185736317786, + "grad_norm": 588.0, + "learning_rate": 9.719933956528523e-05, + "loss": 18.3758, + "step": 3211 + }, + { + "epoch": 0.1338835396607061, + "grad_norm": 432.0, + "learning_rate": 9.719711174289812e-05, + "loss": 15.8754, + "step": 3212 + }, + { + "epoch": 0.13392522195823434, + "grad_norm": 480.0, + "learning_rate": 9.719488306033944e-05, + "loss": 14.7502, + "step": 3213 + }, + { + "epoch": 0.13396690425576258, + "grad_norm": 486.0, + "learning_rate": 9.719265351764984e-05, + "loss": 17.1259, + "step": 3214 + }, + { + "epoch": 0.13400858655329081, + "grad_norm": 374.0, + "learning_rate": 9.719042311486995e-05, + "loss": 13.8754, + "step": 3215 + }, + { + "epoch": 0.13405026885081905, + "grad_norm": 348.0, + "learning_rate": 9.71881918520404e-05, + "loss": 14.2502, + "step": 3216 + }, + { + "epoch": 0.1340919511483473, + "grad_norm": 510.0, + "learning_rate": 9.718595972920186e-05, + "loss": 15.8127, + "step": 3217 + }, + { + "epoch": 0.13413363344587553, + "grad_norm": 236.0, + "learning_rate": 9.718372674639502e-05, + "loss": 12.0634, + "step": 3218 + }, + { + "epoch": 0.13417531574340377, + "grad_norm": 1368.0, + "learning_rate": 9.718149290366056e-05, + "loss": 35.2538, + "step": 3219 + }, + { + "epoch": 0.134216998040932, + "grad_norm": 198.0, + "learning_rate": 9.717925820103922e-05, + "loss": 12.0011, + "step": 3220 + }, + { + "epoch": 0.13425868033846025, + "grad_norm": 300.0, + "learning_rate": 9.717702263857169e-05, + "loss": 13.4377, + "step": 3221 + }, + { + "epoch": 0.1343003626359885, + "grad_norm": 548.0, + "learning_rate": 9.717478621629876e-05, + "loss": 18.1262, + "step": 3222 + }, + { + "epoch": 0.13434204493351673, + "grad_norm": 248.0, + "learning_rate": 9.717254893426115e-05, + "loss": 10.9381, + "step": 3223 + }, + { + "epoch": 0.13438372723104497, + "grad_norm": 380.0, + "learning_rate": 9.717031079249965e-05, + "loss": 15.1878, + "step": 3224 + }, + { + "epoch": 0.1344254095285732, + "grad_norm": 115.5, + "learning_rate": 9.716807179105505e-05, + "loss": 7.9067, + "step": 3225 + }, + { + "epoch": 0.13446709182610145, + "grad_norm": 183.0, + "learning_rate": 9.716583192996815e-05, + "loss": 8.3143, + "step": 3226 + }, + { + "epoch": 0.1345087741236297, + "grad_norm": 392.0, + "learning_rate": 9.716359120927978e-05, + "loss": 14.8131, + "step": 3227 + }, + { + "epoch": 0.13455045642115793, + "grad_norm": 284.0, + "learning_rate": 9.716134962903076e-05, + "loss": 9.3762, + "step": 3228 + }, + { + "epoch": 0.13459213871868617, + "grad_norm": 612.0, + "learning_rate": 9.715910718926198e-05, + "loss": 20.5003, + "step": 3229 + }, + { + "epoch": 0.1346338210162144, + "grad_norm": 812.0, + "learning_rate": 9.715686389001426e-05, + "loss": 20.131, + "step": 3230 + }, + { + "epoch": 0.13467550331374264, + "grad_norm": 312.0, + "learning_rate": 9.715461973132854e-05, + "loss": 11.6256, + "step": 3231 + }, + { + "epoch": 0.13471718561127088, + "grad_norm": 71.0, + "learning_rate": 9.715237471324566e-05, + "loss": 7.72, + "step": 3232 + }, + { + "epoch": 0.13475886790879912, + "grad_norm": 286.0, + "learning_rate": 9.715012883580657e-05, + "loss": 13.0629, + "step": 3233 + }, + { + "epoch": 0.13480055020632736, + "grad_norm": 272.0, + "learning_rate": 9.714788209905222e-05, + "loss": 12.1263, + "step": 3234 + }, + { + "epoch": 0.1348422325038556, + "grad_norm": 169.0, + "learning_rate": 9.71456345030235e-05, + "loss": 7.9067, + "step": 3235 + }, + { + "epoch": 0.13488391480138384, + "grad_norm": 235.0, + "learning_rate": 9.714338604776143e-05, + "loss": 12.1885, + "step": 3236 + }, + { + "epoch": 0.13492559709891208, + "grad_norm": 560.0, + "learning_rate": 9.714113673330697e-05, + "loss": 18.2546, + "step": 3237 + }, + { + "epoch": 0.13496727939644032, + "grad_norm": 51.0, + "learning_rate": 9.713888655970108e-05, + "loss": 7.6564, + "step": 3238 + }, + { + "epoch": 0.13500896169396856, + "grad_norm": 216.0, + "learning_rate": 9.713663552698482e-05, + "loss": 10.938, + "step": 3239 + }, + { + "epoch": 0.1350506439914968, + "grad_norm": 108.5, + "learning_rate": 9.713438363519918e-05, + "loss": 9.4379, + "step": 3240 + }, + { + "epoch": 0.13509232628902504, + "grad_norm": 290.0, + "learning_rate": 9.713213088438522e-05, + "loss": 13.8127, + "step": 3241 + }, + { + "epoch": 0.13513400858655328, + "grad_norm": 486.0, + "learning_rate": 9.712987727458399e-05, + "loss": 15.6257, + "step": 3242 + }, + { + "epoch": 0.13517569088408152, + "grad_norm": 390.0, + "learning_rate": 9.712762280583656e-05, + "loss": 14.6881, + "step": 3243 + }, + { + "epoch": 0.13521737318160976, + "grad_norm": 135.0, + "learning_rate": 9.712536747818402e-05, + "loss": 10.0628, + "step": 3244 + }, + { + "epoch": 0.13525905547913802, + "grad_norm": 1104.0, + "learning_rate": 9.712311129166749e-05, + "loss": 27.7506, + "step": 3245 + }, + { + "epoch": 0.13530073777666626, + "grad_norm": 388.0, + "learning_rate": 9.712085424632806e-05, + "loss": 16.7528, + "step": 3246 + }, + { + "epoch": 0.1353424200741945, + "grad_norm": 324.0, + "learning_rate": 9.711859634220689e-05, + "loss": 13.3756, + "step": 3247 + }, + { + "epoch": 0.13538410237172274, + "grad_norm": 350.0, + "learning_rate": 9.711633757934509e-05, + "loss": 14.8158, + "step": 3248 + }, + { + "epoch": 0.13542578466925098, + "grad_norm": 318.0, + "learning_rate": 9.711407795778388e-05, + "loss": 13.9386, + "step": 3249 + }, + { + "epoch": 0.13546746696677922, + "grad_norm": 165.0, + "learning_rate": 9.711181747756441e-05, + "loss": 9.3754, + "step": 3250 + }, + { + "epoch": 0.13550914926430746, + "grad_norm": 470.0, + "learning_rate": 9.710955613872788e-05, + "loss": 16.6252, + "step": 3251 + }, + { + "epoch": 0.1355508315618357, + "grad_norm": 338.0, + "learning_rate": 9.710729394131552e-05, + "loss": 13.8767, + "step": 3252 + }, + { + "epoch": 0.13559251385936394, + "grad_norm": 70.0, + "learning_rate": 9.710503088536854e-05, + "loss": 7.6253, + "step": 3253 + }, + { + "epoch": 0.13563419615689218, + "grad_norm": 231.0, + "learning_rate": 9.710276697092818e-05, + "loss": 12.1268, + "step": 3254 + }, + { + "epoch": 0.13567587845442042, + "grad_norm": 292.0, + "learning_rate": 9.710050219803572e-05, + "loss": 13.1252, + "step": 3255 + }, + { + "epoch": 0.13571756075194866, + "grad_norm": 330.0, + "learning_rate": 9.709823656673243e-05, + "loss": 13.5004, + "step": 3256 + }, + { + "epoch": 0.1357592430494769, + "grad_norm": 214.0, + "learning_rate": 9.709597007705959e-05, + "loss": 11.2504, + "step": 3257 + }, + { + "epoch": 0.13580092534700514, + "grad_norm": 374.0, + "learning_rate": 9.709370272905851e-05, + "loss": 15.5006, + "step": 3258 + }, + { + "epoch": 0.13584260764453338, + "grad_norm": 306.0, + "learning_rate": 9.709143452277053e-05, + "loss": 13.0015, + "step": 3259 + }, + { + "epoch": 0.13588428994206161, + "grad_norm": 362.0, + "learning_rate": 9.708916545823696e-05, + "loss": 13.5002, + "step": 3260 + }, + { + "epoch": 0.13592597223958985, + "grad_norm": 330.0, + "learning_rate": 9.708689553549919e-05, + "loss": 14.1256, + "step": 3261 + }, + { + "epoch": 0.1359676545371181, + "grad_norm": 510.0, + "learning_rate": 9.708462475459857e-05, + "loss": 17.5002, + "step": 3262 + }, + { + "epoch": 0.13600933683464633, + "grad_norm": 248.0, + "learning_rate": 9.708235311557646e-05, + "loss": 11.9383, + "step": 3263 + }, + { + "epoch": 0.13605101913217457, + "grad_norm": 89.5, + "learning_rate": 9.708008061847428e-05, + "loss": 8.2505, + "step": 3264 + }, + { + "epoch": 0.1360927014297028, + "grad_norm": 524.0, + "learning_rate": 9.707780726333348e-05, + "loss": 14.9402, + "step": 3265 + }, + { + "epoch": 0.13613438372723105, + "grad_norm": 135.0, + "learning_rate": 9.707553305019546e-05, + "loss": 8.9379, + "step": 3266 + }, + { + "epoch": 0.1361760660247593, + "grad_norm": 205.0, + "learning_rate": 9.707325797910165e-05, + "loss": 10.9381, + "step": 3267 + }, + { + "epoch": 0.13621774832228753, + "grad_norm": 884.0, + "learning_rate": 9.707098205009355e-05, + "loss": 21.5035, + "step": 3268 + }, + { + "epoch": 0.13625943061981577, + "grad_norm": 324.0, + "learning_rate": 9.706870526321262e-05, + "loss": 13.066, + "step": 3269 + }, + { + "epoch": 0.136301112917344, + "grad_norm": 564.0, + "learning_rate": 9.706642761850035e-05, + "loss": 17.8767, + "step": 3270 + }, + { + "epoch": 0.13634279521487225, + "grad_norm": 332.0, + "learning_rate": 9.706414911599828e-05, + "loss": 12.7502, + "step": 3271 + }, + { + "epoch": 0.1363844775124005, + "grad_norm": 588.0, + "learning_rate": 9.70618697557479e-05, + "loss": 18.6259, + "step": 3272 + }, + { + "epoch": 0.13642615980992873, + "grad_norm": 458.0, + "learning_rate": 9.705958953779077e-05, + "loss": 17.7503, + "step": 3273 + }, + { + "epoch": 0.13646784210745697, + "grad_norm": 408.0, + "learning_rate": 9.705730846216844e-05, + "loss": 15.0017, + "step": 3274 + }, + { + "epoch": 0.1365095244049852, + "grad_norm": 368.0, + "learning_rate": 9.705502652892249e-05, + "loss": 10.4383, + "step": 3275 + }, + { + "epoch": 0.13655120670251344, + "grad_norm": 506.0, + "learning_rate": 9.70527437380945e-05, + "loss": 17.3759, + "step": 3276 + }, + { + "epoch": 0.13659288900004168, + "grad_norm": 372.0, + "learning_rate": 9.705046008972607e-05, + "loss": 13.6263, + "step": 3277 + }, + { + "epoch": 0.13663457129756992, + "grad_norm": 132.0, + "learning_rate": 9.704817558385885e-05, + "loss": 7.7822, + "step": 3278 + }, + { + "epoch": 0.13667625359509816, + "grad_norm": 304.0, + "learning_rate": 9.704589022053443e-05, + "loss": 12.4379, + "step": 3279 + }, + { + "epoch": 0.1367179358926264, + "grad_norm": 239.0, + "learning_rate": 9.704360399979451e-05, + "loss": 12.1253, + "step": 3280 + }, + { + "epoch": 0.13675961819015464, + "grad_norm": 450.0, + "learning_rate": 9.70413169216807e-05, + "loss": 16.1253, + "step": 3281 + }, + { + "epoch": 0.13680130048768288, + "grad_norm": 1040.0, + "learning_rate": 9.703902898623474e-05, + "loss": 27.2502, + "step": 3282 + }, + { + "epoch": 0.13684298278521112, + "grad_norm": 332.0, + "learning_rate": 9.703674019349829e-05, + "loss": 12.7505, + "step": 3283 + }, + { + "epoch": 0.13688466508273936, + "grad_norm": 246.0, + "learning_rate": 9.703445054351307e-05, + "loss": 12.4379, + "step": 3284 + }, + { + "epoch": 0.1369263473802676, + "grad_norm": 772.0, + "learning_rate": 9.703216003632081e-05, + "loss": 23.0007, + "step": 3285 + }, + { + "epoch": 0.13696802967779584, + "grad_norm": 280.0, + "learning_rate": 9.702986867196328e-05, + "loss": 13.4379, + "step": 3286 + }, + { + "epoch": 0.13700971197532408, + "grad_norm": 131.0, + "learning_rate": 9.702757645048219e-05, + "loss": 9.6884, + "step": 3287 + }, + { + "epoch": 0.13705139427285232, + "grad_norm": 552.0, + "learning_rate": 9.702528337191937e-05, + "loss": 17.6255, + "step": 3288 + }, + { + "epoch": 0.13709307657038056, + "grad_norm": 448.0, + "learning_rate": 9.702298943631656e-05, + "loss": 14.0004, + "step": 3289 + }, + { + "epoch": 0.1371347588679088, + "grad_norm": 446.0, + "learning_rate": 9.702069464371561e-05, + "loss": 16.5006, + "step": 3290 + }, + { + "epoch": 0.13717644116543704, + "grad_norm": 1136.0, + "learning_rate": 9.701839899415834e-05, + "loss": 25.7551, + "step": 3291 + }, + { + "epoch": 0.13721812346296527, + "grad_norm": 434.0, + "learning_rate": 9.701610248768656e-05, + "loss": 18.0004, + "step": 3292 + }, + { + "epoch": 0.13725980576049351, + "grad_norm": 352.0, + "learning_rate": 9.701380512434213e-05, + "loss": 14.1255, + "step": 3293 + }, + { + "epoch": 0.13730148805802175, + "grad_norm": 648.0, + "learning_rate": 9.701150690416694e-05, + "loss": 20.7504, + "step": 3294 + }, + { + "epoch": 0.13734317035555, + "grad_norm": 239.0, + "learning_rate": 9.700920782720285e-05, + "loss": 12.0005, + "step": 3295 + }, + { + "epoch": 0.13738485265307823, + "grad_norm": 274.0, + "learning_rate": 9.700690789349178e-05, + "loss": 13.5627, + "step": 3296 + }, + { + "epoch": 0.13742653495060647, + "grad_norm": 220.0, + "learning_rate": 9.700460710307565e-05, + "loss": 12.313, + "step": 3297 + }, + { + "epoch": 0.1374682172481347, + "grad_norm": 390.0, + "learning_rate": 9.700230545599638e-05, + "loss": 15.563, + "step": 3298 + }, + { + "epoch": 0.13750989954566295, + "grad_norm": 408.0, + "learning_rate": 9.70000029522959e-05, + "loss": 14.6881, + "step": 3299 + }, + { + "epoch": 0.1375515818431912, + "grad_norm": 166.0, + "learning_rate": 9.699769959201623e-05, + "loss": 8.8133, + "step": 3300 + }, + { + "epoch": 0.13759326414071943, + "grad_norm": 338.0, + "learning_rate": 9.699539537519928e-05, + "loss": 13.8759, + "step": 3301 + }, + { + "epoch": 0.13763494643824767, + "grad_norm": 506.0, + "learning_rate": 9.69930903018871e-05, + "loss": 17.0004, + "step": 3302 + }, + { + "epoch": 0.1376766287357759, + "grad_norm": 704.0, + "learning_rate": 9.699078437212166e-05, + "loss": 17.0049, + "step": 3303 + }, + { + "epoch": 0.13771831103330415, + "grad_norm": 243.0, + "learning_rate": 9.698847758594502e-05, + "loss": 10.8752, + "step": 3304 + }, + { + "epoch": 0.1377599933308324, + "grad_norm": 40.75, + "learning_rate": 9.698616994339919e-05, + "loss": 7.0002, + "step": 3305 + }, + { + "epoch": 0.13780167562836063, + "grad_norm": 418.0, + "learning_rate": 9.698386144452624e-05, + "loss": 15.8128, + "step": 3306 + }, + { + "epoch": 0.13784335792588887, + "grad_norm": 784.0, + "learning_rate": 9.698155208936825e-05, + "loss": 22.0003, + "step": 3307 + }, + { + "epoch": 0.1378850402234171, + "grad_norm": 162.0, + "learning_rate": 9.697924187796732e-05, + "loss": 9.7504, + "step": 3308 + }, + { + "epoch": 0.13792672252094534, + "grad_norm": 592.0, + "learning_rate": 9.697693081036551e-05, + "loss": 18.8756, + "step": 3309 + }, + { + "epoch": 0.13796840481847358, + "grad_norm": 250.0, + "learning_rate": 9.697461888660498e-05, + "loss": 12.4381, + "step": 3310 + }, + { + "epoch": 0.13801008711600182, + "grad_norm": 340.0, + "learning_rate": 9.697230610672785e-05, + "loss": 15.3127, + "step": 3311 + }, + { + "epoch": 0.13805176941353006, + "grad_norm": 424.0, + "learning_rate": 9.696999247077627e-05, + "loss": 15.1878, + "step": 3312 + }, + { + "epoch": 0.1380934517110583, + "grad_norm": 340.0, + "learning_rate": 9.69676779787924e-05, + "loss": 11.7501, + "step": 3313 + }, + { + "epoch": 0.13813513400858654, + "grad_norm": 171.0, + "learning_rate": 9.696536263081843e-05, + "loss": 11.3754, + "step": 3314 + }, + { + "epoch": 0.13817681630611478, + "grad_norm": 444.0, + "learning_rate": 9.696304642689657e-05, + "loss": 16.0012, + "step": 3315 + }, + { + "epoch": 0.13821849860364302, + "grad_norm": 472.0, + "learning_rate": 9.696072936706901e-05, + "loss": 15.6893, + "step": 3316 + }, + { + "epoch": 0.13826018090117126, + "grad_norm": 536.0, + "learning_rate": 9.6958411451378e-05, + "loss": 16.2558, + "step": 3317 + }, + { + "epoch": 0.13830186319869953, + "grad_norm": 716.0, + "learning_rate": 9.695609267986576e-05, + "loss": 20.5004, + "step": 3318 + }, + { + "epoch": 0.13834354549622777, + "grad_norm": 116.5, + "learning_rate": 9.695377305257457e-05, + "loss": 9.8757, + "step": 3319 + }, + { + "epoch": 0.138385227793756, + "grad_norm": 516.0, + "learning_rate": 9.69514525695467e-05, + "loss": 18.253, + "step": 3320 + }, + { + "epoch": 0.13842691009128424, + "grad_norm": 1448.0, + "learning_rate": 9.694913123082443e-05, + "loss": 30.2545, + "step": 3321 + }, + { + "epoch": 0.13846859238881248, + "grad_norm": 294.0, + "learning_rate": 9.694680903645009e-05, + "loss": 12.6253, + "step": 3322 + }, + { + "epoch": 0.13851027468634072, + "grad_norm": 199.0, + "learning_rate": 9.694448598646597e-05, + "loss": 9.9378, + "step": 3323 + }, + { + "epoch": 0.13855195698386896, + "grad_norm": 228.0, + "learning_rate": 9.694216208091443e-05, + "loss": 11.3752, + "step": 3324 + }, + { + "epoch": 0.1385936392813972, + "grad_norm": 186.0, + "learning_rate": 9.693983731983782e-05, + "loss": 11.0039, + "step": 3325 + }, + { + "epoch": 0.13863532157892544, + "grad_norm": 418.0, + "learning_rate": 9.693751170327849e-05, + "loss": 15.2502, + "step": 3326 + }, + { + "epoch": 0.13867700387645368, + "grad_norm": 185.0, + "learning_rate": 9.693518523127888e-05, + "loss": 11.6256, + "step": 3327 + }, + { + "epoch": 0.13871868617398192, + "grad_norm": 241.0, + "learning_rate": 9.693285790388133e-05, + "loss": 11.0632, + "step": 3328 + }, + { + "epoch": 0.13876036847151016, + "grad_norm": 416.0, + "learning_rate": 9.693052972112829e-05, + "loss": 14.3754, + "step": 3329 + }, + { + "epoch": 0.1388020507690384, + "grad_norm": 228.0, + "learning_rate": 9.692820068306216e-05, + "loss": 11.8128, + "step": 3330 + }, + { + "epoch": 0.13884373306656664, + "grad_norm": 404.0, + "learning_rate": 9.692587078972541e-05, + "loss": 15.3128, + "step": 3331 + }, + { + "epoch": 0.13888541536409488, + "grad_norm": 390.0, + "learning_rate": 9.69235400411605e-05, + "loss": 12.7502, + "step": 3332 + }, + { + "epoch": 0.13892709766162312, + "grad_norm": 169.0, + "learning_rate": 9.692120843740993e-05, + "loss": 9.4382, + "step": 3333 + }, + { + "epoch": 0.13896877995915136, + "grad_norm": 440.0, + "learning_rate": 9.691887597851616e-05, + "loss": 16.0004, + "step": 3334 + }, + { + "epoch": 0.1390104622566796, + "grad_norm": 1512.0, + "learning_rate": 9.691654266452171e-05, + "loss": 34.5015, + "step": 3335 + }, + { + "epoch": 0.13905214455420783, + "grad_norm": 268.0, + "learning_rate": 9.691420849546909e-05, + "loss": 12.5639, + "step": 3336 + }, + { + "epoch": 0.13909382685173607, + "grad_norm": 191.0, + "learning_rate": 9.691187347140087e-05, + "loss": 9.3752, + "step": 3337 + }, + { + "epoch": 0.1391355091492643, + "grad_norm": 174.0, + "learning_rate": 9.690953759235959e-05, + "loss": 9.3759, + "step": 3338 + }, + { + "epoch": 0.13917719144679255, + "grad_norm": 251.0, + "learning_rate": 9.690720085838781e-05, + "loss": 12.0003, + "step": 3339 + }, + { + "epoch": 0.1392188737443208, + "grad_norm": 628.0, + "learning_rate": 9.690486326952815e-05, + "loss": 18.7509, + "step": 3340 + }, + { + "epoch": 0.13926055604184903, + "grad_norm": 184.0, + "learning_rate": 9.690252482582318e-05, + "loss": 8.9377, + "step": 3341 + }, + { + "epoch": 0.13930223833937727, + "grad_norm": 344.0, + "learning_rate": 9.690018552731554e-05, + "loss": 13.5632, + "step": 3342 + }, + { + "epoch": 0.1393439206369055, + "grad_norm": 93.5, + "learning_rate": 9.689784537404784e-05, + "loss": 5.3442, + "step": 3343 + }, + { + "epoch": 0.13938560293443375, + "grad_norm": 350.0, + "learning_rate": 9.689550436606276e-05, + "loss": 15.063, + "step": 3344 + }, + { + "epoch": 0.139427285231962, + "grad_norm": 182.0, + "learning_rate": 9.689316250340294e-05, + "loss": 10.1254, + "step": 3345 + }, + { + "epoch": 0.13946896752949023, + "grad_norm": 680.0, + "learning_rate": 9.689081978611108e-05, + "loss": 18.1317, + "step": 3346 + }, + { + "epoch": 0.13951064982701847, + "grad_norm": 255.0, + "learning_rate": 9.688847621422986e-05, + "loss": 12.4381, + "step": 3347 + }, + { + "epoch": 0.1395523321245467, + "grad_norm": 462.0, + "learning_rate": 9.6886131787802e-05, + "loss": 16.1254, + "step": 3348 + }, + { + "epoch": 0.13959401442207495, + "grad_norm": 684.0, + "learning_rate": 9.688378650687024e-05, + "loss": 20.0004, + "step": 3349 + }, + { + "epoch": 0.1396356967196032, + "grad_norm": 232.0, + "learning_rate": 9.688144037147729e-05, + "loss": 12.3129, + "step": 3350 + }, + { + "epoch": 0.13967737901713143, + "grad_norm": 306.0, + "learning_rate": 9.687909338166593e-05, + "loss": 14.0004, + "step": 3351 + }, + { + "epoch": 0.13971906131465966, + "grad_norm": 468.0, + "learning_rate": 9.687674553747895e-05, + "loss": 16.3763, + "step": 3352 + }, + { + "epoch": 0.1397607436121879, + "grad_norm": 444.0, + "learning_rate": 9.68743968389591e-05, + "loss": 14.4381, + "step": 3353 + }, + { + "epoch": 0.13980242590971614, + "grad_norm": 466.0, + "learning_rate": 9.68720472861492e-05, + "loss": 16.3752, + "step": 3354 + }, + { + "epoch": 0.13984410820724438, + "grad_norm": 306.0, + "learning_rate": 9.68696968790921e-05, + "loss": 13.5006, + "step": 3355 + }, + { + "epoch": 0.13988579050477262, + "grad_norm": 1112.0, + "learning_rate": 9.68673456178306e-05, + "loss": 24.8806, + "step": 3356 + }, + { + "epoch": 0.13992747280230086, + "grad_norm": 138.0, + "learning_rate": 9.686499350240757e-05, + "loss": 10.5633, + "step": 3357 + }, + { + "epoch": 0.1399691550998291, + "grad_norm": 336.0, + "learning_rate": 9.686264053286586e-05, + "loss": 14.1251, + "step": 3358 + }, + { + "epoch": 0.14001083739735734, + "grad_norm": 302.0, + "learning_rate": 9.686028670924839e-05, + "loss": 13.2504, + "step": 3359 + }, + { + "epoch": 0.14005251969488558, + "grad_norm": 260.0, + "learning_rate": 9.685793203159803e-05, + "loss": 12.8128, + "step": 3360 + }, + { + "epoch": 0.14009420199241382, + "grad_norm": 318.0, + "learning_rate": 9.68555764999577e-05, + "loss": 13.3127, + "step": 3361 + }, + { + "epoch": 0.14013588428994206, + "grad_norm": 158.0, + "learning_rate": 9.68532201143703e-05, + "loss": 9.8752, + "step": 3362 + }, + { + "epoch": 0.1401775665874703, + "grad_norm": 712.0, + "learning_rate": 9.685086287487883e-05, + "loss": 21.5003, + "step": 3363 + }, + { + "epoch": 0.14021924888499854, + "grad_norm": 236.0, + "learning_rate": 9.684850478152622e-05, + "loss": 12.7502, + "step": 3364 + }, + { + "epoch": 0.14026093118252678, + "grad_norm": 252.0, + "learning_rate": 9.684614583435546e-05, + "loss": 11.4378, + "step": 3365 + }, + { + "epoch": 0.14030261348005502, + "grad_norm": 116.0, + "learning_rate": 9.684378603340952e-05, + "loss": 8.688, + "step": 3366 + }, + { + "epoch": 0.14034429577758326, + "grad_norm": 692.0, + "learning_rate": 9.684142537873142e-05, + "loss": 21.3755, + "step": 3367 + }, + { + "epoch": 0.1403859780751115, + "grad_norm": 120.5, + "learning_rate": 9.68390638703642e-05, + "loss": 9.1257, + "step": 3368 + }, + { + "epoch": 0.14042766037263973, + "grad_norm": 280.0, + "learning_rate": 9.683670150835087e-05, + "loss": 13.7503, + "step": 3369 + }, + { + "epoch": 0.14046934267016797, + "grad_norm": 214.0, + "learning_rate": 9.68343382927345e-05, + "loss": 12.1878, + "step": 3370 + }, + { + "epoch": 0.1405110249676962, + "grad_norm": 468.0, + "learning_rate": 9.683197422355816e-05, + "loss": 16.6254, + "step": 3371 + }, + { + "epoch": 0.14055270726522445, + "grad_norm": 368.0, + "learning_rate": 9.682960930086493e-05, + "loss": 11.5031, + "step": 3372 + }, + { + "epoch": 0.1405943895627527, + "grad_norm": 220.0, + "learning_rate": 9.682724352469792e-05, + "loss": 9.1265, + "step": 3373 + }, + { + "epoch": 0.14063607186028093, + "grad_norm": 234.0, + "learning_rate": 9.682487689510023e-05, + "loss": 11.3755, + "step": 3374 + }, + { + "epoch": 0.14067775415780917, + "grad_norm": 127.0, + "learning_rate": 9.6822509412115e-05, + "loss": 9.6252, + "step": 3375 + }, + { + "epoch": 0.1407194364553374, + "grad_norm": 478.0, + "learning_rate": 9.682014107578538e-05, + "loss": 15.1878, + "step": 3376 + }, + { + "epoch": 0.14076111875286565, + "grad_norm": 207.0, + "learning_rate": 9.681777188615454e-05, + "loss": 11.0004, + "step": 3377 + }, + { + "epoch": 0.1408028010503939, + "grad_norm": 176.0, + "learning_rate": 9.681540184326565e-05, + "loss": 10.8753, + "step": 3378 + }, + { + "epoch": 0.14084448334792213, + "grad_norm": 402.0, + "learning_rate": 9.68130309471619e-05, + "loss": 14.8753, + "step": 3379 + }, + { + "epoch": 0.14088616564545037, + "grad_norm": 608.0, + "learning_rate": 9.681065919788652e-05, + "loss": 19.0011, + "step": 3380 + }, + { + "epoch": 0.1409278479429786, + "grad_norm": 241.0, + "learning_rate": 9.680828659548271e-05, + "loss": 11.3131, + "step": 3381 + }, + { + "epoch": 0.14096953024050685, + "grad_norm": 247.0, + "learning_rate": 9.680591313999372e-05, + "loss": 12.5016, + "step": 3382 + }, + { + "epoch": 0.14101121253803509, + "grad_norm": 524.0, + "learning_rate": 9.680353883146281e-05, + "loss": 16.8759, + "step": 3383 + }, + { + "epoch": 0.14105289483556332, + "grad_norm": 516.0, + "learning_rate": 9.680116366993323e-05, + "loss": 19.0005, + "step": 3384 + }, + { + "epoch": 0.14109457713309156, + "grad_norm": 198.0, + "learning_rate": 9.679878765544831e-05, + "loss": 10.6879, + "step": 3385 + }, + { + "epoch": 0.1411362594306198, + "grad_norm": 1608.0, + "learning_rate": 9.679641078805133e-05, + "loss": 34.0063, + "step": 3386 + }, + { + "epoch": 0.14117794172814804, + "grad_norm": 408.0, + "learning_rate": 9.67940330677856e-05, + "loss": 15.2508, + "step": 3387 + }, + { + "epoch": 0.14121962402567628, + "grad_norm": 480.0, + "learning_rate": 9.679165449469449e-05, + "loss": 16.8754, + "step": 3388 + }, + { + "epoch": 0.14126130632320452, + "grad_norm": 226.0, + "learning_rate": 9.678927506882129e-05, + "loss": 11.6271, + "step": 3389 + }, + { + "epoch": 0.14130298862073276, + "grad_norm": 390.0, + "learning_rate": 9.678689479020942e-05, + "loss": 14.3133, + "step": 3390 + }, + { + "epoch": 0.14134467091826103, + "grad_norm": 532.0, + "learning_rate": 9.678451365890222e-05, + "loss": 18.0003, + "step": 3391 + }, + { + "epoch": 0.14138635321578927, + "grad_norm": 193.0, + "learning_rate": 9.678213167494312e-05, + "loss": 10.9392, + "step": 3392 + }, + { + "epoch": 0.1414280355133175, + "grad_norm": 160.0, + "learning_rate": 9.677974883837551e-05, + "loss": 10.1253, + "step": 3393 + }, + { + "epoch": 0.14146971781084575, + "grad_norm": 532.0, + "learning_rate": 9.677736514924283e-05, + "loss": 18.1264, + "step": 3394 + }, + { + "epoch": 0.14151140010837399, + "grad_norm": 147.0, + "learning_rate": 9.677498060758852e-05, + "loss": 11.438, + "step": 3395 + }, + { + "epoch": 0.14155308240590223, + "grad_norm": 462.0, + "learning_rate": 9.677259521345602e-05, + "loss": 16.1258, + "step": 3396 + }, + { + "epoch": 0.14159476470343046, + "grad_norm": 237.0, + "learning_rate": 9.677020896688885e-05, + "loss": 11.9379, + "step": 3397 + }, + { + "epoch": 0.1416364470009587, + "grad_norm": 348.0, + "learning_rate": 9.676782186793043e-05, + "loss": 13.001, + "step": 3398 + }, + { + "epoch": 0.14167812929848694, + "grad_norm": 136.0, + "learning_rate": 9.676543391662434e-05, + "loss": 10.9381, + "step": 3399 + }, + { + "epoch": 0.14171981159601518, + "grad_norm": 792.0, + "learning_rate": 9.676304511301404e-05, + "loss": 23.5004, + "step": 3400 + }, + { + "epoch": 0.14176149389354342, + "grad_norm": 342.0, + "learning_rate": 9.67606554571431e-05, + "loss": 12.7513, + "step": 3401 + }, + { + "epoch": 0.14180317619107166, + "grad_norm": 1144.0, + "learning_rate": 9.675826494905507e-05, + "loss": 27.6256, + "step": 3402 + }, + { + "epoch": 0.1418448584885999, + "grad_norm": 620.0, + "learning_rate": 9.67558735887935e-05, + "loss": 19.0001, + "step": 3403 + }, + { + "epoch": 0.14188654078612814, + "grad_norm": 127.0, + "learning_rate": 9.675348137640198e-05, + "loss": 10.1254, + "step": 3404 + }, + { + "epoch": 0.14192822308365638, + "grad_norm": 992.0, + "learning_rate": 9.675108831192415e-05, + "loss": 27.3754, + "step": 3405 + }, + { + "epoch": 0.14196990538118462, + "grad_norm": 348.0, + "learning_rate": 9.674869439540355e-05, + "loss": 13.5628, + "step": 3406 + }, + { + "epoch": 0.14201158767871286, + "grad_norm": 214.0, + "learning_rate": 9.674629962688384e-05, + "loss": 12.0637, + "step": 3407 + }, + { + "epoch": 0.1420532699762411, + "grad_norm": 688.0, + "learning_rate": 9.674390400640868e-05, + "loss": 21.1254, + "step": 3408 + }, + { + "epoch": 0.14209495227376934, + "grad_norm": 588.0, + "learning_rate": 9.674150753402173e-05, + "loss": 18.3766, + "step": 3409 + }, + { + "epoch": 0.14213663457129758, + "grad_norm": 676.0, + "learning_rate": 9.673911020976664e-05, + "loss": 20.5003, + "step": 3410 + }, + { + "epoch": 0.14217831686882582, + "grad_norm": 828.0, + "learning_rate": 9.673671203368713e-05, + "loss": 21.6259, + "step": 3411 + }, + { + "epoch": 0.14221999916635406, + "grad_norm": 442.0, + "learning_rate": 9.673431300582688e-05, + "loss": 15.6254, + "step": 3412 + }, + { + "epoch": 0.1422616814638823, + "grad_norm": 444.0, + "learning_rate": 9.673191312622964e-05, + "loss": 14.6884, + "step": 3413 + }, + { + "epoch": 0.14230336376141053, + "grad_norm": 278.0, + "learning_rate": 9.672951239493913e-05, + "loss": 13.5634, + "step": 3414 + }, + { + "epoch": 0.14234504605893877, + "grad_norm": 668.0, + "learning_rate": 9.67271108119991e-05, + "loss": 17.6288, + "step": 3415 + }, + { + "epoch": 0.142386728356467, + "grad_norm": 218.0, + "learning_rate": 9.672470837745334e-05, + "loss": 10.2505, + "step": 3416 + }, + { + "epoch": 0.14242841065399525, + "grad_norm": 320.0, + "learning_rate": 9.67223050913456e-05, + "loss": 13.5631, + "step": 3417 + }, + { + "epoch": 0.1424700929515235, + "grad_norm": 334.0, + "learning_rate": 9.671990095371972e-05, + "loss": 13.063, + "step": 3418 + }, + { + "epoch": 0.14251177524905173, + "grad_norm": 740.0, + "learning_rate": 9.67174959646195e-05, + "loss": 22.1256, + "step": 3419 + }, + { + "epoch": 0.14255345754657997, + "grad_norm": 394.0, + "learning_rate": 9.671509012408877e-05, + "loss": 15.8753, + "step": 3420 + }, + { + "epoch": 0.1425951398441082, + "grad_norm": 334.0, + "learning_rate": 9.671268343217137e-05, + "loss": 14.1253, + "step": 3421 + }, + { + "epoch": 0.14263682214163645, + "grad_norm": 334.0, + "learning_rate": 9.671027588891118e-05, + "loss": 12.0005, + "step": 3422 + }, + { + "epoch": 0.1426785044391647, + "grad_norm": 472.0, + "learning_rate": 9.670786749435204e-05, + "loss": 14.1883, + "step": 3423 + }, + { + "epoch": 0.14272018673669293, + "grad_norm": 560.0, + "learning_rate": 9.670545824853789e-05, + "loss": 18.7502, + "step": 3424 + }, + { + "epoch": 0.14276186903422117, + "grad_norm": 422.0, + "learning_rate": 9.670304815151262e-05, + "loss": 15.8127, + "step": 3425 + }, + { + "epoch": 0.1428035513317494, + "grad_norm": 193.0, + "learning_rate": 9.670063720332012e-05, + "loss": 11.0005, + "step": 3426 + }, + { + "epoch": 0.14284523362927765, + "grad_norm": 372.0, + "learning_rate": 9.669822540400438e-05, + "loss": 14.1879, + "step": 3427 + }, + { + "epoch": 0.14288691592680589, + "grad_norm": 448.0, + "learning_rate": 9.669581275360935e-05, + "loss": 15.1258, + "step": 3428 + }, + { + "epoch": 0.14292859822433412, + "grad_norm": 444.0, + "learning_rate": 9.669339925217897e-05, + "loss": 15.5006, + "step": 3429 + }, + { + "epoch": 0.14297028052186236, + "grad_norm": 338.0, + "learning_rate": 9.669098489975725e-05, + "loss": 12.7502, + "step": 3430 + }, + { + "epoch": 0.1430119628193906, + "grad_norm": 171.0, + "learning_rate": 9.668856969638817e-05, + "loss": 9.5004, + "step": 3431 + }, + { + "epoch": 0.14305364511691884, + "grad_norm": 404.0, + "learning_rate": 9.66861536421158e-05, + "loss": 12.3147, + "step": 3432 + }, + { + "epoch": 0.14309532741444708, + "grad_norm": 288.0, + "learning_rate": 9.66837367369841e-05, + "loss": 12.1255, + "step": 3433 + }, + { + "epoch": 0.14313700971197532, + "grad_norm": 492.0, + "learning_rate": 9.668131898103716e-05, + "loss": 16.7505, + "step": 3434 + }, + { + "epoch": 0.14317869200950356, + "grad_norm": 246.0, + "learning_rate": 9.667890037431906e-05, + "loss": 12.1255, + "step": 3435 + }, + { + "epoch": 0.1432203743070318, + "grad_norm": 600.0, + "learning_rate": 9.667648091687384e-05, + "loss": 19.0003, + "step": 3436 + }, + { + "epoch": 0.14326205660456004, + "grad_norm": 366.0, + "learning_rate": 9.667406060874559e-05, + "loss": 13.0636, + "step": 3437 + }, + { + "epoch": 0.14330373890208828, + "grad_norm": 406.0, + "learning_rate": 9.667163944997848e-05, + "loss": 14.3135, + "step": 3438 + }, + { + "epoch": 0.14334542119961652, + "grad_norm": 122.0, + "learning_rate": 9.666921744061658e-05, + "loss": 8.1253, + "step": 3439 + }, + { + "epoch": 0.14338710349714476, + "grad_norm": 110.5, + "learning_rate": 9.666679458070405e-05, + "loss": 8.9378, + "step": 3440 + }, + { + "epoch": 0.143428785794673, + "grad_norm": 348.0, + "learning_rate": 9.666437087028505e-05, + "loss": 15.063, + "step": 3441 + }, + { + "epoch": 0.14347046809220124, + "grad_norm": 494.0, + "learning_rate": 9.666194630940375e-05, + "loss": 16.3753, + "step": 3442 + }, + { + "epoch": 0.14351215038972948, + "grad_norm": 516.0, + "learning_rate": 9.665952089810432e-05, + "loss": 16.1266, + "step": 3443 + }, + { + "epoch": 0.14355383268725772, + "grad_norm": 360.0, + "learning_rate": 9.6657094636431e-05, + "loss": 14.5628, + "step": 3444 + }, + { + "epoch": 0.14359551498478595, + "grad_norm": 342.0, + "learning_rate": 9.665466752442797e-05, + "loss": 11.7505, + "step": 3445 + }, + { + "epoch": 0.1436371972823142, + "grad_norm": 472.0, + "learning_rate": 9.66522395621395e-05, + "loss": 17.0003, + "step": 3446 + }, + { + "epoch": 0.14367887957984243, + "grad_norm": 290.0, + "learning_rate": 9.664981074960981e-05, + "loss": 13.8765, + "step": 3447 + }, + { + "epoch": 0.14372056187737067, + "grad_norm": 205.0, + "learning_rate": 9.66473810868832e-05, + "loss": 11.6255, + "step": 3448 + }, + { + "epoch": 0.1437622441748989, + "grad_norm": 165.0, + "learning_rate": 9.66449505740039e-05, + "loss": 10.7506, + "step": 3449 + }, + { + "epoch": 0.14380392647242715, + "grad_norm": 500.0, + "learning_rate": 9.664251921101625e-05, + "loss": 17.1255, + "step": 3450 + }, + { + "epoch": 0.1438456087699554, + "grad_norm": 976.0, + "learning_rate": 9.664008699796455e-05, + "loss": 26.0003, + "step": 3451 + }, + { + "epoch": 0.14388729106748363, + "grad_norm": 512.0, + "learning_rate": 9.663765393489311e-05, + "loss": 16.5003, + "step": 3452 + }, + { + "epoch": 0.14392897336501187, + "grad_norm": 424.0, + "learning_rate": 9.66352200218463e-05, + "loss": 16.2506, + "step": 3453 + }, + { + "epoch": 0.1439706556625401, + "grad_norm": 600.0, + "learning_rate": 9.663278525886845e-05, + "loss": 16.3762, + "step": 3454 + }, + { + "epoch": 0.14401233796006835, + "grad_norm": 358.0, + "learning_rate": 9.663034964600396e-05, + "loss": 13.5004, + "step": 3455 + }, + { + "epoch": 0.1440540202575966, + "grad_norm": 320.0, + "learning_rate": 9.66279131832972e-05, + "loss": 14.1879, + "step": 3456 + }, + { + "epoch": 0.14409570255512483, + "grad_norm": 213.0, + "learning_rate": 9.66254758707926e-05, + "loss": 11.5628, + "step": 3457 + }, + { + "epoch": 0.14413738485265307, + "grad_norm": 1168.0, + "learning_rate": 9.662303770853456e-05, + "loss": 27.0004, + "step": 3458 + }, + { + "epoch": 0.1441790671501813, + "grad_norm": 191.0, + "learning_rate": 9.66205986965675e-05, + "loss": 11.9382, + "step": 3459 + }, + { + "epoch": 0.14422074944770955, + "grad_norm": 374.0, + "learning_rate": 9.66181588349359e-05, + "loss": 12.8754, + "step": 3460 + }, + { + "epoch": 0.14426243174523778, + "grad_norm": 476.0, + "learning_rate": 9.661571812368421e-05, + "loss": 15.7505, + "step": 3461 + }, + { + "epoch": 0.14430411404276602, + "grad_norm": 400.0, + "learning_rate": 9.661327656285694e-05, + "loss": 16.126, + "step": 3462 + }, + { + "epoch": 0.14434579634029426, + "grad_norm": 406.0, + "learning_rate": 9.661083415249856e-05, + "loss": 17.0004, + "step": 3463 + }, + { + "epoch": 0.14438747863782253, + "grad_norm": 448.0, + "learning_rate": 9.66083908926536e-05, + "loss": 17.2507, + "step": 3464 + }, + { + "epoch": 0.14442916093535077, + "grad_norm": 255.0, + "learning_rate": 9.660594678336654e-05, + "loss": 12.4378, + "step": 3465 + }, + { + "epoch": 0.144470843232879, + "grad_norm": 636.0, + "learning_rate": 9.660350182468198e-05, + "loss": 21.6254, + "step": 3466 + }, + { + "epoch": 0.14451252553040725, + "grad_norm": 154.0, + "learning_rate": 9.660105601664448e-05, + "loss": 8.6879, + "step": 3467 + }, + { + "epoch": 0.1445542078279355, + "grad_norm": 354.0, + "learning_rate": 9.659860935929859e-05, + "loss": 13.8756, + "step": 3468 + }, + { + "epoch": 0.14459589012546373, + "grad_norm": 250.0, + "learning_rate": 9.659616185268889e-05, + "loss": 12.2521, + "step": 3469 + }, + { + "epoch": 0.14463757242299197, + "grad_norm": 240.0, + "learning_rate": 9.659371349686001e-05, + "loss": 12.2523, + "step": 3470 + }, + { + "epoch": 0.1446792547205202, + "grad_norm": 532.0, + "learning_rate": 9.659126429185659e-05, + "loss": 18.1252, + "step": 3471 + }, + { + "epoch": 0.14472093701804845, + "grad_norm": 466.0, + "learning_rate": 9.658881423772322e-05, + "loss": 15.9387, + "step": 3472 + }, + { + "epoch": 0.14476261931557668, + "grad_norm": 334.0, + "learning_rate": 9.658636333450457e-05, + "loss": 13.7506, + "step": 3473 + }, + { + "epoch": 0.14480430161310492, + "grad_norm": 294.0, + "learning_rate": 9.658391158224532e-05, + "loss": 13.2504, + "step": 3474 + }, + { + "epoch": 0.14484598391063316, + "grad_norm": 372.0, + "learning_rate": 9.658145898099015e-05, + "loss": 15.1878, + "step": 3475 + }, + { + "epoch": 0.1448876662081614, + "grad_norm": 704.0, + "learning_rate": 9.657900553078376e-05, + "loss": 18.0053, + "step": 3476 + }, + { + "epoch": 0.14492934850568964, + "grad_norm": 484.0, + "learning_rate": 9.657655123167084e-05, + "loss": 17.7542, + "step": 3477 + }, + { + "epoch": 0.14497103080321788, + "grad_norm": 58.25, + "learning_rate": 9.657409608369616e-05, + "loss": 9.0628, + "step": 3478 + }, + { + "epoch": 0.14501271310074612, + "grad_norm": 354.0, + "learning_rate": 9.657164008690443e-05, + "loss": 14.942, + "step": 3479 + }, + { + "epoch": 0.14505439539827436, + "grad_norm": 217.0, + "learning_rate": 9.656918324134044e-05, + "loss": 13.0631, + "step": 3480 + }, + { + "epoch": 0.1450960776958026, + "grad_norm": 110.5, + "learning_rate": 9.656672554704892e-05, + "loss": 7.8756, + "step": 3481 + }, + { + "epoch": 0.14513775999333084, + "grad_norm": 212.0, + "learning_rate": 9.656426700407473e-05, + "loss": 11.6882, + "step": 3482 + }, + { + "epoch": 0.14517944229085908, + "grad_norm": 127.5, + "learning_rate": 9.656180761246262e-05, + "loss": 10.3129, + "step": 3483 + }, + { + "epoch": 0.14522112458838732, + "grad_norm": 536.0, + "learning_rate": 9.655934737225743e-05, + "loss": 15.1264, + "step": 3484 + }, + { + "epoch": 0.14526280688591556, + "grad_norm": 244.0, + "learning_rate": 9.655688628350401e-05, + "loss": 11.5004, + "step": 3485 + }, + { + "epoch": 0.1453044891834438, + "grad_norm": 338.0, + "learning_rate": 9.655442434624721e-05, + "loss": 14.1877, + "step": 3486 + }, + { + "epoch": 0.14534617148097204, + "grad_norm": 171.0, + "learning_rate": 9.655196156053187e-05, + "loss": 9.3134, + "step": 3487 + }, + { + "epoch": 0.14538785377850028, + "grad_norm": 161.0, + "learning_rate": 9.654949792640294e-05, + "loss": 10.2509, + "step": 3488 + }, + { + "epoch": 0.14542953607602851, + "grad_norm": 240.0, + "learning_rate": 9.654703344390525e-05, + "loss": 11.8129, + "step": 3489 + }, + { + "epoch": 0.14547121837355675, + "grad_norm": 162.0, + "learning_rate": 9.654456811308375e-05, + "loss": 9.5003, + "step": 3490 + }, + { + "epoch": 0.145512900671085, + "grad_norm": 1504.0, + "learning_rate": 9.654210193398335e-05, + "loss": 28.1371, + "step": 3491 + }, + { + "epoch": 0.14555458296861323, + "grad_norm": 336.0, + "learning_rate": 9.653963490664902e-05, + "loss": 13.1259, + "step": 3492 + }, + { + "epoch": 0.14559626526614147, + "grad_norm": 708.0, + "learning_rate": 9.653716703112572e-05, + "loss": 17.8784, + "step": 3493 + }, + { + "epoch": 0.1456379475636697, + "grad_norm": 238.0, + "learning_rate": 9.65346983074584e-05, + "loss": 11.8127, + "step": 3494 + }, + { + "epoch": 0.14567962986119795, + "grad_norm": 302.0, + "learning_rate": 9.653222873569209e-05, + "loss": 12.8755, + "step": 3495 + }, + { + "epoch": 0.1457213121587262, + "grad_norm": 177.0, + "learning_rate": 9.652975831587176e-05, + "loss": 9.0012, + "step": 3496 + }, + { + "epoch": 0.14576299445625443, + "grad_norm": 352.0, + "learning_rate": 9.652728704804249e-05, + "loss": 14.8137, + "step": 3497 + }, + { + "epoch": 0.14580467675378267, + "grad_norm": 220.0, + "learning_rate": 9.652481493224926e-05, + "loss": 10.438, + "step": 3498 + }, + { + "epoch": 0.1458463590513109, + "grad_norm": 356.0, + "learning_rate": 9.652234196853714e-05, + "loss": 14.5629, + "step": 3499 + }, + { + "epoch": 0.14588804134883915, + "grad_norm": 147.0, + "learning_rate": 9.651986815695122e-05, + "loss": 7.5002, + "step": 3500 + }, + { + "epoch": 0.1459297236463674, + "grad_norm": 306.0, + "learning_rate": 9.651739349753657e-05, + "loss": 13.9382, + "step": 3501 + }, + { + "epoch": 0.14597140594389563, + "grad_norm": 498.0, + "learning_rate": 9.651491799033829e-05, + "loss": 17.0002, + "step": 3502 + }, + { + "epoch": 0.14601308824142387, + "grad_norm": 1272.0, + "learning_rate": 9.651244163540152e-05, + "loss": 29.3794, + "step": 3503 + }, + { + "epoch": 0.1460547705389521, + "grad_norm": 306.0, + "learning_rate": 9.650996443277136e-05, + "loss": 13.2504, + "step": 3504 + }, + { + "epoch": 0.14609645283648034, + "grad_norm": 496.0, + "learning_rate": 9.650748638249296e-05, + "loss": 16.5005, + "step": 3505 + }, + { + "epoch": 0.14613813513400858, + "grad_norm": 572.0, + "learning_rate": 9.65050074846115e-05, + "loss": 19.0003, + "step": 3506 + }, + { + "epoch": 0.14617981743153682, + "grad_norm": 187.0, + "learning_rate": 9.650252773917214e-05, + "loss": 10.7506, + "step": 3507 + }, + { + "epoch": 0.14622149972906506, + "grad_norm": 468.0, + "learning_rate": 9.65000471462201e-05, + "loss": 17.377, + "step": 3508 + }, + { + "epoch": 0.1462631820265933, + "grad_norm": 316.0, + "learning_rate": 9.649756570580057e-05, + "loss": 13.6878, + "step": 3509 + }, + { + "epoch": 0.14630486432412154, + "grad_norm": 135.0, + "learning_rate": 9.649508341795877e-05, + "loss": 9.3756, + "step": 3510 + }, + { + "epoch": 0.14634654662164978, + "grad_norm": 173.0, + "learning_rate": 9.649260028273995e-05, + "loss": 11.0629, + "step": 3511 + }, + { + "epoch": 0.14638822891917802, + "grad_norm": 612.0, + "learning_rate": 9.649011630018936e-05, + "loss": 18.2506, + "step": 3512 + }, + { + "epoch": 0.14642991121670626, + "grad_norm": 262.0, + "learning_rate": 9.648763147035229e-05, + "loss": 13.3761, + "step": 3513 + }, + { + "epoch": 0.1464715935142345, + "grad_norm": 83.0, + "learning_rate": 9.648514579327399e-05, + "loss": 9.063, + "step": 3514 + }, + { + "epoch": 0.14651327581176274, + "grad_norm": 492.0, + "learning_rate": 9.648265926899979e-05, + "loss": 18.2503, + "step": 3515 + }, + { + "epoch": 0.14655495810929098, + "grad_norm": 239.0, + "learning_rate": 9.648017189757499e-05, + "loss": 11.9378, + "step": 3516 + }, + { + "epoch": 0.14659664040681922, + "grad_norm": 532.0, + "learning_rate": 9.647768367904494e-05, + "loss": 18.3751, + "step": 3517 + }, + { + "epoch": 0.14663832270434746, + "grad_norm": 1672.0, + "learning_rate": 9.647519461345498e-05, + "loss": 42.2518, + "step": 3518 + }, + { + "epoch": 0.1466800050018757, + "grad_norm": 564.0, + "learning_rate": 9.647270470085046e-05, + "loss": 17.8798, + "step": 3519 + }, + { + "epoch": 0.14672168729940394, + "grad_norm": 466.0, + "learning_rate": 9.647021394127678e-05, + "loss": 16.2508, + "step": 3520 + }, + { + "epoch": 0.14676336959693217, + "grad_norm": 203.0, + "learning_rate": 9.646772233477934e-05, + "loss": 11.6877, + "step": 3521 + }, + { + "epoch": 0.14680505189446041, + "grad_norm": 201.0, + "learning_rate": 9.646522988140352e-05, + "loss": 11.6266, + "step": 3522 + }, + { + "epoch": 0.14684673419198865, + "grad_norm": 364.0, + "learning_rate": 9.646273658119476e-05, + "loss": 14.0003, + "step": 3523 + }, + { + "epoch": 0.1468884164895169, + "grad_norm": 204.0, + "learning_rate": 9.646024243419848e-05, + "loss": 10.8755, + "step": 3524 + }, + { + "epoch": 0.14693009878704513, + "grad_norm": 760.0, + "learning_rate": 9.64577474404602e-05, + "loss": 21.5007, + "step": 3525 + }, + { + "epoch": 0.14697178108457337, + "grad_norm": 266.0, + "learning_rate": 9.645525160002533e-05, + "loss": 12.4379, + "step": 3526 + }, + { + "epoch": 0.1470134633821016, + "grad_norm": 366.0, + "learning_rate": 9.645275491293937e-05, + "loss": 14.5009, + "step": 3527 + }, + { + "epoch": 0.14705514567962985, + "grad_norm": 980.0, + "learning_rate": 9.645025737924782e-05, + "loss": 24.1278, + "step": 3528 + }, + { + "epoch": 0.1470968279771581, + "grad_norm": 608.0, + "learning_rate": 9.644775899899623e-05, + "loss": 19.3754, + "step": 3529 + }, + { + "epoch": 0.14713851027468633, + "grad_norm": 756.0, + "learning_rate": 9.64452597722301e-05, + "loss": 24.5005, + "step": 3530 + }, + { + "epoch": 0.14718019257221457, + "grad_norm": 1504.0, + "learning_rate": 9.644275969899498e-05, + "loss": 31.6298, + "step": 3531 + }, + { + "epoch": 0.1472218748697428, + "grad_norm": 372.0, + "learning_rate": 9.644025877933645e-05, + "loss": 14.5629, + "step": 3532 + }, + { + "epoch": 0.14726355716727105, + "grad_norm": 458.0, + "learning_rate": 9.643775701330007e-05, + "loss": 16.0006, + "step": 3533 + }, + { + "epoch": 0.1473052394647993, + "grad_norm": 270.0, + "learning_rate": 9.643525440093147e-05, + "loss": 13.0003, + "step": 3534 + }, + { + "epoch": 0.14734692176232753, + "grad_norm": 203.0, + "learning_rate": 9.64327509422762e-05, + "loss": 11.0628, + "step": 3535 + }, + { + "epoch": 0.14738860405985577, + "grad_norm": 179.0, + "learning_rate": 9.643024663737994e-05, + "loss": 11.6881, + "step": 3536 + }, + { + "epoch": 0.14743028635738403, + "grad_norm": 390.0, + "learning_rate": 9.642774148628832e-05, + "loss": 16.1252, + "step": 3537 + }, + { + "epoch": 0.14747196865491227, + "grad_norm": 201.0, + "learning_rate": 9.642523548904699e-05, + "loss": 11.6254, + "step": 3538 + }, + { + "epoch": 0.1475136509524405, + "grad_norm": 217.0, + "learning_rate": 9.642272864570162e-05, + "loss": 10.626, + "step": 3539 + }, + { + "epoch": 0.14755533324996875, + "grad_norm": 362.0, + "learning_rate": 9.64202209562979e-05, + "loss": 13.9383, + "step": 3540 + }, + { + "epoch": 0.147597015547497, + "grad_norm": 376.0, + "learning_rate": 9.64177124208815e-05, + "loss": 14.6879, + "step": 3541 + }, + { + "epoch": 0.14763869784502523, + "grad_norm": 432.0, + "learning_rate": 9.641520303949822e-05, + "loss": 16.1261, + "step": 3542 + }, + { + "epoch": 0.14768038014255347, + "grad_norm": 752.0, + "learning_rate": 9.641269281219372e-05, + "loss": 23.2504, + "step": 3543 + }, + { + "epoch": 0.1477220624400817, + "grad_norm": 178.0, + "learning_rate": 9.641018173901378e-05, + "loss": 10.7513, + "step": 3544 + }, + { + "epoch": 0.14776374473760995, + "grad_norm": 430.0, + "learning_rate": 9.640766982000415e-05, + "loss": 14.3127, + "step": 3545 + }, + { + "epoch": 0.1478054270351382, + "grad_norm": 532.0, + "learning_rate": 9.640515705521063e-05, + "loss": 17.0004, + "step": 3546 + }, + { + "epoch": 0.14784710933266643, + "grad_norm": 350.0, + "learning_rate": 9.640264344467898e-05, + "loss": 13.9421, + "step": 3547 + }, + { + "epoch": 0.14788879163019467, + "grad_norm": 71.5, + "learning_rate": 9.640012898845505e-05, + "loss": 7.3762, + "step": 3548 + }, + { + "epoch": 0.1479304739277229, + "grad_norm": 302.0, + "learning_rate": 9.639761368658467e-05, + "loss": 13.7502, + "step": 3549 + }, + { + "epoch": 0.14797215622525114, + "grad_norm": 310.0, + "learning_rate": 9.639509753911363e-05, + "loss": 13.3131, + "step": 3550 + }, + { + "epoch": 0.14801383852277938, + "grad_norm": 160.0, + "learning_rate": 9.639258054608783e-05, + "loss": 10.5628, + "step": 3551 + }, + { + "epoch": 0.14805552082030762, + "grad_norm": 173.0, + "learning_rate": 9.639006270755313e-05, + "loss": 8.8757, + "step": 3552 + }, + { + "epoch": 0.14809720311783586, + "grad_norm": 416.0, + "learning_rate": 9.638754402355542e-05, + "loss": 15.438, + "step": 3553 + }, + { + "epoch": 0.1481388854153641, + "grad_norm": 358.0, + "learning_rate": 9.63850244941406e-05, + "loss": 14.6254, + "step": 3554 + }, + { + "epoch": 0.14818056771289234, + "grad_norm": 532.0, + "learning_rate": 9.638250411935459e-05, + "loss": 17.3776, + "step": 3555 + }, + { + "epoch": 0.14822225001042058, + "grad_norm": 160.0, + "learning_rate": 9.637998289924333e-05, + "loss": 10.4384, + "step": 3556 + }, + { + "epoch": 0.14826393230794882, + "grad_norm": 286.0, + "learning_rate": 9.637746083385276e-05, + "loss": 13.6272, + "step": 3557 + }, + { + "epoch": 0.14830561460547706, + "grad_norm": 78.0, + "learning_rate": 9.637493792322885e-05, + "loss": 6.8442, + "step": 3558 + }, + { + "epoch": 0.1483472969030053, + "grad_norm": 106.5, + "learning_rate": 9.637241416741758e-05, + "loss": 9.1882, + "step": 3559 + }, + { + "epoch": 0.14838897920053354, + "grad_norm": 422.0, + "learning_rate": 9.636988956646495e-05, + "loss": 15.5002, + "step": 3560 + }, + { + "epoch": 0.14843066149806178, + "grad_norm": 328.0, + "learning_rate": 9.636736412041696e-05, + "loss": 14.2518, + "step": 3561 + }, + { + "epoch": 0.14847234379559002, + "grad_norm": 98.0, + "learning_rate": 9.636483782931965e-05, + "loss": 9.0628, + "step": 3562 + }, + { + "epoch": 0.14851402609311826, + "grad_norm": 414.0, + "learning_rate": 9.636231069321905e-05, + "loss": 16.0003, + "step": 3563 + }, + { + "epoch": 0.1485557083906465, + "grad_norm": 378.0, + "learning_rate": 9.635978271216122e-05, + "loss": 14.5004, + "step": 3564 + }, + { + "epoch": 0.14859739068817474, + "grad_norm": 504.0, + "learning_rate": 9.635725388619223e-05, + "loss": 15.9379, + "step": 3565 + }, + { + "epoch": 0.14863907298570297, + "grad_norm": 344.0, + "learning_rate": 9.635472421535818e-05, + "loss": 14.8755, + "step": 3566 + }, + { + "epoch": 0.14868075528323121, + "grad_norm": 302.0, + "learning_rate": 9.635219369970518e-05, + "loss": 13.1879, + "step": 3567 + }, + { + "epoch": 0.14872243758075945, + "grad_norm": 148.0, + "learning_rate": 9.634966233927931e-05, + "loss": 8.9383, + "step": 3568 + }, + { + "epoch": 0.1487641198782877, + "grad_norm": 712.0, + "learning_rate": 9.634713013412675e-05, + "loss": 21.1258, + "step": 3569 + }, + { + "epoch": 0.14880580217581593, + "grad_norm": 238.0, + "learning_rate": 9.634459708429361e-05, + "loss": 7.0004, + "step": 3570 + }, + { + "epoch": 0.14884748447334417, + "grad_norm": 298.0, + "learning_rate": 9.634206318982609e-05, + "loss": 13.9382, + "step": 3571 + }, + { + "epoch": 0.1488891667708724, + "grad_norm": 88.0, + "learning_rate": 9.633952845077034e-05, + "loss": 9.3128, + "step": 3572 + }, + { + "epoch": 0.14893084906840065, + "grad_norm": 214.0, + "learning_rate": 9.633699286717259e-05, + "loss": 11.6254, + "step": 3573 + }, + { + "epoch": 0.1489725313659289, + "grad_norm": 336.0, + "learning_rate": 9.633445643907901e-05, + "loss": 13.4394, + "step": 3574 + }, + { + "epoch": 0.14901421366345713, + "grad_norm": 424.0, + "learning_rate": 9.633191916653585e-05, + "loss": 15.7502, + "step": 3575 + }, + { + "epoch": 0.14905589596098537, + "grad_norm": 242.0, + "learning_rate": 9.632938104958936e-05, + "loss": 11.8756, + "step": 3576 + }, + { + "epoch": 0.1490975782585136, + "grad_norm": 632.0, + "learning_rate": 9.632684208828579e-05, + "loss": 18.6252, + "step": 3577 + }, + { + "epoch": 0.14913926055604185, + "grad_norm": 234.0, + "learning_rate": 9.63243022826714e-05, + "loss": 12.2504, + "step": 3578 + }, + { + "epoch": 0.1491809428535701, + "grad_norm": 326.0, + "learning_rate": 9.63217616327925e-05, + "loss": 12.6881, + "step": 3579 + }, + { + "epoch": 0.14922262515109833, + "grad_norm": 136.0, + "learning_rate": 9.631922013869537e-05, + "loss": 10.6882, + "step": 3580 + }, + { + "epoch": 0.14926430744862657, + "grad_norm": 1624.0, + "learning_rate": 9.631667780042634e-05, + "loss": 33.2548, + "step": 3581 + }, + { + "epoch": 0.1493059897461548, + "grad_norm": 700.0, + "learning_rate": 9.631413461803176e-05, + "loss": 22.751, + "step": 3582 + }, + { + "epoch": 0.14934767204368304, + "grad_norm": 140.0, + "learning_rate": 9.631159059155797e-05, + "loss": 10.0631, + "step": 3583 + }, + { + "epoch": 0.14938935434121128, + "grad_norm": 402.0, + "learning_rate": 9.630904572105131e-05, + "loss": 15.688, + "step": 3584 + }, + { + "epoch": 0.14943103663873952, + "grad_norm": 692.0, + "learning_rate": 9.63065000065582e-05, + "loss": 21.2517, + "step": 3585 + }, + { + "epoch": 0.14947271893626776, + "grad_norm": 372.0, + "learning_rate": 9.630395344812499e-05, + "loss": 13.313, + "step": 3586 + }, + { + "epoch": 0.149514401233796, + "grad_norm": 440.0, + "learning_rate": 9.630140604579814e-05, + "loss": 16.5006, + "step": 3587 + }, + { + "epoch": 0.14955608353132424, + "grad_norm": 744.0, + "learning_rate": 9.629885779962405e-05, + "loss": 21.7502, + "step": 3588 + }, + { + "epoch": 0.14959776582885248, + "grad_norm": 44.25, + "learning_rate": 9.629630870964917e-05, + "loss": 7.4383, + "step": 3589 + }, + { + "epoch": 0.14963944812638072, + "grad_norm": 414.0, + "learning_rate": 9.629375877591992e-05, + "loss": 15.129, + "step": 3590 + }, + { + "epoch": 0.14968113042390896, + "grad_norm": 92.0, + "learning_rate": 9.629120799848286e-05, + "loss": 9.2509, + "step": 3591 + }, + { + "epoch": 0.1497228127214372, + "grad_norm": 104.0, + "learning_rate": 9.62886563773844e-05, + "loss": 9.7504, + "step": 3592 + }, + { + "epoch": 0.14976449501896544, + "grad_norm": 764.0, + "learning_rate": 9.628610391267105e-05, + "loss": 21.7507, + "step": 3593 + }, + { + "epoch": 0.14980617731649368, + "grad_norm": 1600.0, + "learning_rate": 9.628355060438937e-05, + "loss": 32.503, + "step": 3594 + }, + { + "epoch": 0.14984785961402192, + "grad_norm": 624.0, + "learning_rate": 9.628099645258587e-05, + "loss": 18.6259, + "step": 3595 + }, + { + "epoch": 0.14988954191155016, + "grad_norm": 235.0, + "learning_rate": 9.62784414573071e-05, + "loss": 11.063, + "step": 3596 + }, + { + "epoch": 0.1499312242090784, + "grad_norm": 162.0, + "learning_rate": 9.627588561859961e-05, + "loss": 12.3134, + "step": 3597 + }, + { + "epoch": 0.14997290650660663, + "grad_norm": 884.0, + "learning_rate": 9.627332893651002e-05, + "loss": 23.3771, + "step": 3598 + }, + { + "epoch": 0.15001458880413487, + "grad_norm": 458.0, + "learning_rate": 9.62707714110849e-05, + "loss": 15.1252, + "step": 3599 + }, + { + "epoch": 0.1500562711016631, + "grad_norm": 448.0, + "learning_rate": 9.626821304237086e-05, + "loss": 13.6877, + "step": 3600 + }, + { + "epoch": 0.15009795339919135, + "grad_norm": 1248.0, + "learning_rate": 9.626565383041452e-05, + "loss": 28.6299, + "step": 3601 + }, + { + "epoch": 0.1501396356967196, + "grad_norm": 370.0, + "learning_rate": 9.626309377526254e-05, + "loss": 14.3752, + "step": 3602 + }, + { + "epoch": 0.15018131799424783, + "grad_norm": 221.0, + "learning_rate": 9.626053287696157e-05, + "loss": 11.6255, + "step": 3603 + }, + { + "epoch": 0.15022300029177607, + "grad_norm": 672.0, + "learning_rate": 9.625797113555828e-05, + "loss": 18.5008, + "step": 3604 + }, + { + "epoch": 0.1502646825893043, + "grad_norm": 478.0, + "learning_rate": 9.625540855109936e-05, + "loss": 16.127, + "step": 3605 + }, + { + "epoch": 0.15030636488683255, + "grad_norm": 249.0, + "learning_rate": 9.62528451236315e-05, + "loss": 11.9377, + "step": 3606 + }, + { + "epoch": 0.1503480471843608, + "grad_norm": 478.0, + "learning_rate": 9.625028085320145e-05, + "loss": 16.6253, + "step": 3607 + }, + { + "epoch": 0.15038972948188903, + "grad_norm": 239.0, + "learning_rate": 9.624771573985592e-05, + "loss": 11.6877, + "step": 3608 + }, + { + "epoch": 0.15043141177941727, + "grad_norm": 164.0, + "learning_rate": 9.624514978364165e-05, + "loss": 9.3132, + "step": 3609 + }, + { + "epoch": 0.15047309407694553, + "grad_norm": 584.0, + "learning_rate": 9.624258298460545e-05, + "loss": 19.6256, + "step": 3610 + }, + { + "epoch": 0.15051477637447377, + "grad_norm": 163.0, + "learning_rate": 9.624001534279405e-05, + "loss": 8.6889, + "step": 3611 + }, + { + "epoch": 0.150556458672002, + "grad_norm": 480.0, + "learning_rate": 9.623744685825426e-05, + "loss": 17.2511, + "step": 3612 + }, + { + "epoch": 0.15059814096953025, + "grad_norm": 318.0, + "learning_rate": 9.62348775310329e-05, + "loss": 11.6879, + "step": 3613 + }, + { + "epoch": 0.1506398232670585, + "grad_norm": 744.0, + "learning_rate": 9.623230736117682e-05, + "loss": 20.7504, + "step": 3614 + }, + { + "epoch": 0.15068150556458673, + "grad_norm": 668.0, + "learning_rate": 9.62297363487328e-05, + "loss": 20.7505, + "step": 3615 + }, + { + "epoch": 0.15072318786211497, + "grad_norm": 470.0, + "learning_rate": 9.622716449374775e-05, + "loss": 14.9378, + "step": 3616 + }, + { + "epoch": 0.1507648701596432, + "grad_norm": 316.0, + "learning_rate": 9.622459179626852e-05, + "loss": 14.1879, + "step": 3617 + }, + { + "epoch": 0.15080655245717145, + "grad_norm": 1184.0, + "learning_rate": 9.622201825634198e-05, + "loss": 28.6253, + "step": 3618 + }, + { + "epoch": 0.1508482347546997, + "grad_norm": 147.0, + "learning_rate": 9.62194438740151e-05, + "loss": 10.5629, + "step": 3619 + }, + { + "epoch": 0.15088991705222793, + "grad_norm": 177.0, + "learning_rate": 9.62168686493347e-05, + "loss": 9.6894, + "step": 3620 + }, + { + "epoch": 0.15093159934975617, + "grad_norm": 460.0, + "learning_rate": 9.621429258234779e-05, + "loss": 16.7503, + "step": 3621 + }, + { + "epoch": 0.1509732816472844, + "grad_norm": 256.0, + "learning_rate": 9.62117156731013e-05, + "loss": 12.6254, + "step": 3622 + }, + { + "epoch": 0.15101496394481265, + "grad_norm": 298.0, + "learning_rate": 9.620913792164219e-05, + "loss": 10.8131, + "step": 3623 + }, + { + "epoch": 0.1510566462423409, + "grad_norm": 121.5, + "learning_rate": 9.620655932801743e-05, + "loss": 9.7502, + "step": 3624 + }, + { + "epoch": 0.15109832853986913, + "grad_norm": 74.5, + "learning_rate": 9.620397989227403e-05, + "loss": 8.2504, + "step": 3625 + }, + { + "epoch": 0.15114001083739736, + "grad_norm": 760.0, + "learning_rate": 9.620139961445899e-05, + "loss": 21.7516, + "step": 3626 + }, + { + "epoch": 0.1511816931349256, + "grad_norm": 484.0, + "learning_rate": 9.619881849461936e-05, + "loss": 18.2503, + "step": 3627 + }, + { + "epoch": 0.15122337543245384, + "grad_norm": 2080.0, + "learning_rate": 9.619623653280215e-05, + "loss": 46.0002, + "step": 3628 + }, + { + "epoch": 0.15126505772998208, + "grad_norm": 478.0, + "learning_rate": 9.619365372905442e-05, + "loss": 16.7538, + "step": 3629 + }, + { + "epoch": 0.15130674002751032, + "grad_norm": 296.0, + "learning_rate": 9.619107008342325e-05, + "loss": 13.2526, + "step": 3630 + }, + { + "epoch": 0.15134842232503856, + "grad_norm": 672.0, + "learning_rate": 9.618848559595572e-05, + "loss": 19.7502, + "step": 3631 + }, + { + "epoch": 0.1513901046225668, + "grad_norm": 340.0, + "learning_rate": 9.618590026669896e-05, + "loss": 13.0628, + "step": 3632 + }, + { + "epoch": 0.15143178692009504, + "grad_norm": 264.0, + "learning_rate": 9.618331409570005e-05, + "loss": 11.6882, + "step": 3633 + }, + { + "epoch": 0.15147346921762328, + "grad_norm": 264.0, + "learning_rate": 9.618072708300617e-05, + "loss": 12.2503, + "step": 3634 + }, + { + "epoch": 0.15151515151515152, + "grad_norm": 400.0, + "learning_rate": 9.61781392286644e-05, + "loss": 14.7502, + "step": 3635 + }, + { + "epoch": 0.15155683381267976, + "grad_norm": 144.0, + "learning_rate": 9.617555053272197e-05, + "loss": 9.1884, + "step": 3636 + }, + { + "epoch": 0.151598516110208, + "grad_norm": 496.0, + "learning_rate": 9.617296099522602e-05, + "loss": 19.2503, + "step": 3637 + }, + { + "epoch": 0.15164019840773624, + "grad_norm": 213.0, + "learning_rate": 9.617037061622375e-05, + "loss": 12.0627, + "step": 3638 + }, + { + "epoch": 0.15168188070526448, + "grad_norm": 380.0, + "learning_rate": 9.616777939576241e-05, + "loss": 16.7507, + "step": 3639 + }, + { + "epoch": 0.15172356300279272, + "grad_norm": 1464.0, + "learning_rate": 9.616518733388916e-05, + "loss": 30.5043, + "step": 3640 + }, + { + "epoch": 0.15176524530032096, + "grad_norm": 93.5, + "learning_rate": 9.616259443065129e-05, + "loss": 7.5637, + "step": 3641 + }, + { + "epoch": 0.1518069275978492, + "grad_norm": 356.0, + "learning_rate": 9.616000068609602e-05, + "loss": 13.3128, + "step": 3642 + }, + { + "epoch": 0.15184860989537743, + "grad_norm": 344.0, + "learning_rate": 9.615740610027066e-05, + "loss": 14.8129, + "step": 3643 + }, + { + "epoch": 0.15189029219290567, + "grad_norm": 336.0, + "learning_rate": 9.615481067322247e-05, + "loss": 13.3128, + "step": 3644 + }, + { + "epoch": 0.1519319744904339, + "grad_norm": 544.0, + "learning_rate": 9.615221440499876e-05, + "loss": 17.8753, + "step": 3645 + }, + { + "epoch": 0.15197365678796215, + "grad_norm": 458.0, + "learning_rate": 9.614961729564683e-05, + "loss": 14.6255, + "step": 3646 + }, + { + "epoch": 0.1520153390854904, + "grad_norm": 368.0, + "learning_rate": 9.614701934521404e-05, + "loss": 14.8133, + "step": 3647 + }, + { + "epoch": 0.15205702138301863, + "grad_norm": 253.0, + "learning_rate": 9.614442055374773e-05, + "loss": 12.6878, + "step": 3648 + }, + { + "epoch": 0.15209870368054687, + "grad_norm": 144.0, + "learning_rate": 9.614182092129526e-05, + "loss": 7.344, + "step": 3649 + }, + { + "epoch": 0.1521403859780751, + "grad_norm": 158.0, + "learning_rate": 9.6139220447904e-05, + "loss": 9.7504, + "step": 3650 + }, + { + "epoch": 0.15218206827560335, + "grad_norm": 450.0, + "learning_rate": 9.613661913362135e-05, + "loss": 16.3766, + "step": 3651 + }, + { + "epoch": 0.1522237505731316, + "grad_norm": 700.0, + "learning_rate": 9.613401697849473e-05, + "loss": 20.5035, + "step": 3652 + }, + { + "epoch": 0.15226543287065983, + "grad_norm": 212.0, + "learning_rate": 9.613141398257155e-05, + "loss": 10.6254, + "step": 3653 + }, + { + "epoch": 0.15230711516818807, + "grad_norm": 268.0, + "learning_rate": 9.612881014589924e-05, + "loss": 8.8133, + "step": 3654 + }, + { + "epoch": 0.1523487974657163, + "grad_norm": 218.0, + "learning_rate": 9.612620546852529e-05, + "loss": 11.2505, + "step": 3655 + }, + { + "epoch": 0.15239047976324455, + "grad_norm": 310.0, + "learning_rate": 9.612359995049715e-05, + "loss": 13.4392, + "step": 3656 + }, + { + "epoch": 0.15243216206077279, + "grad_norm": 255.0, + "learning_rate": 9.612099359186229e-05, + "loss": 10.2523, + "step": 3657 + }, + { + "epoch": 0.15247384435830103, + "grad_norm": 61.75, + "learning_rate": 9.611838639266823e-05, + "loss": 8.1884, + "step": 3658 + }, + { + "epoch": 0.15251552665582926, + "grad_norm": 1400.0, + "learning_rate": 9.611577835296251e-05, + "loss": 33.5023, + "step": 3659 + }, + { + "epoch": 0.1525572089533575, + "grad_norm": 225.0, + "learning_rate": 9.61131694727926e-05, + "loss": 12.1256, + "step": 3660 + }, + { + "epoch": 0.15259889125088574, + "grad_norm": 486.0, + "learning_rate": 9.61105597522061e-05, + "loss": 17.3751, + "step": 3661 + }, + { + "epoch": 0.15264057354841398, + "grad_norm": 1440.0, + "learning_rate": 9.610794919125056e-05, + "loss": 33.5002, + "step": 3662 + }, + { + "epoch": 0.15268225584594222, + "grad_norm": 450.0, + "learning_rate": 9.610533778997357e-05, + "loss": 16.8753, + "step": 3663 + }, + { + "epoch": 0.15272393814347046, + "grad_norm": 560.0, + "learning_rate": 9.610272554842268e-05, + "loss": 17.0005, + "step": 3664 + }, + { + "epoch": 0.1527656204409987, + "grad_norm": 211.0, + "learning_rate": 9.610011246664553e-05, + "loss": 11.5629, + "step": 3665 + }, + { + "epoch": 0.15280730273852694, + "grad_norm": 201.0, + "learning_rate": 9.609749854468973e-05, + "loss": 12.5004, + "step": 3666 + }, + { + "epoch": 0.15284898503605518, + "grad_norm": 1472.0, + "learning_rate": 9.609488378260295e-05, + "loss": 35.5047, + "step": 3667 + }, + { + "epoch": 0.15289066733358342, + "grad_norm": 422.0, + "learning_rate": 9.609226818043279e-05, + "loss": 15.8755, + "step": 3668 + }, + { + "epoch": 0.15293234963111166, + "grad_norm": 436.0, + "learning_rate": 9.608965173822697e-05, + "loss": 16.3754, + "step": 3669 + }, + { + "epoch": 0.1529740319286399, + "grad_norm": 288.0, + "learning_rate": 9.608703445603315e-05, + "loss": 13.2502, + "step": 3670 + }, + { + "epoch": 0.15301571422616814, + "grad_norm": 174.0, + "learning_rate": 9.608441633389905e-05, + "loss": 9.3752, + "step": 3671 + }, + { + "epoch": 0.15305739652369638, + "grad_norm": 382.0, + "learning_rate": 9.608179737187234e-05, + "loss": 15.0628, + "step": 3672 + }, + { + "epoch": 0.15309907882122462, + "grad_norm": 450.0, + "learning_rate": 9.60791775700008e-05, + "loss": 17.7506, + "step": 3673 + }, + { + "epoch": 0.15314076111875286, + "grad_norm": 732.0, + "learning_rate": 9.607655692833217e-05, + "loss": 20.7513, + "step": 3674 + }, + { + "epoch": 0.1531824434162811, + "grad_norm": 452.0, + "learning_rate": 9.607393544691418e-05, + "loss": 16.5006, + "step": 3675 + }, + { + "epoch": 0.15322412571380933, + "grad_norm": 1440.0, + "learning_rate": 9.607131312579463e-05, + "loss": 27.6306, + "step": 3676 + }, + { + "epoch": 0.15326580801133757, + "grad_norm": 217.0, + "learning_rate": 9.606868996502132e-05, + "loss": 11.8128, + "step": 3677 + }, + { + "epoch": 0.1533074903088658, + "grad_norm": 468.0, + "learning_rate": 9.606606596464203e-05, + "loss": 15.7505, + "step": 3678 + }, + { + "epoch": 0.15334917260639405, + "grad_norm": 121.0, + "learning_rate": 9.606344112470461e-05, + "loss": 9.4398, + "step": 3679 + }, + { + "epoch": 0.1533908549039223, + "grad_norm": 346.0, + "learning_rate": 9.606081544525689e-05, + "loss": 14.0633, + "step": 3680 + }, + { + "epoch": 0.15343253720145053, + "grad_norm": 452.0, + "learning_rate": 9.60581889263467e-05, + "loss": 16.8754, + "step": 3681 + }, + { + "epoch": 0.15347421949897877, + "grad_norm": 364.0, + "learning_rate": 9.605556156802196e-05, + "loss": 13.4383, + "step": 3682 + }, + { + "epoch": 0.15351590179650704, + "grad_norm": 318.0, + "learning_rate": 9.60529333703305e-05, + "loss": 14.626, + "step": 3683 + }, + { + "epoch": 0.15355758409403528, + "grad_norm": 446.0, + "learning_rate": 9.605030433332023e-05, + "loss": 15.6879, + "step": 3684 + }, + { + "epoch": 0.15359926639156352, + "grad_norm": 314.0, + "learning_rate": 9.60476744570391e-05, + "loss": 12.2505, + "step": 3685 + }, + { + "epoch": 0.15364094868909176, + "grad_norm": 688.0, + "learning_rate": 9.6045043741535e-05, + "loss": 19.1303, + "step": 3686 + }, + { + "epoch": 0.15368263098662, + "grad_norm": 174.0, + "learning_rate": 9.60424121868559e-05, + "loss": 10.6262, + "step": 3687 + }, + { + "epoch": 0.15372431328414823, + "grad_norm": 178.0, + "learning_rate": 9.603977979304975e-05, + "loss": 11.0629, + "step": 3688 + }, + { + "epoch": 0.15376599558167647, + "grad_norm": 294.0, + "learning_rate": 9.603714656016452e-05, + "loss": 11.0044, + "step": 3689 + }, + { + "epoch": 0.1538076778792047, + "grad_norm": 406.0, + "learning_rate": 9.603451248824819e-05, + "loss": 17.0018, + "step": 3690 + }, + { + "epoch": 0.15384936017673295, + "grad_norm": 580.0, + "learning_rate": 9.603187757734882e-05, + "loss": 18.1257, + "step": 3691 + }, + { + "epoch": 0.1538910424742612, + "grad_norm": 402.0, + "learning_rate": 9.602924182751436e-05, + "loss": 13.5003, + "step": 3692 + }, + { + "epoch": 0.15393272477178943, + "grad_norm": 1112.0, + "learning_rate": 9.602660523879291e-05, + "loss": 19.8801, + "step": 3693 + }, + { + "epoch": 0.15397440706931767, + "grad_norm": 103.5, + "learning_rate": 9.602396781123248e-05, + "loss": 9.6889, + "step": 3694 + }, + { + "epoch": 0.1540160893668459, + "grad_norm": 720.0, + "learning_rate": 9.602132954488115e-05, + "loss": 21.2503, + "step": 3695 + }, + { + "epoch": 0.15405777166437415, + "grad_norm": 524.0, + "learning_rate": 9.601869043978702e-05, + "loss": 17.8763, + "step": 3696 + }, + { + "epoch": 0.1540994539619024, + "grad_norm": 446.0, + "learning_rate": 9.601605049599815e-05, + "loss": 15.6895, + "step": 3697 + }, + { + "epoch": 0.15414113625943063, + "grad_norm": 458.0, + "learning_rate": 9.601340971356268e-05, + "loss": 17.127, + "step": 3698 + }, + { + "epoch": 0.15418281855695887, + "grad_norm": 288.0, + "learning_rate": 9.601076809252873e-05, + "loss": 13.0002, + "step": 3699 + }, + { + "epoch": 0.1542245008544871, + "grad_norm": 197.0, + "learning_rate": 9.600812563294447e-05, + "loss": 7.7504, + "step": 3700 + }, + { + "epoch": 0.15426618315201535, + "grad_norm": 328.0, + "learning_rate": 9.600548233485802e-05, + "loss": 13.5007, + "step": 3701 + }, + { + "epoch": 0.15430786544954359, + "grad_norm": 107.0, + "learning_rate": 9.600283819831756e-05, + "loss": 8.5014, + "step": 3702 + }, + { + "epoch": 0.15434954774707182, + "grad_norm": 924.0, + "learning_rate": 9.60001932233713e-05, + "loss": 25.5017, + "step": 3703 + }, + { + "epoch": 0.15439123004460006, + "grad_norm": 152.0, + "learning_rate": 9.599754741006744e-05, + "loss": 9.3753, + "step": 3704 + }, + { + "epoch": 0.1544329123421283, + "grad_norm": 528.0, + "learning_rate": 9.599490075845418e-05, + "loss": 17.7504, + "step": 3705 + }, + { + "epoch": 0.15447459463965654, + "grad_norm": 128.0, + "learning_rate": 9.599225326857979e-05, + "loss": 9.4378, + "step": 3706 + }, + { + "epoch": 0.15451627693718478, + "grad_norm": 171.0, + "learning_rate": 9.59896049404925e-05, + "loss": 9.6877, + "step": 3707 + }, + { + "epoch": 0.15455795923471302, + "grad_norm": 117.0, + "learning_rate": 9.598695577424057e-05, + "loss": 8.3129, + "step": 3708 + }, + { + "epoch": 0.15459964153224126, + "grad_norm": 462.0, + "learning_rate": 9.598430576987228e-05, + "loss": 14.2526, + "step": 3709 + }, + { + "epoch": 0.1546413238297695, + "grad_norm": 390.0, + "learning_rate": 9.598165492743593e-05, + "loss": 14.0629, + "step": 3710 + }, + { + "epoch": 0.15468300612729774, + "grad_norm": 600.0, + "learning_rate": 9.597900324697986e-05, + "loss": 19.3753, + "step": 3711 + }, + { + "epoch": 0.15472468842482598, + "grad_norm": 60.25, + "learning_rate": 9.597635072855237e-05, + "loss": 6.7505, + "step": 3712 + }, + { + "epoch": 0.15476637072235422, + "grad_norm": 69.0, + "learning_rate": 9.59736973722018e-05, + "loss": 8.8773, + "step": 3713 + }, + { + "epoch": 0.15480805301988246, + "grad_norm": 254.0, + "learning_rate": 9.597104317797651e-05, + "loss": 13.1255, + "step": 3714 + }, + { + "epoch": 0.1548497353174107, + "grad_norm": 422.0, + "learning_rate": 9.596838814592488e-05, + "loss": 15.002, + "step": 3715 + }, + { + "epoch": 0.15489141761493894, + "grad_norm": 494.0, + "learning_rate": 9.59657322760953e-05, + "loss": 14.3774, + "step": 3716 + }, + { + "epoch": 0.15493309991246718, + "grad_norm": 268.0, + "learning_rate": 9.596307556853616e-05, + "loss": 11.0074, + "step": 3717 + }, + { + "epoch": 0.15497478220999542, + "grad_norm": 356.0, + "learning_rate": 9.596041802329589e-05, + "loss": 14.0646, + "step": 3718 + }, + { + "epoch": 0.15501646450752365, + "grad_norm": 1656.0, + "learning_rate": 9.595775964042294e-05, + "loss": 41.2515, + "step": 3719 + }, + { + "epoch": 0.1550581468050519, + "grad_norm": 464.0, + "learning_rate": 9.595510041996572e-05, + "loss": 16.627, + "step": 3720 + }, + { + "epoch": 0.15509982910258013, + "grad_norm": 604.0, + "learning_rate": 9.595244036197272e-05, + "loss": 18.752, + "step": 3721 + }, + { + "epoch": 0.15514151140010837, + "grad_norm": 904.0, + "learning_rate": 9.594977946649242e-05, + "loss": 21.5101, + "step": 3722 + }, + { + "epoch": 0.1551831936976366, + "grad_norm": 540.0, + "learning_rate": 9.59471177335733e-05, + "loss": 17.2588, + "step": 3723 + }, + { + "epoch": 0.15522487599516485, + "grad_norm": 816.0, + "learning_rate": 9.594445516326389e-05, + "loss": 21.7506, + "step": 3724 + }, + { + "epoch": 0.1552665582926931, + "grad_norm": 688.0, + "learning_rate": 9.594179175561271e-05, + "loss": 21.0006, + "step": 3725 + }, + { + "epoch": 0.15530824059022133, + "grad_norm": 466.0, + "learning_rate": 9.593912751066829e-05, + "loss": 15.5711, + "step": 3726 + }, + { + "epoch": 0.15534992288774957, + "grad_norm": 434.0, + "learning_rate": 9.593646242847919e-05, + "loss": 14.5666, + "step": 3727 + }, + { + "epoch": 0.1553916051852778, + "grad_norm": 688.0, + "learning_rate": 9.593379650909398e-05, + "loss": 21.0002, + "step": 3728 + }, + { + "epoch": 0.15543328748280605, + "grad_norm": 298.0, + "learning_rate": 9.593112975256126e-05, + "loss": 11.9384, + "step": 3729 + }, + { + "epoch": 0.1554749697803343, + "grad_norm": 212.0, + "learning_rate": 9.592846215892964e-05, + "loss": 11.3758, + "step": 3730 + }, + { + "epoch": 0.15551665207786253, + "grad_norm": 188.0, + "learning_rate": 9.592579372824768e-05, + "loss": 11.5015, + "step": 3731 + }, + { + "epoch": 0.15555833437539077, + "grad_norm": 166.0, + "learning_rate": 9.592312446056408e-05, + "loss": 10.6878, + "step": 3732 + }, + { + "epoch": 0.155600016672919, + "grad_norm": 440.0, + "learning_rate": 9.592045435592745e-05, + "loss": 15.8132, + "step": 3733 + }, + { + "epoch": 0.15564169897044725, + "grad_norm": 175.0, + "learning_rate": 9.591778341438646e-05, + "loss": 10.7517, + "step": 3734 + }, + { + "epoch": 0.15568338126797548, + "grad_norm": 402.0, + "learning_rate": 9.59151116359898e-05, + "loss": 15.6254, + "step": 3735 + }, + { + "epoch": 0.15572506356550372, + "grad_norm": 128.0, + "learning_rate": 9.591243902078615e-05, + "loss": 9.7508, + "step": 3736 + }, + { + "epoch": 0.15576674586303196, + "grad_norm": 294.0, + "learning_rate": 9.590976556882423e-05, + "loss": 11.9378, + "step": 3737 + }, + { + "epoch": 0.1558084281605602, + "grad_norm": 454.0, + "learning_rate": 9.590709128015276e-05, + "loss": 15.7506, + "step": 3738 + }, + { + "epoch": 0.15585011045808844, + "grad_norm": 158.0, + "learning_rate": 9.590441615482047e-05, + "loss": 10.8758, + "step": 3739 + }, + { + "epoch": 0.15589179275561668, + "grad_norm": 328.0, + "learning_rate": 9.590174019287611e-05, + "loss": 14.1252, + "step": 3740 + }, + { + "epoch": 0.15593347505314492, + "grad_norm": 272.0, + "learning_rate": 9.589906339436847e-05, + "loss": 12.0003, + "step": 3741 + }, + { + "epoch": 0.15597515735067316, + "grad_norm": 404.0, + "learning_rate": 9.589638575934632e-05, + "loss": 15.3755, + "step": 3742 + }, + { + "epoch": 0.1560168396482014, + "grad_norm": 1824.0, + "learning_rate": 9.589370728785847e-05, + "loss": 38.7508, + "step": 3743 + }, + { + "epoch": 0.15605852194572964, + "grad_norm": 540.0, + "learning_rate": 9.589102797995373e-05, + "loss": 18.7514, + "step": 3744 + }, + { + "epoch": 0.15610020424325788, + "grad_norm": 608.0, + "learning_rate": 9.588834783568094e-05, + "loss": 19.0011, + "step": 3745 + }, + { + "epoch": 0.15614188654078612, + "grad_norm": 193.0, + "learning_rate": 9.588566685508892e-05, + "loss": 11.3762, + "step": 3746 + }, + { + "epoch": 0.15618356883831436, + "grad_norm": 282.0, + "learning_rate": 9.588298503822655e-05, + "loss": 12.3754, + "step": 3747 + }, + { + "epoch": 0.1562252511358426, + "grad_norm": 880.0, + "learning_rate": 9.588030238514272e-05, + "loss": 20.0018, + "step": 3748 + }, + { + "epoch": 0.15626693343337084, + "grad_norm": 724.0, + "learning_rate": 9.587761889588628e-05, + "loss": 22.1259, + "step": 3749 + }, + { + "epoch": 0.15630861573089908, + "grad_norm": 588.0, + "learning_rate": 9.587493457050619e-05, + "loss": 18.0005, + "step": 3750 + }, + { + "epoch": 0.15635029802842731, + "grad_norm": 506.0, + "learning_rate": 9.587224940905133e-05, + "loss": 16.6264, + "step": 3751 + }, + { + "epoch": 0.15639198032595555, + "grad_norm": 382.0, + "learning_rate": 9.586956341157067e-05, + "loss": 14.1258, + "step": 3752 + }, + { + "epoch": 0.1564336626234838, + "grad_norm": 164.0, + "learning_rate": 9.586687657811314e-05, + "loss": 9.9381, + "step": 3753 + }, + { + "epoch": 0.15647534492101203, + "grad_norm": 211.0, + "learning_rate": 9.586418890872769e-05, + "loss": 10.8128, + "step": 3754 + }, + { + "epoch": 0.15651702721854027, + "grad_norm": 56.0, + "learning_rate": 9.586150040346333e-05, + "loss": 8.1884, + "step": 3755 + }, + { + "epoch": 0.15655870951606854, + "grad_norm": 532.0, + "learning_rate": 9.585881106236907e-05, + "loss": 18.3753, + "step": 3756 + }, + { + "epoch": 0.15660039181359678, + "grad_norm": 174.0, + "learning_rate": 9.58561208854939e-05, + "loss": 9.8756, + "step": 3757 + }, + { + "epoch": 0.15664207411112502, + "grad_norm": 163.0, + "learning_rate": 9.585342987288686e-05, + "loss": 10.0629, + "step": 3758 + }, + { + "epoch": 0.15668375640865326, + "grad_norm": 92.5, + "learning_rate": 9.585073802459699e-05, + "loss": 8.188, + "step": 3759 + }, + { + "epoch": 0.1567254387061815, + "grad_norm": 209.0, + "learning_rate": 9.584804534067335e-05, + "loss": 10.5003, + "step": 3760 + }, + { + "epoch": 0.15676712100370974, + "grad_norm": 384.0, + "learning_rate": 9.5845351821165e-05, + "loss": 14.5002, + "step": 3761 + }, + { + "epoch": 0.15680880330123798, + "grad_norm": 280.0, + "learning_rate": 9.584265746612106e-05, + "loss": 13.3753, + "step": 3762 + }, + { + "epoch": 0.15685048559876621, + "grad_norm": 490.0, + "learning_rate": 9.58399622755906e-05, + "loss": 17.8752, + "step": 3763 + }, + { + "epoch": 0.15689216789629445, + "grad_norm": 604.0, + "learning_rate": 9.583726624962278e-05, + "loss": 18.8758, + "step": 3764 + }, + { + "epoch": 0.1569338501938227, + "grad_norm": 290.0, + "learning_rate": 9.583456938826671e-05, + "loss": 13.0002, + "step": 3765 + }, + { + "epoch": 0.15697553249135093, + "grad_norm": 178.0, + "learning_rate": 9.583187169157153e-05, + "loss": 11.626, + "step": 3766 + }, + { + "epoch": 0.15701721478887917, + "grad_norm": 221.0, + "learning_rate": 9.582917315958642e-05, + "loss": 10.8753, + "step": 3767 + }, + { + "epoch": 0.1570588970864074, + "grad_norm": 612.0, + "learning_rate": 9.582647379236058e-05, + "loss": 18.1262, + "step": 3768 + }, + { + "epoch": 0.15710057938393565, + "grad_norm": 328.0, + "learning_rate": 9.582377358994317e-05, + "loss": 13.5007, + "step": 3769 + }, + { + "epoch": 0.1571422616814639, + "grad_norm": 424.0, + "learning_rate": 9.582107255238342e-05, + "loss": 14.628, + "step": 3770 + }, + { + "epoch": 0.15718394397899213, + "grad_norm": 402.0, + "learning_rate": 9.581837067973056e-05, + "loss": 13.3753, + "step": 3771 + }, + { + "epoch": 0.15722562627652037, + "grad_norm": 154.0, + "learning_rate": 9.581566797203384e-05, + "loss": 8.2503, + "step": 3772 + }, + { + "epoch": 0.1572673085740486, + "grad_norm": 228.0, + "learning_rate": 9.581296442934248e-05, + "loss": 10.688, + "step": 3773 + }, + { + "epoch": 0.15730899087157685, + "grad_norm": 876.0, + "learning_rate": 9.581026005170577e-05, + "loss": 22.5002, + "step": 3774 + }, + { + "epoch": 0.1573506731691051, + "grad_norm": 450.0, + "learning_rate": 9.580755483917303e-05, + "loss": 15.938, + "step": 3775 + }, + { + "epoch": 0.15739235546663333, + "grad_norm": 368.0, + "learning_rate": 9.580484879179352e-05, + "loss": 13.7503, + "step": 3776 + }, + { + "epoch": 0.15743403776416157, + "grad_norm": 438.0, + "learning_rate": 9.580214190961659e-05, + "loss": 13.3797, + "step": 3777 + }, + { + "epoch": 0.1574757200616898, + "grad_norm": 322.0, + "learning_rate": 9.579943419269155e-05, + "loss": 13.9379, + "step": 3778 + }, + { + "epoch": 0.15751740235921805, + "grad_norm": 652.0, + "learning_rate": 9.579672564106776e-05, + "loss": 19.8752, + "step": 3779 + }, + { + "epoch": 0.15755908465674628, + "grad_norm": 124.5, + "learning_rate": 9.579401625479456e-05, + "loss": 10.5007, + "step": 3780 + }, + { + "epoch": 0.15760076695427452, + "grad_norm": 225.0, + "learning_rate": 9.579130603392137e-05, + "loss": 12.1253, + "step": 3781 + }, + { + "epoch": 0.15764244925180276, + "grad_norm": 314.0, + "learning_rate": 9.578859497849755e-05, + "loss": 12.0649, + "step": 3782 + }, + { + "epoch": 0.157684131549331, + "grad_norm": 248.0, + "learning_rate": 9.578588308857253e-05, + "loss": 11.7503, + "step": 3783 + }, + { + "epoch": 0.15772581384685924, + "grad_norm": 776.0, + "learning_rate": 9.578317036419573e-05, + "loss": 22.5006, + "step": 3784 + }, + { + "epoch": 0.15776749614438748, + "grad_norm": 167.0, + "learning_rate": 9.578045680541657e-05, + "loss": 9.7504, + "step": 3785 + }, + { + "epoch": 0.15780917844191572, + "grad_norm": 426.0, + "learning_rate": 9.577774241228454e-05, + "loss": 17.2526, + "step": 3786 + }, + { + "epoch": 0.15785086073944396, + "grad_norm": 162.0, + "learning_rate": 9.577502718484908e-05, + "loss": 10.3762, + "step": 3787 + }, + { + "epoch": 0.1578925430369722, + "grad_norm": 125.0, + "learning_rate": 9.577231112315967e-05, + "loss": 8.2505, + "step": 3788 + }, + { + "epoch": 0.15793422533450044, + "grad_norm": 233.0, + "learning_rate": 9.576959422726586e-05, + "loss": 11.0003, + "step": 3789 + }, + { + "epoch": 0.15797590763202868, + "grad_norm": 382.0, + "learning_rate": 9.576687649721711e-05, + "loss": 14.2508, + "step": 3790 + }, + { + "epoch": 0.15801758992955692, + "grad_norm": 86.5, + "learning_rate": 9.576415793306298e-05, + "loss": 7.5947, + "step": 3791 + }, + { + "epoch": 0.15805927222708516, + "grad_norm": 1592.0, + "learning_rate": 9.5761438534853e-05, + "loss": 33.5067, + "step": 3792 + }, + { + "epoch": 0.1581009545246134, + "grad_norm": 1408.0, + "learning_rate": 9.575871830263675e-05, + "loss": 27.5033, + "step": 3793 + }, + { + "epoch": 0.15814263682214164, + "grad_norm": 520.0, + "learning_rate": 9.57559972364638e-05, + "loss": 17.2502, + "step": 3794 + }, + { + "epoch": 0.15818431911966988, + "grad_norm": 896.0, + "learning_rate": 9.575327533638371e-05, + "loss": 22.3804, + "step": 3795 + }, + { + "epoch": 0.15822600141719811, + "grad_norm": 442.0, + "learning_rate": 9.575055260244615e-05, + "loss": 16.2503, + "step": 3796 + }, + { + "epoch": 0.15826768371472635, + "grad_norm": 640.0, + "learning_rate": 9.57478290347007e-05, + "loss": 20.8753, + "step": 3797 + }, + { + "epoch": 0.1583093660122546, + "grad_norm": 508.0, + "learning_rate": 9.574510463319699e-05, + "loss": 16.1298, + "step": 3798 + }, + { + "epoch": 0.15835104830978283, + "grad_norm": 342.0, + "learning_rate": 9.57423793979847e-05, + "loss": 14.3754, + "step": 3799 + }, + { + "epoch": 0.15839273060731107, + "grad_norm": 864.0, + "learning_rate": 9.573965332911349e-05, + "loss": 21.0053, + "step": 3800 + }, + { + "epoch": 0.1584344129048393, + "grad_norm": 382.0, + "learning_rate": 9.573692642663303e-05, + "loss": 15.1878, + "step": 3801 + }, + { + "epoch": 0.15847609520236755, + "grad_norm": 780.0, + "learning_rate": 9.573419869059302e-05, + "loss": 20.0007, + "step": 3802 + }, + { + "epoch": 0.1585177774998958, + "grad_norm": 652.0, + "learning_rate": 9.573147012104319e-05, + "loss": 17.2541, + "step": 3803 + }, + { + "epoch": 0.15855945979742403, + "grad_norm": 147.0, + "learning_rate": 9.572874071803324e-05, + "loss": 8.7503, + "step": 3804 + }, + { + "epoch": 0.15860114209495227, + "grad_norm": 744.0, + "learning_rate": 9.572601048161294e-05, + "loss": 21.5007, + "step": 3805 + }, + { + "epoch": 0.1586428243924805, + "grad_norm": 175.0, + "learning_rate": 9.572327941183206e-05, + "loss": 11.1259, + "step": 3806 + }, + { + "epoch": 0.15868450669000875, + "grad_norm": 398.0, + "learning_rate": 9.572054750874033e-05, + "loss": 14.5003, + "step": 3807 + }, + { + "epoch": 0.158726188987537, + "grad_norm": 488.0, + "learning_rate": 9.571781477238757e-05, + "loss": 16.1256, + "step": 3808 + }, + { + "epoch": 0.15876787128506523, + "grad_norm": 402.0, + "learning_rate": 9.571508120282357e-05, + "loss": 13.131, + "step": 3809 + }, + { + "epoch": 0.15880955358259347, + "grad_norm": 466.0, + "learning_rate": 9.571234680009817e-05, + "loss": 16.3752, + "step": 3810 + }, + { + "epoch": 0.1588512358801217, + "grad_norm": 596.0, + "learning_rate": 9.570961156426118e-05, + "loss": 18.6262, + "step": 3811 + }, + { + "epoch": 0.15889291817764994, + "grad_norm": 153.0, + "learning_rate": 9.570687549536245e-05, + "loss": 9.7511, + "step": 3812 + }, + { + "epoch": 0.15893460047517818, + "grad_norm": 470.0, + "learning_rate": 9.570413859345189e-05, + "loss": 17.1252, + "step": 3813 + }, + { + "epoch": 0.15897628277270642, + "grad_norm": 510.0, + "learning_rate": 9.570140085857933e-05, + "loss": 16.7503, + "step": 3814 + }, + { + "epoch": 0.15901796507023466, + "grad_norm": 156.0, + "learning_rate": 9.569866229079468e-05, + "loss": 10.5003, + "step": 3815 + }, + { + "epoch": 0.1590596473677629, + "grad_norm": 580.0, + "learning_rate": 9.569592289014786e-05, + "loss": 18.3801, + "step": 3816 + }, + { + "epoch": 0.15910132966529114, + "grad_norm": 960.0, + "learning_rate": 9.569318265668879e-05, + "loss": 25.8755, + "step": 3817 + }, + { + "epoch": 0.15914301196281938, + "grad_norm": 294.0, + "learning_rate": 9.56904415904674e-05, + "loss": 13.814, + "step": 3818 + }, + { + "epoch": 0.15918469426034762, + "grad_norm": 584.0, + "learning_rate": 9.568769969153366e-05, + "loss": 17.5024, + "step": 3819 + }, + { + "epoch": 0.15922637655787586, + "grad_norm": 544.0, + "learning_rate": 9.568495695993754e-05, + "loss": 17.2503, + "step": 3820 + }, + { + "epoch": 0.1592680588554041, + "grad_norm": 240.0, + "learning_rate": 9.568221339572901e-05, + "loss": 12.188, + "step": 3821 + }, + { + "epoch": 0.15930974115293234, + "grad_norm": 229.0, + "learning_rate": 9.56794689989581e-05, + "loss": 11.0628, + "step": 3822 + }, + { + "epoch": 0.15935142345046058, + "grad_norm": 139.0, + "learning_rate": 9.56767237696748e-05, + "loss": 9.3754, + "step": 3823 + }, + { + "epoch": 0.15939310574798882, + "grad_norm": 326.0, + "learning_rate": 9.567397770792916e-05, + "loss": 13.0628, + "step": 3824 + }, + { + "epoch": 0.15943478804551706, + "grad_norm": 320.0, + "learning_rate": 9.567123081377123e-05, + "loss": 13.6878, + "step": 3825 + }, + { + "epoch": 0.1594764703430453, + "grad_norm": 106.0, + "learning_rate": 9.566848308725106e-05, + "loss": 10.1263, + "step": 3826 + }, + { + "epoch": 0.15951815264057354, + "grad_norm": 167.0, + "learning_rate": 9.566573452841872e-05, + "loss": 11.0007, + "step": 3827 + }, + { + "epoch": 0.15955983493810177, + "grad_norm": 207.0, + "learning_rate": 9.566298513732433e-05, + "loss": 11.3129, + "step": 3828 + }, + { + "epoch": 0.15960151723563004, + "grad_norm": 227.0, + "learning_rate": 9.566023491401798e-05, + "loss": 11.7502, + "step": 3829 + }, + { + "epoch": 0.15964319953315828, + "grad_norm": 129.0, + "learning_rate": 9.565748385854981e-05, + "loss": 8.8148, + "step": 3830 + }, + { + "epoch": 0.15968488183068652, + "grad_norm": 362.0, + "learning_rate": 9.56547319709699e-05, + "loss": 15.5648, + "step": 3831 + }, + { + "epoch": 0.15972656412821476, + "grad_norm": 194.0, + "learning_rate": 9.565197925132849e-05, + "loss": 11.5639, + "step": 3832 + }, + { + "epoch": 0.159768246425743, + "grad_norm": 338.0, + "learning_rate": 9.564922569967568e-05, + "loss": 14.0006, + "step": 3833 + }, + { + "epoch": 0.15980992872327124, + "grad_norm": 191.0, + "learning_rate": 9.564647131606168e-05, + "loss": 9.3754, + "step": 3834 + }, + { + "epoch": 0.15985161102079948, + "grad_norm": 648.0, + "learning_rate": 9.56437161005367e-05, + "loss": 18.3795, + "step": 3835 + }, + { + "epoch": 0.15989329331832772, + "grad_norm": 276.0, + "learning_rate": 9.564096005315094e-05, + "loss": 13.938, + "step": 3836 + }, + { + "epoch": 0.15993497561585596, + "grad_norm": 231.0, + "learning_rate": 9.563820317395462e-05, + "loss": 11.4378, + "step": 3837 + }, + { + "epoch": 0.1599766579133842, + "grad_norm": 446.0, + "learning_rate": 9.5635445462998e-05, + "loss": 14.6879, + "step": 3838 + }, + { + "epoch": 0.16001834021091244, + "grad_norm": 556.0, + "learning_rate": 9.563268692033136e-05, + "loss": 19.1252, + "step": 3839 + }, + { + "epoch": 0.16006002250844067, + "grad_norm": 442.0, + "learning_rate": 9.562992754600493e-05, + "loss": 15.8753, + "step": 3840 + }, + { + "epoch": 0.16010170480596891, + "grad_norm": 1048.0, + "learning_rate": 9.562716734006902e-05, + "loss": 21.2508, + "step": 3841 + }, + { + "epoch": 0.16014338710349715, + "grad_norm": 173.0, + "learning_rate": 9.562440630257392e-05, + "loss": 10.6879, + "step": 3842 + }, + { + "epoch": 0.1601850694010254, + "grad_norm": 792.0, + "learning_rate": 9.562164443356998e-05, + "loss": 21.8753, + "step": 3843 + }, + { + "epoch": 0.16022675169855363, + "grad_norm": 536.0, + "learning_rate": 9.561888173310754e-05, + "loss": 17.7503, + "step": 3844 + }, + { + "epoch": 0.16026843399608187, + "grad_norm": 1296.0, + "learning_rate": 9.56161182012369e-05, + "loss": 27.7554, + "step": 3845 + }, + { + "epoch": 0.1603101162936101, + "grad_norm": 292.0, + "learning_rate": 9.561335383800846e-05, + "loss": 12.0006, + "step": 3846 + }, + { + "epoch": 0.16035179859113835, + "grad_norm": 438.0, + "learning_rate": 9.56105886434726e-05, + "loss": 15.8755, + "step": 3847 + }, + { + "epoch": 0.1603934808886666, + "grad_norm": 362.0, + "learning_rate": 9.560782261767974e-05, + "loss": 11.1883, + "step": 3848 + }, + { + "epoch": 0.16043516318619483, + "grad_norm": 99.5, + "learning_rate": 9.560505576068022e-05, + "loss": 8.3774, + "step": 3849 + }, + { + "epoch": 0.16047684548372307, + "grad_norm": 276.0, + "learning_rate": 9.560228807252453e-05, + "loss": 12.313, + "step": 3850 + }, + { + "epoch": 0.1605185277812513, + "grad_norm": 176.0, + "learning_rate": 9.55995195532631e-05, + "loss": 9.8128, + "step": 3851 + }, + { + "epoch": 0.16056021007877955, + "grad_norm": 468.0, + "learning_rate": 9.559675020294637e-05, + "loss": 16.7509, + "step": 3852 + }, + { + "epoch": 0.1606018923763078, + "grad_norm": 804.0, + "learning_rate": 9.559398002162482e-05, + "loss": 23.7505, + "step": 3853 + }, + { + "epoch": 0.16064357467383603, + "grad_norm": 394.0, + "learning_rate": 9.559120900934893e-05, + "loss": 15.2516, + "step": 3854 + }, + { + "epoch": 0.16068525697136427, + "grad_norm": 55.0, + "learning_rate": 9.558843716616923e-05, + "loss": 7.4067, + "step": 3855 + }, + { + "epoch": 0.1607269392688925, + "grad_norm": 808.0, + "learning_rate": 9.55856644921362e-05, + "loss": 23.3759, + "step": 3856 + }, + { + "epoch": 0.16076862156642074, + "grad_norm": 776.0, + "learning_rate": 9.558289098730037e-05, + "loss": 21.1255, + "step": 3857 + }, + { + "epoch": 0.16081030386394898, + "grad_norm": 230.0, + "learning_rate": 9.558011665171234e-05, + "loss": 11.564, + "step": 3858 + }, + { + "epoch": 0.16085198616147722, + "grad_norm": 239.0, + "learning_rate": 9.557734148542262e-05, + "loss": 11.0004, + "step": 3859 + }, + { + "epoch": 0.16089366845900546, + "grad_norm": 392.0, + "learning_rate": 9.557456548848181e-05, + "loss": 14.7505, + "step": 3860 + }, + { + "epoch": 0.1609353507565337, + "grad_norm": 121.0, + "learning_rate": 9.557178866094049e-05, + "loss": 9.4381, + "step": 3861 + }, + { + "epoch": 0.16097703305406194, + "grad_norm": 420.0, + "learning_rate": 9.556901100284929e-05, + "loss": 15.7507, + "step": 3862 + }, + { + "epoch": 0.16101871535159018, + "grad_norm": 1304.0, + "learning_rate": 9.55662325142588e-05, + "loss": 30.88, + "step": 3863 + }, + { + "epoch": 0.16106039764911842, + "grad_norm": 240.0, + "learning_rate": 9.55634531952197e-05, + "loss": 12.0629, + "step": 3864 + }, + { + "epoch": 0.16110207994664666, + "grad_norm": 160.0, + "learning_rate": 9.55606730457826e-05, + "loss": 10.6256, + "step": 3865 + }, + { + "epoch": 0.1611437622441749, + "grad_norm": 474.0, + "learning_rate": 9.555789206599821e-05, + "loss": 16.2516, + "step": 3866 + }, + { + "epoch": 0.16118544454170314, + "grad_norm": 446.0, + "learning_rate": 9.555511025591716e-05, + "loss": 15.0007, + "step": 3867 + }, + { + "epoch": 0.16122712683923138, + "grad_norm": 536.0, + "learning_rate": 9.555232761559022e-05, + "loss": 18.8762, + "step": 3868 + }, + { + "epoch": 0.16126880913675962, + "grad_norm": 306.0, + "learning_rate": 9.554954414506805e-05, + "loss": 14.5034, + "step": 3869 + }, + { + "epoch": 0.16131049143428786, + "grad_norm": 89.5, + "learning_rate": 9.554675984440138e-05, + "loss": 8.188, + "step": 3870 + }, + { + "epoch": 0.1613521737318161, + "grad_norm": 372.0, + "learning_rate": 9.5543974713641e-05, + "loss": 13.8754, + "step": 3871 + }, + { + "epoch": 0.16139385602934433, + "grad_norm": 246.0, + "learning_rate": 9.554118875283762e-05, + "loss": 10.6254, + "step": 3872 + }, + { + "epoch": 0.16143553832687257, + "grad_norm": 408.0, + "learning_rate": 9.553840196204203e-05, + "loss": 14.8762, + "step": 3873 + }, + { + "epoch": 0.1614772206244008, + "grad_norm": 756.0, + "learning_rate": 9.553561434130501e-05, + "loss": 24.0003, + "step": 3874 + }, + { + "epoch": 0.16151890292192905, + "grad_norm": 604.0, + "learning_rate": 9.55328258906774e-05, + "loss": 21.3753, + "step": 3875 + }, + { + "epoch": 0.1615605852194573, + "grad_norm": 450.0, + "learning_rate": 9.553003661020998e-05, + "loss": 15.1877, + "step": 3876 + }, + { + "epoch": 0.16160226751698553, + "grad_norm": 540.0, + "learning_rate": 9.552724649995361e-05, + "loss": 17.3752, + "step": 3877 + }, + { + "epoch": 0.16164394981451377, + "grad_norm": 1600.0, + "learning_rate": 9.552445555995913e-05, + "loss": 38.2503, + "step": 3878 + }, + { + "epoch": 0.161685632112042, + "grad_norm": 364.0, + "learning_rate": 9.552166379027739e-05, + "loss": 11.8767, + "step": 3879 + }, + { + "epoch": 0.16172731440957025, + "grad_norm": 224.0, + "learning_rate": 9.55188711909593e-05, + "loss": 11.6255, + "step": 3880 + }, + { + "epoch": 0.1617689967070985, + "grad_norm": 125.5, + "learning_rate": 9.551607776205576e-05, + "loss": 9.6878, + "step": 3881 + }, + { + "epoch": 0.16181067900462673, + "grad_norm": 180.0, + "learning_rate": 9.551328350361763e-05, + "loss": 12.0628, + "step": 3882 + }, + { + "epoch": 0.16185236130215497, + "grad_norm": 544.0, + "learning_rate": 9.551048841569588e-05, + "loss": 17.1254, + "step": 3883 + }, + { + "epoch": 0.1618940435996832, + "grad_norm": 366.0, + "learning_rate": 9.550769249834144e-05, + "loss": 13.063, + "step": 3884 + }, + { + "epoch": 0.16193572589721145, + "grad_norm": 292.0, + "learning_rate": 9.550489575160527e-05, + "loss": 12.6251, + "step": 3885 + }, + { + "epoch": 0.16197740819473969, + "grad_norm": 592.0, + "learning_rate": 9.550209817553832e-05, + "loss": 19.5002, + "step": 3886 + }, + { + "epoch": 0.16201909049226793, + "grad_norm": 482.0, + "learning_rate": 9.549929977019161e-05, + "loss": 17.3755, + "step": 3887 + }, + { + "epoch": 0.16206077278979616, + "grad_norm": 442.0, + "learning_rate": 9.549650053561612e-05, + "loss": 16.1253, + "step": 3888 + }, + { + "epoch": 0.1621024550873244, + "grad_norm": 652.0, + "learning_rate": 9.549370047186286e-05, + "loss": 21.3758, + "step": 3889 + }, + { + "epoch": 0.16214413738485264, + "grad_norm": 652.0, + "learning_rate": 9.549089957898287e-05, + "loss": 19.6252, + "step": 3890 + }, + { + "epoch": 0.16218581968238088, + "grad_norm": 392.0, + "learning_rate": 9.548809785702719e-05, + "loss": 14.3127, + "step": 3891 + }, + { + "epoch": 0.16222750197990912, + "grad_norm": 126.0, + "learning_rate": 9.548529530604691e-05, + "loss": 7.9705, + "step": 3892 + }, + { + "epoch": 0.16226918427743736, + "grad_norm": 137.0, + "learning_rate": 9.548249192609306e-05, + "loss": 9.6877, + "step": 3893 + }, + { + "epoch": 0.1623108665749656, + "grad_norm": 442.0, + "learning_rate": 9.547968771721678e-05, + "loss": 14.8752, + "step": 3894 + }, + { + "epoch": 0.16235254887249384, + "grad_norm": 246.0, + "learning_rate": 9.547688267946915e-05, + "loss": 13.0628, + "step": 3895 + }, + { + "epoch": 0.16239423117002208, + "grad_norm": 568.0, + "learning_rate": 9.547407681290128e-05, + "loss": 17.0048, + "step": 3896 + }, + { + "epoch": 0.16243591346755032, + "grad_norm": 868.0, + "learning_rate": 9.547127011756434e-05, + "loss": 25.0003, + "step": 3897 + }, + { + "epoch": 0.16247759576507856, + "grad_norm": 520.0, + "learning_rate": 9.546846259350945e-05, + "loss": 18.6263, + "step": 3898 + }, + { + "epoch": 0.1625192780626068, + "grad_norm": 364.0, + "learning_rate": 9.546565424078781e-05, + "loss": 14.3757, + "step": 3899 + }, + { + "epoch": 0.16256096036013504, + "grad_norm": 976.0, + "learning_rate": 9.546284505945057e-05, + "loss": 23.1255, + "step": 3900 + }, + { + "epoch": 0.16260264265766328, + "grad_norm": 161.0, + "learning_rate": 9.546003504954895e-05, + "loss": 9.3755, + "step": 3901 + }, + { + "epoch": 0.16264432495519154, + "grad_norm": 716.0, + "learning_rate": 9.545722421113416e-05, + "loss": 20.0004, + "step": 3902 + }, + { + "epoch": 0.16268600725271978, + "grad_norm": 191.0, + "learning_rate": 9.545441254425742e-05, + "loss": 9.2505, + "step": 3903 + }, + { + "epoch": 0.16272768955024802, + "grad_norm": 316.0, + "learning_rate": 9.545160004896998e-05, + "loss": 14.5632, + "step": 3904 + }, + { + "epoch": 0.16276937184777626, + "grad_norm": 462.0, + "learning_rate": 9.54487867253231e-05, + "loss": 14.8754, + "step": 3905 + }, + { + "epoch": 0.1628110541453045, + "grad_norm": 442.0, + "learning_rate": 9.544597257336802e-05, + "loss": 16.5006, + "step": 3906 + }, + { + "epoch": 0.16285273644283274, + "grad_norm": 384.0, + "learning_rate": 9.544315759315607e-05, + "loss": 15.6282, + "step": 3907 + }, + { + "epoch": 0.16289441874036098, + "grad_norm": 768.0, + "learning_rate": 9.544034178473855e-05, + "loss": 21.6252, + "step": 3908 + }, + { + "epoch": 0.16293610103788922, + "grad_norm": 48.25, + "learning_rate": 9.543752514816675e-05, + "loss": 7.9378, + "step": 3909 + }, + { + "epoch": 0.16297778333541746, + "grad_norm": 512.0, + "learning_rate": 9.543470768349203e-05, + "loss": 18.1258, + "step": 3910 + }, + { + "epoch": 0.1630194656329457, + "grad_norm": 276.0, + "learning_rate": 9.543188939076572e-05, + "loss": 12.063, + "step": 3911 + }, + { + "epoch": 0.16306114793047394, + "grad_norm": 350.0, + "learning_rate": 9.542907027003923e-05, + "loss": 15.5002, + "step": 3912 + }, + { + "epoch": 0.16310283022800218, + "grad_norm": 232.0, + "learning_rate": 9.542625032136385e-05, + "loss": 13.0019, + "step": 3913 + }, + { + "epoch": 0.16314451252553042, + "grad_norm": 218.0, + "learning_rate": 9.542342954479106e-05, + "loss": 11.1254, + "step": 3914 + }, + { + "epoch": 0.16318619482305866, + "grad_norm": 424.0, + "learning_rate": 9.542060794037222e-05, + "loss": 15.7507, + "step": 3915 + }, + { + "epoch": 0.1632278771205869, + "grad_norm": 928.0, + "learning_rate": 9.541778550815876e-05, + "loss": 24.0002, + "step": 3916 + }, + { + "epoch": 0.16326955941811513, + "grad_norm": 464.0, + "learning_rate": 9.541496224820214e-05, + "loss": 16.6252, + "step": 3917 + }, + { + "epoch": 0.16331124171564337, + "grad_norm": 322.0, + "learning_rate": 9.541213816055381e-05, + "loss": 13.188, + "step": 3918 + }, + { + "epoch": 0.1633529240131716, + "grad_norm": 576.0, + "learning_rate": 9.540931324526521e-05, + "loss": 19.5008, + "step": 3919 + }, + { + "epoch": 0.16339460631069985, + "grad_norm": 600.0, + "learning_rate": 9.540648750238785e-05, + "loss": 21.5002, + "step": 3920 + }, + { + "epoch": 0.1634362886082281, + "grad_norm": 434.0, + "learning_rate": 9.540366093197323e-05, + "loss": 15.7516, + "step": 3921 + }, + { + "epoch": 0.16347797090575633, + "grad_norm": 356.0, + "learning_rate": 9.540083353407287e-05, + "loss": 15.0628, + "step": 3922 + }, + { + "epoch": 0.16351965320328457, + "grad_norm": 378.0, + "learning_rate": 9.539800530873828e-05, + "loss": 15.6877, + "step": 3923 + }, + { + "epoch": 0.1635613355008128, + "grad_norm": 388.0, + "learning_rate": 9.5395176256021e-05, + "loss": 14.3127, + "step": 3924 + }, + { + "epoch": 0.16360301779834105, + "grad_norm": 892.0, + "learning_rate": 9.53923463759726e-05, + "loss": 23.3759, + "step": 3925 + }, + { + "epoch": 0.1636447000958693, + "grad_norm": 1448.0, + "learning_rate": 9.538951566864468e-05, + "loss": 34.0003, + "step": 3926 + }, + { + "epoch": 0.16368638239339753, + "grad_norm": 888.0, + "learning_rate": 9.53866841340888e-05, + "loss": 23.6262, + "step": 3927 + }, + { + "epoch": 0.16372806469092577, + "grad_norm": 872.0, + "learning_rate": 9.538385177235658e-05, + "loss": 24.2515, + "step": 3928 + }, + { + "epoch": 0.163769746988454, + "grad_norm": 362.0, + "learning_rate": 9.538101858349962e-05, + "loss": 13.4386, + "step": 3929 + }, + { + "epoch": 0.16381142928598225, + "grad_norm": 322.0, + "learning_rate": 9.537818456756957e-05, + "loss": 14.0628, + "step": 3930 + }, + { + "epoch": 0.16385311158351049, + "grad_norm": 488.0, + "learning_rate": 9.537534972461808e-05, + "loss": 13.1288, + "step": 3931 + }, + { + "epoch": 0.16389479388103873, + "grad_norm": 147.0, + "learning_rate": 9.537251405469681e-05, + "loss": 11.5636, + "step": 3932 + }, + { + "epoch": 0.16393647617856696, + "grad_norm": 209.0, + "learning_rate": 9.536967755785744e-05, + "loss": 12.0008, + "step": 3933 + }, + { + "epoch": 0.1639781584760952, + "grad_norm": 143.0, + "learning_rate": 9.536684023415167e-05, + "loss": 10.8132, + "step": 3934 + }, + { + "epoch": 0.16401984077362344, + "grad_norm": 456.0, + "learning_rate": 9.536400208363122e-05, + "loss": 19.0004, + "step": 3935 + }, + { + "epoch": 0.16406152307115168, + "grad_norm": 80.5, + "learning_rate": 9.536116310634779e-05, + "loss": 7.6255, + "step": 3936 + }, + { + "epoch": 0.16410320536867992, + "grad_norm": 494.0, + "learning_rate": 9.535832330235314e-05, + "loss": 16.7512, + "step": 3937 + }, + { + "epoch": 0.16414488766620816, + "grad_norm": 416.0, + "learning_rate": 9.535548267169903e-05, + "loss": 14.8132, + "step": 3938 + }, + { + "epoch": 0.1641865699637364, + "grad_norm": 864.0, + "learning_rate": 9.535264121443722e-05, + "loss": 21.2553, + "step": 3939 + }, + { + "epoch": 0.16422825226126464, + "grad_norm": 476.0, + "learning_rate": 9.534979893061951e-05, + "loss": 16.5003, + "step": 3940 + }, + { + "epoch": 0.16426993455879288, + "grad_norm": 460.0, + "learning_rate": 9.534695582029767e-05, + "loss": 16.6254, + "step": 3941 + }, + { + "epoch": 0.16431161685632112, + "grad_norm": 452.0, + "learning_rate": 9.534411188352352e-05, + "loss": 15.6877, + "step": 3942 + }, + { + "epoch": 0.16435329915384936, + "grad_norm": 253.0, + "learning_rate": 9.534126712034895e-05, + "loss": 11.8752, + "step": 3943 + }, + { + "epoch": 0.1643949814513776, + "grad_norm": 308.0, + "learning_rate": 9.533842153082572e-05, + "loss": 13.9379, + "step": 3944 + }, + { + "epoch": 0.16443666374890584, + "grad_norm": 556.0, + "learning_rate": 9.533557511500574e-05, + "loss": 18.0002, + "step": 3945 + }, + { + "epoch": 0.16447834604643408, + "grad_norm": 322.0, + "learning_rate": 9.53327278729409e-05, + "loss": 12.6255, + "step": 3946 + }, + { + "epoch": 0.16452002834396232, + "grad_norm": 472.0, + "learning_rate": 9.532987980468305e-05, + "loss": 15.8761, + "step": 3947 + }, + { + "epoch": 0.16456171064149056, + "grad_norm": 474.0, + "learning_rate": 9.532703091028412e-05, + "loss": 16.376, + "step": 3948 + }, + { + "epoch": 0.1646033929390188, + "grad_norm": 70.5, + "learning_rate": 9.532418118979605e-05, + "loss": 8.6267, + "step": 3949 + }, + { + "epoch": 0.16464507523654703, + "grad_norm": 266.0, + "learning_rate": 9.532133064327073e-05, + "loss": 13.188, + "step": 3950 + }, + { + "epoch": 0.16468675753407527, + "grad_norm": 304.0, + "learning_rate": 9.531847927076015e-05, + "loss": 11.938, + "step": 3951 + }, + { + "epoch": 0.1647284398316035, + "grad_norm": 95.5, + "learning_rate": 9.531562707231625e-05, + "loss": 8.3131, + "step": 3952 + }, + { + "epoch": 0.16477012212913175, + "grad_norm": 129.0, + "learning_rate": 9.531277404799101e-05, + "loss": 8.1259, + "step": 3953 + }, + { + "epoch": 0.16481180442666, + "grad_norm": 326.0, + "learning_rate": 9.530992019783647e-05, + "loss": 13.7508, + "step": 3954 + }, + { + "epoch": 0.16485348672418823, + "grad_norm": 171.0, + "learning_rate": 9.530706552190461e-05, + "loss": 9.7502, + "step": 3955 + }, + { + "epoch": 0.16489516902171647, + "grad_norm": 200.0, + "learning_rate": 9.530421002024744e-05, + "loss": 10.9377, + "step": 3956 + }, + { + "epoch": 0.1649368513192447, + "grad_norm": 804.0, + "learning_rate": 9.530135369291702e-05, + "loss": 20.7503, + "step": 3957 + }, + { + "epoch": 0.16497853361677295, + "grad_norm": 544.0, + "learning_rate": 9.529849653996543e-05, + "loss": 19.1258, + "step": 3958 + }, + { + "epoch": 0.1650202159143012, + "grad_norm": 284.0, + "learning_rate": 9.52956385614447e-05, + "loss": 12.5006, + "step": 3959 + }, + { + "epoch": 0.16506189821182943, + "grad_norm": 476.0, + "learning_rate": 9.529277975740694e-05, + "loss": 15.9378, + "step": 3960 + }, + { + "epoch": 0.16510358050935767, + "grad_norm": 158.0, + "learning_rate": 9.528992012790425e-05, + "loss": 11.3753, + "step": 3961 + }, + { + "epoch": 0.1651452628068859, + "grad_norm": 916.0, + "learning_rate": 9.528705967298876e-05, + "loss": 26.1254, + "step": 3962 + }, + { + "epoch": 0.16518694510441415, + "grad_norm": 540.0, + "learning_rate": 9.528419839271257e-05, + "loss": 17.7509, + "step": 3963 + }, + { + "epoch": 0.16522862740194239, + "grad_norm": 272.0, + "learning_rate": 9.528133628712785e-05, + "loss": 12.6255, + "step": 3964 + }, + { + "epoch": 0.16527030969947062, + "grad_norm": 216.0, + "learning_rate": 9.527847335628675e-05, + "loss": 11.0003, + "step": 3965 + }, + { + "epoch": 0.16531199199699886, + "grad_norm": 368.0, + "learning_rate": 9.527560960024146e-05, + "loss": 15.6252, + "step": 3966 + }, + { + "epoch": 0.1653536742945271, + "grad_norm": 952.0, + "learning_rate": 9.527274501904416e-05, + "loss": 27.0007, + "step": 3967 + }, + { + "epoch": 0.16539535659205534, + "grad_norm": 1432.0, + "learning_rate": 9.526987961274707e-05, + "loss": 29.6299, + "step": 3968 + }, + { + "epoch": 0.16543703888958358, + "grad_norm": 448.0, + "learning_rate": 9.52670133814024e-05, + "loss": 16.7504, + "step": 3969 + }, + { + "epoch": 0.16547872118711182, + "grad_norm": 184.0, + "learning_rate": 9.526414632506239e-05, + "loss": 11.6259, + "step": 3970 + }, + { + "epoch": 0.16552040348464006, + "grad_norm": 1320.0, + "learning_rate": 9.52612784437793e-05, + "loss": 26.3807, + "step": 3971 + }, + { + "epoch": 0.1655620857821683, + "grad_norm": 214.0, + "learning_rate": 9.52584097376054e-05, + "loss": 9.4378, + "step": 3972 + }, + { + "epoch": 0.16560376807969654, + "grad_norm": 262.0, + "learning_rate": 9.525554020659295e-05, + "loss": 12.1253, + "step": 3973 + }, + { + "epoch": 0.16564545037722478, + "grad_norm": 298.0, + "learning_rate": 9.525266985079426e-05, + "loss": 12.8127, + "step": 3974 + }, + { + "epoch": 0.16568713267475305, + "grad_norm": 240.0, + "learning_rate": 9.524979867026168e-05, + "loss": 11.1254, + "step": 3975 + }, + { + "epoch": 0.16572881497228129, + "grad_norm": 274.0, + "learning_rate": 9.524692666504746e-05, + "loss": 13.063, + "step": 3976 + }, + { + "epoch": 0.16577049726980952, + "grad_norm": 80.5, + "learning_rate": 9.5244053835204e-05, + "loss": 8.0637, + "step": 3977 + }, + { + "epoch": 0.16581217956733776, + "grad_norm": 292.0, + "learning_rate": 9.524118018078366e-05, + "loss": 13.7503, + "step": 3978 + }, + { + "epoch": 0.165853861864866, + "grad_norm": 732.0, + "learning_rate": 9.523830570183876e-05, + "loss": 21.2501, + "step": 3979 + }, + { + "epoch": 0.16589554416239424, + "grad_norm": 161.0, + "learning_rate": 9.523543039842174e-05, + "loss": 10.3754, + "step": 3980 + }, + { + "epoch": 0.16593722645992248, + "grad_norm": 684.0, + "learning_rate": 9.5232554270585e-05, + "loss": 20.2503, + "step": 3981 + }, + { + "epoch": 0.16597890875745072, + "grad_norm": 1352.0, + "learning_rate": 9.522967731838093e-05, + "loss": 28.7553, + "step": 3982 + }, + { + "epoch": 0.16602059105497896, + "grad_norm": 175.0, + "learning_rate": 9.522679954186197e-05, + "loss": 10.938, + "step": 3983 + }, + { + "epoch": 0.1660622733525072, + "grad_norm": 328.0, + "learning_rate": 9.522392094108056e-05, + "loss": 10.7503, + "step": 3984 + }, + { + "epoch": 0.16610395565003544, + "grad_norm": 62.0, + "learning_rate": 9.522104151608922e-05, + "loss": 7.5632, + "step": 3985 + }, + { + "epoch": 0.16614563794756368, + "grad_norm": 380.0, + "learning_rate": 9.521816126694035e-05, + "loss": 14.8132, + "step": 3986 + }, + { + "epoch": 0.16618732024509192, + "grad_norm": 274.0, + "learning_rate": 9.521528019368648e-05, + "loss": 12.1879, + "step": 3987 + }, + { + "epoch": 0.16622900254262016, + "grad_norm": 160.0, + "learning_rate": 9.521239829638013e-05, + "loss": 10.5632, + "step": 3988 + }, + { + "epoch": 0.1662706848401484, + "grad_norm": 61.0, + "learning_rate": 9.52095155750738e-05, + "loss": 7.7815, + "step": 3989 + }, + { + "epoch": 0.16631236713767664, + "grad_norm": 354.0, + "learning_rate": 9.520663202982004e-05, + "loss": 14.6252, + "step": 3990 + }, + { + "epoch": 0.16635404943520488, + "grad_norm": 200.0, + "learning_rate": 9.520374766067137e-05, + "loss": 11.0027, + "step": 3991 + }, + { + "epoch": 0.16639573173273312, + "grad_norm": 268.0, + "learning_rate": 9.520086246768041e-05, + "loss": 13.1255, + "step": 3992 + }, + { + "epoch": 0.16643741403026135, + "grad_norm": 172.0, + "learning_rate": 9.519797645089971e-05, + "loss": 10.688, + "step": 3993 + }, + { + "epoch": 0.1664790963277896, + "grad_norm": 246.0, + "learning_rate": 9.51950896103819e-05, + "loss": 12.2503, + "step": 3994 + }, + { + "epoch": 0.16652077862531783, + "grad_norm": 536.0, + "learning_rate": 9.519220194617955e-05, + "loss": 17.8755, + "step": 3995 + }, + { + "epoch": 0.16656246092284607, + "grad_norm": 398.0, + "learning_rate": 9.51893134583453e-05, + "loss": 14.6258, + "step": 3996 + }, + { + "epoch": 0.1666041432203743, + "grad_norm": 512.0, + "learning_rate": 9.518642414693182e-05, + "loss": 17.5003, + "step": 3997 + }, + { + "epoch": 0.16664582551790255, + "grad_norm": 296.0, + "learning_rate": 9.518353401199173e-05, + "loss": 13.5631, + "step": 3998 + }, + { + "epoch": 0.1666875078154308, + "grad_norm": 192.0, + "learning_rate": 9.518064305357773e-05, + "loss": 9.1879, + "step": 3999 + }, + { + "epoch": 0.16672919011295903, + "grad_norm": 118.0, + "learning_rate": 9.517775127174252e-05, + "loss": 9.5011, + "step": 4000 + }, + { + "epoch": 0.16677087241048727, + "grad_norm": 114.0, + "learning_rate": 9.517485866653874e-05, + "loss": 7.8127, + "step": 4001 + }, + { + "epoch": 0.1668125547080155, + "grad_norm": 520.0, + "learning_rate": 9.517196523801919e-05, + "loss": 16.5003, + "step": 4002 + }, + { + "epoch": 0.16685423700554375, + "grad_norm": 360.0, + "learning_rate": 9.516907098623654e-05, + "loss": 15.0645, + "step": 4003 + }, + { + "epoch": 0.166895919303072, + "grad_norm": 620.0, + "learning_rate": 9.516617591124357e-05, + "loss": 22.3752, + "step": 4004 + }, + { + "epoch": 0.16693760160060023, + "grad_norm": 362.0, + "learning_rate": 9.516328001309303e-05, + "loss": 14.5003, + "step": 4005 + }, + { + "epoch": 0.16697928389812847, + "grad_norm": 120.5, + "learning_rate": 9.516038329183771e-05, + "loss": 11.3762, + "step": 4006 + }, + { + "epoch": 0.1670209661956567, + "grad_norm": 195.0, + "learning_rate": 9.515748574753038e-05, + "loss": 11.7513, + "step": 4007 + }, + { + "epoch": 0.16706264849318495, + "grad_norm": 564.0, + "learning_rate": 9.515458738022389e-05, + "loss": 19.3751, + "step": 4008 + }, + { + "epoch": 0.16710433079071318, + "grad_norm": 210.0, + "learning_rate": 9.515168818997102e-05, + "loss": 11.3127, + "step": 4009 + }, + { + "epoch": 0.16714601308824142, + "grad_norm": 410.0, + "learning_rate": 9.514878817682462e-05, + "loss": 14.4381, + "step": 4010 + }, + { + "epoch": 0.16718769538576966, + "grad_norm": 428.0, + "learning_rate": 9.514588734083756e-05, + "loss": 15.1251, + "step": 4011 + }, + { + "epoch": 0.1672293776832979, + "grad_norm": 1384.0, + "learning_rate": 9.514298568206268e-05, + "loss": 32.252, + "step": 4012 + }, + { + "epoch": 0.16727105998082614, + "grad_norm": 298.0, + "learning_rate": 9.514008320055289e-05, + "loss": 11.438, + "step": 4013 + }, + { + "epoch": 0.16731274227835438, + "grad_norm": 205.0, + "learning_rate": 9.513717989636107e-05, + "loss": 10.6878, + "step": 4014 + }, + { + "epoch": 0.16735442457588262, + "grad_norm": 238.0, + "learning_rate": 9.513427576954015e-05, + "loss": 11.1877, + "step": 4015 + }, + { + "epoch": 0.16739610687341086, + "grad_norm": 284.0, + "learning_rate": 9.513137082014305e-05, + "loss": 12.2504, + "step": 4016 + }, + { + "epoch": 0.1674377891709391, + "grad_norm": 324.0, + "learning_rate": 9.512846504822268e-05, + "loss": 11.1876, + "step": 4017 + }, + { + "epoch": 0.16747947146846734, + "grad_norm": 290.0, + "learning_rate": 9.512555845383207e-05, + "loss": 12.6265, + "step": 4018 + }, + { + "epoch": 0.16752115376599558, + "grad_norm": 201.0, + "learning_rate": 9.512265103702411e-05, + "loss": 11.1263, + "step": 4019 + }, + { + "epoch": 0.16756283606352382, + "grad_norm": 576.0, + "learning_rate": 9.511974279785185e-05, + "loss": 18.6254, + "step": 4020 + }, + { + "epoch": 0.16760451836105206, + "grad_norm": 600.0, + "learning_rate": 9.511683373636828e-05, + "loss": 18.1254, + "step": 4021 + }, + { + "epoch": 0.1676462006585803, + "grad_norm": 350.0, + "learning_rate": 9.511392385262641e-05, + "loss": 15.1257, + "step": 4022 + }, + { + "epoch": 0.16768788295610854, + "grad_norm": 242.0, + "learning_rate": 9.511101314667925e-05, + "loss": 10.6253, + "step": 4023 + }, + { + "epoch": 0.16772956525363678, + "grad_norm": 494.0, + "learning_rate": 9.51081016185799e-05, + "loss": 18.0007, + "step": 4024 + }, + { + "epoch": 0.16777124755116501, + "grad_norm": 61.75, + "learning_rate": 9.510518926838137e-05, + "loss": 7.719, + "step": 4025 + }, + { + "epoch": 0.16781292984869325, + "grad_norm": 336.0, + "learning_rate": 9.510227609613678e-05, + "loss": 13.439, + "step": 4026 + }, + { + "epoch": 0.1678546121462215, + "grad_norm": 92.0, + "learning_rate": 9.509936210189918e-05, + "loss": 7.1254, + "step": 4027 + }, + { + "epoch": 0.16789629444374973, + "grad_norm": 322.0, + "learning_rate": 9.509644728572172e-05, + "loss": 14.626, + "step": 4028 + }, + { + "epoch": 0.16793797674127797, + "grad_norm": 972.0, + "learning_rate": 9.50935316476575e-05, + "loss": 23.3804, + "step": 4029 + }, + { + "epoch": 0.1679796590388062, + "grad_norm": 272.0, + "learning_rate": 9.509061518775967e-05, + "loss": 11.5628, + "step": 4030 + }, + { + "epoch": 0.16802134133633445, + "grad_norm": 242.0, + "learning_rate": 9.508769790608136e-05, + "loss": 13.0003, + "step": 4031 + }, + { + "epoch": 0.1680630236338627, + "grad_norm": 488.0, + "learning_rate": 9.508477980267577e-05, + "loss": 15.0636, + "step": 4032 + }, + { + "epoch": 0.16810470593139093, + "grad_norm": 304.0, + "learning_rate": 9.508186087759606e-05, + "loss": 14.0634, + "step": 4033 + }, + { + "epoch": 0.16814638822891917, + "grad_norm": 498.0, + "learning_rate": 9.507894113089544e-05, + "loss": 17.6254, + "step": 4034 + }, + { + "epoch": 0.1681880705264474, + "grad_norm": 326.0, + "learning_rate": 9.50760205626271e-05, + "loss": 10.315, + "step": 4035 + }, + { + "epoch": 0.16822975282397565, + "grad_norm": 161.0, + "learning_rate": 9.50730991728443e-05, + "loss": 11.2505, + "step": 4036 + }, + { + "epoch": 0.1682714351215039, + "grad_norm": 217.0, + "learning_rate": 9.507017696160025e-05, + "loss": 11.0015, + "step": 4037 + }, + { + "epoch": 0.16831311741903213, + "grad_norm": 136.0, + "learning_rate": 9.506725392894824e-05, + "loss": 8.1256, + "step": 4038 + }, + { + "epoch": 0.16835479971656037, + "grad_norm": 241.0, + "learning_rate": 9.506433007494151e-05, + "loss": 12.1878, + "step": 4039 + }, + { + "epoch": 0.1683964820140886, + "grad_norm": 600.0, + "learning_rate": 9.506140539963337e-05, + "loss": 16.8793, + "step": 4040 + }, + { + "epoch": 0.16843816431161684, + "grad_norm": 282.0, + "learning_rate": 9.505847990307713e-05, + "loss": 13.1881, + "step": 4041 + }, + { + "epoch": 0.16847984660914508, + "grad_norm": 506.0, + "learning_rate": 9.505555358532608e-05, + "loss": 17.0004, + "step": 4042 + }, + { + "epoch": 0.16852152890667332, + "grad_norm": 536.0, + "learning_rate": 9.505262644643357e-05, + "loss": 17.2502, + "step": 4043 + }, + { + "epoch": 0.16856321120420156, + "grad_norm": 382.0, + "learning_rate": 9.504969848645293e-05, + "loss": 15.6253, + "step": 4044 + }, + { + "epoch": 0.1686048935017298, + "grad_norm": 249.0, + "learning_rate": 9.504676970543755e-05, + "loss": 12.5014, + "step": 4045 + }, + { + "epoch": 0.16864657579925804, + "grad_norm": 212.0, + "learning_rate": 9.504384010344081e-05, + "loss": 11.0002, + "step": 4046 + }, + { + "epoch": 0.16868825809678628, + "grad_norm": 1016.0, + "learning_rate": 9.504090968051605e-05, + "loss": 21.6295, + "step": 4047 + }, + { + "epoch": 0.16872994039431455, + "grad_norm": 230.0, + "learning_rate": 9.503797843671673e-05, + "loss": 11.6254, + "step": 4048 + }, + { + "epoch": 0.1687716226918428, + "grad_norm": 792.0, + "learning_rate": 9.503504637209625e-05, + "loss": 18.8777, + "step": 4049 + }, + { + "epoch": 0.16881330498937103, + "grad_norm": 644.0, + "learning_rate": 9.503211348670806e-05, + "loss": 18.8757, + "step": 4050 + }, + { + "epoch": 0.16885498728689927, + "grad_norm": 366.0, + "learning_rate": 9.502917978060562e-05, + "loss": 14.2502, + "step": 4051 + }, + { + "epoch": 0.1688966695844275, + "grad_norm": 236.0, + "learning_rate": 9.502624525384235e-05, + "loss": 11.1891, + "step": 4052 + }, + { + "epoch": 0.16893835188195575, + "grad_norm": 1888.0, + "learning_rate": 9.502330990647177e-05, + "loss": 38.5032, + "step": 4053 + }, + { + "epoch": 0.16898003417948398, + "grad_norm": 362.0, + "learning_rate": 9.502037373854737e-05, + "loss": 15.6877, + "step": 4054 + }, + { + "epoch": 0.16902171647701222, + "grad_norm": 396.0, + "learning_rate": 9.501743675012268e-05, + "loss": 14.5008, + "step": 4055 + }, + { + "epoch": 0.16906339877454046, + "grad_norm": 1104.0, + "learning_rate": 9.50144989412512e-05, + "loss": 23.5057, + "step": 4056 + }, + { + "epoch": 0.1691050810720687, + "grad_norm": 362.0, + "learning_rate": 9.501156031198647e-05, + "loss": 15.5009, + "step": 4057 + }, + { + "epoch": 0.16914676336959694, + "grad_norm": 217.0, + "learning_rate": 9.500862086238206e-05, + "loss": 11.8755, + "step": 4058 + }, + { + "epoch": 0.16918844566712518, + "grad_norm": 262.0, + "learning_rate": 9.500568059249155e-05, + "loss": 12.439, + "step": 4059 + }, + { + "epoch": 0.16923012796465342, + "grad_norm": 484.0, + "learning_rate": 9.50027395023685e-05, + "loss": 15.2504, + "step": 4060 + }, + { + "epoch": 0.16927181026218166, + "grad_norm": 150.0, + "learning_rate": 9.499979759206655e-05, + "loss": 10.188, + "step": 4061 + }, + { + "epoch": 0.1693134925597099, + "grad_norm": 564.0, + "learning_rate": 9.499685486163928e-05, + "loss": 19.1252, + "step": 4062 + }, + { + "epoch": 0.16935517485723814, + "grad_norm": 136.0, + "learning_rate": 9.499391131114032e-05, + "loss": 11.0003, + "step": 4063 + }, + { + "epoch": 0.16939685715476638, + "grad_norm": 442.0, + "learning_rate": 9.499096694062337e-05, + "loss": 14.7508, + "step": 4064 + }, + { + "epoch": 0.16943853945229462, + "grad_norm": 73.0, + "learning_rate": 9.498802175014203e-05, + "loss": 7.4377, + "step": 4065 + }, + { + "epoch": 0.16948022174982286, + "grad_norm": 243.0, + "learning_rate": 9.498507573975e-05, + "loss": 10.1878, + "step": 4066 + }, + { + "epoch": 0.1695219040473511, + "grad_norm": 219.0, + "learning_rate": 9.498212890950097e-05, + "loss": 10.7508, + "step": 4067 + }, + { + "epoch": 0.16956358634487934, + "grad_norm": 161.0, + "learning_rate": 9.497918125944864e-05, + "loss": 10.0627, + "step": 4068 + }, + { + "epoch": 0.16960526864240758, + "grad_norm": 524.0, + "learning_rate": 9.497623278964675e-05, + "loss": 19.2504, + "step": 4069 + }, + { + "epoch": 0.16964695093993581, + "grad_norm": 197.0, + "learning_rate": 9.497328350014904e-05, + "loss": 10.8752, + "step": 4070 + }, + { + "epoch": 0.16968863323746405, + "grad_norm": 306.0, + "learning_rate": 9.497033339100922e-05, + "loss": 13.5635, + "step": 4071 + }, + { + "epoch": 0.1697303155349923, + "grad_norm": 294.0, + "learning_rate": 9.49673824622811e-05, + "loss": 12.1878, + "step": 4072 + }, + { + "epoch": 0.16977199783252053, + "grad_norm": 179.0, + "learning_rate": 9.496443071401844e-05, + "loss": 10.8759, + "step": 4073 + }, + { + "epoch": 0.16981368013004877, + "grad_norm": 380.0, + "learning_rate": 9.496147814627503e-05, + "loss": 15.4381, + "step": 4074 + }, + { + "epoch": 0.169855362427577, + "grad_norm": 382.0, + "learning_rate": 9.49585247591047e-05, + "loss": 13.6879, + "step": 4075 + }, + { + "epoch": 0.16989704472510525, + "grad_norm": 268.0, + "learning_rate": 9.495557055256125e-05, + "loss": 13.5628, + "step": 4076 + }, + { + "epoch": 0.1699387270226335, + "grad_norm": 620.0, + "learning_rate": 9.495261552669853e-05, + "loss": 19.5002, + "step": 4077 + }, + { + "epoch": 0.16998040932016173, + "grad_norm": 161.0, + "learning_rate": 9.494965968157044e-05, + "loss": 9.6882, + "step": 4078 + }, + { + "epoch": 0.17002209161768997, + "grad_norm": 222.0, + "learning_rate": 9.494670301723077e-05, + "loss": 11.6253, + "step": 4079 + }, + { + "epoch": 0.1700637739152182, + "grad_norm": 119.5, + "learning_rate": 9.494374553373348e-05, + "loss": 9.3127, + "step": 4080 + }, + { + "epoch": 0.17010545621274645, + "grad_norm": 446.0, + "learning_rate": 9.494078723113242e-05, + "loss": 17.2508, + "step": 4081 + }, + { + "epoch": 0.1701471385102747, + "grad_norm": 268.0, + "learning_rate": 9.493782810948152e-05, + "loss": 11.9379, + "step": 4082 + }, + { + "epoch": 0.17018882080780293, + "grad_norm": 388.0, + "learning_rate": 9.493486816883472e-05, + "loss": 14.064, + "step": 4083 + }, + { + "epoch": 0.17023050310533117, + "grad_norm": 348.0, + "learning_rate": 9.493190740924596e-05, + "loss": 14.8752, + "step": 4084 + }, + { + "epoch": 0.1702721854028594, + "grad_norm": 324.0, + "learning_rate": 9.492894583076918e-05, + "loss": 13.1261, + "step": 4085 + }, + { + "epoch": 0.17031386770038764, + "grad_norm": 139.0, + "learning_rate": 9.49259834334584e-05, + "loss": 9.1881, + "step": 4086 + }, + { + "epoch": 0.17035554999791588, + "grad_norm": 140.0, + "learning_rate": 9.492302021736759e-05, + "loss": 9.813, + "step": 4087 + }, + { + "epoch": 0.17039723229544412, + "grad_norm": 245.0, + "learning_rate": 9.492005618255072e-05, + "loss": 10.0629, + "step": 4088 + }, + { + "epoch": 0.17043891459297236, + "grad_norm": 330.0, + "learning_rate": 9.491709132906185e-05, + "loss": 12.3135, + "step": 4089 + }, + { + "epoch": 0.1704805968905006, + "grad_norm": 470.0, + "learning_rate": 9.491412565695501e-05, + "loss": 17.6268, + "step": 4090 + }, + { + "epoch": 0.17052227918802884, + "grad_norm": 354.0, + "learning_rate": 9.491115916628424e-05, + "loss": 13.8754, + "step": 4091 + }, + { + "epoch": 0.17056396148555708, + "grad_norm": 198.0, + "learning_rate": 9.49081918571036e-05, + "loss": 11.7504, + "step": 4092 + }, + { + "epoch": 0.17060564378308532, + "grad_norm": 336.0, + "learning_rate": 9.490522372946718e-05, + "loss": 13.5628, + "step": 4093 + }, + { + "epoch": 0.17064732608061356, + "grad_norm": 298.0, + "learning_rate": 9.490225478342906e-05, + "loss": 11.6876, + "step": 4094 + }, + { + "epoch": 0.1706890083781418, + "grad_norm": 210.0, + "learning_rate": 9.489928501904339e-05, + "loss": 11.1255, + "step": 4095 + }, + { + "epoch": 0.17073069067567004, + "grad_norm": 394.0, + "learning_rate": 9.489631443636424e-05, + "loss": 12.2508, + "step": 4096 + }, + { + "epoch": 0.17077237297319828, + "grad_norm": 564.0, + "learning_rate": 9.48933430354458e-05, + "loss": 17.6252, + "step": 4097 + }, + { + "epoch": 0.17081405527072652, + "grad_norm": 328.0, + "learning_rate": 9.489037081634217e-05, + "loss": 13.7503, + "step": 4098 + }, + { + "epoch": 0.17085573756825476, + "grad_norm": 612.0, + "learning_rate": 9.488739777910756e-05, + "loss": 20.8756, + "step": 4099 + }, + { + "epoch": 0.170897419865783, + "grad_norm": 446.0, + "learning_rate": 9.488442392379613e-05, + "loss": 15.9395, + "step": 4100 + }, + { + "epoch": 0.17093910216331124, + "grad_norm": 66.5, + "learning_rate": 9.48814492504621e-05, + "loss": 5.7502, + "step": 4101 + }, + { + "epoch": 0.17098078446083947, + "grad_norm": 320.0, + "learning_rate": 9.487847375915966e-05, + "loss": 13.8128, + "step": 4102 + }, + { + "epoch": 0.1710224667583677, + "grad_norm": 190.0, + "learning_rate": 9.487549744994306e-05, + "loss": 10.6879, + "step": 4103 + }, + { + "epoch": 0.17106414905589595, + "grad_norm": 406.0, + "learning_rate": 9.487252032286655e-05, + "loss": 15.7503, + "step": 4104 + }, + { + "epoch": 0.1711058313534242, + "grad_norm": 314.0, + "learning_rate": 9.486954237798435e-05, + "loss": 13.1256, + "step": 4105 + }, + { + "epoch": 0.17114751365095243, + "grad_norm": 624.0, + "learning_rate": 9.486656361535077e-05, + "loss": 18.8753, + "step": 4106 + }, + { + "epoch": 0.17118919594848067, + "grad_norm": 372.0, + "learning_rate": 9.486358403502008e-05, + "loss": 14.563, + "step": 4107 + }, + { + "epoch": 0.1712308782460089, + "grad_norm": 644.0, + "learning_rate": 9.486060363704658e-05, + "loss": 18.7554, + "step": 4108 + }, + { + "epoch": 0.17127256054353715, + "grad_norm": 616.0, + "learning_rate": 9.485762242148461e-05, + "loss": 18.7511, + "step": 4109 + }, + { + "epoch": 0.1713142428410654, + "grad_norm": 284.0, + "learning_rate": 9.48546403883885e-05, + "loss": 12.6877, + "step": 4110 + }, + { + "epoch": 0.17135592513859363, + "grad_norm": 110.5, + "learning_rate": 9.485165753781257e-05, + "loss": 6.9071, + "step": 4111 + }, + { + "epoch": 0.17139760743612187, + "grad_norm": 628.0, + "learning_rate": 9.48486738698112e-05, + "loss": 19.1253, + "step": 4112 + }, + { + "epoch": 0.1714392897336501, + "grad_norm": 107.0, + "learning_rate": 9.484568938443878e-05, + "loss": 6.2825, + "step": 4113 + }, + { + "epoch": 0.17148097203117835, + "grad_norm": 612.0, + "learning_rate": 9.484270408174966e-05, + "loss": 19.6274, + "step": 4114 + }, + { + "epoch": 0.1715226543287066, + "grad_norm": 430.0, + "learning_rate": 9.483971796179831e-05, + "loss": 17.1258, + "step": 4115 + }, + { + "epoch": 0.17156433662623483, + "grad_norm": 249.0, + "learning_rate": 9.483673102463911e-05, + "loss": 11.2503, + "step": 4116 + }, + { + "epoch": 0.17160601892376307, + "grad_norm": 328.0, + "learning_rate": 9.48337432703265e-05, + "loss": 12.6877, + "step": 4117 + }, + { + "epoch": 0.1716477012212913, + "grad_norm": 234.0, + "learning_rate": 9.483075469891495e-05, + "loss": 11.6257, + "step": 4118 + }, + { + "epoch": 0.17168938351881954, + "grad_norm": 260.0, + "learning_rate": 9.48277653104589e-05, + "loss": 13.1878, + "step": 4119 + }, + { + "epoch": 0.17173106581634778, + "grad_norm": 1080.0, + "learning_rate": 9.482477510501286e-05, + "loss": 29.0003, + "step": 4120 + }, + { + "epoch": 0.17177274811387605, + "grad_norm": 57.75, + "learning_rate": 9.482178408263132e-05, + "loss": 8.0009, + "step": 4121 + }, + { + "epoch": 0.1718144304114043, + "grad_norm": 516.0, + "learning_rate": 9.481879224336877e-05, + "loss": 16.2503, + "step": 4122 + }, + { + "epoch": 0.17185611270893253, + "grad_norm": 1232.0, + "learning_rate": 9.481579958727975e-05, + "loss": 31.0011, + "step": 4123 + }, + { + "epoch": 0.17189779500646077, + "grad_norm": 133.0, + "learning_rate": 9.481280611441883e-05, + "loss": 9.5632, + "step": 4124 + }, + { + "epoch": 0.171939477303989, + "grad_norm": 342.0, + "learning_rate": 9.480981182484053e-05, + "loss": 14.4381, + "step": 4125 + }, + { + "epoch": 0.17198115960151725, + "grad_norm": 430.0, + "learning_rate": 9.480681671859941e-05, + "loss": 15.9377, + "step": 4126 + }, + { + "epoch": 0.1720228418990455, + "grad_norm": 258.0, + "learning_rate": 9.48038207957501e-05, + "loss": 12.0005, + "step": 4127 + }, + { + "epoch": 0.17206452419657373, + "grad_norm": 292.0, + "learning_rate": 9.480082405634717e-05, + "loss": 13.2504, + "step": 4128 + }, + { + "epoch": 0.17210620649410197, + "grad_norm": 95.5, + "learning_rate": 9.479782650044524e-05, + "loss": 9.6879, + "step": 4129 + }, + { + "epoch": 0.1721478887916302, + "grad_norm": 179.0, + "learning_rate": 9.479482812809897e-05, + "loss": 9.1878, + "step": 4130 + }, + { + "epoch": 0.17218957108915844, + "grad_norm": 422.0, + "learning_rate": 9.479182893936296e-05, + "loss": 16.3754, + "step": 4131 + }, + { + "epoch": 0.17223125338668668, + "grad_norm": 364.0, + "learning_rate": 9.478882893429188e-05, + "loss": 15.1879, + "step": 4132 + }, + { + "epoch": 0.17227293568421492, + "grad_norm": 474.0, + "learning_rate": 9.478582811294044e-05, + "loss": 16.2506, + "step": 4133 + }, + { + "epoch": 0.17231461798174316, + "grad_norm": 952.0, + "learning_rate": 9.47828264753633e-05, + "loss": 25.2506, + "step": 4134 + }, + { + "epoch": 0.1723563002792714, + "grad_norm": 976.0, + "learning_rate": 9.477982402161517e-05, + "loss": 23.2545, + "step": 4135 + }, + { + "epoch": 0.17239798257679964, + "grad_norm": 426.0, + "learning_rate": 9.477682075175076e-05, + "loss": 16.1267, + "step": 4136 + }, + { + "epoch": 0.17243966487432788, + "grad_norm": 628.0, + "learning_rate": 9.477381666582483e-05, + "loss": 19.5003, + "step": 4137 + }, + { + "epoch": 0.17248134717185612, + "grad_norm": 414.0, + "learning_rate": 9.477081176389212e-05, + "loss": 15.3128, + "step": 4138 + }, + { + "epoch": 0.17252302946938436, + "grad_norm": 454.0, + "learning_rate": 9.476780604600739e-05, + "loss": 16.8757, + "step": 4139 + }, + { + "epoch": 0.1725647117669126, + "grad_norm": 438.0, + "learning_rate": 9.476479951222542e-05, + "loss": 16.5002, + "step": 4140 + }, + { + "epoch": 0.17260639406444084, + "grad_norm": 312.0, + "learning_rate": 9.476179216260099e-05, + "loss": 12.6253, + "step": 4141 + }, + { + "epoch": 0.17264807636196908, + "grad_norm": 262.0, + "learning_rate": 9.475878399718894e-05, + "loss": 13.6253, + "step": 4142 + }, + { + "epoch": 0.17268975865949732, + "grad_norm": 446.0, + "learning_rate": 9.475577501604408e-05, + "loss": 16.876, + "step": 4143 + }, + { + "epoch": 0.17273144095702556, + "grad_norm": 125.0, + "learning_rate": 9.475276521922124e-05, + "loss": 8.2508, + "step": 4144 + }, + { + "epoch": 0.1727731232545538, + "grad_norm": 129.0, + "learning_rate": 9.474975460677528e-05, + "loss": 9.4378, + "step": 4145 + }, + { + "epoch": 0.17281480555208203, + "grad_norm": 756.0, + "learning_rate": 9.474674317876108e-05, + "loss": 21.6256, + "step": 4146 + }, + { + "epoch": 0.17285648784961027, + "grad_norm": 312.0, + "learning_rate": 9.474373093523349e-05, + "loss": 11.5008, + "step": 4147 + }, + { + "epoch": 0.1728981701471385, + "grad_norm": 508.0, + "learning_rate": 9.474071787624745e-05, + "loss": 17.1253, + "step": 4148 + }, + { + "epoch": 0.17293985244466675, + "grad_norm": 312.0, + "learning_rate": 9.473770400185787e-05, + "loss": 14.1252, + "step": 4149 + }, + { + "epoch": 0.172981534742195, + "grad_norm": 119.0, + "learning_rate": 9.473468931211964e-05, + "loss": 9.0017, + "step": 4150 + }, + { + "epoch": 0.17302321703972323, + "grad_norm": 190.0, + "learning_rate": 9.473167380708773e-05, + "loss": 10.1877, + "step": 4151 + }, + { + "epoch": 0.17306489933725147, + "grad_norm": 568.0, + "learning_rate": 9.47286574868171e-05, + "loss": 17.5017, + "step": 4152 + }, + { + "epoch": 0.1731065816347797, + "grad_norm": 1304.0, + "learning_rate": 9.472564035136271e-05, + "loss": 26.7543, + "step": 4153 + }, + { + "epoch": 0.17314826393230795, + "grad_norm": 316.0, + "learning_rate": 9.472262240077956e-05, + "loss": 14.189, + "step": 4154 + }, + { + "epoch": 0.1731899462298362, + "grad_norm": 688.0, + "learning_rate": 9.471960363512264e-05, + "loss": 20.6252, + "step": 4155 + }, + { + "epoch": 0.17323162852736443, + "grad_norm": 394.0, + "learning_rate": 9.471658405444697e-05, + "loss": 14.504, + "step": 4156 + }, + { + "epoch": 0.17327331082489267, + "grad_norm": 300.0, + "learning_rate": 9.471356365880759e-05, + "loss": 13.4377, + "step": 4157 + }, + { + "epoch": 0.1733149931224209, + "grad_norm": 71.0, + "learning_rate": 9.471054244825955e-05, + "loss": 7.5005, + "step": 4158 + }, + { + "epoch": 0.17335667541994915, + "grad_norm": 892.0, + "learning_rate": 9.47075204228579e-05, + "loss": 20.3754, + "step": 4159 + }, + { + "epoch": 0.17339835771747739, + "grad_norm": 69.5, + "learning_rate": 9.470449758265771e-05, + "loss": 8.6253, + "step": 4160 + }, + { + "epoch": 0.17344004001500563, + "grad_norm": 584.0, + "learning_rate": 9.470147392771412e-05, + "loss": 18.8759, + "step": 4161 + }, + { + "epoch": 0.17348172231253386, + "grad_norm": 724.0, + "learning_rate": 9.469844945808216e-05, + "loss": 21.0019, + "step": 4162 + }, + { + "epoch": 0.1735234046100621, + "grad_norm": 255.0, + "learning_rate": 9.4695424173817e-05, + "loss": 11.5628, + "step": 4163 + }, + { + "epoch": 0.17356508690759034, + "grad_norm": 94.0, + "learning_rate": 9.469239807497379e-05, + "loss": 6.9079, + "step": 4164 + }, + { + "epoch": 0.17360676920511858, + "grad_norm": 239.0, + "learning_rate": 9.468937116160763e-05, + "loss": 10.8129, + "step": 4165 + }, + { + "epoch": 0.17364845150264682, + "grad_norm": 884.0, + "learning_rate": 9.468634343377371e-05, + "loss": 21.2548, + "step": 4166 + }, + { + "epoch": 0.17369013380017506, + "grad_norm": 560.0, + "learning_rate": 9.468331489152724e-05, + "loss": 18.1253, + "step": 4167 + }, + { + "epoch": 0.1737318160977033, + "grad_norm": 524.0, + "learning_rate": 9.468028553492338e-05, + "loss": 19.0005, + "step": 4168 + }, + { + "epoch": 0.17377349839523154, + "grad_norm": 166.0, + "learning_rate": 9.467725536401734e-05, + "loss": 11.3128, + "step": 4169 + }, + { + "epoch": 0.17381518069275978, + "grad_norm": 318.0, + "learning_rate": 9.467422437886436e-05, + "loss": 12.5019, + "step": 4170 + }, + { + "epoch": 0.17385686299028802, + "grad_norm": 344.0, + "learning_rate": 9.467119257951969e-05, + "loss": 13.8752, + "step": 4171 + }, + { + "epoch": 0.17389854528781626, + "grad_norm": 928.0, + "learning_rate": 9.466815996603855e-05, + "loss": 24.2507, + "step": 4172 + }, + { + "epoch": 0.1739402275853445, + "grad_norm": 225.0, + "learning_rate": 9.466512653847623e-05, + "loss": 8.563, + "step": 4173 + }, + { + "epoch": 0.17398190988287274, + "grad_norm": 796.0, + "learning_rate": 9.466209229688801e-05, + "loss": 22.7512, + "step": 4174 + }, + { + "epoch": 0.17402359218040098, + "grad_norm": 284.0, + "learning_rate": 9.46590572413292e-05, + "loss": 13.2505, + "step": 4175 + }, + { + "epoch": 0.17406527447792922, + "grad_norm": 552.0, + "learning_rate": 9.465602137185511e-05, + "loss": 17.6258, + "step": 4176 + }, + { + "epoch": 0.17410695677545746, + "grad_norm": 490.0, + "learning_rate": 9.465298468852107e-05, + "loss": 16.8753, + "step": 4177 + }, + { + "epoch": 0.1741486390729857, + "grad_norm": 214.0, + "learning_rate": 9.464994719138241e-05, + "loss": 10.6253, + "step": 4178 + }, + { + "epoch": 0.17419032137051393, + "grad_norm": 145.0, + "learning_rate": 9.464690888049451e-05, + "loss": 9.7507, + "step": 4179 + }, + { + "epoch": 0.17423200366804217, + "grad_norm": 330.0, + "learning_rate": 9.464386975591273e-05, + "loss": 13.8753, + "step": 4180 + }, + { + "epoch": 0.1742736859655704, + "grad_norm": 350.0, + "learning_rate": 9.464082981769245e-05, + "loss": 13.5005, + "step": 4181 + }, + { + "epoch": 0.17431536826309865, + "grad_norm": 140.0, + "learning_rate": 9.46377890658891e-05, + "loss": 9.688, + "step": 4182 + }, + { + "epoch": 0.1743570505606269, + "grad_norm": 354.0, + "learning_rate": 9.463474750055808e-05, + "loss": 13.6885, + "step": 4183 + }, + { + "epoch": 0.17439873285815513, + "grad_norm": 1864.0, + "learning_rate": 9.46317051217548e-05, + "loss": 39.5006, + "step": 4184 + }, + { + "epoch": 0.17444041515568337, + "grad_norm": 1256.0, + "learning_rate": 9.462866192953475e-05, + "loss": 26.8756, + "step": 4185 + }, + { + "epoch": 0.1744820974532116, + "grad_norm": 276.0, + "learning_rate": 9.462561792395338e-05, + "loss": 12.7504, + "step": 4186 + }, + { + "epoch": 0.17452377975073985, + "grad_norm": 219.0, + "learning_rate": 9.462257310506615e-05, + "loss": 12.1878, + "step": 4187 + }, + { + "epoch": 0.1745654620482681, + "grad_norm": 332.0, + "learning_rate": 9.461952747292857e-05, + "loss": 14.5003, + "step": 4188 + }, + { + "epoch": 0.17460714434579633, + "grad_norm": 205.0, + "learning_rate": 9.461648102759614e-05, + "loss": 10.1253, + "step": 4189 + }, + { + "epoch": 0.17464882664332457, + "grad_norm": 452.0, + "learning_rate": 9.461343376912438e-05, + "loss": 15.0637, + "step": 4190 + }, + { + "epoch": 0.1746905089408528, + "grad_norm": 496.0, + "learning_rate": 9.461038569756883e-05, + "loss": 16.6252, + "step": 4191 + }, + { + "epoch": 0.17473219123838105, + "grad_norm": 188.0, + "learning_rate": 9.460733681298504e-05, + "loss": 10.0627, + "step": 4192 + }, + { + "epoch": 0.1747738735359093, + "grad_norm": 231.0, + "learning_rate": 9.460428711542859e-05, + "loss": 10.8126, + "step": 4193 + }, + { + "epoch": 0.17481555583343755, + "grad_norm": 79.5, + "learning_rate": 9.460123660495504e-05, + "loss": 8.9384, + "step": 4194 + }, + { + "epoch": 0.1748572381309658, + "grad_norm": 520.0, + "learning_rate": 9.459818528161998e-05, + "loss": 17.1253, + "step": 4195 + }, + { + "epoch": 0.17489892042849403, + "grad_norm": 135.0, + "learning_rate": 9.459513314547904e-05, + "loss": 10.4378, + "step": 4196 + }, + { + "epoch": 0.17494060272602227, + "grad_norm": 254.0, + "learning_rate": 9.459208019658785e-05, + "loss": 11.8753, + "step": 4197 + }, + { + "epoch": 0.1749822850235505, + "grad_norm": 548.0, + "learning_rate": 9.458902643500203e-05, + "loss": 15.8763, + "step": 4198 + }, + { + "epoch": 0.17502396732107875, + "grad_norm": 424.0, + "learning_rate": 9.458597186077724e-05, + "loss": 16.7507, + "step": 4199 + }, + { + "epoch": 0.175065649618607, + "grad_norm": 560.0, + "learning_rate": 9.458291647396918e-05, + "loss": 18.1252, + "step": 4200 + }, + { + "epoch": 0.17510733191613523, + "grad_norm": 192.0, + "learning_rate": 9.457986027463348e-05, + "loss": 10.1879, + "step": 4201 + }, + { + "epoch": 0.17514901421366347, + "grad_norm": 652.0, + "learning_rate": 9.457680326282588e-05, + "loss": 17.5016, + "step": 4202 + }, + { + "epoch": 0.1751906965111917, + "grad_norm": 448.0, + "learning_rate": 9.457374543860208e-05, + "loss": 14.3128, + "step": 4203 + }, + { + "epoch": 0.17523237880871995, + "grad_norm": 115.0, + "learning_rate": 9.457068680201783e-05, + "loss": 9.2504, + "step": 4204 + }, + { + "epoch": 0.17527406110624819, + "grad_norm": 350.0, + "learning_rate": 9.456762735312884e-05, + "loss": 14.8751, + "step": 4205 + }, + { + "epoch": 0.17531574340377643, + "grad_norm": 506.0, + "learning_rate": 9.456456709199089e-05, + "loss": 7.5972, + "step": 4206 + }, + { + "epoch": 0.17535742570130466, + "grad_norm": 1720.0, + "learning_rate": 9.456150601865975e-05, + "loss": 37.7555, + "step": 4207 + }, + { + "epoch": 0.1753991079988329, + "grad_norm": 46.75, + "learning_rate": 9.45584441331912e-05, + "loss": 6.6878, + "step": 4208 + }, + { + "epoch": 0.17544079029636114, + "grad_norm": 1056.0, + "learning_rate": 9.455538143564105e-05, + "loss": 24.2547, + "step": 4209 + }, + { + "epoch": 0.17548247259388938, + "grad_norm": 446.0, + "learning_rate": 9.455231792606514e-05, + "loss": 14.6268, + "step": 4210 + }, + { + "epoch": 0.17552415489141762, + "grad_norm": 366.0, + "learning_rate": 9.454925360451925e-05, + "loss": 14.5004, + "step": 4211 + }, + { + "epoch": 0.17556583718894586, + "grad_norm": 848.0, + "learning_rate": 9.454618847105927e-05, + "loss": 24.6251, + "step": 4212 + }, + { + "epoch": 0.1756075194864741, + "grad_norm": 316.0, + "learning_rate": 9.454312252574105e-05, + "loss": 13.8127, + "step": 4213 + }, + { + "epoch": 0.17564920178400234, + "grad_norm": 226.0, + "learning_rate": 9.454005576862049e-05, + "loss": 11.3752, + "step": 4214 + }, + { + "epoch": 0.17569088408153058, + "grad_norm": 234.0, + "learning_rate": 9.453698819975344e-05, + "loss": 10.938, + "step": 4215 + }, + { + "epoch": 0.17573256637905882, + "grad_norm": 436.0, + "learning_rate": 9.453391981919581e-05, + "loss": 17.8753, + "step": 4216 + }, + { + "epoch": 0.17577424867658706, + "grad_norm": 548.0, + "learning_rate": 9.453085062700356e-05, + "loss": 16.8774, + "step": 4217 + }, + { + "epoch": 0.1758159309741153, + "grad_norm": 388.0, + "learning_rate": 9.45277806232326e-05, + "loss": 15.3128, + "step": 4218 + }, + { + "epoch": 0.17585761327164354, + "grad_norm": 764.0, + "learning_rate": 9.452470980793888e-05, + "loss": 22.0004, + "step": 4219 + }, + { + "epoch": 0.17589929556917178, + "grad_norm": 544.0, + "learning_rate": 9.452163818117838e-05, + "loss": 16.7505, + "step": 4220 + }, + { + "epoch": 0.17594097786670002, + "grad_norm": 342.0, + "learning_rate": 9.451856574300705e-05, + "loss": 14.3763, + "step": 4221 + }, + { + "epoch": 0.17598266016422826, + "grad_norm": 256.0, + "learning_rate": 9.451549249348093e-05, + "loss": 12.5627, + "step": 4222 + }, + { + "epoch": 0.1760243424617565, + "grad_norm": 206.0, + "learning_rate": 9.451241843265602e-05, + "loss": 11.1878, + "step": 4223 + }, + { + "epoch": 0.17606602475928473, + "grad_norm": 1440.0, + "learning_rate": 9.450934356058829e-05, + "loss": 34.7502, + "step": 4224 + }, + { + "epoch": 0.17610770705681297, + "grad_norm": 164.0, + "learning_rate": 9.450626787733386e-05, + "loss": 9.7502, + "step": 4225 + }, + { + "epoch": 0.1761493893543412, + "grad_norm": 229.0, + "learning_rate": 9.450319138294873e-05, + "loss": 11.8753, + "step": 4226 + }, + { + "epoch": 0.17619107165186945, + "grad_norm": 61.0, + "learning_rate": 9.450011407748898e-05, + "loss": 8.3755, + "step": 4227 + }, + { + "epoch": 0.1762327539493977, + "grad_norm": 376.0, + "learning_rate": 9.449703596101071e-05, + "loss": 14.0034, + "step": 4228 + }, + { + "epoch": 0.17627443624692593, + "grad_norm": 588.0, + "learning_rate": 9.449395703357002e-05, + "loss": 17.5015, + "step": 4229 + }, + { + "epoch": 0.17631611854445417, + "grad_norm": 346.0, + "learning_rate": 9.4490877295223e-05, + "loss": 14.6254, + "step": 4230 + }, + { + "epoch": 0.1763578008419824, + "grad_norm": 392.0, + "learning_rate": 9.44877967460258e-05, + "loss": 15.3753, + "step": 4231 + }, + { + "epoch": 0.17639948313951065, + "grad_norm": 254.0, + "learning_rate": 9.448471538603454e-05, + "loss": 12.7502, + "step": 4232 + }, + { + "epoch": 0.1764411654370389, + "grad_norm": 350.0, + "learning_rate": 9.44816332153054e-05, + "loss": 13.5006, + "step": 4233 + }, + { + "epoch": 0.17648284773456713, + "grad_norm": 440.0, + "learning_rate": 9.447855023389455e-05, + "loss": 14.939, + "step": 4234 + }, + { + "epoch": 0.17652453003209537, + "grad_norm": 126.0, + "learning_rate": 9.447546644185818e-05, + "loss": 10.0012, + "step": 4235 + }, + { + "epoch": 0.1765662123296236, + "grad_norm": 378.0, + "learning_rate": 9.447238183925248e-05, + "loss": 16.0002, + "step": 4236 + }, + { + "epoch": 0.17660789462715185, + "grad_norm": 161.0, + "learning_rate": 9.446929642613367e-05, + "loss": 10.2503, + "step": 4237 + }, + { + "epoch": 0.17664957692468009, + "grad_norm": 208.0, + "learning_rate": 9.4466210202558e-05, + "loss": 11.5005, + "step": 4238 + }, + { + "epoch": 0.17669125922220832, + "grad_norm": 510.0, + "learning_rate": 9.446312316858168e-05, + "loss": 17.2502, + "step": 4239 + }, + { + "epoch": 0.17673294151973656, + "grad_norm": 161.0, + "learning_rate": 9.4460035324261e-05, + "loss": 10.0001, + "step": 4240 + }, + { + "epoch": 0.1767746238172648, + "grad_norm": 620.0, + "learning_rate": 9.445694666965222e-05, + "loss": 19.5008, + "step": 4241 + }, + { + "epoch": 0.17681630611479304, + "grad_norm": 239.0, + "learning_rate": 9.445385720481166e-05, + "loss": 12.8758, + "step": 4242 + }, + { + "epoch": 0.17685798841232128, + "grad_norm": 360.0, + "learning_rate": 9.44507669297956e-05, + "loss": 13.3752, + "step": 4243 + }, + { + "epoch": 0.17689967070984952, + "grad_norm": 528.0, + "learning_rate": 9.444767584466036e-05, + "loss": 19.0001, + "step": 4244 + }, + { + "epoch": 0.17694135300737776, + "grad_norm": 274.0, + "learning_rate": 9.444458394946229e-05, + "loss": 11.4412, + "step": 4245 + }, + { + "epoch": 0.176983035304906, + "grad_norm": 450.0, + "learning_rate": 9.444149124425771e-05, + "loss": 16.0008, + "step": 4246 + }, + { + "epoch": 0.17702471760243424, + "grad_norm": 262.0, + "learning_rate": 9.443839772910304e-05, + "loss": 12.4379, + "step": 4247 + }, + { + "epoch": 0.17706639989996248, + "grad_norm": 376.0, + "learning_rate": 9.44353034040546e-05, + "loss": 14.0628, + "step": 4248 + }, + { + "epoch": 0.17710808219749072, + "grad_norm": 494.0, + "learning_rate": 9.443220826916883e-05, + "loss": 16.5004, + "step": 4249 + }, + { + "epoch": 0.17714976449501896, + "grad_norm": 312.0, + "learning_rate": 9.442911232450212e-05, + "loss": 12.0002, + "step": 4250 + }, + { + "epoch": 0.1771914467925472, + "grad_norm": 2112.0, + "learning_rate": 9.442601557011088e-05, + "loss": 42.2502, + "step": 4251 + }, + { + "epoch": 0.17723312909007544, + "grad_norm": 125.5, + "learning_rate": 9.442291800605156e-05, + "loss": 9.001, + "step": 4252 + }, + { + "epoch": 0.17727481138760368, + "grad_norm": 532.0, + "learning_rate": 9.441981963238062e-05, + "loss": 16.8755, + "step": 4253 + }, + { + "epoch": 0.17731649368513192, + "grad_norm": 115.5, + "learning_rate": 9.441672044915453e-05, + "loss": 10.0635, + "step": 4254 + }, + { + "epoch": 0.17735817598266015, + "grad_norm": 488.0, + "learning_rate": 9.441362045642977e-05, + "loss": 18.0003, + "step": 4255 + }, + { + "epoch": 0.1773998582801884, + "grad_norm": 652.0, + "learning_rate": 9.441051965426283e-05, + "loss": 20.6263, + "step": 4256 + }, + { + "epoch": 0.17744154057771663, + "grad_norm": 253.0, + "learning_rate": 9.440741804271022e-05, + "loss": 12.5627, + "step": 4257 + }, + { + "epoch": 0.17748322287524487, + "grad_norm": 544.0, + "learning_rate": 9.440431562182849e-05, + "loss": 18.7504, + "step": 4258 + }, + { + "epoch": 0.1775249051727731, + "grad_norm": 223.0, + "learning_rate": 9.440121239167416e-05, + "loss": 11.6879, + "step": 4259 + }, + { + "epoch": 0.17756658747030135, + "grad_norm": 324.0, + "learning_rate": 9.439810835230379e-05, + "loss": 13.5039, + "step": 4260 + }, + { + "epoch": 0.1776082697678296, + "grad_norm": 163.0, + "learning_rate": 9.439500350377395e-05, + "loss": 9.3128, + "step": 4261 + }, + { + "epoch": 0.17764995206535783, + "grad_norm": 304.0, + "learning_rate": 9.439189784614122e-05, + "loss": 11.5652, + "step": 4262 + }, + { + "epoch": 0.17769163436288607, + "grad_norm": 282.0, + "learning_rate": 9.438879137946222e-05, + "loss": 13.3135, + "step": 4263 + }, + { + "epoch": 0.1777333166604143, + "grad_norm": 596.0, + "learning_rate": 9.438568410379356e-05, + "loss": 19.0004, + "step": 4264 + }, + { + "epoch": 0.17777499895794255, + "grad_norm": 540.0, + "learning_rate": 9.438257601919188e-05, + "loss": 20.0006, + "step": 4265 + }, + { + "epoch": 0.17781668125547082, + "grad_norm": 262.0, + "learning_rate": 9.43794671257138e-05, + "loss": 13.1911, + "step": 4266 + }, + { + "epoch": 0.17785836355299905, + "grad_norm": 260.0, + "learning_rate": 9.437635742341598e-05, + "loss": 12.0015, + "step": 4267 + }, + { + "epoch": 0.1779000458505273, + "grad_norm": 224.0, + "learning_rate": 9.437324691235512e-05, + "loss": 10.6271, + "step": 4268 + }, + { + "epoch": 0.17794172814805553, + "grad_norm": 260.0, + "learning_rate": 9.437013559258789e-05, + "loss": 11.8129, + "step": 4269 + }, + { + "epoch": 0.17798341044558377, + "grad_norm": 568.0, + "learning_rate": 9.436702346417101e-05, + "loss": 18.7513, + "step": 4270 + }, + { + "epoch": 0.178025092743112, + "grad_norm": 1012.0, + "learning_rate": 9.436391052716119e-05, + "loss": 28.626, + "step": 4271 + }, + { + "epoch": 0.17806677504064025, + "grad_norm": 386.0, + "learning_rate": 9.436079678161514e-05, + "loss": 12.2516, + "step": 4272 + }, + { + "epoch": 0.1781084573381685, + "grad_norm": 274.0, + "learning_rate": 9.435768222758965e-05, + "loss": 12.188, + "step": 4273 + }, + { + "epoch": 0.17815013963569673, + "grad_norm": 203.0, + "learning_rate": 9.435456686514145e-05, + "loss": 10.5653, + "step": 4274 + }, + { + "epoch": 0.17819182193322497, + "grad_norm": 520.0, + "learning_rate": 9.435145069432735e-05, + "loss": 15.4378, + "step": 4275 + }, + { + "epoch": 0.1782335042307532, + "grad_norm": 466.0, + "learning_rate": 9.434833371520411e-05, + "loss": 15.6271, + "step": 4276 + }, + { + "epoch": 0.17827518652828145, + "grad_norm": 328.0, + "learning_rate": 9.434521592782856e-05, + "loss": 13.5628, + "step": 4277 + }, + { + "epoch": 0.1783168688258097, + "grad_norm": 284.0, + "learning_rate": 9.43420973322575e-05, + "loss": 14.0034, + "step": 4278 + }, + { + "epoch": 0.17835855112333793, + "grad_norm": 262.0, + "learning_rate": 9.43389779285478e-05, + "loss": 13.0005, + "step": 4279 + }, + { + "epoch": 0.17840023342086617, + "grad_norm": 326.0, + "learning_rate": 9.433585771675629e-05, + "loss": 13.4378, + "step": 4280 + }, + { + "epoch": 0.1784419157183944, + "grad_norm": 304.0, + "learning_rate": 9.433273669693981e-05, + "loss": 13.3129, + "step": 4281 + }, + { + "epoch": 0.17848359801592265, + "grad_norm": 108.5, + "learning_rate": 9.432961486915528e-05, + "loss": 10.7506, + "step": 4282 + }, + { + "epoch": 0.17852528031345088, + "grad_norm": 556.0, + "learning_rate": 9.432649223345959e-05, + "loss": 18.2505, + "step": 4283 + }, + { + "epoch": 0.17856696261097912, + "grad_norm": 316.0, + "learning_rate": 9.432336878990965e-05, + "loss": 13.6883, + "step": 4284 + }, + { + "epoch": 0.17860864490850736, + "grad_norm": 167.0, + "learning_rate": 9.432024453856236e-05, + "loss": 10.8757, + "step": 4285 + }, + { + "epoch": 0.1786503272060356, + "grad_norm": 452.0, + "learning_rate": 9.43171194794747e-05, + "loss": 16.3753, + "step": 4286 + }, + { + "epoch": 0.17869200950356384, + "grad_norm": 239.0, + "learning_rate": 9.431399361270359e-05, + "loss": 12.0003, + "step": 4287 + }, + { + "epoch": 0.17873369180109208, + "grad_norm": 440.0, + "learning_rate": 9.431086693830602e-05, + "loss": 15.4379, + "step": 4288 + }, + { + "epoch": 0.17877537409862032, + "grad_norm": 1240.0, + "learning_rate": 9.430773945633896e-05, + "loss": 27.8794, + "step": 4289 + }, + { + "epoch": 0.17881705639614856, + "grad_norm": 508.0, + "learning_rate": 9.430461116685943e-05, + "loss": 15.3754, + "step": 4290 + }, + { + "epoch": 0.1788587386936768, + "grad_norm": 212.0, + "learning_rate": 9.43014820699244e-05, + "loss": 11.7505, + "step": 4291 + }, + { + "epoch": 0.17890042099120504, + "grad_norm": 143.0, + "learning_rate": 9.429835216559096e-05, + "loss": 10.0006, + "step": 4292 + }, + { + "epoch": 0.17894210328873328, + "grad_norm": 564.0, + "learning_rate": 9.42952214539161e-05, + "loss": 19.2503, + "step": 4293 + }, + { + "epoch": 0.17898378558626152, + "grad_norm": 296.0, + "learning_rate": 9.429208993495689e-05, + "loss": 13.6882, + "step": 4294 + }, + { + "epoch": 0.17902546788378976, + "grad_norm": 2016.0, + "learning_rate": 9.428895760877041e-05, + "loss": 40.5043, + "step": 4295 + }, + { + "epoch": 0.179067150181318, + "grad_norm": 524.0, + "learning_rate": 9.428582447541376e-05, + "loss": 17.1253, + "step": 4296 + }, + { + "epoch": 0.17910883247884624, + "grad_norm": 490.0, + "learning_rate": 9.428269053494403e-05, + "loss": 17.6254, + "step": 4297 + }, + { + "epoch": 0.17915051477637448, + "grad_norm": 175.0, + "learning_rate": 9.427955578741832e-05, + "loss": 9.1258, + "step": 4298 + }, + { + "epoch": 0.17919219707390271, + "grad_norm": 420.0, + "learning_rate": 9.427642023289377e-05, + "loss": 16.2503, + "step": 4299 + }, + { + "epoch": 0.17923387937143095, + "grad_norm": 576.0, + "learning_rate": 9.427328387142755e-05, + "loss": 16.7508, + "step": 4300 + }, + { + "epoch": 0.1792755616689592, + "grad_norm": 85.5, + "learning_rate": 9.427014670307679e-05, + "loss": 6.0009, + "step": 4301 + }, + { + "epoch": 0.17931724396648743, + "grad_norm": 1128.0, + "learning_rate": 9.426700872789869e-05, + "loss": 26.1293, + "step": 4302 + }, + { + "epoch": 0.17935892626401567, + "grad_norm": 276.0, + "learning_rate": 9.42638699459504e-05, + "loss": 12.9379, + "step": 4303 + }, + { + "epoch": 0.1794006085615439, + "grad_norm": 390.0, + "learning_rate": 9.426073035728917e-05, + "loss": 15.0627, + "step": 4304 + }, + { + "epoch": 0.17944229085907215, + "grad_norm": 148.0, + "learning_rate": 9.425758996197221e-05, + "loss": 9.938, + "step": 4305 + }, + { + "epoch": 0.1794839731566004, + "grad_norm": 218.0, + "learning_rate": 9.425444876005671e-05, + "loss": 12.0636, + "step": 4306 + }, + { + "epoch": 0.17952565545412863, + "grad_norm": 103.5, + "learning_rate": 9.42513067516e-05, + "loss": 9.2507, + "step": 4307 + }, + { + "epoch": 0.17956733775165687, + "grad_norm": 392.0, + "learning_rate": 9.424816393665925e-05, + "loss": 14.0627, + "step": 4308 + }, + { + "epoch": 0.1796090200491851, + "grad_norm": 97.0, + "learning_rate": 9.424502031529181e-05, + "loss": 8.5628, + "step": 4309 + }, + { + "epoch": 0.17965070234671335, + "grad_norm": 496.0, + "learning_rate": 9.424187588755493e-05, + "loss": 17.6257, + "step": 4310 + }, + { + "epoch": 0.1796923846442416, + "grad_norm": 229.0, + "learning_rate": 9.423873065350595e-05, + "loss": 11.1877, + "step": 4311 + }, + { + "epoch": 0.17973406694176983, + "grad_norm": 532.0, + "learning_rate": 9.423558461320216e-05, + "loss": 17.5007, + "step": 4312 + }, + { + "epoch": 0.17977574923929807, + "grad_norm": 233.0, + "learning_rate": 9.423243776670093e-05, + "loss": 12.7503, + "step": 4313 + }, + { + "epoch": 0.1798174315368263, + "grad_norm": 306.0, + "learning_rate": 9.422929011405959e-05, + "loss": 13.1253, + "step": 4314 + }, + { + "epoch": 0.17985911383435454, + "grad_norm": 174.0, + "learning_rate": 9.422614165533552e-05, + "loss": 10.3149, + "step": 4315 + }, + { + "epoch": 0.17990079613188278, + "grad_norm": 150.0, + "learning_rate": 9.422299239058607e-05, + "loss": 10.6889, + "step": 4316 + }, + { + "epoch": 0.17994247842941102, + "grad_norm": 378.0, + "learning_rate": 9.421984231986868e-05, + "loss": 14.1257, + "step": 4317 + }, + { + "epoch": 0.17998416072693926, + "grad_norm": 223.0, + "learning_rate": 9.421669144324072e-05, + "loss": 11.0634, + "step": 4318 + }, + { + "epoch": 0.1800258430244675, + "grad_norm": 680.0, + "learning_rate": 9.421353976075965e-05, + "loss": 22.5026, + "step": 4319 + }, + { + "epoch": 0.18006752532199574, + "grad_norm": 568.0, + "learning_rate": 9.421038727248288e-05, + "loss": 18.3755, + "step": 4320 + }, + { + "epoch": 0.18010920761952398, + "grad_norm": 732.0, + "learning_rate": 9.42072339784679e-05, + "loss": 20.1255, + "step": 4321 + }, + { + "epoch": 0.18015088991705222, + "grad_norm": 498.0, + "learning_rate": 9.420407987877213e-05, + "loss": 16.7523, + "step": 4322 + }, + { + "epoch": 0.18019257221458046, + "grad_norm": 390.0, + "learning_rate": 9.420092497345308e-05, + "loss": 15.3126, + "step": 4323 + }, + { + "epoch": 0.1802342545121087, + "grad_norm": 130.0, + "learning_rate": 9.419776926256827e-05, + "loss": 9.0639, + "step": 4324 + }, + { + "epoch": 0.18027593680963694, + "grad_norm": 50.5, + "learning_rate": 9.419461274617518e-05, + "loss": 8.3143, + "step": 4325 + }, + { + "epoch": 0.18031761910716518, + "grad_norm": 2096.0, + "learning_rate": 9.419145542433134e-05, + "loss": 43.5079, + "step": 4326 + }, + { + "epoch": 0.18035930140469342, + "grad_norm": 420.0, + "learning_rate": 9.41882972970943e-05, + "loss": 14.5628, + "step": 4327 + }, + { + "epoch": 0.18040098370222166, + "grad_norm": 117.0, + "learning_rate": 9.418513836452166e-05, + "loss": 9.4377, + "step": 4328 + }, + { + "epoch": 0.1804426659997499, + "grad_norm": 486.0, + "learning_rate": 9.418197862667091e-05, + "loss": 17.2504, + "step": 4329 + }, + { + "epoch": 0.18048434829727814, + "grad_norm": 840.0, + "learning_rate": 9.417881808359969e-05, + "loss": 22.6256, + "step": 4330 + }, + { + "epoch": 0.18052603059480637, + "grad_norm": 268.0, + "learning_rate": 9.417565673536558e-05, + "loss": 12.4394, + "step": 4331 + }, + { + "epoch": 0.18056771289233461, + "grad_norm": 492.0, + "learning_rate": 9.417249458202622e-05, + "loss": 16.2503, + "step": 4332 + }, + { + "epoch": 0.18060939518986285, + "grad_norm": 298.0, + "learning_rate": 9.41693316236392e-05, + "loss": 12.0633, + "step": 4333 + }, + { + "epoch": 0.1806510774873911, + "grad_norm": 478.0, + "learning_rate": 9.416616786026222e-05, + "loss": 16.8757, + "step": 4334 + }, + { + "epoch": 0.18069275978491933, + "grad_norm": 596.0, + "learning_rate": 9.416300329195289e-05, + "loss": 18.1296, + "step": 4335 + }, + { + "epoch": 0.18073444208244757, + "grad_norm": 183.0, + "learning_rate": 9.41598379187689e-05, + "loss": 5.0948, + "step": 4336 + }, + { + "epoch": 0.1807761243799758, + "grad_norm": 280.0, + "learning_rate": 9.415667174076796e-05, + "loss": 12.3129, + "step": 4337 + }, + { + "epoch": 0.18081780667750405, + "grad_norm": 378.0, + "learning_rate": 9.415350475800776e-05, + "loss": 14.8757, + "step": 4338 + }, + { + "epoch": 0.18085948897503232, + "grad_norm": 458.0, + "learning_rate": 9.4150336970546e-05, + "loss": 18.5006, + "step": 4339 + }, + { + "epoch": 0.18090117127256056, + "grad_norm": 472.0, + "learning_rate": 9.414716837844044e-05, + "loss": 14.0639, + "step": 4340 + }, + { + "epoch": 0.1809428535700888, + "grad_norm": 171.0, + "learning_rate": 9.414399898174881e-05, + "loss": 10.8128, + "step": 4341 + }, + { + "epoch": 0.18098453586761704, + "grad_norm": 600.0, + "learning_rate": 9.41408287805289e-05, + "loss": 19.2503, + "step": 4342 + }, + { + "epoch": 0.18102621816514528, + "grad_norm": 584.0, + "learning_rate": 9.413765777483845e-05, + "loss": 17.5005, + "step": 4343 + }, + { + "epoch": 0.18106790046267351, + "grad_norm": 61.75, + "learning_rate": 9.413448596473527e-05, + "loss": 6.5639, + "step": 4344 + }, + { + "epoch": 0.18110958276020175, + "grad_norm": 462.0, + "learning_rate": 9.413131335027716e-05, + "loss": 16.0001, + "step": 4345 + }, + { + "epoch": 0.18115126505773, + "grad_norm": 352.0, + "learning_rate": 9.412813993152195e-05, + "loss": 13.6884, + "step": 4346 + }, + { + "epoch": 0.18119294735525823, + "grad_norm": 552.0, + "learning_rate": 9.412496570852748e-05, + "loss": 17.8755, + "step": 4347 + }, + { + "epoch": 0.18123462965278647, + "grad_norm": 1632.0, + "learning_rate": 9.412179068135158e-05, + "loss": 32.2576, + "step": 4348 + }, + { + "epoch": 0.1812763119503147, + "grad_norm": 356.0, + "learning_rate": 9.411861485005213e-05, + "loss": 12.8752, + "step": 4349 + }, + { + "epoch": 0.18131799424784295, + "grad_norm": 276.0, + "learning_rate": 9.411543821468702e-05, + "loss": 11.7505, + "step": 4350 + }, + { + "epoch": 0.1813596765453712, + "grad_norm": 202.0, + "learning_rate": 9.411226077531413e-05, + "loss": 11.813, + "step": 4351 + }, + { + "epoch": 0.18140135884289943, + "grad_norm": 290.0, + "learning_rate": 9.410908253199136e-05, + "loss": 13.5024, + "step": 4352 + }, + { + "epoch": 0.18144304114042767, + "grad_norm": 512.0, + "learning_rate": 9.410590348477665e-05, + "loss": 18.6252, + "step": 4353 + }, + { + "epoch": 0.1814847234379559, + "grad_norm": 410.0, + "learning_rate": 9.410272363372795e-05, + "loss": 15.0674, + "step": 4354 + }, + { + "epoch": 0.18152640573548415, + "grad_norm": 260.0, + "learning_rate": 9.409954297890318e-05, + "loss": 13.2502, + "step": 4355 + }, + { + "epoch": 0.1815680880330124, + "grad_norm": 386.0, + "learning_rate": 9.409636152036032e-05, + "loss": 14.5004, + "step": 4356 + }, + { + "epoch": 0.18160977033054063, + "grad_norm": 1088.0, + "learning_rate": 9.409317925815737e-05, + "loss": 27.3753, + "step": 4357 + }, + { + "epoch": 0.18165145262806887, + "grad_norm": 262.0, + "learning_rate": 9.408999619235231e-05, + "loss": 11.8127, + "step": 4358 + }, + { + "epoch": 0.1816931349255971, + "grad_norm": 170.0, + "learning_rate": 9.408681232300315e-05, + "loss": 10.1878, + "step": 4359 + }, + { + "epoch": 0.18173481722312534, + "grad_norm": 314.0, + "learning_rate": 9.408362765016791e-05, + "loss": 14.0629, + "step": 4360 + }, + { + "epoch": 0.18177649952065358, + "grad_norm": 227.0, + "learning_rate": 9.408044217390468e-05, + "loss": 11.3752, + "step": 4361 + }, + { + "epoch": 0.18181818181818182, + "grad_norm": 212.0, + "learning_rate": 9.407725589427144e-05, + "loss": 11.438, + "step": 4362 + }, + { + "epoch": 0.18185986411571006, + "grad_norm": 92.0, + "learning_rate": 9.407406881132633e-05, + "loss": 8.6876, + "step": 4363 + }, + { + "epoch": 0.1819015464132383, + "grad_norm": 298.0, + "learning_rate": 9.407088092512737e-05, + "loss": 13.4379, + "step": 4364 + }, + { + "epoch": 0.18194322871076654, + "grad_norm": 334.0, + "learning_rate": 9.406769223573272e-05, + "loss": 14.3129, + "step": 4365 + }, + { + "epoch": 0.18198491100829478, + "grad_norm": 1336.0, + "learning_rate": 9.406450274320045e-05, + "loss": 27.7543, + "step": 4366 + }, + { + "epoch": 0.18202659330582302, + "grad_norm": 478.0, + "learning_rate": 9.406131244758871e-05, + "loss": 17.1253, + "step": 4367 + }, + { + "epoch": 0.18206827560335126, + "grad_norm": 260.0, + "learning_rate": 9.405812134895564e-05, + "loss": 11.8128, + "step": 4368 + }, + { + "epoch": 0.1821099579008795, + "grad_norm": 262.0, + "learning_rate": 9.405492944735941e-05, + "loss": 9.1258, + "step": 4369 + }, + { + "epoch": 0.18215164019840774, + "grad_norm": 187.0, + "learning_rate": 9.405173674285817e-05, + "loss": 9.6253, + "step": 4370 + }, + { + "epoch": 0.18219332249593598, + "grad_norm": 316.0, + "learning_rate": 9.404854323551011e-05, + "loss": 12.6278, + "step": 4371 + }, + { + "epoch": 0.18223500479346422, + "grad_norm": 55.5, + "learning_rate": 9.404534892537344e-05, + "loss": 8.2507, + "step": 4372 + }, + { + "epoch": 0.18227668709099246, + "grad_norm": 260.0, + "learning_rate": 9.404215381250638e-05, + "loss": 11.8132, + "step": 4373 + }, + { + "epoch": 0.1823183693885207, + "grad_norm": 936.0, + "learning_rate": 9.403895789696717e-05, + "loss": 20.1295, + "step": 4374 + }, + { + "epoch": 0.18236005168604894, + "grad_norm": 111.5, + "learning_rate": 9.403576117881403e-05, + "loss": 8.8753, + "step": 4375 + }, + { + "epoch": 0.18240173398357717, + "grad_norm": 320.0, + "learning_rate": 9.403256365810524e-05, + "loss": 13.5007, + "step": 4376 + }, + { + "epoch": 0.1824434162811054, + "grad_norm": 110.0, + "learning_rate": 9.402936533489906e-05, + "loss": 6.4693, + "step": 4377 + }, + { + "epoch": 0.18248509857863365, + "grad_norm": 49.25, + "learning_rate": 9.40261662092538e-05, + "loss": 7.5626, + "step": 4378 + }, + { + "epoch": 0.1825267808761619, + "grad_norm": 532.0, + "learning_rate": 9.402296628122774e-05, + "loss": 17.1259, + "step": 4379 + }, + { + "epoch": 0.18256846317369013, + "grad_norm": 130.0, + "learning_rate": 9.401976555087921e-05, + "loss": 7.1581, + "step": 4380 + }, + { + "epoch": 0.18261014547121837, + "grad_norm": 220.0, + "learning_rate": 9.401656401826656e-05, + "loss": 11.0005, + "step": 4381 + }, + { + "epoch": 0.1826518277687466, + "grad_norm": 120.5, + "learning_rate": 9.40133616834481e-05, + "loss": 8.063, + "step": 4382 + }, + { + "epoch": 0.18269351006627485, + "grad_norm": 446.0, + "learning_rate": 9.401015854648223e-05, + "loss": 14.8132, + "step": 4383 + }, + { + "epoch": 0.1827351923638031, + "grad_norm": 262.0, + "learning_rate": 9.400695460742732e-05, + "loss": 11.4392, + "step": 4384 + }, + { + "epoch": 0.18277687466133133, + "grad_norm": 292.0, + "learning_rate": 9.400374986634175e-05, + "loss": 13.0005, + "step": 4385 + }, + { + "epoch": 0.18281855695885957, + "grad_norm": 227.0, + "learning_rate": 9.400054432328394e-05, + "loss": 11.4378, + "step": 4386 + }, + { + "epoch": 0.1828602392563878, + "grad_norm": 338.0, + "learning_rate": 9.399733797831229e-05, + "loss": 14.5628, + "step": 4387 + }, + { + "epoch": 0.18290192155391605, + "grad_norm": 436.0, + "learning_rate": 9.399413083148525e-05, + "loss": 16.2504, + "step": 4388 + }, + { + "epoch": 0.1829436038514443, + "grad_norm": 133.0, + "learning_rate": 9.399092288286128e-05, + "loss": 6.9696, + "step": 4389 + }, + { + "epoch": 0.18298528614897253, + "grad_norm": 704.0, + "learning_rate": 9.398771413249883e-05, + "loss": 21.0006, + "step": 4390 + }, + { + "epoch": 0.18302696844650077, + "grad_norm": 498.0, + "learning_rate": 9.398450458045638e-05, + "loss": 18.1252, + "step": 4391 + }, + { + "epoch": 0.183068650744029, + "grad_norm": 137.0, + "learning_rate": 9.398129422679243e-05, + "loss": 10.313, + "step": 4392 + }, + { + "epoch": 0.18311033304155724, + "grad_norm": 320.0, + "learning_rate": 9.397808307156551e-05, + "loss": 14.3752, + "step": 4393 + }, + { + "epoch": 0.18315201533908548, + "grad_norm": 368.0, + "learning_rate": 9.39748711148341e-05, + "loss": 15.5627, + "step": 4394 + }, + { + "epoch": 0.18319369763661372, + "grad_norm": 274.0, + "learning_rate": 9.397165835665676e-05, + "loss": 11.8752, + "step": 4395 + }, + { + "epoch": 0.18323537993414196, + "grad_norm": 660.0, + "learning_rate": 9.396844479709205e-05, + "loss": 19.6252, + "step": 4396 + }, + { + "epoch": 0.1832770622316702, + "grad_norm": 362.0, + "learning_rate": 9.396523043619852e-05, + "loss": 14.1255, + "step": 4397 + }, + { + "epoch": 0.18331874452919844, + "grad_norm": 115.5, + "learning_rate": 9.396201527403477e-05, + "loss": 9.9385, + "step": 4398 + }, + { + "epoch": 0.18336042682672668, + "grad_norm": 235.0, + "learning_rate": 9.395879931065939e-05, + "loss": 11.5631, + "step": 4399 + }, + { + "epoch": 0.18340210912425492, + "grad_norm": 450.0, + "learning_rate": 9.3955582546131e-05, + "loss": 16.5007, + "step": 4400 + }, + { + "epoch": 0.18344379142178316, + "grad_norm": 91.5, + "learning_rate": 9.39523649805082e-05, + "loss": 8.6879, + "step": 4401 + }, + { + "epoch": 0.1834854737193114, + "grad_norm": 262.0, + "learning_rate": 9.394914661384966e-05, + "loss": 12.1879, + "step": 4402 + }, + { + "epoch": 0.18352715601683964, + "grad_norm": 274.0, + "learning_rate": 9.394592744621401e-05, + "loss": 12.6253, + "step": 4403 + }, + { + "epoch": 0.18356883831436788, + "grad_norm": 249.0, + "learning_rate": 9.394270747765994e-05, + "loss": 12.3133, + "step": 4404 + }, + { + "epoch": 0.18361052061189612, + "grad_norm": 350.0, + "learning_rate": 9.393948670824612e-05, + "loss": 14.4378, + "step": 4405 + }, + { + "epoch": 0.18365220290942436, + "grad_norm": 314.0, + "learning_rate": 9.393626513803124e-05, + "loss": 13.5626, + "step": 4406 + }, + { + "epoch": 0.1836938852069526, + "grad_norm": 306.0, + "learning_rate": 9.393304276707406e-05, + "loss": 13.6253, + "step": 4407 + }, + { + "epoch": 0.18373556750448083, + "grad_norm": 340.0, + "learning_rate": 9.392981959543326e-05, + "loss": 14.5002, + "step": 4408 + }, + { + "epoch": 0.18377724980200907, + "grad_norm": 264.0, + "learning_rate": 9.392659562316759e-05, + "loss": 12.2503, + "step": 4409 + }, + { + "epoch": 0.1838189320995373, + "grad_norm": 364.0, + "learning_rate": 9.392337085033582e-05, + "loss": 14.563, + "step": 4410 + }, + { + "epoch": 0.18386061439706555, + "grad_norm": 390.0, + "learning_rate": 9.392014527699671e-05, + "loss": 14.3137, + "step": 4411 + }, + { + "epoch": 0.18390229669459382, + "grad_norm": 540.0, + "learning_rate": 9.391691890320906e-05, + "loss": 17.2509, + "step": 4412 + }, + { + "epoch": 0.18394397899212206, + "grad_norm": 398.0, + "learning_rate": 9.391369172903167e-05, + "loss": 14.9378, + "step": 4413 + }, + { + "epoch": 0.1839856612896503, + "grad_norm": 1472.0, + "learning_rate": 9.391046375452335e-05, + "loss": 37.0002, + "step": 4414 + }, + { + "epoch": 0.18402734358717854, + "grad_norm": 258.0, + "learning_rate": 9.390723497974292e-05, + "loss": 9.3129, + "step": 4415 + }, + { + "epoch": 0.18406902588470678, + "grad_norm": 76.5, + "learning_rate": 9.390400540474924e-05, + "loss": 8.3756, + "step": 4416 + }, + { + "epoch": 0.18411070818223502, + "grad_norm": 102.5, + "learning_rate": 9.390077502960117e-05, + "loss": 8.8127, + "step": 4417 + }, + { + "epoch": 0.18415239047976326, + "grad_norm": 240.0, + "learning_rate": 9.389754385435757e-05, + "loss": 12.1882, + "step": 4418 + }, + { + "epoch": 0.1841940727772915, + "grad_norm": 210.0, + "learning_rate": 9.389431187907734e-05, + "loss": 10.3757, + "step": 4419 + }, + { + "epoch": 0.18423575507481973, + "grad_norm": 320.0, + "learning_rate": 9.389107910381937e-05, + "loss": 13.5634, + "step": 4420 + }, + { + "epoch": 0.18427743737234797, + "grad_norm": 676.0, + "learning_rate": 9.38878455286426e-05, + "loss": 20.3753, + "step": 4421 + }, + { + "epoch": 0.1843191196698762, + "grad_norm": 226.0, + "learning_rate": 9.388461115360595e-05, + "loss": 11.8764, + "step": 4422 + }, + { + "epoch": 0.18436080196740445, + "grad_norm": 189.0, + "learning_rate": 9.388137597876836e-05, + "loss": 10.9379, + "step": 4423 + }, + { + "epoch": 0.1844024842649327, + "grad_norm": 596.0, + "learning_rate": 9.38781400041888e-05, + "loss": 18.7506, + "step": 4424 + }, + { + "epoch": 0.18444416656246093, + "grad_norm": 217.0, + "learning_rate": 9.387490322992624e-05, + "loss": 12.1878, + "step": 4425 + }, + { + "epoch": 0.18448584885998917, + "grad_norm": 177.0, + "learning_rate": 9.387166565603967e-05, + "loss": 10.8133, + "step": 4426 + }, + { + "epoch": 0.1845275311575174, + "grad_norm": 336.0, + "learning_rate": 9.386842728258811e-05, + "loss": 11.7505, + "step": 4427 + }, + { + "epoch": 0.18456921345504565, + "grad_norm": 1128.0, + "learning_rate": 9.386518810963057e-05, + "loss": 27.6252, + "step": 4428 + }, + { + "epoch": 0.1846108957525739, + "grad_norm": 362.0, + "learning_rate": 9.386194813722607e-05, + "loss": 14.7537, + "step": 4429 + }, + { + "epoch": 0.18465257805010213, + "grad_norm": 286.0, + "learning_rate": 9.385870736543368e-05, + "loss": 12.5002, + "step": 4430 + }, + { + "epoch": 0.18469426034763037, + "grad_norm": 247.0, + "learning_rate": 9.385546579431244e-05, + "loss": 11.1253, + "step": 4431 + }, + { + "epoch": 0.1847359426451586, + "grad_norm": 608.0, + "learning_rate": 9.385222342392146e-05, + "loss": 19.0003, + "step": 4432 + }, + { + "epoch": 0.18477762494268685, + "grad_norm": 476.0, + "learning_rate": 9.384898025431981e-05, + "loss": 15.1885, + "step": 4433 + }, + { + "epoch": 0.18481930724021509, + "grad_norm": 490.0, + "learning_rate": 9.38457362855666e-05, + "loss": 18.3753, + "step": 4434 + }, + { + "epoch": 0.18486098953774333, + "grad_norm": 804.0, + "learning_rate": 9.384249151772095e-05, + "loss": 24.126, + "step": 4435 + }, + { + "epoch": 0.18490267183527156, + "grad_norm": 388.0, + "learning_rate": 9.383924595084202e-05, + "loss": 12.2504, + "step": 4436 + }, + { + "epoch": 0.1849443541327998, + "grad_norm": 205.0, + "learning_rate": 9.383599958498892e-05, + "loss": 8.6888, + "step": 4437 + }, + { + "epoch": 0.18498603643032804, + "grad_norm": 274.0, + "learning_rate": 9.383275242022082e-05, + "loss": 12.7503, + "step": 4438 + }, + { + "epoch": 0.18502771872785628, + "grad_norm": 524.0, + "learning_rate": 9.382950445659695e-05, + "loss": 15.753, + "step": 4439 + }, + { + "epoch": 0.18506940102538452, + "grad_norm": 274.0, + "learning_rate": 9.382625569417646e-05, + "loss": 13.7508, + "step": 4440 + }, + { + "epoch": 0.18511108332291276, + "grad_norm": 448.0, + "learning_rate": 9.382300613301857e-05, + "loss": 15.5003, + "step": 4441 + }, + { + "epoch": 0.185152765620441, + "grad_norm": 193.0, + "learning_rate": 9.38197557731825e-05, + "loss": 12.0004, + "step": 4442 + }, + { + "epoch": 0.18519444791796924, + "grad_norm": 924.0, + "learning_rate": 9.38165046147275e-05, + "loss": 21.5103, + "step": 4443 + }, + { + "epoch": 0.18523613021549748, + "grad_norm": 456.0, + "learning_rate": 9.381325265771279e-05, + "loss": 15.1263, + "step": 4444 + }, + { + "epoch": 0.18527781251302572, + "grad_norm": 484.0, + "learning_rate": 9.380999990219768e-05, + "loss": 16.7514, + "step": 4445 + }, + { + "epoch": 0.18531949481055396, + "grad_norm": 364.0, + "learning_rate": 9.380674634824143e-05, + "loss": 13.8752, + "step": 4446 + }, + { + "epoch": 0.1853611771080822, + "grad_norm": 298.0, + "learning_rate": 9.380349199590335e-05, + "loss": 12.063, + "step": 4447 + }, + { + "epoch": 0.18540285940561044, + "grad_norm": 478.0, + "learning_rate": 9.380023684524274e-05, + "loss": 16.1282, + "step": 4448 + }, + { + "epoch": 0.18544454170313868, + "grad_norm": 195.0, + "learning_rate": 9.37969808963189e-05, + "loss": 10.9378, + "step": 4449 + }, + { + "epoch": 0.18548622400066692, + "grad_norm": 532.0, + "learning_rate": 9.379372414919121e-05, + "loss": 17.7506, + "step": 4450 + }, + { + "epoch": 0.18552790629819516, + "grad_norm": 374.0, + "learning_rate": 9.3790466603919e-05, + "loss": 15.0643, + "step": 4451 + }, + { + "epoch": 0.1855695885957234, + "grad_norm": 362.0, + "learning_rate": 9.378720826056167e-05, + "loss": 14.1254, + "step": 4452 + }, + { + "epoch": 0.18561127089325163, + "grad_norm": 1528.0, + "learning_rate": 9.378394911917856e-05, + "loss": 34.2511, + "step": 4453 + }, + { + "epoch": 0.18565295319077987, + "grad_norm": 316.0, + "learning_rate": 9.378068917982909e-05, + "loss": 13.2504, + "step": 4454 + }, + { + "epoch": 0.1856946354883081, + "grad_norm": 608.0, + "learning_rate": 9.377742844257269e-05, + "loss": 17.3755, + "step": 4455 + }, + { + "epoch": 0.18573631778583635, + "grad_norm": 490.0, + "learning_rate": 9.377416690746876e-05, + "loss": 17.6252, + "step": 4456 + }, + { + "epoch": 0.1857780000833646, + "grad_norm": 266.0, + "learning_rate": 9.377090457457676e-05, + "loss": 12.3753, + "step": 4457 + }, + { + "epoch": 0.18581968238089283, + "grad_norm": 648.0, + "learning_rate": 9.376764144395614e-05, + "loss": 18.502, + "step": 4458 + }, + { + "epoch": 0.18586136467842107, + "grad_norm": 510.0, + "learning_rate": 9.376437751566636e-05, + "loss": 18.8753, + "step": 4459 + }, + { + "epoch": 0.1859030469759493, + "grad_norm": 584.0, + "learning_rate": 9.376111278976692e-05, + "loss": 19.2536, + "step": 4460 + }, + { + "epoch": 0.18594472927347755, + "grad_norm": 176.0, + "learning_rate": 9.375784726631732e-05, + "loss": 6.5318, + "step": 4461 + }, + { + "epoch": 0.1859864115710058, + "grad_norm": 42.75, + "learning_rate": 9.375458094537706e-05, + "loss": 7.7504, + "step": 4462 + }, + { + "epoch": 0.18602809386853403, + "grad_norm": 334.0, + "learning_rate": 9.375131382700566e-05, + "loss": 14.2511, + "step": 4463 + }, + { + "epoch": 0.18606977616606227, + "grad_norm": 242.0, + "learning_rate": 9.374804591126272e-05, + "loss": 12.5012, + "step": 4464 + }, + { + "epoch": 0.1861114584635905, + "grad_norm": 632.0, + "learning_rate": 9.374477719820773e-05, + "loss": 18.6252, + "step": 4465 + }, + { + "epoch": 0.18615314076111875, + "grad_norm": 157.0, + "learning_rate": 9.374150768790031e-05, + "loss": 10.063, + "step": 4466 + }, + { + "epoch": 0.18619482305864699, + "grad_norm": 1128.0, + "learning_rate": 9.373823738040002e-05, + "loss": 25.5045, + "step": 4467 + }, + { + "epoch": 0.18623650535617522, + "grad_norm": 960.0, + "learning_rate": 9.373496627576648e-05, + "loss": 21.5021, + "step": 4468 + }, + { + "epoch": 0.18627818765370346, + "grad_norm": 398.0, + "learning_rate": 9.373169437405928e-05, + "loss": 14.7503, + "step": 4469 + }, + { + "epoch": 0.1863198699512317, + "grad_norm": 724.0, + "learning_rate": 9.372842167533809e-05, + "loss": 22.2503, + "step": 4470 + }, + { + "epoch": 0.18636155224875994, + "grad_norm": 246.0, + "learning_rate": 9.372514817966251e-05, + "loss": 10.6878, + "step": 4471 + }, + { + "epoch": 0.18640323454628818, + "grad_norm": 247.0, + "learning_rate": 9.372187388709224e-05, + "loss": 13.8754, + "step": 4472 + }, + { + "epoch": 0.18644491684381642, + "grad_norm": 204.0, + "learning_rate": 9.371859879768692e-05, + "loss": 12.2509, + "step": 4473 + }, + { + "epoch": 0.18648659914134466, + "grad_norm": 600.0, + "learning_rate": 9.371532291150627e-05, + "loss": 20.1254, + "step": 4474 + }, + { + "epoch": 0.1865282814388729, + "grad_norm": 720.0, + "learning_rate": 9.371204622860998e-05, + "loss": 20.6273, + "step": 4475 + }, + { + "epoch": 0.18656996373640114, + "grad_norm": 410.0, + "learning_rate": 9.370876874905776e-05, + "loss": 16.1253, + "step": 4476 + }, + { + "epoch": 0.18661164603392938, + "grad_norm": 372.0, + "learning_rate": 9.370549047290936e-05, + "loss": 12.1883, + "step": 4477 + }, + { + "epoch": 0.18665332833145762, + "grad_norm": 304.0, + "learning_rate": 9.37022114002245e-05, + "loss": 13.8128, + "step": 4478 + }, + { + "epoch": 0.18669501062898586, + "grad_norm": 241.0, + "learning_rate": 9.369893153106298e-05, + "loss": 12.5003, + "step": 4479 + }, + { + "epoch": 0.1867366929265141, + "grad_norm": 322.0, + "learning_rate": 9.369565086548453e-05, + "loss": 14.3763, + "step": 4480 + }, + { + "epoch": 0.18677837522404234, + "grad_norm": 1736.0, + "learning_rate": 9.369236940354898e-05, + "loss": 44.7506, + "step": 4481 + }, + { + "epoch": 0.18682005752157058, + "grad_norm": 75.0, + "learning_rate": 9.368908714531611e-05, + "loss": 8.1889, + "step": 4482 + }, + { + "epoch": 0.18686173981909882, + "grad_norm": 56.5, + "learning_rate": 9.368580409084576e-05, + "loss": 8.501, + "step": 4483 + }, + { + "epoch": 0.18690342211662705, + "grad_norm": 278.0, + "learning_rate": 9.368252024019775e-05, + "loss": 13.5628, + "step": 4484 + }, + { + "epoch": 0.18694510441415532, + "grad_norm": 143.0, + "learning_rate": 9.367923559343191e-05, + "loss": 9.5005, + "step": 4485 + }, + { + "epoch": 0.18698678671168356, + "grad_norm": 520.0, + "learning_rate": 9.367595015060815e-05, + "loss": 17.0002, + "step": 4486 + }, + { + "epoch": 0.1870284690092118, + "grad_norm": 368.0, + "learning_rate": 9.367266391178631e-05, + "loss": 12.4401, + "step": 4487 + }, + { + "epoch": 0.18707015130674004, + "grad_norm": 394.0, + "learning_rate": 9.366937687702628e-05, + "loss": 14.9381, + "step": 4488 + }, + { + "epoch": 0.18711183360426828, + "grad_norm": 229.0, + "learning_rate": 9.3666089046388e-05, + "loss": 10.1876, + "step": 4489 + }, + { + "epoch": 0.18715351590179652, + "grad_norm": 296.0, + "learning_rate": 9.366280041993136e-05, + "loss": 13.1879, + "step": 4490 + }, + { + "epoch": 0.18719519819932476, + "grad_norm": 432.0, + "learning_rate": 9.365951099771631e-05, + "loss": 14.4411, + "step": 4491 + }, + { + "epoch": 0.187236880496853, + "grad_norm": 222.0, + "learning_rate": 9.36562207798028e-05, + "loss": 10.5002, + "step": 4492 + }, + { + "epoch": 0.18727856279438124, + "grad_norm": 1016.0, + "learning_rate": 9.365292976625078e-05, + "loss": 24.8753, + "step": 4493 + }, + { + "epoch": 0.18732024509190948, + "grad_norm": 306.0, + "learning_rate": 9.364963795712025e-05, + "loss": 13.8762, + "step": 4494 + }, + { + "epoch": 0.18736192738943772, + "grad_norm": 478.0, + "learning_rate": 9.36463453524712e-05, + "loss": 17.3762, + "step": 4495 + }, + { + "epoch": 0.18740360968696596, + "grad_norm": 608.0, + "learning_rate": 9.364305195236362e-05, + "loss": 18.3755, + "step": 4496 + }, + { + "epoch": 0.1874452919844942, + "grad_norm": 516.0, + "learning_rate": 9.363975775685753e-05, + "loss": 18.7506, + "step": 4497 + }, + { + "epoch": 0.18748697428202243, + "grad_norm": 442.0, + "learning_rate": 9.3636462766013e-05, + "loss": 14.6251, + "step": 4498 + }, + { + "epoch": 0.18752865657955067, + "grad_norm": 354.0, + "learning_rate": 9.363316697989007e-05, + "loss": 11.313, + "step": 4499 + }, + { + "epoch": 0.1875703388770789, + "grad_norm": 146.0, + "learning_rate": 9.362987039854878e-05, + "loss": 8.6257, + "step": 4500 + }, + { + "epoch": 0.18761202117460715, + "grad_norm": 408.0, + "learning_rate": 9.362657302204925e-05, + "loss": 15.5628, + "step": 4501 + }, + { + "epoch": 0.1876537034721354, + "grad_norm": 580.0, + "learning_rate": 9.362327485045153e-05, + "loss": 18.2504, + "step": 4502 + }, + { + "epoch": 0.18769538576966363, + "grad_norm": 332.0, + "learning_rate": 9.361997588381577e-05, + "loss": 14.063, + "step": 4503 + }, + { + "epoch": 0.18773706806719187, + "grad_norm": 207.0, + "learning_rate": 9.361667612220207e-05, + "loss": 10.5628, + "step": 4504 + }, + { + "epoch": 0.1877787503647201, + "grad_norm": 588.0, + "learning_rate": 9.361337556567058e-05, + "loss": 18.0013, + "step": 4505 + }, + { + "epoch": 0.18782043266224835, + "grad_norm": 436.0, + "learning_rate": 9.361007421428144e-05, + "loss": 15.6879, + "step": 4506 + }, + { + "epoch": 0.1878621149597766, + "grad_norm": 290.0, + "learning_rate": 9.360677206809482e-05, + "loss": 13.3127, + "step": 4507 + }, + { + "epoch": 0.18790379725730483, + "grad_norm": 165.0, + "learning_rate": 9.360346912717093e-05, + "loss": 9.9379, + "step": 4508 + }, + { + "epoch": 0.18794547955483307, + "grad_norm": 684.0, + "learning_rate": 9.360016539156993e-05, + "loss": 21.0007, + "step": 4509 + }, + { + "epoch": 0.1879871618523613, + "grad_norm": 378.0, + "learning_rate": 9.359686086135204e-05, + "loss": 13.6903, + "step": 4510 + }, + { + "epoch": 0.18802884414988955, + "grad_norm": 83.0, + "learning_rate": 9.359355553657751e-05, + "loss": 6.0002, + "step": 4511 + }, + { + "epoch": 0.18807052644741779, + "grad_norm": 91.0, + "learning_rate": 9.359024941730654e-05, + "loss": 7.8753, + "step": 4512 + }, + { + "epoch": 0.18811220874494602, + "grad_norm": 336.0, + "learning_rate": 9.358694250359943e-05, + "loss": 13.3753, + "step": 4513 + }, + { + "epoch": 0.18815389104247426, + "grad_norm": 168.0, + "learning_rate": 9.358363479551639e-05, + "loss": 11.0002, + "step": 4514 + }, + { + "epoch": 0.1881955733400025, + "grad_norm": 191.0, + "learning_rate": 9.358032629311776e-05, + "loss": 11.0632, + "step": 4515 + }, + { + "epoch": 0.18823725563753074, + "grad_norm": 91.0, + "learning_rate": 9.357701699646382e-05, + "loss": 8.1252, + "step": 4516 + }, + { + "epoch": 0.18827893793505898, + "grad_norm": 916.0, + "learning_rate": 9.357370690561486e-05, + "loss": 23.1254, + "step": 4517 + }, + { + "epoch": 0.18832062023258722, + "grad_norm": 1032.0, + "learning_rate": 9.357039602063122e-05, + "loss": 24.6294, + "step": 4518 + }, + { + "epoch": 0.18836230253011546, + "grad_norm": 344.0, + "learning_rate": 9.356708434157327e-05, + "loss": 14.3756, + "step": 4519 + }, + { + "epoch": 0.1884039848276437, + "grad_norm": 424.0, + "learning_rate": 9.356377186850131e-05, + "loss": 15.313, + "step": 4520 + }, + { + "epoch": 0.18844566712517194, + "grad_norm": 576.0, + "learning_rate": 9.356045860147577e-05, + "loss": 16.1254, + "step": 4521 + }, + { + "epoch": 0.18848734942270018, + "grad_norm": 360.0, + "learning_rate": 9.355714454055699e-05, + "loss": 12.5004, + "step": 4522 + }, + { + "epoch": 0.18852903172022842, + "grad_norm": 792.0, + "learning_rate": 9.355382968580537e-05, + "loss": 21.0003, + "step": 4523 + }, + { + "epoch": 0.18857071401775666, + "grad_norm": 370.0, + "learning_rate": 9.355051403728137e-05, + "loss": 14.1878, + "step": 4524 + }, + { + "epoch": 0.1886123963152849, + "grad_norm": 516.0, + "learning_rate": 9.354719759504535e-05, + "loss": 16.8755, + "step": 4525 + }, + { + "epoch": 0.18865407861281314, + "grad_norm": 298.0, + "learning_rate": 9.354388035915782e-05, + "loss": 13.5005, + "step": 4526 + }, + { + "epoch": 0.18869576091034138, + "grad_norm": 1080.0, + "learning_rate": 9.354056232967919e-05, + "loss": 27.6299, + "step": 4527 + }, + { + "epoch": 0.18873744320786962, + "grad_norm": 1160.0, + "learning_rate": 9.353724350666994e-05, + "loss": 32.7503, + "step": 4528 + }, + { + "epoch": 0.18877912550539785, + "grad_norm": 105.5, + "learning_rate": 9.353392389019056e-05, + "loss": 8.5627, + "step": 4529 + }, + { + "epoch": 0.1888208078029261, + "grad_norm": 143.0, + "learning_rate": 9.353060348030156e-05, + "loss": 11.063, + "step": 4530 + }, + { + "epoch": 0.18886249010045433, + "grad_norm": 272.0, + "learning_rate": 9.352728227706346e-05, + "loss": 13.252, + "step": 4531 + }, + { + "epoch": 0.18890417239798257, + "grad_norm": 150.0, + "learning_rate": 9.352396028053676e-05, + "loss": 11.0009, + "step": 4532 + }, + { + "epoch": 0.1889458546955108, + "grad_norm": 1032.0, + "learning_rate": 9.352063749078203e-05, + "loss": 25.3809, + "step": 4533 + }, + { + "epoch": 0.18898753699303905, + "grad_norm": 620.0, + "learning_rate": 9.351731390785981e-05, + "loss": 19.1253, + "step": 4534 + }, + { + "epoch": 0.1890292192905673, + "grad_norm": 656.0, + "learning_rate": 9.351398953183069e-05, + "loss": 19.8756, + "step": 4535 + }, + { + "epoch": 0.18907090158809553, + "grad_norm": 202.0, + "learning_rate": 9.351066436275524e-05, + "loss": 11.6879, + "step": 4536 + }, + { + "epoch": 0.18911258388562377, + "grad_norm": 1152.0, + "learning_rate": 9.350733840069406e-05, + "loss": 30.5003, + "step": 4537 + }, + { + "epoch": 0.189154266183152, + "grad_norm": 486.0, + "learning_rate": 9.350401164570779e-05, + "loss": 16.7518, + "step": 4538 + }, + { + "epoch": 0.18919594848068025, + "grad_norm": 268.0, + "learning_rate": 9.350068409785704e-05, + "loss": 11.5017, + "step": 4539 + }, + { + "epoch": 0.1892376307782085, + "grad_norm": 125.5, + "learning_rate": 9.349735575720247e-05, + "loss": 10.8771, + "step": 4540 + }, + { + "epoch": 0.18927931307573673, + "grad_norm": 386.0, + "learning_rate": 9.349402662380472e-05, + "loss": 14.8127, + "step": 4541 + }, + { + "epoch": 0.18932099537326497, + "grad_norm": 736.0, + "learning_rate": 9.349069669772448e-05, + "loss": 19.0011, + "step": 4542 + }, + { + "epoch": 0.1893626776707932, + "grad_norm": 106.0, + "learning_rate": 9.348736597902243e-05, + "loss": 9.2508, + "step": 4543 + }, + { + "epoch": 0.18940435996832145, + "grad_norm": 396.0, + "learning_rate": 9.348403446775927e-05, + "loss": 14.9383, + "step": 4544 + }, + { + "epoch": 0.18944604226584968, + "grad_norm": 338.0, + "learning_rate": 9.348070216399572e-05, + "loss": 14.1252, + "step": 4545 + }, + { + "epoch": 0.18948772456337792, + "grad_norm": 340.0, + "learning_rate": 9.347736906779252e-05, + "loss": 15.1252, + "step": 4546 + }, + { + "epoch": 0.18952940686090616, + "grad_norm": 384.0, + "learning_rate": 9.347403517921041e-05, + "loss": 16.8769, + "step": 4547 + }, + { + "epoch": 0.1895710891584344, + "grad_norm": 246.0, + "learning_rate": 9.347070049831015e-05, + "loss": 13.8148, + "step": 4548 + }, + { + "epoch": 0.18961277145596264, + "grad_norm": 140.0, + "learning_rate": 9.346736502515252e-05, + "loss": 9.1255, + "step": 4549 + }, + { + "epoch": 0.18965445375349088, + "grad_norm": 300.0, + "learning_rate": 9.346402875979829e-05, + "loss": 11.8754, + "step": 4550 + }, + { + "epoch": 0.18969613605101912, + "grad_norm": 760.0, + "learning_rate": 9.346069170230828e-05, + "loss": 24.0006, + "step": 4551 + }, + { + "epoch": 0.18973781834854736, + "grad_norm": 300.0, + "learning_rate": 9.34573538527433e-05, + "loss": 13.1255, + "step": 4552 + }, + { + "epoch": 0.1897795006460756, + "grad_norm": 386.0, + "learning_rate": 9.34540152111642e-05, + "loss": 15.5005, + "step": 4553 + }, + { + "epoch": 0.18982118294360384, + "grad_norm": 274.0, + "learning_rate": 9.345067577763182e-05, + "loss": 11.3752, + "step": 4554 + }, + { + "epoch": 0.18986286524113208, + "grad_norm": 348.0, + "learning_rate": 9.344733555220701e-05, + "loss": 13.4377, + "step": 4555 + }, + { + "epoch": 0.18990454753866032, + "grad_norm": 426.0, + "learning_rate": 9.344399453495066e-05, + "loss": 15.2548, + "step": 4556 + }, + { + "epoch": 0.18994622983618856, + "grad_norm": 482.0, + "learning_rate": 9.344065272592363e-05, + "loss": 15.4408, + "step": 4557 + }, + { + "epoch": 0.18998791213371682, + "grad_norm": 216.0, + "learning_rate": 9.343731012518686e-05, + "loss": 11.1259, + "step": 4558 + }, + { + "epoch": 0.19002959443124506, + "grad_norm": 280.0, + "learning_rate": 9.343396673280126e-05, + "loss": 13.0006, + "step": 4559 + }, + { + "epoch": 0.1900712767287733, + "grad_norm": 560.0, + "learning_rate": 9.343062254882775e-05, + "loss": 16.2503, + "step": 4560 + }, + { + "epoch": 0.19011295902630154, + "grad_norm": 284.0, + "learning_rate": 9.34272775733273e-05, + "loss": 12.7502, + "step": 4561 + }, + { + "epoch": 0.19015464132382978, + "grad_norm": 408.0, + "learning_rate": 9.342393180636086e-05, + "loss": 14.5003, + "step": 4562 + }, + { + "epoch": 0.19019632362135802, + "grad_norm": 322.0, + "learning_rate": 9.342058524798942e-05, + "loss": 14.2509, + "step": 4563 + }, + { + "epoch": 0.19023800591888626, + "grad_norm": 636.0, + "learning_rate": 9.341723789827393e-05, + "loss": 19.8752, + "step": 4564 + }, + { + "epoch": 0.1902796882164145, + "grad_norm": 260.0, + "learning_rate": 9.341388975727545e-05, + "loss": 12.6253, + "step": 4565 + }, + { + "epoch": 0.19032137051394274, + "grad_norm": 107.0, + "learning_rate": 9.341054082505496e-05, + "loss": 9.8755, + "step": 4566 + }, + { + "epoch": 0.19036305281147098, + "grad_norm": 414.0, + "learning_rate": 9.340719110167352e-05, + "loss": 15.063, + "step": 4567 + }, + { + "epoch": 0.19040473510899922, + "grad_norm": 390.0, + "learning_rate": 9.340384058719216e-05, + "loss": 15.563, + "step": 4568 + }, + { + "epoch": 0.19044641740652746, + "grad_norm": 178.0, + "learning_rate": 9.340048928167196e-05, + "loss": 11.0013, + "step": 4569 + }, + { + "epoch": 0.1904880997040557, + "grad_norm": 312.0, + "learning_rate": 9.339713718517399e-05, + "loss": 13.7504, + "step": 4570 + }, + { + "epoch": 0.19052978200158394, + "grad_norm": 149.0, + "learning_rate": 9.339378429775934e-05, + "loss": 9.6878, + "step": 4571 + }, + { + "epoch": 0.19057146429911218, + "grad_norm": 524.0, + "learning_rate": 9.339043061948911e-05, + "loss": 15.1931, + "step": 4572 + }, + { + "epoch": 0.19061314659664041, + "grad_norm": 108.5, + "learning_rate": 9.338707615042445e-05, + "loss": 8.3133, + "step": 4573 + }, + { + "epoch": 0.19065482889416865, + "grad_norm": 346.0, + "learning_rate": 9.338372089062646e-05, + "loss": 15.0642, + "step": 4574 + }, + { + "epoch": 0.1906965111916969, + "grad_norm": 454.0, + "learning_rate": 9.338036484015631e-05, + "loss": 16.2515, + "step": 4575 + }, + { + "epoch": 0.19073819348922513, + "grad_norm": 230.0, + "learning_rate": 9.337700799907517e-05, + "loss": 11.8127, + "step": 4576 + }, + { + "epoch": 0.19077987578675337, + "grad_norm": 223.0, + "learning_rate": 9.337365036744419e-05, + "loss": 13.6263, + "step": 4577 + }, + { + "epoch": 0.1908215580842816, + "grad_norm": 152.0, + "learning_rate": 9.337029194532459e-05, + "loss": 9.2502, + "step": 4578 + }, + { + "epoch": 0.19086324038180985, + "grad_norm": 776.0, + "learning_rate": 9.336693273277757e-05, + "loss": 19.5002, + "step": 4579 + }, + { + "epoch": 0.1909049226793381, + "grad_norm": 201.0, + "learning_rate": 9.336357272986434e-05, + "loss": 11.3133, + "step": 4580 + }, + { + "epoch": 0.19094660497686633, + "grad_norm": 644.0, + "learning_rate": 9.336021193664617e-05, + "loss": 19.2502, + "step": 4581 + }, + { + "epoch": 0.19098828727439457, + "grad_norm": 428.0, + "learning_rate": 9.335685035318426e-05, + "loss": 14.8762, + "step": 4582 + }, + { + "epoch": 0.1910299695719228, + "grad_norm": 572.0, + "learning_rate": 9.335348797953993e-05, + "loss": 18.7503, + "step": 4583 + }, + { + "epoch": 0.19107165186945105, + "grad_norm": 1096.0, + "learning_rate": 9.335012481577442e-05, + "loss": 29.8754, + "step": 4584 + }, + { + "epoch": 0.1911133341669793, + "grad_norm": 724.0, + "learning_rate": 9.334676086194904e-05, + "loss": 21.1256, + "step": 4585 + }, + { + "epoch": 0.19115501646450753, + "grad_norm": 460.0, + "learning_rate": 9.33433961181251e-05, + "loss": 13.7506, + "step": 4586 + }, + { + "epoch": 0.19119669876203577, + "grad_norm": 184.0, + "learning_rate": 9.334003058436391e-05, + "loss": 11.8754, + "step": 4587 + }, + { + "epoch": 0.191238381059564, + "grad_norm": 596.0, + "learning_rate": 9.333666426072682e-05, + "loss": 17.8785, + "step": 4588 + }, + { + "epoch": 0.19128006335709224, + "grad_norm": 628.0, + "learning_rate": 9.333329714727517e-05, + "loss": 20.8753, + "step": 4589 + }, + { + "epoch": 0.19132174565462048, + "grad_norm": 207.0, + "learning_rate": 9.332992924407034e-05, + "loss": 11.2507, + "step": 4590 + }, + { + "epoch": 0.19136342795214872, + "grad_norm": 764.0, + "learning_rate": 9.332656055117371e-05, + "loss": 21.8755, + "step": 4591 + }, + { + "epoch": 0.19140511024967696, + "grad_norm": 484.0, + "learning_rate": 9.332319106864664e-05, + "loss": 16.629, + "step": 4592 + }, + { + "epoch": 0.1914467925472052, + "grad_norm": 368.0, + "learning_rate": 9.331982079655059e-05, + "loss": 15.1882, + "step": 4593 + }, + { + "epoch": 0.19148847484473344, + "grad_norm": 556.0, + "learning_rate": 9.331644973494695e-05, + "loss": 20.2512, + "step": 4594 + }, + { + "epoch": 0.19153015714226168, + "grad_norm": 342.0, + "learning_rate": 9.331307788389719e-05, + "loss": 15.6887, + "step": 4595 + }, + { + "epoch": 0.19157183943978992, + "grad_norm": 68.5, + "learning_rate": 9.33097052434627e-05, + "loss": 9.5007, + "step": 4596 + }, + { + "epoch": 0.19161352173731816, + "grad_norm": 262.0, + "learning_rate": 9.330633181370503e-05, + "loss": 12.0003, + "step": 4597 + }, + { + "epoch": 0.1916552040348464, + "grad_norm": 496.0, + "learning_rate": 9.330295759468559e-05, + "loss": 19.7509, + "step": 4598 + }, + { + "epoch": 0.19169688633237464, + "grad_norm": 652.0, + "learning_rate": 9.329958258646592e-05, + "loss": 19.3753, + "step": 4599 + }, + { + "epoch": 0.19173856862990288, + "grad_norm": 197.0, + "learning_rate": 9.32962067891075e-05, + "loss": 12.0629, + "step": 4600 + }, + { + "epoch": 0.19178025092743112, + "grad_norm": 225.0, + "learning_rate": 9.329283020267188e-05, + "loss": 9.8758, + "step": 4601 + }, + { + "epoch": 0.19182193322495936, + "grad_norm": 418.0, + "learning_rate": 9.328945282722057e-05, + "loss": 15.6877, + "step": 4602 + }, + { + "epoch": 0.1918636155224876, + "grad_norm": 616.0, + "learning_rate": 9.328607466281516e-05, + "loss": 19.1255, + "step": 4603 + }, + { + "epoch": 0.19190529782001584, + "grad_norm": 458.0, + "learning_rate": 9.328269570951718e-05, + "loss": 14.1887, + "step": 4604 + }, + { + "epoch": 0.19194698011754407, + "grad_norm": 520.0, + "learning_rate": 9.327931596738824e-05, + "loss": 17.1254, + "step": 4605 + }, + { + "epoch": 0.19198866241507231, + "grad_norm": 290.0, + "learning_rate": 9.327593543648991e-05, + "loss": 12.4376, + "step": 4606 + }, + { + "epoch": 0.19203034471260055, + "grad_norm": 150.0, + "learning_rate": 9.327255411688383e-05, + "loss": 10.0012, + "step": 4607 + }, + { + "epoch": 0.1920720270101288, + "grad_norm": 104.0, + "learning_rate": 9.32691720086316e-05, + "loss": 8.1879, + "step": 4608 + }, + { + "epoch": 0.19211370930765703, + "grad_norm": 266.0, + "learning_rate": 9.326578911179488e-05, + "loss": 12.5632, + "step": 4609 + }, + { + "epoch": 0.19215539160518527, + "grad_norm": 568.0, + "learning_rate": 9.326240542643529e-05, + "loss": 18.8752, + "step": 4610 + }, + { + "epoch": 0.1921970739027135, + "grad_norm": 676.0, + "learning_rate": 9.325902095261454e-05, + "loss": 20.1252, + "step": 4611 + }, + { + "epoch": 0.19223875620024175, + "grad_norm": 308.0, + "learning_rate": 9.32556356903943e-05, + "loss": 13.6255, + "step": 4612 + }, + { + "epoch": 0.19228043849777, + "grad_norm": 296.0, + "learning_rate": 9.325224963983625e-05, + "loss": 12.3757, + "step": 4613 + }, + { + "epoch": 0.19232212079529823, + "grad_norm": 216.0, + "learning_rate": 9.324886280100211e-05, + "loss": 11.5627, + "step": 4614 + }, + { + "epoch": 0.19236380309282647, + "grad_norm": 580.0, + "learning_rate": 9.32454751739536e-05, + "loss": 18.2506, + "step": 4615 + }, + { + "epoch": 0.1924054853903547, + "grad_norm": 804.0, + "learning_rate": 9.324208675875248e-05, + "loss": 23.3753, + "step": 4616 + }, + { + "epoch": 0.19244716768788295, + "grad_norm": 114.5, + "learning_rate": 9.323869755546047e-05, + "loss": 9.6881, + "step": 4617 + }, + { + "epoch": 0.1924888499854112, + "grad_norm": 302.0, + "learning_rate": 9.323530756413938e-05, + "loss": 14.0009, + "step": 4618 + }, + { + "epoch": 0.19253053228293943, + "grad_norm": 856.0, + "learning_rate": 9.323191678485096e-05, + "loss": 21.1259, + "step": 4619 + }, + { + "epoch": 0.19257221458046767, + "grad_norm": 134.0, + "learning_rate": 9.322852521765701e-05, + "loss": 8.6257, + "step": 4620 + }, + { + "epoch": 0.1926138968779959, + "grad_norm": 308.0, + "learning_rate": 9.322513286261937e-05, + "loss": 13.1254, + "step": 4621 + }, + { + "epoch": 0.19265557917552414, + "grad_norm": 860.0, + "learning_rate": 9.322173971979984e-05, + "loss": 21.0053, + "step": 4622 + }, + { + "epoch": 0.19269726147305238, + "grad_norm": 416.0, + "learning_rate": 9.321834578926026e-05, + "loss": 15.938, + "step": 4623 + }, + { + "epoch": 0.19273894377058062, + "grad_norm": 478.0, + "learning_rate": 9.32149510710625e-05, + "loss": 15.563, + "step": 4624 + }, + { + "epoch": 0.19278062606810886, + "grad_norm": 143.0, + "learning_rate": 9.321155556526842e-05, + "loss": 10.1289, + "step": 4625 + }, + { + "epoch": 0.1928223083656371, + "grad_norm": 2224.0, + "learning_rate": 9.32081592719399e-05, + "loss": 43.2504, + "step": 4626 + }, + { + "epoch": 0.19286399066316534, + "grad_norm": 430.0, + "learning_rate": 9.320476219113883e-05, + "loss": 13.6881, + "step": 4627 + }, + { + "epoch": 0.19290567296069358, + "grad_norm": 408.0, + "learning_rate": 9.320136432292714e-05, + "loss": 14.0628, + "step": 4628 + }, + { + "epoch": 0.19294735525822182, + "grad_norm": 272.0, + "learning_rate": 9.319796566736676e-05, + "loss": 13.6881, + "step": 4629 + }, + { + "epoch": 0.19298903755575006, + "grad_norm": 292.0, + "learning_rate": 9.319456622451963e-05, + "loss": 11.8769, + "step": 4630 + }, + { + "epoch": 0.19303071985327833, + "grad_norm": 452.0, + "learning_rate": 9.319116599444769e-05, + "loss": 17.3764, + "step": 4631 + }, + { + "epoch": 0.19307240215080657, + "grad_norm": 540.0, + "learning_rate": 9.318776497721288e-05, + "loss": 18.6266, + "step": 4632 + }, + { + "epoch": 0.1931140844483348, + "grad_norm": 74.0, + "learning_rate": 9.318436317287726e-05, + "loss": 8.0626, + "step": 4633 + }, + { + "epoch": 0.19315576674586304, + "grad_norm": 266.0, + "learning_rate": 9.318096058150278e-05, + "loss": 11.8173, + "step": 4634 + }, + { + "epoch": 0.19319744904339128, + "grad_norm": 596.0, + "learning_rate": 9.317755720315145e-05, + "loss": 19.6255, + "step": 4635 + }, + { + "epoch": 0.19323913134091952, + "grad_norm": 233.0, + "learning_rate": 9.317415303788532e-05, + "loss": 12.0003, + "step": 4636 + }, + { + "epoch": 0.19328081363844776, + "grad_norm": 272.0, + "learning_rate": 9.31707480857664e-05, + "loss": 12.6255, + "step": 4637 + }, + { + "epoch": 0.193322495935976, + "grad_norm": 318.0, + "learning_rate": 9.316734234685678e-05, + "loss": 13.5013, + "step": 4638 + }, + { + "epoch": 0.19336417823350424, + "grad_norm": 342.0, + "learning_rate": 9.31639358212185e-05, + "loss": 10.6254, + "step": 4639 + }, + { + "epoch": 0.19340586053103248, + "grad_norm": 264.0, + "learning_rate": 9.316052850891367e-05, + "loss": 11.3763, + "step": 4640 + }, + { + "epoch": 0.19344754282856072, + "grad_norm": 576.0, + "learning_rate": 9.315712041000437e-05, + "loss": 18.6285, + "step": 4641 + }, + { + "epoch": 0.19348922512608896, + "grad_norm": 348.0, + "learning_rate": 9.315371152455272e-05, + "loss": 14.1253, + "step": 4642 + }, + { + "epoch": 0.1935309074236172, + "grad_norm": 308.0, + "learning_rate": 9.315030185262086e-05, + "loss": 13.1882, + "step": 4643 + }, + { + "epoch": 0.19357258972114544, + "grad_norm": 612.0, + "learning_rate": 9.31468913942709e-05, + "loss": 17.6254, + "step": 4644 + }, + { + "epoch": 0.19361427201867368, + "grad_norm": 195.0, + "learning_rate": 9.314348014956502e-05, + "loss": 10.6882, + "step": 4645 + }, + { + "epoch": 0.19365595431620192, + "grad_norm": 1200.0, + "learning_rate": 9.314006811856537e-05, + "loss": 30.5014, + "step": 4646 + }, + { + "epoch": 0.19369763661373016, + "grad_norm": 346.0, + "learning_rate": 9.313665530133418e-05, + "loss": 13.0633, + "step": 4647 + }, + { + "epoch": 0.1937393189112584, + "grad_norm": 936.0, + "learning_rate": 9.313324169793359e-05, + "loss": 24.8752, + "step": 4648 + }, + { + "epoch": 0.19378100120878664, + "grad_norm": 236.0, + "learning_rate": 9.312982730842585e-05, + "loss": 12.8752, + "step": 4649 + }, + { + "epoch": 0.19382268350631487, + "grad_norm": 278.0, + "learning_rate": 9.312641213287319e-05, + "loss": 12.6253, + "step": 4650 + }, + { + "epoch": 0.19386436580384311, + "grad_norm": 402.0, + "learning_rate": 9.312299617133782e-05, + "loss": 15.2503, + "step": 4651 + }, + { + "epoch": 0.19390604810137135, + "grad_norm": 169.0, + "learning_rate": 9.311957942388203e-05, + "loss": 11.6254, + "step": 4652 + }, + { + "epoch": 0.1939477303988996, + "grad_norm": 182.0, + "learning_rate": 9.311616189056808e-05, + "loss": 10.1256, + "step": 4653 + }, + { + "epoch": 0.19398941269642783, + "grad_norm": 428.0, + "learning_rate": 9.311274357145824e-05, + "loss": 15.9378, + "step": 4654 + }, + { + "epoch": 0.19403109499395607, + "grad_norm": 580.0, + "learning_rate": 9.310932446661484e-05, + "loss": 17.1296, + "step": 4655 + }, + { + "epoch": 0.1940727772914843, + "grad_norm": 374.0, + "learning_rate": 9.310590457610015e-05, + "loss": 14.0631, + "step": 4656 + }, + { + "epoch": 0.19411445958901255, + "grad_norm": 504.0, + "learning_rate": 9.310248389997654e-05, + "loss": 17.1253, + "step": 4657 + }, + { + "epoch": 0.1941561418865408, + "grad_norm": 724.0, + "learning_rate": 9.309906243830633e-05, + "loss": 20.3787, + "step": 4658 + }, + { + "epoch": 0.19419782418406903, + "grad_norm": 239.0, + "learning_rate": 9.309564019115188e-05, + "loss": 11.3129, + "step": 4659 + }, + { + "epoch": 0.19423950648159727, + "grad_norm": 512.0, + "learning_rate": 9.309221715857557e-05, + "loss": 17.6257, + "step": 4660 + }, + { + "epoch": 0.1942811887791255, + "grad_norm": 62.75, + "learning_rate": 9.308879334063976e-05, + "loss": 9.0009, + "step": 4661 + }, + { + "epoch": 0.19432287107665375, + "grad_norm": 342.0, + "learning_rate": 9.308536873740688e-05, + "loss": 14.3752, + "step": 4662 + }, + { + "epoch": 0.194364553374182, + "grad_norm": 294.0, + "learning_rate": 9.308194334893933e-05, + "loss": 12.0003, + "step": 4663 + }, + { + "epoch": 0.19440623567171023, + "grad_norm": 227.0, + "learning_rate": 9.307851717529954e-05, + "loss": 11.8752, + "step": 4664 + }, + { + "epoch": 0.19444791796923847, + "grad_norm": 440.0, + "learning_rate": 9.307509021654993e-05, + "loss": 15.7502, + "step": 4665 + }, + { + "epoch": 0.1944896002667667, + "grad_norm": 608.0, + "learning_rate": 9.3071662472753e-05, + "loss": 17.8758, + "step": 4666 + }, + { + "epoch": 0.19453128256429494, + "grad_norm": 470.0, + "learning_rate": 9.306823394397118e-05, + "loss": 17.6253, + "step": 4667 + }, + { + "epoch": 0.19457296486182318, + "grad_norm": 544.0, + "learning_rate": 9.306480463026699e-05, + "loss": 16.2502, + "step": 4668 + }, + { + "epoch": 0.19461464715935142, + "grad_norm": 362.0, + "learning_rate": 9.306137453170289e-05, + "loss": 13.5632, + "step": 4669 + }, + { + "epoch": 0.19465632945687966, + "grad_norm": 54.75, + "learning_rate": 9.305794364834143e-05, + "loss": 6.5321, + "step": 4670 + }, + { + "epoch": 0.1946980117544079, + "grad_norm": 152.0, + "learning_rate": 9.305451198024513e-05, + "loss": 10.0036, + "step": 4671 + }, + { + "epoch": 0.19473969405193614, + "grad_norm": 330.0, + "learning_rate": 9.305107952747654e-05, + "loss": 14.1251, + "step": 4672 + }, + { + "epoch": 0.19478137634946438, + "grad_norm": 276.0, + "learning_rate": 9.304764629009817e-05, + "loss": 13.8132, + "step": 4673 + }, + { + "epoch": 0.19482305864699262, + "grad_norm": 508.0, + "learning_rate": 9.304421226817264e-05, + "loss": 18.5005, + "step": 4674 + }, + { + "epoch": 0.19486474094452086, + "grad_norm": 326.0, + "learning_rate": 9.304077746176253e-05, + "loss": 13.3131, + "step": 4675 + }, + { + "epoch": 0.1949064232420491, + "grad_norm": 326.0, + "learning_rate": 9.303734187093043e-05, + "loss": 13.7502, + "step": 4676 + }, + { + "epoch": 0.19494810553957734, + "grad_norm": 249.0, + "learning_rate": 9.303390549573894e-05, + "loss": 9.002, + "step": 4677 + }, + { + "epoch": 0.19498978783710558, + "grad_norm": 470.0, + "learning_rate": 9.30304683362507e-05, + "loss": 16.2557, + "step": 4678 + }, + { + "epoch": 0.19503147013463382, + "grad_norm": 286.0, + "learning_rate": 9.302703039252835e-05, + "loss": 13.0628, + "step": 4679 + }, + { + "epoch": 0.19507315243216206, + "grad_norm": 284.0, + "learning_rate": 9.302359166463458e-05, + "loss": 10.6257, + "step": 4680 + }, + { + "epoch": 0.1951148347296903, + "grad_norm": 170.0, + "learning_rate": 9.302015215263202e-05, + "loss": 10.8755, + "step": 4681 + }, + { + "epoch": 0.19515651702721853, + "grad_norm": 76.5, + "learning_rate": 9.301671185658336e-05, + "loss": 7.501, + "step": 4682 + }, + { + "epoch": 0.19519819932474677, + "grad_norm": 516.0, + "learning_rate": 9.301327077655131e-05, + "loss": 18.3753, + "step": 4683 + }, + { + "epoch": 0.195239881622275, + "grad_norm": 159.0, + "learning_rate": 9.300982891259858e-05, + "loss": 10.5627, + "step": 4684 + }, + { + "epoch": 0.19528156391980325, + "grad_norm": 226.0, + "learning_rate": 9.30063862647879e-05, + "loss": 11.438, + "step": 4685 + }, + { + "epoch": 0.1953232462173315, + "grad_norm": 144.0, + "learning_rate": 9.300294283318203e-05, + "loss": 9.8127, + "step": 4686 + }, + { + "epoch": 0.19536492851485973, + "grad_norm": 716.0, + "learning_rate": 9.299949861784369e-05, + "loss": 18.7502, + "step": 4687 + }, + { + "epoch": 0.19540661081238797, + "grad_norm": 324.0, + "learning_rate": 9.299605361883568e-05, + "loss": 12.8752, + "step": 4688 + }, + { + "epoch": 0.1954482931099162, + "grad_norm": 298.0, + "learning_rate": 9.299260783622076e-05, + "loss": 11.4378, + "step": 4689 + }, + { + "epoch": 0.19548997540744445, + "grad_norm": 276.0, + "learning_rate": 9.298916127006176e-05, + "loss": 12.7504, + "step": 4690 + }, + { + "epoch": 0.1955316577049727, + "grad_norm": 532.0, + "learning_rate": 9.298571392042148e-05, + "loss": 17.2505, + "step": 4691 + }, + { + "epoch": 0.19557334000250093, + "grad_norm": 241.0, + "learning_rate": 9.298226578736275e-05, + "loss": 12.6257, + "step": 4692 + }, + { + "epoch": 0.19561502230002917, + "grad_norm": 326.0, + "learning_rate": 9.297881687094841e-05, + "loss": 13.6877, + "step": 4693 + }, + { + "epoch": 0.1956567045975574, + "grad_norm": 272.0, + "learning_rate": 9.297536717124131e-05, + "loss": 13.0013, + "step": 4694 + }, + { + "epoch": 0.19569838689508565, + "grad_norm": 226.0, + "learning_rate": 9.297191668830433e-05, + "loss": 11.6253, + "step": 4695 + }, + { + "epoch": 0.19574006919261389, + "grad_norm": 68.0, + "learning_rate": 9.296846542220034e-05, + "loss": 6.8127, + "step": 4696 + }, + { + "epoch": 0.19578175149014213, + "grad_norm": 272.0, + "learning_rate": 9.296501337299228e-05, + "loss": 12.5007, + "step": 4697 + }, + { + "epoch": 0.19582343378767036, + "grad_norm": 173.0, + "learning_rate": 9.296156054074303e-05, + "loss": 11.3128, + "step": 4698 + }, + { + "epoch": 0.1958651160851986, + "grad_norm": 620.0, + "learning_rate": 9.295810692551552e-05, + "loss": 18.7505, + "step": 4699 + }, + { + "epoch": 0.19590679838272684, + "grad_norm": 135.0, + "learning_rate": 9.29546525273727e-05, + "loss": 9.1888, + "step": 4700 + }, + { + "epoch": 0.19594848068025508, + "grad_norm": 656.0, + "learning_rate": 9.295119734637752e-05, + "loss": 19.1255, + "step": 4701 + }, + { + "epoch": 0.19599016297778332, + "grad_norm": 416.0, + "learning_rate": 9.294774138259296e-05, + "loss": 15.6879, + "step": 4702 + }, + { + "epoch": 0.19603184527531156, + "grad_norm": 652.0, + "learning_rate": 9.2944284636082e-05, + "loss": 20.3751, + "step": 4703 + }, + { + "epoch": 0.19607352757283983, + "grad_norm": 264.0, + "learning_rate": 9.294082710690764e-05, + "loss": 13.1878, + "step": 4704 + }, + { + "epoch": 0.19611520987036807, + "grad_norm": 1984.0, + "learning_rate": 9.293736879513288e-05, + "loss": 42.0016, + "step": 4705 + }, + { + "epoch": 0.1961568921678963, + "grad_norm": 380.0, + "learning_rate": 9.293390970082079e-05, + "loss": 15.6254, + "step": 4706 + }, + { + "epoch": 0.19619857446542455, + "grad_norm": 392.0, + "learning_rate": 9.293044982403436e-05, + "loss": 15.8752, + "step": 4707 + }, + { + "epoch": 0.19624025676295279, + "grad_norm": 900.0, + "learning_rate": 9.292698916483668e-05, + "loss": 22.5002, + "step": 4708 + }, + { + "epoch": 0.19628193906048103, + "grad_norm": 348.0, + "learning_rate": 9.29235277232908e-05, + "loss": 14.2504, + "step": 4709 + }, + { + "epoch": 0.19632362135800926, + "grad_norm": 496.0, + "learning_rate": 9.292006549945984e-05, + "loss": 16.7503, + "step": 4710 + }, + { + "epoch": 0.1963653036555375, + "grad_norm": 436.0, + "learning_rate": 9.291660249340687e-05, + "loss": 15.5629, + "step": 4711 + }, + { + "epoch": 0.19640698595306574, + "grad_norm": 189.0, + "learning_rate": 9.2913138705195e-05, + "loss": 10.5009, + "step": 4712 + }, + { + "epoch": 0.19644866825059398, + "grad_norm": 334.0, + "learning_rate": 9.290967413488739e-05, + "loss": 11.563, + "step": 4713 + }, + { + "epoch": 0.19649035054812222, + "grad_norm": 222.0, + "learning_rate": 9.290620878254713e-05, + "loss": 12.0002, + "step": 4714 + }, + { + "epoch": 0.19653203284565046, + "grad_norm": 241.0, + "learning_rate": 9.290274264823742e-05, + "loss": 11.1882, + "step": 4715 + }, + { + "epoch": 0.1965737151431787, + "grad_norm": 504.0, + "learning_rate": 9.289927573202141e-05, + "loss": 16.8752, + "step": 4716 + }, + { + "epoch": 0.19661539744070694, + "grad_norm": 148.0, + "learning_rate": 9.289580803396229e-05, + "loss": 9.6259, + "step": 4717 + }, + { + "epoch": 0.19665707973823518, + "grad_norm": 608.0, + "learning_rate": 9.289233955412327e-05, + "loss": 19.7503, + "step": 4718 + }, + { + "epoch": 0.19669876203576342, + "grad_norm": 276.0, + "learning_rate": 9.288887029256755e-05, + "loss": 12.6878, + "step": 4719 + }, + { + "epoch": 0.19674044433329166, + "grad_norm": 752.0, + "learning_rate": 9.288540024935837e-05, + "loss": 20.5003, + "step": 4720 + }, + { + "epoch": 0.1967821266308199, + "grad_norm": 278.0, + "learning_rate": 9.288192942455896e-05, + "loss": 11.2502, + "step": 4721 + }, + { + "epoch": 0.19682380892834814, + "grad_norm": 282.0, + "learning_rate": 9.287845781823257e-05, + "loss": 13.251, + "step": 4722 + }, + { + "epoch": 0.19686549122587638, + "grad_norm": 418.0, + "learning_rate": 9.287498543044248e-05, + "loss": 13.8754, + "step": 4723 + }, + { + "epoch": 0.19690717352340462, + "grad_norm": 167.0, + "learning_rate": 9.287151226125198e-05, + "loss": 10.0007, + "step": 4724 + }, + { + "epoch": 0.19694885582093286, + "grad_norm": 306.0, + "learning_rate": 9.286803831072436e-05, + "loss": 14.6257, + "step": 4725 + }, + { + "epoch": 0.1969905381184611, + "grad_norm": 426.0, + "learning_rate": 9.286456357892295e-05, + "loss": 14.1878, + "step": 4726 + }, + { + "epoch": 0.19703222041598933, + "grad_norm": 178.0, + "learning_rate": 9.286108806591105e-05, + "loss": 12.1256, + "step": 4727 + }, + { + "epoch": 0.19707390271351757, + "grad_norm": 268.0, + "learning_rate": 9.285761177175201e-05, + "loss": 13.2505, + "step": 4728 + }, + { + "epoch": 0.1971155850110458, + "grad_norm": 71.5, + "learning_rate": 9.28541346965092e-05, + "loss": 7.7505, + "step": 4729 + }, + { + "epoch": 0.19715726730857405, + "grad_norm": 486.0, + "learning_rate": 9.285065684024599e-05, + "loss": 17.126, + "step": 4730 + }, + { + "epoch": 0.1971989496061023, + "grad_norm": 556.0, + "learning_rate": 9.284717820302573e-05, + "loss": 18.5008, + "step": 4731 + }, + { + "epoch": 0.19724063190363053, + "grad_norm": 121.0, + "learning_rate": 9.284369878491186e-05, + "loss": 10.0007, + "step": 4732 + }, + { + "epoch": 0.19728231420115877, + "grad_norm": 53.5, + "learning_rate": 9.284021858596779e-05, + "loss": 7.9691, + "step": 4733 + }, + { + "epoch": 0.197323996498687, + "grad_norm": 210.0, + "learning_rate": 9.28367376062569e-05, + "loss": 11.5629, + "step": 4734 + }, + { + "epoch": 0.19736567879621525, + "grad_norm": 171.0, + "learning_rate": 9.283325584584268e-05, + "loss": 10.8754, + "step": 4735 + }, + { + "epoch": 0.1974073610937435, + "grad_norm": 704.0, + "learning_rate": 9.282977330478859e-05, + "loss": 20.3761, + "step": 4736 + }, + { + "epoch": 0.19744904339127173, + "grad_norm": 161.0, + "learning_rate": 9.282628998315806e-05, + "loss": 10.0627, + "step": 4737 + }, + { + "epoch": 0.19749072568879997, + "grad_norm": 732.0, + "learning_rate": 9.282280588101459e-05, + "loss": 19.3756, + "step": 4738 + }, + { + "epoch": 0.1975324079863282, + "grad_norm": 440.0, + "learning_rate": 9.281932099842167e-05, + "loss": 15.1253, + "step": 4739 + }, + { + "epoch": 0.19757409028385645, + "grad_norm": 348.0, + "learning_rate": 9.281583533544285e-05, + "loss": 14.5636, + "step": 4740 + }, + { + "epoch": 0.19761577258138469, + "grad_norm": 752.0, + "learning_rate": 9.281234889214162e-05, + "loss": 22.3754, + "step": 4741 + }, + { + "epoch": 0.19765745487891292, + "grad_norm": 276.0, + "learning_rate": 9.280886166858154e-05, + "loss": 13.0627, + "step": 4742 + }, + { + "epoch": 0.19769913717644116, + "grad_norm": 520.0, + "learning_rate": 9.280537366482614e-05, + "loss": 17.2502, + "step": 4743 + }, + { + "epoch": 0.1977408194739694, + "grad_norm": 270.0, + "learning_rate": 9.280188488093901e-05, + "loss": 11.9377, + "step": 4744 + }, + { + "epoch": 0.19778250177149764, + "grad_norm": 1152.0, + "learning_rate": 9.279839531698374e-05, + "loss": 28.7529, + "step": 4745 + }, + { + "epoch": 0.19782418406902588, + "grad_norm": 97.5, + "learning_rate": 9.279490497302389e-05, + "loss": 5.7816, + "step": 4746 + }, + { + "epoch": 0.19786586636655412, + "grad_norm": 174.0, + "learning_rate": 9.279141384912312e-05, + "loss": 11.3755, + "step": 4747 + }, + { + "epoch": 0.19790754866408236, + "grad_norm": 101.5, + "learning_rate": 9.278792194534502e-05, + "loss": 9.8754, + "step": 4748 + }, + { + "epoch": 0.1979492309616106, + "grad_norm": 460.0, + "learning_rate": 9.278442926175326e-05, + "loss": 16.2505, + "step": 4749 + }, + { + "epoch": 0.19799091325913884, + "grad_norm": 196.0, + "learning_rate": 9.278093579841146e-05, + "loss": 10.6877, + "step": 4750 + }, + { + "epoch": 0.19803259555666708, + "grad_norm": 1456.0, + "learning_rate": 9.277744155538333e-05, + "loss": 33.0008, + "step": 4751 + }, + { + "epoch": 0.19807427785419532, + "grad_norm": 115.0, + "learning_rate": 9.27739465327325e-05, + "loss": 9.1258, + "step": 4752 + }, + { + "epoch": 0.19811596015172356, + "grad_norm": 91.5, + "learning_rate": 9.277045073052272e-05, + "loss": 8.188, + "step": 4753 + }, + { + "epoch": 0.1981576424492518, + "grad_norm": 528.0, + "learning_rate": 9.276695414881768e-05, + "loss": 17.2504, + "step": 4754 + }, + { + "epoch": 0.19819932474678004, + "grad_norm": 322.0, + "learning_rate": 9.27634567876811e-05, + "loss": 13.1877, + "step": 4755 + }, + { + "epoch": 0.19824100704430828, + "grad_norm": 282.0, + "learning_rate": 9.275995864717672e-05, + "loss": 12.9386, + "step": 4756 + }, + { + "epoch": 0.19828268934183652, + "grad_norm": 450.0, + "learning_rate": 9.275645972736829e-05, + "loss": 16.3752, + "step": 4757 + }, + { + "epoch": 0.19832437163936475, + "grad_norm": 496.0, + "learning_rate": 9.27529600283196e-05, + "loss": 17.2502, + "step": 4758 + }, + { + "epoch": 0.198366053936893, + "grad_norm": 556.0, + "learning_rate": 9.274945955009442e-05, + "loss": 16.6254, + "step": 4759 + }, + { + "epoch": 0.19840773623442123, + "grad_norm": 342.0, + "learning_rate": 9.274595829275653e-05, + "loss": 14.6877, + "step": 4760 + }, + { + "epoch": 0.19844941853194947, + "grad_norm": 452.0, + "learning_rate": 9.274245625636978e-05, + "loss": 16.5005, + "step": 4761 + }, + { + "epoch": 0.1984911008294777, + "grad_norm": 426.0, + "learning_rate": 9.273895344099794e-05, + "loss": 15.5007, + "step": 4762 + }, + { + "epoch": 0.19853278312700595, + "grad_norm": 612.0, + "learning_rate": 9.273544984670489e-05, + "loss": 20.6252, + "step": 4763 + }, + { + "epoch": 0.1985744654245342, + "grad_norm": 916.0, + "learning_rate": 9.273194547355449e-05, + "loss": 19.8801, + "step": 4764 + }, + { + "epoch": 0.19861614772206243, + "grad_norm": 436.0, + "learning_rate": 9.272844032161056e-05, + "loss": 16.3754, + "step": 4765 + }, + { + "epoch": 0.19865783001959067, + "grad_norm": 376.0, + "learning_rate": 9.272493439093704e-05, + "loss": 15.3753, + "step": 4766 + }, + { + "epoch": 0.1986995123171189, + "grad_norm": 536.0, + "learning_rate": 9.272142768159777e-05, + "loss": 18.252, + "step": 4767 + }, + { + "epoch": 0.19874119461464715, + "grad_norm": 410.0, + "learning_rate": 9.271792019365672e-05, + "loss": 14.8755, + "step": 4768 + }, + { + "epoch": 0.1987828769121754, + "grad_norm": 208.0, + "learning_rate": 9.271441192717776e-05, + "loss": 11.5005, + "step": 4769 + }, + { + "epoch": 0.19882455920970363, + "grad_norm": 210.0, + "learning_rate": 9.271090288222486e-05, + "loss": 11.9378, + "step": 4770 + }, + { + "epoch": 0.19886624150723187, + "grad_norm": 560.0, + "learning_rate": 9.270739305886195e-05, + "loss": 17.2503, + "step": 4771 + }, + { + "epoch": 0.1989079238047601, + "grad_norm": 312.0, + "learning_rate": 9.270388245715303e-05, + "loss": 13.7508, + "step": 4772 + }, + { + "epoch": 0.19894960610228835, + "grad_norm": 1184.0, + "learning_rate": 9.270037107716206e-05, + "loss": 29.7503, + "step": 4773 + }, + { + "epoch": 0.19899128839981658, + "grad_norm": 143.0, + "learning_rate": 9.269685891895302e-05, + "loss": 9.0003, + "step": 4774 + }, + { + "epoch": 0.19903297069734482, + "grad_norm": 692.0, + "learning_rate": 9.269334598258994e-05, + "loss": 22.3753, + "step": 4775 + }, + { + "epoch": 0.19907465299487306, + "grad_norm": 664.0, + "learning_rate": 9.268983226813686e-05, + "loss": 20.0003, + "step": 4776 + }, + { + "epoch": 0.19911633529240133, + "grad_norm": 428.0, + "learning_rate": 9.268631777565777e-05, + "loss": 16.0003, + "step": 4777 + }, + { + "epoch": 0.19915801758992957, + "grad_norm": 584.0, + "learning_rate": 9.268280250521677e-05, + "loss": 15.6257, + "step": 4778 + }, + { + "epoch": 0.1991996998874578, + "grad_norm": 420.0, + "learning_rate": 9.267928645687788e-05, + "loss": 14.9385, + "step": 4779 + }, + { + "epoch": 0.19924138218498605, + "grad_norm": 540.0, + "learning_rate": 9.267576963070524e-05, + "loss": 18.3755, + "step": 4780 + }, + { + "epoch": 0.1992830644825143, + "grad_norm": 362.0, + "learning_rate": 9.267225202676289e-05, + "loss": 13.2503, + "step": 4781 + }, + { + "epoch": 0.19932474678004253, + "grad_norm": 616.0, + "learning_rate": 9.266873364511494e-05, + "loss": 18.7512, + "step": 4782 + }, + { + "epoch": 0.19936642907757077, + "grad_norm": 430.0, + "learning_rate": 9.266521448582557e-05, + "loss": 14.5626, + "step": 4783 + }, + { + "epoch": 0.199408111375099, + "grad_norm": 232.0, + "learning_rate": 9.266169454895886e-05, + "loss": 11.9378, + "step": 4784 + }, + { + "epoch": 0.19944979367262725, + "grad_norm": 286.0, + "learning_rate": 9.265817383457898e-05, + "loss": 11.8129, + "step": 4785 + }, + { + "epoch": 0.19949147597015549, + "grad_norm": 187.0, + "learning_rate": 9.265465234275009e-05, + "loss": 10.9384, + "step": 4786 + }, + { + "epoch": 0.19953315826768372, + "grad_norm": 1960.0, + "learning_rate": 9.265113007353639e-05, + "loss": 40.5013, + "step": 4787 + }, + { + "epoch": 0.19957484056521196, + "grad_norm": 226.0, + "learning_rate": 9.264760702700204e-05, + "loss": 9.3128, + "step": 4788 + }, + { + "epoch": 0.1996165228627402, + "grad_norm": 108.5, + "learning_rate": 9.264408320321128e-05, + "loss": 10.7504, + "step": 4789 + }, + { + "epoch": 0.19965820516026844, + "grad_norm": 482.0, + "learning_rate": 9.26405586022283e-05, + "loss": 17.0003, + "step": 4790 + }, + { + "epoch": 0.19969988745779668, + "grad_norm": 624.0, + "learning_rate": 9.263703322411736e-05, + "loss": 19.0001, + "step": 4791 + }, + { + "epoch": 0.19974156975532492, + "grad_norm": 426.0, + "learning_rate": 9.263350706894272e-05, + "loss": 16.1257, + "step": 4792 + }, + { + "epoch": 0.19978325205285316, + "grad_norm": 316.0, + "learning_rate": 9.262998013676862e-05, + "loss": 13.5632, + "step": 4793 + }, + { + "epoch": 0.1998249343503814, + "grad_norm": 141.0, + "learning_rate": 9.262645242765935e-05, + "loss": 9.4377, + "step": 4794 + }, + { + "epoch": 0.19986661664790964, + "grad_norm": 362.0, + "learning_rate": 9.26229239416792e-05, + "loss": 13.0004, + "step": 4795 + }, + { + "epoch": 0.19990829894543788, + "grad_norm": 716.0, + "learning_rate": 9.261939467889246e-05, + "loss": 19.0051, + "step": 4796 + }, + { + "epoch": 0.19994998124296612, + "grad_norm": 270.0, + "learning_rate": 9.261586463936349e-05, + "loss": 12.563, + "step": 4797 + }, + { + "epoch": 0.19999166354049436, + "grad_norm": 490.0, + "learning_rate": 9.261233382315659e-05, + "loss": 13.3759, + "step": 4798 + }, + { + "epoch": 0.2000333458380226, + "grad_norm": 65.0, + "learning_rate": 9.260880223033613e-05, + "loss": 9.3131, + "step": 4799 + }, + { + "epoch": 0.20007502813555084, + "grad_norm": 194.0, + "learning_rate": 9.260526986096647e-05, + "loss": 11.4391, + "step": 4800 + }, + { + "epoch": 0.20011671043307908, + "grad_norm": 440.0, + "learning_rate": 9.260173671511199e-05, + "loss": 15.188, + "step": 4801 + }, + { + "epoch": 0.20015839273060732, + "grad_norm": 380.0, + "learning_rate": 9.259820279283706e-05, + "loss": 15.0007, + "step": 4802 + }, + { + "epoch": 0.20020007502813555, + "grad_norm": 592.0, + "learning_rate": 9.259466809420611e-05, + "loss": 17.3756, + "step": 4803 + }, + { + "epoch": 0.2002417573256638, + "grad_norm": 272.0, + "learning_rate": 9.259113261928356e-05, + "loss": 14.0007, + "step": 4804 + }, + { + "epoch": 0.20028343962319203, + "grad_norm": 155.0, + "learning_rate": 9.258759636813383e-05, + "loss": 9.3755, + "step": 4805 + }, + { + "epoch": 0.20032512192072027, + "grad_norm": 490.0, + "learning_rate": 9.258405934082137e-05, + "loss": 16.6253, + "step": 4806 + }, + { + "epoch": 0.2003668042182485, + "grad_norm": 584.0, + "learning_rate": 9.258052153741065e-05, + "loss": 16.7554, + "step": 4807 + }, + { + "epoch": 0.20040848651577675, + "grad_norm": 148.0, + "learning_rate": 9.257698295796615e-05, + "loss": 9.5001, + "step": 4808 + }, + { + "epoch": 0.200450168813305, + "grad_norm": 840.0, + "learning_rate": 9.257344360255235e-05, + "loss": 24.7502, + "step": 4809 + }, + { + "epoch": 0.20049185111083323, + "grad_norm": 442.0, + "learning_rate": 9.256990347123378e-05, + "loss": 16.0006, + "step": 4810 + }, + { + "epoch": 0.20053353340836147, + "grad_norm": 396.0, + "learning_rate": 9.256636256407492e-05, + "loss": 14.9378, + "step": 4811 + }, + { + "epoch": 0.2005752157058897, + "grad_norm": 84.5, + "learning_rate": 9.256282088114034e-05, + "loss": 9.3135, + "step": 4812 + }, + { + "epoch": 0.20061689800341795, + "grad_norm": 668.0, + "learning_rate": 9.255927842249455e-05, + "loss": 19.2502, + "step": 4813 + }, + { + "epoch": 0.2006585803009462, + "grad_norm": 464.0, + "learning_rate": 9.255573518820216e-05, + "loss": 16.0008, + "step": 4814 + }, + { + "epoch": 0.20070026259847443, + "grad_norm": 348.0, + "learning_rate": 9.255219117832769e-05, + "loss": 13.8751, + "step": 4815 + }, + { + "epoch": 0.20074194489600267, + "grad_norm": 172.0, + "learning_rate": 9.254864639293577e-05, + "loss": 7.0009, + "step": 4816 + }, + { + "epoch": 0.2007836271935309, + "grad_norm": 182.0, + "learning_rate": 9.2545100832091e-05, + "loss": 11.5629, + "step": 4817 + }, + { + "epoch": 0.20082530949105915, + "grad_norm": 420.0, + "learning_rate": 9.254155449585797e-05, + "loss": 16.5008, + "step": 4818 + }, + { + "epoch": 0.20086699178858738, + "grad_norm": 520.0, + "learning_rate": 9.253800738430136e-05, + "loss": 14.3788, + "step": 4819 + }, + { + "epoch": 0.20090867408611562, + "grad_norm": 344.0, + "learning_rate": 9.253445949748577e-05, + "loss": 13.3129, + "step": 4820 + }, + { + "epoch": 0.20095035638364386, + "grad_norm": 75.5, + "learning_rate": 9.253091083547589e-05, + "loss": 7.8754, + "step": 4821 + }, + { + "epoch": 0.2009920386811721, + "grad_norm": 250.0, + "learning_rate": 9.252736139833638e-05, + "loss": 12.0003, + "step": 4822 + }, + { + "epoch": 0.20103372097870034, + "grad_norm": 390.0, + "learning_rate": 9.252381118613192e-05, + "loss": 15.0628, + "step": 4823 + }, + { + "epoch": 0.20107540327622858, + "grad_norm": 79.0, + "learning_rate": 9.252026019892724e-05, + "loss": 8.5628, + "step": 4824 + }, + { + "epoch": 0.20111708557375682, + "grad_norm": 376.0, + "learning_rate": 9.251670843678705e-05, + "loss": 15.5007, + "step": 4825 + }, + { + "epoch": 0.20115876787128506, + "grad_norm": 362.0, + "learning_rate": 9.251315589977607e-05, + "loss": 13.9382, + "step": 4826 + }, + { + "epoch": 0.2012004501688133, + "grad_norm": 177.0, + "learning_rate": 9.250960258795904e-05, + "loss": 12.1261, + "step": 4827 + }, + { + "epoch": 0.20124213246634154, + "grad_norm": 296.0, + "learning_rate": 9.250604850140074e-05, + "loss": 13.5631, + "step": 4828 + }, + { + "epoch": 0.20128381476386978, + "grad_norm": 207.0, + "learning_rate": 9.250249364016592e-05, + "loss": 11.5002, + "step": 4829 + }, + { + "epoch": 0.20132549706139802, + "grad_norm": 104.0, + "learning_rate": 9.24989380043194e-05, + "loss": 9.5627, + "step": 4830 + }, + { + "epoch": 0.20136717935892626, + "grad_norm": 90.0, + "learning_rate": 9.249538159392595e-05, + "loss": 9.6257, + "step": 4831 + }, + { + "epoch": 0.2014088616564545, + "grad_norm": 251.0, + "learning_rate": 9.24918244090504e-05, + "loss": 10.1877, + "step": 4832 + }, + { + "epoch": 0.20145054395398274, + "grad_norm": 156.0, + "learning_rate": 9.248826644975756e-05, + "loss": 11.1879, + "step": 4833 + }, + { + "epoch": 0.20149222625151098, + "grad_norm": 478.0, + "learning_rate": 9.248470771611232e-05, + "loss": 14.7505, + "step": 4834 + }, + { + "epoch": 0.20153390854903921, + "grad_norm": 320.0, + "learning_rate": 9.24811482081795e-05, + "loss": 16.2504, + "step": 4835 + }, + { + "epoch": 0.20157559084656745, + "grad_norm": 422.0, + "learning_rate": 9.247758792602398e-05, + "loss": 14.5003, + "step": 4836 + }, + { + "epoch": 0.2016172731440957, + "grad_norm": 544.0, + "learning_rate": 9.247402686971065e-05, + "loss": 17.6254, + "step": 4837 + }, + { + "epoch": 0.20165895544162393, + "grad_norm": 45.5, + "learning_rate": 9.247046503930442e-05, + "loss": 7.9379, + "step": 4838 + }, + { + "epoch": 0.20170063773915217, + "grad_norm": 268.0, + "learning_rate": 9.246690243487018e-05, + "loss": 11.4386, + "step": 4839 + }, + { + "epoch": 0.2017423200366804, + "grad_norm": 572.0, + "learning_rate": 9.246333905647288e-05, + "loss": 18.0019, + "step": 4840 + }, + { + "epoch": 0.20178400233420865, + "grad_norm": 82.5, + "learning_rate": 9.245977490417745e-05, + "loss": 9.3142, + "step": 4841 + }, + { + "epoch": 0.2018256846317369, + "grad_norm": 470.0, + "learning_rate": 9.245620997804886e-05, + "loss": 15.7522, + "step": 4842 + }, + { + "epoch": 0.20186736692926513, + "grad_norm": 836.0, + "learning_rate": 9.245264427815207e-05, + "loss": 20.5004, + "step": 4843 + }, + { + "epoch": 0.20190904922679337, + "grad_norm": 588.0, + "learning_rate": 9.244907780455208e-05, + "loss": 18.1274, + "step": 4844 + }, + { + "epoch": 0.2019507315243216, + "grad_norm": 292.0, + "learning_rate": 9.244551055731386e-05, + "loss": 12.9382, + "step": 4845 + }, + { + "epoch": 0.20199241382184985, + "grad_norm": 484.0, + "learning_rate": 9.244194253650245e-05, + "loss": 18.0003, + "step": 4846 + }, + { + "epoch": 0.2020340961193781, + "grad_norm": 632.0, + "learning_rate": 9.243837374218287e-05, + "loss": 20.5003, + "step": 4847 + }, + { + "epoch": 0.20207577841690633, + "grad_norm": 107.5, + "learning_rate": 9.243480417442016e-05, + "loss": 9.1252, + "step": 4848 + }, + { + "epoch": 0.20211746071443457, + "grad_norm": 884.0, + "learning_rate": 9.243123383327938e-05, + "loss": 26.1253, + "step": 4849 + }, + { + "epoch": 0.20215914301196283, + "grad_norm": 210.0, + "learning_rate": 9.24276627188256e-05, + "loss": 12.3757, + "step": 4850 + }, + { + "epoch": 0.20220082530949107, + "grad_norm": 338.0, + "learning_rate": 9.24240908311239e-05, + "loss": 13.8128, + "step": 4851 + }, + { + "epoch": 0.2022425076070193, + "grad_norm": 328.0, + "learning_rate": 9.242051817023938e-05, + "loss": 14.1255, + "step": 4852 + }, + { + "epoch": 0.20228418990454755, + "grad_norm": 262.0, + "learning_rate": 9.241694473623715e-05, + "loss": 11.4379, + "step": 4853 + }, + { + "epoch": 0.2023258722020758, + "grad_norm": 434.0, + "learning_rate": 9.241337052918233e-05, + "loss": 15.3128, + "step": 4854 + }, + { + "epoch": 0.20236755449960403, + "grad_norm": 502.0, + "learning_rate": 9.240979554914006e-05, + "loss": 15.8772, + "step": 4855 + }, + { + "epoch": 0.20240923679713227, + "grad_norm": 232.0, + "learning_rate": 9.240621979617552e-05, + "loss": 11.7501, + "step": 4856 + }, + { + "epoch": 0.2024509190946605, + "grad_norm": 362.0, + "learning_rate": 9.240264327035385e-05, + "loss": 12.8128, + "step": 4857 + }, + { + "epoch": 0.20249260139218875, + "grad_norm": 312.0, + "learning_rate": 9.239906597174024e-05, + "loss": 13.1878, + "step": 4858 + }, + { + "epoch": 0.202534283689717, + "grad_norm": 1004.0, + "learning_rate": 9.239548790039989e-05, + "loss": 25.7505, + "step": 4859 + }, + { + "epoch": 0.20257596598724523, + "grad_norm": 780.0, + "learning_rate": 9.2391909056398e-05, + "loss": 22.376, + "step": 4860 + }, + { + "epoch": 0.20261764828477347, + "grad_norm": 1360.0, + "learning_rate": 9.238832943979983e-05, + "loss": 28.38, + "step": 4861 + }, + { + "epoch": 0.2026593305823017, + "grad_norm": 356.0, + "learning_rate": 9.238474905067059e-05, + "loss": 13.8131, + "step": 4862 + }, + { + "epoch": 0.20270101287982994, + "grad_norm": 314.0, + "learning_rate": 9.238116788907552e-05, + "loss": 14.2506, + "step": 4863 + }, + { + "epoch": 0.20274269517735818, + "grad_norm": 109.5, + "learning_rate": 9.237758595507991e-05, + "loss": 9.438, + "step": 4864 + }, + { + "epoch": 0.20278437747488642, + "grad_norm": 628.0, + "learning_rate": 9.237400324874901e-05, + "loss": 21.0002, + "step": 4865 + }, + { + "epoch": 0.20282605977241466, + "grad_norm": 178.0, + "learning_rate": 9.237041977014818e-05, + "loss": 9.3127, + "step": 4866 + }, + { + "epoch": 0.2028677420699429, + "grad_norm": 168.0, + "learning_rate": 9.236683551934267e-05, + "loss": 6.5003, + "step": 4867 + }, + { + "epoch": 0.20290942436747114, + "grad_norm": 544.0, + "learning_rate": 9.236325049639782e-05, + "loss": 16.7507, + "step": 4868 + }, + { + "epoch": 0.20295110666499938, + "grad_norm": 980.0, + "learning_rate": 9.235966470137895e-05, + "loss": 25.8754, + "step": 4869 + }, + { + "epoch": 0.20299278896252762, + "grad_norm": 157.0, + "learning_rate": 9.235607813435145e-05, + "loss": 10.6878, + "step": 4870 + }, + { + "epoch": 0.20303447126005586, + "grad_norm": 492.0, + "learning_rate": 9.235249079538068e-05, + "loss": 17.7511, + "step": 4871 + }, + { + "epoch": 0.2030761535575841, + "grad_norm": 220.0, + "learning_rate": 9.234890268453199e-05, + "loss": 11.5628, + "step": 4872 + }, + { + "epoch": 0.20311783585511234, + "grad_norm": 464.0, + "learning_rate": 9.23453138018708e-05, + "loss": 16.3753, + "step": 4873 + }, + { + "epoch": 0.20315951815264058, + "grad_norm": 69.0, + "learning_rate": 9.234172414746248e-05, + "loss": 6.5629, + "step": 4874 + }, + { + "epoch": 0.20320120045016882, + "grad_norm": 340.0, + "learning_rate": 9.23381337213725e-05, + "loss": 12.3756, + "step": 4875 + }, + { + "epoch": 0.20324288274769706, + "grad_norm": 296.0, + "learning_rate": 9.233454252366626e-05, + "loss": 13.4385, + "step": 4876 + }, + { + "epoch": 0.2032845650452253, + "grad_norm": 182.0, + "learning_rate": 9.233095055440925e-05, + "loss": 10.7503, + "step": 4877 + }, + { + "epoch": 0.20332624734275354, + "grad_norm": 434.0, + "learning_rate": 9.232735781366689e-05, + "loss": 15.3752, + "step": 4878 + }, + { + "epoch": 0.20336792964028177, + "grad_norm": 1040.0, + "learning_rate": 9.232376430150468e-05, + "loss": 24.505, + "step": 4879 + }, + { + "epoch": 0.20340961193781001, + "grad_norm": 246.0, + "learning_rate": 9.23201700179881e-05, + "loss": 11.1252, + "step": 4880 + }, + { + "epoch": 0.20345129423533825, + "grad_norm": 179.0, + "learning_rate": 9.231657496318268e-05, + "loss": 11.0004, + "step": 4881 + }, + { + "epoch": 0.2034929765328665, + "grad_norm": 228.0, + "learning_rate": 9.23129791371539e-05, + "loss": 12.1253, + "step": 4882 + }, + { + "epoch": 0.20353465883039473, + "grad_norm": 284.0, + "learning_rate": 9.230938253996735e-05, + "loss": 12.8129, + "step": 4883 + }, + { + "epoch": 0.20357634112792297, + "grad_norm": 193.0, + "learning_rate": 9.230578517168854e-05, + "loss": 11.1267, + "step": 4884 + }, + { + "epoch": 0.2036180234254512, + "grad_norm": 115.5, + "learning_rate": 9.230218703238303e-05, + "loss": 9.9381, + "step": 4885 + }, + { + "epoch": 0.20365970572297945, + "grad_norm": 348.0, + "learning_rate": 9.22985881221164e-05, + "loss": 14.6878, + "step": 4886 + }, + { + "epoch": 0.2037013880205077, + "grad_norm": 336.0, + "learning_rate": 9.229498844095427e-05, + "loss": 14.3128, + "step": 4887 + }, + { + "epoch": 0.20374307031803593, + "grad_norm": 188.0, + "learning_rate": 9.22913879889622e-05, + "loss": 11.1918, + "step": 4888 + }, + { + "epoch": 0.20378475261556417, + "grad_norm": 788.0, + "learning_rate": 9.228778676620585e-05, + "loss": 22.1297, + "step": 4889 + }, + { + "epoch": 0.2038264349130924, + "grad_norm": 520.0, + "learning_rate": 9.228418477275081e-05, + "loss": 15.1884, + "step": 4890 + }, + { + "epoch": 0.20386811721062065, + "grad_norm": 868.0, + "learning_rate": 9.228058200866276e-05, + "loss": 17.7578, + "step": 4891 + }, + { + "epoch": 0.2039097995081489, + "grad_norm": 458.0, + "learning_rate": 9.227697847400734e-05, + "loss": 16.503, + "step": 4892 + }, + { + "epoch": 0.20395148180567713, + "grad_norm": 396.0, + "learning_rate": 9.227337416885024e-05, + "loss": 14.8135, + "step": 4893 + }, + { + "epoch": 0.20399316410320537, + "grad_norm": 272.0, + "learning_rate": 9.226976909325715e-05, + "loss": 12.1252, + "step": 4894 + }, + { + "epoch": 0.2040348464007336, + "grad_norm": 568.0, + "learning_rate": 9.226616324729376e-05, + "loss": 18.2551, + "step": 4895 + }, + { + "epoch": 0.20407652869826184, + "grad_norm": 134.0, + "learning_rate": 9.226255663102578e-05, + "loss": 9.1878, + "step": 4896 + }, + { + "epoch": 0.20411821099579008, + "grad_norm": 161.0, + "learning_rate": 9.225894924451898e-05, + "loss": 12.1253, + "step": 4897 + }, + { + "epoch": 0.20415989329331832, + "grad_norm": 486.0, + "learning_rate": 9.225534108783906e-05, + "loss": 18.1253, + "step": 4898 + }, + { + "epoch": 0.20420157559084656, + "grad_norm": 198.0, + "learning_rate": 9.22517321610518e-05, + "loss": 11.7505, + "step": 4899 + }, + { + "epoch": 0.2042432578883748, + "grad_norm": 386.0, + "learning_rate": 9.224812246422297e-05, + "loss": 13.441, + "step": 4900 + }, + { + "epoch": 0.20428494018590304, + "grad_norm": 358.0, + "learning_rate": 9.224451199741837e-05, + "loss": 14.3127, + "step": 4901 + }, + { + "epoch": 0.20432662248343128, + "grad_norm": 270.0, + "learning_rate": 9.224090076070378e-05, + "loss": 12.3762, + "step": 4902 + }, + { + "epoch": 0.20436830478095952, + "grad_norm": 544.0, + "learning_rate": 9.223728875414503e-05, + "loss": 15.6923, + "step": 4903 + }, + { + "epoch": 0.20440998707848776, + "grad_norm": 356.0, + "learning_rate": 9.223367597780792e-05, + "loss": 12.2534, + "step": 4904 + }, + { + "epoch": 0.204451669376016, + "grad_norm": 452.0, + "learning_rate": 9.223006243175833e-05, + "loss": 15.6878, + "step": 4905 + }, + { + "epoch": 0.20449335167354424, + "grad_norm": 532.0, + "learning_rate": 9.222644811606211e-05, + "loss": 19.0001, + "step": 4906 + }, + { + "epoch": 0.20453503397107248, + "grad_norm": 350.0, + "learning_rate": 9.222283303078511e-05, + "loss": 14.6879, + "step": 4907 + }, + { + "epoch": 0.20457671626860072, + "grad_norm": 508.0, + "learning_rate": 9.221921717599326e-05, + "loss": 15.501, + "step": 4908 + }, + { + "epoch": 0.20461839856612896, + "grad_norm": 286.0, + "learning_rate": 9.22156005517524e-05, + "loss": 13.0629, + "step": 4909 + }, + { + "epoch": 0.2046600808636572, + "grad_norm": 270.0, + "learning_rate": 9.221198315812849e-05, + "loss": 13.0002, + "step": 4910 + }, + { + "epoch": 0.20470176316118543, + "grad_norm": 320.0, + "learning_rate": 9.220836499518743e-05, + "loss": 13.1878, + "step": 4911 + }, + { + "epoch": 0.20474344545871367, + "grad_norm": 392.0, + "learning_rate": 9.220474606299516e-05, + "loss": 15.3753, + "step": 4912 + }, + { + "epoch": 0.2047851277562419, + "grad_norm": 418.0, + "learning_rate": 9.220112636161767e-05, + "loss": 15.0634, + "step": 4913 + }, + { + "epoch": 0.20482681005377015, + "grad_norm": 556.0, + "learning_rate": 9.219750589112089e-05, + "loss": 16.5005, + "step": 4914 + }, + { + "epoch": 0.2048684923512984, + "grad_norm": 290.0, + "learning_rate": 9.219388465157082e-05, + "loss": 12.7503, + "step": 4915 + }, + { + "epoch": 0.20491017464882663, + "grad_norm": 180.0, + "learning_rate": 9.219026264303347e-05, + "loss": 11.1266, + "step": 4916 + }, + { + "epoch": 0.20495185694635487, + "grad_norm": 160.0, + "learning_rate": 9.218663986557483e-05, + "loss": 11.001, + "step": 4917 + }, + { + "epoch": 0.2049935392438831, + "grad_norm": 348.0, + "learning_rate": 9.218301631926092e-05, + "loss": 11.8756, + "step": 4918 + }, + { + "epoch": 0.20503522154141135, + "grad_norm": 490.0, + "learning_rate": 9.21793920041578e-05, + "loss": 16.5003, + "step": 4919 + }, + { + "epoch": 0.2050769038389396, + "grad_norm": 330.0, + "learning_rate": 9.217576692033153e-05, + "loss": 12.0627, + "step": 4920 + }, + { + "epoch": 0.20511858613646783, + "grad_norm": 170.0, + "learning_rate": 9.217214106784816e-05, + "loss": 11.2507, + "step": 4921 + }, + { + "epoch": 0.20516026843399607, + "grad_norm": 376.0, + "learning_rate": 9.216851444677377e-05, + "loss": 14.5633, + "step": 4922 + }, + { + "epoch": 0.20520195073152434, + "grad_norm": 608.0, + "learning_rate": 9.216488705717445e-05, + "loss": 19.5005, + "step": 4923 + }, + { + "epoch": 0.20524363302905257, + "grad_norm": 358.0, + "learning_rate": 9.216125889911633e-05, + "loss": 13.5637, + "step": 4924 + }, + { + "epoch": 0.20528531532658081, + "grad_norm": 153.0, + "learning_rate": 9.215762997266552e-05, + "loss": 10.8132, + "step": 4925 + }, + { + "epoch": 0.20532699762410905, + "grad_norm": 348.0, + "learning_rate": 9.215400027788817e-05, + "loss": 14.0001, + "step": 4926 + }, + { + "epoch": 0.2053686799216373, + "grad_norm": 163.0, + "learning_rate": 9.215036981485042e-05, + "loss": 10.1256, + "step": 4927 + }, + { + "epoch": 0.20541036221916553, + "grad_norm": 336.0, + "learning_rate": 9.214673858361844e-05, + "loss": 13.6278, + "step": 4928 + }, + { + "epoch": 0.20545204451669377, + "grad_norm": 608.0, + "learning_rate": 9.21431065842584e-05, + "loss": 19.0005, + "step": 4929 + }, + { + "epoch": 0.205493726814222, + "grad_norm": 724.0, + "learning_rate": 9.213947381683651e-05, + "loss": 20.2502, + "step": 4930 + }, + { + "epoch": 0.20553540911175025, + "grad_norm": 278.0, + "learning_rate": 9.213584028141897e-05, + "loss": 9.3126, + "step": 4931 + }, + { + "epoch": 0.2055770914092785, + "grad_norm": 142.0, + "learning_rate": 9.213220597807199e-05, + "loss": 9.9379, + "step": 4932 + }, + { + "epoch": 0.20561877370680673, + "grad_norm": 199.0, + "learning_rate": 9.212857090686182e-05, + "loss": 9.3754, + "step": 4933 + }, + { + "epoch": 0.20566045600433497, + "grad_norm": 494.0, + "learning_rate": 9.212493506785472e-05, + "loss": 17.0002, + "step": 4934 + }, + { + "epoch": 0.2057021383018632, + "grad_norm": 268.0, + "learning_rate": 9.212129846111693e-05, + "loss": 11.4381, + "step": 4935 + }, + { + "epoch": 0.20574382059939145, + "grad_norm": 320.0, + "learning_rate": 9.211766108671471e-05, + "loss": 13.5013, + "step": 4936 + }, + { + "epoch": 0.2057855028969197, + "grad_norm": 452.0, + "learning_rate": 9.211402294471441e-05, + "loss": 12.8761, + "step": 4937 + }, + { + "epoch": 0.20582718519444793, + "grad_norm": 464.0, + "learning_rate": 9.211038403518229e-05, + "loss": 16.2504, + "step": 4938 + }, + { + "epoch": 0.20586886749197617, + "grad_norm": 108.0, + "learning_rate": 9.210674435818468e-05, + "loss": 9.7505, + "step": 4939 + }, + { + "epoch": 0.2059105497895044, + "grad_norm": 206.0, + "learning_rate": 9.210310391378793e-05, + "loss": 10.5003, + "step": 4940 + }, + { + "epoch": 0.20595223208703264, + "grad_norm": 696.0, + "learning_rate": 9.209946270205836e-05, + "loss": 17.3773, + "step": 4941 + }, + { + "epoch": 0.20599391438456088, + "grad_norm": 211.0, + "learning_rate": 9.209582072306235e-05, + "loss": 11.8753, + "step": 4942 + }, + { + "epoch": 0.20603559668208912, + "grad_norm": 280.0, + "learning_rate": 9.209217797686626e-05, + "loss": 12.6253, + "step": 4943 + }, + { + "epoch": 0.20607727897961736, + "grad_norm": 488.0, + "learning_rate": 9.208853446353651e-05, + "loss": 15.5036, + "step": 4944 + }, + { + "epoch": 0.2061189612771456, + "grad_norm": 422.0, + "learning_rate": 9.208489018313948e-05, + "loss": 15.0008, + "step": 4945 + }, + { + "epoch": 0.20616064357467384, + "grad_norm": 338.0, + "learning_rate": 9.208124513574158e-05, + "loss": 14.7502, + "step": 4946 + }, + { + "epoch": 0.20620232587220208, + "grad_norm": 264.0, + "learning_rate": 9.207759932140925e-05, + "loss": 11.6258, + "step": 4947 + }, + { + "epoch": 0.20624400816973032, + "grad_norm": 1136.0, + "learning_rate": 9.207395274020896e-05, + "loss": 33.5002, + "step": 4948 + }, + { + "epoch": 0.20628569046725856, + "grad_norm": 83.0, + "learning_rate": 9.20703053922071e-05, + "loss": 9.3135, + "step": 4949 + }, + { + "epoch": 0.2063273727647868, + "grad_norm": 328.0, + "learning_rate": 9.206665727747024e-05, + "loss": 13.1265, + "step": 4950 + }, + { + "epoch": 0.20636905506231504, + "grad_norm": 157.0, + "learning_rate": 9.206300839606478e-05, + "loss": 9.563, + "step": 4951 + }, + { + "epoch": 0.20641073735984328, + "grad_norm": 464.0, + "learning_rate": 9.205935874805728e-05, + "loss": 15.1261, + "step": 4952 + }, + { + "epoch": 0.20645241965737152, + "grad_norm": 46.0, + "learning_rate": 9.205570833351422e-05, + "loss": 6.5011, + "step": 4953 + }, + { + "epoch": 0.20649410195489976, + "grad_norm": 360.0, + "learning_rate": 9.205205715250216e-05, + "loss": 13.7504, + "step": 4954 + }, + { + "epoch": 0.206535784252428, + "grad_norm": 294.0, + "learning_rate": 9.204840520508762e-05, + "loss": 13.8756, + "step": 4955 + }, + { + "epoch": 0.20657746654995623, + "grad_norm": 486.0, + "learning_rate": 9.204475249133715e-05, + "loss": 16.6252, + "step": 4956 + }, + { + "epoch": 0.20661914884748447, + "grad_norm": 57.5, + "learning_rate": 9.204109901131734e-05, + "loss": 9.0629, + "step": 4957 + }, + { + "epoch": 0.2066608311450127, + "grad_norm": 956.0, + "learning_rate": 9.203744476509478e-05, + "loss": 26.5005, + "step": 4958 + }, + { + "epoch": 0.20670251344254095, + "grad_norm": 412.0, + "learning_rate": 9.203378975273603e-05, + "loss": 15.5003, + "step": 4959 + }, + { + "epoch": 0.2067441957400692, + "grad_norm": 396.0, + "learning_rate": 9.203013397430775e-05, + "loss": 15.3133, + "step": 4960 + }, + { + "epoch": 0.20678587803759743, + "grad_norm": 1280.0, + "learning_rate": 9.202647742987655e-05, + "loss": 26.8798, + "step": 4961 + }, + { + "epoch": 0.20682756033512567, + "grad_norm": 308.0, + "learning_rate": 9.202282011950904e-05, + "loss": 13.1275, + "step": 4962 + }, + { + "epoch": 0.2068692426326539, + "grad_norm": 332.0, + "learning_rate": 9.201916204327194e-05, + "loss": 14.3755, + "step": 4963 + }, + { + "epoch": 0.20691092493018215, + "grad_norm": 90.0, + "learning_rate": 9.201550320123187e-05, + "loss": 7.219, + "step": 4964 + }, + { + "epoch": 0.2069526072277104, + "grad_norm": 153.0, + "learning_rate": 9.201184359345552e-05, + "loss": 8.8755, + "step": 4965 + }, + { + "epoch": 0.20699428952523863, + "grad_norm": 258.0, + "learning_rate": 9.200818322000958e-05, + "loss": 11.4379, + "step": 4966 + }, + { + "epoch": 0.20703597182276687, + "grad_norm": 116.0, + "learning_rate": 9.200452208096079e-05, + "loss": 9.8128, + "step": 4967 + }, + { + "epoch": 0.2070776541202951, + "grad_norm": 157.0, + "learning_rate": 9.200086017637583e-05, + "loss": 9.5002, + "step": 4968 + }, + { + "epoch": 0.20711933641782335, + "grad_norm": 708.0, + "learning_rate": 9.19971975063215e-05, + "loss": 19.1253, + "step": 4969 + }, + { + "epoch": 0.20716101871535159, + "grad_norm": 438.0, + "learning_rate": 9.199353407086449e-05, + "loss": 15.7502, + "step": 4970 + }, + { + "epoch": 0.20720270101287983, + "grad_norm": 244.0, + "learning_rate": 9.19898698700716e-05, + "loss": 12.0007, + "step": 4971 + }, + { + "epoch": 0.20724438331040806, + "grad_norm": 398.0, + "learning_rate": 9.198620490400962e-05, + "loss": 15.8752, + "step": 4972 + }, + { + "epoch": 0.2072860656079363, + "grad_norm": 356.0, + "learning_rate": 9.198253917274532e-05, + "loss": 14.6265, + "step": 4973 + }, + { + "epoch": 0.20732774790546454, + "grad_norm": 131.0, + "learning_rate": 9.197887267634551e-05, + "loss": 8.0642, + "step": 4974 + }, + { + "epoch": 0.20736943020299278, + "grad_norm": 258.0, + "learning_rate": 9.197520541487703e-05, + "loss": 12.5006, + "step": 4975 + }, + { + "epoch": 0.20741111250052102, + "grad_norm": 584.0, + "learning_rate": 9.197153738840669e-05, + "loss": 19.8752, + "step": 4976 + }, + { + "epoch": 0.20745279479804926, + "grad_norm": 221.0, + "learning_rate": 9.196786859700137e-05, + "loss": 8.3127, + "step": 4977 + }, + { + "epoch": 0.2074944770955775, + "grad_norm": 266.0, + "learning_rate": 9.19641990407279e-05, + "loss": 11.8128, + "step": 4978 + }, + { + "epoch": 0.20753615939310574, + "grad_norm": 358.0, + "learning_rate": 9.196052871965319e-05, + "loss": 14.2508, + "step": 4979 + }, + { + "epoch": 0.20757784169063398, + "grad_norm": 580.0, + "learning_rate": 9.195685763384412e-05, + "loss": 16.1293, + "step": 4980 + }, + { + "epoch": 0.20761952398816222, + "grad_norm": 274.0, + "learning_rate": 9.19531857833676e-05, + "loss": 14.0013, + "step": 4981 + }, + { + "epoch": 0.20766120628569046, + "grad_norm": 636.0, + "learning_rate": 9.194951316829053e-05, + "loss": 19.1255, + "step": 4982 + }, + { + "epoch": 0.2077028885832187, + "grad_norm": 148.0, + "learning_rate": 9.194583978867988e-05, + "loss": 10.1255, + "step": 4983 + }, + { + "epoch": 0.20774457088074694, + "grad_norm": 392.0, + "learning_rate": 9.194216564460255e-05, + "loss": 15.2503, + "step": 4984 + }, + { + "epoch": 0.20778625317827518, + "grad_norm": 161.0, + "learning_rate": 9.193849073612555e-05, + "loss": 10.2513, + "step": 4985 + }, + { + "epoch": 0.20782793547580342, + "grad_norm": 71.0, + "learning_rate": 9.193481506331582e-05, + "loss": 8.1883, + "step": 4986 + }, + { + "epoch": 0.20786961777333166, + "grad_norm": 1496.0, + "learning_rate": 9.193113862624037e-05, + "loss": 35.2504, + "step": 4987 + }, + { + "epoch": 0.2079113000708599, + "grad_norm": 470.0, + "learning_rate": 9.192746142496619e-05, + "loss": 15.0003, + "step": 4988 + }, + { + "epoch": 0.20795298236838813, + "grad_norm": 362.0, + "learning_rate": 9.19237834595603e-05, + "loss": 14.939, + "step": 4989 + }, + { + "epoch": 0.20799466466591637, + "grad_norm": 470.0, + "learning_rate": 9.192010473008974e-05, + "loss": 16.7504, + "step": 4990 + }, + { + "epoch": 0.2080363469634446, + "grad_norm": 296.0, + "learning_rate": 9.191642523662156e-05, + "loss": 13.5013, + "step": 4991 + }, + { + "epoch": 0.20807802926097285, + "grad_norm": 131.0, + "learning_rate": 9.19127449792228e-05, + "loss": 10.4379, + "step": 4992 + }, + { + "epoch": 0.2081197115585011, + "grad_norm": 173.0, + "learning_rate": 9.190906395796054e-05, + "loss": 10.3772, + "step": 4993 + }, + { + "epoch": 0.20816139385602933, + "grad_norm": 290.0, + "learning_rate": 9.190538217290187e-05, + "loss": 11.5003, + "step": 4994 + }, + { + "epoch": 0.20820307615355757, + "grad_norm": 318.0, + "learning_rate": 9.190169962411389e-05, + "loss": 13.3135, + "step": 4995 + }, + { + "epoch": 0.20824475845108584, + "grad_norm": 516.0, + "learning_rate": 9.189801631166371e-05, + "loss": 17.6252, + "step": 4996 + }, + { + "epoch": 0.20828644074861408, + "grad_norm": 288.0, + "learning_rate": 9.189433223561848e-05, + "loss": 14.1254, + "step": 4997 + }, + { + "epoch": 0.20832812304614232, + "grad_norm": 107.5, + "learning_rate": 9.189064739604532e-05, + "loss": 8.8129, + "step": 4998 + }, + { + "epoch": 0.20836980534367056, + "grad_norm": 233.0, + "learning_rate": 9.188696179301137e-05, + "loss": 9.6882, + "step": 4999 + }, + { + "epoch": 0.2084114876411988, + "grad_norm": 536.0, + "learning_rate": 9.188327542658384e-05, + "loss": 18.0007, + "step": 5000 + }, + { + "epoch": 0.20845316993872703, + "grad_norm": 320.0, + "learning_rate": 9.18795882968299e-05, + "loss": 14.0004, + "step": 5001 + }, + { + "epoch": 0.20849485223625527, + "grad_norm": 692.0, + "learning_rate": 9.187590040381676e-05, + "loss": 19.6257, + "step": 5002 + }, + { + "epoch": 0.2085365345337835, + "grad_norm": 352.0, + "learning_rate": 9.18722117476116e-05, + "loss": 13.0003, + "step": 5003 + }, + { + "epoch": 0.20857821683131175, + "grad_norm": 312.0, + "learning_rate": 9.186852232828164e-05, + "loss": 12.8752, + "step": 5004 + }, + { + "epoch": 0.20861989912884, + "grad_norm": 652.0, + "learning_rate": 9.186483214589418e-05, + "loss": 17.2553, + "step": 5005 + }, + { + "epoch": 0.20866158142636823, + "grad_norm": 760.0, + "learning_rate": 9.186114120051643e-05, + "loss": 19.8755, + "step": 5006 + }, + { + "epoch": 0.20870326372389647, + "grad_norm": 280.0, + "learning_rate": 9.185744949221566e-05, + "loss": 11.813, + "step": 5007 + }, + { + "epoch": 0.2087449460214247, + "grad_norm": 772.0, + "learning_rate": 9.185375702105916e-05, + "loss": 20.3766, + "step": 5008 + }, + { + "epoch": 0.20878662831895295, + "grad_norm": 123.0, + "learning_rate": 9.185006378711423e-05, + "loss": 8.1255, + "step": 5009 + }, + { + "epoch": 0.2088283106164812, + "grad_norm": 320.0, + "learning_rate": 9.184636979044816e-05, + "loss": 14.3137, + "step": 5010 + }, + { + "epoch": 0.20886999291400943, + "grad_norm": 322.0, + "learning_rate": 9.184267503112829e-05, + "loss": 13.3754, + "step": 5011 + }, + { + "epoch": 0.20891167521153767, + "grad_norm": 960.0, + "learning_rate": 9.183897950922194e-05, + "loss": 25.7502, + "step": 5012 + }, + { + "epoch": 0.2089533575090659, + "grad_norm": 498.0, + "learning_rate": 9.183528322479648e-05, + "loss": 17.3757, + "step": 5013 + }, + { + "epoch": 0.20899503980659415, + "grad_norm": 212.0, + "learning_rate": 9.18315861779193e-05, + "loss": 11.3755, + "step": 5014 + }, + { + "epoch": 0.20903672210412239, + "grad_norm": 78.5, + "learning_rate": 9.182788836865772e-05, + "loss": 9.0008, + "step": 5015 + }, + { + "epoch": 0.20907840440165062, + "grad_norm": 185.0, + "learning_rate": 9.182418979707917e-05, + "loss": 11.1259, + "step": 5016 + }, + { + "epoch": 0.20912008669917886, + "grad_norm": 376.0, + "learning_rate": 9.182049046325103e-05, + "loss": 12.7501, + "step": 5017 + }, + { + "epoch": 0.2091617689967071, + "grad_norm": 358.0, + "learning_rate": 9.181679036724076e-05, + "loss": 12.6894, + "step": 5018 + }, + { + "epoch": 0.20920345129423534, + "grad_norm": 276.0, + "learning_rate": 9.181308950911576e-05, + "loss": 12.5628, + "step": 5019 + }, + { + "epoch": 0.20924513359176358, + "grad_norm": 224.0, + "learning_rate": 9.180938788894351e-05, + "loss": 11.7504, + "step": 5020 + }, + { + "epoch": 0.20928681588929182, + "grad_norm": 73.0, + "learning_rate": 9.180568550679143e-05, + "loss": 8.6877, + "step": 5021 + }, + { + "epoch": 0.20932849818682006, + "grad_norm": 388.0, + "learning_rate": 9.180198236272704e-05, + "loss": 14.8757, + "step": 5022 + }, + { + "epoch": 0.2093701804843483, + "grad_norm": 132.0, + "learning_rate": 9.179827845681782e-05, + "loss": 8.7506, + "step": 5023 + }, + { + "epoch": 0.20941186278187654, + "grad_norm": 644.0, + "learning_rate": 9.179457378913124e-05, + "loss": 19.6255, + "step": 5024 + }, + { + "epoch": 0.20945354507940478, + "grad_norm": 410.0, + "learning_rate": 9.179086835973484e-05, + "loss": 15.8751, + "step": 5025 + }, + { + "epoch": 0.20949522737693302, + "grad_norm": 354.0, + "learning_rate": 9.178716216869616e-05, + "loss": 14.6879, + "step": 5026 + }, + { + "epoch": 0.20953690967446126, + "grad_norm": 412.0, + "learning_rate": 9.178345521608276e-05, + "loss": 15.6252, + "step": 5027 + }, + { + "epoch": 0.2095785919719895, + "grad_norm": 202.0, + "learning_rate": 9.177974750196216e-05, + "loss": 12.1266, + "step": 5028 + }, + { + "epoch": 0.20962027426951774, + "grad_norm": 612.0, + "learning_rate": 9.177603902640195e-05, + "loss": 20.5002, + "step": 5029 + }, + { + "epoch": 0.20966195656704598, + "grad_norm": 138.0, + "learning_rate": 9.177232978946973e-05, + "loss": 10.3755, + "step": 5030 + }, + { + "epoch": 0.20970363886457422, + "grad_norm": 160.0, + "learning_rate": 9.176861979123307e-05, + "loss": 11.5628, + "step": 5031 + }, + { + "epoch": 0.20974532116210245, + "grad_norm": 552.0, + "learning_rate": 9.176490903175965e-05, + "loss": 18.2504, + "step": 5032 + }, + { + "epoch": 0.2097870034596307, + "grad_norm": 290.0, + "learning_rate": 9.176119751111703e-05, + "loss": 13.5626, + "step": 5033 + }, + { + "epoch": 0.20982868575715893, + "grad_norm": 374.0, + "learning_rate": 9.175748522937287e-05, + "loss": 12.8133, + "step": 5034 + }, + { + "epoch": 0.20987036805468717, + "grad_norm": 1440.0, + "learning_rate": 9.175377218659485e-05, + "loss": 31.1303, + "step": 5035 + }, + { + "epoch": 0.2099120503522154, + "grad_norm": 376.0, + "learning_rate": 9.175005838285062e-05, + "loss": 15.1881, + "step": 5036 + }, + { + "epoch": 0.20995373264974365, + "grad_norm": 408.0, + "learning_rate": 9.174634381820786e-05, + "loss": 15.563, + "step": 5037 + }, + { + "epoch": 0.2099954149472719, + "grad_norm": 410.0, + "learning_rate": 9.17426284927343e-05, + "loss": 13.9408, + "step": 5038 + }, + { + "epoch": 0.21003709724480013, + "grad_norm": 1456.0, + "learning_rate": 9.17389124064976e-05, + "loss": 30.6258, + "step": 5039 + }, + { + "epoch": 0.21007877954232837, + "grad_norm": 360.0, + "learning_rate": 9.173519555956553e-05, + "loss": 15.4387, + "step": 5040 + }, + { + "epoch": 0.2101204618398566, + "grad_norm": 238.0, + "learning_rate": 9.173147795200583e-05, + "loss": 12.3128, + "step": 5041 + }, + { + "epoch": 0.21016214413738485, + "grad_norm": 400.0, + "learning_rate": 9.172775958388623e-05, + "loss": 12.3752, + "step": 5042 + }, + { + "epoch": 0.2102038264349131, + "grad_norm": 442.0, + "learning_rate": 9.17240404552745e-05, + "loss": 15.5052, + "step": 5043 + }, + { + "epoch": 0.21024550873244133, + "grad_norm": 330.0, + "learning_rate": 9.172032056623843e-05, + "loss": 13.6253, + "step": 5044 + }, + { + "epoch": 0.21028719102996957, + "grad_norm": 324.0, + "learning_rate": 9.171659991684583e-05, + "loss": 13.6265, + "step": 5045 + }, + { + "epoch": 0.2103288733274978, + "grad_norm": 213.0, + "learning_rate": 9.171287850716448e-05, + "loss": 11.1254, + "step": 5046 + }, + { + "epoch": 0.21037055562502605, + "grad_norm": 288.0, + "learning_rate": 9.17091563372622e-05, + "loss": 12.5627, + "step": 5047 + }, + { + "epoch": 0.21041223792255428, + "grad_norm": 209.0, + "learning_rate": 9.170543340720687e-05, + "loss": 11.376, + "step": 5048 + }, + { + "epoch": 0.21045392022008252, + "grad_norm": 160.0, + "learning_rate": 9.170170971706631e-05, + "loss": 9.6878, + "step": 5049 + }, + { + "epoch": 0.21049560251761076, + "grad_norm": 728.0, + "learning_rate": 9.169798526690838e-05, + "loss": 21.2505, + "step": 5050 + }, + { + "epoch": 0.210537284815139, + "grad_norm": 240.0, + "learning_rate": 9.169426005680097e-05, + "loss": 12.1883, + "step": 5051 + }, + { + "epoch": 0.21057896711266724, + "grad_norm": 462.0, + "learning_rate": 9.169053408681197e-05, + "loss": 16.3753, + "step": 5052 + }, + { + "epoch": 0.21062064941019548, + "grad_norm": 284.0, + "learning_rate": 9.168680735700928e-05, + "loss": 10.3136, + "step": 5053 + }, + { + "epoch": 0.21066233170772372, + "grad_norm": 290.0, + "learning_rate": 9.168307986746083e-05, + "loss": 12.6267, + "step": 5054 + }, + { + "epoch": 0.21070401400525196, + "grad_norm": 576.0, + "learning_rate": 9.167935161823456e-05, + "loss": 16.7504, + "step": 5055 + }, + { + "epoch": 0.2107456963027802, + "grad_norm": 196.0, + "learning_rate": 9.16756226093984e-05, + "loss": 12.0007, + "step": 5056 + }, + { + "epoch": 0.21078737860030844, + "grad_norm": 98.5, + "learning_rate": 9.16718928410203e-05, + "loss": 9.5629, + "step": 5057 + }, + { + "epoch": 0.21082906089783668, + "grad_norm": 370.0, + "learning_rate": 9.166816231316825e-05, + "loss": 15.6253, + "step": 5058 + }, + { + "epoch": 0.21087074319536492, + "grad_norm": 209.0, + "learning_rate": 9.166443102591028e-05, + "loss": 10.6876, + "step": 5059 + }, + { + "epoch": 0.21091242549289316, + "grad_norm": 127.5, + "learning_rate": 9.166069897931433e-05, + "loss": 8.6898, + "step": 5060 + }, + { + "epoch": 0.2109541077904214, + "grad_norm": 640.0, + "learning_rate": 9.165696617344846e-05, + "loss": 18.8753, + "step": 5061 + }, + { + "epoch": 0.21099579008794964, + "grad_norm": 536.0, + "learning_rate": 9.165323260838066e-05, + "loss": 18.1252, + "step": 5062 + }, + { + "epoch": 0.21103747238547788, + "grad_norm": 258.0, + "learning_rate": 9.164949828417902e-05, + "loss": 12.5627, + "step": 5063 + }, + { + "epoch": 0.21107915468300611, + "grad_norm": 74.5, + "learning_rate": 9.164576320091156e-05, + "loss": 8.6888, + "step": 5064 + }, + { + "epoch": 0.21112083698053435, + "grad_norm": 155.0, + "learning_rate": 9.164202735864638e-05, + "loss": 10.1877, + "step": 5065 + }, + { + "epoch": 0.2111625192780626, + "grad_norm": 344.0, + "learning_rate": 9.163829075745155e-05, + "loss": 13.7502, + "step": 5066 + }, + { + "epoch": 0.21120420157559083, + "grad_norm": 446.0, + "learning_rate": 9.163455339739517e-05, + "loss": 16.0001, + "step": 5067 + }, + { + "epoch": 0.21124588387311907, + "grad_norm": 354.0, + "learning_rate": 9.163081527854537e-05, + "loss": 15.3127, + "step": 5068 + }, + { + "epoch": 0.21128756617064734, + "grad_norm": 274.0, + "learning_rate": 9.162707640097026e-05, + "loss": 11.8752, + "step": 5069 + }, + { + "epoch": 0.21132924846817558, + "grad_norm": 648.0, + "learning_rate": 9.162333676473798e-05, + "loss": 20.0021, + "step": 5070 + }, + { + "epoch": 0.21137093076570382, + "grad_norm": 200.0, + "learning_rate": 9.16195963699167e-05, + "loss": 10.938, + "step": 5071 + }, + { + "epoch": 0.21141261306323206, + "grad_norm": 1264.0, + "learning_rate": 9.161585521657458e-05, + "loss": 25.8814, + "step": 5072 + }, + { + "epoch": 0.2114542953607603, + "grad_norm": 88.0, + "learning_rate": 9.161211330477981e-05, + "loss": 6.3135, + "step": 5073 + }, + { + "epoch": 0.21149597765828854, + "grad_norm": 406.0, + "learning_rate": 9.160837063460057e-05, + "loss": 14.6255, + "step": 5074 + }, + { + "epoch": 0.21153765995581678, + "grad_norm": 221.0, + "learning_rate": 9.160462720610509e-05, + "loss": 11.0627, + "step": 5075 + }, + { + "epoch": 0.21157934225334502, + "grad_norm": 79.5, + "learning_rate": 9.160088301936159e-05, + "loss": 7.4065, + "step": 5076 + }, + { + "epoch": 0.21162102455087325, + "grad_norm": 322.0, + "learning_rate": 9.159713807443829e-05, + "loss": 14.4379, + "step": 5077 + }, + { + "epoch": 0.2116627068484015, + "grad_norm": 944.0, + "learning_rate": 9.159339237140346e-05, + "loss": 26.5002, + "step": 5078 + }, + { + "epoch": 0.21170438914592973, + "grad_norm": 227.0, + "learning_rate": 9.158964591032537e-05, + "loss": 11.0627, + "step": 5079 + }, + { + "epoch": 0.21174607144345797, + "grad_norm": 376.0, + "learning_rate": 9.158589869127229e-05, + "loss": 14.4378, + "step": 5080 + }, + { + "epoch": 0.2117877537409862, + "grad_norm": 61.5, + "learning_rate": 9.158215071431251e-05, + "loss": 8.563, + "step": 5081 + }, + { + "epoch": 0.21182943603851445, + "grad_norm": 328.0, + "learning_rate": 9.157840197951433e-05, + "loss": 13.8759, + "step": 5082 + }, + { + "epoch": 0.2118711183360427, + "grad_norm": 254.0, + "learning_rate": 9.15746524869461e-05, + "loss": 8.4379, + "step": 5083 + }, + { + "epoch": 0.21191280063357093, + "grad_norm": 174.0, + "learning_rate": 9.157090223667614e-05, + "loss": 10.6259, + "step": 5084 + }, + { + "epoch": 0.21195448293109917, + "grad_norm": 548.0, + "learning_rate": 9.156715122877279e-05, + "loss": 18.7513, + "step": 5085 + }, + { + "epoch": 0.2119961652286274, + "grad_norm": 460.0, + "learning_rate": 9.156339946330441e-05, + "loss": 14.7502, + "step": 5086 + }, + { + "epoch": 0.21203784752615565, + "grad_norm": 656.0, + "learning_rate": 9.15596469403394e-05, + "loss": 19.6252, + "step": 5087 + }, + { + "epoch": 0.2120795298236839, + "grad_norm": 298.0, + "learning_rate": 9.155589365994612e-05, + "loss": 13.0626, + "step": 5088 + }, + { + "epoch": 0.21212121212121213, + "grad_norm": 189.0, + "learning_rate": 9.1552139622193e-05, + "loss": 10.2504, + "step": 5089 + }, + { + "epoch": 0.21216289441874037, + "grad_norm": 432.0, + "learning_rate": 9.154838482714844e-05, + "loss": 16.1255, + "step": 5090 + }, + { + "epoch": 0.2122045767162686, + "grad_norm": 568.0, + "learning_rate": 9.154462927488089e-05, + "loss": 17.1253, + "step": 5091 + }, + { + "epoch": 0.21224625901379685, + "grad_norm": 168.0, + "learning_rate": 9.154087296545877e-05, + "loss": 10.6879, + "step": 5092 + }, + { + "epoch": 0.21228794131132508, + "grad_norm": 282.0, + "learning_rate": 9.153711589895057e-05, + "loss": 12.0626, + "step": 5093 + }, + { + "epoch": 0.21232962360885332, + "grad_norm": 245.0, + "learning_rate": 9.153335807542472e-05, + "loss": 11.0631, + "step": 5094 + }, + { + "epoch": 0.21237130590638156, + "grad_norm": 592.0, + "learning_rate": 9.152959949494975e-05, + "loss": 18.3752, + "step": 5095 + }, + { + "epoch": 0.2124129882039098, + "grad_norm": 600.0, + "learning_rate": 9.152584015759413e-05, + "loss": 16.7503, + "step": 5096 + }, + { + "epoch": 0.21245467050143804, + "grad_norm": 684.0, + "learning_rate": 9.152208006342641e-05, + "loss": 21.5003, + "step": 5097 + }, + { + "epoch": 0.21249635279896628, + "grad_norm": 166.0, + "learning_rate": 9.151831921251508e-05, + "loss": 10.4385, + "step": 5098 + }, + { + "epoch": 0.21253803509649452, + "grad_norm": 1136.0, + "learning_rate": 9.151455760492868e-05, + "loss": 25.3804, + "step": 5099 + }, + { + "epoch": 0.21257971739402276, + "grad_norm": 374.0, + "learning_rate": 9.151079524073581e-05, + "loss": 15.2503, + "step": 5100 + }, + { + "epoch": 0.212621399691551, + "grad_norm": 438.0, + "learning_rate": 9.1507032120005e-05, + "loss": 14.9406, + "step": 5101 + }, + { + "epoch": 0.21266308198907924, + "grad_norm": 462.0, + "learning_rate": 9.150326824280483e-05, + "loss": 15.5003, + "step": 5102 + }, + { + "epoch": 0.21270476428660748, + "grad_norm": 1464.0, + "learning_rate": 9.149950360920394e-05, + "loss": 31.0053, + "step": 5103 + }, + { + "epoch": 0.21274644658413572, + "grad_norm": 412.0, + "learning_rate": 9.149573821927091e-05, + "loss": 16.2533, + "step": 5104 + }, + { + "epoch": 0.21278812888166396, + "grad_norm": 724.0, + "learning_rate": 9.149197207307435e-05, + "loss": 24.6257, + "step": 5105 + }, + { + "epoch": 0.2128298111791922, + "grad_norm": 234.0, + "learning_rate": 9.148820517068292e-05, + "loss": 10.8139, + "step": 5106 + }, + { + "epoch": 0.21287149347672044, + "grad_norm": 100.0, + "learning_rate": 9.148443751216527e-05, + "loss": 8.8755, + "step": 5107 + }, + { + "epoch": 0.21291317577424868, + "grad_norm": 580.0, + "learning_rate": 9.148066909759006e-05, + "loss": 19.1255, + "step": 5108 + }, + { + "epoch": 0.21295485807177691, + "grad_norm": 572.0, + "learning_rate": 9.1476899927026e-05, + "loss": 17.2519, + "step": 5109 + }, + { + "epoch": 0.21299654036930515, + "grad_norm": 258.0, + "learning_rate": 9.147313000054171e-05, + "loss": 11.7503, + "step": 5110 + }, + { + "epoch": 0.2130382226668334, + "grad_norm": 390.0, + "learning_rate": 9.146935931820598e-05, + "loss": 14.2503, + "step": 5111 + }, + { + "epoch": 0.21307990496436163, + "grad_norm": 652.0, + "learning_rate": 9.146558788008747e-05, + "loss": 19.6254, + "step": 5112 + }, + { + "epoch": 0.21312158726188987, + "grad_norm": 98.0, + "learning_rate": 9.146181568625496e-05, + "loss": 10.4384, + "step": 5113 + }, + { + "epoch": 0.2131632695594181, + "grad_norm": 1592.0, + "learning_rate": 9.145804273677719e-05, + "loss": 33.7549, + "step": 5114 + }, + { + "epoch": 0.21320495185694635, + "grad_norm": 156.0, + "learning_rate": 9.145426903172288e-05, + "loss": 10.8128, + "step": 5115 + }, + { + "epoch": 0.2132466341544746, + "grad_norm": 314.0, + "learning_rate": 9.145049457116085e-05, + "loss": 14.5628, + "step": 5116 + }, + { + "epoch": 0.21328831645200283, + "grad_norm": 516.0, + "learning_rate": 9.144671935515988e-05, + "loss": 14.816, + "step": 5117 + }, + { + "epoch": 0.21332999874953107, + "grad_norm": 81.0, + "learning_rate": 9.144294338378875e-05, + "loss": 7.5631, + "step": 5118 + }, + { + "epoch": 0.2133716810470593, + "grad_norm": 264.0, + "learning_rate": 9.143916665711632e-05, + "loss": 11.5009, + "step": 5119 + }, + { + "epoch": 0.21341336334458755, + "grad_norm": 388.0, + "learning_rate": 9.143538917521139e-05, + "loss": 15.7523, + "step": 5120 + }, + { + "epoch": 0.2134550456421158, + "grad_norm": 110.5, + "learning_rate": 9.143161093814283e-05, + "loss": 9.6883, + "step": 5121 + }, + { + "epoch": 0.21349672793964403, + "grad_norm": 172.0, + "learning_rate": 9.142783194597946e-05, + "loss": 11.0006, + "step": 5122 + }, + { + "epoch": 0.21353841023717227, + "grad_norm": 191.0, + "learning_rate": 9.14240521987902e-05, + "loss": 10.6883, + "step": 5123 + }, + { + "epoch": 0.2135800925347005, + "grad_norm": 300.0, + "learning_rate": 9.142027169664389e-05, + "loss": 13.6254, + "step": 5124 + }, + { + "epoch": 0.21362177483222874, + "grad_norm": 348.0, + "learning_rate": 9.141649043960948e-05, + "loss": 14.0005, + "step": 5125 + }, + { + "epoch": 0.21366345712975698, + "grad_norm": 227.0, + "learning_rate": 9.141270842775581e-05, + "loss": 11.3145, + "step": 5126 + }, + { + "epoch": 0.21370513942728522, + "grad_norm": 286.0, + "learning_rate": 9.140892566115187e-05, + "loss": 12.6252, + "step": 5127 + }, + { + "epoch": 0.21374682172481346, + "grad_norm": 237.0, + "learning_rate": 9.140514213986659e-05, + "loss": 10.8764, + "step": 5128 + }, + { + "epoch": 0.2137885040223417, + "grad_norm": 250.0, + "learning_rate": 9.140135786396893e-05, + "loss": 12.8129, + "step": 5129 + }, + { + "epoch": 0.21383018631986994, + "grad_norm": 320.0, + "learning_rate": 9.139757283352784e-05, + "loss": 13.0003, + "step": 5130 + }, + { + "epoch": 0.21387186861739818, + "grad_norm": 214.0, + "learning_rate": 9.13937870486123e-05, + "loss": 11.0627, + "step": 5131 + }, + { + "epoch": 0.21391355091492642, + "grad_norm": 352.0, + "learning_rate": 9.139000050929132e-05, + "loss": 14.9379, + "step": 5132 + }, + { + "epoch": 0.21395523321245466, + "grad_norm": 262.0, + "learning_rate": 9.13862132156339e-05, + "loss": 13.0627, + "step": 5133 + }, + { + "epoch": 0.2139969155099829, + "grad_norm": 506.0, + "learning_rate": 9.138242516770909e-05, + "loss": 16.6253, + "step": 5134 + }, + { + "epoch": 0.21403859780751114, + "grad_norm": 138.0, + "learning_rate": 9.13786363655859e-05, + "loss": 9.5636, + "step": 5135 + }, + { + "epoch": 0.21408028010503938, + "grad_norm": 348.0, + "learning_rate": 9.13748468093334e-05, + "loss": 13.9379, + "step": 5136 + }, + { + "epoch": 0.21412196240256762, + "grad_norm": 90.0, + "learning_rate": 9.137105649902061e-05, + "loss": 9.8132, + "step": 5137 + }, + { + "epoch": 0.21416364470009586, + "grad_norm": 179.0, + "learning_rate": 9.136726543471667e-05, + "loss": 6.0631, + "step": 5138 + }, + { + "epoch": 0.2142053269976241, + "grad_norm": 215.0, + "learning_rate": 9.136347361649063e-05, + "loss": 11.8128, + "step": 5139 + }, + { + "epoch": 0.21424700929515234, + "grad_norm": 438.0, + "learning_rate": 9.135968104441161e-05, + "loss": 16.7506, + "step": 5140 + }, + { + "epoch": 0.2142886915926806, + "grad_norm": 196.0, + "learning_rate": 9.135588771854874e-05, + "loss": 8.8763, + "step": 5141 + }, + { + "epoch": 0.21433037389020884, + "grad_norm": 344.0, + "learning_rate": 9.135209363897116e-05, + "loss": 14.3752, + "step": 5142 + }, + { + "epoch": 0.21437205618773708, + "grad_norm": 151.0, + "learning_rate": 9.134829880574799e-05, + "loss": 10.1252, + "step": 5143 + }, + { + "epoch": 0.21441373848526532, + "grad_norm": 588.0, + "learning_rate": 9.13445032189484e-05, + "loss": 18.8752, + "step": 5144 + }, + { + "epoch": 0.21445542078279356, + "grad_norm": 185.0, + "learning_rate": 9.134070687864157e-05, + "loss": 10.4378, + "step": 5145 + }, + { + "epoch": 0.2144971030803218, + "grad_norm": 234.0, + "learning_rate": 9.133690978489669e-05, + "loss": 11.5003, + "step": 5146 + }, + { + "epoch": 0.21453878537785004, + "grad_norm": 236.0, + "learning_rate": 9.133311193778295e-05, + "loss": 11.1878, + "step": 5147 + }, + { + "epoch": 0.21458046767537828, + "grad_norm": 414.0, + "learning_rate": 9.132931333736958e-05, + "loss": 12.8135, + "step": 5148 + }, + { + "epoch": 0.21462214997290652, + "grad_norm": 454.0, + "learning_rate": 9.132551398372582e-05, + "loss": 16.0005, + "step": 5149 + }, + { + "epoch": 0.21466383227043476, + "grad_norm": 284.0, + "learning_rate": 9.132171387692088e-05, + "loss": 11.5627, + "step": 5150 + }, + { + "epoch": 0.214705514567963, + "grad_norm": 158.0, + "learning_rate": 9.131791301702404e-05, + "loss": 6.4071, + "step": 5151 + }, + { + "epoch": 0.21474719686549124, + "grad_norm": 248.0, + "learning_rate": 9.131411140410457e-05, + "loss": 12.188, + "step": 5152 + }, + { + "epoch": 0.21478887916301947, + "grad_norm": 596.0, + "learning_rate": 9.131030903823176e-05, + "loss": 18.1263, + "step": 5153 + }, + { + "epoch": 0.21483056146054771, + "grad_norm": 212.0, + "learning_rate": 9.130650591947489e-05, + "loss": 10.0627, + "step": 5154 + }, + { + "epoch": 0.21487224375807595, + "grad_norm": 322.0, + "learning_rate": 9.130270204790329e-05, + "loss": 13.6886, + "step": 5155 + }, + { + "epoch": 0.2149139260556042, + "grad_norm": 258.0, + "learning_rate": 9.129889742358628e-05, + "loss": 12.2505, + "step": 5156 + }, + { + "epoch": 0.21495560835313243, + "grad_norm": 216.0, + "learning_rate": 9.129509204659319e-05, + "loss": 9.4378, + "step": 5157 + }, + { + "epoch": 0.21499729065066067, + "grad_norm": 384.0, + "learning_rate": 9.129128591699339e-05, + "loss": 12.6262, + "step": 5158 + }, + { + "epoch": 0.2150389729481889, + "grad_norm": 464.0, + "learning_rate": 9.128747903485622e-05, + "loss": 16.2509, + "step": 5159 + }, + { + "epoch": 0.21508065524571715, + "grad_norm": 636.0, + "learning_rate": 9.12836714002511e-05, + "loss": 19.8777, + "step": 5160 + }, + { + "epoch": 0.2151223375432454, + "grad_norm": 52.75, + "learning_rate": 9.12798630132474e-05, + "loss": 8.8133, + "step": 5161 + }, + { + "epoch": 0.21516401984077363, + "grad_norm": 816.0, + "learning_rate": 9.127605387391452e-05, + "loss": 19.2549, + "step": 5162 + }, + { + "epoch": 0.21520570213830187, + "grad_norm": 318.0, + "learning_rate": 9.12722439823219e-05, + "loss": 13.4379, + "step": 5163 + }, + { + "epoch": 0.2152473844358301, + "grad_norm": 238.0, + "learning_rate": 9.126843333853898e-05, + "loss": 11.8752, + "step": 5164 + }, + { + "epoch": 0.21528906673335835, + "grad_norm": 576.0, + "learning_rate": 9.126462194263518e-05, + "loss": 18.8766, + "step": 5165 + }, + { + "epoch": 0.2153307490308866, + "grad_norm": 93.0, + "learning_rate": 9.126080979468e-05, + "loss": 7.6257, + "step": 5166 + }, + { + "epoch": 0.21537243132841483, + "grad_norm": 424.0, + "learning_rate": 9.12569968947429e-05, + "loss": 15.9381, + "step": 5167 + }, + { + "epoch": 0.21541411362594307, + "grad_norm": 394.0, + "learning_rate": 9.125318324289335e-05, + "loss": 14.1253, + "step": 5168 + }, + { + "epoch": 0.2154557959234713, + "grad_norm": 468.0, + "learning_rate": 9.12493688392009e-05, + "loss": 14.7531, + "step": 5169 + }, + { + "epoch": 0.21549747822099954, + "grad_norm": 884.0, + "learning_rate": 9.124555368373502e-05, + "loss": 23.2502, + "step": 5170 + }, + { + "epoch": 0.21553916051852778, + "grad_norm": 396.0, + "learning_rate": 9.124173777656527e-05, + "loss": 14.9377, + "step": 5171 + }, + { + "epoch": 0.21558084281605602, + "grad_norm": 230.0, + "learning_rate": 9.123792111776119e-05, + "loss": 11.0003, + "step": 5172 + }, + { + "epoch": 0.21562252511358426, + "grad_norm": 292.0, + "learning_rate": 9.123410370739231e-05, + "loss": 13.3128, + "step": 5173 + }, + { + "epoch": 0.2156642074111125, + "grad_norm": 100.5, + "learning_rate": 9.123028554552825e-05, + "loss": 10.3754, + "step": 5174 + }, + { + "epoch": 0.21570588970864074, + "grad_norm": 145.0, + "learning_rate": 9.12264666322386e-05, + "loss": 11.1883, + "step": 5175 + }, + { + "epoch": 0.21574757200616898, + "grad_norm": 209.0, + "learning_rate": 9.12226469675929e-05, + "loss": 10.3135, + "step": 5176 + }, + { + "epoch": 0.21578925430369722, + "grad_norm": 219.0, + "learning_rate": 9.121882655166082e-05, + "loss": 10.6883, + "step": 5177 + }, + { + "epoch": 0.21583093660122546, + "grad_norm": 292.0, + "learning_rate": 9.121500538451196e-05, + "loss": 12.3798, + "step": 5178 + }, + { + "epoch": 0.2158726188987537, + "grad_norm": 147.0, + "learning_rate": 9.121118346621598e-05, + "loss": 10.0006, + "step": 5179 + }, + { + "epoch": 0.21591430119628194, + "grad_norm": 348.0, + "learning_rate": 9.12073607968425e-05, + "loss": 13.8753, + "step": 5180 + }, + { + "epoch": 0.21595598349381018, + "grad_norm": 816.0, + "learning_rate": 9.120353737646123e-05, + "loss": 22.8752, + "step": 5181 + }, + { + "epoch": 0.21599766579133842, + "grad_norm": 260.0, + "learning_rate": 9.119971320514183e-05, + "loss": 11.8753, + "step": 5182 + }, + { + "epoch": 0.21603934808886666, + "grad_norm": 237.0, + "learning_rate": 9.119588828295398e-05, + "loss": 13.0628, + "step": 5183 + }, + { + "epoch": 0.2160810303863949, + "grad_norm": 286.0, + "learning_rate": 9.119206260996743e-05, + "loss": 12.4377, + "step": 5184 + }, + { + "epoch": 0.21612271268392313, + "grad_norm": 736.0, + "learning_rate": 9.118823618625188e-05, + "loss": 22.3753, + "step": 5185 + }, + { + "epoch": 0.21616439498145137, + "grad_norm": 216.0, + "learning_rate": 9.118440901187706e-05, + "loss": 11.8127, + "step": 5186 + }, + { + "epoch": 0.2162060772789796, + "grad_norm": 227.0, + "learning_rate": 9.118058108691274e-05, + "loss": 12.4377, + "step": 5187 + }, + { + "epoch": 0.21624775957650785, + "grad_norm": 548.0, + "learning_rate": 9.117675241142866e-05, + "loss": 18.5003, + "step": 5188 + }, + { + "epoch": 0.2162894418740361, + "grad_norm": 362.0, + "learning_rate": 9.117292298549462e-05, + "loss": 14.3752, + "step": 5189 + }, + { + "epoch": 0.21633112417156433, + "grad_norm": 402.0, + "learning_rate": 9.11690928091804e-05, + "loss": 15.0629, + "step": 5190 + }, + { + "epoch": 0.21637280646909257, + "grad_norm": 816.0, + "learning_rate": 9.116526188255583e-05, + "loss": 21.6254, + "step": 5191 + }, + { + "epoch": 0.2164144887666208, + "grad_norm": 544.0, + "learning_rate": 9.11614302056907e-05, + "loss": 16.0035, + "step": 5192 + }, + { + "epoch": 0.21645617106414905, + "grad_norm": 380.0, + "learning_rate": 9.115759777865483e-05, + "loss": 14.2505, + "step": 5193 + }, + { + "epoch": 0.2164978533616773, + "grad_norm": 676.0, + "learning_rate": 9.11537646015181e-05, + "loss": 18.88, + "step": 5194 + }, + { + "epoch": 0.21653953565920553, + "grad_norm": 364.0, + "learning_rate": 9.114993067435036e-05, + "loss": 14.0627, + "step": 5195 + }, + { + "epoch": 0.21658121795673377, + "grad_norm": 468.0, + "learning_rate": 9.114609599722148e-05, + "loss": 15.2502, + "step": 5196 + }, + { + "epoch": 0.216622900254262, + "grad_norm": 211.0, + "learning_rate": 9.114226057020134e-05, + "loss": 10.5633, + "step": 5197 + }, + { + "epoch": 0.21666458255179025, + "grad_norm": 604.0, + "learning_rate": 9.113842439335986e-05, + "loss": 17.2506, + "step": 5198 + }, + { + "epoch": 0.2167062648493185, + "grad_norm": 480.0, + "learning_rate": 9.113458746676694e-05, + "loss": 17.6253, + "step": 5199 + }, + { + "epoch": 0.21674794714684673, + "grad_norm": 700.0, + "learning_rate": 9.11307497904925e-05, + "loss": 17.6263, + "step": 5200 + }, + { + "epoch": 0.21678962944437496, + "grad_norm": 140.0, + "learning_rate": 9.11269113646065e-05, + "loss": 11.0022, + "step": 5201 + }, + { + "epoch": 0.2168313117419032, + "grad_norm": 354.0, + "learning_rate": 9.11230721891789e-05, + "loss": 14.0627, + "step": 5202 + }, + { + "epoch": 0.21687299403943144, + "grad_norm": 340.0, + "learning_rate": 9.111923226427965e-05, + "loss": 13.6254, + "step": 5203 + }, + { + "epoch": 0.21691467633695968, + "grad_norm": 1552.0, + "learning_rate": 9.111539158997873e-05, + "loss": 36.5002, + "step": 5204 + }, + { + "epoch": 0.21695635863448792, + "grad_norm": 272.0, + "learning_rate": 9.111155016634616e-05, + "loss": 12.1256, + "step": 5205 + }, + { + "epoch": 0.21699804093201616, + "grad_norm": 1016.0, + "learning_rate": 9.110770799345194e-05, + "loss": 29.0003, + "step": 5206 + }, + { + "epoch": 0.2170397232295444, + "grad_norm": 73.0, + "learning_rate": 9.11038650713661e-05, + "loss": 9.5629, + "step": 5207 + }, + { + "epoch": 0.21708140552707264, + "grad_norm": 366.0, + "learning_rate": 9.110002140015866e-05, + "loss": 13.7503, + "step": 5208 + }, + { + "epoch": 0.21712308782460088, + "grad_norm": 1776.0, + "learning_rate": 9.109617697989967e-05, + "loss": 37.2537, + "step": 5209 + }, + { + "epoch": 0.21716477012212912, + "grad_norm": 316.0, + "learning_rate": 9.109233181065923e-05, + "loss": 14.752, + "step": 5210 + }, + { + "epoch": 0.21720645241965736, + "grad_norm": 656.0, + "learning_rate": 9.108848589250737e-05, + "loss": 18.8752, + "step": 5211 + }, + { + "epoch": 0.2172481347171856, + "grad_norm": 344.0, + "learning_rate": 9.108463922551423e-05, + "loss": 14.8753, + "step": 5212 + }, + { + "epoch": 0.21728981701471384, + "grad_norm": 292.0, + "learning_rate": 9.108079180974989e-05, + "loss": 13.188, + "step": 5213 + }, + { + "epoch": 0.2173314993122421, + "grad_norm": 258.0, + "learning_rate": 9.107694364528448e-05, + "loss": 12.0001, + "step": 5214 + }, + { + "epoch": 0.21737318160977034, + "grad_norm": 412.0, + "learning_rate": 9.10730947321881e-05, + "loss": 15.6877, + "step": 5215 + }, + { + "epoch": 0.21741486390729858, + "grad_norm": 219.0, + "learning_rate": 9.106924507053094e-05, + "loss": 11.3764, + "step": 5216 + }, + { + "epoch": 0.21745654620482682, + "grad_norm": 444.0, + "learning_rate": 9.106539466038313e-05, + "loss": 15.5001, + "step": 5217 + }, + { + "epoch": 0.21749822850235506, + "grad_norm": 484.0, + "learning_rate": 9.106154350181486e-05, + "loss": 16.1254, + "step": 5218 + }, + { + "epoch": 0.2175399107998833, + "grad_norm": 85.5, + "learning_rate": 9.105769159489632e-05, + "loss": 7.9692, + "step": 5219 + }, + { + "epoch": 0.21758159309741154, + "grad_norm": 334.0, + "learning_rate": 9.105383893969771e-05, + "loss": 12.9379, + "step": 5220 + }, + { + "epoch": 0.21762327539493978, + "grad_norm": 292.0, + "learning_rate": 9.104998553628923e-05, + "loss": 13.9378, + "step": 5221 + }, + { + "epoch": 0.21766495769246802, + "grad_norm": 220.0, + "learning_rate": 9.104613138474114e-05, + "loss": 12.0627, + "step": 5222 + }, + { + "epoch": 0.21770663998999626, + "grad_norm": 242.0, + "learning_rate": 9.104227648512364e-05, + "loss": 11.7502, + "step": 5223 + }, + { + "epoch": 0.2177483222875245, + "grad_norm": 124.5, + "learning_rate": 9.1038420837507e-05, + "loss": 9.1251, + "step": 5224 + }, + { + "epoch": 0.21779000458505274, + "grad_norm": 500.0, + "learning_rate": 9.103456444196152e-05, + "loss": 17.1253, + "step": 5225 + }, + { + "epoch": 0.21783168688258098, + "grad_norm": 232.0, + "learning_rate": 9.103070729855745e-05, + "loss": 10.0002, + "step": 5226 + }, + { + "epoch": 0.21787336918010922, + "grad_norm": 1176.0, + "learning_rate": 9.10268494073651e-05, + "loss": 27.1303, + "step": 5227 + }, + { + "epoch": 0.21791505147763746, + "grad_norm": 312.0, + "learning_rate": 9.102299076845477e-05, + "loss": 12.2506, + "step": 5228 + }, + { + "epoch": 0.2179567337751657, + "grad_norm": 506.0, + "learning_rate": 9.101913138189682e-05, + "loss": 18.1253, + "step": 5229 + }, + { + "epoch": 0.21799841607269393, + "grad_norm": 253.0, + "learning_rate": 9.101527124776152e-05, + "loss": 13.2505, + "step": 5230 + }, + { + "epoch": 0.21804009837022217, + "grad_norm": 264.0, + "learning_rate": 9.101141036611929e-05, + "loss": 12.8762, + "step": 5231 + }, + { + "epoch": 0.2180817806677504, + "grad_norm": 352.0, + "learning_rate": 9.100754873704044e-05, + "loss": 15.1255, + "step": 5232 + }, + { + "epoch": 0.21812346296527865, + "grad_norm": 464.0, + "learning_rate": 9.10036863605954e-05, + "loss": 15.0002, + "step": 5233 + }, + { + "epoch": 0.2181651452628069, + "grad_norm": 498.0, + "learning_rate": 9.099982323685451e-05, + "loss": 16.2507, + "step": 5234 + }, + { + "epoch": 0.21820682756033513, + "grad_norm": 548.0, + "learning_rate": 9.099595936588822e-05, + "loss": 18.1256, + "step": 5235 + }, + { + "epoch": 0.21824850985786337, + "grad_norm": 426.0, + "learning_rate": 9.099209474776694e-05, + "loss": 15.9377, + "step": 5236 + }, + { + "epoch": 0.2182901921553916, + "grad_norm": 424.0, + "learning_rate": 9.098822938256106e-05, + "loss": 18.7505, + "step": 5237 + }, + { + "epoch": 0.21833187445291985, + "grad_norm": 464.0, + "learning_rate": 9.09843632703411e-05, + "loss": 15.3762, + "step": 5238 + }, + { + "epoch": 0.2183735567504481, + "grad_norm": 472.0, + "learning_rate": 9.098049641117745e-05, + "loss": 16.5027, + "step": 5239 + }, + { + "epoch": 0.21841523904797633, + "grad_norm": 1112.0, + "learning_rate": 9.097662880514062e-05, + "loss": 25.7502, + "step": 5240 + }, + { + "epoch": 0.21845692134550457, + "grad_norm": 548.0, + "learning_rate": 9.097276045230111e-05, + "loss": 16.7528, + "step": 5241 + }, + { + "epoch": 0.2184986036430328, + "grad_norm": 304.0, + "learning_rate": 9.096889135272939e-05, + "loss": 14.1254, + "step": 5242 + }, + { + "epoch": 0.21854028594056105, + "grad_norm": 300.0, + "learning_rate": 9.0965021506496e-05, + "loss": 12.8132, + "step": 5243 + }, + { + "epoch": 0.21858196823808929, + "grad_norm": 392.0, + "learning_rate": 9.096115091367145e-05, + "loss": 15.3136, + "step": 5244 + }, + { + "epoch": 0.21862365053561753, + "grad_norm": 408.0, + "learning_rate": 9.095727957432627e-05, + "loss": 16.6282, + "step": 5245 + }, + { + "epoch": 0.21866533283314576, + "grad_norm": 370.0, + "learning_rate": 9.095340748853104e-05, + "loss": 16.2501, + "step": 5246 + }, + { + "epoch": 0.218707015130674, + "grad_norm": 1280.0, + "learning_rate": 9.094953465635635e-05, + "loss": 32.0005, + "step": 5247 + }, + { + "epoch": 0.21874869742820224, + "grad_norm": 94.5, + "learning_rate": 9.094566107787275e-05, + "loss": 6.7206, + "step": 5248 + }, + { + "epoch": 0.21879037972573048, + "grad_norm": 326.0, + "learning_rate": 9.094178675315081e-05, + "loss": 14.0629, + "step": 5249 + }, + { + "epoch": 0.21883206202325872, + "grad_norm": 434.0, + "learning_rate": 9.09379116822612e-05, + "loss": 16.0001, + "step": 5250 + }, + { + "epoch": 0.21887374432078696, + "grad_norm": 308.0, + "learning_rate": 9.093403586527452e-05, + "loss": 13.0003, + "step": 5251 + }, + { + "epoch": 0.2189154266183152, + "grad_norm": 262.0, + "learning_rate": 9.093015930226139e-05, + "loss": 12.7503, + "step": 5252 + }, + { + "epoch": 0.21895710891584344, + "grad_norm": 93.0, + "learning_rate": 9.092628199329248e-05, + "loss": 7.8753, + "step": 5253 + }, + { + "epoch": 0.21899879121337168, + "grad_norm": 308.0, + "learning_rate": 9.092240393843842e-05, + "loss": 13.5628, + "step": 5254 + }, + { + "epoch": 0.21904047351089992, + "grad_norm": 260.0, + "learning_rate": 9.091852513776995e-05, + "loss": 12.5011, + "step": 5255 + }, + { + "epoch": 0.21908215580842816, + "grad_norm": 520.0, + "learning_rate": 9.091464559135772e-05, + "loss": 18.7502, + "step": 5256 + }, + { + "epoch": 0.2191238381059564, + "grad_norm": 506.0, + "learning_rate": 9.091076529927242e-05, + "loss": 17.5005, + "step": 5257 + }, + { + "epoch": 0.21916552040348464, + "grad_norm": 280.0, + "learning_rate": 9.090688426158481e-05, + "loss": 13.5626, + "step": 5258 + }, + { + "epoch": 0.21920720270101288, + "grad_norm": 372.0, + "learning_rate": 9.090300247836561e-05, + "loss": 15.5002, + "step": 5259 + }, + { + "epoch": 0.21924888499854112, + "grad_norm": 452.0, + "learning_rate": 9.089911994968554e-05, + "loss": 12.8129, + "step": 5260 + }, + { + "epoch": 0.21929056729606936, + "grad_norm": 420.0, + "learning_rate": 9.089523667561539e-05, + "loss": 15.0627, + "step": 5261 + }, + { + "epoch": 0.2193322495935976, + "grad_norm": 59.75, + "learning_rate": 9.089135265622591e-05, + "loss": 7.0627, + "step": 5262 + }, + { + "epoch": 0.21937393189112583, + "grad_norm": 532.0, + "learning_rate": 9.088746789158791e-05, + "loss": 16.3776, + "step": 5263 + }, + { + "epoch": 0.21941561418865407, + "grad_norm": 452.0, + "learning_rate": 9.088358238177216e-05, + "loss": 16.5002, + "step": 5264 + }, + { + "epoch": 0.2194572964861823, + "grad_norm": 104.5, + "learning_rate": 9.087969612684952e-05, + "loss": 8.4377, + "step": 5265 + }, + { + "epoch": 0.21949897878371055, + "grad_norm": 240.0, + "learning_rate": 9.087580912689077e-05, + "loss": 11.5627, + "step": 5266 + }, + { + "epoch": 0.2195406610812388, + "grad_norm": 308.0, + "learning_rate": 9.087192138196678e-05, + "loss": 12.8141, + "step": 5267 + }, + { + "epoch": 0.21958234337876703, + "grad_norm": 608.0, + "learning_rate": 9.086803289214838e-05, + "loss": 21.3761, + "step": 5268 + }, + { + "epoch": 0.21962402567629527, + "grad_norm": 548.0, + "learning_rate": 9.086414365750647e-05, + "loss": 17.8761, + "step": 5269 + }, + { + "epoch": 0.2196657079738235, + "grad_norm": 191.0, + "learning_rate": 9.08602536781119e-05, + "loss": 10.8137, + "step": 5270 + }, + { + "epoch": 0.21970739027135175, + "grad_norm": 112.0, + "learning_rate": 9.085636295403559e-05, + "loss": 9.3131, + "step": 5271 + }, + { + "epoch": 0.21974907256888, + "grad_norm": 191.0, + "learning_rate": 9.085247148534843e-05, + "loss": 10.5627, + "step": 5272 + }, + { + "epoch": 0.21979075486640823, + "grad_norm": 316.0, + "learning_rate": 9.084857927212135e-05, + "loss": 12.5027, + "step": 5273 + }, + { + "epoch": 0.21983243716393647, + "grad_norm": 300.0, + "learning_rate": 9.08446863144253e-05, + "loss": 12.9377, + "step": 5274 + }, + { + "epoch": 0.2198741194614647, + "grad_norm": 498.0, + "learning_rate": 9.08407926123312e-05, + "loss": 15.6271, + "step": 5275 + }, + { + "epoch": 0.21991580175899295, + "grad_norm": 268.0, + "learning_rate": 9.083689816591004e-05, + "loss": 12.3751, + "step": 5276 + }, + { + "epoch": 0.21995748405652119, + "grad_norm": 496.0, + "learning_rate": 9.083300297523279e-05, + "loss": 16.8752, + "step": 5277 + }, + { + "epoch": 0.21999916635404942, + "grad_norm": 322.0, + "learning_rate": 9.082910704037042e-05, + "loss": 14.5627, + "step": 5278 + }, + { + "epoch": 0.22004084865157766, + "grad_norm": 1192.0, + "learning_rate": 9.082521036139395e-05, + "loss": 33.5011, + "step": 5279 + }, + { + "epoch": 0.2200825309491059, + "grad_norm": 322.0, + "learning_rate": 9.082131293837441e-05, + "loss": 13.6878, + "step": 5280 + }, + { + "epoch": 0.22012421324663414, + "grad_norm": 104.0, + "learning_rate": 9.081741477138282e-05, + "loss": 9.1883, + "step": 5281 + }, + { + "epoch": 0.22016589554416238, + "grad_norm": 472.0, + "learning_rate": 9.08135158604902e-05, + "loss": 13.4376, + "step": 5282 + }, + { + "epoch": 0.22020757784169062, + "grad_norm": 760.0, + "learning_rate": 9.080961620576765e-05, + "loss": 20.8752, + "step": 5283 + }, + { + "epoch": 0.22024926013921886, + "grad_norm": 356.0, + "learning_rate": 9.08057158072862e-05, + "loss": 14.3751, + "step": 5284 + }, + { + "epoch": 0.2202909424367471, + "grad_norm": 204.0, + "learning_rate": 9.0801814665117e-05, + "loss": 11.7514, + "step": 5285 + }, + { + "epoch": 0.22033262473427534, + "grad_norm": 376.0, + "learning_rate": 9.079791277933106e-05, + "loss": 14.7503, + "step": 5286 + }, + { + "epoch": 0.2203743070318036, + "grad_norm": 294.0, + "learning_rate": 9.079401014999956e-05, + "loss": 12.7502, + "step": 5287 + }, + { + "epoch": 0.22041598932933185, + "grad_norm": 350.0, + "learning_rate": 9.079010677719359e-05, + "loss": 13.2509, + "step": 5288 + }, + { + "epoch": 0.22045767162686009, + "grad_norm": 126.5, + "learning_rate": 9.078620266098432e-05, + "loss": 9.3759, + "step": 5289 + }, + { + "epoch": 0.22049935392438832, + "grad_norm": 158.0, + "learning_rate": 9.078229780144289e-05, + "loss": 10.1259, + "step": 5290 + }, + { + "epoch": 0.22054103622191656, + "grad_norm": 304.0, + "learning_rate": 9.077839219864044e-05, + "loss": 13.5005, + "step": 5291 + }, + { + "epoch": 0.2205827185194448, + "grad_norm": 140.0, + "learning_rate": 9.077448585264819e-05, + "loss": 10.0627, + "step": 5292 + }, + { + "epoch": 0.22062440081697304, + "grad_norm": 406.0, + "learning_rate": 9.077057876353731e-05, + "loss": 16.3759, + "step": 5293 + }, + { + "epoch": 0.22066608311450128, + "grad_norm": 163.0, + "learning_rate": 9.076667093137901e-05, + "loss": 7.5003, + "step": 5294 + }, + { + "epoch": 0.22070776541202952, + "grad_norm": 358.0, + "learning_rate": 9.076276235624452e-05, + "loss": 14.7505, + "step": 5295 + }, + { + "epoch": 0.22074944770955776, + "grad_norm": 270.0, + "learning_rate": 9.075885303820506e-05, + "loss": 12.3127, + "step": 5296 + }, + { + "epoch": 0.220791130007086, + "grad_norm": 426.0, + "learning_rate": 9.075494297733189e-05, + "loss": 15.1253, + "step": 5297 + }, + { + "epoch": 0.22083281230461424, + "grad_norm": 127.0, + "learning_rate": 9.075103217369626e-05, + "loss": 10.1255, + "step": 5298 + }, + { + "epoch": 0.22087449460214248, + "grad_norm": 182.0, + "learning_rate": 9.074712062736945e-05, + "loss": 9.2516, + "step": 5299 + }, + { + "epoch": 0.22091617689967072, + "grad_norm": 308.0, + "learning_rate": 9.074320833842276e-05, + "loss": 13.751, + "step": 5300 + }, + { + "epoch": 0.22095785919719896, + "grad_norm": 352.0, + "learning_rate": 9.073929530692747e-05, + "loss": 14.0002, + "step": 5301 + }, + { + "epoch": 0.2209995414947272, + "grad_norm": 132.0, + "learning_rate": 9.07353815329549e-05, + "loss": 10.563, + "step": 5302 + }, + { + "epoch": 0.22104122379225544, + "grad_norm": 227.0, + "learning_rate": 9.073146701657642e-05, + "loss": 12.2502, + "step": 5303 + }, + { + "epoch": 0.22108290608978368, + "grad_norm": 274.0, + "learning_rate": 9.072755175786332e-05, + "loss": 12.6882, + "step": 5304 + }, + { + "epoch": 0.22112458838731192, + "grad_norm": 156.0, + "learning_rate": 9.072363575688696e-05, + "loss": 8.8131, + "step": 5305 + }, + { + "epoch": 0.22116627068484015, + "grad_norm": 388.0, + "learning_rate": 9.071971901371873e-05, + "loss": 15.4379, + "step": 5306 + }, + { + "epoch": 0.2212079529823684, + "grad_norm": 616.0, + "learning_rate": 9.071580152843001e-05, + "loss": 20.3754, + "step": 5307 + }, + { + "epoch": 0.22124963527989663, + "grad_norm": 660.0, + "learning_rate": 9.071188330109219e-05, + "loss": 19.0005, + "step": 5308 + }, + { + "epoch": 0.22129131757742487, + "grad_norm": 330.0, + "learning_rate": 9.070796433177669e-05, + "loss": 13.6252, + "step": 5309 + }, + { + "epoch": 0.2213329998749531, + "grad_norm": 848.0, + "learning_rate": 9.070404462055491e-05, + "loss": 20.0032, + "step": 5310 + }, + { + "epoch": 0.22137468217248135, + "grad_norm": 868.0, + "learning_rate": 9.070012416749831e-05, + "loss": 23.7504, + "step": 5311 + }, + { + "epoch": 0.2214163644700096, + "grad_norm": 120.0, + "learning_rate": 9.069620297267835e-05, + "loss": 8.5009, + "step": 5312 + }, + { + "epoch": 0.22145804676753783, + "grad_norm": 688.0, + "learning_rate": 9.069228103616646e-05, + "loss": 17.3773, + "step": 5313 + }, + { + "epoch": 0.22149972906506607, + "grad_norm": 1272.0, + "learning_rate": 9.068835835803415e-05, + "loss": 30.5004, + "step": 5314 + }, + { + "epoch": 0.2215414113625943, + "grad_norm": 272.0, + "learning_rate": 9.068443493835289e-05, + "loss": 12.8143, + "step": 5315 + }, + { + "epoch": 0.22158309366012255, + "grad_norm": 298.0, + "learning_rate": 9.068051077719417e-05, + "loss": 12.3127, + "step": 5316 + }, + { + "epoch": 0.2216247759576508, + "grad_norm": 78.0, + "learning_rate": 9.067658587462956e-05, + "loss": 8.6894, + "step": 5317 + }, + { + "epoch": 0.22166645825517903, + "grad_norm": 454.0, + "learning_rate": 9.067266023073055e-05, + "loss": 16.5002, + "step": 5318 + }, + { + "epoch": 0.22170814055270727, + "grad_norm": 664.0, + "learning_rate": 9.06687338455687e-05, + "loss": 21.3754, + "step": 5319 + }, + { + "epoch": 0.2217498228502355, + "grad_norm": 262.0, + "learning_rate": 9.066480671921556e-05, + "loss": 12.8129, + "step": 5320 + }, + { + "epoch": 0.22179150514776375, + "grad_norm": 604.0, + "learning_rate": 9.06608788517427e-05, + "loss": 18.6264, + "step": 5321 + }, + { + "epoch": 0.22183318744529198, + "grad_norm": 304.0, + "learning_rate": 9.065695024322174e-05, + "loss": 12.2504, + "step": 5322 + }, + { + "epoch": 0.22187486974282022, + "grad_norm": 268.0, + "learning_rate": 9.065302089372422e-05, + "loss": 12.5002, + "step": 5323 + }, + { + "epoch": 0.22191655204034846, + "grad_norm": 258.0, + "learning_rate": 9.064909080332182e-05, + "loss": 11.9379, + "step": 5324 + }, + { + "epoch": 0.2219582343378767, + "grad_norm": 478.0, + "learning_rate": 9.064515997208611e-05, + "loss": 16.7505, + "step": 5325 + }, + { + "epoch": 0.22199991663540494, + "grad_norm": 414.0, + "learning_rate": 9.064122840008875e-05, + "loss": 15.2512, + "step": 5326 + }, + { + "epoch": 0.22204159893293318, + "grad_norm": 612.0, + "learning_rate": 9.06372960874014e-05, + "loss": 20.7506, + "step": 5327 + }, + { + "epoch": 0.22208328123046142, + "grad_norm": 384.0, + "learning_rate": 9.063336303409573e-05, + "loss": 14.7554, + "step": 5328 + }, + { + "epoch": 0.22212496352798966, + "grad_norm": 171.0, + "learning_rate": 9.062942924024341e-05, + "loss": 9.0629, + "step": 5329 + }, + { + "epoch": 0.2221666458255179, + "grad_norm": 161.0, + "learning_rate": 9.062549470591612e-05, + "loss": 9.0633, + "step": 5330 + }, + { + "epoch": 0.22220832812304614, + "grad_norm": 145.0, + "learning_rate": 9.062155943118559e-05, + "loss": 8.4379, + "step": 5331 + }, + { + "epoch": 0.22225001042057438, + "grad_norm": 752.0, + "learning_rate": 9.061762341612354e-05, + "loss": 21.2535, + "step": 5332 + }, + { + "epoch": 0.22229169271810262, + "grad_norm": 288.0, + "learning_rate": 9.061368666080167e-05, + "loss": 13.0628, + "step": 5333 + }, + { + "epoch": 0.22233337501563086, + "grad_norm": 1584.0, + "learning_rate": 9.060974916529179e-05, + "loss": 35.5004, + "step": 5334 + }, + { + "epoch": 0.2223750573131591, + "grad_norm": 150.0, + "learning_rate": 9.06058109296656e-05, + "loss": 10.5007, + "step": 5335 + }, + { + "epoch": 0.22241673961068734, + "grad_norm": 254.0, + "learning_rate": 9.060187195399492e-05, + "loss": 11.8126, + "step": 5336 + }, + { + "epoch": 0.22245842190821558, + "grad_norm": 708.0, + "learning_rate": 9.059793223835151e-05, + "loss": 20.6257, + "step": 5337 + }, + { + "epoch": 0.22250010420574381, + "grad_norm": 466.0, + "learning_rate": 9.059399178280718e-05, + "loss": 16.5003, + "step": 5338 + }, + { + "epoch": 0.22254178650327205, + "grad_norm": 250.0, + "learning_rate": 9.059005058743376e-05, + "loss": 10.8753, + "step": 5339 + }, + { + "epoch": 0.2225834688008003, + "grad_norm": 410.0, + "learning_rate": 9.058610865230306e-05, + "loss": 15.6877, + "step": 5340 + }, + { + "epoch": 0.22262515109832853, + "grad_norm": 260.0, + "learning_rate": 9.058216597748692e-05, + "loss": 11.7503, + "step": 5341 + }, + { + "epoch": 0.22266683339585677, + "grad_norm": 183.0, + "learning_rate": 9.05782225630572e-05, + "loss": 11.5631, + "step": 5342 + }, + { + "epoch": 0.222708515693385, + "grad_norm": 336.0, + "learning_rate": 9.057427840908577e-05, + "loss": 13.7504, + "step": 5343 + }, + { + "epoch": 0.22275019799091325, + "grad_norm": 462.0, + "learning_rate": 9.057033351564453e-05, + "loss": 16.2502, + "step": 5344 + }, + { + "epoch": 0.2227918802884415, + "grad_norm": 460.0, + "learning_rate": 9.056638788280534e-05, + "loss": 15.6876, + "step": 5345 + }, + { + "epoch": 0.22283356258596973, + "grad_norm": 356.0, + "learning_rate": 9.056244151064015e-05, + "loss": 14.3752, + "step": 5346 + }, + { + "epoch": 0.22287524488349797, + "grad_norm": 458.0, + "learning_rate": 9.055849439922085e-05, + "loss": 18.1257, + "step": 5347 + }, + { + "epoch": 0.2229169271810262, + "grad_norm": 520.0, + "learning_rate": 9.055454654861939e-05, + "loss": 17.0008, + "step": 5348 + }, + { + "epoch": 0.22295860947855445, + "grad_norm": 314.0, + "learning_rate": 9.055059795890772e-05, + "loss": 14.3754, + "step": 5349 + }, + { + "epoch": 0.2230002917760827, + "grad_norm": 572.0, + "learning_rate": 9.05466486301578e-05, + "loss": 15.9399, + "step": 5350 + }, + { + "epoch": 0.22304197407361093, + "grad_norm": 75.0, + "learning_rate": 9.054269856244162e-05, + "loss": 10.3756, + "step": 5351 + }, + { + "epoch": 0.22308365637113917, + "grad_norm": 274.0, + "learning_rate": 9.053874775583115e-05, + "loss": 9.1254, + "step": 5352 + }, + { + "epoch": 0.2231253386686674, + "grad_norm": 96.0, + "learning_rate": 9.053479621039839e-05, + "loss": 7.969, + "step": 5353 + }, + { + "epoch": 0.22316702096619564, + "grad_norm": 844.0, + "learning_rate": 9.05308439262154e-05, + "loss": 18.8799, + "step": 5354 + }, + { + "epoch": 0.22320870326372388, + "grad_norm": 484.0, + "learning_rate": 9.052689090335416e-05, + "loss": 16.3754, + "step": 5355 + }, + { + "epoch": 0.22325038556125212, + "grad_norm": 804.0, + "learning_rate": 9.052293714188675e-05, + "loss": 27.7504, + "step": 5356 + }, + { + "epoch": 0.22329206785878036, + "grad_norm": 318.0, + "learning_rate": 9.051898264188521e-05, + "loss": 13.8757, + "step": 5357 + }, + { + "epoch": 0.2233337501563086, + "grad_norm": 936.0, + "learning_rate": 9.051502740342161e-05, + "loss": 28.7509, + "step": 5358 + }, + { + "epoch": 0.22337543245383684, + "grad_norm": 532.0, + "learning_rate": 9.051107142656804e-05, + "loss": 16.6257, + "step": 5359 + }, + { + "epoch": 0.2234171147513651, + "grad_norm": 398.0, + "learning_rate": 9.050711471139658e-05, + "loss": 15.5628, + "step": 5360 + }, + { + "epoch": 0.22345879704889335, + "grad_norm": 260.0, + "learning_rate": 9.050315725797938e-05, + "loss": 11.5631, + "step": 5361 + }, + { + "epoch": 0.2235004793464216, + "grad_norm": 330.0, + "learning_rate": 9.049919906638855e-05, + "loss": 13.8752, + "step": 5362 + }, + { + "epoch": 0.22354216164394983, + "grad_norm": 332.0, + "learning_rate": 9.049524013669622e-05, + "loss": 12.6882, + "step": 5363 + }, + { + "epoch": 0.22358384394147807, + "grad_norm": 444.0, + "learning_rate": 9.049128046897453e-05, + "loss": 15.9396, + "step": 5364 + }, + { + "epoch": 0.2236255262390063, + "grad_norm": 316.0, + "learning_rate": 9.048732006329565e-05, + "loss": 12.3169, + "step": 5365 + }, + { + "epoch": 0.22366720853653455, + "grad_norm": 390.0, + "learning_rate": 9.048335891973179e-05, + "loss": 14.563, + "step": 5366 + }, + { + "epoch": 0.22370889083406278, + "grad_norm": 130.0, + "learning_rate": 9.047939703835511e-05, + "loss": 9.063, + "step": 5367 + }, + { + "epoch": 0.22375057313159102, + "grad_norm": 227.0, + "learning_rate": 9.047543441923782e-05, + "loss": 8.1885, + "step": 5368 + }, + { + "epoch": 0.22379225542911926, + "grad_norm": 340.0, + "learning_rate": 9.047147106245216e-05, + "loss": 11.7506, + "step": 5369 + }, + { + "epoch": 0.2238339377266475, + "grad_norm": 228.0, + "learning_rate": 9.046750696807033e-05, + "loss": 12.1253, + "step": 5370 + }, + { + "epoch": 0.22387562002417574, + "grad_norm": 272.0, + "learning_rate": 9.046354213616459e-05, + "loss": 12.8127, + "step": 5371 + }, + { + "epoch": 0.22391730232170398, + "grad_norm": 376.0, + "learning_rate": 9.045957656680722e-05, + "loss": 14.8755, + "step": 5372 + }, + { + "epoch": 0.22395898461923222, + "grad_norm": 308.0, + "learning_rate": 9.045561026007048e-05, + "loss": 12.5011, + "step": 5373 + }, + { + "epoch": 0.22400066691676046, + "grad_norm": 235.0, + "learning_rate": 9.045164321602664e-05, + "loss": 11.5633, + "step": 5374 + }, + { + "epoch": 0.2240423492142887, + "grad_norm": 314.0, + "learning_rate": 9.0447675434748e-05, + "loss": 11.1876, + "step": 5375 + }, + { + "epoch": 0.22408403151181694, + "grad_norm": 254.0, + "learning_rate": 9.04437069163069e-05, + "loss": 11.6878, + "step": 5376 + }, + { + "epoch": 0.22412571380934518, + "grad_norm": 568.0, + "learning_rate": 9.043973766077565e-05, + "loss": 19.3752, + "step": 5377 + }, + { + "epoch": 0.22416739610687342, + "grad_norm": 296.0, + "learning_rate": 9.043576766822659e-05, + "loss": 10.5004, + "step": 5378 + }, + { + "epoch": 0.22420907840440166, + "grad_norm": 274.0, + "learning_rate": 9.043179693873208e-05, + "loss": 11.6253, + "step": 5379 + }, + { + "epoch": 0.2242507607019299, + "grad_norm": 700.0, + "learning_rate": 9.042782547236446e-05, + "loss": 20.2504, + "step": 5380 + }, + { + "epoch": 0.22429244299945814, + "grad_norm": 121.0, + "learning_rate": 9.042385326919616e-05, + "loss": 9.0005, + "step": 5381 + }, + { + "epoch": 0.22433412529698638, + "grad_norm": 560.0, + "learning_rate": 9.041988032929952e-05, + "loss": 18.2502, + "step": 5382 + }, + { + "epoch": 0.22437580759451461, + "grad_norm": 149.0, + "learning_rate": 9.0415906652747e-05, + "loss": 10.1896, + "step": 5383 + }, + { + "epoch": 0.22441748989204285, + "grad_norm": 668.0, + "learning_rate": 9.041193223961096e-05, + "loss": 20.0002, + "step": 5384 + }, + { + "epoch": 0.2244591721895711, + "grad_norm": 612.0, + "learning_rate": 9.040795708996389e-05, + "loss": 16.6254, + "step": 5385 + }, + { + "epoch": 0.22450085448709933, + "grad_norm": 172.0, + "learning_rate": 9.04039812038782e-05, + "loss": 11.688, + "step": 5386 + }, + { + "epoch": 0.22454253678462757, + "grad_norm": 438.0, + "learning_rate": 9.040000458142639e-05, + "loss": 15.6879, + "step": 5387 + }, + { + "epoch": 0.2245842190821558, + "grad_norm": 560.0, + "learning_rate": 9.03960272226809e-05, + "loss": 18.1253, + "step": 5388 + }, + { + "epoch": 0.22462590137968405, + "grad_norm": 1136.0, + "learning_rate": 9.039204912771422e-05, + "loss": 24.0009, + "step": 5389 + }, + { + "epoch": 0.2246675836772123, + "grad_norm": 171.0, + "learning_rate": 9.038807029659885e-05, + "loss": 10.6885, + "step": 5390 + }, + { + "epoch": 0.22470926597474053, + "grad_norm": 496.0, + "learning_rate": 9.038409072940734e-05, + "loss": 17.2501, + "step": 5391 + }, + { + "epoch": 0.22475094827226877, + "grad_norm": 243.0, + "learning_rate": 9.038011042621219e-05, + "loss": 12.8151, + "step": 5392 + }, + { + "epoch": 0.224792630569797, + "grad_norm": 724.0, + "learning_rate": 9.037612938708593e-05, + "loss": 25.2503, + "step": 5393 + }, + { + "epoch": 0.22483431286732525, + "grad_norm": 258.0, + "learning_rate": 9.037214761210113e-05, + "loss": 12.8754, + "step": 5394 + }, + { + "epoch": 0.2248759951648535, + "grad_norm": 402.0, + "learning_rate": 9.036816510133035e-05, + "loss": 15.7503, + "step": 5395 + }, + { + "epoch": 0.22491767746238173, + "grad_norm": 170.0, + "learning_rate": 9.036418185484618e-05, + "loss": 10.6257, + "step": 5396 + }, + { + "epoch": 0.22495935975990997, + "grad_norm": 512.0, + "learning_rate": 9.036019787272121e-05, + "loss": 18.6253, + "step": 5397 + }, + { + "epoch": 0.2250010420574382, + "grad_norm": 648.0, + "learning_rate": 9.035621315502805e-05, + "loss": 19.0031, + "step": 5398 + }, + { + "epoch": 0.22504272435496644, + "grad_norm": 928.0, + "learning_rate": 9.035222770183934e-05, + "loss": 25.1253, + "step": 5399 + }, + { + "epoch": 0.22508440665249468, + "grad_norm": 548.0, + "learning_rate": 9.034824151322768e-05, + "loss": 18.5002, + "step": 5400 + }, + { + "epoch": 0.22512608895002292, + "grad_norm": 1320.0, + "learning_rate": 9.034425458926574e-05, + "loss": 31.0044, + "step": 5401 + }, + { + "epoch": 0.22516777124755116, + "grad_norm": 125.0, + "learning_rate": 9.03402669300262e-05, + "loss": 9.8139, + "step": 5402 + }, + { + "epoch": 0.2252094535450794, + "grad_norm": 374.0, + "learning_rate": 9.033627853558168e-05, + "loss": 14.9384, + "step": 5403 + }, + { + "epoch": 0.22525113584260764, + "grad_norm": 476.0, + "learning_rate": 9.033228940600493e-05, + "loss": 15.8134, + "step": 5404 + }, + { + "epoch": 0.22529281814013588, + "grad_norm": 302.0, + "learning_rate": 9.032829954136862e-05, + "loss": 14.0627, + "step": 5405 + }, + { + "epoch": 0.22533450043766412, + "grad_norm": 560.0, + "learning_rate": 9.032430894174545e-05, + "loss": 17.2506, + "step": 5406 + }, + { + "epoch": 0.22537618273519236, + "grad_norm": 326.0, + "learning_rate": 9.032031760720818e-05, + "loss": 14.3128, + "step": 5407 + }, + { + "epoch": 0.2254178650327206, + "grad_norm": 624.0, + "learning_rate": 9.031632553782956e-05, + "loss": 20.5003, + "step": 5408 + }, + { + "epoch": 0.22545954733024884, + "grad_norm": 1272.0, + "learning_rate": 9.031233273368231e-05, + "loss": 32.5002, + "step": 5409 + }, + { + "epoch": 0.22550122962777708, + "grad_norm": 364.0, + "learning_rate": 9.030833919483923e-05, + "loss": 16.1252, + "step": 5410 + }, + { + "epoch": 0.22554291192530532, + "grad_norm": 380.0, + "learning_rate": 9.030434492137307e-05, + "loss": 15.5631, + "step": 5411 + }, + { + "epoch": 0.22558459422283356, + "grad_norm": 134.0, + "learning_rate": 9.030034991335666e-05, + "loss": 9.3752, + "step": 5412 + }, + { + "epoch": 0.2256262765203618, + "grad_norm": 272.0, + "learning_rate": 9.02963541708628e-05, + "loss": 12.5628, + "step": 5413 + }, + { + "epoch": 0.22566795881789004, + "grad_norm": 434.0, + "learning_rate": 9.029235769396429e-05, + "loss": 16.3756, + "step": 5414 + }, + { + "epoch": 0.22570964111541827, + "grad_norm": 272.0, + "learning_rate": 9.0288360482734e-05, + "loss": 12.9381, + "step": 5415 + }, + { + "epoch": 0.22575132341294651, + "grad_norm": 68.5, + "learning_rate": 9.028436253724475e-05, + "loss": 7.0627, + "step": 5416 + }, + { + "epoch": 0.22579300571047475, + "grad_norm": 247.0, + "learning_rate": 9.028036385756944e-05, + "loss": 12.1899, + "step": 5417 + }, + { + "epoch": 0.225834688008003, + "grad_norm": 688.0, + "learning_rate": 9.027636444378089e-05, + "loss": 18.7536, + "step": 5418 + }, + { + "epoch": 0.22587637030553123, + "grad_norm": 840.0, + "learning_rate": 9.027236429595205e-05, + "loss": 25.1256, + "step": 5419 + }, + { + "epoch": 0.22591805260305947, + "grad_norm": 900.0, + "learning_rate": 9.02683634141558e-05, + "loss": 24.5003, + "step": 5420 + }, + { + "epoch": 0.2259597349005877, + "grad_norm": 390.0, + "learning_rate": 9.026436179846502e-05, + "loss": 12.6254, + "step": 5421 + }, + { + "epoch": 0.22600141719811595, + "grad_norm": 356.0, + "learning_rate": 9.02603594489527e-05, + "loss": 14.9383, + "step": 5422 + }, + { + "epoch": 0.2260430994956442, + "grad_norm": 163.0, + "learning_rate": 9.025635636569174e-05, + "loss": 10.0629, + "step": 5423 + }, + { + "epoch": 0.22608478179317243, + "grad_norm": 348.0, + "learning_rate": 9.025235254875513e-05, + "loss": 14.8129, + "step": 5424 + }, + { + "epoch": 0.22612646409070067, + "grad_norm": 147.0, + "learning_rate": 9.02483479982158e-05, + "loss": 9.6253, + "step": 5425 + }, + { + "epoch": 0.2261681463882289, + "grad_norm": 308.0, + "learning_rate": 9.024434271414677e-05, + "loss": 11.5007, + "step": 5426 + }, + { + "epoch": 0.22620982868575715, + "grad_norm": 290.0, + "learning_rate": 9.024033669662101e-05, + "loss": 12.6256, + "step": 5427 + }, + { + "epoch": 0.2262515109832854, + "grad_norm": 296.0, + "learning_rate": 9.023632994571153e-05, + "loss": 14.1882, + "step": 5428 + }, + { + "epoch": 0.22629319328081363, + "grad_norm": 164.0, + "learning_rate": 9.023232246149139e-05, + "loss": 10.5002, + "step": 5429 + }, + { + "epoch": 0.22633487557834187, + "grad_norm": 326.0, + "learning_rate": 9.022831424403359e-05, + "loss": 12.6877, + "step": 5430 + }, + { + "epoch": 0.2263765578758701, + "grad_norm": 260.0, + "learning_rate": 9.02243052934112e-05, + "loss": 11.8754, + "step": 5431 + }, + { + "epoch": 0.22641824017339834, + "grad_norm": 446.0, + "learning_rate": 9.022029560969727e-05, + "loss": 15.8132, + "step": 5432 + }, + { + "epoch": 0.2264599224709266, + "grad_norm": 418.0, + "learning_rate": 9.021628519296488e-05, + "loss": 14.8763, + "step": 5433 + }, + { + "epoch": 0.22650160476845485, + "grad_norm": 552.0, + "learning_rate": 9.021227404328712e-05, + "loss": 16.1255, + "step": 5434 + }, + { + "epoch": 0.2265432870659831, + "grad_norm": 141.0, + "learning_rate": 9.02082621607371e-05, + "loss": 10.0004, + "step": 5435 + }, + { + "epoch": 0.22658496936351133, + "grad_norm": 612.0, + "learning_rate": 9.020424954538793e-05, + "loss": 16.6275, + "step": 5436 + }, + { + "epoch": 0.22662665166103957, + "grad_norm": 288.0, + "learning_rate": 9.020023619731275e-05, + "loss": 13.6258, + "step": 5437 + }, + { + "epoch": 0.2266683339585678, + "grad_norm": 276.0, + "learning_rate": 9.019622211658469e-05, + "loss": 12.8763, + "step": 5438 + }, + { + "epoch": 0.22671001625609605, + "grad_norm": 476.0, + "learning_rate": 9.019220730327693e-05, + "loss": 16.1252, + "step": 5439 + }, + { + "epoch": 0.2267516985536243, + "grad_norm": 506.0, + "learning_rate": 9.018819175746261e-05, + "loss": 16.1252, + "step": 5440 + }, + { + "epoch": 0.22679338085115253, + "grad_norm": 876.0, + "learning_rate": 9.018417547921492e-05, + "loss": 22.0041, + "step": 5441 + }, + { + "epoch": 0.22683506314868077, + "grad_norm": 296.0, + "learning_rate": 9.018015846860707e-05, + "loss": 13.3127, + "step": 5442 + }, + { + "epoch": 0.226876745446209, + "grad_norm": 1048.0, + "learning_rate": 9.017614072571228e-05, + "loss": 25.8755, + "step": 5443 + }, + { + "epoch": 0.22691842774373724, + "grad_norm": 374.0, + "learning_rate": 9.017212225060374e-05, + "loss": 15.5002, + "step": 5444 + }, + { + "epoch": 0.22696011004126548, + "grad_norm": 358.0, + "learning_rate": 9.01681030433547e-05, + "loss": 11.0002, + "step": 5445 + }, + { + "epoch": 0.22700179233879372, + "grad_norm": 468.0, + "learning_rate": 9.016408310403843e-05, + "loss": 16.2506, + "step": 5446 + }, + { + "epoch": 0.22704347463632196, + "grad_norm": 154.0, + "learning_rate": 9.016006243272818e-05, + "loss": 10.938, + "step": 5447 + }, + { + "epoch": 0.2270851569338502, + "grad_norm": 232.0, + "learning_rate": 9.015604102949722e-05, + "loss": 11.6881, + "step": 5448 + }, + { + "epoch": 0.22712683923137844, + "grad_norm": 384.0, + "learning_rate": 9.015201889441887e-05, + "loss": 15.2506, + "step": 5449 + }, + { + "epoch": 0.22716852152890668, + "grad_norm": 492.0, + "learning_rate": 9.014799602756639e-05, + "loss": 17.8753, + "step": 5450 + }, + { + "epoch": 0.22721020382643492, + "grad_norm": 402.0, + "learning_rate": 9.014397242901311e-05, + "loss": 16.1251, + "step": 5451 + }, + { + "epoch": 0.22725188612396316, + "grad_norm": 220.0, + "learning_rate": 9.013994809883239e-05, + "loss": 10.8752, + "step": 5452 + }, + { + "epoch": 0.2272935684214914, + "grad_norm": 456.0, + "learning_rate": 9.013592303709754e-05, + "loss": 15.6878, + "step": 5453 + }, + { + "epoch": 0.22733525071901964, + "grad_norm": 326.0, + "learning_rate": 9.013189724388193e-05, + "loss": 14.0627, + "step": 5454 + }, + { + "epoch": 0.22737693301654788, + "grad_norm": 448.0, + "learning_rate": 9.012787071925893e-05, + "loss": 15.6252, + "step": 5455 + }, + { + "epoch": 0.22741861531407612, + "grad_norm": 576.0, + "learning_rate": 9.012384346330193e-05, + "loss": 19.2505, + "step": 5456 + }, + { + "epoch": 0.22746029761160436, + "grad_norm": 282.0, + "learning_rate": 9.011981547608432e-05, + "loss": 13.8781, + "step": 5457 + }, + { + "epoch": 0.2275019799091326, + "grad_norm": 215.0, + "learning_rate": 9.011578675767951e-05, + "loss": 11.2502, + "step": 5458 + }, + { + "epoch": 0.22754366220666083, + "grad_norm": 478.0, + "learning_rate": 9.011175730816093e-05, + "loss": 16.629, + "step": 5459 + }, + { + "epoch": 0.22758534450418907, + "grad_norm": 160.0, + "learning_rate": 9.010772712760201e-05, + "loss": 10.1299, + "step": 5460 + }, + { + "epoch": 0.2276270268017173, + "grad_norm": 312.0, + "learning_rate": 9.010369621607619e-05, + "loss": 12.9379, + "step": 5461 + }, + { + "epoch": 0.22766870909924555, + "grad_norm": 131.0, + "learning_rate": 9.009966457365695e-05, + "loss": 10.1269, + "step": 5462 + }, + { + "epoch": 0.2277103913967738, + "grad_norm": 286.0, + "learning_rate": 9.009563220041777e-05, + "loss": 14.0005, + "step": 5463 + }, + { + "epoch": 0.22775207369430203, + "grad_norm": 282.0, + "learning_rate": 9.009159909643215e-05, + "loss": 12.5629, + "step": 5464 + }, + { + "epoch": 0.22779375599183027, + "grad_norm": 490.0, + "learning_rate": 9.008756526177355e-05, + "loss": 15.5002, + "step": 5465 + }, + { + "epoch": 0.2278354382893585, + "grad_norm": 396.0, + "learning_rate": 9.008353069651551e-05, + "loss": 16.1281, + "step": 5466 + }, + { + "epoch": 0.22787712058688675, + "grad_norm": 364.0, + "learning_rate": 9.007949540073159e-05, + "loss": 15.0628, + "step": 5467 + }, + { + "epoch": 0.227918802884415, + "grad_norm": 474.0, + "learning_rate": 9.007545937449529e-05, + "loss": 16.6252, + "step": 5468 + }, + { + "epoch": 0.22796048518194323, + "grad_norm": 628.0, + "learning_rate": 9.00714226178802e-05, + "loss": 20.2507, + "step": 5469 + }, + { + "epoch": 0.22800216747947147, + "grad_norm": 436.0, + "learning_rate": 9.006738513095987e-05, + "loss": 16.3767, + "step": 5470 + }, + { + "epoch": 0.2280438497769997, + "grad_norm": 280.0, + "learning_rate": 9.006334691380788e-05, + "loss": 13.8129, + "step": 5471 + }, + { + "epoch": 0.22808553207452795, + "grad_norm": 592.0, + "learning_rate": 9.005930796649784e-05, + "loss": 18.6259, + "step": 5472 + }, + { + "epoch": 0.2281272143720562, + "grad_norm": 322.0, + "learning_rate": 9.005526828910337e-05, + "loss": 13.2503, + "step": 5473 + }, + { + "epoch": 0.22816889666958443, + "grad_norm": 270.0, + "learning_rate": 9.005122788169806e-05, + "loss": 12.1883, + "step": 5474 + }, + { + "epoch": 0.22821057896711266, + "grad_norm": 344.0, + "learning_rate": 9.004718674435559e-05, + "loss": 12.5001, + "step": 5475 + }, + { + "epoch": 0.2282522612646409, + "grad_norm": 348.0, + "learning_rate": 9.004314487714956e-05, + "loss": 10.6301, + "step": 5476 + }, + { + "epoch": 0.22829394356216914, + "grad_norm": 392.0, + "learning_rate": 9.003910228015369e-05, + "loss": 14.7503, + "step": 5477 + }, + { + "epoch": 0.22833562585969738, + "grad_norm": 249.0, + "learning_rate": 9.00350589534416e-05, + "loss": 11.4377, + "step": 5478 + }, + { + "epoch": 0.22837730815722562, + "grad_norm": 139.0, + "learning_rate": 9.0031014897087e-05, + "loss": 9.9383, + "step": 5479 + }, + { + "epoch": 0.22841899045475386, + "grad_norm": 428.0, + "learning_rate": 9.002697011116364e-05, + "loss": 16.6281, + "step": 5480 + }, + { + "epoch": 0.2284606727522821, + "grad_norm": 234.0, + "learning_rate": 9.002292459574517e-05, + "loss": 11.5627, + "step": 5481 + }, + { + "epoch": 0.22850235504981034, + "grad_norm": 334.0, + "learning_rate": 9.001887835090535e-05, + "loss": 13.8132, + "step": 5482 + }, + { + "epoch": 0.22854403734733858, + "grad_norm": 241.0, + "learning_rate": 9.001483137671791e-05, + "loss": 10.3138, + "step": 5483 + }, + { + "epoch": 0.22858571964486682, + "grad_norm": 102.5, + "learning_rate": 9.001078367325662e-05, + "loss": 9.7506, + "step": 5484 + }, + { + "epoch": 0.22862740194239506, + "grad_norm": 326.0, + "learning_rate": 9.000673524059525e-05, + "loss": 14.3126, + "step": 5485 + }, + { + "epoch": 0.2286690842399233, + "grad_norm": 332.0, + "learning_rate": 9.000268607880757e-05, + "loss": 11.8757, + "step": 5486 + }, + { + "epoch": 0.22871076653745154, + "grad_norm": 280.0, + "learning_rate": 8.99986361879674e-05, + "loss": 13.0668, + "step": 5487 + }, + { + "epoch": 0.22875244883497978, + "grad_norm": 272.0, + "learning_rate": 8.999458556814853e-05, + "loss": 12.7508, + "step": 5488 + }, + { + "epoch": 0.22879413113250802, + "grad_norm": 1200.0, + "learning_rate": 8.999053421942478e-05, + "loss": 27.0032, + "step": 5489 + }, + { + "epoch": 0.22883581343003626, + "grad_norm": 101.5, + "learning_rate": 8.998648214187e-05, + "loss": 6.7502, + "step": 5490 + }, + { + "epoch": 0.2288774957275645, + "grad_norm": 302.0, + "learning_rate": 8.998242933555802e-05, + "loss": 12.6256, + "step": 5491 + }, + { + "epoch": 0.22891917802509273, + "grad_norm": 217.0, + "learning_rate": 8.997837580056275e-05, + "loss": 11.2503, + "step": 5492 + }, + { + "epoch": 0.22896086032262097, + "grad_norm": 360.0, + "learning_rate": 8.997432153695799e-05, + "loss": 15.1877, + "step": 5493 + }, + { + "epoch": 0.2290025426201492, + "grad_norm": 696.0, + "learning_rate": 8.99702665448177e-05, + "loss": 20.1274, + "step": 5494 + }, + { + "epoch": 0.22904422491767745, + "grad_norm": 270.0, + "learning_rate": 8.996621082421575e-05, + "loss": 13.0629, + "step": 5495 + }, + { + "epoch": 0.2290859072152057, + "grad_norm": 560.0, + "learning_rate": 8.996215437522607e-05, + "loss": 15.6254, + "step": 5496 + }, + { + "epoch": 0.22912758951273393, + "grad_norm": 51.25, + "learning_rate": 8.995809719792254e-05, + "loss": 7.9696, + "step": 5497 + }, + { + "epoch": 0.22916927181026217, + "grad_norm": 148.0, + "learning_rate": 8.995403929237918e-05, + "loss": 11.1257, + "step": 5498 + }, + { + "epoch": 0.2292109541077904, + "grad_norm": 908.0, + "learning_rate": 8.994998065866989e-05, + "loss": 24.0041, + "step": 5499 + }, + { + "epoch": 0.22925263640531865, + "grad_norm": 1408.0, + "learning_rate": 8.994592129686865e-05, + "loss": 29.3797, + "step": 5500 + }, + { + "epoch": 0.2292943187028469, + "grad_norm": 1016.0, + "learning_rate": 8.994186120704947e-05, + "loss": 24.6257, + "step": 5501 + }, + { + "epoch": 0.22933600100037513, + "grad_norm": 310.0, + "learning_rate": 8.993780038928629e-05, + "loss": 13.5631, + "step": 5502 + }, + { + "epoch": 0.22937768329790337, + "grad_norm": 384.0, + "learning_rate": 8.993373884365319e-05, + "loss": 15.563, + "step": 5503 + }, + { + "epoch": 0.2294193655954316, + "grad_norm": 636.0, + "learning_rate": 8.992967657022413e-05, + "loss": 18.7515, + "step": 5504 + }, + { + "epoch": 0.22946104789295985, + "grad_norm": 346.0, + "learning_rate": 8.992561356907318e-05, + "loss": 14.3752, + "step": 5505 + }, + { + "epoch": 0.2295027301904881, + "grad_norm": 232.0, + "learning_rate": 8.992154984027438e-05, + "loss": 12.1886, + "step": 5506 + }, + { + "epoch": 0.22954441248801635, + "grad_norm": 77.5, + "learning_rate": 8.991748538390179e-05, + "loss": 8.4388, + "step": 5507 + }, + { + "epoch": 0.2295860947855446, + "grad_norm": 596.0, + "learning_rate": 8.991342020002948e-05, + "loss": 18.8753, + "step": 5508 + }, + { + "epoch": 0.22962777708307283, + "grad_norm": 432.0, + "learning_rate": 8.990935428873154e-05, + "loss": 16.0004, + "step": 5509 + }, + { + "epoch": 0.22966945938060107, + "grad_norm": 150.0, + "learning_rate": 8.990528765008209e-05, + "loss": 9.5003, + "step": 5510 + }, + { + "epoch": 0.2297111416781293, + "grad_norm": 352.0, + "learning_rate": 8.990122028415521e-05, + "loss": 14.0626, + "step": 5511 + }, + { + "epoch": 0.22975282397565755, + "grad_norm": 524.0, + "learning_rate": 8.989715219102505e-05, + "loss": 15.0002, + "step": 5512 + }, + { + "epoch": 0.2297945062731858, + "grad_norm": 242.0, + "learning_rate": 8.989308337076576e-05, + "loss": 12.0014, + "step": 5513 + }, + { + "epoch": 0.22983618857071403, + "grad_norm": 528.0, + "learning_rate": 8.988901382345149e-05, + "loss": 18.3757, + "step": 5514 + }, + { + "epoch": 0.22987787086824227, + "grad_norm": 474.0, + "learning_rate": 8.988494354915639e-05, + "loss": 16.6252, + "step": 5515 + }, + { + "epoch": 0.2299195531657705, + "grad_norm": 172.0, + "learning_rate": 8.988087254795465e-05, + "loss": 11.3756, + "step": 5516 + }, + { + "epoch": 0.22996123546329875, + "grad_norm": 146.0, + "learning_rate": 8.987680081992049e-05, + "loss": 6.4386, + "step": 5517 + }, + { + "epoch": 0.23000291776082699, + "grad_norm": 976.0, + "learning_rate": 8.987272836512808e-05, + "loss": 23.1305, + "step": 5518 + }, + { + "epoch": 0.23004460005835523, + "grad_norm": 616.0, + "learning_rate": 8.986865518365165e-05, + "loss": 19.1254, + "step": 5519 + }, + { + "epoch": 0.23008628235588346, + "grad_norm": 70.0, + "learning_rate": 8.986458127556545e-05, + "loss": 8.688, + "step": 5520 + }, + { + "epoch": 0.2301279646534117, + "grad_norm": 384.0, + "learning_rate": 8.986050664094373e-05, + "loss": 14.876, + "step": 5521 + }, + { + "epoch": 0.23016964695093994, + "grad_norm": 544.0, + "learning_rate": 8.98564312798607e-05, + "loss": 17.2504, + "step": 5522 + }, + { + "epoch": 0.23021132924846818, + "grad_norm": 416.0, + "learning_rate": 8.98523551923907e-05, + "loss": 15.3761, + "step": 5523 + }, + { + "epoch": 0.23025301154599642, + "grad_norm": 1096.0, + "learning_rate": 8.984827837860799e-05, + "loss": 22.6301, + "step": 5524 + }, + { + "epoch": 0.23029469384352466, + "grad_norm": 342.0, + "learning_rate": 8.984420083858684e-05, + "loss": 14.6254, + "step": 5525 + }, + { + "epoch": 0.2303363761410529, + "grad_norm": 133.0, + "learning_rate": 8.984012257240162e-05, + "loss": 9.2515, + "step": 5526 + }, + { + "epoch": 0.23037805843858114, + "grad_norm": 960.0, + "learning_rate": 8.983604358012663e-05, + "loss": 24.5009, + "step": 5527 + }, + { + "epoch": 0.23041974073610938, + "grad_norm": 736.0, + "learning_rate": 8.983196386183621e-05, + "loss": 19.3758, + "step": 5528 + }, + { + "epoch": 0.23046142303363762, + "grad_norm": 496.0, + "learning_rate": 8.98278834176047e-05, + "loss": 15.6881, + "step": 5529 + }, + { + "epoch": 0.23050310533116586, + "grad_norm": 892.0, + "learning_rate": 8.982380224750649e-05, + "loss": 23.5013, + "step": 5530 + }, + { + "epoch": 0.2305447876286941, + "grad_norm": 182.0, + "learning_rate": 8.981972035161594e-05, + "loss": 10.3128, + "step": 5531 + }, + { + "epoch": 0.23058646992622234, + "grad_norm": 330.0, + "learning_rate": 8.981563773000745e-05, + "loss": 11.5005, + "step": 5532 + }, + { + "epoch": 0.23062815222375058, + "grad_norm": 450.0, + "learning_rate": 8.981155438275544e-05, + "loss": 16.6252, + "step": 5533 + }, + { + "epoch": 0.23066983452127882, + "grad_norm": 154.0, + "learning_rate": 8.980747030993431e-05, + "loss": 9.3177, + "step": 5534 + }, + { + "epoch": 0.23071151681880706, + "grad_norm": 384.0, + "learning_rate": 8.980338551161849e-05, + "loss": 16.1252, + "step": 5535 + }, + { + "epoch": 0.2307531991163353, + "grad_norm": 222.0, + "learning_rate": 8.979929998788245e-05, + "loss": 7.6252, + "step": 5536 + }, + { + "epoch": 0.23079488141386353, + "grad_norm": 588.0, + "learning_rate": 8.979521373880061e-05, + "loss": 18.251, + "step": 5537 + }, + { + "epoch": 0.23083656371139177, + "grad_norm": 452.0, + "learning_rate": 8.97911267644475e-05, + "loss": 15.7515, + "step": 5538 + }, + { + "epoch": 0.23087824600892, + "grad_norm": 237.0, + "learning_rate": 8.978703906489756e-05, + "loss": 11.563, + "step": 5539 + }, + { + "epoch": 0.23091992830644825, + "grad_norm": 448.0, + "learning_rate": 8.97829506402253e-05, + "loss": 16.6257, + "step": 5540 + }, + { + "epoch": 0.2309616106039765, + "grad_norm": 124.5, + "learning_rate": 8.977886149050523e-05, + "loss": 9.6252, + "step": 5541 + }, + { + "epoch": 0.23100329290150473, + "grad_norm": 228.0, + "learning_rate": 8.977477161581189e-05, + "loss": 12.3138, + "step": 5542 + }, + { + "epoch": 0.23104497519903297, + "grad_norm": 350.0, + "learning_rate": 8.977068101621979e-05, + "loss": 13.6876, + "step": 5543 + }, + { + "epoch": 0.2310866574965612, + "grad_norm": 580.0, + "learning_rate": 8.976658969180352e-05, + "loss": 17.2504, + "step": 5544 + }, + { + "epoch": 0.23112833979408945, + "grad_norm": 390.0, + "learning_rate": 8.97624976426376e-05, + "loss": 15.252, + "step": 5545 + }, + { + "epoch": 0.2311700220916177, + "grad_norm": 264.0, + "learning_rate": 8.975840486879663e-05, + "loss": 13.3756, + "step": 5546 + }, + { + "epoch": 0.23121170438914593, + "grad_norm": 498.0, + "learning_rate": 8.975431137035522e-05, + "loss": 15.9376, + "step": 5547 + }, + { + "epoch": 0.23125338668667417, + "grad_norm": 398.0, + "learning_rate": 8.975021714738793e-05, + "loss": 17.2508, + "step": 5548 + }, + { + "epoch": 0.2312950689842024, + "grad_norm": 185.0, + "learning_rate": 8.974612219996943e-05, + "loss": 12.0003, + "step": 5549 + }, + { + "epoch": 0.23133675128173065, + "grad_norm": 180.0, + "learning_rate": 8.974202652817432e-05, + "loss": 9.751, + "step": 5550 + }, + { + "epoch": 0.23137843357925889, + "grad_norm": 344.0, + "learning_rate": 8.973793013207725e-05, + "loss": 15.0003, + "step": 5551 + }, + { + "epoch": 0.23142011587678712, + "grad_norm": 202.0, + "learning_rate": 8.973383301175287e-05, + "loss": 10.8752, + "step": 5552 + }, + { + "epoch": 0.23146179817431536, + "grad_norm": 100.0, + "learning_rate": 8.972973516727585e-05, + "loss": 8.5006, + "step": 5553 + }, + { + "epoch": 0.2315034804718436, + "grad_norm": 336.0, + "learning_rate": 8.972563659872088e-05, + "loss": 13.8754, + "step": 5554 + }, + { + "epoch": 0.23154516276937184, + "grad_norm": 350.0, + "learning_rate": 8.972153730616266e-05, + "loss": 14.6256, + "step": 5555 + }, + { + "epoch": 0.23158684506690008, + "grad_norm": 240.0, + "learning_rate": 8.97174372896759e-05, + "loss": 12.7503, + "step": 5556 + }, + { + "epoch": 0.23162852736442832, + "grad_norm": 296.0, + "learning_rate": 8.971333654933532e-05, + "loss": 13.1252, + "step": 5557 + }, + { + "epoch": 0.23167020966195656, + "grad_norm": 264.0, + "learning_rate": 8.970923508521565e-05, + "loss": 11.6255, + "step": 5558 + }, + { + "epoch": 0.2317118919594848, + "grad_norm": 235.0, + "learning_rate": 8.970513289739165e-05, + "loss": 6.4075, + "step": 5559 + }, + { + "epoch": 0.23175357425701304, + "grad_norm": 237.0, + "learning_rate": 8.970102998593808e-05, + "loss": 10.6881, + "step": 5560 + }, + { + "epoch": 0.23179525655454128, + "grad_norm": 138.0, + "learning_rate": 8.96969263509297e-05, + "loss": 9.1253, + "step": 5561 + }, + { + "epoch": 0.23183693885206952, + "grad_norm": 71.0, + "learning_rate": 8.969282199244134e-05, + "loss": 6.8768, + "step": 5562 + }, + { + "epoch": 0.23187862114959776, + "grad_norm": 800.0, + "learning_rate": 8.968871691054776e-05, + "loss": 20.0067, + "step": 5563 + }, + { + "epoch": 0.231920303447126, + "grad_norm": 219.0, + "learning_rate": 8.968461110532378e-05, + "loss": 11.7504, + "step": 5564 + }, + { + "epoch": 0.23196198574465424, + "grad_norm": 243.0, + "learning_rate": 8.968050457684425e-05, + "loss": 11.5626, + "step": 5565 + }, + { + "epoch": 0.23200366804218248, + "grad_norm": 572.0, + "learning_rate": 8.9676397325184e-05, + "loss": 16.7503, + "step": 5566 + }, + { + "epoch": 0.23204535033971072, + "grad_norm": 272.0, + "learning_rate": 8.96722893504179e-05, + "loss": 13.7505, + "step": 5567 + }, + { + "epoch": 0.23208703263723895, + "grad_norm": 53.5, + "learning_rate": 8.966818065262079e-05, + "loss": 7.0317, + "step": 5568 + }, + { + "epoch": 0.2321287149347672, + "grad_norm": 346.0, + "learning_rate": 8.966407123186757e-05, + "loss": 13.2508, + "step": 5569 + }, + { + "epoch": 0.23217039723229543, + "grad_norm": 48.5, + "learning_rate": 8.965996108823313e-05, + "loss": 5.9067, + "step": 5570 + }, + { + "epoch": 0.23221207952982367, + "grad_norm": 58.75, + "learning_rate": 8.965585022179238e-05, + "loss": 7.594, + "step": 5571 + }, + { + "epoch": 0.2322537618273519, + "grad_norm": 105.5, + "learning_rate": 8.965173863262024e-05, + "loss": 8.1877, + "step": 5572 + }, + { + "epoch": 0.23229544412488015, + "grad_norm": 204.0, + "learning_rate": 8.964762632079165e-05, + "loss": 10.9378, + "step": 5573 + }, + { + "epoch": 0.2323371264224084, + "grad_norm": 438.0, + "learning_rate": 8.964351328638153e-05, + "loss": 14.5628, + "step": 5574 + }, + { + "epoch": 0.23237880871993663, + "grad_norm": 732.0, + "learning_rate": 8.963939952946488e-05, + "loss": 20.6261, + "step": 5575 + }, + { + "epoch": 0.23242049101746487, + "grad_norm": 358.0, + "learning_rate": 8.963528505011664e-05, + "loss": 14.3754, + "step": 5576 + }, + { + "epoch": 0.2324621733149931, + "grad_norm": 1032.0, + "learning_rate": 8.963116984841182e-05, + "loss": 21.0003, + "step": 5577 + }, + { + "epoch": 0.23250385561252135, + "grad_norm": 183.0, + "learning_rate": 8.96270539244254e-05, + "loss": 11.6257, + "step": 5578 + }, + { + "epoch": 0.23254553791004962, + "grad_norm": 282.0, + "learning_rate": 8.962293727823243e-05, + "loss": 14.6268, + "step": 5579 + }, + { + "epoch": 0.23258722020757785, + "grad_norm": 125.0, + "learning_rate": 8.96188199099079e-05, + "loss": 8.7515, + "step": 5580 + }, + { + "epoch": 0.2326289025051061, + "grad_norm": 88.0, + "learning_rate": 8.961470181952685e-05, + "loss": 6.9381, + "step": 5581 + }, + { + "epoch": 0.23267058480263433, + "grad_norm": 368.0, + "learning_rate": 8.961058300716435e-05, + "loss": 14.1253, + "step": 5582 + }, + { + "epoch": 0.23271226710016257, + "grad_norm": 528.0, + "learning_rate": 8.960646347289545e-05, + "loss": 14.3755, + "step": 5583 + }, + { + "epoch": 0.2327539493976908, + "grad_norm": 255.0, + "learning_rate": 8.960234321679526e-05, + "loss": 12.0633, + "step": 5584 + }, + { + "epoch": 0.23279563169521905, + "grad_norm": 528.0, + "learning_rate": 8.959822223893882e-05, + "loss": 18.0004, + "step": 5585 + }, + { + "epoch": 0.2328373139927473, + "grad_norm": 247.0, + "learning_rate": 8.959410053940128e-05, + "loss": 12.1877, + "step": 5586 + }, + { + "epoch": 0.23287899629027553, + "grad_norm": 584.0, + "learning_rate": 8.958997811825775e-05, + "loss": 18.3752, + "step": 5587 + }, + { + "epoch": 0.23292067858780377, + "grad_norm": 304.0, + "learning_rate": 8.958585497558334e-05, + "loss": 12.4376, + "step": 5588 + }, + { + "epoch": 0.232962360885332, + "grad_norm": 171.0, + "learning_rate": 8.958173111145322e-05, + "loss": 8.6298, + "step": 5589 + }, + { + "epoch": 0.23300404318286025, + "grad_norm": 472.0, + "learning_rate": 8.957760652594252e-05, + "loss": 15.938, + "step": 5590 + }, + { + "epoch": 0.2330457254803885, + "grad_norm": 480.0, + "learning_rate": 8.957348121912645e-05, + "loss": 15.938, + "step": 5591 + }, + { + "epoch": 0.23308740777791673, + "grad_norm": 792.0, + "learning_rate": 8.956935519108016e-05, + "loss": 19.8779, + "step": 5592 + }, + { + "epoch": 0.23312909007544497, + "grad_norm": 218.0, + "learning_rate": 8.956522844187884e-05, + "loss": 11.5003, + "step": 5593 + }, + { + "epoch": 0.2331707723729732, + "grad_norm": 63.75, + "learning_rate": 8.956110097159776e-05, + "loss": 7.6266, + "step": 5594 + }, + { + "epoch": 0.23321245467050145, + "grad_norm": 664.0, + "learning_rate": 8.955697278031208e-05, + "loss": 21.0012, + "step": 5595 + }, + { + "epoch": 0.23325413696802968, + "grad_norm": 256.0, + "learning_rate": 8.955284386809706e-05, + "loss": 12.8752, + "step": 5596 + }, + { + "epoch": 0.23329581926555792, + "grad_norm": 640.0, + "learning_rate": 8.954871423502795e-05, + "loss": 21.1252, + "step": 5597 + }, + { + "epoch": 0.23333750156308616, + "grad_norm": 209.0, + "learning_rate": 8.954458388118001e-05, + "loss": 12.3134, + "step": 5598 + }, + { + "epoch": 0.2333791838606144, + "grad_norm": 96.0, + "learning_rate": 8.954045280662851e-05, + "loss": 7.2818, + "step": 5599 + }, + { + "epoch": 0.23342086615814264, + "grad_norm": 131.0, + "learning_rate": 8.953632101144876e-05, + "loss": 11.001, + "step": 5600 + }, + { + "epoch": 0.23346254845567088, + "grad_norm": 470.0, + "learning_rate": 8.953218849571605e-05, + "loss": 17.1257, + "step": 5601 + }, + { + "epoch": 0.23350423075319912, + "grad_norm": 1048.0, + "learning_rate": 8.95280552595057e-05, + "loss": 28.3778, + "step": 5602 + }, + { + "epoch": 0.23354591305072736, + "grad_norm": 240.0, + "learning_rate": 8.952392130289301e-05, + "loss": 12.0635, + "step": 5603 + }, + { + "epoch": 0.2335875953482556, + "grad_norm": 600.0, + "learning_rate": 8.951978662595338e-05, + "loss": 19.7504, + "step": 5604 + }, + { + "epoch": 0.23362927764578384, + "grad_norm": 326.0, + "learning_rate": 8.95156512287621e-05, + "loss": 12.7503, + "step": 5605 + }, + { + "epoch": 0.23367095994331208, + "grad_norm": 90.5, + "learning_rate": 8.95115151113946e-05, + "loss": 8.5627, + "step": 5606 + }, + { + "epoch": 0.23371264224084032, + "grad_norm": 460.0, + "learning_rate": 8.950737827392622e-05, + "loss": 17.2503, + "step": 5607 + }, + { + "epoch": 0.23375432453836856, + "grad_norm": 260.0, + "learning_rate": 8.950324071643234e-05, + "loss": 11.5627, + "step": 5608 + }, + { + "epoch": 0.2337960068358968, + "grad_norm": 252.0, + "learning_rate": 8.949910243898841e-05, + "loss": 11.8759, + "step": 5609 + }, + { + "epoch": 0.23383768913342504, + "grad_norm": 245.0, + "learning_rate": 8.949496344166983e-05, + "loss": 12.8128, + "step": 5610 + }, + { + "epoch": 0.23387937143095328, + "grad_norm": 1376.0, + "learning_rate": 8.949082372455201e-05, + "loss": 33.2504, + "step": 5611 + }, + { + "epoch": 0.23392105372848152, + "grad_norm": 320.0, + "learning_rate": 8.948668328771046e-05, + "loss": 14.1259, + "step": 5612 + }, + { + "epoch": 0.23396273602600975, + "grad_norm": 924.0, + "learning_rate": 8.948254213122058e-05, + "loss": 25.5002, + "step": 5613 + }, + { + "epoch": 0.234004418323538, + "grad_norm": 91.0, + "learning_rate": 8.947840025515787e-05, + "loss": 7.5637, + "step": 5614 + }, + { + "epoch": 0.23404610062106623, + "grad_norm": 256.0, + "learning_rate": 8.947425765959783e-05, + "loss": 11.9377, + "step": 5615 + }, + { + "epoch": 0.23408778291859447, + "grad_norm": 556.0, + "learning_rate": 8.947011434461592e-05, + "loss": 18.0004, + "step": 5616 + }, + { + "epoch": 0.2341294652161227, + "grad_norm": 211.0, + "learning_rate": 8.946597031028767e-05, + "loss": 8.5627, + "step": 5617 + }, + { + "epoch": 0.23417114751365095, + "grad_norm": 344.0, + "learning_rate": 8.94618255566886e-05, + "loss": 14.0626, + "step": 5618 + }, + { + "epoch": 0.2342128298111792, + "grad_norm": 384.0, + "learning_rate": 8.945768008389428e-05, + "loss": 13.8144, + "step": 5619 + }, + { + "epoch": 0.23425451210870743, + "grad_norm": 132.0, + "learning_rate": 8.945353389198023e-05, + "loss": 9.2504, + "step": 5620 + }, + { + "epoch": 0.23429619440623567, + "grad_norm": 205.0, + "learning_rate": 8.9449386981022e-05, + "loss": 11.0626, + "step": 5621 + }, + { + "epoch": 0.2343378767037639, + "grad_norm": 166.0, + "learning_rate": 8.944523935109523e-05, + "loss": 10.7502, + "step": 5622 + }, + { + "epoch": 0.23437955900129215, + "grad_norm": 153.0, + "learning_rate": 8.944109100227544e-05, + "loss": 10.2502, + "step": 5623 + }, + { + "epoch": 0.2344212412988204, + "grad_norm": 668.0, + "learning_rate": 8.943694193463827e-05, + "loss": 20.7503, + "step": 5624 + }, + { + "epoch": 0.23446292359634863, + "grad_norm": 179.0, + "learning_rate": 8.943279214825935e-05, + "loss": 9.688, + "step": 5625 + }, + { + "epoch": 0.23450460589387687, + "grad_norm": 272.0, + "learning_rate": 8.942864164321427e-05, + "loss": 8.7504, + "step": 5626 + }, + { + "epoch": 0.2345462881914051, + "grad_norm": 131.0, + "learning_rate": 8.94244904195787e-05, + "loss": 9.6279, + "step": 5627 + }, + { + "epoch": 0.23458797048893335, + "grad_norm": 258.0, + "learning_rate": 8.94203384774283e-05, + "loss": 13.0004, + "step": 5628 + }, + { + "epoch": 0.23462965278646158, + "grad_norm": 692.0, + "learning_rate": 8.941618581683872e-05, + "loss": 21.3754, + "step": 5629 + }, + { + "epoch": 0.23467133508398982, + "grad_norm": 416.0, + "learning_rate": 8.941203243788567e-05, + "loss": 15.1254, + "step": 5630 + }, + { + "epoch": 0.23471301738151806, + "grad_norm": 376.0, + "learning_rate": 8.940787834064484e-05, + "loss": 14.5006, + "step": 5631 + }, + { + "epoch": 0.2347546996790463, + "grad_norm": 520.0, + "learning_rate": 8.94037235251919e-05, + "loss": 19.3755, + "step": 5632 + }, + { + "epoch": 0.23479638197657454, + "grad_norm": 243.0, + "learning_rate": 8.939956799160262e-05, + "loss": 11.1265, + "step": 5633 + }, + { + "epoch": 0.23483806427410278, + "grad_norm": 560.0, + "learning_rate": 8.939541173995271e-05, + "loss": 17.0002, + "step": 5634 + }, + { + "epoch": 0.23487974657163102, + "grad_norm": 452.0, + "learning_rate": 8.939125477031792e-05, + "loss": 17.3754, + "step": 5635 + }, + { + "epoch": 0.23492142886915926, + "grad_norm": 101.0, + "learning_rate": 8.938709708277402e-05, + "loss": 8.2504, + "step": 5636 + }, + { + "epoch": 0.2349631111666875, + "grad_norm": 147.0, + "learning_rate": 8.938293867739678e-05, + "loss": 10.1913, + "step": 5637 + }, + { + "epoch": 0.23500479346421574, + "grad_norm": 358.0, + "learning_rate": 8.937877955426199e-05, + "loss": 15.4377, + "step": 5638 + }, + { + "epoch": 0.23504647576174398, + "grad_norm": 112.0, + "learning_rate": 8.937461971344542e-05, + "loss": 6.4066, + "step": 5639 + }, + { + "epoch": 0.23508815805927222, + "grad_norm": 604.0, + "learning_rate": 8.937045915502294e-05, + "loss": 17.7503, + "step": 5640 + }, + { + "epoch": 0.23512984035680046, + "grad_norm": 224.0, + "learning_rate": 8.936629787907034e-05, + "loss": 10.8753, + "step": 5641 + }, + { + "epoch": 0.2351715226543287, + "grad_norm": 328.0, + "learning_rate": 8.936213588566347e-05, + "loss": 14.1259, + "step": 5642 + }, + { + "epoch": 0.23521320495185694, + "grad_norm": 123.5, + "learning_rate": 8.935797317487816e-05, + "loss": 10.3754, + "step": 5643 + }, + { + "epoch": 0.23525488724938518, + "grad_norm": 298.0, + "learning_rate": 8.93538097467903e-05, + "loss": 12.8753, + "step": 5644 + }, + { + "epoch": 0.23529656954691341, + "grad_norm": 396.0, + "learning_rate": 8.934964560147579e-05, + "loss": 14.688, + "step": 5645 + }, + { + "epoch": 0.23533825184444165, + "grad_norm": 856.0, + "learning_rate": 8.934548073901048e-05, + "loss": 23.1258, + "step": 5646 + }, + { + "epoch": 0.2353799341419699, + "grad_norm": 532.0, + "learning_rate": 8.934131515947028e-05, + "loss": 17.753, + "step": 5647 + }, + { + "epoch": 0.23542161643949813, + "grad_norm": 584.0, + "learning_rate": 8.933714886293114e-05, + "loss": 17.6255, + "step": 5648 + }, + { + "epoch": 0.23546329873702637, + "grad_norm": 156.0, + "learning_rate": 8.933298184946895e-05, + "loss": 8.9387, + "step": 5649 + }, + { + "epoch": 0.2355049810345546, + "grad_norm": 700.0, + "learning_rate": 8.932881411915968e-05, + "loss": 17.3803, + "step": 5650 + }, + { + "epoch": 0.23554666333208285, + "grad_norm": 744.0, + "learning_rate": 8.932464567207928e-05, + "loss": 21.3788, + "step": 5651 + }, + { + "epoch": 0.23558834562961112, + "grad_norm": 59.0, + "learning_rate": 8.932047650830373e-05, + "loss": 8.188, + "step": 5652 + }, + { + "epoch": 0.23563002792713936, + "grad_norm": 306.0, + "learning_rate": 8.9316306627909e-05, + "loss": 12.9393, + "step": 5653 + }, + { + "epoch": 0.2356717102246676, + "grad_norm": 520.0, + "learning_rate": 8.931213603097109e-05, + "loss": 18.2503, + "step": 5654 + }, + { + "epoch": 0.23571339252219584, + "grad_norm": 205.0, + "learning_rate": 8.930796471756602e-05, + "loss": 12.0009, + "step": 5655 + }, + { + "epoch": 0.23575507481972408, + "grad_norm": 370.0, + "learning_rate": 8.930379268776979e-05, + "loss": 14.8135, + "step": 5656 + }, + { + "epoch": 0.23579675711725231, + "grad_norm": 712.0, + "learning_rate": 8.929961994165845e-05, + "loss": 22.5003, + "step": 5657 + }, + { + "epoch": 0.23583843941478055, + "grad_norm": 266.0, + "learning_rate": 8.929544647930805e-05, + "loss": 12.5629, + "step": 5658 + }, + { + "epoch": 0.2358801217123088, + "grad_norm": 460.0, + "learning_rate": 8.929127230079466e-05, + "loss": 16.7506, + "step": 5659 + }, + { + "epoch": 0.23592180400983703, + "grad_norm": 304.0, + "learning_rate": 8.928709740619434e-05, + "loss": 12.2502, + "step": 5660 + }, + { + "epoch": 0.23596348630736527, + "grad_norm": 278.0, + "learning_rate": 8.928292179558317e-05, + "loss": 11.4378, + "step": 5661 + }, + { + "epoch": 0.2360051686048935, + "grad_norm": 290.0, + "learning_rate": 8.927874546903727e-05, + "loss": 13.8756, + "step": 5662 + }, + { + "epoch": 0.23604685090242175, + "grad_norm": 238.0, + "learning_rate": 8.927456842663275e-05, + "loss": 12.6879, + "step": 5663 + }, + { + "epoch": 0.23608853319995, + "grad_norm": 1288.0, + "learning_rate": 8.927039066844573e-05, + "loss": 29.1293, + "step": 5664 + }, + { + "epoch": 0.23613021549747823, + "grad_norm": 200.0, + "learning_rate": 8.926621219455237e-05, + "loss": 11.6253, + "step": 5665 + }, + { + "epoch": 0.23617189779500647, + "grad_norm": 428.0, + "learning_rate": 8.926203300502879e-05, + "loss": 15.8127, + "step": 5666 + }, + { + "epoch": 0.2362135800925347, + "grad_norm": 348.0, + "learning_rate": 8.925785309995118e-05, + "loss": 13.8126, + "step": 5667 + }, + { + "epoch": 0.23625526239006295, + "grad_norm": 524.0, + "learning_rate": 8.925367247939572e-05, + "loss": 15.2503, + "step": 5668 + }, + { + "epoch": 0.2362969446875912, + "grad_norm": 500.0, + "learning_rate": 8.924949114343857e-05, + "loss": 17.2506, + "step": 5669 + }, + { + "epoch": 0.23633862698511943, + "grad_norm": 552.0, + "learning_rate": 8.924530909215597e-05, + "loss": 15.8171, + "step": 5670 + }, + { + "epoch": 0.23638030928264767, + "grad_norm": 141.0, + "learning_rate": 8.924112632562414e-05, + "loss": 10.8754, + "step": 5671 + }, + { + "epoch": 0.2364219915801759, + "grad_norm": 1360.0, + "learning_rate": 8.923694284391928e-05, + "loss": 33.2516, + "step": 5672 + }, + { + "epoch": 0.23646367387770414, + "grad_norm": 294.0, + "learning_rate": 8.923275864711766e-05, + "loss": 13.876, + "step": 5673 + }, + { + "epoch": 0.23650535617523238, + "grad_norm": 412.0, + "learning_rate": 8.922857373529554e-05, + "loss": 16.1255, + "step": 5674 + }, + { + "epoch": 0.23654703847276062, + "grad_norm": 59.5, + "learning_rate": 8.922438810852917e-05, + "loss": 7.5635, + "step": 5675 + }, + { + "epoch": 0.23658872077028886, + "grad_norm": 644.0, + "learning_rate": 8.922020176689485e-05, + "loss": 18.631, + "step": 5676 + }, + { + "epoch": 0.2366304030678171, + "grad_norm": 700.0, + "learning_rate": 8.921601471046888e-05, + "loss": 21.2504, + "step": 5677 + }, + { + "epoch": 0.23667208536534534, + "grad_norm": 360.0, + "learning_rate": 8.921182693932754e-05, + "loss": 14.188, + "step": 5678 + }, + { + "epoch": 0.23671376766287358, + "grad_norm": 212.0, + "learning_rate": 8.920763845354721e-05, + "loss": 8.5655, + "step": 5679 + }, + { + "epoch": 0.23675544996040182, + "grad_norm": 544.0, + "learning_rate": 8.920344925320416e-05, + "loss": 17.8759, + "step": 5680 + }, + { + "epoch": 0.23679713225793006, + "grad_norm": 398.0, + "learning_rate": 8.919925933837476e-05, + "loss": 12.6878, + "step": 5681 + }, + { + "epoch": 0.2368388145554583, + "grad_norm": 330.0, + "learning_rate": 8.919506870913539e-05, + "loss": 14.3751, + "step": 5682 + }, + { + "epoch": 0.23688049685298654, + "grad_norm": 218.0, + "learning_rate": 8.919087736556242e-05, + "loss": 11.1879, + "step": 5683 + }, + { + "epoch": 0.23692217915051478, + "grad_norm": 486.0, + "learning_rate": 8.918668530773222e-05, + "loss": 17.6261, + "step": 5684 + }, + { + "epoch": 0.23696386144804302, + "grad_norm": 306.0, + "learning_rate": 8.918249253572121e-05, + "loss": 12.8126, + "step": 5685 + }, + { + "epoch": 0.23700554374557126, + "grad_norm": 225.0, + "learning_rate": 8.917829904960579e-05, + "loss": 11.0031, + "step": 5686 + }, + { + "epoch": 0.2370472260430995, + "grad_norm": 552.0, + "learning_rate": 8.917410484946237e-05, + "loss": 16.876, + "step": 5687 + }, + { + "epoch": 0.23708890834062774, + "grad_norm": 520.0, + "learning_rate": 8.916990993536745e-05, + "loss": 18.8752, + "step": 5688 + }, + { + "epoch": 0.23713059063815597, + "grad_norm": 358.0, + "learning_rate": 8.916571430739743e-05, + "loss": 15.1262, + "step": 5689 + }, + { + "epoch": 0.23717227293568421, + "grad_norm": 466.0, + "learning_rate": 8.91615179656288e-05, + "loss": 16.0004, + "step": 5690 + }, + { + "epoch": 0.23721395523321245, + "grad_norm": 1088.0, + "learning_rate": 8.9157320910138e-05, + "loss": 34.0003, + "step": 5691 + }, + { + "epoch": 0.2372556375307407, + "grad_norm": 270.0, + "learning_rate": 8.915312314100156e-05, + "loss": 12.7543, + "step": 5692 + }, + { + "epoch": 0.23729731982826893, + "grad_norm": 400.0, + "learning_rate": 8.9148924658296e-05, + "loss": 16.7502, + "step": 5693 + }, + { + "epoch": 0.23733900212579717, + "grad_norm": 416.0, + "learning_rate": 8.914472546209778e-05, + "loss": 16.7504, + "step": 5694 + }, + { + "epoch": 0.2373806844233254, + "grad_norm": 400.0, + "learning_rate": 8.91405255524835e-05, + "loss": 15.0002, + "step": 5695 + }, + { + "epoch": 0.23742236672085365, + "grad_norm": 366.0, + "learning_rate": 8.913632492952963e-05, + "loss": 15.0627, + "step": 5696 + }, + { + "epoch": 0.2374640490183819, + "grad_norm": 175.0, + "learning_rate": 8.913212359331278e-05, + "loss": 11.4379, + "step": 5697 + }, + { + "epoch": 0.23750573131591013, + "grad_norm": 139.0, + "learning_rate": 8.91279215439095e-05, + "loss": 10.813, + "step": 5698 + }, + { + "epoch": 0.23754741361343837, + "grad_norm": 466.0, + "learning_rate": 8.912371878139638e-05, + "loss": 16.3752, + "step": 5699 + }, + { + "epoch": 0.2375890959109666, + "grad_norm": 282.0, + "learning_rate": 8.911951530585e-05, + "loss": 12.6251, + "step": 5700 + }, + { + "epoch": 0.23763077820849485, + "grad_norm": 227.0, + "learning_rate": 8.911531111734702e-05, + "loss": 7.2817, + "step": 5701 + }, + { + "epoch": 0.2376724605060231, + "grad_norm": 744.0, + "learning_rate": 8.9111106215964e-05, + "loss": 22.6264, + "step": 5702 + }, + { + "epoch": 0.23771414280355133, + "grad_norm": 496.0, + "learning_rate": 8.910690060177757e-05, + "loss": 18.2502, + "step": 5703 + }, + { + "epoch": 0.23775582510107957, + "grad_norm": 1208.0, + "learning_rate": 8.910269427486443e-05, + "loss": 25.5045, + "step": 5704 + }, + { + "epoch": 0.2377975073986078, + "grad_norm": 242.0, + "learning_rate": 8.909848723530122e-05, + "loss": 13.3754, + "step": 5705 + }, + { + "epoch": 0.23783918969613604, + "grad_norm": 330.0, + "learning_rate": 8.90942794831646e-05, + "loss": 13.8127, + "step": 5706 + }, + { + "epoch": 0.23788087199366428, + "grad_norm": 141.0, + "learning_rate": 8.909007101853127e-05, + "loss": 9.6878, + "step": 5707 + }, + { + "epoch": 0.23792255429119252, + "grad_norm": 266.0, + "learning_rate": 8.908586184147791e-05, + "loss": 12.8133, + "step": 5708 + }, + { + "epoch": 0.23796423658872076, + "grad_norm": 464.0, + "learning_rate": 8.908165195208127e-05, + "loss": 17.3756, + "step": 5709 + }, + { + "epoch": 0.238005918886249, + "grad_norm": 255.0, + "learning_rate": 8.907744135041805e-05, + "loss": 12.7506, + "step": 5710 + }, + { + "epoch": 0.23804760118377724, + "grad_norm": 356.0, + "learning_rate": 8.907323003656498e-05, + "loss": 12.1275, + "step": 5711 + }, + { + "epoch": 0.23808928348130548, + "grad_norm": 178.0, + "learning_rate": 8.906901801059884e-05, + "loss": 10.3133, + "step": 5712 + }, + { + "epoch": 0.23813096577883372, + "grad_norm": 278.0, + "learning_rate": 8.906480527259638e-05, + "loss": 13.0639, + "step": 5713 + }, + { + "epoch": 0.23817264807636196, + "grad_norm": 736.0, + "learning_rate": 8.906059182263435e-05, + "loss": 17.6298, + "step": 5714 + }, + { + "epoch": 0.2382143303738902, + "grad_norm": 408.0, + "learning_rate": 8.905637766078959e-05, + "loss": 15.6252, + "step": 5715 + }, + { + "epoch": 0.23825601267141844, + "grad_norm": 300.0, + "learning_rate": 8.905216278713887e-05, + "loss": 12.8751, + "step": 5716 + }, + { + "epoch": 0.23829769496894668, + "grad_norm": 284.0, + "learning_rate": 8.904794720175902e-05, + "loss": 14.2503, + "step": 5717 + }, + { + "epoch": 0.23833937726647492, + "grad_norm": 540.0, + "learning_rate": 8.904373090472686e-05, + "loss": 17.3757, + "step": 5718 + }, + { + "epoch": 0.23838105956400316, + "grad_norm": 444.0, + "learning_rate": 8.903951389611925e-05, + "loss": 14.0043, + "step": 5719 + }, + { + "epoch": 0.2384227418615314, + "grad_norm": 118.0, + "learning_rate": 8.903529617601303e-05, + "loss": 7.2503, + "step": 5720 + }, + { + "epoch": 0.23846442415905963, + "grad_norm": 516.0, + "learning_rate": 8.903107774448507e-05, + "loss": 16.501, + "step": 5721 + }, + { + "epoch": 0.23850610645658787, + "grad_norm": 220.0, + "learning_rate": 8.902685860161224e-05, + "loss": 10.8127, + "step": 5722 + }, + { + "epoch": 0.2385477887541161, + "grad_norm": 632.0, + "learning_rate": 8.902263874747146e-05, + "loss": 20.6257, + "step": 5723 + }, + { + "epoch": 0.23858947105164435, + "grad_norm": 462.0, + "learning_rate": 8.901841818213963e-05, + "loss": 16.5009, + "step": 5724 + }, + { + "epoch": 0.23863115334917262, + "grad_norm": 186.0, + "learning_rate": 8.901419690569365e-05, + "loss": 11.3129, + "step": 5725 + }, + { + "epoch": 0.23867283564670086, + "grad_norm": 318.0, + "learning_rate": 8.900997491821048e-05, + "loss": 14.1878, + "step": 5726 + }, + { + "epoch": 0.2387145179442291, + "grad_norm": 326.0, + "learning_rate": 8.900575221976706e-05, + "loss": 14.8757, + "step": 5727 + }, + { + "epoch": 0.23875620024175734, + "grad_norm": 258.0, + "learning_rate": 8.900152881044033e-05, + "loss": 11.3753, + "step": 5728 + }, + { + "epoch": 0.23879788253928558, + "grad_norm": 520.0, + "learning_rate": 8.899730469030729e-05, + "loss": 17.6252, + "step": 5729 + }, + { + "epoch": 0.23883956483681382, + "grad_norm": 276.0, + "learning_rate": 8.89930798594449e-05, + "loss": 13.2504, + "step": 5730 + }, + { + "epoch": 0.23888124713434206, + "grad_norm": 175.0, + "learning_rate": 8.898885431793016e-05, + "loss": 11.9382, + "step": 5731 + }, + { + "epoch": 0.2389229294318703, + "grad_norm": 97.0, + "learning_rate": 8.898462806584009e-05, + "loss": 8.8757, + "step": 5732 + }, + { + "epoch": 0.23896461172939854, + "grad_norm": 992.0, + "learning_rate": 8.898040110325172e-05, + "loss": 26.8769, + "step": 5733 + }, + { + "epoch": 0.23900629402692677, + "grad_norm": 164.0, + "learning_rate": 8.897617343024209e-05, + "loss": 11.3755, + "step": 5734 + }, + { + "epoch": 0.239047976324455, + "grad_norm": 1184.0, + "learning_rate": 8.897194504688821e-05, + "loss": 26.8798, + "step": 5735 + }, + { + "epoch": 0.23908965862198325, + "grad_norm": 318.0, + "learning_rate": 8.89677159532672e-05, + "loss": 13.0627, + "step": 5736 + }, + { + "epoch": 0.2391313409195115, + "grad_norm": 160.0, + "learning_rate": 8.896348614945611e-05, + "loss": 8.313, + "step": 5737 + }, + { + "epoch": 0.23917302321703973, + "grad_norm": 350.0, + "learning_rate": 8.8959255635532e-05, + "loss": 14.0627, + "step": 5738 + }, + { + "epoch": 0.23921470551456797, + "grad_norm": 1248.0, + "learning_rate": 8.895502441157203e-05, + "loss": 24.7544, + "step": 5739 + }, + { + "epoch": 0.2392563878120962, + "grad_norm": 250.0, + "learning_rate": 8.895079247765325e-05, + "loss": 9.8764, + "step": 5740 + }, + { + "epoch": 0.23929807010962445, + "grad_norm": 416.0, + "learning_rate": 8.894655983385283e-05, + "loss": 15.1256, + "step": 5741 + }, + { + "epoch": 0.2393397524071527, + "grad_norm": 138.0, + "learning_rate": 8.894232648024791e-05, + "loss": 10.3142, + "step": 5742 + }, + { + "epoch": 0.23938143470468093, + "grad_norm": 384.0, + "learning_rate": 8.893809241691561e-05, + "loss": 12.7518, + "step": 5743 + }, + { + "epoch": 0.23942311700220917, + "grad_norm": 170.0, + "learning_rate": 8.893385764393314e-05, + "loss": 10.2501, + "step": 5744 + }, + { + "epoch": 0.2394647992997374, + "grad_norm": 1072.0, + "learning_rate": 8.892962216137766e-05, + "loss": 25.3804, + "step": 5745 + }, + { + "epoch": 0.23950648159726565, + "grad_norm": 478.0, + "learning_rate": 8.892538596932634e-05, + "loss": 14.5006, + "step": 5746 + }, + { + "epoch": 0.2395481638947939, + "grad_norm": 174.0, + "learning_rate": 8.892114906785642e-05, + "loss": 11.5003, + "step": 5747 + }, + { + "epoch": 0.23958984619232213, + "grad_norm": 225.0, + "learning_rate": 8.89169114570451e-05, + "loss": 12.8127, + "step": 5748 + }, + { + "epoch": 0.23963152848985037, + "grad_norm": 468.0, + "learning_rate": 8.891267313696963e-05, + "loss": 17.3775, + "step": 5749 + }, + { + "epoch": 0.2396732107873786, + "grad_norm": 174.0, + "learning_rate": 8.890843410770722e-05, + "loss": 9.0009, + "step": 5750 + }, + { + "epoch": 0.23971489308490684, + "grad_norm": 660.0, + "learning_rate": 8.890419436933514e-05, + "loss": 21.5002, + "step": 5751 + }, + { + "epoch": 0.23975657538243508, + "grad_norm": 288.0, + "learning_rate": 8.889995392193067e-05, + "loss": 13.5628, + "step": 5752 + }, + { + "epoch": 0.23979825767996332, + "grad_norm": 83.0, + "learning_rate": 8.889571276557109e-05, + "loss": 9.1882, + "step": 5753 + }, + { + "epoch": 0.23983993997749156, + "grad_norm": 244.0, + "learning_rate": 8.889147090033369e-05, + "loss": 11.6877, + "step": 5754 + }, + { + "epoch": 0.2398816222750198, + "grad_norm": 466.0, + "learning_rate": 8.888722832629577e-05, + "loss": 15.5008, + "step": 5755 + }, + { + "epoch": 0.23992330457254804, + "grad_norm": 580.0, + "learning_rate": 8.888298504353468e-05, + "loss": 18.0005, + "step": 5756 + }, + { + "epoch": 0.23996498687007628, + "grad_norm": 448.0, + "learning_rate": 8.887874105212773e-05, + "loss": 16.7507, + "step": 5757 + }, + { + "epoch": 0.24000666916760452, + "grad_norm": 620.0, + "learning_rate": 8.887449635215225e-05, + "loss": 19.2505, + "step": 5758 + }, + { + "epoch": 0.24004835146513276, + "grad_norm": 484.0, + "learning_rate": 8.887025094368567e-05, + "loss": 15.938, + "step": 5759 + }, + { + "epoch": 0.240090033762661, + "grad_norm": 256.0, + "learning_rate": 8.886600482680527e-05, + "loss": 13.6257, + "step": 5760 + }, + { + "epoch": 0.24013171606018924, + "grad_norm": 264.0, + "learning_rate": 8.886175800158851e-05, + "loss": 11.5643, + "step": 5761 + }, + { + "epoch": 0.24017339835771748, + "grad_norm": 474.0, + "learning_rate": 8.885751046811275e-05, + "loss": 17.001, + "step": 5762 + }, + { + "epoch": 0.24021508065524572, + "grad_norm": 408.0, + "learning_rate": 8.88532622264554e-05, + "loss": 15.8127, + "step": 5763 + }, + { + "epoch": 0.24025676295277396, + "grad_norm": 262.0, + "learning_rate": 8.884901327669393e-05, + "loss": 12.6878, + "step": 5764 + }, + { + "epoch": 0.2402984452503022, + "grad_norm": 380.0, + "learning_rate": 8.884476361890572e-05, + "loss": 14.7505, + "step": 5765 + }, + { + "epoch": 0.24034012754783043, + "grad_norm": 254.0, + "learning_rate": 8.884051325316825e-05, + "loss": 13.0005, + "step": 5766 + }, + { + "epoch": 0.24038180984535867, + "grad_norm": 268.0, + "learning_rate": 8.883626217955898e-05, + "loss": 11.3128, + "step": 5767 + }, + { + "epoch": 0.2404234921428869, + "grad_norm": 272.0, + "learning_rate": 8.883201039815538e-05, + "loss": 14.2509, + "step": 5768 + }, + { + "epoch": 0.24046517444041515, + "grad_norm": 410.0, + "learning_rate": 8.882775790903494e-05, + "loss": 13.7543, + "step": 5769 + }, + { + "epoch": 0.2405068567379434, + "grad_norm": 580.0, + "learning_rate": 8.882350471227516e-05, + "loss": 17.3758, + "step": 5770 + }, + { + "epoch": 0.24054853903547163, + "grad_norm": 268.0, + "learning_rate": 8.881925080795357e-05, + "loss": 12.7502, + "step": 5771 + }, + { + "epoch": 0.24059022133299987, + "grad_norm": 268.0, + "learning_rate": 8.881499619614769e-05, + "loss": 12.1879, + "step": 5772 + }, + { + "epoch": 0.2406319036305281, + "grad_norm": 328.0, + "learning_rate": 8.881074087693506e-05, + "loss": 13.6259, + "step": 5773 + }, + { + "epoch": 0.24067358592805635, + "grad_norm": 244.0, + "learning_rate": 8.880648485039322e-05, + "loss": 12.564, + "step": 5774 + }, + { + "epoch": 0.2407152682255846, + "grad_norm": 928.0, + "learning_rate": 8.880222811659977e-05, + "loss": 25.6252, + "step": 5775 + }, + { + "epoch": 0.24075695052311283, + "grad_norm": 400.0, + "learning_rate": 8.879797067563225e-05, + "loss": 15.6884, + "step": 5776 + }, + { + "epoch": 0.24079863282064107, + "grad_norm": 382.0, + "learning_rate": 8.879371252756827e-05, + "loss": 14.8127, + "step": 5777 + }, + { + "epoch": 0.2408403151181693, + "grad_norm": 612.0, + "learning_rate": 8.878945367248546e-05, + "loss": 19.7532, + "step": 5778 + }, + { + "epoch": 0.24088199741569755, + "grad_norm": 43.25, + "learning_rate": 8.878519411046137e-05, + "loss": 6.7216, + "step": 5779 + }, + { + "epoch": 0.24092367971322579, + "grad_norm": 450.0, + "learning_rate": 8.87809338415737e-05, + "loss": 17.1254, + "step": 5780 + }, + { + "epoch": 0.24096536201075403, + "grad_norm": 152.0, + "learning_rate": 8.877667286590007e-05, + "loss": 10.3765, + "step": 5781 + }, + { + "epoch": 0.24100704430828226, + "grad_norm": 172.0, + "learning_rate": 8.877241118351814e-05, + "loss": 9.8127, + "step": 5782 + }, + { + "epoch": 0.2410487266058105, + "grad_norm": 288.0, + "learning_rate": 8.876814879450557e-05, + "loss": 13.2521, + "step": 5783 + }, + { + "epoch": 0.24109040890333874, + "grad_norm": 458.0, + "learning_rate": 8.876388569894004e-05, + "loss": 16.2504, + "step": 5784 + }, + { + "epoch": 0.24113209120086698, + "grad_norm": 262.0, + "learning_rate": 8.875962189689926e-05, + "loss": 13.3151, + "step": 5785 + }, + { + "epoch": 0.24117377349839522, + "grad_norm": 208.0, + "learning_rate": 8.875535738846092e-05, + "loss": 11.7512, + "step": 5786 + }, + { + "epoch": 0.24121545579592346, + "grad_norm": 124.5, + "learning_rate": 8.875109217370276e-05, + "loss": 9.2504, + "step": 5787 + }, + { + "epoch": 0.2412571380934517, + "grad_norm": 1112.0, + "learning_rate": 8.87468262527025e-05, + "loss": 27.5006, + "step": 5788 + }, + { + "epoch": 0.24129882039097994, + "grad_norm": 122.5, + "learning_rate": 8.874255962553788e-05, + "loss": 9.7506, + "step": 5789 + }, + { + "epoch": 0.24134050268850818, + "grad_norm": 268.0, + "learning_rate": 8.873829229228669e-05, + "loss": 12.501, + "step": 5790 + }, + { + "epoch": 0.24138218498603642, + "grad_norm": 442.0, + "learning_rate": 8.873402425302668e-05, + "loss": 17.1258, + "step": 5791 + }, + { + "epoch": 0.24142386728356466, + "grad_norm": 240.0, + "learning_rate": 8.872975550783564e-05, + "loss": 12.1255, + "step": 5792 + }, + { + "epoch": 0.2414655495810929, + "grad_norm": 116.0, + "learning_rate": 8.872548605679136e-05, + "loss": 8.6259, + "step": 5793 + }, + { + "epoch": 0.24150723187862114, + "grad_norm": 672.0, + "learning_rate": 8.872121589997167e-05, + "loss": 18.0061, + "step": 5794 + }, + { + "epoch": 0.24154891417614938, + "grad_norm": 980.0, + "learning_rate": 8.871694503745437e-05, + "loss": 28.5011, + "step": 5795 + }, + { + "epoch": 0.24159059647367762, + "grad_norm": 211.0, + "learning_rate": 8.871267346931732e-05, + "loss": 11.3758, + "step": 5796 + }, + { + "epoch": 0.24163227877120586, + "grad_norm": 340.0, + "learning_rate": 8.870840119563836e-05, + "loss": 14.8758, + "step": 5797 + }, + { + "epoch": 0.24167396106873412, + "grad_norm": 588.0, + "learning_rate": 8.870412821649535e-05, + "loss": 17.0003, + "step": 5798 + }, + { + "epoch": 0.24171564336626236, + "grad_norm": 1336.0, + "learning_rate": 8.869985453196617e-05, + "loss": 27.2572, + "step": 5799 + }, + { + "epoch": 0.2417573256637906, + "grad_norm": 266.0, + "learning_rate": 8.86955801421287e-05, + "loss": 12.3757, + "step": 5800 + }, + { + "epoch": 0.24179900796131884, + "grad_norm": 412.0, + "learning_rate": 8.869130504706085e-05, + "loss": 15.6253, + "step": 5801 + }, + { + "epoch": 0.24184069025884708, + "grad_norm": 171.0, + "learning_rate": 8.868702924684052e-05, + "loss": 9.5003, + "step": 5802 + }, + { + "epoch": 0.24188237255637532, + "grad_norm": 228.0, + "learning_rate": 8.868275274154567e-05, + "loss": 11.627, + "step": 5803 + }, + { + "epoch": 0.24192405485390356, + "grad_norm": 229.0, + "learning_rate": 8.867847553125419e-05, + "loss": 11.813, + "step": 5804 + }, + { + "epoch": 0.2419657371514318, + "grad_norm": 940.0, + "learning_rate": 8.867419761604408e-05, + "loss": 21.7575, + "step": 5805 + }, + { + "epoch": 0.24200741944896004, + "grad_norm": 272.0, + "learning_rate": 8.866991899599328e-05, + "loss": 12.2504, + "step": 5806 + }, + { + "epoch": 0.24204910174648828, + "grad_norm": 260.0, + "learning_rate": 8.866563967117977e-05, + "loss": 11.1881, + "step": 5807 + }, + { + "epoch": 0.24209078404401652, + "grad_norm": 548.0, + "learning_rate": 8.866135964168154e-05, + "loss": 17.2515, + "step": 5808 + }, + { + "epoch": 0.24213246634154476, + "grad_norm": 374.0, + "learning_rate": 8.86570789075766e-05, + "loss": 14.188, + "step": 5809 + }, + { + "epoch": 0.242174148639073, + "grad_norm": 340.0, + "learning_rate": 8.865279746894298e-05, + "loss": 14.8775, + "step": 5810 + }, + { + "epoch": 0.24221583093660123, + "grad_norm": 576.0, + "learning_rate": 8.86485153258587e-05, + "loss": 17.5042, + "step": 5811 + }, + { + "epoch": 0.24225751323412947, + "grad_norm": 80.5, + "learning_rate": 8.864423247840176e-05, + "loss": 7.5317, + "step": 5812 + }, + { + "epoch": 0.2422991955316577, + "grad_norm": 428.0, + "learning_rate": 8.863994892665029e-05, + "loss": 15.1267, + "step": 5813 + }, + { + "epoch": 0.24234087782918595, + "grad_norm": 338.0, + "learning_rate": 8.86356646706823e-05, + "loss": 12.9382, + "step": 5814 + }, + { + "epoch": 0.2423825601267142, + "grad_norm": 464.0, + "learning_rate": 8.863137971057589e-05, + "loss": 17.5004, + "step": 5815 + }, + { + "epoch": 0.24242424242424243, + "grad_norm": 91.5, + "learning_rate": 8.862709404640916e-05, + "loss": 7.9378, + "step": 5816 + }, + { + "epoch": 0.24246592472177067, + "grad_norm": 308.0, + "learning_rate": 8.862280767826023e-05, + "loss": 10.6287, + "step": 5817 + }, + { + "epoch": 0.2425076070192989, + "grad_norm": 206.0, + "learning_rate": 8.861852060620719e-05, + "loss": 12.0007, + "step": 5818 + }, + { + "epoch": 0.24254928931682715, + "grad_norm": 388.0, + "learning_rate": 8.861423283032817e-05, + "loss": 14.5005, + "step": 5819 + }, + { + "epoch": 0.2425909716143554, + "grad_norm": 358.0, + "learning_rate": 8.860994435070133e-05, + "loss": 15.8131, + "step": 5820 + }, + { + "epoch": 0.24263265391188363, + "grad_norm": 300.0, + "learning_rate": 8.860565516740485e-05, + "loss": 12.1251, + "step": 5821 + }, + { + "epoch": 0.24267433620941187, + "grad_norm": 290.0, + "learning_rate": 8.860136528051685e-05, + "loss": 8.6884, + "step": 5822 + }, + { + "epoch": 0.2427160185069401, + "grad_norm": 684.0, + "learning_rate": 8.859707469011556e-05, + "loss": 20.6253, + "step": 5823 + }, + { + "epoch": 0.24275770080446835, + "grad_norm": 228.0, + "learning_rate": 8.859278339627916e-05, + "loss": 9.9385, + "step": 5824 + }, + { + "epoch": 0.24279938310199659, + "grad_norm": 494.0, + "learning_rate": 8.858849139908585e-05, + "loss": 16.1262, + "step": 5825 + }, + { + "epoch": 0.24284106539952482, + "grad_norm": 272.0, + "learning_rate": 8.858419869861385e-05, + "loss": 13.0651, + "step": 5826 + }, + { + "epoch": 0.24288274769705306, + "grad_norm": 412.0, + "learning_rate": 8.85799052949414e-05, + "loss": 15.4382, + "step": 5827 + }, + { + "epoch": 0.2429244299945813, + "grad_norm": 125.5, + "learning_rate": 8.857561118814676e-05, + "loss": 9.3759, + "step": 5828 + }, + { + "epoch": 0.24296611229210954, + "grad_norm": 298.0, + "learning_rate": 8.857131637830818e-05, + "loss": 13.1257, + "step": 5829 + }, + { + "epoch": 0.24300779458963778, + "grad_norm": 294.0, + "learning_rate": 8.856702086550395e-05, + "loss": 11.8753, + "step": 5830 + }, + { + "epoch": 0.24304947688716602, + "grad_norm": 1048.0, + "learning_rate": 8.856272464981232e-05, + "loss": 26.8756, + "step": 5831 + }, + { + "epoch": 0.24309115918469426, + "grad_norm": 160.0, + "learning_rate": 8.855842773131162e-05, + "loss": 9.6283, + "step": 5832 + }, + { + "epoch": 0.2431328414822225, + "grad_norm": 704.0, + "learning_rate": 8.855413011008016e-05, + "loss": 20.2506, + "step": 5833 + }, + { + "epoch": 0.24317452377975074, + "grad_norm": 57.75, + "learning_rate": 8.854983178619624e-05, + "loss": 8.4385, + "step": 5834 + }, + { + "epoch": 0.24321620607727898, + "grad_norm": 394.0, + "learning_rate": 8.854553275973822e-05, + "loss": 14.6259, + "step": 5835 + }, + { + "epoch": 0.24325788837480722, + "grad_norm": 504.0, + "learning_rate": 8.854123303078445e-05, + "loss": 18.0026, + "step": 5836 + }, + { + "epoch": 0.24329957067233546, + "grad_norm": 356.0, + "learning_rate": 8.853693259941328e-05, + "loss": 14.0631, + "step": 5837 + }, + { + "epoch": 0.2433412529698637, + "grad_norm": 364.0, + "learning_rate": 8.85326314657031e-05, + "loss": 12.6877, + "step": 5838 + }, + { + "epoch": 0.24338293526739194, + "grad_norm": 426.0, + "learning_rate": 8.852832962973227e-05, + "loss": 15.5003, + "step": 5839 + }, + { + "epoch": 0.24342461756492018, + "grad_norm": 195.0, + "learning_rate": 8.852402709157923e-05, + "loss": 11.3753, + "step": 5840 + }, + { + "epoch": 0.24346629986244842, + "grad_norm": 470.0, + "learning_rate": 8.851972385132237e-05, + "loss": 16.3753, + "step": 5841 + }, + { + "epoch": 0.24350798215997665, + "grad_norm": 454.0, + "learning_rate": 8.851541990904013e-05, + "loss": 15.5637, + "step": 5842 + }, + { + "epoch": 0.2435496644575049, + "grad_norm": 302.0, + "learning_rate": 8.851111526481094e-05, + "loss": 13.1884, + "step": 5843 + }, + { + "epoch": 0.24359134675503313, + "grad_norm": 768.0, + "learning_rate": 8.850680991871326e-05, + "loss": 20.7507, + "step": 5844 + }, + { + "epoch": 0.24363302905256137, + "grad_norm": 306.0, + "learning_rate": 8.850250387082554e-05, + "loss": 12.6881, + "step": 5845 + }, + { + "epoch": 0.2436747113500896, + "grad_norm": 442.0, + "learning_rate": 8.849819712122626e-05, + "loss": 15.8127, + "step": 5846 + }, + { + "epoch": 0.24371639364761785, + "grad_norm": 512.0, + "learning_rate": 8.849388966999395e-05, + "loss": 15.7502, + "step": 5847 + }, + { + "epoch": 0.2437580759451461, + "grad_norm": 424.0, + "learning_rate": 8.848958151720705e-05, + "loss": 16.5006, + "step": 5848 + }, + { + "epoch": 0.24379975824267433, + "grad_norm": 632.0, + "learning_rate": 8.848527266294415e-05, + "loss": 19.1256, + "step": 5849 + }, + { + "epoch": 0.24384144054020257, + "grad_norm": 96.0, + "learning_rate": 8.848096310728371e-05, + "loss": 6.1261, + "step": 5850 + }, + { + "epoch": 0.2438831228377308, + "grad_norm": 524.0, + "learning_rate": 8.84766528503043e-05, + "loss": 14.9391, + "step": 5851 + }, + { + "epoch": 0.24392480513525905, + "grad_norm": 210.0, + "learning_rate": 8.847234189208448e-05, + "loss": 11.6253, + "step": 5852 + }, + { + "epoch": 0.2439664874327873, + "grad_norm": 175.0, + "learning_rate": 8.846803023270282e-05, + "loss": 11.0628, + "step": 5853 + }, + { + "epoch": 0.24400816973031553, + "grad_norm": 227.0, + "learning_rate": 8.84637178722379e-05, + "loss": 11.5631, + "step": 5854 + }, + { + "epoch": 0.24404985202784377, + "grad_norm": 158.0, + "learning_rate": 8.84594048107683e-05, + "loss": 10.8752, + "step": 5855 + }, + { + "epoch": 0.244091534325372, + "grad_norm": 139.0, + "learning_rate": 8.845509104837262e-05, + "loss": 8.6877, + "step": 5856 + }, + { + "epoch": 0.24413321662290025, + "grad_norm": 346.0, + "learning_rate": 8.845077658512953e-05, + "loss": 14.3129, + "step": 5857 + }, + { + "epoch": 0.24417489892042848, + "grad_norm": 552.0, + "learning_rate": 8.844646142111758e-05, + "loss": 18.0003, + "step": 5858 + }, + { + "epoch": 0.24421658121795672, + "grad_norm": 326.0, + "learning_rate": 8.844214555641548e-05, + "loss": 14.3752, + "step": 5859 + }, + { + "epoch": 0.24425826351548496, + "grad_norm": 268.0, + "learning_rate": 8.843782899110186e-05, + "loss": 12.3752, + "step": 5860 + }, + { + "epoch": 0.2442999458130132, + "grad_norm": 1280.0, + "learning_rate": 8.843351172525539e-05, + "loss": 25.8814, + "step": 5861 + }, + { + "epoch": 0.24434162811054144, + "grad_norm": 480.0, + "learning_rate": 8.842919375895477e-05, + "loss": 17.7503, + "step": 5862 + }, + { + "epoch": 0.24438331040806968, + "grad_norm": 154.0, + "learning_rate": 8.842487509227868e-05, + "loss": 9.0003, + "step": 5863 + }, + { + "epoch": 0.24442499270559792, + "grad_norm": 498.0, + "learning_rate": 8.84205557253058e-05, + "loss": 16.1253, + "step": 5864 + }, + { + "epoch": 0.24446667500312616, + "grad_norm": 245.0, + "learning_rate": 8.841623565811492e-05, + "loss": 13.6265, + "step": 5865 + }, + { + "epoch": 0.2445083573006544, + "grad_norm": 376.0, + "learning_rate": 8.84119148907847e-05, + "loss": 14.8769, + "step": 5866 + }, + { + "epoch": 0.24455003959818264, + "grad_norm": 151.0, + "learning_rate": 8.840759342339396e-05, + "loss": 10.1255, + "step": 5867 + }, + { + "epoch": 0.24459172189571088, + "grad_norm": 286.0, + "learning_rate": 8.84032712560214e-05, + "loss": 12.1878, + "step": 5868 + }, + { + "epoch": 0.24463340419323912, + "grad_norm": 336.0, + "learning_rate": 8.839894838874582e-05, + "loss": 12.8128, + "step": 5869 + }, + { + "epoch": 0.24467508649076736, + "grad_norm": 1064.0, + "learning_rate": 8.839462482164598e-05, + "loss": 28.7502, + "step": 5870 + }, + { + "epoch": 0.24471676878829562, + "grad_norm": 266.0, + "learning_rate": 8.83903005548007e-05, + "loss": 12.188, + "step": 5871 + }, + { + "epoch": 0.24475845108582386, + "grad_norm": 812.0, + "learning_rate": 8.83859755882888e-05, + "loss": 21.6254, + "step": 5872 + }, + { + "epoch": 0.2448001333833521, + "grad_norm": 334.0, + "learning_rate": 8.838164992218907e-05, + "loss": 13.0628, + "step": 5873 + }, + { + "epoch": 0.24484181568088034, + "grad_norm": 564.0, + "learning_rate": 8.837732355658037e-05, + "loss": 18.8754, + "step": 5874 + }, + { + "epoch": 0.24488349797840858, + "grad_norm": 270.0, + "learning_rate": 8.837299649154153e-05, + "loss": 12.8129, + "step": 5875 + }, + { + "epoch": 0.24492518027593682, + "grad_norm": 378.0, + "learning_rate": 8.836866872715143e-05, + "loss": 15.0003, + "step": 5876 + }, + { + "epoch": 0.24496686257346506, + "grad_norm": 552.0, + "learning_rate": 8.836434026348896e-05, + "loss": 17.0006, + "step": 5877 + }, + { + "epoch": 0.2450085448709933, + "grad_norm": 71.5, + "learning_rate": 8.836001110063296e-05, + "loss": 8.3133, + "step": 5878 + }, + { + "epoch": 0.24505022716852154, + "grad_norm": 193.0, + "learning_rate": 8.835568123866235e-05, + "loss": 11.0006, + "step": 5879 + }, + { + "epoch": 0.24509190946604978, + "grad_norm": 382.0, + "learning_rate": 8.835135067765606e-05, + "loss": 15.8756, + "step": 5880 + }, + { + "epoch": 0.24513359176357802, + "grad_norm": 1840.0, + "learning_rate": 8.834701941769298e-05, + "loss": 35.0091, + "step": 5881 + }, + { + "epoch": 0.24517527406110626, + "grad_norm": 130.0, + "learning_rate": 8.834268745885208e-05, + "loss": 8.6254, + "step": 5882 + }, + { + "epoch": 0.2452169563586345, + "grad_norm": 620.0, + "learning_rate": 8.833835480121229e-05, + "loss": 20.8756, + "step": 5883 + }, + { + "epoch": 0.24525863865616274, + "grad_norm": 596.0, + "learning_rate": 8.833402144485259e-05, + "loss": 18.1252, + "step": 5884 + }, + { + "epoch": 0.24530032095369098, + "grad_norm": 380.0, + "learning_rate": 8.832968738985194e-05, + "loss": 14.3753, + "step": 5885 + }, + { + "epoch": 0.24534200325121922, + "grad_norm": 66.5, + "learning_rate": 8.832535263628933e-05, + "loss": 7.6565, + "step": 5886 + }, + { + "epoch": 0.24538368554874745, + "grad_norm": 300.0, + "learning_rate": 8.832101718424377e-05, + "loss": 12.6876, + "step": 5887 + }, + { + "epoch": 0.2454253678462757, + "grad_norm": 232.0, + "learning_rate": 8.831668103379427e-05, + "loss": 11.8131, + "step": 5888 + }, + { + "epoch": 0.24546705014380393, + "grad_norm": 139.0, + "learning_rate": 8.831234418501986e-05, + "loss": 9.188, + "step": 5889 + }, + { + "epoch": 0.24550873244133217, + "grad_norm": 438.0, + "learning_rate": 8.830800663799957e-05, + "loss": 17.5007, + "step": 5890 + }, + { + "epoch": 0.2455504147388604, + "grad_norm": 78.5, + "learning_rate": 8.830366839281245e-05, + "loss": 8.3755, + "step": 5891 + }, + { + "epoch": 0.24559209703638865, + "grad_norm": 398.0, + "learning_rate": 8.82993294495376e-05, + "loss": 15.1255, + "step": 5892 + }, + { + "epoch": 0.2456337793339169, + "grad_norm": 84.5, + "learning_rate": 8.829498980825406e-05, + "loss": 9.0008, + "step": 5893 + }, + { + "epoch": 0.24567546163144513, + "grad_norm": 288.0, + "learning_rate": 8.829064946904092e-05, + "loss": 12.2516, + "step": 5894 + }, + { + "epoch": 0.24571714392897337, + "grad_norm": 848.0, + "learning_rate": 8.828630843197729e-05, + "loss": 22.5032, + "step": 5895 + }, + { + "epoch": 0.2457588262265016, + "grad_norm": 492.0, + "learning_rate": 8.82819666971423e-05, + "loss": 13.5629, + "step": 5896 + }, + { + "epoch": 0.24580050852402985, + "grad_norm": 194.0, + "learning_rate": 8.827762426461508e-05, + "loss": 10.8128, + "step": 5897 + }, + { + "epoch": 0.2458421908215581, + "grad_norm": 322.0, + "learning_rate": 8.827328113447475e-05, + "loss": 12.7506, + "step": 5898 + }, + { + "epoch": 0.24588387311908633, + "grad_norm": 900.0, + "learning_rate": 8.826893730680047e-05, + "loss": 20.3791, + "step": 5899 + }, + { + "epoch": 0.24592555541661457, + "grad_norm": 1528.0, + "learning_rate": 8.826459278167141e-05, + "loss": 30.0056, + "step": 5900 + }, + { + "epoch": 0.2459672377141428, + "grad_norm": 115.5, + "learning_rate": 8.826024755916675e-05, + "loss": 8.3757, + "step": 5901 + }, + { + "epoch": 0.24600892001167105, + "grad_norm": 410.0, + "learning_rate": 8.82559016393657e-05, + "loss": 16.2502, + "step": 5902 + }, + { + "epoch": 0.24605060230919928, + "grad_norm": 117.5, + "learning_rate": 8.825155502234742e-05, + "loss": 10.0006, + "step": 5903 + }, + { + "epoch": 0.24609228460672752, + "grad_norm": 1208.0, + "learning_rate": 8.824720770819117e-05, + "loss": 26.7549, + "step": 5904 + }, + { + "epoch": 0.24613396690425576, + "grad_norm": 262.0, + "learning_rate": 8.824285969697615e-05, + "loss": 12.6253, + "step": 5905 + }, + { + "epoch": 0.246175649201784, + "grad_norm": 408.0, + "learning_rate": 8.823851098878165e-05, + "loss": 14.3755, + "step": 5906 + }, + { + "epoch": 0.24621733149931224, + "grad_norm": 180.0, + "learning_rate": 8.823416158368684e-05, + "loss": 10.8753, + "step": 5907 + }, + { + "epoch": 0.24625901379684048, + "grad_norm": 600.0, + "learning_rate": 8.822981148177107e-05, + "loss": 19.6275, + "step": 5908 + }, + { + "epoch": 0.24630069609436872, + "grad_norm": 91.0, + "learning_rate": 8.822546068311361e-05, + "loss": 10.0631, + "step": 5909 + }, + { + "epoch": 0.24634237839189696, + "grad_norm": 248.0, + "learning_rate": 8.822110918779372e-05, + "loss": 12.7502, + "step": 5910 + }, + { + "epoch": 0.2463840606894252, + "grad_norm": 242.0, + "learning_rate": 8.821675699589072e-05, + "loss": 11.3753, + "step": 5911 + }, + { + "epoch": 0.24642574298695344, + "grad_norm": 1200.0, + "learning_rate": 8.821240410748393e-05, + "loss": 26.2544, + "step": 5912 + }, + { + "epoch": 0.24646742528448168, + "grad_norm": 124.0, + "learning_rate": 8.820805052265269e-05, + "loss": 10.1879, + "step": 5913 + }, + { + "epoch": 0.24650910758200992, + "grad_norm": 201.0, + "learning_rate": 8.820369624147632e-05, + "loss": 11.3132, + "step": 5914 + }, + { + "epoch": 0.24655078987953816, + "grad_norm": 334.0, + "learning_rate": 8.81993412640342e-05, + "loss": 12.9402, + "step": 5915 + }, + { + "epoch": 0.2465924721770664, + "grad_norm": 144.0, + "learning_rate": 8.81949855904057e-05, + "loss": 9.0005, + "step": 5916 + }, + { + "epoch": 0.24663415447459464, + "grad_norm": 274.0, + "learning_rate": 8.81906292206702e-05, + "loss": 12.3753, + "step": 5917 + }, + { + "epoch": 0.24667583677212288, + "grad_norm": 444.0, + "learning_rate": 8.818627215490709e-05, + "loss": 17.1256, + "step": 5918 + }, + { + "epoch": 0.24671751906965111, + "grad_norm": 290.0, + "learning_rate": 8.818191439319578e-05, + "loss": 12.4377, + "step": 5919 + }, + { + "epoch": 0.24675920136717935, + "grad_norm": 122.5, + "learning_rate": 8.817755593561569e-05, + "loss": 9.5628, + "step": 5920 + }, + { + "epoch": 0.2468008836647076, + "grad_norm": 408.0, + "learning_rate": 8.817319678224626e-05, + "loss": 16.1254, + "step": 5921 + }, + { + "epoch": 0.24684256596223583, + "grad_norm": 237.0, + "learning_rate": 8.816883693316692e-05, + "loss": 11.8143, + "step": 5922 + }, + { + "epoch": 0.24688424825976407, + "grad_norm": 648.0, + "learning_rate": 8.816447638845716e-05, + "loss": 20.5111, + "step": 5923 + }, + { + "epoch": 0.2469259305572923, + "grad_norm": 211.0, + "learning_rate": 8.81601151481964e-05, + "loss": 10.938, + "step": 5924 + }, + { + "epoch": 0.24696761285482055, + "grad_norm": 250.0, + "learning_rate": 8.815575321246416e-05, + "loss": 12.5012, + "step": 5925 + }, + { + "epoch": 0.2470092951523488, + "grad_norm": 81.5, + "learning_rate": 8.815139058133994e-05, + "loss": 8.0628, + "step": 5926 + }, + { + "epoch": 0.24705097744987703, + "grad_norm": 868.0, + "learning_rate": 8.814702725490323e-05, + "loss": 25.8753, + "step": 5927 + }, + { + "epoch": 0.24709265974740527, + "grad_norm": 412.0, + "learning_rate": 8.814266323323356e-05, + "loss": 15.2501, + "step": 5928 + }, + { + "epoch": 0.2471343420449335, + "grad_norm": 76.5, + "learning_rate": 8.813829851641049e-05, + "loss": 8.0002, + "step": 5929 + }, + { + "epoch": 0.24717602434246175, + "grad_norm": 692.0, + "learning_rate": 8.813393310451353e-05, + "loss": 20.2526, + "step": 5930 + }, + { + "epoch": 0.24721770663999, + "grad_norm": 362.0, + "learning_rate": 8.812956699762224e-05, + "loss": 15.3153, + "step": 5931 + }, + { + "epoch": 0.24725938893751823, + "grad_norm": 520.0, + "learning_rate": 8.812520019581622e-05, + "loss": 17.1252, + "step": 5932 + }, + { + "epoch": 0.24730107123504647, + "grad_norm": 246.0, + "learning_rate": 8.812083269917506e-05, + "loss": 10.5004, + "step": 5933 + }, + { + "epoch": 0.2473427535325747, + "grad_norm": 110.0, + "learning_rate": 8.811646450777832e-05, + "loss": 7.1257, + "step": 5934 + }, + { + "epoch": 0.24738443583010294, + "grad_norm": 338.0, + "learning_rate": 8.811209562170562e-05, + "loss": 11.8131, + "step": 5935 + }, + { + "epoch": 0.24742611812763118, + "grad_norm": 568.0, + "learning_rate": 8.81077260410366e-05, + "loss": 17.7507, + "step": 5936 + }, + { + "epoch": 0.24746780042515942, + "grad_norm": 294.0, + "learning_rate": 8.810335576585091e-05, + "loss": 11.9405, + "step": 5937 + }, + { + "epoch": 0.24750948272268766, + "grad_norm": 580.0, + "learning_rate": 8.809898479622816e-05, + "loss": 17.3752, + "step": 5938 + }, + { + "epoch": 0.2475511650202159, + "grad_norm": 286.0, + "learning_rate": 8.809461313224804e-05, + "loss": 13.2509, + "step": 5939 + }, + { + "epoch": 0.24759284731774414, + "grad_norm": 588.0, + "learning_rate": 8.80902407739902e-05, + "loss": 19.6253, + "step": 5940 + }, + { + "epoch": 0.24763452961527238, + "grad_norm": 282.0, + "learning_rate": 8.808586772153435e-05, + "loss": 12.6878, + "step": 5941 + }, + { + "epoch": 0.24767621191280062, + "grad_norm": 608.0, + "learning_rate": 8.808149397496019e-05, + "loss": 16.8759, + "step": 5942 + }, + { + "epoch": 0.24771789421032886, + "grad_norm": 276.0, + "learning_rate": 8.80771195343474e-05, + "loss": 11.8752, + "step": 5943 + }, + { + "epoch": 0.24775957650785713, + "grad_norm": 1680.0, + "learning_rate": 8.807274439977575e-05, + "loss": 32.7515, + "step": 5944 + }, + { + "epoch": 0.24780125880538537, + "grad_norm": 508.0, + "learning_rate": 8.806836857132495e-05, + "loss": 16.8753, + "step": 5945 + }, + { + "epoch": 0.2478429411029136, + "grad_norm": 868.0, + "learning_rate": 8.806399204907472e-05, + "loss": 19.1285, + "step": 5946 + }, + { + "epoch": 0.24788462340044184, + "grad_norm": 332.0, + "learning_rate": 8.805961483310488e-05, + "loss": 14.5633, + "step": 5947 + }, + { + "epoch": 0.24792630569797008, + "grad_norm": 272.0, + "learning_rate": 8.805523692349518e-05, + "loss": 12.8759, + "step": 5948 + }, + { + "epoch": 0.24796798799549832, + "grad_norm": 328.0, + "learning_rate": 8.805085832032543e-05, + "loss": 14.0002, + "step": 5949 + }, + { + "epoch": 0.24800967029302656, + "grad_norm": 356.0, + "learning_rate": 8.804647902367537e-05, + "loss": 14.1252, + "step": 5950 + }, + { + "epoch": 0.2480513525905548, + "grad_norm": 182.0, + "learning_rate": 8.804209903362488e-05, + "loss": 10.5645, + "step": 5951 + }, + { + "epoch": 0.24809303488808304, + "grad_norm": 488.0, + "learning_rate": 8.803771835025374e-05, + "loss": 16.6256, + "step": 5952 + }, + { + "epoch": 0.24813471718561128, + "grad_norm": 374.0, + "learning_rate": 8.803333697364182e-05, + "loss": 14.5016, + "step": 5953 + }, + { + "epoch": 0.24817639948313952, + "grad_norm": 352.0, + "learning_rate": 8.802895490386895e-05, + "loss": 14.938, + "step": 5954 + }, + { + "epoch": 0.24821808178066776, + "grad_norm": 520.0, + "learning_rate": 8.802457214101501e-05, + "loss": 16.7503, + "step": 5955 + }, + { + "epoch": 0.248259764078196, + "grad_norm": 332.0, + "learning_rate": 8.802018868515986e-05, + "loss": 14.2511, + "step": 5956 + }, + { + "epoch": 0.24830144637572424, + "grad_norm": 123.0, + "learning_rate": 8.80158045363834e-05, + "loss": 11.1254, + "step": 5957 + }, + { + "epoch": 0.24834312867325248, + "grad_norm": 256.0, + "learning_rate": 8.801141969476552e-05, + "loss": 12.0627, + "step": 5958 + }, + { + "epoch": 0.24838481097078072, + "grad_norm": 708.0, + "learning_rate": 8.800703416038615e-05, + "loss": 19.5036, + "step": 5959 + }, + { + "epoch": 0.24842649326830896, + "grad_norm": 213.0, + "learning_rate": 8.80026479333252e-05, + "loss": 12.3772, + "step": 5960 + }, + { + "epoch": 0.2484681755658372, + "grad_norm": 229.0, + "learning_rate": 8.799826101366262e-05, + "loss": 10.6257, + "step": 5961 + }, + { + "epoch": 0.24850985786336544, + "grad_norm": 223.0, + "learning_rate": 8.799387340147837e-05, + "loss": 12.1254, + "step": 5962 + }, + { + "epoch": 0.24855154016089367, + "grad_norm": 182.0, + "learning_rate": 8.79894850968524e-05, + "loss": 10.6888, + "step": 5963 + }, + { + "epoch": 0.24859322245842191, + "grad_norm": 648.0, + "learning_rate": 8.798509609986468e-05, + "loss": 19.0009, + "step": 5964 + }, + { + "epoch": 0.24863490475595015, + "grad_norm": 70.5, + "learning_rate": 8.798070641059522e-05, + "loss": 7.7199, + "step": 5965 + }, + { + "epoch": 0.2486765870534784, + "grad_norm": 382.0, + "learning_rate": 8.797631602912401e-05, + "loss": 15.7507, + "step": 5966 + }, + { + "epoch": 0.24871826935100663, + "grad_norm": 88.5, + "learning_rate": 8.797192495553109e-05, + "loss": 7.8447, + "step": 5967 + }, + { + "epoch": 0.24875995164853487, + "grad_norm": 406.0, + "learning_rate": 8.796753318989643e-05, + "loss": 15.0627, + "step": 5968 + }, + { + "epoch": 0.2488016339460631, + "grad_norm": 142.0, + "learning_rate": 8.796314073230015e-05, + "loss": 6.5317, + "step": 5969 + }, + { + "epoch": 0.24884331624359135, + "grad_norm": 382.0, + "learning_rate": 8.795874758282223e-05, + "loss": 13.3754, + "step": 5970 + }, + { + "epoch": 0.2488849985411196, + "grad_norm": 484.0, + "learning_rate": 8.795435374154278e-05, + "loss": 16.2503, + "step": 5971 + }, + { + "epoch": 0.24892668083864783, + "grad_norm": 408.0, + "learning_rate": 8.794995920854184e-05, + "loss": 14.3141, + "step": 5972 + }, + { + "epoch": 0.24896836313617607, + "grad_norm": 720.0, + "learning_rate": 8.794556398389955e-05, + "loss": 21.6255, + "step": 5973 + }, + { + "epoch": 0.2490100454337043, + "grad_norm": 141.0, + "learning_rate": 8.794116806769597e-05, + "loss": 11.1886, + "step": 5974 + }, + { + "epoch": 0.24905172773123255, + "grad_norm": 163.0, + "learning_rate": 8.793677146001125e-05, + "loss": 10.3127, + "step": 5975 + }, + { + "epoch": 0.2490934100287608, + "grad_norm": 324.0, + "learning_rate": 8.793237416092551e-05, + "loss": 14.3753, + "step": 5976 + }, + { + "epoch": 0.24913509232628903, + "grad_norm": 202.0, + "learning_rate": 8.792797617051885e-05, + "loss": 9.6878, + "step": 5977 + }, + { + "epoch": 0.24917677462381727, + "grad_norm": 422.0, + "learning_rate": 8.792357748887148e-05, + "loss": 14.501, + "step": 5978 + }, + { + "epoch": 0.2492184569213455, + "grad_norm": 235.0, + "learning_rate": 8.791917811606353e-05, + "loss": 10.2504, + "step": 5979 + }, + { + "epoch": 0.24926013921887374, + "grad_norm": 484.0, + "learning_rate": 8.79147780521752e-05, + "loss": 17.1262, + "step": 5980 + }, + { + "epoch": 0.24930182151640198, + "grad_norm": 284.0, + "learning_rate": 8.791037729728668e-05, + "loss": 13.2502, + "step": 5981 + }, + { + "epoch": 0.24934350381393022, + "grad_norm": 696.0, + "learning_rate": 8.790597585147818e-05, + "loss": 16.1277, + "step": 5982 + }, + { + "epoch": 0.24938518611145846, + "grad_norm": 290.0, + "learning_rate": 8.790157371482987e-05, + "loss": 13.9389, + "step": 5983 + }, + { + "epoch": 0.2494268684089867, + "grad_norm": 200.0, + "learning_rate": 8.789717088742204e-05, + "loss": 9.0628, + "step": 5984 + }, + { + "epoch": 0.24946855070651494, + "grad_norm": 628.0, + "learning_rate": 8.789276736933491e-05, + "loss": 19.8756, + "step": 5985 + }, + { + "epoch": 0.24951023300404318, + "grad_norm": 944.0, + "learning_rate": 8.788836316064873e-05, + "loss": 23.3808, + "step": 5986 + }, + { + "epoch": 0.24955191530157142, + "grad_norm": 348.0, + "learning_rate": 8.788395826144376e-05, + "loss": 13.8771, + "step": 5987 + }, + { + "epoch": 0.24959359759909966, + "grad_norm": 119.5, + "learning_rate": 8.787955267180028e-05, + "loss": 8.501, + "step": 5988 + }, + { + "epoch": 0.2496352798966279, + "grad_norm": 592.0, + "learning_rate": 8.78751463917986e-05, + "loss": 17.3758, + "step": 5989 + }, + { + "epoch": 0.24967696219415614, + "grad_norm": 430.0, + "learning_rate": 8.7870739421519e-05, + "loss": 17.1252, + "step": 5990 + }, + { + "epoch": 0.24971864449168438, + "grad_norm": 688.0, + "learning_rate": 8.786633176104182e-05, + "loss": 23.0005, + "step": 5991 + }, + { + "epoch": 0.24976032678921262, + "grad_norm": 300.0, + "learning_rate": 8.78619234104474e-05, + "loss": 11.2506, + "step": 5992 + }, + { + "epoch": 0.24980200908674086, + "grad_norm": 544.0, + "learning_rate": 8.785751436981604e-05, + "loss": 16.7512, + "step": 5993 + }, + { + "epoch": 0.2498436913842691, + "grad_norm": 386.0, + "learning_rate": 8.785310463922814e-05, + "loss": 13.8751, + "step": 5994 + }, + { + "epoch": 0.24988537368179733, + "grad_norm": 278.0, + "learning_rate": 8.784869421876402e-05, + "loss": 12.5001, + "step": 5995 + }, + { + "epoch": 0.24992705597932557, + "grad_norm": 896.0, + "learning_rate": 8.784428310850412e-05, + "loss": 21.5055, + "step": 5996 + }, + { + "epoch": 0.2499687382768538, + "grad_norm": 352.0, + "learning_rate": 8.783987130852878e-05, + "loss": 14.9377, + "step": 5997 + }, + { + "epoch": 0.25001042057438205, + "grad_norm": 154.0, + "learning_rate": 8.783545881891843e-05, + "loss": 9.8753, + "step": 5998 + }, + { + "epoch": 0.2500521028719103, + "grad_norm": 236.0, + "learning_rate": 8.783104563975351e-05, + "loss": 12.4379, + "step": 5999 + }, + { + "epoch": 0.25009378516943853, + "grad_norm": 153.0, + "learning_rate": 8.782663177111438e-05, + "loss": 10.9382, + "step": 6000 + }, + { + "epoch": 0.2501354674669668, + "grad_norm": 340.0, + "learning_rate": 8.782221721308157e-05, + "loss": 13.5006, + "step": 6001 + }, + { + "epoch": 0.250177149764495, + "grad_norm": 131.0, + "learning_rate": 8.781780196573545e-05, + "loss": 10.4378, + "step": 6002 + }, + { + "epoch": 0.2502188320620233, + "grad_norm": 84.5, + "learning_rate": 8.781338602915656e-05, + "loss": 7.8446, + "step": 6003 + }, + { + "epoch": 0.2502605143595515, + "grad_norm": 556.0, + "learning_rate": 8.780896940342535e-05, + "loss": 17.7502, + "step": 6004 + }, + { + "epoch": 0.25030219665707976, + "grad_norm": 508.0, + "learning_rate": 8.780455208862232e-05, + "loss": 18.0004, + "step": 6005 + }, + { + "epoch": 0.25034387895460797, + "grad_norm": 584.0, + "learning_rate": 8.780013408482796e-05, + "loss": 18.1254, + "step": 6006 + }, + { + "epoch": 0.25038556125213624, + "grad_norm": 450.0, + "learning_rate": 8.779571539212283e-05, + "loss": 15.5628, + "step": 6007 + }, + { + "epoch": 0.25042724354966445, + "grad_norm": 83.0, + "learning_rate": 8.77912960105874e-05, + "loss": 7.2504, + "step": 6008 + }, + { + "epoch": 0.2504689258471927, + "grad_norm": 302.0, + "learning_rate": 8.778687594030226e-05, + "loss": 13.1878, + "step": 6009 + }, + { + "epoch": 0.2505106081447209, + "grad_norm": 256.0, + "learning_rate": 8.778245518134794e-05, + "loss": 12.7502, + "step": 6010 + }, + { + "epoch": 0.2505522904422492, + "grad_norm": 454.0, + "learning_rate": 8.777803373380503e-05, + "loss": 16.2502, + "step": 6011 + }, + { + "epoch": 0.2505939727397774, + "grad_norm": 1384.0, + "learning_rate": 8.77736115977541e-05, + "loss": 27.2544, + "step": 6012 + }, + { + "epoch": 0.25063565503730567, + "grad_norm": 672.0, + "learning_rate": 8.776918877327574e-05, + "loss": 20.1253, + "step": 6013 + }, + { + "epoch": 0.2506773373348339, + "grad_norm": 1008.0, + "learning_rate": 8.776476526045057e-05, + "loss": 23.5072, + "step": 6014 + }, + { + "epoch": 0.25071901963236215, + "grad_norm": 270.0, + "learning_rate": 8.77603410593592e-05, + "loss": 12.3754, + "step": 6015 + }, + { + "epoch": 0.25076070192989036, + "grad_norm": 374.0, + "learning_rate": 8.775591617008225e-05, + "loss": 13.3752, + "step": 6016 + }, + { + "epoch": 0.25080238422741863, + "grad_norm": 161.0, + "learning_rate": 8.775149059270038e-05, + "loss": 10.2503, + "step": 6017 + }, + { + "epoch": 0.25084406652494684, + "grad_norm": 326.0, + "learning_rate": 8.774706432729425e-05, + "loss": 14.2502, + "step": 6018 + }, + { + "epoch": 0.2508857488224751, + "grad_norm": 108.0, + "learning_rate": 8.774263737394453e-05, + "loss": 10.7509, + "step": 6019 + }, + { + "epoch": 0.2509274311200033, + "grad_norm": 226.0, + "learning_rate": 8.773820973273188e-05, + "loss": 11.8754, + "step": 6020 + }, + { + "epoch": 0.2509691134175316, + "grad_norm": 310.0, + "learning_rate": 8.7733781403737e-05, + "loss": 12.3152, + "step": 6021 + }, + { + "epoch": 0.2510107957150598, + "grad_norm": 334.0, + "learning_rate": 8.772935238704062e-05, + "loss": 13.7502, + "step": 6022 + }, + { + "epoch": 0.25105247801258807, + "grad_norm": 166.0, + "learning_rate": 8.772492268272343e-05, + "loss": 9.8128, + "step": 6023 + }, + { + "epoch": 0.2510941603101163, + "grad_norm": 138.0, + "learning_rate": 8.772049229086619e-05, + "loss": 9.4377, + "step": 6024 + }, + { + "epoch": 0.25113584260764454, + "grad_norm": 272.0, + "learning_rate": 8.771606121154962e-05, + "loss": 12.5628, + "step": 6025 + }, + { + "epoch": 0.25117752490517276, + "grad_norm": 104.0, + "learning_rate": 8.771162944485449e-05, + "loss": 8.7504, + "step": 6026 + }, + { + "epoch": 0.251219207202701, + "grad_norm": 410.0, + "learning_rate": 8.770719699086156e-05, + "loss": 15.313, + "step": 6027 + }, + { + "epoch": 0.25126088950022923, + "grad_norm": 430.0, + "learning_rate": 8.770276384965163e-05, + "loss": 16.1257, + "step": 6028 + }, + { + "epoch": 0.2513025717977575, + "grad_norm": 620.0, + "learning_rate": 8.769833002130548e-05, + "loss": 19.1253, + "step": 6029 + }, + { + "epoch": 0.2513442540952857, + "grad_norm": 470.0, + "learning_rate": 8.76938955059039e-05, + "loss": 17.1259, + "step": 6030 + }, + { + "epoch": 0.251385936392814, + "grad_norm": 354.0, + "learning_rate": 8.768946030352774e-05, + "loss": 13.0004, + "step": 6031 + }, + { + "epoch": 0.2514276186903422, + "grad_norm": 190.0, + "learning_rate": 8.768502441425782e-05, + "loss": 10.6266, + "step": 6032 + }, + { + "epoch": 0.25146930098787046, + "grad_norm": 229.0, + "learning_rate": 8.768058783817499e-05, + "loss": 10.4379, + "step": 6033 + }, + { + "epoch": 0.25151098328539867, + "grad_norm": 380.0, + "learning_rate": 8.767615057536009e-05, + "loss": 14.6284, + "step": 6034 + }, + { + "epoch": 0.25155266558292694, + "grad_norm": 732.0, + "learning_rate": 8.767171262589403e-05, + "loss": 19.1255, + "step": 6035 + }, + { + "epoch": 0.25159434788045515, + "grad_norm": 456.0, + "learning_rate": 8.766727398985763e-05, + "loss": 17.3753, + "step": 6036 + }, + { + "epoch": 0.2516360301779834, + "grad_norm": 764.0, + "learning_rate": 8.766283466733183e-05, + "loss": 20.8792, + "step": 6037 + }, + { + "epoch": 0.25167771247551163, + "grad_norm": 628.0, + "learning_rate": 8.765839465839751e-05, + "loss": 19.1253, + "step": 6038 + }, + { + "epoch": 0.2517193947730399, + "grad_norm": 318.0, + "learning_rate": 8.765395396313563e-05, + "loss": 13.0003, + "step": 6039 + }, + { + "epoch": 0.2517610770705681, + "grad_norm": 624.0, + "learning_rate": 8.764951258162707e-05, + "loss": 19.7503, + "step": 6040 + }, + { + "epoch": 0.2518027593680964, + "grad_norm": 264.0, + "learning_rate": 8.764507051395282e-05, + "loss": 12.6254, + "step": 6041 + }, + { + "epoch": 0.2518444416656246, + "grad_norm": 316.0, + "learning_rate": 8.764062776019381e-05, + "loss": 13.6878, + "step": 6042 + }, + { + "epoch": 0.25188612396315285, + "grad_norm": 310.0, + "learning_rate": 8.763618432043104e-05, + "loss": 12.6878, + "step": 6043 + }, + { + "epoch": 0.25192780626068106, + "grad_norm": 568.0, + "learning_rate": 8.763174019474544e-05, + "loss": 18.3768, + "step": 6044 + }, + { + "epoch": 0.25196948855820933, + "grad_norm": 282.0, + "learning_rate": 8.762729538321804e-05, + "loss": 13.5006, + "step": 6045 + }, + { + "epoch": 0.25201117085573754, + "grad_norm": 185.0, + "learning_rate": 8.762284988592984e-05, + "loss": 8.2507, + "step": 6046 + }, + { + "epoch": 0.2520528531532658, + "grad_norm": 338.0, + "learning_rate": 8.761840370296189e-05, + "loss": 12.8129, + "step": 6047 + }, + { + "epoch": 0.252094535450794, + "grad_norm": 172.0, + "learning_rate": 8.761395683439515e-05, + "loss": 10.8752, + "step": 6048 + }, + { + "epoch": 0.2521362177483223, + "grad_norm": 157.0, + "learning_rate": 8.760950928031073e-05, + "loss": 10.063, + "step": 6049 + }, + { + "epoch": 0.2521779000458505, + "grad_norm": 412.0, + "learning_rate": 8.760506104078968e-05, + "loss": 14.7506, + "step": 6050 + }, + { + "epoch": 0.25221958234337877, + "grad_norm": 284.0, + "learning_rate": 8.760061211591301e-05, + "loss": 12.1253, + "step": 6051 + }, + { + "epoch": 0.252261264640907, + "grad_norm": 340.0, + "learning_rate": 8.759616250576188e-05, + "loss": 14.6878, + "step": 6052 + }, + { + "epoch": 0.25230294693843525, + "grad_norm": 956.0, + "learning_rate": 8.759171221041736e-05, + "loss": 20.8797, + "step": 6053 + }, + { + "epoch": 0.2523446292359635, + "grad_norm": 254.0, + "learning_rate": 8.758726122996053e-05, + "loss": 10.6254, + "step": 6054 + }, + { + "epoch": 0.2523863115334917, + "grad_norm": 720.0, + "learning_rate": 8.758280956447252e-05, + "loss": 18.6254, + "step": 6055 + }, + { + "epoch": 0.25242799383102, + "grad_norm": 78.0, + "learning_rate": 8.757835721403448e-05, + "loss": 9.1881, + "step": 6056 + }, + { + "epoch": 0.2524696761285482, + "grad_norm": 238.0, + "learning_rate": 8.757390417872755e-05, + "loss": 8.3132, + "step": 6057 + }, + { + "epoch": 0.25251135842607647, + "grad_norm": 588.0, + "learning_rate": 8.756945045863288e-05, + "loss": 19.1256, + "step": 6058 + }, + { + "epoch": 0.2525530407236047, + "grad_norm": 596.0, + "learning_rate": 8.756499605383162e-05, + "loss": 20.7502, + "step": 6059 + }, + { + "epoch": 0.25259472302113295, + "grad_norm": 304.0, + "learning_rate": 8.756054096440498e-05, + "loss": 13.688, + "step": 6060 + }, + { + "epoch": 0.25263640531866116, + "grad_norm": 800.0, + "learning_rate": 8.755608519043416e-05, + "loss": 22.0002, + "step": 6061 + }, + { + "epoch": 0.25267808761618943, + "grad_norm": 130.0, + "learning_rate": 8.755162873200033e-05, + "loss": 9.8754, + "step": 6062 + }, + { + "epoch": 0.25271976991371764, + "grad_norm": 306.0, + "learning_rate": 8.754717158918476e-05, + "loss": 13.2503, + "step": 6063 + }, + { + "epoch": 0.2527614522112459, + "grad_norm": 416.0, + "learning_rate": 8.754271376206864e-05, + "loss": 15.0009, + "step": 6064 + }, + { + "epoch": 0.2528031345087741, + "grad_norm": 230.0, + "learning_rate": 8.753825525073323e-05, + "loss": 10.4378, + "step": 6065 + }, + { + "epoch": 0.2528448168063024, + "grad_norm": 374.0, + "learning_rate": 8.75337960552598e-05, + "loss": 14.0626, + "step": 6066 + }, + { + "epoch": 0.2528864991038306, + "grad_norm": 201.0, + "learning_rate": 8.752933617572958e-05, + "loss": 11.3755, + "step": 6067 + }, + { + "epoch": 0.25292818140135886, + "grad_norm": 328.0, + "learning_rate": 8.752487561222389e-05, + "loss": 13.1253, + "step": 6068 + }, + { + "epoch": 0.2529698636988871, + "grad_norm": 264.0, + "learning_rate": 8.752041436482402e-05, + "loss": 12.6877, + "step": 6069 + }, + { + "epoch": 0.25301154599641534, + "grad_norm": 458.0, + "learning_rate": 8.751595243361126e-05, + "loss": 15.6251, + "step": 6070 + }, + { + "epoch": 0.25305322829394356, + "grad_norm": 684.0, + "learning_rate": 8.751148981866692e-05, + "loss": 19.7512, + "step": 6071 + }, + { + "epoch": 0.2530949105914718, + "grad_norm": 360.0, + "learning_rate": 8.750702652007237e-05, + "loss": 13.3131, + "step": 6072 + }, + { + "epoch": 0.25313659288900003, + "grad_norm": 252.0, + "learning_rate": 8.750256253790892e-05, + "loss": 11.8752, + "step": 6073 + }, + { + "epoch": 0.2531782751865283, + "grad_norm": 332.0, + "learning_rate": 8.749809787225794e-05, + "loss": 12.9383, + "step": 6074 + }, + { + "epoch": 0.2532199574840565, + "grad_norm": 312.0, + "learning_rate": 8.749363252320079e-05, + "loss": 13.3765, + "step": 6075 + }, + { + "epoch": 0.2532616397815848, + "grad_norm": 230.0, + "learning_rate": 8.748916649081888e-05, + "loss": 10.3752, + "step": 6076 + }, + { + "epoch": 0.253303322079113, + "grad_norm": 61.5, + "learning_rate": 8.748469977519358e-05, + "loss": 6.7816, + "step": 6077 + }, + { + "epoch": 0.25334500437664126, + "grad_norm": 153.0, + "learning_rate": 8.748023237640628e-05, + "loss": 10.0012, + "step": 6078 + }, + { + "epoch": 0.25338668667416947, + "grad_norm": 52.25, + "learning_rate": 8.747576429453844e-05, + "loss": 7.7204, + "step": 6079 + }, + { + "epoch": 0.25342836897169774, + "grad_norm": 616.0, + "learning_rate": 8.747129552967144e-05, + "loss": 19.1254, + "step": 6080 + }, + { + "epoch": 0.25347005126922595, + "grad_norm": 247.0, + "learning_rate": 8.746682608188678e-05, + "loss": 12.1261, + "step": 6081 + }, + { + "epoch": 0.2535117335667542, + "grad_norm": 276.0, + "learning_rate": 8.746235595126588e-05, + "loss": 12.1251, + "step": 6082 + }, + { + "epoch": 0.2535534158642824, + "grad_norm": 197.0, + "learning_rate": 8.745788513789022e-05, + "loss": 11.1884, + "step": 6083 + }, + { + "epoch": 0.2535950981618107, + "grad_norm": 462.0, + "learning_rate": 8.745341364184127e-05, + "loss": 16.6264, + "step": 6084 + }, + { + "epoch": 0.2536367804593389, + "grad_norm": 290.0, + "learning_rate": 8.744894146320052e-05, + "loss": 13.8753, + "step": 6085 + }, + { + "epoch": 0.2536784627568672, + "grad_norm": 348.0, + "learning_rate": 8.744446860204951e-05, + "loss": 15.5642, + "step": 6086 + }, + { + "epoch": 0.2537201450543954, + "grad_norm": 290.0, + "learning_rate": 8.743999505846973e-05, + "loss": 14.1257, + "step": 6087 + }, + { + "epoch": 0.25376182735192365, + "grad_norm": 168.0, + "learning_rate": 8.743552083254272e-05, + "loss": 10.3129, + "step": 6088 + }, + { + "epoch": 0.25380350964945186, + "grad_norm": 1012.0, + "learning_rate": 8.743104592435001e-05, + "loss": 23.0055, + "step": 6089 + }, + { + "epoch": 0.25384519194698013, + "grad_norm": 476.0, + "learning_rate": 8.742657033397316e-05, + "loss": 16.1252, + "step": 6090 + }, + { + "epoch": 0.25388687424450834, + "grad_norm": 243.0, + "learning_rate": 8.742209406149376e-05, + "loss": 11.1878, + "step": 6091 + }, + { + "epoch": 0.2539285565420366, + "grad_norm": 1288.0, + "learning_rate": 8.741761710699336e-05, + "loss": 25.6296, + "step": 6092 + }, + { + "epoch": 0.2539702388395648, + "grad_norm": 376.0, + "learning_rate": 8.741313947055358e-05, + "loss": 14.938, + "step": 6093 + }, + { + "epoch": 0.2540119211370931, + "grad_norm": 504.0, + "learning_rate": 8.7408661152256e-05, + "loss": 16.1253, + "step": 6094 + }, + { + "epoch": 0.2540536034346213, + "grad_norm": 676.0, + "learning_rate": 8.740418215218227e-05, + "loss": 21.8751, + "step": 6095 + }, + { + "epoch": 0.25409528573214957, + "grad_norm": 102.0, + "learning_rate": 8.739970247041399e-05, + "loss": 9.0004, + "step": 6096 + }, + { + "epoch": 0.2541369680296778, + "grad_norm": 628.0, + "learning_rate": 8.73952221070328e-05, + "loss": 20.5006, + "step": 6097 + }, + { + "epoch": 0.25417865032720605, + "grad_norm": 294.0, + "learning_rate": 8.739074106212036e-05, + "loss": 12.6258, + "step": 6098 + }, + { + "epoch": 0.25422033262473426, + "grad_norm": 616.0, + "learning_rate": 8.738625933575837e-05, + "loss": 20.2503, + "step": 6099 + }, + { + "epoch": 0.2542620149222625, + "grad_norm": 384.0, + "learning_rate": 8.738177692802847e-05, + "loss": 15.7503, + "step": 6100 + }, + { + "epoch": 0.25430369721979074, + "grad_norm": 1048.0, + "learning_rate": 8.737729383901237e-05, + "loss": 27.3753, + "step": 6101 + }, + { + "epoch": 0.254345379517319, + "grad_norm": 198.0, + "learning_rate": 8.737281006879177e-05, + "loss": 11.2512, + "step": 6102 + }, + { + "epoch": 0.2543870618148472, + "grad_norm": 520.0, + "learning_rate": 8.736832561744839e-05, + "loss": 16.8753, + "step": 6103 + }, + { + "epoch": 0.2544287441123755, + "grad_norm": 328.0, + "learning_rate": 8.736384048506396e-05, + "loss": 13.0628, + "step": 6104 + }, + { + "epoch": 0.2544704264099037, + "grad_norm": 1232.0, + "learning_rate": 8.735935467172022e-05, + "loss": 28.8752, + "step": 6105 + }, + { + "epoch": 0.25451210870743196, + "grad_norm": 346.0, + "learning_rate": 8.735486817749892e-05, + "loss": 15.0627, + "step": 6106 + }, + { + "epoch": 0.2545537910049602, + "grad_norm": 276.0, + "learning_rate": 8.735038100248184e-05, + "loss": 12.8754, + "step": 6107 + }, + { + "epoch": 0.25459547330248844, + "grad_norm": 472.0, + "learning_rate": 8.734589314675074e-05, + "loss": 17.5006, + "step": 6108 + }, + { + "epoch": 0.25463715560001665, + "grad_norm": 436.0, + "learning_rate": 8.734140461038743e-05, + "loss": 15.6877, + "step": 6109 + }, + { + "epoch": 0.2546788378975449, + "grad_norm": 462.0, + "learning_rate": 8.73369153934737e-05, + "loss": 17.0015, + "step": 6110 + }, + { + "epoch": 0.25472052019507313, + "grad_norm": 236.0, + "learning_rate": 8.733242549609139e-05, + "loss": 10.9378, + "step": 6111 + }, + { + "epoch": 0.2547622024926014, + "grad_norm": 117.0, + "learning_rate": 8.73279349183223e-05, + "loss": 11.3755, + "step": 6112 + }, + { + "epoch": 0.2548038847901296, + "grad_norm": 239.0, + "learning_rate": 8.732344366024827e-05, + "loss": 11.9381, + "step": 6113 + }, + { + "epoch": 0.2548455670876579, + "grad_norm": 270.0, + "learning_rate": 8.731895172195119e-05, + "loss": 11.3753, + "step": 6114 + }, + { + "epoch": 0.2548872493851861, + "grad_norm": 372.0, + "learning_rate": 8.731445910351288e-05, + "loss": 14.6252, + "step": 6115 + }, + { + "epoch": 0.25492893168271435, + "grad_norm": 684.0, + "learning_rate": 8.730996580501525e-05, + "loss": 20.3758, + "step": 6116 + }, + { + "epoch": 0.25497061398024257, + "grad_norm": 264.0, + "learning_rate": 8.730547182654018e-05, + "loss": 12.0626, + "step": 6117 + }, + { + "epoch": 0.25501229627777083, + "grad_norm": 732.0, + "learning_rate": 8.730097716816958e-05, + "loss": 19.6257, + "step": 6118 + }, + { + "epoch": 0.25505397857529905, + "grad_norm": 240.0, + "learning_rate": 8.729648182998535e-05, + "loss": 11.3754, + "step": 6119 + }, + { + "epoch": 0.2550956608728273, + "grad_norm": 916.0, + "learning_rate": 8.729198581206943e-05, + "loss": 21.7542, + "step": 6120 + }, + { + "epoch": 0.2551373431703555, + "grad_norm": 500.0, + "learning_rate": 8.728748911450375e-05, + "loss": 16.1252, + "step": 6121 + }, + { + "epoch": 0.2551790254678838, + "grad_norm": 450.0, + "learning_rate": 8.72829917373703e-05, + "loss": 15.1253, + "step": 6122 + }, + { + "epoch": 0.255220707765412, + "grad_norm": 334.0, + "learning_rate": 8.727849368075098e-05, + "loss": 12.6258, + "step": 6123 + }, + { + "epoch": 0.25526239006294027, + "grad_norm": 103.0, + "learning_rate": 8.727399494472782e-05, + "loss": 5.9702, + "step": 6124 + }, + { + "epoch": 0.2553040723604685, + "grad_norm": 400.0, + "learning_rate": 8.72694955293828e-05, + "loss": 15.6252, + "step": 6125 + }, + { + "epoch": 0.25534575465799675, + "grad_norm": 498.0, + "learning_rate": 8.726499543479791e-05, + "loss": 15.5006, + "step": 6126 + }, + { + "epoch": 0.255387436955525, + "grad_norm": 1024.0, + "learning_rate": 8.726049466105517e-05, + "loss": 25.7506, + "step": 6127 + }, + { + "epoch": 0.2554291192530532, + "grad_norm": 408.0, + "learning_rate": 8.725599320823659e-05, + "loss": 15.9382, + "step": 6128 + }, + { + "epoch": 0.2554708015505815, + "grad_norm": 390.0, + "learning_rate": 8.725149107642426e-05, + "loss": 14.7503, + "step": 6129 + }, + { + "epoch": 0.2555124838481097, + "grad_norm": 238.0, + "learning_rate": 8.724698826570018e-05, + "loss": 12.8128, + "step": 6130 + }, + { + "epoch": 0.255554166145638, + "grad_norm": 225.0, + "learning_rate": 8.724248477614643e-05, + "loss": 11.8133, + "step": 6131 + }, + { + "epoch": 0.2555958484431662, + "grad_norm": 676.0, + "learning_rate": 8.723798060784509e-05, + "loss": 20.2501, + "step": 6132 + }, + { + "epoch": 0.25563753074069445, + "grad_norm": 470.0, + "learning_rate": 8.723347576087824e-05, + "loss": 14.4379, + "step": 6133 + }, + { + "epoch": 0.25567921303822266, + "grad_norm": 380.0, + "learning_rate": 8.7228970235328e-05, + "loss": 14.4386, + "step": 6134 + }, + { + "epoch": 0.25572089533575093, + "grad_norm": 478.0, + "learning_rate": 8.722446403127647e-05, + "loss": 15.5003, + "step": 6135 + }, + { + "epoch": 0.25576257763327914, + "grad_norm": 572.0, + "learning_rate": 8.721995714880578e-05, + "loss": 16.6252, + "step": 6136 + }, + { + "epoch": 0.2558042599308074, + "grad_norm": 604.0, + "learning_rate": 8.721544958799808e-05, + "loss": 19.001, + "step": 6137 + }, + { + "epoch": 0.2558459422283356, + "grad_norm": 40.5, + "learning_rate": 8.721094134893549e-05, + "loss": 7.0631, + "step": 6138 + }, + { + "epoch": 0.2558876245258639, + "grad_norm": 222.0, + "learning_rate": 8.72064324317002e-05, + "loss": 12.3751, + "step": 6139 + }, + { + "epoch": 0.2559293068233921, + "grad_norm": 57.5, + "learning_rate": 8.720192283637436e-05, + "loss": 5.7818, + "step": 6140 + }, + { + "epoch": 0.25597098912092037, + "grad_norm": 260.0, + "learning_rate": 8.719741256304018e-05, + "loss": 11.8128, + "step": 6141 + }, + { + "epoch": 0.2560126714184486, + "grad_norm": 510.0, + "learning_rate": 8.719290161177987e-05, + "loss": 16.5018, + "step": 6142 + }, + { + "epoch": 0.25605435371597685, + "grad_norm": 568.0, + "learning_rate": 8.71883899826756e-05, + "loss": 18.6253, + "step": 6143 + }, + { + "epoch": 0.25609603601350506, + "grad_norm": 354.0, + "learning_rate": 8.718387767580964e-05, + "loss": 14.3127, + "step": 6144 + }, + { + "epoch": 0.2561377183110333, + "grad_norm": 194.0, + "learning_rate": 8.71793646912642e-05, + "loss": 12.063, + "step": 6145 + }, + { + "epoch": 0.25617940060856154, + "grad_norm": 173.0, + "learning_rate": 8.717485102912155e-05, + "loss": 8.2505, + "step": 6146 + }, + { + "epoch": 0.2562210829060898, + "grad_norm": 155.0, + "learning_rate": 8.717033668946393e-05, + "loss": 9.6879, + "step": 6147 + }, + { + "epoch": 0.256262765203618, + "grad_norm": 492.0, + "learning_rate": 8.716582167237361e-05, + "loss": 16.5006, + "step": 6148 + }, + { + "epoch": 0.2563044475011463, + "grad_norm": 560.0, + "learning_rate": 8.716130597793293e-05, + "loss": 17.2505, + "step": 6149 + }, + { + "epoch": 0.2563461297986745, + "grad_norm": 952.0, + "learning_rate": 8.715678960622412e-05, + "loss": 21.3753, + "step": 6150 + }, + { + "epoch": 0.25638781209620276, + "grad_norm": 536.0, + "learning_rate": 8.715227255732952e-05, + "loss": 16.5006, + "step": 6151 + }, + { + "epoch": 0.256429494393731, + "grad_norm": 442.0, + "learning_rate": 8.714775483133146e-05, + "loss": 14.8135, + "step": 6152 + }, + { + "epoch": 0.25647117669125924, + "grad_norm": 52.5, + "learning_rate": 8.714323642831227e-05, + "loss": 8.2506, + "step": 6153 + }, + { + "epoch": 0.25651285898878745, + "grad_norm": 212.0, + "learning_rate": 8.71387173483543e-05, + "loss": 10.8753, + "step": 6154 + }, + { + "epoch": 0.2565545412863157, + "grad_norm": 229.0, + "learning_rate": 8.713419759153993e-05, + "loss": 11.7503, + "step": 6155 + }, + { + "epoch": 0.25659622358384393, + "grad_norm": 221.0, + "learning_rate": 8.712967715795148e-05, + "loss": 9.5628, + "step": 6156 + }, + { + "epoch": 0.2566379058813722, + "grad_norm": 270.0, + "learning_rate": 8.712515604767138e-05, + "loss": 12.6252, + "step": 6157 + }, + { + "epoch": 0.2566795881789004, + "grad_norm": 382.0, + "learning_rate": 8.712063426078203e-05, + "loss": 15.5636, + "step": 6158 + }, + { + "epoch": 0.2567212704764287, + "grad_norm": 238.0, + "learning_rate": 8.711611179736581e-05, + "loss": 11.5628, + "step": 6159 + }, + { + "epoch": 0.2567629527739569, + "grad_norm": 2368.0, + "learning_rate": 8.711158865750515e-05, + "loss": 40.7576, + "step": 6160 + }, + { + "epoch": 0.25680463507148515, + "grad_norm": 840.0, + "learning_rate": 8.710706484128251e-05, + "loss": 24.7504, + "step": 6161 + }, + { + "epoch": 0.25684631736901337, + "grad_norm": 728.0, + "learning_rate": 8.710254034878031e-05, + "loss": 19.8788, + "step": 6162 + }, + { + "epoch": 0.25688799966654163, + "grad_norm": 504.0, + "learning_rate": 8.7098015180081e-05, + "loss": 17.0006, + "step": 6163 + }, + { + "epoch": 0.25692968196406984, + "grad_norm": 230.0, + "learning_rate": 8.70934893352671e-05, + "loss": 8.6253, + "step": 6164 + }, + { + "epoch": 0.2569713642615981, + "grad_norm": 136.0, + "learning_rate": 8.708896281442105e-05, + "loss": 9.3751, + "step": 6165 + }, + { + "epoch": 0.2570130465591263, + "grad_norm": 1560.0, + "learning_rate": 8.708443561762535e-05, + "loss": 30.3793, + "step": 6166 + }, + { + "epoch": 0.2570547288566546, + "grad_norm": 424.0, + "learning_rate": 8.707990774496256e-05, + "loss": 15.938, + "step": 6167 + }, + { + "epoch": 0.2570964111541828, + "grad_norm": 668.0, + "learning_rate": 8.707537919651512e-05, + "loss": 20.3788, + "step": 6168 + }, + { + "epoch": 0.25713809345171107, + "grad_norm": 140.0, + "learning_rate": 8.707084997236561e-05, + "loss": 10.0628, + "step": 6169 + }, + { + "epoch": 0.2571797757492393, + "grad_norm": 282.0, + "learning_rate": 8.706632007259658e-05, + "loss": 12.6885, + "step": 6170 + }, + { + "epoch": 0.25722145804676755, + "grad_norm": 140.0, + "learning_rate": 8.706178949729057e-05, + "loss": 10.188, + "step": 6171 + }, + { + "epoch": 0.25726314034429576, + "grad_norm": 300.0, + "learning_rate": 8.705725824653015e-05, + "loss": 13.3128, + "step": 6172 + }, + { + "epoch": 0.257304822641824, + "grad_norm": 107.5, + "learning_rate": 8.705272632039792e-05, + "loss": 9.4381, + "step": 6173 + }, + { + "epoch": 0.25734650493935224, + "grad_norm": 1368.0, + "learning_rate": 8.704819371897646e-05, + "loss": 27.5027, + "step": 6174 + }, + { + "epoch": 0.2573881872368805, + "grad_norm": 1688.0, + "learning_rate": 8.704366044234838e-05, + "loss": 39.2504, + "step": 6175 + }, + { + "epoch": 0.2574298695344087, + "grad_norm": 151.0, + "learning_rate": 8.70391264905963e-05, + "loss": 9.3753, + "step": 6176 + }, + { + "epoch": 0.257471551831937, + "grad_norm": 956.0, + "learning_rate": 8.703459186380283e-05, + "loss": 24.5004, + "step": 6177 + }, + { + "epoch": 0.2575132341294652, + "grad_norm": 536.0, + "learning_rate": 8.703005656205067e-05, + "loss": 17.2503, + "step": 6178 + }, + { + "epoch": 0.25755491642699346, + "grad_norm": 492.0, + "learning_rate": 8.702552058542241e-05, + "loss": 14.7502, + "step": 6179 + }, + { + "epoch": 0.2575965987245217, + "grad_norm": 560.0, + "learning_rate": 8.702098393400078e-05, + "loss": 18.5003, + "step": 6180 + }, + { + "epoch": 0.25763828102204994, + "grad_norm": 186.0, + "learning_rate": 8.701644660786841e-05, + "loss": 10.7507, + "step": 6181 + }, + { + "epoch": 0.25767996331957815, + "grad_norm": 416.0, + "learning_rate": 8.701190860710803e-05, + "loss": 15.0006, + "step": 6182 + }, + { + "epoch": 0.2577216456171064, + "grad_norm": 308.0, + "learning_rate": 8.700736993180233e-05, + "loss": 12.438, + "step": 6183 + }, + { + "epoch": 0.25776332791463463, + "grad_norm": 186.0, + "learning_rate": 8.700283058203402e-05, + "loss": 9.8127, + "step": 6184 + }, + { + "epoch": 0.2578050102121629, + "grad_norm": 796.0, + "learning_rate": 8.699829055788584e-05, + "loss": 20.7505, + "step": 6185 + }, + { + "epoch": 0.2578466925096911, + "grad_norm": 616.0, + "learning_rate": 8.699374985944053e-05, + "loss": 18.5013, + "step": 6186 + }, + { + "epoch": 0.2578883748072194, + "grad_norm": 222.0, + "learning_rate": 8.698920848678085e-05, + "loss": 10.0631, + "step": 6187 + }, + { + "epoch": 0.2579300571047476, + "grad_norm": 366.0, + "learning_rate": 8.698466643998954e-05, + "loss": 13.1904, + "step": 6188 + }, + { + "epoch": 0.25797173940227586, + "grad_norm": 241.0, + "learning_rate": 8.698012371914942e-05, + "loss": 10.7509, + "step": 6189 + }, + { + "epoch": 0.25801342169980407, + "grad_norm": 310.0, + "learning_rate": 8.697558032434327e-05, + "loss": 13.1878, + "step": 6190 + }, + { + "epoch": 0.25805510399733234, + "grad_norm": 189.0, + "learning_rate": 8.697103625565387e-05, + "loss": 10.4378, + "step": 6191 + }, + { + "epoch": 0.25809678629486055, + "grad_norm": 206.0, + "learning_rate": 8.696649151316405e-05, + "loss": 11.8136, + "step": 6192 + }, + { + "epoch": 0.2581384685923888, + "grad_norm": 776.0, + "learning_rate": 8.696194609695665e-05, + "loss": 21.8753, + "step": 6193 + }, + { + "epoch": 0.258180150889917, + "grad_norm": 446.0, + "learning_rate": 8.69574000071145e-05, + "loss": 15.6261, + "step": 6194 + }, + { + "epoch": 0.2582218331874453, + "grad_norm": 438.0, + "learning_rate": 8.695285324372047e-05, + "loss": 15.4381, + "step": 6195 + }, + { + "epoch": 0.2582635154849735, + "grad_norm": 145.0, + "learning_rate": 8.694830580685737e-05, + "loss": 9.3755, + "step": 6196 + }, + { + "epoch": 0.25830519778250177, + "grad_norm": 632.0, + "learning_rate": 8.694375769660816e-05, + "loss": 17.7504, + "step": 6197 + }, + { + "epoch": 0.25834688008003, + "grad_norm": 402.0, + "learning_rate": 8.693920891305565e-05, + "loss": 15.3126, + "step": 6198 + }, + { + "epoch": 0.25838856237755825, + "grad_norm": 490.0, + "learning_rate": 8.693465945628281e-05, + "loss": 17.0004, + "step": 6199 + }, + { + "epoch": 0.2584302446750865, + "grad_norm": 832.0, + "learning_rate": 8.69301093263725e-05, + "loss": 19.8801, + "step": 6200 + }, + { + "epoch": 0.25847192697261473, + "grad_norm": 732.0, + "learning_rate": 8.692555852340767e-05, + "loss": 22.2501, + "step": 6201 + }, + { + "epoch": 0.258513609270143, + "grad_norm": 211.0, + "learning_rate": 8.692100704747127e-05, + "loss": 11.0005, + "step": 6202 + }, + { + "epoch": 0.2585552915676712, + "grad_norm": 102.5, + "learning_rate": 8.691645489864624e-05, + "loss": 9.063, + "step": 6203 + }, + { + "epoch": 0.2585969738651995, + "grad_norm": 564.0, + "learning_rate": 8.691190207701551e-05, + "loss": 16.0003, + "step": 6204 + }, + { + "epoch": 0.2586386561627277, + "grad_norm": 272.0, + "learning_rate": 8.690734858266212e-05, + "loss": 13.6257, + "step": 6205 + }, + { + "epoch": 0.25868033846025595, + "grad_norm": 76.5, + "learning_rate": 8.690279441566902e-05, + "loss": 8.6252, + "step": 6206 + }, + { + "epoch": 0.25872202075778417, + "grad_norm": 764.0, + "learning_rate": 8.689823957611922e-05, + "loss": 20.6257, + "step": 6207 + }, + { + "epoch": 0.25876370305531243, + "grad_norm": 238.0, + "learning_rate": 8.689368406409573e-05, + "loss": 10.6257, + "step": 6208 + }, + { + "epoch": 0.25880538535284064, + "grad_norm": 288.0, + "learning_rate": 8.688912787968155e-05, + "loss": 14.2506, + "step": 6209 + }, + { + "epoch": 0.2588470676503689, + "grad_norm": 149.0, + "learning_rate": 8.688457102295976e-05, + "loss": 10.6252, + "step": 6210 + }, + { + "epoch": 0.2588887499478971, + "grad_norm": 410.0, + "learning_rate": 8.688001349401338e-05, + "loss": 16.0004, + "step": 6211 + }, + { + "epoch": 0.2589304322454254, + "grad_norm": 310.0, + "learning_rate": 8.687545529292548e-05, + "loss": 14.6255, + "step": 6212 + }, + { + "epoch": 0.2589721145429536, + "grad_norm": 406.0, + "learning_rate": 8.687089641977915e-05, + "loss": 16.8752, + "step": 6213 + }, + { + "epoch": 0.25901379684048187, + "grad_norm": 516.0, + "learning_rate": 8.686633687465745e-05, + "loss": 16.3754, + "step": 6214 + }, + { + "epoch": 0.2590554791380101, + "grad_norm": 580.0, + "learning_rate": 8.686177665764348e-05, + "loss": 18.1252, + "step": 6215 + }, + { + "epoch": 0.25909716143553835, + "grad_norm": 191.0, + "learning_rate": 8.685721576882037e-05, + "loss": 10.0014, + "step": 6216 + }, + { + "epoch": 0.25913884373306656, + "grad_norm": 592.0, + "learning_rate": 8.685265420827122e-05, + "loss": 20.7502, + "step": 6217 + }, + { + "epoch": 0.2591805260305948, + "grad_norm": 502.0, + "learning_rate": 8.684809197607917e-05, + "loss": 16.7538, + "step": 6218 + }, + { + "epoch": 0.25922220832812304, + "grad_norm": 564.0, + "learning_rate": 8.68435290723274e-05, + "loss": 18.6254, + "step": 6219 + }, + { + "epoch": 0.2592638906256513, + "grad_norm": 157.0, + "learning_rate": 8.683896549709903e-05, + "loss": 10.2503, + "step": 6220 + }, + { + "epoch": 0.2593055729231795, + "grad_norm": 404.0, + "learning_rate": 8.683440125047721e-05, + "loss": 15.5628, + "step": 6221 + }, + { + "epoch": 0.2593472552207078, + "grad_norm": 71.5, + "learning_rate": 8.68298363325452e-05, + "loss": 8.8132, + "step": 6222 + }, + { + "epoch": 0.259388937518236, + "grad_norm": 364.0, + "learning_rate": 8.682527074338613e-05, + "loss": 15.1884, + "step": 6223 + }, + { + "epoch": 0.25943061981576426, + "grad_norm": 120.0, + "learning_rate": 8.682070448308324e-05, + "loss": 10.0633, + "step": 6224 + }, + { + "epoch": 0.2594723021132925, + "grad_norm": 150.0, + "learning_rate": 8.681613755171975e-05, + "loss": 8.6876, + "step": 6225 + }, + { + "epoch": 0.25951398441082074, + "grad_norm": 280.0, + "learning_rate": 8.681156994937886e-05, + "loss": 12.8759, + "step": 6226 + }, + { + "epoch": 0.25955566670834895, + "grad_norm": 102.5, + "learning_rate": 8.680700167614387e-05, + "loss": 6.501, + "step": 6227 + }, + { + "epoch": 0.2595973490058772, + "grad_norm": 119.5, + "learning_rate": 8.6802432732098e-05, + "loss": 10.5012, + "step": 6228 + }, + { + "epoch": 0.25963903130340543, + "grad_norm": 304.0, + "learning_rate": 8.679786311732452e-05, + "loss": 13.3753, + "step": 6229 + }, + { + "epoch": 0.2596807136009337, + "grad_norm": 86.0, + "learning_rate": 8.679329283190672e-05, + "loss": 9.0002, + "step": 6230 + }, + { + "epoch": 0.2597223958984619, + "grad_norm": 156.0, + "learning_rate": 8.678872187592789e-05, + "loss": 7.8128, + "step": 6231 + }, + { + "epoch": 0.2597640781959902, + "grad_norm": 620.0, + "learning_rate": 8.678415024947133e-05, + "loss": 18.6258, + "step": 6232 + }, + { + "epoch": 0.2598057604935184, + "grad_norm": 406.0, + "learning_rate": 8.677957795262038e-05, + "loss": 15.0003, + "step": 6233 + }, + { + "epoch": 0.25984744279104666, + "grad_norm": 232.0, + "learning_rate": 8.677500498545834e-05, + "loss": 11.1881, + "step": 6234 + }, + { + "epoch": 0.25988912508857487, + "grad_norm": 260.0, + "learning_rate": 8.677043134806859e-05, + "loss": 10.0635, + "step": 6235 + }, + { + "epoch": 0.25993080738610314, + "grad_norm": 308.0, + "learning_rate": 8.676585704053445e-05, + "loss": 11.063, + "step": 6236 + }, + { + "epoch": 0.25997248968363135, + "grad_norm": 200.0, + "learning_rate": 8.676128206293931e-05, + "loss": 10.6881, + "step": 6237 + }, + { + "epoch": 0.2600141719811596, + "grad_norm": 181.0, + "learning_rate": 8.675670641536653e-05, + "loss": 10.3755, + "step": 6238 + }, + { + "epoch": 0.2600558542786878, + "grad_norm": 212.0, + "learning_rate": 8.675213009789953e-05, + "loss": 11.5004, + "step": 6239 + }, + { + "epoch": 0.2600975365762161, + "grad_norm": 272.0, + "learning_rate": 8.674755311062168e-05, + "loss": 12.5627, + "step": 6240 + }, + { + "epoch": 0.2601392188737443, + "grad_norm": 580.0, + "learning_rate": 8.674297545361643e-05, + "loss": 18.0002, + "step": 6241 + }, + { + "epoch": 0.26018090117127257, + "grad_norm": 330.0, + "learning_rate": 8.673839712696716e-05, + "loss": 12.2507, + "step": 6242 + }, + { + "epoch": 0.2602225834688008, + "grad_norm": 220.0, + "learning_rate": 8.673381813075737e-05, + "loss": 13.2532, + "step": 6243 + }, + { + "epoch": 0.26026426576632905, + "grad_norm": 144.0, + "learning_rate": 8.672923846507049e-05, + "loss": 5.0002, + "step": 6244 + }, + { + "epoch": 0.26030594806385726, + "grad_norm": 624.0, + "learning_rate": 8.672465812998995e-05, + "loss": 19.5002, + "step": 6245 + }, + { + "epoch": 0.26034763036138553, + "grad_norm": 1184.0, + "learning_rate": 8.672007712559927e-05, + "loss": 26.7558, + "step": 6246 + }, + { + "epoch": 0.26038931265891374, + "grad_norm": 330.0, + "learning_rate": 8.671549545198192e-05, + "loss": 13.8127, + "step": 6247 + }, + { + "epoch": 0.260430994956442, + "grad_norm": 82.5, + "learning_rate": 8.671091310922141e-05, + "loss": 7.2504, + "step": 6248 + }, + { + "epoch": 0.2604726772539702, + "grad_norm": 224.0, + "learning_rate": 8.670633009740124e-05, + "loss": 11.6876, + "step": 6249 + }, + { + "epoch": 0.2605143595514985, + "grad_norm": 548.0, + "learning_rate": 8.670174641660495e-05, + "loss": 14.7545, + "step": 6250 + }, + { + "epoch": 0.2605560418490267, + "grad_norm": 888.0, + "learning_rate": 8.66971620669161e-05, + "loss": 24.7546, + "step": 6251 + }, + { + "epoch": 0.26059772414655497, + "grad_norm": 386.0, + "learning_rate": 8.669257704841818e-05, + "loss": 15.6288, + "step": 6252 + }, + { + "epoch": 0.2606394064440832, + "grad_norm": 760.0, + "learning_rate": 8.66879913611948e-05, + "loss": 22.7502, + "step": 6253 + }, + { + "epoch": 0.26068108874161144, + "grad_norm": 260.0, + "learning_rate": 8.668340500532952e-05, + "loss": 13.063, + "step": 6254 + }, + { + "epoch": 0.26072277103913966, + "grad_norm": 174.0, + "learning_rate": 8.667881798090591e-05, + "loss": 10.7504, + "step": 6255 + }, + { + "epoch": 0.2607644533366679, + "grad_norm": 482.0, + "learning_rate": 8.667423028800761e-05, + "loss": 16.6256, + "step": 6256 + }, + { + "epoch": 0.26080613563419613, + "grad_norm": 308.0, + "learning_rate": 8.666964192671821e-05, + "loss": 14.3758, + "step": 6257 + }, + { + "epoch": 0.2608478179317244, + "grad_norm": 512.0, + "learning_rate": 8.66650528971213e-05, + "loss": 17.1263, + "step": 6258 + }, + { + "epoch": 0.2608895002292526, + "grad_norm": 688.0, + "learning_rate": 8.666046319930057e-05, + "loss": 20.8755, + "step": 6259 + }, + { + "epoch": 0.2609311825267809, + "grad_norm": 1008.0, + "learning_rate": 8.665587283333965e-05, + "loss": 24.6251, + "step": 6260 + }, + { + "epoch": 0.2609728648243091, + "grad_norm": 137.0, + "learning_rate": 8.665128179932218e-05, + "loss": 10.1878, + "step": 6261 + }, + { + "epoch": 0.26101454712183736, + "grad_norm": 296.0, + "learning_rate": 8.664669009733184e-05, + "loss": 13.0627, + "step": 6262 + }, + { + "epoch": 0.26105622941936557, + "grad_norm": 564.0, + "learning_rate": 8.664209772745233e-05, + "loss": 18.8751, + "step": 6263 + }, + { + "epoch": 0.26109791171689384, + "grad_norm": 336.0, + "learning_rate": 8.663750468976733e-05, + "loss": 12.8133, + "step": 6264 + }, + { + "epoch": 0.26113959401442205, + "grad_norm": 410.0, + "learning_rate": 8.663291098436057e-05, + "loss": 14.4378, + "step": 6265 + }, + { + "epoch": 0.2611812763119503, + "grad_norm": 324.0, + "learning_rate": 8.662831661131574e-05, + "loss": 13.6253, + "step": 6266 + }, + { + "epoch": 0.26122295860947853, + "grad_norm": 229.0, + "learning_rate": 8.662372157071659e-05, + "loss": 11.3752, + "step": 6267 + }, + { + "epoch": 0.2612646409070068, + "grad_norm": 222.0, + "learning_rate": 8.661912586264686e-05, + "loss": 9.9379, + "step": 6268 + }, + { + "epoch": 0.261306323204535, + "grad_norm": 394.0, + "learning_rate": 8.661452948719032e-05, + "loss": 13.377, + "step": 6269 + }, + { + "epoch": 0.2613480055020633, + "grad_norm": 310.0, + "learning_rate": 8.660993244443072e-05, + "loss": 14.2504, + "step": 6270 + }, + { + "epoch": 0.2613896877995915, + "grad_norm": 302.0, + "learning_rate": 8.660533473445187e-05, + "loss": 14.063, + "step": 6271 + }, + { + "epoch": 0.26143137009711975, + "grad_norm": 262.0, + "learning_rate": 8.660073635733752e-05, + "loss": 14.0022, + "step": 6272 + }, + { + "epoch": 0.261473052394648, + "grad_norm": 516.0, + "learning_rate": 8.659613731317152e-05, + "loss": 17.3752, + "step": 6273 + }, + { + "epoch": 0.26151473469217623, + "grad_norm": 235.0, + "learning_rate": 8.659153760203766e-05, + "loss": 11.3133, + "step": 6274 + }, + { + "epoch": 0.2615564169897045, + "grad_norm": 272.0, + "learning_rate": 8.658693722401979e-05, + "loss": 13.5628, + "step": 6275 + }, + { + "epoch": 0.2615980992872327, + "grad_norm": 384.0, + "learning_rate": 8.658233617920172e-05, + "loss": 15.0627, + "step": 6276 + }, + { + "epoch": 0.261639781584761, + "grad_norm": 101.5, + "learning_rate": 8.657773446766734e-05, + "loss": 8.8755, + "step": 6277 + }, + { + "epoch": 0.2616814638822892, + "grad_norm": 306.0, + "learning_rate": 8.65731320895005e-05, + "loss": 13.5004, + "step": 6278 + }, + { + "epoch": 0.26172314617981746, + "grad_norm": 404.0, + "learning_rate": 8.656852904478507e-05, + "loss": 16.6252, + "step": 6279 + }, + { + "epoch": 0.26176482847734567, + "grad_norm": 386.0, + "learning_rate": 8.656392533360495e-05, + "loss": 14.9377, + "step": 6280 + }, + { + "epoch": 0.26180651077487394, + "grad_norm": 216.0, + "learning_rate": 8.655932095604406e-05, + "loss": 10.0627, + "step": 6281 + }, + { + "epoch": 0.26184819307240215, + "grad_norm": 424.0, + "learning_rate": 8.655471591218632e-05, + "loss": 14.3128, + "step": 6282 + }, + { + "epoch": 0.2618898753699304, + "grad_norm": 50.75, + "learning_rate": 8.655011020211561e-05, + "loss": 6.7202, + "step": 6283 + }, + { + "epoch": 0.2619315576674586, + "grad_norm": 372.0, + "learning_rate": 8.65455038259159e-05, + "loss": 15.9377, + "step": 6284 + }, + { + "epoch": 0.2619732399649869, + "grad_norm": 107.0, + "learning_rate": 8.654089678367113e-05, + "loss": 6.8128, + "step": 6285 + }, + { + "epoch": 0.2620149222625151, + "grad_norm": 328.0, + "learning_rate": 8.653628907546528e-05, + "loss": 13.5627, + "step": 6286 + }, + { + "epoch": 0.26205660456004337, + "grad_norm": 360.0, + "learning_rate": 8.653168070138232e-05, + "loss": 14.5628, + "step": 6287 + }, + { + "epoch": 0.2620982868575716, + "grad_norm": 239.0, + "learning_rate": 8.652707166150624e-05, + "loss": 10.5007, + "step": 6288 + }, + { + "epoch": 0.26213996915509985, + "grad_norm": 214.0, + "learning_rate": 8.652246195592104e-05, + "loss": 11.6877, + "step": 6289 + }, + { + "epoch": 0.26218165145262806, + "grad_norm": 1280.0, + "learning_rate": 8.651785158471072e-05, + "loss": 24.7557, + "step": 6290 + }, + { + "epoch": 0.26222333375015633, + "grad_norm": 300.0, + "learning_rate": 8.651324054795931e-05, + "loss": 12.4377, + "step": 6291 + }, + { + "epoch": 0.26226501604768454, + "grad_norm": 532.0, + "learning_rate": 8.650862884575085e-05, + "loss": 18.1254, + "step": 6292 + }, + { + "epoch": 0.2623066983452128, + "grad_norm": 330.0, + "learning_rate": 8.65040164781694e-05, + "loss": 13.8127, + "step": 6293 + }, + { + "epoch": 0.262348380642741, + "grad_norm": 464.0, + "learning_rate": 8.6499403445299e-05, + "loss": 15.3128, + "step": 6294 + }, + { + "epoch": 0.2623900629402693, + "grad_norm": 255.0, + "learning_rate": 8.649478974722374e-05, + "loss": 10.938, + "step": 6295 + }, + { + "epoch": 0.2624317452377975, + "grad_norm": 924.0, + "learning_rate": 8.649017538402769e-05, + "loss": 24.8758, + "step": 6296 + }, + { + "epoch": 0.26247342753532577, + "grad_norm": 308.0, + "learning_rate": 8.648556035579495e-05, + "loss": 12.6877, + "step": 6297 + }, + { + "epoch": 0.262515109832854, + "grad_norm": 282.0, + "learning_rate": 8.648094466260964e-05, + "loss": 11.8753, + "step": 6298 + }, + { + "epoch": 0.26255679213038224, + "grad_norm": 320.0, + "learning_rate": 8.647632830455588e-05, + "loss": 13.3752, + "step": 6299 + }, + { + "epoch": 0.26259847442791046, + "grad_norm": 340.0, + "learning_rate": 8.647171128171778e-05, + "loss": 13.3129, + "step": 6300 + }, + { + "epoch": 0.2626401567254387, + "grad_norm": 404.0, + "learning_rate": 8.646709359417951e-05, + "loss": 14.4386, + "step": 6301 + }, + { + "epoch": 0.26268183902296693, + "grad_norm": 141.0, + "learning_rate": 8.646247524202524e-05, + "loss": 10.1883, + "step": 6302 + }, + { + "epoch": 0.2627235213204952, + "grad_norm": 336.0, + "learning_rate": 8.645785622533911e-05, + "loss": 14.1254, + "step": 6303 + }, + { + "epoch": 0.2627652036180234, + "grad_norm": 232.0, + "learning_rate": 8.645323654420532e-05, + "loss": 12.1254, + "step": 6304 + }, + { + "epoch": 0.2628068859155517, + "grad_norm": 394.0, + "learning_rate": 8.644861619870805e-05, + "loss": 14.1877, + "step": 6305 + }, + { + "epoch": 0.2628485682130799, + "grad_norm": 266.0, + "learning_rate": 8.644399518893152e-05, + "loss": 10.8135, + "step": 6306 + }, + { + "epoch": 0.26289025051060816, + "grad_norm": 227.0, + "learning_rate": 8.643937351495992e-05, + "loss": 12.7502, + "step": 6307 + }, + { + "epoch": 0.26293193280813637, + "grad_norm": 330.0, + "learning_rate": 8.643475117687753e-05, + "loss": 12.8757, + "step": 6308 + }, + { + "epoch": 0.26297361510566464, + "grad_norm": 127.0, + "learning_rate": 8.643012817476855e-05, + "loss": 9.5001, + "step": 6309 + }, + { + "epoch": 0.26301529740319285, + "grad_norm": 728.0, + "learning_rate": 8.642550450871727e-05, + "loss": 22.0016, + "step": 6310 + }, + { + "epoch": 0.2630569797007211, + "grad_norm": 1004.0, + "learning_rate": 8.642088017880792e-05, + "loss": 23.5002, + "step": 6311 + }, + { + "epoch": 0.26309866199824933, + "grad_norm": 836.0, + "learning_rate": 8.641625518512479e-05, + "loss": 24.3783, + "step": 6312 + }, + { + "epoch": 0.2631403442957776, + "grad_norm": 528.0, + "learning_rate": 8.641162952775219e-05, + "loss": 17.7502, + "step": 6313 + }, + { + "epoch": 0.2631820265933058, + "grad_norm": 194.0, + "learning_rate": 8.64070032067744e-05, + "loss": 8.6259, + "step": 6314 + }, + { + "epoch": 0.2632237088908341, + "grad_norm": 528.0, + "learning_rate": 8.640237622227576e-05, + "loss": 17.6253, + "step": 6315 + }, + { + "epoch": 0.2632653911883623, + "grad_norm": 169.0, + "learning_rate": 8.639774857434057e-05, + "loss": 10.6876, + "step": 6316 + }, + { + "epoch": 0.26330707348589055, + "grad_norm": 352.0, + "learning_rate": 8.639312026305318e-05, + "loss": 12.0015, + "step": 6317 + }, + { + "epoch": 0.26334875578341876, + "grad_norm": 1064.0, + "learning_rate": 8.638849128849795e-05, + "loss": 21.8803, + "step": 6318 + }, + { + "epoch": 0.26339043808094703, + "grad_norm": 384.0, + "learning_rate": 8.638386165075922e-05, + "loss": 15.6882, + "step": 6319 + }, + { + "epoch": 0.26343212037847524, + "grad_norm": 180.0, + "learning_rate": 8.637923134992139e-05, + "loss": 11.1252, + "step": 6320 + }, + { + "epoch": 0.2634738026760035, + "grad_norm": 440.0, + "learning_rate": 8.637460038606885e-05, + "loss": 15.4385, + "step": 6321 + }, + { + "epoch": 0.2635154849735317, + "grad_norm": 940.0, + "learning_rate": 8.636996875928598e-05, + "loss": 24.5007, + "step": 6322 + }, + { + "epoch": 0.26355716727106, + "grad_norm": 426.0, + "learning_rate": 8.63653364696572e-05, + "loss": 12.9378, + "step": 6323 + }, + { + "epoch": 0.2635988495685882, + "grad_norm": 298.0, + "learning_rate": 8.636070351726692e-05, + "loss": 14.3752, + "step": 6324 + }, + { + "epoch": 0.26364053186611647, + "grad_norm": 247.0, + "learning_rate": 8.635606990219963e-05, + "loss": 12.9377, + "step": 6325 + }, + { + "epoch": 0.2636822141636447, + "grad_norm": 494.0, + "learning_rate": 8.635143562453971e-05, + "loss": 17.7552, + "step": 6326 + }, + { + "epoch": 0.26372389646117295, + "grad_norm": 564.0, + "learning_rate": 8.634680068437166e-05, + "loss": 17.1257, + "step": 6327 + }, + { + "epoch": 0.26376557875870116, + "grad_norm": 360.0, + "learning_rate": 8.634216508177992e-05, + "loss": 14.2513, + "step": 6328 + }, + { + "epoch": 0.2638072610562294, + "grad_norm": 640.0, + "learning_rate": 8.633752881684902e-05, + "loss": 20.6253, + "step": 6329 + }, + { + "epoch": 0.26384894335375764, + "grad_norm": 264.0, + "learning_rate": 8.633289188966343e-05, + "loss": 12.0006, + "step": 6330 + }, + { + "epoch": 0.2638906256512859, + "grad_norm": 148.0, + "learning_rate": 8.632825430030764e-05, + "loss": 9.1251, + "step": 6331 + }, + { + "epoch": 0.2639323079488141, + "grad_norm": 69.0, + "learning_rate": 8.632361604886621e-05, + "loss": 9.0634, + "step": 6332 + }, + { + "epoch": 0.2639739902463424, + "grad_norm": 888.0, + "learning_rate": 8.631897713542364e-05, + "loss": 23.0047, + "step": 6333 + }, + { + "epoch": 0.2640156725438706, + "grad_norm": 239.0, + "learning_rate": 8.631433756006448e-05, + "loss": 12.251, + "step": 6334 + }, + { + "epoch": 0.26405735484139886, + "grad_norm": 227.0, + "learning_rate": 8.630969732287332e-05, + "loss": 11.3127, + "step": 6335 + }, + { + "epoch": 0.2640990371389271, + "grad_norm": 249.0, + "learning_rate": 8.630505642393468e-05, + "loss": 10.8755, + "step": 6336 + }, + { + "epoch": 0.26414071943645534, + "grad_norm": 264.0, + "learning_rate": 8.630041486333318e-05, + "loss": 12.3127, + "step": 6337 + }, + { + "epoch": 0.26418240173398355, + "grad_norm": 149.0, + "learning_rate": 8.629577264115338e-05, + "loss": 10.2509, + "step": 6338 + }, + { + "epoch": 0.2642240840315118, + "grad_norm": 196.0, + "learning_rate": 8.629112975747993e-05, + "loss": 9.6252, + "step": 6339 + }, + { + "epoch": 0.26426576632904003, + "grad_norm": 480.0, + "learning_rate": 8.628648621239739e-05, + "loss": 16.6266, + "step": 6340 + }, + { + "epoch": 0.2643074486265683, + "grad_norm": 880.0, + "learning_rate": 8.628184200599043e-05, + "loss": 24.2508, + "step": 6341 + }, + { + "epoch": 0.2643491309240965, + "grad_norm": 1472.0, + "learning_rate": 8.627719713834368e-05, + "loss": 26.8791, + "step": 6342 + }, + { + "epoch": 0.2643908132216248, + "grad_norm": 342.0, + "learning_rate": 8.627255160954178e-05, + "loss": 11.9377, + "step": 6343 + }, + { + "epoch": 0.264432495519153, + "grad_norm": 324.0, + "learning_rate": 8.626790541966942e-05, + "loss": 13.8752, + "step": 6344 + }, + { + "epoch": 0.26447417781668126, + "grad_norm": 716.0, + "learning_rate": 8.626325856881126e-05, + "loss": 21.1258, + "step": 6345 + }, + { + "epoch": 0.2645158601142095, + "grad_norm": 474.0, + "learning_rate": 8.625861105705199e-05, + "loss": 15.0664, + "step": 6346 + }, + { + "epoch": 0.26455754241173773, + "grad_norm": 112.0, + "learning_rate": 8.625396288447631e-05, + "loss": 9.1256, + "step": 6347 + }, + { + "epoch": 0.264599224709266, + "grad_norm": 296.0, + "learning_rate": 8.624931405116896e-05, + "loss": 13.0627, + "step": 6348 + }, + { + "epoch": 0.2646409070067942, + "grad_norm": 516.0, + "learning_rate": 8.624466455721462e-05, + "loss": 17.2505, + "step": 6349 + }, + { + "epoch": 0.2646825893043225, + "grad_norm": 402.0, + "learning_rate": 8.624001440269807e-05, + "loss": 16.0003, + "step": 6350 + }, + { + "epoch": 0.2647242716018507, + "grad_norm": 536.0, + "learning_rate": 8.623536358770402e-05, + "loss": 15.8755, + "step": 6351 + }, + { + "epoch": 0.26476595389937896, + "grad_norm": 486.0, + "learning_rate": 8.623071211231725e-05, + "loss": 16.5013, + "step": 6352 + }, + { + "epoch": 0.26480763619690717, + "grad_norm": 476.0, + "learning_rate": 8.622605997662257e-05, + "loss": 15.8751, + "step": 6353 + }, + { + "epoch": 0.26484931849443544, + "grad_norm": 924.0, + "learning_rate": 8.622140718070471e-05, + "loss": 23.0042, + "step": 6354 + }, + { + "epoch": 0.26489100079196365, + "grad_norm": 282.0, + "learning_rate": 8.621675372464848e-05, + "loss": 14.5014, + "step": 6355 + }, + { + "epoch": 0.2649326830894919, + "grad_norm": 398.0, + "learning_rate": 8.62120996085387e-05, + "loss": 15.3772, + "step": 6356 + }, + { + "epoch": 0.2649743653870201, + "grad_norm": 420.0, + "learning_rate": 8.62074448324602e-05, + "loss": 15.2503, + "step": 6357 + }, + { + "epoch": 0.2650160476845484, + "grad_norm": 724.0, + "learning_rate": 8.62027893964978e-05, + "loss": 19.3756, + "step": 6358 + }, + { + "epoch": 0.2650577299820766, + "grad_norm": 326.0, + "learning_rate": 8.619813330073634e-05, + "loss": 12.8128, + "step": 6359 + }, + { + "epoch": 0.2650994122796049, + "grad_norm": 174.0, + "learning_rate": 8.61934765452607e-05, + "loss": 9.439, + "step": 6360 + }, + { + "epoch": 0.2651410945771331, + "grad_norm": 446.0, + "learning_rate": 8.618881913015574e-05, + "loss": 15.0009, + "step": 6361 + }, + { + "epoch": 0.26518277687466135, + "grad_norm": 592.0, + "learning_rate": 8.618416105550633e-05, + "loss": 18.8752, + "step": 6362 + }, + { + "epoch": 0.26522445917218956, + "grad_norm": 398.0, + "learning_rate": 8.617950232139737e-05, + "loss": 14.3752, + "step": 6363 + }, + { + "epoch": 0.26526614146971783, + "grad_norm": 284.0, + "learning_rate": 8.617484292791377e-05, + "loss": 14.0011, + "step": 6364 + }, + { + "epoch": 0.26530782376724604, + "grad_norm": 374.0, + "learning_rate": 8.617018287514044e-05, + "loss": 13.8127, + "step": 6365 + }, + { + "epoch": 0.2653495060647743, + "grad_norm": 184.0, + "learning_rate": 8.616552216316234e-05, + "loss": 9.7503, + "step": 6366 + }, + { + "epoch": 0.2653911883623025, + "grad_norm": 320.0, + "learning_rate": 8.616086079206437e-05, + "loss": 12.3133, + "step": 6367 + }, + { + "epoch": 0.2654328706598308, + "grad_norm": 105.0, + "learning_rate": 8.615619876193151e-05, + "loss": 7.8754, + "step": 6368 + }, + { + "epoch": 0.265474552957359, + "grad_norm": 71.5, + "learning_rate": 8.61515360728487e-05, + "loss": 7.2817, + "step": 6369 + }, + { + "epoch": 0.26551623525488727, + "grad_norm": 1096.0, + "learning_rate": 8.614687272490096e-05, + "loss": 25.2511, + "step": 6370 + }, + { + "epoch": 0.2655579175524155, + "grad_norm": 492.0, + "learning_rate": 8.614220871817324e-05, + "loss": 17.5012, + "step": 6371 + }, + { + "epoch": 0.26559959984994375, + "grad_norm": 588.0, + "learning_rate": 8.613754405275057e-05, + "loss": 19.8758, + "step": 6372 + }, + { + "epoch": 0.26564128214747196, + "grad_norm": 540.0, + "learning_rate": 8.613287872871793e-05, + "loss": 18.0001, + "step": 6373 + }, + { + "epoch": 0.2656829644450002, + "grad_norm": 446.0, + "learning_rate": 8.612821274616038e-05, + "loss": 15.1264, + "step": 6374 + }, + { + "epoch": 0.26572464674252844, + "grad_norm": 256.0, + "learning_rate": 8.612354610516295e-05, + "loss": 12.7504, + "step": 6375 + }, + { + "epoch": 0.2657663290400567, + "grad_norm": 195.0, + "learning_rate": 8.611887880581069e-05, + "loss": 10.4378, + "step": 6376 + }, + { + "epoch": 0.2658080113375849, + "grad_norm": 432.0, + "learning_rate": 8.611421084818865e-05, + "loss": 14.1266, + "step": 6377 + }, + { + "epoch": 0.2658496936351132, + "grad_norm": 296.0, + "learning_rate": 8.61095422323819e-05, + "loss": 13.8753, + "step": 6378 + }, + { + "epoch": 0.2658913759326414, + "grad_norm": 908.0, + "learning_rate": 8.610487295847555e-05, + "loss": 24.8752, + "step": 6379 + }, + { + "epoch": 0.26593305823016966, + "grad_norm": 250.0, + "learning_rate": 8.610020302655468e-05, + "loss": 12.0005, + "step": 6380 + }, + { + "epoch": 0.2659747405276979, + "grad_norm": 208.0, + "learning_rate": 8.609553243670441e-05, + "loss": 12.1267, + "step": 6381 + }, + { + "epoch": 0.26601642282522614, + "grad_norm": 1592.0, + "learning_rate": 8.609086118900986e-05, + "loss": 36.7513, + "step": 6382 + }, + { + "epoch": 0.26605810512275435, + "grad_norm": 221.0, + "learning_rate": 8.608618928355616e-05, + "loss": 11.1253, + "step": 6383 + }, + { + "epoch": 0.2660997874202826, + "grad_norm": 584.0, + "learning_rate": 8.608151672042845e-05, + "loss": 18.2518, + "step": 6384 + }, + { + "epoch": 0.26614146971781083, + "grad_norm": 235.0, + "learning_rate": 8.60768434997119e-05, + "loss": 11.5006, + "step": 6385 + }, + { + "epoch": 0.2661831520153391, + "grad_norm": 536.0, + "learning_rate": 8.607216962149167e-05, + "loss": 19.7503, + "step": 6386 + }, + { + "epoch": 0.2662248343128673, + "grad_norm": 1392.0, + "learning_rate": 8.606749508585294e-05, + "loss": 28.6316, + "step": 6387 + }, + { + "epoch": 0.2662665166103956, + "grad_norm": 880.0, + "learning_rate": 8.606281989288093e-05, + "loss": 22.2505, + "step": 6388 + }, + { + "epoch": 0.2663081989079238, + "grad_norm": 72.5, + "learning_rate": 8.605814404266081e-05, + "loss": 7.6884, + "step": 6389 + }, + { + "epoch": 0.26634988120545205, + "grad_norm": 512.0, + "learning_rate": 8.605346753527784e-05, + "loss": 18.5002, + "step": 6390 + }, + { + "epoch": 0.26639156350298027, + "grad_norm": 312.0, + "learning_rate": 8.604879037081719e-05, + "loss": 14.5627, + "step": 6391 + }, + { + "epoch": 0.26643324580050853, + "grad_norm": 236.0, + "learning_rate": 8.604411254936415e-05, + "loss": 12.0002, + "step": 6392 + }, + { + "epoch": 0.26647492809803675, + "grad_norm": 548.0, + "learning_rate": 8.603943407100395e-05, + "loss": 17.2502, + "step": 6393 + }, + { + "epoch": 0.266516610395565, + "grad_norm": 348.0, + "learning_rate": 8.603475493582187e-05, + "loss": 14.9378, + "step": 6394 + }, + { + "epoch": 0.2665582926930932, + "grad_norm": 65.0, + "learning_rate": 8.603007514390319e-05, + "loss": 6.6881, + "step": 6395 + }, + { + "epoch": 0.2665999749906215, + "grad_norm": 490.0, + "learning_rate": 8.602539469533318e-05, + "loss": 18.1252, + "step": 6396 + }, + { + "epoch": 0.2666416572881497, + "grad_norm": 132.0, + "learning_rate": 8.602071359019717e-05, + "loss": 8.563, + "step": 6397 + }, + { + "epoch": 0.26668333958567797, + "grad_norm": 245.0, + "learning_rate": 8.601603182858045e-05, + "loss": 12.0636, + "step": 6398 + }, + { + "epoch": 0.2667250218832062, + "grad_norm": 400.0, + "learning_rate": 8.601134941056834e-05, + "loss": 16.3752, + "step": 6399 + }, + { + "epoch": 0.26676670418073445, + "grad_norm": 572.0, + "learning_rate": 8.60066663362462e-05, + "loss": 18.2502, + "step": 6400 + }, + { + "epoch": 0.26680838647826266, + "grad_norm": 432.0, + "learning_rate": 8.600198260569937e-05, + "loss": 14.1258, + "step": 6401 + }, + { + "epoch": 0.2668500687757909, + "grad_norm": 640.0, + "learning_rate": 8.599729821901321e-05, + "loss": 16.8753, + "step": 6402 + }, + { + "epoch": 0.26689175107331914, + "grad_norm": 284.0, + "learning_rate": 8.59926131762731e-05, + "loss": 12.8127, + "step": 6403 + }, + { + "epoch": 0.2669334333708474, + "grad_norm": 188.0, + "learning_rate": 8.598792747756441e-05, + "loss": 10.9379, + "step": 6404 + }, + { + "epoch": 0.2669751156683756, + "grad_norm": 266.0, + "learning_rate": 8.598324112297256e-05, + "loss": 13.188, + "step": 6405 + }, + { + "epoch": 0.2670167979659039, + "grad_norm": 398.0, + "learning_rate": 8.597855411258293e-05, + "loss": 14.3128, + "step": 6406 + }, + { + "epoch": 0.2670584802634321, + "grad_norm": 220.0, + "learning_rate": 8.597386644648097e-05, + "loss": 10.1881, + "step": 6407 + }, + { + "epoch": 0.26710016256096036, + "grad_norm": 171.0, + "learning_rate": 8.59691781247521e-05, + "loss": 9.563, + "step": 6408 + }, + { + "epoch": 0.2671418448584886, + "grad_norm": 318.0, + "learning_rate": 8.596448914748176e-05, + "loss": 13.2502, + "step": 6409 + }, + { + "epoch": 0.26718352715601684, + "grad_norm": 366.0, + "learning_rate": 8.595979951475541e-05, + "loss": 13.3127, + "step": 6410 + }, + { + "epoch": 0.26722520945354505, + "grad_norm": 358.0, + "learning_rate": 8.595510922665852e-05, + "loss": 14.5629, + "step": 6411 + }, + { + "epoch": 0.2672668917510733, + "grad_norm": 328.0, + "learning_rate": 8.595041828327657e-05, + "loss": 13.8754, + "step": 6412 + }, + { + "epoch": 0.26730857404860153, + "grad_norm": 416.0, + "learning_rate": 8.594572668469508e-05, + "loss": 14.5003, + "step": 6413 + }, + { + "epoch": 0.2673502563461298, + "grad_norm": 254.0, + "learning_rate": 8.594103443099951e-05, + "loss": 12.3751, + "step": 6414 + }, + { + "epoch": 0.267391938643658, + "grad_norm": 272.0, + "learning_rate": 8.593634152227541e-05, + "loss": 12.1884, + "step": 6415 + }, + { + "epoch": 0.2674336209411863, + "grad_norm": 242.0, + "learning_rate": 8.593164795860829e-05, + "loss": 12.5008, + "step": 6416 + }, + { + "epoch": 0.2674753032387145, + "grad_norm": 243.0, + "learning_rate": 8.592695374008368e-05, + "loss": 12.0629, + "step": 6417 + }, + { + "epoch": 0.26751698553624276, + "grad_norm": 564.0, + "learning_rate": 8.592225886678716e-05, + "loss": 19.8752, + "step": 6418 + }, + { + "epoch": 0.267558667833771, + "grad_norm": 378.0, + "learning_rate": 8.591756333880429e-05, + "loss": 12.6252, + "step": 6419 + }, + { + "epoch": 0.26760035013129924, + "grad_norm": 332.0, + "learning_rate": 8.591286715622062e-05, + "loss": 12.2502, + "step": 6420 + }, + { + "epoch": 0.2676420324288275, + "grad_norm": 548.0, + "learning_rate": 8.590817031912178e-05, + "loss": 18.2512, + "step": 6421 + }, + { + "epoch": 0.2676837147263557, + "grad_norm": 432.0, + "learning_rate": 8.590347282759334e-05, + "loss": 14.4378, + "step": 6422 + }, + { + "epoch": 0.267725397023884, + "grad_norm": 422.0, + "learning_rate": 8.589877468172092e-05, + "loss": 15.7503, + "step": 6423 + }, + { + "epoch": 0.2677670793214122, + "grad_norm": 428.0, + "learning_rate": 8.589407588159016e-05, + "loss": 15.1253, + "step": 6424 + }, + { + "epoch": 0.26780876161894046, + "grad_norm": 223.0, + "learning_rate": 8.588937642728664e-05, + "loss": 11.8752, + "step": 6425 + }, + { + "epoch": 0.2678504439164687, + "grad_norm": 228.0, + "learning_rate": 8.588467631889609e-05, + "loss": 11.1252, + "step": 6426 + }, + { + "epoch": 0.26789212621399694, + "grad_norm": 474.0, + "learning_rate": 8.58799755565041e-05, + "loss": 17.2515, + "step": 6427 + }, + { + "epoch": 0.26793380851152515, + "grad_norm": 352.0, + "learning_rate": 8.587527414019641e-05, + "loss": 14.0633, + "step": 6428 + }, + { + "epoch": 0.2679754908090534, + "grad_norm": 444.0, + "learning_rate": 8.587057207005862e-05, + "loss": 17.5002, + "step": 6429 + }, + { + "epoch": 0.26801717310658163, + "grad_norm": 360.0, + "learning_rate": 8.586586934617649e-05, + "loss": 16.1259, + "step": 6430 + }, + { + "epoch": 0.2680588554041099, + "grad_norm": 608.0, + "learning_rate": 8.586116596863571e-05, + "loss": 17.7547, + "step": 6431 + }, + { + "epoch": 0.2681005377016381, + "grad_norm": 292.0, + "learning_rate": 8.585646193752199e-05, + "loss": 12.0627, + "step": 6432 + }, + { + "epoch": 0.2681422199991664, + "grad_norm": 1360.0, + "learning_rate": 8.585175725292107e-05, + "loss": 34.2516, + "step": 6433 + }, + { + "epoch": 0.2681839022966946, + "grad_norm": 620.0, + "learning_rate": 8.584705191491869e-05, + "loss": 18.1256, + "step": 6434 + }, + { + "epoch": 0.26822558459422285, + "grad_norm": 148.0, + "learning_rate": 8.58423459236006e-05, + "loss": 8.5003, + "step": 6435 + }, + { + "epoch": 0.26826726689175107, + "grad_norm": 314.0, + "learning_rate": 8.58376392790526e-05, + "loss": 13.314, + "step": 6436 + }, + { + "epoch": 0.26830894918927933, + "grad_norm": 1012.0, + "learning_rate": 8.583293198136041e-05, + "loss": 25.128, + "step": 6437 + }, + { + "epoch": 0.26835063148680754, + "grad_norm": 364.0, + "learning_rate": 8.582822403060986e-05, + "loss": 15.1879, + "step": 6438 + }, + { + "epoch": 0.2683923137843358, + "grad_norm": 234.0, + "learning_rate": 8.582351542688675e-05, + "loss": 11.4382, + "step": 6439 + }, + { + "epoch": 0.268433996081864, + "grad_norm": 308.0, + "learning_rate": 8.581880617027691e-05, + "loss": 12.7513, + "step": 6440 + }, + { + "epoch": 0.2684756783793923, + "grad_norm": 300.0, + "learning_rate": 8.581409626086614e-05, + "loss": 13.6882, + "step": 6441 + }, + { + "epoch": 0.2685173606769205, + "grad_norm": 378.0, + "learning_rate": 8.580938569874027e-05, + "loss": 15.9386, + "step": 6442 + }, + { + "epoch": 0.26855904297444877, + "grad_norm": 784.0, + "learning_rate": 8.580467448398516e-05, + "loss": 22.1252, + "step": 6443 + }, + { + "epoch": 0.268600725271977, + "grad_norm": 255.0, + "learning_rate": 8.579996261668672e-05, + "loss": 12.1878, + "step": 6444 + }, + { + "epoch": 0.26864240756950525, + "grad_norm": 576.0, + "learning_rate": 8.579525009693074e-05, + "loss": 18.0007, + "step": 6445 + }, + { + "epoch": 0.26868408986703346, + "grad_norm": 432.0, + "learning_rate": 8.579053692480318e-05, + "loss": 16.8754, + "step": 6446 + }, + { + "epoch": 0.2687257721645617, + "grad_norm": 119.0, + "learning_rate": 8.57858231003899e-05, + "loss": 8.3759, + "step": 6447 + }, + { + "epoch": 0.26876745446208994, + "grad_norm": 464.0, + "learning_rate": 8.57811086237768e-05, + "loss": 17.3756, + "step": 6448 + }, + { + "epoch": 0.2688091367596182, + "grad_norm": 238.0, + "learning_rate": 8.577639349504983e-05, + "loss": 11.8753, + "step": 6449 + }, + { + "epoch": 0.2688508190571464, + "grad_norm": 193.0, + "learning_rate": 8.577167771429492e-05, + "loss": 11.0636, + "step": 6450 + }, + { + "epoch": 0.2688925013546747, + "grad_norm": 362.0, + "learning_rate": 8.5766961281598e-05, + "loss": 13.9378, + "step": 6451 + }, + { + "epoch": 0.2689341836522029, + "grad_norm": 330.0, + "learning_rate": 8.576224419704504e-05, + "loss": 13.6257, + "step": 6452 + }, + { + "epoch": 0.26897586594973116, + "grad_norm": 87.0, + "learning_rate": 8.575752646072201e-05, + "loss": 8.3755, + "step": 6453 + }, + { + "epoch": 0.2690175482472594, + "grad_norm": 520.0, + "learning_rate": 8.575280807271488e-05, + "loss": 18.1252, + "step": 6454 + }, + { + "epoch": 0.26905923054478764, + "grad_norm": 1536.0, + "learning_rate": 8.574808903310964e-05, + "loss": 34.7503, + "step": 6455 + }, + { + "epoch": 0.26910091284231585, + "grad_norm": 736.0, + "learning_rate": 8.57433693419923e-05, + "loss": 21.3766, + "step": 6456 + }, + { + "epoch": 0.2691425951398441, + "grad_norm": 46.25, + "learning_rate": 8.573864899944891e-05, + "loss": 7.5007, + "step": 6457 + }, + { + "epoch": 0.26918427743737233, + "grad_norm": 270.0, + "learning_rate": 8.573392800556545e-05, + "loss": 11.8754, + "step": 6458 + }, + { + "epoch": 0.2692259597349006, + "grad_norm": 143.0, + "learning_rate": 8.572920636042798e-05, + "loss": 7.2195, + "step": 6459 + }, + { + "epoch": 0.2692676420324288, + "grad_norm": 1032.0, + "learning_rate": 8.572448406412255e-05, + "loss": 26.6281, + "step": 6460 + }, + { + "epoch": 0.2693093243299571, + "grad_norm": 86.0, + "learning_rate": 8.571976111673523e-05, + "loss": 8.3129, + "step": 6461 + }, + { + "epoch": 0.2693510066274853, + "grad_norm": 158.0, + "learning_rate": 8.571503751835209e-05, + "loss": 10.8128, + "step": 6462 + }, + { + "epoch": 0.26939268892501356, + "grad_norm": 482.0, + "learning_rate": 8.571031326905923e-05, + "loss": 17.8752, + "step": 6463 + }, + { + "epoch": 0.26943437122254177, + "grad_norm": 506.0, + "learning_rate": 8.570558836894274e-05, + "loss": 16.3753, + "step": 6464 + }, + { + "epoch": 0.26947605352007004, + "grad_norm": 79.5, + "learning_rate": 8.570086281808871e-05, + "loss": 8.7502, + "step": 6465 + }, + { + "epoch": 0.26951773581759825, + "grad_norm": 67.5, + "learning_rate": 8.569613661658331e-05, + "loss": 8.6878, + "step": 6466 + }, + { + "epoch": 0.2695594181151265, + "grad_norm": 482.0, + "learning_rate": 8.569140976451265e-05, + "loss": 19.251, + "step": 6467 + }, + { + "epoch": 0.2696011004126547, + "grad_norm": 406.0, + "learning_rate": 8.568668226196286e-05, + "loss": 16.5003, + "step": 6468 + }, + { + "epoch": 0.269642782710183, + "grad_norm": 444.0, + "learning_rate": 8.568195410902014e-05, + "loss": 17.0006, + "step": 6469 + }, + { + "epoch": 0.2696844650077112, + "grad_norm": 134.0, + "learning_rate": 8.567722530577062e-05, + "loss": 5.5316, + "step": 6470 + }, + { + "epoch": 0.26972614730523947, + "grad_norm": 668.0, + "learning_rate": 8.56724958523005e-05, + "loss": 21.0009, + "step": 6471 + }, + { + "epoch": 0.2697678296027677, + "grad_norm": 556.0, + "learning_rate": 8.5667765748696e-05, + "loss": 15.0653, + "step": 6472 + }, + { + "epoch": 0.26980951190029595, + "grad_norm": 888.0, + "learning_rate": 8.566303499504329e-05, + "loss": 24.2505, + "step": 6473 + }, + { + "epoch": 0.26985119419782416, + "grad_norm": 282.0, + "learning_rate": 8.56583035914286e-05, + "loss": 11.7504, + "step": 6474 + }, + { + "epoch": 0.26989287649535243, + "grad_norm": 255.0, + "learning_rate": 8.565357153793815e-05, + "loss": 12.1876, + "step": 6475 + }, + { + "epoch": 0.26993455879288064, + "grad_norm": 166.0, + "learning_rate": 8.564883883465822e-05, + "loss": 10.5629, + "step": 6476 + }, + { + "epoch": 0.2699762410904089, + "grad_norm": 211.0, + "learning_rate": 8.564410548167503e-05, + "loss": 11.7509, + "step": 6477 + }, + { + "epoch": 0.2700179233879371, + "grad_norm": 272.0, + "learning_rate": 8.563937147907483e-05, + "loss": 12.0642, + "step": 6478 + }, + { + "epoch": 0.2700596056854654, + "grad_norm": 410.0, + "learning_rate": 8.563463682694395e-05, + "loss": 13.0629, + "step": 6479 + }, + { + "epoch": 0.2701012879829936, + "grad_norm": 318.0, + "learning_rate": 8.562990152536864e-05, + "loss": 13.8752, + "step": 6480 + }, + { + "epoch": 0.27014297028052187, + "grad_norm": 272.0, + "learning_rate": 8.56251655744352e-05, + "loss": 12.5006, + "step": 6481 + }, + { + "epoch": 0.2701846525780501, + "grad_norm": 348.0, + "learning_rate": 8.562042897422997e-05, + "loss": 14.9379, + "step": 6482 + }, + { + "epoch": 0.27022633487557834, + "grad_norm": 352.0, + "learning_rate": 8.561569172483926e-05, + "loss": 12.9378, + "step": 6483 + }, + { + "epoch": 0.27026801717310656, + "grad_norm": 458.0, + "learning_rate": 8.561095382634941e-05, + "loss": 14.8756, + "step": 6484 + }, + { + "epoch": 0.2703096994706348, + "grad_norm": 680.0, + "learning_rate": 8.560621527884674e-05, + "loss": 16.8797, + "step": 6485 + }, + { + "epoch": 0.27035138176816303, + "grad_norm": 416.0, + "learning_rate": 8.560147608241767e-05, + "loss": 16.376, + "step": 6486 + }, + { + "epoch": 0.2703930640656913, + "grad_norm": 159.0, + "learning_rate": 8.55967362371485e-05, + "loss": 9.9385, + "step": 6487 + }, + { + "epoch": 0.2704347463632195, + "grad_norm": 290.0, + "learning_rate": 8.559199574312569e-05, + "loss": 12.6256, + "step": 6488 + }, + { + "epoch": 0.2704764286607478, + "grad_norm": 187.0, + "learning_rate": 8.558725460043557e-05, + "loss": 6.8446, + "step": 6489 + }, + { + "epoch": 0.27051811095827605, + "grad_norm": 153.0, + "learning_rate": 8.558251280916458e-05, + "loss": 10.5007, + "step": 6490 + }, + { + "epoch": 0.27055979325580426, + "grad_norm": 382.0, + "learning_rate": 8.557777036939916e-05, + "loss": 15.4377, + "step": 6491 + }, + { + "epoch": 0.2706014755533325, + "grad_norm": 44.5, + "learning_rate": 8.557302728122569e-05, + "loss": 7.4385, + "step": 6492 + }, + { + "epoch": 0.27064315785086074, + "grad_norm": 298.0, + "learning_rate": 8.556828354473064e-05, + "loss": 12.3755, + "step": 6493 + }, + { + "epoch": 0.270684840148389, + "grad_norm": 1448.0, + "learning_rate": 8.556353916000048e-05, + "loss": 35.5017, + "step": 6494 + }, + { + "epoch": 0.2707265224459172, + "grad_norm": 382.0, + "learning_rate": 8.555879412712164e-05, + "loss": 14.6254, + "step": 6495 + }, + { + "epoch": 0.2707682047434455, + "grad_norm": 392.0, + "learning_rate": 8.555404844618065e-05, + "loss": 13.6254, + "step": 6496 + }, + { + "epoch": 0.2708098870409737, + "grad_norm": 245.0, + "learning_rate": 8.554930211726395e-05, + "loss": 11.5627, + "step": 6497 + }, + { + "epoch": 0.27085156933850196, + "grad_norm": 692.0, + "learning_rate": 8.554455514045808e-05, + "loss": 21.5004, + "step": 6498 + }, + { + "epoch": 0.2708932516360302, + "grad_norm": 478.0, + "learning_rate": 8.553980751584953e-05, + "loss": 17.1258, + "step": 6499 + }, + { + "epoch": 0.27093493393355844, + "grad_norm": 221.0, + "learning_rate": 8.553505924352481e-05, + "loss": 10.6253, + "step": 6500 + }, + { + "epoch": 0.27097661623108665, + "grad_norm": 748.0, + "learning_rate": 8.55303103235705e-05, + "loss": 21.2513, + "step": 6501 + }, + { + "epoch": 0.2710182985286149, + "grad_norm": 384.0, + "learning_rate": 8.552556075607315e-05, + "loss": 14.3131, + "step": 6502 + }, + { + "epoch": 0.27105998082614313, + "grad_norm": 328.0, + "learning_rate": 8.552081054111927e-05, + "loss": 12.6253, + "step": 6503 + }, + { + "epoch": 0.2711016631236714, + "grad_norm": 684.0, + "learning_rate": 8.551605967879547e-05, + "loss": 19.7502, + "step": 6504 + }, + { + "epoch": 0.2711433454211996, + "grad_norm": 512.0, + "learning_rate": 8.551130816918836e-05, + "loss": 18.2503, + "step": 6505 + }, + { + "epoch": 0.2711850277187279, + "grad_norm": 156.0, + "learning_rate": 8.550655601238447e-05, + "loss": 9.5004, + "step": 6506 + }, + { + "epoch": 0.2712267100162561, + "grad_norm": 111.0, + "learning_rate": 8.550180320847046e-05, + "loss": 9.5015, + "step": 6507 + }, + { + "epoch": 0.27126839231378436, + "grad_norm": 620.0, + "learning_rate": 8.549704975753292e-05, + "loss": 24.8763, + "step": 6508 + }, + { + "epoch": 0.27131007461131257, + "grad_norm": 159.0, + "learning_rate": 8.549229565965851e-05, + "loss": 11.1255, + "step": 6509 + }, + { + "epoch": 0.27135175690884084, + "grad_norm": 158.0, + "learning_rate": 8.548754091493387e-05, + "loss": 9.9378, + "step": 6510 + }, + { + "epoch": 0.27139343920636905, + "grad_norm": 402.0, + "learning_rate": 8.548278552344563e-05, + "loss": 14.4378, + "step": 6511 + }, + { + "epoch": 0.2714351215038973, + "grad_norm": 624.0, + "learning_rate": 8.547802948528048e-05, + "loss": 17.7501, + "step": 6512 + }, + { + "epoch": 0.2714768038014255, + "grad_norm": 344.0, + "learning_rate": 8.547327280052509e-05, + "loss": 15.4378, + "step": 6513 + }, + { + "epoch": 0.2715184860989538, + "grad_norm": 276.0, + "learning_rate": 8.546851546926615e-05, + "loss": 10.9383, + "step": 6514 + }, + { + "epoch": 0.271560168396482, + "grad_norm": 342.0, + "learning_rate": 8.546375749159039e-05, + "loss": 14.1881, + "step": 6515 + }, + { + "epoch": 0.27160185069401027, + "grad_norm": 494.0, + "learning_rate": 8.545899886758448e-05, + "loss": 16.6252, + "step": 6516 + }, + { + "epoch": 0.2716435329915385, + "grad_norm": 298.0, + "learning_rate": 8.545423959733519e-05, + "loss": 13.1252, + "step": 6517 + }, + { + "epoch": 0.27168521528906675, + "grad_norm": 161.0, + "learning_rate": 8.54494796809292e-05, + "loss": 10.3128, + "step": 6518 + }, + { + "epoch": 0.27172689758659496, + "grad_norm": 600.0, + "learning_rate": 8.544471911845332e-05, + "loss": 21.8753, + "step": 6519 + }, + { + "epoch": 0.27176857988412323, + "grad_norm": 412.0, + "learning_rate": 8.543995790999428e-05, + "loss": 14.8755, + "step": 6520 + }, + { + "epoch": 0.27181026218165144, + "grad_norm": 424.0, + "learning_rate": 8.543519605563887e-05, + "loss": 15.0001, + "step": 6521 + }, + { + "epoch": 0.2718519444791797, + "grad_norm": 69.0, + "learning_rate": 8.543043355547387e-05, + "loss": 8.1256, + "step": 6522 + }, + { + "epoch": 0.2718936267767079, + "grad_norm": 84.5, + "learning_rate": 8.542567040958604e-05, + "loss": 6.4377, + "step": 6523 + }, + { + "epoch": 0.2719353090742362, + "grad_norm": 556.0, + "learning_rate": 8.542090661806226e-05, + "loss": 18.2505, + "step": 6524 + }, + { + "epoch": 0.2719769913717644, + "grad_norm": 286.0, + "learning_rate": 8.54161421809893e-05, + "loss": 12.3753, + "step": 6525 + }, + { + "epoch": 0.27201867366929267, + "grad_norm": 442.0, + "learning_rate": 8.5411377098454e-05, + "loss": 15.5628, + "step": 6526 + }, + { + "epoch": 0.2720603559668209, + "grad_norm": 75.0, + "learning_rate": 8.540661137054321e-05, + "loss": 6.8752, + "step": 6527 + }, + { + "epoch": 0.27210203826434914, + "grad_norm": 588.0, + "learning_rate": 8.540184499734379e-05, + "loss": 19.7503, + "step": 6528 + }, + { + "epoch": 0.27214372056187736, + "grad_norm": 652.0, + "learning_rate": 8.53970779789426e-05, + "loss": 21.2509, + "step": 6529 + }, + { + "epoch": 0.2721854028594056, + "grad_norm": 142.0, + "learning_rate": 8.539231031542651e-05, + "loss": 9.3755, + "step": 6530 + }, + { + "epoch": 0.27222708515693383, + "grad_norm": 217.0, + "learning_rate": 8.538754200688244e-05, + "loss": 11.8755, + "step": 6531 + }, + { + "epoch": 0.2722687674544621, + "grad_norm": 716.0, + "learning_rate": 8.538277305339726e-05, + "loss": 21.5003, + "step": 6532 + }, + { + "epoch": 0.2723104497519903, + "grad_norm": 93.5, + "learning_rate": 8.53780034550579e-05, + "loss": 8.0003, + "step": 6533 + }, + { + "epoch": 0.2723521320495186, + "grad_norm": 368.0, + "learning_rate": 8.537323321195131e-05, + "loss": 15.0658, + "step": 6534 + }, + { + "epoch": 0.2723938143470468, + "grad_norm": 796.0, + "learning_rate": 8.536846232416438e-05, + "loss": 20.6291, + "step": 6535 + }, + { + "epoch": 0.27243549664457506, + "grad_norm": 228.0, + "learning_rate": 8.53636907917841e-05, + "loss": 11.7503, + "step": 6536 + }, + { + "epoch": 0.27247717894210327, + "grad_norm": 572.0, + "learning_rate": 8.535891861489741e-05, + "loss": 18.6251, + "step": 6537 + }, + { + "epoch": 0.27251886123963154, + "grad_norm": 416.0, + "learning_rate": 8.53541457935913e-05, + "loss": 14.1253, + "step": 6538 + }, + { + "epoch": 0.27256054353715975, + "grad_norm": 296.0, + "learning_rate": 8.534937232795273e-05, + "loss": 13.3752, + "step": 6539 + }, + { + "epoch": 0.272602225834688, + "grad_norm": 452.0, + "learning_rate": 8.534459821806871e-05, + "loss": 11.44, + "step": 6540 + }, + { + "epoch": 0.27264390813221623, + "grad_norm": 62.75, + "learning_rate": 8.533982346402625e-05, + "loss": 7.6254, + "step": 6541 + }, + { + "epoch": 0.2726855904297445, + "grad_norm": 532.0, + "learning_rate": 8.533504806591237e-05, + "loss": 17.3757, + "step": 6542 + }, + { + "epoch": 0.2727272727272727, + "grad_norm": 113.5, + "learning_rate": 8.533027202381412e-05, + "loss": 9.0633, + "step": 6543 + }, + { + "epoch": 0.272768955024801, + "grad_norm": 398.0, + "learning_rate": 8.53254953378185e-05, + "loss": 15.8755, + "step": 6544 + }, + { + "epoch": 0.2728106373223292, + "grad_norm": 310.0, + "learning_rate": 8.53207180080126e-05, + "loss": 13.0003, + "step": 6545 + }, + { + "epoch": 0.27285231961985745, + "grad_norm": 496.0, + "learning_rate": 8.531594003448349e-05, + "loss": 16.8753, + "step": 6546 + }, + { + "epoch": 0.27289400191738566, + "grad_norm": 206.0, + "learning_rate": 8.531116141731823e-05, + "loss": 11.5007, + "step": 6547 + }, + { + "epoch": 0.27293568421491393, + "grad_norm": 648.0, + "learning_rate": 8.530638215660391e-05, + "loss": 18.5053, + "step": 6548 + }, + { + "epoch": 0.27297736651244214, + "grad_norm": 250.0, + "learning_rate": 8.530160225242767e-05, + "loss": 12.5627, + "step": 6549 + }, + { + "epoch": 0.2730190488099704, + "grad_norm": 214.0, + "learning_rate": 8.529682170487656e-05, + "loss": 10.5004, + "step": 6550 + }, + { + "epoch": 0.2730607311074986, + "grad_norm": 254.0, + "learning_rate": 8.529204051403776e-05, + "loss": 11.0627, + "step": 6551 + }, + { + "epoch": 0.2731024134050269, + "grad_norm": 482.0, + "learning_rate": 8.528725867999839e-05, + "loss": 17.3786, + "step": 6552 + }, + { + "epoch": 0.2731440957025551, + "grad_norm": 466.0, + "learning_rate": 8.528247620284559e-05, + "loss": 16.6252, + "step": 6553 + }, + { + "epoch": 0.27318577800008337, + "grad_norm": 185.0, + "learning_rate": 8.527769308266654e-05, + "loss": 10.8145, + "step": 6554 + }, + { + "epoch": 0.2732274602976116, + "grad_norm": 244.0, + "learning_rate": 8.527290931954839e-05, + "loss": 13.0634, + "step": 6555 + }, + { + "epoch": 0.27326914259513985, + "grad_norm": 167.0, + "learning_rate": 8.526812491357834e-05, + "loss": 10.3132, + "step": 6556 + }, + { + "epoch": 0.27331082489266806, + "grad_norm": 165.0, + "learning_rate": 8.526333986484358e-05, + "loss": 9.9379, + "step": 6557 + }, + { + "epoch": 0.2733525071901963, + "grad_norm": 362.0, + "learning_rate": 8.525855417343132e-05, + "loss": 13.6259, + "step": 6558 + }, + { + "epoch": 0.27339418948772454, + "grad_norm": 588.0, + "learning_rate": 8.525376783942879e-05, + "loss": 18.5004, + "step": 6559 + }, + { + "epoch": 0.2734358717852528, + "grad_norm": 189.0, + "learning_rate": 8.524898086292321e-05, + "loss": 9.8763, + "step": 6560 + }, + { + "epoch": 0.273477554082781, + "grad_norm": 616.0, + "learning_rate": 8.524419324400181e-05, + "loss": 20.3755, + "step": 6561 + }, + { + "epoch": 0.2735192363803093, + "grad_norm": 237.0, + "learning_rate": 8.523940498275187e-05, + "loss": 11.5626, + "step": 6562 + }, + { + "epoch": 0.27356091867783755, + "grad_norm": 258.0, + "learning_rate": 8.523461607926064e-05, + "loss": 12.6256, + "step": 6563 + }, + { + "epoch": 0.27360260097536576, + "grad_norm": 376.0, + "learning_rate": 8.522982653361541e-05, + "loss": 13.5002, + "step": 6564 + }, + { + "epoch": 0.27364428327289403, + "grad_norm": 528.0, + "learning_rate": 8.522503634590347e-05, + "loss": 18.5008, + "step": 6565 + }, + { + "epoch": 0.27368596557042224, + "grad_norm": 520.0, + "learning_rate": 8.522024551621211e-05, + "loss": 16.7503, + "step": 6566 + }, + { + "epoch": 0.2737276478679505, + "grad_norm": 640.0, + "learning_rate": 8.521545404462865e-05, + "loss": 20.3751, + "step": 6567 + }, + { + "epoch": 0.2737693301654787, + "grad_norm": 63.0, + "learning_rate": 8.52106619312404e-05, + "loss": 7.0943, + "step": 6568 + }, + { + "epoch": 0.273811012463007, + "grad_norm": 153.0, + "learning_rate": 8.520586917613473e-05, + "loss": 11.063, + "step": 6569 + }, + { + "epoch": 0.2738526947605352, + "grad_norm": 840.0, + "learning_rate": 8.520107577939896e-05, + "loss": 22.0054, + "step": 6570 + }, + { + "epoch": 0.27389437705806347, + "grad_norm": 242.0, + "learning_rate": 8.519628174112047e-05, + "loss": 11.4378, + "step": 6571 + }, + { + "epoch": 0.2739360593555917, + "grad_norm": 448.0, + "learning_rate": 8.51914870613866e-05, + "loss": 16.3754, + "step": 6572 + }, + { + "epoch": 0.27397774165311994, + "grad_norm": 516.0, + "learning_rate": 8.518669174028477e-05, + "loss": 17.1252, + "step": 6573 + }, + { + "epoch": 0.27401942395064816, + "grad_norm": 454.0, + "learning_rate": 8.518189577790237e-05, + "loss": 13.6903, + "step": 6574 + }, + { + "epoch": 0.2740611062481764, + "grad_norm": 284.0, + "learning_rate": 8.517709917432677e-05, + "loss": 13.1885, + "step": 6575 + }, + { + "epoch": 0.27410278854570463, + "grad_norm": 360.0, + "learning_rate": 8.517230192964542e-05, + "loss": 13.1877, + "step": 6576 + }, + { + "epoch": 0.2741444708432329, + "grad_norm": 268.0, + "learning_rate": 8.516750404394576e-05, + "loss": 11.7508, + "step": 6577 + }, + { + "epoch": 0.2741861531407611, + "grad_norm": 320.0, + "learning_rate": 8.51627055173152e-05, + "loss": 14.3752, + "step": 6578 + }, + { + "epoch": 0.2742278354382894, + "grad_norm": 358.0, + "learning_rate": 8.515790634984122e-05, + "loss": 15.2504, + "step": 6579 + }, + { + "epoch": 0.2742695177358176, + "grad_norm": 153.0, + "learning_rate": 8.515310654161128e-05, + "loss": 11.4387, + "step": 6580 + }, + { + "epoch": 0.27431120003334586, + "grad_norm": 430.0, + "learning_rate": 8.514830609271285e-05, + "loss": 14.8128, + "step": 6581 + }, + { + "epoch": 0.27435288233087407, + "grad_norm": 274.0, + "learning_rate": 8.51435050032334e-05, + "loss": 12.7507, + "step": 6582 + }, + { + "epoch": 0.27439456462840234, + "grad_norm": 256.0, + "learning_rate": 8.513870327326048e-05, + "loss": 12.8128, + "step": 6583 + }, + { + "epoch": 0.27443624692593055, + "grad_norm": 126.0, + "learning_rate": 8.513390090288156e-05, + "loss": 8.3133, + "step": 6584 + }, + { + "epoch": 0.2744779292234588, + "grad_norm": 732.0, + "learning_rate": 8.512909789218418e-05, + "loss": 19.2545, + "step": 6585 + }, + { + "epoch": 0.27451961152098703, + "grad_norm": 490.0, + "learning_rate": 8.512429424125588e-05, + "loss": 17.0004, + "step": 6586 + }, + { + "epoch": 0.2745612938185153, + "grad_norm": 368.0, + "learning_rate": 8.511948995018418e-05, + "loss": 14.3751, + "step": 6587 + }, + { + "epoch": 0.2746029761160435, + "grad_norm": 386.0, + "learning_rate": 8.511468501905667e-05, + "loss": 14.3153, + "step": 6588 + }, + { + "epoch": 0.2746446584135718, + "grad_norm": 352.0, + "learning_rate": 8.510987944796092e-05, + "loss": 14.0627, + "step": 6589 + }, + { + "epoch": 0.2746863407111, + "grad_norm": 676.0, + "learning_rate": 8.510507323698448e-05, + "loss": 20.7507, + "step": 6590 + }, + { + "epoch": 0.27472802300862825, + "grad_norm": 102.5, + "learning_rate": 8.510026638621497e-05, + "loss": 8.5005, + "step": 6591 + }, + { + "epoch": 0.27476970530615646, + "grad_norm": 500.0, + "learning_rate": 8.509545889574e-05, + "loss": 17.1253, + "step": 6592 + }, + { + "epoch": 0.27481138760368473, + "grad_norm": 384.0, + "learning_rate": 8.509065076564717e-05, + "loss": 12.0005, + "step": 6593 + }, + { + "epoch": 0.27485306990121294, + "grad_norm": 392.0, + "learning_rate": 8.50858419960241e-05, + "loss": 15.1884, + "step": 6594 + }, + { + "epoch": 0.2748947521987412, + "grad_norm": 1144.0, + "learning_rate": 8.508103258695845e-05, + "loss": 24.1315, + "step": 6595 + }, + { + "epoch": 0.2749364344962694, + "grad_norm": 382.0, + "learning_rate": 8.507622253853789e-05, + "loss": 14.938, + "step": 6596 + }, + { + "epoch": 0.2749781167937977, + "grad_norm": 464.0, + "learning_rate": 8.507141185085004e-05, + "loss": 15.5649, + "step": 6597 + }, + { + "epoch": 0.2750197990913259, + "grad_norm": 294.0, + "learning_rate": 8.50666005239826e-05, + "loss": 12.1881, + "step": 6598 + }, + { + "epoch": 0.27506148138885417, + "grad_norm": 212.0, + "learning_rate": 8.506178855802325e-05, + "loss": 12.0627, + "step": 6599 + }, + { + "epoch": 0.2751031636863824, + "grad_norm": 382.0, + "learning_rate": 8.505697595305971e-05, + "loss": 15.0627, + "step": 6600 + }, + { + "epoch": 0.27514484598391065, + "grad_norm": 736.0, + "learning_rate": 8.505216270917964e-05, + "loss": 20.2532, + "step": 6601 + }, + { + "epoch": 0.27518652828143886, + "grad_norm": 384.0, + "learning_rate": 8.50473488264708e-05, + "loss": 14.0044, + "step": 6602 + }, + { + "epoch": 0.2752282105789671, + "grad_norm": 232.0, + "learning_rate": 8.504253430502094e-05, + "loss": 11.1254, + "step": 6603 + }, + { + "epoch": 0.27526989287649534, + "grad_norm": 560.0, + "learning_rate": 8.503771914491776e-05, + "loss": 19.8753, + "step": 6604 + }, + { + "epoch": 0.2753115751740236, + "grad_norm": 424.0, + "learning_rate": 8.503290334624905e-05, + "loss": 15.5628, + "step": 6605 + }, + { + "epoch": 0.2753532574715518, + "grad_norm": 640.0, + "learning_rate": 8.502808690910255e-05, + "loss": 19.0004, + "step": 6606 + }, + { + "epoch": 0.2753949397690801, + "grad_norm": 86.5, + "learning_rate": 8.502326983356607e-05, + "loss": 7.469, + "step": 6607 + }, + { + "epoch": 0.2754366220666083, + "grad_norm": 179.0, + "learning_rate": 8.501845211972739e-05, + "loss": 6.8754, + "step": 6608 + }, + { + "epoch": 0.27547830436413656, + "grad_norm": 416.0, + "learning_rate": 8.501363376767431e-05, + "loss": 16.6254, + "step": 6609 + }, + { + "epoch": 0.2755199866616648, + "grad_norm": 470.0, + "learning_rate": 8.500881477749463e-05, + "loss": 15.6881, + "step": 6610 + }, + { + "epoch": 0.27556166895919304, + "grad_norm": 143.0, + "learning_rate": 8.50039951492762e-05, + "loss": 8.0628, + "step": 6611 + }, + { + "epoch": 0.27560335125672125, + "grad_norm": 624.0, + "learning_rate": 8.499917488310687e-05, + "loss": 19.5004, + "step": 6612 + }, + { + "epoch": 0.2756450335542495, + "grad_norm": 378.0, + "learning_rate": 8.499435397907445e-05, + "loss": 14.2504, + "step": 6613 + }, + { + "epoch": 0.27568671585177773, + "grad_norm": 458.0, + "learning_rate": 8.498953243726682e-05, + "loss": 15.8128, + "step": 6614 + }, + { + "epoch": 0.275728398149306, + "grad_norm": 99.0, + "learning_rate": 8.498471025777188e-05, + "loss": 8.8752, + "step": 6615 + }, + { + "epoch": 0.2757700804468342, + "grad_norm": 612.0, + "learning_rate": 8.497988744067746e-05, + "loss": 15.8201, + "step": 6616 + }, + { + "epoch": 0.2758117627443625, + "grad_norm": 324.0, + "learning_rate": 8.49750639860715e-05, + "loss": 14.1257, + "step": 6617 + }, + { + "epoch": 0.2758534450418907, + "grad_norm": 170.0, + "learning_rate": 8.497023989404191e-05, + "loss": 10.0003, + "step": 6618 + }, + { + "epoch": 0.27589512733941896, + "grad_norm": 127.0, + "learning_rate": 8.496541516467657e-05, + "loss": 8.6889, + "step": 6619 + }, + { + "epoch": 0.27593680963694717, + "grad_norm": 200.0, + "learning_rate": 8.496058979806346e-05, + "loss": 11.7502, + "step": 6620 + }, + { + "epoch": 0.27597849193447543, + "grad_norm": 380.0, + "learning_rate": 8.495576379429047e-05, + "loss": 16.2503, + "step": 6621 + }, + { + "epoch": 0.27602017423200365, + "grad_norm": 272.0, + "learning_rate": 8.495093715344559e-05, + "loss": 12.3753, + "step": 6622 + }, + { + "epoch": 0.2760618565295319, + "grad_norm": 157.0, + "learning_rate": 8.494610987561678e-05, + "loss": 9.6254, + "step": 6623 + }, + { + "epoch": 0.2761035388270601, + "grad_norm": 256.0, + "learning_rate": 8.494128196089201e-05, + "loss": 12.0006, + "step": 6624 + }, + { + "epoch": 0.2761452211245884, + "grad_norm": 736.0, + "learning_rate": 8.493645340935928e-05, + "loss": 21.0005, + "step": 6625 + }, + { + "epoch": 0.2761869034221166, + "grad_norm": 226.0, + "learning_rate": 8.493162422110658e-05, + "loss": 10.8753, + "step": 6626 + }, + { + "epoch": 0.27622858571964487, + "grad_norm": 480.0, + "learning_rate": 8.492679439622193e-05, + "loss": 18.2503, + "step": 6627 + }, + { + "epoch": 0.2762702680171731, + "grad_norm": 390.0, + "learning_rate": 8.492196393479336e-05, + "loss": 15.1255, + "step": 6628 + }, + { + "epoch": 0.27631195031470135, + "grad_norm": 232.0, + "learning_rate": 8.49171328369089e-05, + "loss": 11.188, + "step": 6629 + }, + { + "epoch": 0.27635363261222956, + "grad_norm": 548.0, + "learning_rate": 8.491230110265658e-05, + "loss": 17.3772, + "step": 6630 + }, + { + "epoch": 0.2763953149097578, + "grad_norm": 380.0, + "learning_rate": 8.490746873212448e-05, + "loss": 15.6253, + "step": 6631 + }, + { + "epoch": 0.27643699720728604, + "grad_norm": 282.0, + "learning_rate": 8.490263572540066e-05, + "loss": 12.8751, + "step": 6632 + }, + { + "epoch": 0.2764786795048143, + "grad_norm": 324.0, + "learning_rate": 8.489780208257321e-05, + "loss": 14.0634, + "step": 6633 + }, + { + "epoch": 0.2765203618023425, + "grad_norm": 348.0, + "learning_rate": 8.489296780373022e-05, + "loss": 14.1257, + "step": 6634 + }, + { + "epoch": 0.2765620440998708, + "grad_norm": 436.0, + "learning_rate": 8.488813288895978e-05, + "loss": 16.1253, + "step": 6635 + }, + { + "epoch": 0.27660372639739905, + "grad_norm": 434.0, + "learning_rate": 8.488329733835003e-05, + "loss": 16.2503, + "step": 6636 + }, + { + "epoch": 0.27664540869492726, + "grad_norm": 396.0, + "learning_rate": 8.48784611519891e-05, + "loss": 14.5003, + "step": 6637 + }, + { + "epoch": 0.27668709099245553, + "grad_norm": 101.0, + "learning_rate": 8.487362432996511e-05, + "loss": 9.3755, + "step": 6638 + }, + { + "epoch": 0.27672877328998374, + "grad_norm": 436.0, + "learning_rate": 8.486878687236622e-05, + "loss": 14.3787, + "step": 6639 + }, + { + "epoch": 0.276770455587512, + "grad_norm": 166.0, + "learning_rate": 8.48639487792806e-05, + "loss": 9.3137, + "step": 6640 + }, + { + "epoch": 0.2768121378850402, + "grad_norm": 67.0, + "learning_rate": 8.48591100507964e-05, + "loss": 9.7508, + "step": 6641 + }, + { + "epoch": 0.2768538201825685, + "grad_norm": 268.0, + "learning_rate": 8.485427068700185e-05, + "loss": 12.2525, + "step": 6642 + }, + { + "epoch": 0.2768955024800967, + "grad_norm": 396.0, + "learning_rate": 8.484943068798511e-05, + "loss": 15.5626, + "step": 6643 + }, + { + "epoch": 0.27693718477762497, + "grad_norm": 300.0, + "learning_rate": 8.484459005383441e-05, + "loss": 14.1255, + "step": 6644 + }, + { + "epoch": 0.2769788670751532, + "grad_norm": 124.5, + "learning_rate": 8.483974878463794e-05, + "loss": 10.8127, + "step": 6645 + }, + { + "epoch": 0.27702054937268145, + "grad_norm": 464.0, + "learning_rate": 8.483490688048399e-05, + "loss": 18.2509, + "step": 6646 + }, + { + "epoch": 0.27706223167020966, + "grad_norm": 87.0, + "learning_rate": 8.483006434146075e-05, + "loss": 9.6896, + "step": 6647 + }, + { + "epoch": 0.2771039139677379, + "grad_norm": 146.0, + "learning_rate": 8.482522116765648e-05, + "loss": 10.6255, + "step": 6648 + }, + { + "epoch": 0.27714559626526614, + "grad_norm": 560.0, + "learning_rate": 8.482037735915948e-05, + "loss": 19.5023, + "step": 6649 + }, + { + "epoch": 0.2771872785627944, + "grad_norm": 48.0, + "learning_rate": 8.481553291605801e-05, + "loss": 7.469, + "step": 6650 + }, + { + "epoch": 0.2772289608603226, + "grad_norm": 148.0, + "learning_rate": 8.481068783844038e-05, + "loss": 9.9377, + "step": 6651 + }, + { + "epoch": 0.2772706431578509, + "grad_norm": 408.0, + "learning_rate": 8.480584212639483e-05, + "loss": 16.6259, + "step": 6652 + }, + { + "epoch": 0.2773123254553791, + "grad_norm": 1144.0, + "learning_rate": 8.480099578000976e-05, + "loss": 30.7512, + "step": 6653 + }, + { + "epoch": 0.27735400775290736, + "grad_norm": 268.0, + "learning_rate": 8.479614879937344e-05, + "loss": 12.0627, + "step": 6654 + }, + { + "epoch": 0.2773956900504356, + "grad_norm": 326.0, + "learning_rate": 8.479130118457421e-05, + "loss": 13.8127, + "step": 6655 + }, + { + "epoch": 0.27743737234796384, + "grad_norm": 189.0, + "learning_rate": 8.478645293570045e-05, + "loss": 11.0004, + "step": 6656 + }, + { + "epoch": 0.27747905464549205, + "grad_norm": 548.0, + "learning_rate": 8.478160405284046e-05, + "loss": 18.256, + "step": 6657 + }, + { + "epoch": 0.2775207369430203, + "grad_norm": 121.5, + "learning_rate": 8.477675453608268e-05, + "loss": 9.3753, + "step": 6658 + }, + { + "epoch": 0.27756241924054853, + "grad_norm": 102.0, + "learning_rate": 8.477190438551546e-05, + "loss": 7.7814, + "step": 6659 + }, + { + "epoch": 0.2776041015380768, + "grad_norm": 338.0, + "learning_rate": 8.476705360122717e-05, + "loss": 14.4377, + "step": 6660 + }, + { + "epoch": 0.277645783835605, + "grad_norm": 812.0, + "learning_rate": 8.476220218330626e-05, + "loss": 23.5003, + "step": 6661 + }, + { + "epoch": 0.2776874661331333, + "grad_norm": 704.0, + "learning_rate": 8.475735013184114e-05, + "loss": 21.2504, + "step": 6662 + }, + { + "epoch": 0.2777291484306615, + "grad_norm": 230.0, + "learning_rate": 8.475249744692021e-05, + "loss": 10.3765, + "step": 6663 + }, + { + "epoch": 0.27777083072818975, + "grad_norm": 398.0, + "learning_rate": 8.474764412863194e-05, + "loss": 14.4379, + "step": 6664 + }, + { + "epoch": 0.27781251302571797, + "grad_norm": 380.0, + "learning_rate": 8.474279017706475e-05, + "loss": 15.7505, + "step": 6665 + }, + { + "epoch": 0.27785419532324623, + "grad_norm": 456.0, + "learning_rate": 8.473793559230714e-05, + "loss": 16.7518, + "step": 6666 + }, + { + "epoch": 0.27789587762077445, + "grad_norm": 540.0, + "learning_rate": 8.473308037444758e-05, + "loss": 15.7502, + "step": 6667 + }, + { + "epoch": 0.2779375599183027, + "grad_norm": 520.0, + "learning_rate": 8.472822452357454e-05, + "loss": 16.3753, + "step": 6668 + }, + { + "epoch": 0.2779792422158309, + "grad_norm": 268.0, + "learning_rate": 8.472336803977652e-05, + "loss": 11.8141, + "step": 6669 + }, + { + "epoch": 0.2780209245133592, + "grad_norm": 207.0, + "learning_rate": 8.471851092314204e-05, + "loss": 13.1256, + "step": 6670 + }, + { + "epoch": 0.2780626068108874, + "grad_norm": 342.0, + "learning_rate": 8.471365317375961e-05, + "loss": 13.2501, + "step": 6671 + }, + { + "epoch": 0.27810428910841567, + "grad_norm": 238.0, + "learning_rate": 8.470879479171778e-05, + "loss": 11.7501, + "step": 6672 + }, + { + "epoch": 0.2781459714059439, + "grad_norm": 350.0, + "learning_rate": 8.470393577710507e-05, + "loss": 15.0627, + "step": 6673 + }, + { + "epoch": 0.27818765370347215, + "grad_norm": 304.0, + "learning_rate": 8.469907613001006e-05, + "loss": 12.6255, + "step": 6674 + }, + { + "epoch": 0.27822933600100036, + "grad_norm": 227.0, + "learning_rate": 8.469421585052131e-05, + "loss": 8.6879, + "step": 6675 + }, + { + "epoch": 0.2782710182985286, + "grad_norm": 288.0, + "learning_rate": 8.468935493872738e-05, + "loss": 13.7512, + "step": 6676 + }, + { + "epoch": 0.27831270059605684, + "grad_norm": 1272.0, + "learning_rate": 8.468449339471689e-05, + "loss": 30.0007, + "step": 6677 + }, + { + "epoch": 0.2783543828935851, + "grad_norm": 245.0, + "learning_rate": 8.467963121857843e-05, + "loss": 11.6877, + "step": 6678 + }, + { + "epoch": 0.2783960651911133, + "grad_norm": 132.0, + "learning_rate": 8.467476841040061e-05, + "loss": 7.0326, + "step": 6679 + }, + { + "epoch": 0.2784377474886416, + "grad_norm": 158.0, + "learning_rate": 8.466990497027204e-05, + "loss": 7.7194, + "step": 6680 + }, + { + "epoch": 0.2784794297861698, + "grad_norm": 406.0, + "learning_rate": 8.46650408982814e-05, + "loss": 15.3752, + "step": 6681 + }, + { + "epoch": 0.27852111208369806, + "grad_norm": 366.0, + "learning_rate": 8.466017619451729e-05, + "loss": 14.5017, + "step": 6682 + }, + { + "epoch": 0.2785627943812263, + "grad_norm": 376.0, + "learning_rate": 8.465531085906842e-05, + "loss": 16.0002, + "step": 6683 + }, + { + "epoch": 0.27860447667875454, + "grad_norm": 184.0, + "learning_rate": 8.46504448920234e-05, + "loss": 11.0631, + "step": 6684 + }, + { + "epoch": 0.27864615897628275, + "grad_norm": 324.0, + "learning_rate": 8.464557829347097e-05, + "loss": 13.8757, + "step": 6685 + }, + { + "epoch": 0.278687841273811, + "grad_norm": 214.0, + "learning_rate": 8.46407110634998e-05, + "loss": 11.5003, + "step": 6686 + }, + { + "epoch": 0.27872952357133923, + "grad_norm": 96.0, + "learning_rate": 8.46358432021986e-05, + "loss": 10.3755, + "step": 6687 + }, + { + "epoch": 0.2787712058688675, + "grad_norm": 254.0, + "learning_rate": 8.463097470965607e-05, + "loss": 12.0628, + "step": 6688 + }, + { + "epoch": 0.2788128881663957, + "grad_norm": 374.0, + "learning_rate": 8.462610558596094e-05, + "loss": 14.2504, + "step": 6689 + }, + { + "epoch": 0.278854570463924, + "grad_norm": 120.0, + "learning_rate": 8.462123583120198e-05, + "loss": 9.3127, + "step": 6690 + }, + { + "epoch": 0.2788962527614522, + "grad_norm": 482.0, + "learning_rate": 8.461636544546792e-05, + "loss": 16.3798, + "step": 6691 + }, + { + "epoch": 0.27893793505898046, + "grad_norm": 772.0, + "learning_rate": 8.461149442884752e-05, + "loss": 23.2535, + "step": 6692 + }, + { + "epoch": 0.27897961735650867, + "grad_norm": 532.0, + "learning_rate": 8.460662278142957e-05, + "loss": 15.8128, + "step": 6693 + }, + { + "epoch": 0.27902129965403694, + "grad_norm": 342.0, + "learning_rate": 8.460175050330284e-05, + "loss": 12.1877, + "step": 6694 + }, + { + "epoch": 0.27906298195156515, + "grad_norm": 466.0, + "learning_rate": 8.459687759455615e-05, + "loss": 17.3755, + "step": 6695 + }, + { + "epoch": 0.2791046642490934, + "grad_norm": 242.0, + "learning_rate": 8.459200405527827e-05, + "loss": 11.3752, + "step": 6696 + }, + { + "epoch": 0.2791463465466216, + "grad_norm": 213.0, + "learning_rate": 8.458712988555807e-05, + "loss": 10.7502, + "step": 6697 + }, + { + "epoch": 0.2791880288441499, + "grad_norm": 1144.0, + "learning_rate": 8.458225508548434e-05, + "loss": 22.3802, + "step": 6698 + }, + { + "epoch": 0.2792297111416781, + "grad_norm": 288.0, + "learning_rate": 8.457737965514596e-05, + "loss": 13.2503, + "step": 6699 + }, + { + "epoch": 0.2792713934392064, + "grad_norm": 552.0, + "learning_rate": 8.457250359463176e-05, + "loss": 17.5017, + "step": 6700 + }, + { + "epoch": 0.2793130757367346, + "grad_norm": 382.0, + "learning_rate": 8.456762690403059e-05, + "loss": 15.6877, + "step": 6701 + }, + { + "epoch": 0.27935475803426285, + "grad_norm": 112.5, + "learning_rate": 8.456274958343137e-05, + "loss": 10.2504, + "step": 6702 + }, + { + "epoch": 0.27939644033179106, + "grad_norm": 1576.0, + "learning_rate": 8.455787163292297e-05, + "loss": 30.5052, + "step": 6703 + }, + { + "epoch": 0.27943812262931933, + "grad_norm": 255.0, + "learning_rate": 8.45529930525943e-05, + "loss": 12.1252, + "step": 6704 + }, + { + "epoch": 0.27947980492684754, + "grad_norm": 308.0, + "learning_rate": 8.454811384253425e-05, + "loss": 15.3127, + "step": 6705 + }, + { + "epoch": 0.2795214872243758, + "grad_norm": 350.0, + "learning_rate": 8.454323400283177e-05, + "loss": 14.8134, + "step": 6706 + }, + { + "epoch": 0.279563169521904, + "grad_norm": 135.0, + "learning_rate": 8.453835353357578e-05, + "loss": 8.5003, + "step": 6707 + }, + { + "epoch": 0.2796048518194323, + "grad_norm": 1168.0, + "learning_rate": 8.453347243485522e-05, + "loss": 26.1253, + "step": 6708 + }, + { + "epoch": 0.27964653411696055, + "grad_norm": 492.0, + "learning_rate": 8.452859070675908e-05, + "loss": 14.0033, + "step": 6709 + }, + { + "epoch": 0.27968821641448877, + "grad_norm": 640.0, + "learning_rate": 8.452370834937628e-05, + "loss": 19.5003, + "step": 6710 + }, + { + "epoch": 0.27972989871201703, + "grad_norm": 227.0, + "learning_rate": 8.451882536279586e-05, + "loss": 12.1879, + "step": 6711 + }, + { + "epoch": 0.27977158100954524, + "grad_norm": 97.5, + "learning_rate": 8.451394174710677e-05, + "loss": 8.3753, + "step": 6712 + }, + { + "epoch": 0.2798132633070735, + "grad_norm": 91.5, + "learning_rate": 8.450905750239803e-05, + "loss": 8.3754, + "step": 6713 + }, + { + "epoch": 0.2798549456046017, + "grad_norm": 360.0, + "learning_rate": 8.450417262875865e-05, + "loss": 14.7503, + "step": 6714 + }, + { + "epoch": 0.27989662790213, + "grad_norm": 812.0, + "learning_rate": 8.449928712627766e-05, + "loss": 20.8751, + "step": 6715 + }, + { + "epoch": 0.2799383101996582, + "grad_norm": 462.0, + "learning_rate": 8.44944009950441e-05, + "loss": 16.1282, + "step": 6716 + }, + { + "epoch": 0.27997999249718647, + "grad_norm": 548.0, + "learning_rate": 8.448951423514702e-05, + "loss": 19.5007, + "step": 6717 + }, + { + "epoch": 0.2800216747947147, + "grad_norm": 172.0, + "learning_rate": 8.448462684667549e-05, + "loss": 11.5007, + "step": 6718 + }, + { + "epoch": 0.28006335709224295, + "grad_norm": 434.0, + "learning_rate": 8.447973882971856e-05, + "loss": 14.9376, + "step": 6719 + }, + { + "epoch": 0.28010503938977116, + "grad_norm": 250.0, + "learning_rate": 8.447485018436534e-05, + "loss": 12.4388, + "step": 6720 + }, + { + "epoch": 0.2801467216872994, + "grad_norm": 308.0, + "learning_rate": 8.446996091070491e-05, + "loss": 14.1254, + "step": 6721 + }, + { + "epoch": 0.28018840398482764, + "grad_norm": 78.0, + "learning_rate": 8.44650710088264e-05, + "loss": 7.2195, + "step": 6722 + }, + { + "epoch": 0.2802300862823559, + "grad_norm": 153.0, + "learning_rate": 8.446018047881889e-05, + "loss": 10.3776, + "step": 6723 + }, + { + "epoch": 0.2802717685798841, + "grad_norm": 138.0, + "learning_rate": 8.445528932077154e-05, + "loss": 11.5627, + "step": 6724 + }, + { + "epoch": 0.2803134508774124, + "grad_norm": 350.0, + "learning_rate": 8.445039753477347e-05, + "loss": 15.5008, + "step": 6725 + }, + { + "epoch": 0.2803551331749406, + "grad_norm": 988.0, + "learning_rate": 8.444550512091384e-05, + "loss": 22.5055, + "step": 6726 + }, + { + "epoch": 0.28039681547246886, + "grad_norm": 436.0, + "learning_rate": 8.444061207928186e-05, + "loss": 15.0628, + "step": 6727 + }, + { + "epoch": 0.2804384977699971, + "grad_norm": 1216.0, + "learning_rate": 8.443571840996665e-05, + "loss": 29.3757, + "step": 6728 + }, + { + "epoch": 0.28048018006752534, + "grad_norm": 504.0, + "learning_rate": 8.443082411305741e-05, + "loss": 13.9377, + "step": 6729 + }, + { + "epoch": 0.28052186236505355, + "grad_norm": 912.0, + "learning_rate": 8.442592918864334e-05, + "loss": 25.2503, + "step": 6730 + }, + { + "epoch": 0.2805635446625818, + "grad_norm": 243.0, + "learning_rate": 8.442103363681367e-05, + "loss": 13.1254, + "step": 6731 + }, + { + "epoch": 0.28060522696011003, + "grad_norm": 420.0, + "learning_rate": 8.441613745765759e-05, + "loss": 17.3753, + "step": 6732 + }, + { + "epoch": 0.2806469092576383, + "grad_norm": 58.25, + "learning_rate": 8.441124065126434e-05, + "loss": 8.3127, + "step": 6733 + }, + { + "epoch": 0.2806885915551665, + "grad_norm": 370.0, + "learning_rate": 8.440634321772321e-05, + "loss": 14.5627, + "step": 6734 + }, + { + "epoch": 0.2807302738526948, + "grad_norm": 195.0, + "learning_rate": 8.440144515712338e-05, + "loss": 9.4377, + "step": 6735 + }, + { + "epoch": 0.280771956150223, + "grad_norm": 358.0, + "learning_rate": 8.439654646955419e-05, + "loss": 13.8127, + "step": 6736 + }, + { + "epoch": 0.28081363844775126, + "grad_norm": 318.0, + "learning_rate": 8.439164715510488e-05, + "loss": 14.7503, + "step": 6737 + }, + { + "epoch": 0.28085532074527947, + "grad_norm": 278.0, + "learning_rate": 8.438674721386473e-05, + "loss": 13.2502, + "step": 6738 + }, + { + "epoch": 0.28089700304280774, + "grad_norm": 436.0, + "learning_rate": 8.438184664592308e-05, + "loss": 15.1265, + "step": 6739 + }, + { + "epoch": 0.28093868534033595, + "grad_norm": 253.0, + "learning_rate": 8.437694545136922e-05, + "loss": 11.8131, + "step": 6740 + }, + { + "epoch": 0.2809803676378642, + "grad_norm": 298.0, + "learning_rate": 8.437204363029248e-05, + "loss": 11.6261, + "step": 6741 + }, + { + "epoch": 0.2810220499353924, + "grad_norm": 324.0, + "learning_rate": 8.436714118278217e-05, + "loss": 13.6251, + "step": 6742 + }, + { + "epoch": 0.2810637322329207, + "grad_norm": 296.0, + "learning_rate": 8.436223810892768e-05, + "loss": 13.3752, + "step": 6743 + }, + { + "epoch": 0.2811054145304489, + "grad_norm": 75.5, + "learning_rate": 8.435733440881835e-05, + "loss": 9.4386, + "step": 6744 + }, + { + "epoch": 0.28114709682797717, + "grad_norm": 420.0, + "learning_rate": 8.435243008254355e-05, + "loss": 15.1882, + "step": 6745 + }, + { + "epoch": 0.2811887791255054, + "grad_norm": 314.0, + "learning_rate": 8.434752513019266e-05, + "loss": 13.0003, + "step": 6746 + }, + { + "epoch": 0.28123046142303365, + "grad_norm": 98.0, + "learning_rate": 8.434261955185508e-05, + "loss": 8.0628, + "step": 6747 + }, + { + "epoch": 0.28127214372056186, + "grad_norm": 143.0, + "learning_rate": 8.43377133476202e-05, + "loss": 8.8755, + "step": 6748 + }, + { + "epoch": 0.28131382601809013, + "grad_norm": 208.0, + "learning_rate": 8.433280651757745e-05, + "loss": 11.7502, + "step": 6749 + }, + { + "epoch": 0.28135550831561834, + "grad_norm": 600.0, + "learning_rate": 8.432789906181627e-05, + "loss": 19.5003, + "step": 6750 + }, + { + "epoch": 0.2813971906131466, + "grad_norm": 948.0, + "learning_rate": 8.432299098042605e-05, + "loss": 21.7553, + "step": 6751 + }, + { + "epoch": 0.2814388729106748, + "grad_norm": 374.0, + "learning_rate": 8.431808227349629e-05, + "loss": 14.8129, + "step": 6752 + }, + { + "epoch": 0.2814805552082031, + "grad_norm": 232.0, + "learning_rate": 8.431317294111643e-05, + "loss": 12.2505, + "step": 6753 + }, + { + "epoch": 0.2815222375057313, + "grad_norm": 312.0, + "learning_rate": 8.430826298337595e-05, + "loss": 14.0003, + "step": 6754 + }, + { + "epoch": 0.28156391980325957, + "grad_norm": 636.0, + "learning_rate": 8.430335240036434e-05, + "loss": 17.1292, + "step": 6755 + }, + { + "epoch": 0.2816056021007878, + "grad_norm": 408.0, + "learning_rate": 8.429844119217108e-05, + "loss": 16.2512, + "step": 6756 + }, + { + "epoch": 0.28164728439831604, + "grad_norm": 388.0, + "learning_rate": 8.429352935888568e-05, + "loss": 14.5009, + "step": 6757 + }, + { + "epoch": 0.28168896669584426, + "grad_norm": 1112.0, + "learning_rate": 8.428861690059767e-05, + "loss": 23.7559, + "step": 6758 + }, + { + "epoch": 0.2817306489933725, + "grad_norm": 89.5, + "learning_rate": 8.428370381739657e-05, + "loss": 8.5005, + "step": 6759 + }, + { + "epoch": 0.28177233129090073, + "grad_norm": 552.0, + "learning_rate": 8.427879010937191e-05, + "loss": 16.3751, + "step": 6760 + }, + { + "epoch": 0.281814013588429, + "grad_norm": 676.0, + "learning_rate": 8.427387577661328e-05, + "loss": 18.6253, + "step": 6761 + }, + { + "epoch": 0.2818556958859572, + "grad_norm": 508.0, + "learning_rate": 8.426896081921022e-05, + "loss": 17.7505, + "step": 6762 + }, + { + "epoch": 0.2818973781834855, + "grad_norm": 600.0, + "learning_rate": 8.42640452372523e-05, + "loss": 18.1257, + "step": 6763 + }, + { + "epoch": 0.2819390604810137, + "grad_norm": 358.0, + "learning_rate": 8.42591290308291e-05, + "loss": 15.0003, + "step": 6764 + }, + { + "epoch": 0.28198074277854196, + "grad_norm": 250.0, + "learning_rate": 8.425421220003025e-05, + "loss": 11.2506, + "step": 6765 + }, + { + "epoch": 0.28202242507607017, + "grad_norm": 187.0, + "learning_rate": 8.424929474494534e-05, + "loss": 12.1887, + "step": 6766 + }, + { + "epoch": 0.28206410737359844, + "grad_norm": 664.0, + "learning_rate": 8.424437666566399e-05, + "loss": 18.0052, + "step": 6767 + }, + { + "epoch": 0.28210578967112665, + "grad_norm": 174.0, + "learning_rate": 8.423945796227584e-05, + "loss": 10.8134, + "step": 6768 + }, + { + "epoch": 0.2821474719686549, + "grad_norm": 868.0, + "learning_rate": 8.423453863487052e-05, + "loss": 25.5002, + "step": 6769 + }, + { + "epoch": 0.28218915426618313, + "grad_norm": 205.0, + "learning_rate": 8.422961868353772e-05, + "loss": 10.0628, + "step": 6770 + }, + { + "epoch": 0.2822308365637114, + "grad_norm": 808.0, + "learning_rate": 8.422469810836704e-05, + "loss": 21.7505, + "step": 6771 + }, + { + "epoch": 0.2822725188612396, + "grad_norm": 171.0, + "learning_rate": 8.421977690944822e-05, + "loss": 10.5005, + "step": 6772 + }, + { + "epoch": 0.2823142011587679, + "grad_norm": 960.0, + "learning_rate": 8.421485508687093e-05, + "loss": 30.7504, + "step": 6773 + }, + { + "epoch": 0.2823558834562961, + "grad_norm": 93.0, + "learning_rate": 8.420993264072488e-05, + "loss": 9.3133, + "step": 6774 + }, + { + "epoch": 0.28239756575382435, + "grad_norm": 540.0, + "learning_rate": 8.420500957109974e-05, + "loss": 17.8754, + "step": 6775 + }, + { + "epoch": 0.28243924805135256, + "grad_norm": 183.0, + "learning_rate": 8.420008587808528e-05, + "loss": 11.6881, + "step": 6776 + }, + { + "epoch": 0.28248093034888083, + "grad_norm": 1024.0, + "learning_rate": 8.419516156177123e-05, + "loss": 23.8791, + "step": 6777 + }, + { + "epoch": 0.28252261264640904, + "grad_norm": 260.0, + "learning_rate": 8.419023662224731e-05, + "loss": 11.938, + "step": 6778 + }, + { + "epoch": 0.2825642949439373, + "grad_norm": 352.0, + "learning_rate": 8.41853110596033e-05, + "loss": 14.1252, + "step": 6779 + }, + { + "epoch": 0.2826059772414655, + "grad_norm": 636.0, + "learning_rate": 8.418038487392895e-05, + "loss": 18.1301, + "step": 6780 + }, + { + "epoch": 0.2826476595389938, + "grad_norm": 652.0, + "learning_rate": 8.417545806531406e-05, + "loss": 19.3752, + "step": 6781 + }, + { + "epoch": 0.28268934183652206, + "grad_norm": 408.0, + "learning_rate": 8.417053063384841e-05, + "loss": 14.8136, + "step": 6782 + }, + { + "epoch": 0.28273102413405027, + "grad_norm": 288.0, + "learning_rate": 8.416560257962181e-05, + "loss": 10.9435, + "step": 6783 + }, + { + "epoch": 0.28277270643157854, + "grad_norm": 223.0, + "learning_rate": 8.416067390272406e-05, + "loss": 9.313, + "step": 6784 + }, + { + "epoch": 0.28281438872910675, + "grad_norm": 860.0, + "learning_rate": 8.415574460324501e-05, + "loss": 28.3752, + "step": 6785 + }, + { + "epoch": 0.282856071026635, + "grad_norm": 1072.0, + "learning_rate": 8.415081468127448e-05, + "loss": 27.5009, + "step": 6786 + }, + { + "epoch": 0.2828977533241632, + "grad_norm": 250.0, + "learning_rate": 8.414588413690232e-05, + "loss": 13.3753, + "step": 6787 + }, + { + "epoch": 0.2829394356216915, + "grad_norm": 239.0, + "learning_rate": 8.41409529702184e-05, + "loss": 11.1255, + "step": 6788 + }, + { + "epoch": 0.2829811179192197, + "grad_norm": 56.75, + "learning_rate": 8.413602118131256e-05, + "loss": 6.7502, + "step": 6789 + }, + { + "epoch": 0.28302280021674797, + "grad_norm": 1352.0, + "learning_rate": 8.413108877027471e-05, + "loss": 30.13, + "step": 6790 + }, + { + "epoch": 0.2830644825142762, + "grad_norm": 400.0, + "learning_rate": 8.412615573719473e-05, + "loss": 14.6877, + "step": 6791 + }, + { + "epoch": 0.28310616481180445, + "grad_norm": 348.0, + "learning_rate": 8.412122208216256e-05, + "loss": 16.0003, + "step": 6792 + }, + { + "epoch": 0.28314784710933266, + "grad_norm": 262.0, + "learning_rate": 8.411628780526805e-05, + "loss": 11.9378, + "step": 6793 + }, + { + "epoch": 0.28318952940686093, + "grad_norm": 474.0, + "learning_rate": 8.41113529066012e-05, + "loss": 17.5007, + "step": 6794 + }, + { + "epoch": 0.28323121170438914, + "grad_norm": 210.0, + "learning_rate": 8.41064173862519e-05, + "loss": 10.4379, + "step": 6795 + }, + { + "epoch": 0.2832728940019174, + "grad_norm": 600.0, + "learning_rate": 8.410148124431012e-05, + "loss": 18.3776, + "step": 6796 + }, + { + "epoch": 0.2833145762994456, + "grad_norm": 352.0, + "learning_rate": 8.409654448086582e-05, + "loss": 14.3127, + "step": 6797 + }, + { + "epoch": 0.2833562585969739, + "grad_norm": 238.0, + "learning_rate": 8.409160709600895e-05, + "loss": 12.3128, + "step": 6798 + }, + { + "epoch": 0.2833979408945021, + "grad_norm": 608.0, + "learning_rate": 8.408666908982954e-05, + "loss": 19.376, + "step": 6799 + }, + { + "epoch": 0.28343962319203037, + "grad_norm": 282.0, + "learning_rate": 8.408173046241755e-05, + "loss": 13.5004, + "step": 6800 + }, + { + "epoch": 0.2834813054895586, + "grad_norm": 204.0, + "learning_rate": 8.407679121386298e-05, + "loss": 12.0009, + "step": 6801 + }, + { + "epoch": 0.28352298778708684, + "grad_norm": 169.0, + "learning_rate": 8.407185134425588e-05, + "loss": 9.938, + "step": 6802 + }, + { + "epoch": 0.28356467008461506, + "grad_norm": 300.0, + "learning_rate": 8.406691085368628e-05, + "loss": 14.5004, + "step": 6803 + }, + { + "epoch": 0.2836063523821433, + "grad_norm": 302.0, + "learning_rate": 8.406196974224418e-05, + "loss": 13.1252, + "step": 6804 + }, + { + "epoch": 0.28364803467967153, + "grad_norm": 180.0, + "learning_rate": 8.405702801001966e-05, + "loss": 11.8752, + "step": 6805 + }, + { + "epoch": 0.2836897169771998, + "grad_norm": 66.5, + "learning_rate": 8.405208565710279e-05, + "loss": 8.438, + "step": 6806 + }, + { + "epoch": 0.283731399274728, + "grad_norm": 420.0, + "learning_rate": 8.404714268358365e-05, + "loss": 16.2504, + "step": 6807 + }, + { + "epoch": 0.2837730815722563, + "grad_norm": 392.0, + "learning_rate": 8.404219908955229e-05, + "loss": 15.688, + "step": 6808 + }, + { + "epoch": 0.2838147638697845, + "grad_norm": 324.0, + "learning_rate": 8.403725487509883e-05, + "loss": 13.7502, + "step": 6809 + }, + { + "epoch": 0.28385644616731276, + "grad_norm": 532.0, + "learning_rate": 8.40323100403134e-05, + "loss": 18.5007, + "step": 6810 + }, + { + "epoch": 0.28389812846484097, + "grad_norm": 804.0, + "learning_rate": 8.402736458528607e-05, + "loss": 20.1254, + "step": 6811 + }, + { + "epoch": 0.28393981076236924, + "grad_norm": 692.0, + "learning_rate": 8.402241851010701e-05, + "loss": 20.6256, + "step": 6812 + }, + { + "epoch": 0.28398149305989745, + "grad_norm": 255.0, + "learning_rate": 8.401747181486637e-05, + "loss": 13.1261, + "step": 6813 + }, + { + "epoch": 0.2840231753574257, + "grad_norm": 492.0, + "learning_rate": 8.401252449965426e-05, + "loss": 17.1255, + "step": 6814 + }, + { + "epoch": 0.28406485765495393, + "grad_norm": 356.0, + "learning_rate": 8.400757656456089e-05, + "loss": 14.192, + "step": 6815 + }, + { + "epoch": 0.2841065399524822, + "grad_norm": 229.0, + "learning_rate": 8.400262800967641e-05, + "loss": 12.1883, + "step": 6816 + }, + { + "epoch": 0.2841482222500104, + "grad_norm": 792.0, + "learning_rate": 8.399767883509102e-05, + "loss": 22.8762, + "step": 6817 + }, + { + "epoch": 0.2841899045475387, + "grad_norm": 222.0, + "learning_rate": 8.399272904089492e-05, + "loss": 12.3752, + "step": 6818 + }, + { + "epoch": 0.2842315868450669, + "grad_norm": 310.0, + "learning_rate": 8.39877786271783e-05, + "loss": 14.4377, + "step": 6819 + }, + { + "epoch": 0.28427326914259515, + "grad_norm": 306.0, + "learning_rate": 8.398282759403141e-05, + "loss": 13.0002, + "step": 6820 + }, + { + "epoch": 0.28431495144012336, + "grad_norm": 366.0, + "learning_rate": 8.397787594154446e-05, + "loss": 14.6252, + "step": 6821 + }, + { + "epoch": 0.28435663373765163, + "grad_norm": 476.0, + "learning_rate": 8.397292366980772e-05, + "loss": 16.3752, + "step": 6822 + }, + { + "epoch": 0.28439831603517984, + "grad_norm": 316.0, + "learning_rate": 8.39679707789114e-05, + "loss": 15.0002, + "step": 6823 + }, + { + "epoch": 0.2844399983327081, + "grad_norm": 896.0, + "learning_rate": 8.396301726894583e-05, + "loss": 21.6329, + "step": 6824 + }, + { + "epoch": 0.2844816806302363, + "grad_norm": 209.0, + "learning_rate": 8.395806314000126e-05, + "loss": 10.3755, + "step": 6825 + }, + { + "epoch": 0.2845233629277646, + "grad_norm": 402.0, + "learning_rate": 8.395310839216795e-05, + "loss": 15.3128, + "step": 6826 + }, + { + "epoch": 0.2845650452252928, + "grad_norm": 179.0, + "learning_rate": 8.394815302553623e-05, + "loss": 11.2502, + "step": 6827 + }, + { + "epoch": 0.28460672752282107, + "grad_norm": 328.0, + "learning_rate": 8.394319704019641e-05, + "loss": 13.1878, + "step": 6828 + }, + { + "epoch": 0.2846484098203493, + "grad_norm": 196.0, + "learning_rate": 8.393824043623881e-05, + "loss": 10.1258, + "step": 6829 + }, + { + "epoch": 0.28469009211787755, + "grad_norm": 376.0, + "learning_rate": 8.393328321375377e-05, + "loss": 14.938, + "step": 6830 + }, + { + "epoch": 0.28473177441540576, + "grad_norm": 440.0, + "learning_rate": 8.392832537283162e-05, + "loss": 15.127, + "step": 6831 + }, + { + "epoch": 0.284773456712934, + "grad_norm": 160.0, + "learning_rate": 8.392336691356275e-05, + "loss": 8.0627, + "step": 6832 + }, + { + "epoch": 0.28481513901046224, + "grad_norm": 432.0, + "learning_rate": 8.391840783603749e-05, + "loss": 14.6881, + "step": 6833 + }, + { + "epoch": 0.2848568213079905, + "grad_norm": 318.0, + "learning_rate": 8.391344814034622e-05, + "loss": 11.2502, + "step": 6834 + }, + { + "epoch": 0.2848985036055187, + "grad_norm": 396.0, + "learning_rate": 8.390848782657938e-05, + "loss": 15.1254, + "step": 6835 + }, + { + "epoch": 0.284940185903047, + "grad_norm": 340.0, + "learning_rate": 8.390352689482733e-05, + "loss": 14.2512, + "step": 6836 + }, + { + "epoch": 0.2849818682005752, + "grad_norm": 378.0, + "learning_rate": 8.389856534518048e-05, + "loss": 14.1252, + "step": 6837 + }, + { + "epoch": 0.28502355049810346, + "grad_norm": 198.0, + "learning_rate": 8.389360317772927e-05, + "loss": 10.2503, + "step": 6838 + }, + { + "epoch": 0.2850652327956317, + "grad_norm": 564.0, + "learning_rate": 8.388864039256414e-05, + "loss": 17.2504, + "step": 6839 + }, + { + "epoch": 0.28510691509315994, + "grad_norm": 672.0, + "learning_rate": 8.388367698977554e-05, + "loss": 21.2502, + "step": 6840 + }, + { + "epoch": 0.28514859739068815, + "grad_norm": 241.0, + "learning_rate": 8.387871296945391e-05, + "loss": 12.5631, + "step": 6841 + }, + { + "epoch": 0.2851902796882164, + "grad_norm": 486.0, + "learning_rate": 8.387374833168973e-05, + "loss": 16.0007, + "step": 6842 + }, + { + "epoch": 0.28523196198574463, + "grad_norm": 262.0, + "learning_rate": 8.386878307657346e-05, + "loss": 13.0628, + "step": 6843 + }, + { + "epoch": 0.2852736442832729, + "grad_norm": 424.0, + "learning_rate": 8.386381720419564e-05, + "loss": 13.5039, + "step": 6844 + }, + { + "epoch": 0.2853153265808011, + "grad_norm": 276.0, + "learning_rate": 8.385885071464673e-05, + "loss": 13.1253, + "step": 6845 + }, + { + "epoch": 0.2853570088783294, + "grad_norm": 368.0, + "learning_rate": 8.385388360801727e-05, + "loss": 13.8752, + "step": 6846 + }, + { + "epoch": 0.2853986911758576, + "grad_norm": 478.0, + "learning_rate": 8.384891588439776e-05, + "loss": 16.6253, + "step": 6847 + }, + { + "epoch": 0.28544037347338586, + "grad_norm": 568.0, + "learning_rate": 8.384394754387876e-05, + "loss": 16.2557, + "step": 6848 + }, + { + "epoch": 0.28548205577091407, + "grad_norm": 460.0, + "learning_rate": 8.383897858655082e-05, + "loss": 20.1254, + "step": 6849 + }, + { + "epoch": 0.28552373806844233, + "grad_norm": 243.0, + "learning_rate": 8.383400901250449e-05, + "loss": 12.1258, + "step": 6850 + }, + { + "epoch": 0.28556542036597055, + "grad_norm": 318.0, + "learning_rate": 8.382903882183033e-05, + "loss": 13.3754, + "step": 6851 + }, + { + "epoch": 0.2856071026634988, + "grad_norm": 368.0, + "learning_rate": 8.382406801461894e-05, + "loss": 13.5629, + "step": 6852 + }, + { + "epoch": 0.285648784961027, + "grad_norm": 1744.0, + "learning_rate": 8.381909659096092e-05, + "loss": 37.0002, + "step": 6853 + }, + { + "epoch": 0.2856904672585553, + "grad_norm": 544.0, + "learning_rate": 8.381412455094683e-05, + "loss": 18.7501, + "step": 6854 + }, + { + "epoch": 0.28573214955608356, + "grad_norm": 249.0, + "learning_rate": 8.380915189466736e-05, + "loss": 10.1251, + "step": 6855 + }, + { + "epoch": 0.28577383185361177, + "grad_norm": 462.0, + "learning_rate": 8.380417862221308e-05, + "loss": 17.0001, + "step": 6856 + }, + { + "epoch": 0.28581551415114004, + "grad_norm": 260.0, + "learning_rate": 8.379920473367464e-05, + "loss": 4.3446, + "step": 6857 + }, + { + "epoch": 0.28585719644866825, + "grad_norm": 348.0, + "learning_rate": 8.379423022914268e-05, + "loss": 14.8753, + "step": 6858 + }, + { + "epoch": 0.2858988787461965, + "grad_norm": 304.0, + "learning_rate": 8.378925510870789e-05, + "loss": 13.3753, + "step": 6859 + }, + { + "epoch": 0.28594056104372473, + "grad_norm": 392.0, + "learning_rate": 8.378427937246091e-05, + "loss": 15.1881, + "step": 6860 + }, + { + "epoch": 0.285982243341253, + "grad_norm": 428.0, + "learning_rate": 8.377930302049246e-05, + "loss": 13.1903, + "step": 6861 + }, + { + "epoch": 0.2860239256387812, + "grad_norm": 328.0, + "learning_rate": 8.377432605289321e-05, + "loss": 13.8752, + "step": 6862 + }, + { + "epoch": 0.2860656079363095, + "grad_norm": 298.0, + "learning_rate": 8.376934846975386e-05, + "loss": 12.4378, + "step": 6863 + }, + { + "epoch": 0.2861072902338377, + "grad_norm": 482.0, + "learning_rate": 8.376437027116515e-05, + "loss": 12.5627, + "step": 6864 + }, + { + "epoch": 0.28614897253136595, + "grad_norm": 133.0, + "learning_rate": 8.375939145721778e-05, + "loss": 10.1255, + "step": 6865 + }, + { + "epoch": 0.28619065482889416, + "grad_norm": 366.0, + "learning_rate": 8.375441202800252e-05, + "loss": 13.7502, + "step": 6866 + }, + { + "epoch": 0.28623233712642243, + "grad_norm": 640.0, + "learning_rate": 8.37494319836101e-05, + "loss": 21.6255, + "step": 6867 + }, + { + "epoch": 0.28627401942395064, + "grad_norm": 364.0, + "learning_rate": 8.374445132413127e-05, + "loss": 15.3752, + "step": 6868 + }, + { + "epoch": 0.2863157017214789, + "grad_norm": 210.0, + "learning_rate": 8.373947004965684e-05, + "loss": 12.1264, + "step": 6869 + }, + { + "epoch": 0.2863573840190071, + "grad_norm": 648.0, + "learning_rate": 8.373448816027756e-05, + "loss": 20.3764, + "step": 6870 + }, + { + "epoch": 0.2863990663165354, + "grad_norm": 1744.0, + "learning_rate": 8.372950565608424e-05, + "loss": 38.2533, + "step": 6871 + }, + { + "epoch": 0.2864407486140636, + "grad_norm": 608.0, + "learning_rate": 8.372452253716767e-05, + "loss": 19.3753, + "step": 6872 + }, + { + "epoch": 0.28648243091159187, + "grad_norm": 324.0, + "learning_rate": 8.371953880361871e-05, + "loss": 13.5629, + "step": 6873 + }, + { + "epoch": 0.2865241132091201, + "grad_norm": 324.0, + "learning_rate": 8.371455445552815e-05, + "loss": 14.1923, + "step": 6874 + }, + { + "epoch": 0.28656579550664835, + "grad_norm": 83.0, + "learning_rate": 8.370956949298685e-05, + "loss": 9.3128, + "step": 6875 + }, + { + "epoch": 0.28660747780417656, + "grad_norm": 406.0, + "learning_rate": 8.370458391608565e-05, + "loss": 16.7507, + "step": 6876 + }, + { + "epoch": 0.2866491601017048, + "grad_norm": 446.0, + "learning_rate": 8.369959772491541e-05, + "loss": 15.0005, + "step": 6877 + }, + { + "epoch": 0.28669084239923304, + "grad_norm": 600.0, + "learning_rate": 8.369461091956701e-05, + "loss": 20.2509, + "step": 6878 + }, + { + "epoch": 0.2867325246967613, + "grad_norm": 248.0, + "learning_rate": 8.368962350013133e-05, + "loss": 12.3752, + "step": 6879 + }, + { + "epoch": 0.2867742069942895, + "grad_norm": 956.0, + "learning_rate": 8.368463546669929e-05, + "loss": 27.1257, + "step": 6880 + }, + { + "epoch": 0.2868158892918178, + "grad_norm": 980.0, + "learning_rate": 8.367964681936175e-05, + "loss": 26.1252, + "step": 6881 + }, + { + "epoch": 0.286857571589346, + "grad_norm": 278.0, + "learning_rate": 8.367465755820969e-05, + "loss": 12.6878, + "step": 6882 + }, + { + "epoch": 0.28689925388687426, + "grad_norm": 784.0, + "learning_rate": 8.366966768333398e-05, + "loss": 25.7522, + "step": 6883 + }, + { + "epoch": 0.2869409361844025, + "grad_norm": 286.0, + "learning_rate": 8.36646771948256e-05, + "loss": 14.1877, + "step": 6884 + }, + { + "epoch": 0.28698261848193074, + "grad_norm": 92.0, + "learning_rate": 8.36596860927755e-05, + "loss": 8.7503, + "step": 6885 + }, + { + "epoch": 0.28702430077945895, + "grad_norm": 76.0, + "learning_rate": 8.365469437727461e-05, + "loss": 4.9377, + "step": 6886 + }, + { + "epoch": 0.2870659830769872, + "grad_norm": 204.0, + "learning_rate": 8.364970204841394e-05, + "loss": 11.5626, + "step": 6887 + }, + { + "epoch": 0.28710766537451543, + "grad_norm": 270.0, + "learning_rate": 8.364470910628446e-05, + "loss": 12.5627, + "step": 6888 + }, + { + "epoch": 0.2871493476720437, + "grad_norm": 588.0, + "learning_rate": 8.363971555097717e-05, + "loss": 18.376, + "step": 6889 + }, + { + "epoch": 0.2871910299695719, + "grad_norm": 632.0, + "learning_rate": 8.363472138258308e-05, + "loss": 20.2506, + "step": 6890 + }, + { + "epoch": 0.2872327122671002, + "grad_norm": 402.0, + "learning_rate": 8.36297266011932e-05, + "loss": 14.6252, + "step": 6891 + }, + { + "epoch": 0.2872743945646284, + "grad_norm": 159.0, + "learning_rate": 8.362473120689858e-05, + "loss": 7.563, + "step": 6892 + }, + { + "epoch": 0.28731607686215666, + "grad_norm": 368.0, + "learning_rate": 8.361973519979023e-05, + "loss": 12.9378, + "step": 6893 + }, + { + "epoch": 0.28735775915968487, + "grad_norm": 572.0, + "learning_rate": 8.361473857995925e-05, + "loss": 20.1251, + "step": 6894 + }, + { + "epoch": 0.28739944145721313, + "grad_norm": 198.0, + "learning_rate": 8.360974134749665e-05, + "loss": 12.3127, + "step": 6895 + }, + { + "epoch": 0.28744112375474135, + "grad_norm": 498.0, + "learning_rate": 8.360474350249355e-05, + "loss": 17.8757, + "step": 6896 + }, + { + "epoch": 0.2874828060522696, + "grad_norm": 384.0, + "learning_rate": 8.3599745045041e-05, + "loss": 14.3127, + "step": 6897 + }, + { + "epoch": 0.2875244883497978, + "grad_norm": 286.0, + "learning_rate": 8.359474597523011e-05, + "loss": 12.1254, + "step": 6898 + }, + { + "epoch": 0.2875661706473261, + "grad_norm": 318.0, + "learning_rate": 8.358974629315201e-05, + "loss": 11.1254, + "step": 6899 + }, + { + "epoch": 0.2876078529448543, + "grad_norm": 636.0, + "learning_rate": 8.358474599889778e-05, + "loss": 18.7517, + "step": 6900 + }, + { + "epoch": 0.28764953524238257, + "grad_norm": 276.0, + "learning_rate": 8.35797450925586e-05, + "loss": 13.5004, + "step": 6901 + }, + { + "epoch": 0.2876912175399108, + "grad_norm": 65.5, + "learning_rate": 8.357474357422557e-05, + "loss": 8.4379, + "step": 6902 + }, + { + "epoch": 0.28773289983743905, + "grad_norm": 165.0, + "learning_rate": 8.356974144398986e-05, + "loss": 10.5627, + "step": 6903 + }, + { + "epoch": 0.28777458213496726, + "grad_norm": 239.0, + "learning_rate": 8.356473870194262e-05, + "loss": 12.5627, + "step": 6904 + }, + { + "epoch": 0.2878162644324955, + "grad_norm": 636.0, + "learning_rate": 8.355973534817506e-05, + "loss": 16.3751, + "step": 6905 + }, + { + "epoch": 0.28785794673002374, + "grad_norm": 616.0, + "learning_rate": 8.355473138277832e-05, + "loss": 18.251, + "step": 6906 + }, + { + "epoch": 0.287899629027552, + "grad_norm": 231.0, + "learning_rate": 8.354972680584364e-05, + "loss": 11.4376, + "step": 6907 + }, + { + "epoch": 0.2879413113250802, + "grad_norm": 304.0, + "learning_rate": 8.354472161746221e-05, + "loss": 11.8753, + "step": 6908 + }, + { + "epoch": 0.2879829936226085, + "grad_norm": 211.0, + "learning_rate": 8.353971581772524e-05, + "loss": 11.3133, + "step": 6909 + }, + { + "epoch": 0.2880246759201367, + "grad_norm": 476.0, + "learning_rate": 8.353470940672397e-05, + "loss": 15.9379, + "step": 6910 + }, + { + "epoch": 0.28806635821766496, + "grad_norm": 484.0, + "learning_rate": 8.352970238454966e-05, + "loss": 14.6253, + "step": 6911 + }, + { + "epoch": 0.2881080405151932, + "grad_norm": 540.0, + "learning_rate": 8.352469475129355e-05, + "loss": 17.2505, + "step": 6912 + }, + { + "epoch": 0.28814972281272144, + "grad_norm": 302.0, + "learning_rate": 8.351968650704687e-05, + "loss": 13.3127, + "step": 6913 + }, + { + "epoch": 0.28819140511024965, + "grad_norm": 438.0, + "learning_rate": 8.351467765190096e-05, + "loss": 16.6259, + "step": 6914 + }, + { + "epoch": 0.2882330874077779, + "grad_norm": 528.0, + "learning_rate": 8.350966818594706e-05, + "loss": 17.3758, + "step": 6915 + }, + { + "epoch": 0.28827476970530613, + "grad_norm": 149.0, + "learning_rate": 8.350465810927648e-05, + "loss": 11.0627, + "step": 6916 + }, + { + "epoch": 0.2883164520028344, + "grad_norm": 836.0, + "learning_rate": 8.349964742198054e-05, + "loss": 23.2504, + "step": 6917 + }, + { + "epoch": 0.2883581343003626, + "grad_norm": 368.0, + "learning_rate": 8.349463612415056e-05, + "loss": 13.0001, + "step": 6918 + }, + { + "epoch": 0.2883998165978909, + "grad_norm": 438.0, + "learning_rate": 8.348962421587785e-05, + "loss": 14.4378, + "step": 6919 + }, + { + "epoch": 0.2884414988954191, + "grad_norm": 418.0, + "learning_rate": 8.348461169725376e-05, + "loss": 15.3753, + "step": 6920 + }, + { + "epoch": 0.28848318119294736, + "grad_norm": 392.0, + "learning_rate": 8.347959856836967e-05, + "loss": 14.6876, + "step": 6921 + }, + { + "epoch": 0.28852486349047557, + "grad_norm": 494.0, + "learning_rate": 8.34745848293169e-05, + "loss": 14.8751, + "step": 6922 + }, + { + "epoch": 0.28856654578800384, + "grad_norm": 178.0, + "learning_rate": 8.346957048018686e-05, + "loss": 11.1879, + "step": 6923 + }, + { + "epoch": 0.28860822808553205, + "grad_norm": 296.0, + "learning_rate": 8.346455552107093e-05, + "loss": 13.1878, + "step": 6924 + }, + { + "epoch": 0.2886499103830603, + "grad_norm": 532.0, + "learning_rate": 8.345953995206051e-05, + "loss": 16.2501, + "step": 6925 + }, + { + "epoch": 0.2886915926805885, + "grad_norm": 390.0, + "learning_rate": 8.3454523773247e-05, + "loss": 15.4378, + "step": 6926 + }, + { + "epoch": 0.2887332749781168, + "grad_norm": 153.0, + "learning_rate": 8.344950698472183e-05, + "loss": 8.7503, + "step": 6927 + }, + { + "epoch": 0.28877495727564506, + "grad_norm": 304.0, + "learning_rate": 8.344448958657641e-05, + "loss": 13.1256, + "step": 6928 + }, + { + "epoch": 0.2888166395731733, + "grad_norm": 320.0, + "learning_rate": 8.34394715789022e-05, + "loss": 13.2502, + "step": 6929 + }, + { + "epoch": 0.28885832187070154, + "grad_norm": 232.0, + "learning_rate": 8.343445296179065e-05, + "loss": 10.9378, + "step": 6930 + }, + { + "epoch": 0.28890000416822975, + "grad_norm": 430.0, + "learning_rate": 8.342943373533324e-05, + "loss": 16.6257, + "step": 6931 + }, + { + "epoch": 0.288941686465758, + "grad_norm": 964.0, + "learning_rate": 8.342441389962144e-05, + "loss": 24.5004, + "step": 6932 + }, + { + "epoch": 0.28898336876328623, + "grad_norm": 135.0, + "learning_rate": 8.341939345474671e-05, + "loss": 9.2501, + "step": 6933 + }, + { + "epoch": 0.2890250510608145, + "grad_norm": 209.0, + "learning_rate": 8.341437240080057e-05, + "loss": 10.4386, + "step": 6934 + }, + { + "epoch": 0.2890667333583427, + "grad_norm": 724.0, + "learning_rate": 8.340935073787452e-05, + "loss": 21.1252, + "step": 6935 + }, + { + "epoch": 0.289108415655871, + "grad_norm": 1672.0, + "learning_rate": 8.340432846606011e-05, + "loss": 31.7542, + "step": 6936 + }, + { + "epoch": 0.2891500979533992, + "grad_norm": 908.0, + "learning_rate": 8.339930558544884e-05, + "loss": 23.8751, + "step": 6937 + }, + { + "epoch": 0.28919178025092745, + "grad_norm": 82.0, + "learning_rate": 8.339428209613224e-05, + "loss": 7.938, + "step": 6938 + }, + { + "epoch": 0.28923346254845567, + "grad_norm": 508.0, + "learning_rate": 8.338925799820191e-05, + "loss": 18.1253, + "step": 6939 + }, + { + "epoch": 0.28927514484598393, + "grad_norm": 176.0, + "learning_rate": 8.338423329174938e-05, + "loss": 11.5628, + "step": 6940 + }, + { + "epoch": 0.28931682714351215, + "grad_norm": 95.5, + "learning_rate": 8.337920797686624e-05, + "loss": 6.9689, + "step": 6941 + }, + { + "epoch": 0.2893585094410404, + "grad_norm": 97.0, + "learning_rate": 8.337418205364407e-05, + "loss": 9.438, + "step": 6942 + }, + { + "epoch": 0.2894001917385686, + "grad_norm": 432.0, + "learning_rate": 8.336915552217445e-05, + "loss": 16.5004, + "step": 6943 + }, + { + "epoch": 0.2894418740360969, + "grad_norm": 668.0, + "learning_rate": 8.336412838254903e-05, + "loss": 21.2502, + "step": 6944 + }, + { + "epoch": 0.2894835563336251, + "grad_norm": 334.0, + "learning_rate": 8.335910063485941e-05, + "loss": 13.3127, + "step": 6945 + }, + { + "epoch": 0.28952523863115337, + "grad_norm": 230.0, + "learning_rate": 8.335407227919721e-05, + "loss": 11.4388, + "step": 6946 + }, + { + "epoch": 0.2895669209286816, + "grad_norm": 247.0, + "learning_rate": 8.334904331565407e-05, + "loss": 12.0003, + "step": 6947 + }, + { + "epoch": 0.28960860322620985, + "grad_norm": 644.0, + "learning_rate": 8.334401374432169e-05, + "loss": 19.5003, + "step": 6948 + }, + { + "epoch": 0.28965028552373806, + "grad_norm": 396.0, + "learning_rate": 8.333898356529167e-05, + "loss": 16.2504, + "step": 6949 + }, + { + "epoch": 0.2896919678212663, + "grad_norm": 418.0, + "learning_rate": 8.333395277865572e-05, + "loss": 16.6253, + "step": 6950 + }, + { + "epoch": 0.28973365011879454, + "grad_norm": 154.0, + "learning_rate": 8.332892138450552e-05, + "loss": 9.9385, + "step": 6951 + }, + { + "epoch": 0.2897753324163228, + "grad_norm": 155.0, + "learning_rate": 8.332388938293278e-05, + "loss": 6.5627, + "step": 6952 + }, + { + "epoch": 0.289817014713851, + "grad_norm": 240.0, + "learning_rate": 8.33188567740292e-05, + "loss": 10.5002, + "step": 6953 + }, + { + "epoch": 0.2898586970113793, + "grad_norm": 324.0, + "learning_rate": 8.331382355788649e-05, + "loss": 13.5022, + "step": 6954 + }, + { + "epoch": 0.2899003793089075, + "grad_norm": 224.0, + "learning_rate": 8.330878973459637e-05, + "loss": 11.8752, + "step": 6955 + }, + { + "epoch": 0.28994206160643576, + "grad_norm": 374.0, + "learning_rate": 8.330375530425063e-05, + "loss": 13.7502, + "step": 6956 + }, + { + "epoch": 0.289983743903964, + "grad_norm": 326.0, + "learning_rate": 8.329872026694098e-05, + "loss": 13.4377, + "step": 6957 + }, + { + "epoch": 0.29002542620149224, + "grad_norm": 370.0, + "learning_rate": 8.329368462275919e-05, + "loss": 12.1885, + "step": 6958 + }, + { + "epoch": 0.29006710849902045, + "grad_norm": 440.0, + "learning_rate": 8.328864837179705e-05, + "loss": 16.5003, + "step": 6959 + }, + { + "epoch": 0.2901087907965487, + "grad_norm": 624.0, + "learning_rate": 8.328361151414634e-05, + "loss": 20.2504, + "step": 6960 + }, + { + "epoch": 0.29015047309407693, + "grad_norm": 286.0, + "learning_rate": 8.327857404989885e-05, + "loss": 12.6877, + "step": 6961 + }, + { + "epoch": 0.2901921553916052, + "grad_norm": 908.0, + "learning_rate": 8.32735359791464e-05, + "loss": 20.506, + "step": 6962 + }, + { + "epoch": 0.2902338376891334, + "grad_norm": 103.0, + "learning_rate": 8.326849730198081e-05, + "loss": 8.6253, + "step": 6963 + }, + { + "epoch": 0.2902755199866617, + "grad_norm": 272.0, + "learning_rate": 8.326345801849389e-05, + "loss": 13.2503, + "step": 6964 + }, + { + "epoch": 0.2903172022841899, + "grad_norm": 242.0, + "learning_rate": 8.32584181287775e-05, + "loss": 12.3754, + "step": 6965 + }, + { + "epoch": 0.29035888458171816, + "grad_norm": 470.0, + "learning_rate": 8.325337763292348e-05, + "loss": 16.7502, + "step": 6966 + }, + { + "epoch": 0.29040056687924637, + "grad_norm": 720.0, + "learning_rate": 8.324833653102371e-05, + "loss": 21.7521, + "step": 6967 + }, + { + "epoch": 0.29044224917677464, + "grad_norm": 386.0, + "learning_rate": 8.324329482317004e-05, + "loss": 14.2501, + "step": 6968 + }, + { + "epoch": 0.29048393147430285, + "grad_norm": 188.0, + "learning_rate": 8.323825250945439e-05, + "loss": 11.1254, + "step": 6969 + }, + { + "epoch": 0.2905256137718311, + "grad_norm": 640.0, + "learning_rate": 8.323320958996862e-05, + "loss": 19.5002, + "step": 6970 + }, + { + "epoch": 0.2905672960693593, + "grad_norm": 492.0, + "learning_rate": 8.322816606480469e-05, + "loss": 17.5004, + "step": 6971 + }, + { + "epoch": 0.2906089783668876, + "grad_norm": 223.0, + "learning_rate": 8.322312193405443e-05, + "loss": 11.7502, + "step": 6972 + }, + { + "epoch": 0.2906506606644158, + "grad_norm": 358.0, + "learning_rate": 8.321807719780987e-05, + "loss": 15.0627, + "step": 6973 + }, + { + "epoch": 0.2906923429619441, + "grad_norm": 268.0, + "learning_rate": 8.321303185616288e-05, + "loss": 11.4402, + "step": 6974 + }, + { + "epoch": 0.2907340252594723, + "grad_norm": 856.0, + "learning_rate": 8.320798590920545e-05, + "loss": 24.0002, + "step": 6975 + }, + { + "epoch": 0.29077570755700055, + "grad_norm": 410.0, + "learning_rate": 8.320293935702952e-05, + "loss": 16.0003, + "step": 6976 + }, + { + "epoch": 0.29081738985452876, + "grad_norm": 560.0, + "learning_rate": 8.319789219972707e-05, + "loss": 18.7502, + "step": 6977 + }, + { + "epoch": 0.29085907215205703, + "grad_norm": 205.0, + "learning_rate": 8.319284443739009e-05, + "loss": 9.8757, + "step": 6978 + }, + { + "epoch": 0.29090075444958524, + "grad_norm": 424.0, + "learning_rate": 8.318779607011058e-05, + "loss": 15.8753, + "step": 6979 + }, + { + "epoch": 0.2909424367471135, + "grad_norm": 224.0, + "learning_rate": 8.318274709798053e-05, + "loss": 11.3751, + "step": 6980 + }, + { + "epoch": 0.2909841190446417, + "grad_norm": 484.0, + "learning_rate": 8.317769752109197e-05, + "loss": 16.8762, + "step": 6981 + }, + { + "epoch": 0.29102580134217, + "grad_norm": 125.5, + "learning_rate": 8.317264733953694e-05, + "loss": 7.6257, + "step": 6982 + }, + { + "epoch": 0.2910674836396982, + "grad_norm": 168.0, + "learning_rate": 8.316759655340746e-05, + "loss": 10.0002, + "step": 6983 + }, + { + "epoch": 0.29110916593722647, + "grad_norm": 520.0, + "learning_rate": 8.316254516279558e-05, + "loss": 18.2504, + "step": 6984 + }, + { + "epoch": 0.2911508482347547, + "grad_norm": 436.0, + "learning_rate": 8.315749316779338e-05, + "loss": 14.6878, + "step": 6985 + }, + { + "epoch": 0.29119253053228294, + "grad_norm": 436.0, + "learning_rate": 8.315244056849292e-05, + "loss": 13.688, + "step": 6986 + }, + { + "epoch": 0.29123421282981116, + "grad_norm": 193.0, + "learning_rate": 8.314738736498629e-05, + "loss": 11.1877, + "step": 6987 + }, + { + "epoch": 0.2912758951273394, + "grad_norm": 229.0, + "learning_rate": 8.314233355736556e-05, + "loss": 11.2503, + "step": 6988 + }, + { + "epoch": 0.29131757742486764, + "grad_norm": 190.0, + "learning_rate": 8.31372791457229e-05, + "loss": 11.3129, + "step": 6989 + }, + { + "epoch": 0.2913592597223959, + "grad_norm": 412.0, + "learning_rate": 8.313222413015036e-05, + "loss": 15.3127, + "step": 6990 + }, + { + "epoch": 0.2914009420199241, + "grad_norm": 640.0, + "learning_rate": 8.312716851074009e-05, + "loss": 20.3751, + "step": 6991 + }, + { + "epoch": 0.2914426243174524, + "grad_norm": 179.0, + "learning_rate": 8.312211228758425e-05, + "loss": 9.1252, + "step": 6992 + }, + { + "epoch": 0.2914843066149806, + "grad_norm": 178.0, + "learning_rate": 8.311705546077497e-05, + "loss": 10.3128, + "step": 6993 + }, + { + "epoch": 0.29152598891250886, + "grad_norm": 78.5, + "learning_rate": 8.31119980304044e-05, + "loss": 7.5629, + "step": 6994 + }, + { + "epoch": 0.29156767121003707, + "grad_norm": 123.0, + "learning_rate": 8.310693999656473e-05, + "loss": 9.188, + "step": 6995 + }, + { + "epoch": 0.29160935350756534, + "grad_norm": 624.0, + "learning_rate": 8.310188135934815e-05, + "loss": 19.7546, + "step": 6996 + }, + { + "epoch": 0.29165103580509355, + "grad_norm": 276.0, + "learning_rate": 8.309682211884685e-05, + "loss": 13.1256, + "step": 6997 + }, + { + "epoch": 0.2916927181026218, + "grad_norm": 278.0, + "learning_rate": 8.309176227515303e-05, + "loss": 12.6878, + "step": 6998 + }, + { + "epoch": 0.29173440040015003, + "grad_norm": 412.0, + "learning_rate": 8.30867018283589e-05, + "loss": 14.6255, + "step": 6999 + }, + { + "epoch": 0.2917760826976783, + "grad_norm": 1004.0, + "learning_rate": 8.308164077855667e-05, + "loss": 22.7543, + "step": 7000 + }, + { + "epoch": 0.29181776499520656, + "grad_norm": 396.0, + "learning_rate": 8.307657912583862e-05, + "loss": 15.0007, + "step": 7001 + }, + { + "epoch": 0.2918594472927348, + "grad_norm": 248.0, + "learning_rate": 8.307151687029697e-05, + "loss": 12.1886, + "step": 7002 + }, + { + "epoch": 0.29190112959026304, + "grad_norm": 840.0, + "learning_rate": 8.3066454012024e-05, + "loss": 21.5015, + "step": 7003 + }, + { + "epoch": 0.29194281188779125, + "grad_norm": 260.0, + "learning_rate": 8.306139055111197e-05, + "loss": 11.6255, + "step": 7004 + }, + { + "epoch": 0.2919844941853195, + "grad_norm": 436.0, + "learning_rate": 8.305632648765314e-05, + "loss": 16.1254, + "step": 7005 + }, + { + "epoch": 0.29202617648284773, + "grad_norm": 235.0, + "learning_rate": 8.305126182173984e-05, + "loss": 11.7502, + "step": 7006 + }, + { + "epoch": 0.292067858780376, + "grad_norm": 358.0, + "learning_rate": 8.304619655346437e-05, + "loss": 15.0627, + "step": 7007 + }, + { + "epoch": 0.2921095410779042, + "grad_norm": 744.0, + "learning_rate": 8.304113068291903e-05, + "loss": 22.7501, + "step": 7008 + }, + { + "epoch": 0.2921512233754325, + "grad_norm": 346.0, + "learning_rate": 8.303606421019614e-05, + "loss": 14.0009, + "step": 7009 + }, + { + "epoch": 0.2921929056729607, + "grad_norm": 720.0, + "learning_rate": 8.303099713538805e-05, + "loss": 21.7502, + "step": 7010 + }, + { + "epoch": 0.29223458797048896, + "grad_norm": 165.0, + "learning_rate": 8.302592945858712e-05, + "loss": 9.1252, + "step": 7011 + }, + { + "epoch": 0.29227627026801717, + "grad_norm": 418.0, + "learning_rate": 8.302086117988568e-05, + "loss": 16.1252, + "step": 7012 + }, + { + "epoch": 0.29231795256554544, + "grad_norm": 280.0, + "learning_rate": 8.301579229937611e-05, + "loss": 12.9382, + "step": 7013 + }, + { + "epoch": 0.29235963486307365, + "grad_norm": 109.5, + "learning_rate": 8.30107228171508e-05, + "loss": 9.1251, + "step": 7014 + }, + { + "epoch": 0.2924013171606019, + "grad_norm": 151.0, + "learning_rate": 8.300565273330212e-05, + "loss": 10.7507, + "step": 7015 + }, + { + "epoch": 0.2924429994581301, + "grad_norm": 243.0, + "learning_rate": 8.300058204792251e-05, + "loss": 12.3753, + "step": 7016 + }, + { + "epoch": 0.2924846817556584, + "grad_norm": 624.0, + "learning_rate": 8.299551076110436e-05, + "loss": 18.1287, + "step": 7017 + }, + { + "epoch": 0.2925263640531866, + "grad_norm": 520.0, + "learning_rate": 8.29904388729401e-05, + "loss": 16.7502, + "step": 7018 + }, + { + "epoch": 0.29256804635071487, + "grad_norm": 456.0, + "learning_rate": 8.298536638352216e-05, + "loss": 16.7502, + "step": 7019 + }, + { + "epoch": 0.2926097286482431, + "grad_norm": 130.0, + "learning_rate": 8.298029329294299e-05, + "loss": 9.6876, + "step": 7020 + }, + { + "epoch": 0.29265141094577135, + "grad_norm": 282.0, + "learning_rate": 8.297521960129505e-05, + "loss": 12.7502, + "step": 7021 + }, + { + "epoch": 0.29269309324329956, + "grad_norm": 510.0, + "learning_rate": 8.297014530867079e-05, + "loss": 17.1257, + "step": 7022 + }, + { + "epoch": 0.29273477554082783, + "grad_norm": 620.0, + "learning_rate": 8.296507041516272e-05, + "loss": 18.3756, + "step": 7023 + }, + { + "epoch": 0.29277645783835604, + "grad_norm": 474.0, + "learning_rate": 8.295999492086331e-05, + "loss": 17.2502, + "step": 7024 + }, + { + "epoch": 0.2928181401358843, + "grad_norm": 912.0, + "learning_rate": 8.295491882586506e-05, + "loss": 22.1255, + "step": 7025 + }, + { + "epoch": 0.2928598224334125, + "grad_norm": 916.0, + "learning_rate": 8.294984213026049e-05, + "loss": 26.3751, + "step": 7026 + }, + { + "epoch": 0.2929015047309408, + "grad_norm": 664.0, + "learning_rate": 8.294476483414213e-05, + "loss": 19.626, + "step": 7027 + }, + { + "epoch": 0.292943187028469, + "grad_norm": 644.0, + "learning_rate": 8.29396869376025e-05, + "loss": 20.3752, + "step": 7028 + }, + { + "epoch": 0.29298486932599727, + "grad_norm": 190.0, + "learning_rate": 8.293460844073416e-05, + "loss": 7.219, + "step": 7029 + }, + { + "epoch": 0.2930265516235255, + "grad_norm": 716.0, + "learning_rate": 8.292952934362966e-05, + "loss": 22.2502, + "step": 7030 + }, + { + "epoch": 0.29306823392105374, + "grad_norm": 348.0, + "learning_rate": 8.292444964638155e-05, + "loss": 14.5002, + "step": 7031 + }, + { + "epoch": 0.29310991621858196, + "grad_norm": 338.0, + "learning_rate": 8.291936934908242e-05, + "loss": 14.0627, + "step": 7032 + }, + { + "epoch": 0.2931515985161102, + "grad_norm": 131.0, + "learning_rate": 8.291428845182487e-05, + "loss": 9.8754, + "step": 7033 + }, + { + "epoch": 0.29319328081363843, + "grad_norm": 256.0, + "learning_rate": 8.29092069547015e-05, + "loss": 12.3752, + "step": 7034 + }, + { + "epoch": 0.2932349631111667, + "grad_norm": 452.0, + "learning_rate": 8.29041248578049e-05, + "loss": 16.7503, + "step": 7035 + }, + { + "epoch": 0.2932766454086949, + "grad_norm": 676.0, + "learning_rate": 8.289904216122771e-05, + "loss": 19.2503, + "step": 7036 + }, + { + "epoch": 0.2933183277062232, + "grad_norm": 294.0, + "learning_rate": 8.289395886506255e-05, + "loss": 12.6877, + "step": 7037 + }, + { + "epoch": 0.2933600100037514, + "grad_norm": 157.0, + "learning_rate": 8.288887496940208e-05, + "loss": 11.3128, + "step": 7038 + }, + { + "epoch": 0.29340169230127966, + "grad_norm": 684.0, + "learning_rate": 8.288379047433894e-05, + "loss": 16.2546, + "step": 7039 + }, + { + "epoch": 0.29344337459880787, + "grad_norm": 342.0, + "learning_rate": 8.28787053799658e-05, + "loss": 14.6878, + "step": 7040 + }, + { + "epoch": 0.29348505689633614, + "grad_norm": 223.0, + "learning_rate": 8.287361968637532e-05, + "loss": 11.3129, + "step": 7041 + }, + { + "epoch": 0.29352673919386435, + "grad_norm": 60.25, + "learning_rate": 8.286853339366022e-05, + "loss": 8.5007, + "step": 7042 + }, + { + "epoch": 0.2935684214913926, + "grad_norm": 568.0, + "learning_rate": 8.286344650191318e-05, + "loss": 17.7502, + "step": 7043 + }, + { + "epoch": 0.29361010378892083, + "grad_norm": 400.0, + "learning_rate": 8.285835901122689e-05, + "loss": 14.4376, + "step": 7044 + }, + { + "epoch": 0.2936517860864491, + "grad_norm": 76.0, + "learning_rate": 8.285327092169411e-05, + "loss": 9.5008, + "step": 7045 + }, + { + "epoch": 0.2936934683839773, + "grad_norm": 400.0, + "learning_rate": 8.284818223340755e-05, + "loss": 13.3777, + "step": 7046 + }, + { + "epoch": 0.2937351506815056, + "grad_norm": 227.0, + "learning_rate": 8.284309294645996e-05, + "loss": 12.1878, + "step": 7047 + }, + { + "epoch": 0.2937768329790338, + "grad_norm": 504.0, + "learning_rate": 8.283800306094407e-05, + "loss": 17.1252, + "step": 7048 + }, + { + "epoch": 0.29381851527656205, + "grad_norm": 1600.0, + "learning_rate": 8.283291257695267e-05, + "loss": 29.8802, + "step": 7049 + }, + { + "epoch": 0.29386019757409026, + "grad_norm": 262.0, + "learning_rate": 8.282782149457851e-05, + "loss": 13.1879, + "step": 7050 + }, + { + "epoch": 0.29390187987161853, + "grad_norm": 154.0, + "learning_rate": 8.28227298139144e-05, + "loss": 8.5636, + "step": 7051 + }, + { + "epoch": 0.29394356216914674, + "grad_norm": 428.0, + "learning_rate": 8.281763753505311e-05, + "loss": 14.1888, + "step": 7052 + }, + { + "epoch": 0.293985244466675, + "grad_norm": 420.0, + "learning_rate": 8.281254465808749e-05, + "loss": 12.8176, + "step": 7053 + }, + { + "epoch": 0.2940269267642032, + "grad_norm": 258.0, + "learning_rate": 8.28074511831103e-05, + "loss": 13.1257, + "step": 7054 + }, + { + "epoch": 0.2940686090617315, + "grad_norm": 528.0, + "learning_rate": 8.280235711021442e-05, + "loss": 18.2521, + "step": 7055 + }, + { + "epoch": 0.2941102913592597, + "grad_norm": 392.0, + "learning_rate": 8.279726243949268e-05, + "loss": 13.8126, + "step": 7056 + }, + { + "epoch": 0.29415197365678797, + "grad_norm": 536.0, + "learning_rate": 8.27921671710379e-05, + "loss": 16.8755, + "step": 7057 + }, + { + "epoch": 0.2941936559543162, + "grad_norm": 155.0, + "learning_rate": 8.278707130494297e-05, + "loss": 10.7503, + "step": 7058 + }, + { + "epoch": 0.29423533825184445, + "grad_norm": 1784.0, + "learning_rate": 8.278197484130075e-05, + "loss": 38.0002, + "step": 7059 + }, + { + "epoch": 0.29427702054937266, + "grad_norm": 241.0, + "learning_rate": 8.277687778020414e-05, + "loss": 11.5631, + "step": 7060 + }, + { + "epoch": 0.2943187028469009, + "grad_norm": 56.0, + "learning_rate": 8.2771780121746e-05, + "loss": 7.938, + "step": 7061 + }, + { + "epoch": 0.29436038514442914, + "grad_norm": 452.0, + "learning_rate": 8.276668186601928e-05, + "loss": 16.6311, + "step": 7062 + }, + { + "epoch": 0.2944020674419574, + "grad_norm": 1744.0, + "learning_rate": 8.276158301311686e-05, + "loss": 38.7501, + "step": 7063 + }, + { + "epoch": 0.2944437497394856, + "grad_norm": 284.0, + "learning_rate": 8.27564835631317e-05, + "loss": 13.0004, + "step": 7064 + }, + { + "epoch": 0.2944854320370139, + "grad_norm": 314.0, + "learning_rate": 8.27513835161567e-05, + "loss": 14.8133, + "step": 7065 + }, + { + "epoch": 0.2945271143345421, + "grad_norm": 442.0, + "learning_rate": 8.274628287228482e-05, + "loss": 13.8756, + "step": 7066 + }, + { + "epoch": 0.29456879663207036, + "grad_norm": 74.5, + "learning_rate": 8.274118163160906e-05, + "loss": 8.0626, + "step": 7067 + }, + { + "epoch": 0.2946104789295986, + "grad_norm": 390.0, + "learning_rate": 8.273607979422234e-05, + "loss": 15.0626, + "step": 7068 + }, + { + "epoch": 0.29465216122712684, + "grad_norm": 205.0, + "learning_rate": 8.273097736021765e-05, + "loss": 11.6252, + "step": 7069 + }, + { + "epoch": 0.29469384352465505, + "grad_norm": 84.5, + "learning_rate": 8.272587432968801e-05, + "loss": 7.5944, + "step": 7070 + }, + { + "epoch": 0.2947355258221833, + "grad_norm": 316.0, + "learning_rate": 8.272077070272639e-05, + "loss": 13.6877, + "step": 7071 + }, + { + "epoch": 0.29477720811971153, + "grad_norm": 276.0, + "learning_rate": 8.271566647942583e-05, + "loss": 13.188, + "step": 7072 + }, + { + "epoch": 0.2948188904172398, + "grad_norm": 324.0, + "learning_rate": 8.271056165987934e-05, + "loss": 14.3752, + "step": 7073 + }, + { + "epoch": 0.29486057271476807, + "grad_norm": 134.0, + "learning_rate": 8.270545624417997e-05, + "loss": 9.2504, + "step": 7074 + }, + { + "epoch": 0.2949022550122963, + "grad_norm": 536.0, + "learning_rate": 8.270035023242075e-05, + "loss": 18.3754, + "step": 7075 + }, + { + "epoch": 0.29494393730982454, + "grad_norm": 430.0, + "learning_rate": 8.269524362469474e-05, + "loss": 17.7504, + "step": 7076 + }, + { + "epoch": 0.29498561960735276, + "grad_norm": 119.0, + "learning_rate": 8.269013642109501e-05, + "loss": 8.0004, + "step": 7077 + }, + { + "epoch": 0.295027301904881, + "grad_norm": 636.0, + "learning_rate": 8.268502862171464e-05, + "loss": 21.5006, + "step": 7078 + }, + { + "epoch": 0.29506898420240923, + "grad_norm": 244.0, + "learning_rate": 8.267992022664674e-05, + "loss": 7.5016, + "step": 7079 + }, + { + "epoch": 0.2951106664999375, + "grad_norm": 434.0, + "learning_rate": 8.267481123598437e-05, + "loss": 15.2521, + "step": 7080 + }, + { + "epoch": 0.2951523487974657, + "grad_norm": 452.0, + "learning_rate": 8.266970164982069e-05, + "loss": 15.0012, + "step": 7081 + }, + { + "epoch": 0.295194031094994, + "grad_norm": 680.0, + "learning_rate": 8.266459146824876e-05, + "loss": 19.7509, + "step": 7082 + }, + { + "epoch": 0.2952357133925222, + "grad_norm": 62.0, + "learning_rate": 8.265948069136178e-05, + "loss": 7.8126, + "step": 7083 + }, + { + "epoch": 0.29527739569005046, + "grad_norm": 1176.0, + "learning_rate": 8.265436931925287e-05, + "loss": 29.2502, + "step": 7084 + }, + { + "epoch": 0.29531907798757867, + "grad_norm": 932.0, + "learning_rate": 8.264925735201516e-05, + "loss": 22.2503, + "step": 7085 + }, + { + "epoch": 0.29536076028510694, + "grad_norm": 422.0, + "learning_rate": 8.264414478974185e-05, + "loss": 16.8752, + "step": 7086 + }, + { + "epoch": 0.29540244258263515, + "grad_norm": 123.0, + "learning_rate": 8.26390316325261e-05, + "loss": 9.6877, + "step": 7087 + }, + { + "epoch": 0.2954441248801634, + "grad_norm": 616.0, + "learning_rate": 8.263391788046108e-05, + "loss": 18.5012, + "step": 7088 + }, + { + "epoch": 0.29548580717769163, + "grad_norm": 266.0, + "learning_rate": 8.262880353364004e-05, + "loss": 11.6253, + "step": 7089 + }, + { + "epoch": 0.2955274894752199, + "grad_norm": 230.0, + "learning_rate": 8.262368859215614e-05, + "loss": 12.0002, + "step": 7090 + }, + { + "epoch": 0.2955691717727481, + "grad_norm": 452.0, + "learning_rate": 8.261857305610264e-05, + "loss": 16.7505, + "step": 7091 + }, + { + "epoch": 0.2956108540702764, + "grad_norm": 544.0, + "learning_rate": 8.261345692557274e-05, + "loss": 19.7504, + "step": 7092 + }, + { + "epoch": 0.2956525363678046, + "grad_norm": 254.0, + "learning_rate": 8.260834020065968e-05, + "loss": 12.5003, + "step": 7093 + }, + { + "epoch": 0.29569421866533285, + "grad_norm": 596.0, + "learning_rate": 8.260322288145675e-05, + "loss": 19.1253, + "step": 7094 + }, + { + "epoch": 0.29573590096286106, + "grad_norm": 896.0, + "learning_rate": 8.259810496805717e-05, + "loss": 24.8753, + "step": 7095 + }, + { + "epoch": 0.29577758326038933, + "grad_norm": 96.5, + "learning_rate": 8.259298646055423e-05, + "loss": 7.8447, + "step": 7096 + }, + { + "epoch": 0.29581926555791754, + "grad_norm": 502.0, + "learning_rate": 8.258786735904123e-05, + "loss": 18.0002, + "step": 7097 + }, + { + "epoch": 0.2958609478554458, + "grad_norm": 462.0, + "learning_rate": 8.258274766361145e-05, + "loss": 16.5003, + "step": 7098 + }, + { + "epoch": 0.295902630152974, + "grad_norm": 748.0, + "learning_rate": 8.25776273743582e-05, + "loss": 21.6257, + "step": 7099 + }, + { + "epoch": 0.2959443124505023, + "grad_norm": 772.0, + "learning_rate": 8.25725064913748e-05, + "loss": 19.8753, + "step": 7100 + }, + { + "epoch": 0.2959859947480305, + "grad_norm": 227.0, + "learning_rate": 8.256738501475459e-05, + "loss": 11.1878, + "step": 7101 + }, + { + "epoch": 0.29602767704555877, + "grad_norm": 98.0, + "learning_rate": 8.256226294459088e-05, + "loss": 9.1879, + "step": 7102 + }, + { + "epoch": 0.296069359343087, + "grad_norm": 290.0, + "learning_rate": 8.255714028097704e-05, + "loss": 11.6262, + "step": 7103 + }, + { + "epoch": 0.29611104164061525, + "grad_norm": 358.0, + "learning_rate": 8.255201702400644e-05, + "loss": 14.3757, + "step": 7104 + }, + { + "epoch": 0.29615272393814346, + "grad_norm": 264.0, + "learning_rate": 8.254689317377242e-05, + "loss": 12.8752, + "step": 7105 + }, + { + "epoch": 0.2961944062356717, + "grad_norm": 382.0, + "learning_rate": 8.254176873036839e-05, + "loss": 15.2502, + "step": 7106 + }, + { + "epoch": 0.29623608853319994, + "grad_norm": 260.0, + "learning_rate": 8.253664369388774e-05, + "loss": 10.1252, + "step": 7107 + }, + { + "epoch": 0.2962777708307282, + "grad_norm": 716.0, + "learning_rate": 8.253151806442388e-05, + "loss": 18.8755, + "step": 7108 + }, + { + "epoch": 0.2963194531282564, + "grad_norm": 480.0, + "learning_rate": 8.25263918420702e-05, + "loss": 17.8752, + "step": 7109 + }, + { + "epoch": 0.2963611354257847, + "grad_norm": 368.0, + "learning_rate": 8.252126502692015e-05, + "loss": 15.6257, + "step": 7110 + }, + { + "epoch": 0.2964028177233129, + "grad_norm": 544.0, + "learning_rate": 8.251613761906715e-05, + "loss": 17.8753, + "step": 7111 + }, + { + "epoch": 0.29644450002084116, + "grad_norm": 246.0, + "learning_rate": 8.251100961860466e-05, + "loss": 12.9377, + "step": 7112 + }, + { + "epoch": 0.2964861823183694, + "grad_norm": 1040.0, + "learning_rate": 8.250588102562614e-05, + "loss": 24.2548, + "step": 7113 + }, + { + "epoch": 0.29652786461589764, + "grad_norm": 708.0, + "learning_rate": 8.250075184022503e-05, + "loss": 20.7503, + "step": 7114 + }, + { + "epoch": 0.29656954691342585, + "grad_norm": 278.0, + "learning_rate": 8.249562206249486e-05, + "loss": 12.3137, + "step": 7115 + }, + { + "epoch": 0.2966112292109541, + "grad_norm": 406.0, + "learning_rate": 8.249049169252908e-05, + "loss": 14.8127, + "step": 7116 + }, + { + "epoch": 0.29665291150848233, + "grad_norm": 430.0, + "learning_rate": 8.24853607304212e-05, + "loss": 14.8138, + "step": 7117 + }, + { + "epoch": 0.2966945938060106, + "grad_norm": 452.0, + "learning_rate": 8.248022917626474e-05, + "loss": 15.5001, + "step": 7118 + }, + { + "epoch": 0.2967362761035388, + "grad_norm": 1176.0, + "learning_rate": 8.247509703015324e-05, + "loss": 24.1292, + "step": 7119 + }, + { + "epoch": 0.2967779584010671, + "grad_norm": 93.5, + "learning_rate": 8.246996429218019e-05, + "loss": 5.2502, + "step": 7120 + }, + { + "epoch": 0.2968196406985953, + "grad_norm": 968.0, + "learning_rate": 8.246483096243916e-05, + "loss": 23.8794, + "step": 7121 + }, + { + "epoch": 0.29686132299612356, + "grad_norm": 352.0, + "learning_rate": 8.245969704102371e-05, + "loss": 12.8132, + "step": 7122 + }, + { + "epoch": 0.29690300529365177, + "grad_norm": 568.0, + "learning_rate": 8.24545625280274e-05, + "loss": 17.7502, + "step": 7123 + }, + { + "epoch": 0.29694468759118003, + "grad_norm": 223.0, + "learning_rate": 8.24494274235438e-05, + "loss": 12.6253, + "step": 7124 + }, + { + "epoch": 0.29698636988870825, + "grad_norm": 276.0, + "learning_rate": 8.244429172766652e-05, + "loss": 14.0007, + "step": 7125 + }, + { + "epoch": 0.2970280521862365, + "grad_norm": 640.0, + "learning_rate": 8.243915544048912e-05, + "loss": 20.1257, + "step": 7126 + }, + { + "epoch": 0.2970697344837647, + "grad_norm": 196.0, + "learning_rate": 8.243401856210524e-05, + "loss": 11.3752, + "step": 7127 + }, + { + "epoch": 0.297111416781293, + "grad_norm": 328.0, + "learning_rate": 8.242888109260852e-05, + "loss": 13.4377, + "step": 7128 + }, + { + "epoch": 0.2971530990788212, + "grad_norm": 732.0, + "learning_rate": 8.242374303209253e-05, + "loss": 20.8754, + "step": 7129 + }, + { + "epoch": 0.29719478137634947, + "grad_norm": 508.0, + "learning_rate": 8.241860438065095e-05, + "loss": 17.1296, + "step": 7130 + }, + { + "epoch": 0.2972364636738777, + "grad_norm": 238.0, + "learning_rate": 8.241346513837744e-05, + "loss": 11.3751, + "step": 7131 + }, + { + "epoch": 0.29727814597140595, + "grad_norm": 68.0, + "learning_rate": 8.240832530536564e-05, + "loss": 9.1254, + "step": 7132 + }, + { + "epoch": 0.29731982826893416, + "grad_norm": 186.0, + "learning_rate": 8.240318488170924e-05, + "loss": 10.6254, + "step": 7133 + }, + { + "epoch": 0.29736151056646243, + "grad_norm": 404.0, + "learning_rate": 8.239804386750192e-05, + "loss": 14.0003, + "step": 7134 + }, + { + "epoch": 0.29740319286399064, + "grad_norm": 860.0, + "learning_rate": 8.239290226283737e-05, + "loss": 21.8752, + "step": 7135 + }, + { + "epoch": 0.2974448751615189, + "grad_norm": 116.0, + "learning_rate": 8.23877600678093e-05, + "loss": 9.3128, + "step": 7136 + }, + { + "epoch": 0.2974865574590471, + "grad_norm": 150.0, + "learning_rate": 8.238261728251143e-05, + "loss": 9.3758, + "step": 7137 + }, + { + "epoch": 0.2975282397565754, + "grad_norm": 462.0, + "learning_rate": 8.237747390703749e-05, + "loss": 15.8792, + "step": 7138 + }, + { + "epoch": 0.2975699220541036, + "grad_norm": 390.0, + "learning_rate": 8.23723299414812e-05, + "loss": 14.9377, + "step": 7139 + }, + { + "epoch": 0.29761160435163186, + "grad_norm": 160.0, + "learning_rate": 8.236718538593633e-05, + "loss": 11.0628, + "step": 7140 + }, + { + "epoch": 0.2976532866491601, + "grad_norm": 386.0, + "learning_rate": 8.236204024049665e-05, + "loss": 14.8752, + "step": 7141 + }, + { + "epoch": 0.29769496894668834, + "grad_norm": 250.0, + "learning_rate": 8.23568945052559e-05, + "loss": 10.1879, + "step": 7142 + }, + { + "epoch": 0.29773665124421655, + "grad_norm": 360.0, + "learning_rate": 8.235174818030787e-05, + "loss": 12.5629, + "step": 7143 + }, + { + "epoch": 0.2977783335417448, + "grad_norm": 676.0, + "learning_rate": 8.234660126574635e-05, + "loss": 18.127, + "step": 7144 + }, + { + "epoch": 0.29782001583927303, + "grad_norm": 207.0, + "learning_rate": 8.234145376166517e-05, + "loss": 11.1258, + "step": 7145 + }, + { + "epoch": 0.2978616981368013, + "grad_norm": 474.0, + "learning_rate": 8.233630566815811e-05, + "loss": 13.6254, + "step": 7146 + }, + { + "epoch": 0.29790338043432957, + "grad_norm": 330.0, + "learning_rate": 8.233115698531901e-05, + "loss": 8.1879, + "step": 7147 + }, + { + "epoch": 0.2979450627318578, + "grad_norm": 424.0, + "learning_rate": 8.232600771324173e-05, + "loss": 15.813, + "step": 7148 + }, + { + "epoch": 0.29798674502938605, + "grad_norm": 155.0, + "learning_rate": 8.232085785202006e-05, + "loss": 10.2503, + "step": 7149 + }, + { + "epoch": 0.29802842732691426, + "grad_norm": 2240.0, + "learning_rate": 8.23157074017479e-05, + "loss": 44.7513, + "step": 7150 + }, + { + "epoch": 0.2980701096244425, + "grad_norm": 2272.0, + "learning_rate": 8.23105563625191e-05, + "loss": 42.5008, + "step": 7151 + }, + { + "epoch": 0.29811179192197074, + "grad_norm": 80.0, + "learning_rate": 8.230540473442754e-05, + "loss": 7.6877, + "step": 7152 + }, + { + "epoch": 0.298153474219499, + "grad_norm": 243.0, + "learning_rate": 8.23002525175671e-05, + "loss": 12.7513, + "step": 7153 + }, + { + "epoch": 0.2981951565170272, + "grad_norm": 284.0, + "learning_rate": 8.22950997120317e-05, + "loss": 13.1877, + "step": 7154 + }, + { + "epoch": 0.2982368388145555, + "grad_norm": 812.0, + "learning_rate": 8.228994631791525e-05, + "loss": 25.0001, + "step": 7155 + }, + { + "epoch": 0.2982785211120837, + "grad_norm": 298.0, + "learning_rate": 8.228479233531166e-05, + "loss": 14.4377, + "step": 7156 + }, + { + "epoch": 0.29832020340961196, + "grad_norm": 164.0, + "learning_rate": 8.227963776431485e-05, + "loss": 10.6259, + "step": 7157 + }, + { + "epoch": 0.2983618857071402, + "grad_norm": 266.0, + "learning_rate": 8.227448260501879e-05, + "loss": 12.1884, + "step": 7158 + }, + { + "epoch": 0.29840356800466844, + "grad_norm": 238.0, + "learning_rate": 8.22693268575174e-05, + "loss": 12.3754, + "step": 7159 + }, + { + "epoch": 0.29844525030219665, + "grad_norm": 324.0, + "learning_rate": 8.22641705219047e-05, + "loss": 13.6255, + "step": 7160 + }, + { + "epoch": 0.2984869325997249, + "grad_norm": 97.0, + "learning_rate": 8.225901359827459e-05, + "loss": 8.8756, + "step": 7161 + }, + { + "epoch": 0.29852861489725313, + "grad_norm": 223.0, + "learning_rate": 8.225385608672111e-05, + "loss": 13.2516, + "step": 7162 + }, + { + "epoch": 0.2985702971947814, + "grad_norm": 572.0, + "learning_rate": 8.224869798733825e-05, + "loss": 17.7512, + "step": 7163 + }, + { + "epoch": 0.2986119794923096, + "grad_norm": 488.0, + "learning_rate": 8.224353930021998e-05, + "loss": 17.2502, + "step": 7164 + }, + { + "epoch": 0.2986536617898379, + "grad_norm": 540.0, + "learning_rate": 8.223838002546035e-05, + "loss": 18.1256, + "step": 7165 + }, + { + "epoch": 0.2986953440873661, + "grad_norm": 768.0, + "learning_rate": 8.22332201631534e-05, + "loss": 22.0002, + "step": 7166 + }, + { + "epoch": 0.29873702638489436, + "grad_norm": 130.0, + "learning_rate": 8.222805971339315e-05, + "loss": 9.3751, + "step": 7167 + }, + { + "epoch": 0.29877870868242257, + "grad_norm": 520.0, + "learning_rate": 8.222289867627364e-05, + "loss": 15.8149, + "step": 7168 + }, + { + "epoch": 0.29882039097995083, + "grad_norm": 207.0, + "learning_rate": 8.221773705188895e-05, + "loss": 12.1253, + "step": 7169 + }, + { + "epoch": 0.29886207327747905, + "grad_norm": 488.0, + "learning_rate": 8.221257484033314e-05, + "loss": 17.1252, + "step": 7170 + }, + { + "epoch": 0.2989037555750073, + "grad_norm": 296.0, + "learning_rate": 8.220741204170029e-05, + "loss": 12.8752, + "step": 7171 + }, + { + "epoch": 0.2989454378725355, + "grad_norm": 432.0, + "learning_rate": 8.220224865608448e-05, + "loss": 15.6252, + "step": 7172 + }, + { + "epoch": 0.2989871201700638, + "grad_norm": 648.0, + "learning_rate": 8.219708468357985e-05, + "loss": 19.6253, + "step": 7173 + }, + { + "epoch": 0.299028802467592, + "grad_norm": 217.0, + "learning_rate": 8.219192012428049e-05, + "loss": 11.5001, + "step": 7174 + }, + { + "epoch": 0.29907048476512027, + "grad_norm": 52.25, + "learning_rate": 8.218675497828054e-05, + "loss": 8.376, + "step": 7175 + }, + { + "epoch": 0.2991121670626485, + "grad_norm": 346.0, + "learning_rate": 8.21815892456741e-05, + "loss": 15.4388, + "step": 7176 + }, + { + "epoch": 0.29915384936017675, + "grad_norm": 528.0, + "learning_rate": 8.217642292655536e-05, + "loss": 16.1264, + "step": 7177 + }, + { + "epoch": 0.29919553165770496, + "grad_norm": 110.0, + "learning_rate": 8.217125602101843e-05, + "loss": 6.9377, + "step": 7178 + }, + { + "epoch": 0.2992372139552332, + "grad_norm": 139.0, + "learning_rate": 8.216608852915753e-05, + "loss": 9.1877, + "step": 7179 + }, + { + "epoch": 0.29927889625276144, + "grad_norm": 688.0, + "learning_rate": 8.21609204510668e-05, + "loss": 20.6252, + "step": 7180 + }, + { + "epoch": 0.2993205785502897, + "grad_norm": 340.0, + "learning_rate": 8.215575178684042e-05, + "loss": 13.3754, + "step": 7181 + }, + { + "epoch": 0.2993622608478179, + "grad_norm": 180.0, + "learning_rate": 8.215058253657264e-05, + "loss": 11.2502, + "step": 7182 + }, + { + "epoch": 0.2994039431453462, + "grad_norm": 205.0, + "learning_rate": 8.214541270035763e-05, + "loss": 9.2509, + "step": 7183 + }, + { + "epoch": 0.2994456254428744, + "grad_norm": 390.0, + "learning_rate": 8.214024227828963e-05, + "loss": 14.7502, + "step": 7184 + }, + { + "epoch": 0.29948730774040266, + "grad_norm": 274.0, + "learning_rate": 8.213507127046284e-05, + "loss": 12.9394, + "step": 7185 + }, + { + "epoch": 0.2995289900379309, + "grad_norm": 78.0, + "learning_rate": 8.212989967697154e-05, + "loss": 6.5627, + "step": 7186 + }, + { + "epoch": 0.29957067233545914, + "grad_norm": 336.0, + "learning_rate": 8.212472749790995e-05, + "loss": 13.6877, + "step": 7187 + }, + { + "epoch": 0.29961235463298735, + "grad_norm": 668.0, + "learning_rate": 8.211955473337236e-05, + "loss": 18.6291, + "step": 7188 + }, + { + "epoch": 0.2996540369305156, + "grad_norm": 464.0, + "learning_rate": 8.211438138345302e-05, + "loss": 16.7503, + "step": 7189 + }, + { + "epoch": 0.29969571922804383, + "grad_norm": 246.0, + "learning_rate": 8.210920744824624e-05, + "loss": 10.6882, + "step": 7190 + }, + { + "epoch": 0.2997374015255721, + "grad_norm": 436.0, + "learning_rate": 8.210403292784632e-05, + "loss": 16.6256, + "step": 7191 + }, + { + "epoch": 0.2997790838231003, + "grad_norm": 572.0, + "learning_rate": 8.209885782234752e-05, + "loss": 19.0011, + "step": 7192 + }, + { + "epoch": 0.2998207661206286, + "grad_norm": 209.0, + "learning_rate": 8.20936821318442e-05, + "loss": 11.0628, + "step": 7193 + }, + { + "epoch": 0.2998624484181568, + "grad_norm": 160.0, + "learning_rate": 8.208850585643068e-05, + "loss": 10.8132, + "step": 7194 + }, + { + "epoch": 0.29990413071568506, + "grad_norm": 192.0, + "learning_rate": 8.208332899620127e-05, + "loss": 11.4382, + "step": 7195 + }, + { + "epoch": 0.29994581301321327, + "grad_norm": 258.0, + "learning_rate": 8.207815155125039e-05, + "loss": 8.6878, + "step": 7196 + }, + { + "epoch": 0.29998749531074154, + "grad_norm": 390.0, + "learning_rate": 8.20729735216723e-05, + "loss": 14.5627, + "step": 7197 + }, + { + "epoch": 0.30002917760826975, + "grad_norm": 704.0, + "learning_rate": 8.206779490756144e-05, + "loss": 22.7502, + "step": 7198 + }, + { + "epoch": 0.300070859905798, + "grad_norm": 169.0, + "learning_rate": 8.206261570901216e-05, + "loss": 10.064, + "step": 7199 + }, + { + "epoch": 0.3001125422033262, + "grad_norm": 592.0, + "learning_rate": 8.205743592611888e-05, + "loss": 19.7502, + "step": 7200 + }, + { + "epoch": 0.3001542245008545, + "grad_norm": 190.0, + "learning_rate": 8.205225555897598e-05, + "loss": 11.2518, + "step": 7201 + }, + { + "epoch": 0.3001959067983827, + "grad_norm": 482.0, + "learning_rate": 8.204707460767786e-05, + "loss": 15.8132, + "step": 7202 + }, + { + "epoch": 0.300237589095911, + "grad_norm": 388.0, + "learning_rate": 8.204189307231899e-05, + "loss": 13.4384, + "step": 7203 + }, + { + "epoch": 0.3002792713934392, + "grad_norm": 440.0, + "learning_rate": 8.203671095299375e-05, + "loss": 15.376, + "step": 7204 + }, + { + "epoch": 0.30032095369096745, + "grad_norm": 284.0, + "learning_rate": 8.203152824979664e-05, + "loss": 11.8752, + "step": 7205 + }, + { + "epoch": 0.30036263598849566, + "grad_norm": 227.0, + "learning_rate": 8.202634496282204e-05, + "loss": 12.1261, + "step": 7206 + }, + { + "epoch": 0.30040431828602393, + "grad_norm": 76.5, + "learning_rate": 8.202116109216449e-05, + "loss": 9.9381, + "step": 7207 + }, + { + "epoch": 0.30044600058355214, + "grad_norm": 54.25, + "learning_rate": 8.201597663791843e-05, + "loss": 7.7189, + "step": 7208 + }, + { + "epoch": 0.3004876828810804, + "grad_norm": 282.0, + "learning_rate": 8.201079160017835e-05, + "loss": 12.6879, + "step": 7209 + }, + { + "epoch": 0.3005293651786086, + "grad_norm": 147.0, + "learning_rate": 8.200560597903874e-05, + "loss": 9.5626, + "step": 7210 + }, + { + "epoch": 0.3005710474761369, + "grad_norm": 324.0, + "learning_rate": 8.200041977459414e-05, + "loss": 11.188, + "step": 7211 + }, + { + "epoch": 0.3006127297736651, + "grad_norm": 260.0, + "learning_rate": 8.199523298693904e-05, + "loss": 11.9377, + "step": 7212 + }, + { + "epoch": 0.30065441207119337, + "grad_norm": 368.0, + "learning_rate": 8.1990045616168e-05, + "loss": 15.1253, + "step": 7213 + }, + { + "epoch": 0.3006960943687216, + "grad_norm": 198.0, + "learning_rate": 8.198485766237549e-05, + "loss": 10.3765, + "step": 7214 + }, + { + "epoch": 0.30073777666624985, + "grad_norm": 240.0, + "learning_rate": 8.197966912565615e-05, + "loss": 12.0001, + "step": 7215 + }, + { + "epoch": 0.30077945896377806, + "grad_norm": 520.0, + "learning_rate": 8.197448000610448e-05, + "loss": 16.8772, + "step": 7216 + }, + { + "epoch": 0.3008211412613063, + "grad_norm": 596.0, + "learning_rate": 8.19692903038151e-05, + "loss": 18.5005, + "step": 7217 + }, + { + "epoch": 0.30086282355883454, + "grad_norm": 225.0, + "learning_rate": 8.196410001888256e-05, + "loss": 10.6253, + "step": 7218 + }, + { + "epoch": 0.3009045058563628, + "grad_norm": 268.0, + "learning_rate": 8.195890915140144e-05, + "loss": 11.6878, + "step": 7219 + }, + { + "epoch": 0.30094618815389107, + "grad_norm": 59.5, + "learning_rate": 8.195371770146637e-05, + "loss": 8.2503, + "step": 7220 + }, + { + "epoch": 0.3009878704514193, + "grad_norm": 454.0, + "learning_rate": 8.194852566917195e-05, + "loss": 16.0003, + "step": 7221 + }, + { + "epoch": 0.30102955274894755, + "grad_norm": 304.0, + "learning_rate": 8.194333305461283e-05, + "loss": 10.001, + "step": 7222 + }, + { + "epoch": 0.30107123504647576, + "grad_norm": 290.0, + "learning_rate": 8.193813985788362e-05, + "loss": 13.3132, + "step": 7223 + }, + { + "epoch": 0.301112917344004, + "grad_norm": 472.0, + "learning_rate": 8.193294607907897e-05, + "loss": 17.2503, + "step": 7224 + }, + { + "epoch": 0.30115459964153224, + "grad_norm": 644.0, + "learning_rate": 8.192775171829356e-05, + "loss": 16.7508, + "step": 7225 + }, + { + "epoch": 0.3011962819390605, + "grad_norm": 189.0, + "learning_rate": 8.192255677562203e-05, + "loss": 5.5945, + "step": 7226 + }, + { + "epoch": 0.3012379642365887, + "grad_norm": 414.0, + "learning_rate": 8.191736125115908e-05, + "loss": 14.1884, + "step": 7227 + }, + { + "epoch": 0.301279646534117, + "grad_norm": 32.75, + "learning_rate": 8.191216514499937e-05, + "loss": 5.8439, + "step": 7228 + }, + { + "epoch": 0.3013213288316452, + "grad_norm": 410.0, + "learning_rate": 8.190696845723765e-05, + "loss": 14.3758, + "step": 7229 + }, + { + "epoch": 0.30136301112917346, + "grad_norm": 189.0, + "learning_rate": 8.190177118796856e-05, + "loss": 10.6254, + "step": 7230 + }, + { + "epoch": 0.3014046934267017, + "grad_norm": 298.0, + "learning_rate": 8.189657333728687e-05, + "loss": 14.0003, + "step": 7231 + }, + { + "epoch": 0.30144637572422994, + "grad_norm": 384.0, + "learning_rate": 8.189137490528731e-05, + "loss": 13.2502, + "step": 7232 + }, + { + "epoch": 0.30148805802175815, + "grad_norm": 1328.0, + "learning_rate": 8.18861758920646e-05, + "loss": 26.7538, + "step": 7233 + }, + { + "epoch": 0.3015297403192864, + "grad_norm": 229.0, + "learning_rate": 8.188097629771351e-05, + "loss": 11.6877, + "step": 7234 + }, + { + "epoch": 0.30157142261681463, + "grad_norm": 231.0, + "learning_rate": 8.18757761223288e-05, + "loss": 10.8759, + "step": 7235 + }, + { + "epoch": 0.3016131049143429, + "grad_norm": 498.0, + "learning_rate": 8.187057536600522e-05, + "loss": 16.3782, + "step": 7236 + }, + { + "epoch": 0.3016547872118711, + "grad_norm": 620.0, + "learning_rate": 8.18653740288376e-05, + "loss": 20.0002, + "step": 7237 + }, + { + "epoch": 0.3016964695093994, + "grad_norm": 222.0, + "learning_rate": 8.18601721109207e-05, + "loss": 11.4377, + "step": 7238 + }, + { + "epoch": 0.3017381518069276, + "grad_norm": 400.0, + "learning_rate": 8.185496961234933e-05, + "loss": 15.7502, + "step": 7239 + }, + { + "epoch": 0.30177983410445586, + "grad_norm": 516.0, + "learning_rate": 8.184976653321831e-05, + "loss": 17.2502, + "step": 7240 + }, + { + "epoch": 0.30182151640198407, + "grad_norm": 748.0, + "learning_rate": 8.184456287362248e-05, + "loss": 22.1271, + "step": 7241 + }, + { + "epoch": 0.30186319869951234, + "grad_norm": 64.0, + "learning_rate": 8.183935863365665e-05, + "loss": 7.7825, + "step": 7242 + }, + { + "epoch": 0.30190488099704055, + "grad_norm": 107.5, + "learning_rate": 8.183415381341569e-05, + "loss": 8.126, + "step": 7243 + }, + { + "epoch": 0.3019465632945688, + "grad_norm": 458.0, + "learning_rate": 8.182894841299445e-05, + "loss": 17.0002, + "step": 7244 + }, + { + "epoch": 0.301988245592097, + "grad_norm": 342.0, + "learning_rate": 8.182374243248781e-05, + "loss": 14.0007, + "step": 7245 + }, + { + "epoch": 0.3020299278896253, + "grad_norm": 155.0, + "learning_rate": 8.181853587199062e-05, + "loss": 12.0005, + "step": 7246 + }, + { + "epoch": 0.3020716101871535, + "grad_norm": 382.0, + "learning_rate": 8.18133287315978e-05, + "loss": 15.4377, + "step": 7247 + }, + { + "epoch": 0.3021132924846818, + "grad_norm": 428.0, + "learning_rate": 8.180812101140423e-05, + "loss": 17.2509, + "step": 7248 + }, + { + "epoch": 0.30215497478221, + "grad_norm": 720.0, + "learning_rate": 8.180291271150485e-05, + "loss": 20.7509, + "step": 7249 + }, + { + "epoch": 0.30219665707973825, + "grad_norm": 266.0, + "learning_rate": 8.179770383199455e-05, + "loss": 10.9381, + "step": 7250 + }, + { + "epoch": 0.30223833937726646, + "grad_norm": 235.0, + "learning_rate": 8.179249437296828e-05, + "loss": 12.3753, + "step": 7251 + }, + { + "epoch": 0.30228002167479473, + "grad_norm": 458.0, + "learning_rate": 8.178728433452097e-05, + "loss": 14.8752, + "step": 7252 + }, + { + "epoch": 0.30232170397232294, + "grad_norm": 540.0, + "learning_rate": 8.178207371674757e-05, + "loss": 21.8755, + "step": 7253 + }, + { + "epoch": 0.3023633862698512, + "grad_norm": 528.0, + "learning_rate": 8.177686251974308e-05, + "loss": 17.3755, + "step": 7254 + }, + { + "epoch": 0.3024050685673794, + "grad_norm": 552.0, + "learning_rate": 8.177165074360245e-05, + "loss": 18.5006, + "step": 7255 + }, + { + "epoch": 0.3024467508649077, + "grad_norm": 280.0, + "learning_rate": 8.176643838842065e-05, + "loss": 13.0628, + "step": 7256 + }, + { + "epoch": 0.3024884331624359, + "grad_norm": 109.5, + "learning_rate": 8.176122545429269e-05, + "loss": 6.3134, + "step": 7257 + }, + { + "epoch": 0.30253011545996417, + "grad_norm": 304.0, + "learning_rate": 8.175601194131357e-05, + "loss": 14.0629, + "step": 7258 + }, + { + "epoch": 0.3025717977574924, + "grad_norm": 340.0, + "learning_rate": 8.175079784957834e-05, + "loss": 12.6281, + "step": 7259 + }, + { + "epoch": 0.30261348005502064, + "grad_norm": 160.0, + "learning_rate": 8.174558317918197e-05, + "loss": 9.6877, + "step": 7260 + }, + { + "epoch": 0.30265516235254886, + "grad_norm": 568.0, + "learning_rate": 8.174036793021955e-05, + "loss": 17.2502, + "step": 7261 + }, + { + "epoch": 0.3026968446500771, + "grad_norm": 98.5, + "learning_rate": 8.173515210278611e-05, + "loss": 9.1881, + "step": 7262 + }, + { + "epoch": 0.30273852694760534, + "grad_norm": 158.0, + "learning_rate": 8.172993569697669e-05, + "loss": 8.7505, + "step": 7263 + }, + { + "epoch": 0.3027802092451336, + "grad_norm": 1472.0, + "learning_rate": 8.172471871288638e-05, + "loss": 31.2528, + "step": 7264 + }, + { + "epoch": 0.3028218915426618, + "grad_norm": 544.0, + "learning_rate": 8.171950115061025e-05, + "loss": 18.3756, + "step": 7265 + }, + { + "epoch": 0.3028635738401901, + "grad_norm": 338.0, + "learning_rate": 8.171428301024341e-05, + "loss": 15.1889, + "step": 7266 + }, + { + "epoch": 0.3029052561377183, + "grad_norm": 183.0, + "learning_rate": 8.170906429188094e-05, + "loss": 11.6878, + "step": 7267 + }, + { + "epoch": 0.30294693843524656, + "grad_norm": 516.0, + "learning_rate": 8.170384499561796e-05, + "loss": 16.7512, + "step": 7268 + }, + { + "epoch": 0.30298862073277477, + "grad_norm": 336.0, + "learning_rate": 8.169862512154959e-05, + "loss": 14.8127, + "step": 7269 + }, + { + "epoch": 0.30303030303030304, + "grad_norm": 494.0, + "learning_rate": 8.169340466977095e-05, + "loss": 17.5001, + "step": 7270 + }, + { + "epoch": 0.30307198532783125, + "grad_norm": 344.0, + "learning_rate": 8.168818364037722e-05, + "loss": 14.2504, + "step": 7271 + }, + { + "epoch": 0.3031136676253595, + "grad_norm": 157.0, + "learning_rate": 8.168296203346351e-05, + "loss": 11.126, + "step": 7272 + }, + { + "epoch": 0.30315534992288773, + "grad_norm": 920.0, + "learning_rate": 8.167773984912501e-05, + "loss": 20.3788, + "step": 7273 + }, + { + "epoch": 0.303197032220416, + "grad_norm": 233.0, + "learning_rate": 8.167251708745689e-05, + "loss": 11.1254, + "step": 7274 + }, + { + "epoch": 0.3032387145179442, + "grad_norm": 205.0, + "learning_rate": 8.166729374855435e-05, + "loss": 11.7507, + "step": 7275 + }, + { + "epoch": 0.3032803968154725, + "grad_norm": 306.0, + "learning_rate": 8.166206983251254e-05, + "loss": 12.8753, + "step": 7276 + }, + { + "epoch": 0.3033220791130007, + "grad_norm": 146.0, + "learning_rate": 8.165684533942672e-05, + "loss": 10.5631, + "step": 7277 + }, + { + "epoch": 0.30336376141052895, + "grad_norm": 304.0, + "learning_rate": 8.165162026939208e-05, + "loss": 12.0003, + "step": 7278 + }, + { + "epoch": 0.30340544370805717, + "grad_norm": 478.0, + "learning_rate": 8.164639462250385e-05, + "loss": 14.8809, + "step": 7279 + }, + { + "epoch": 0.30344712600558543, + "grad_norm": 270.0, + "learning_rate": 8.164116839885725e-05, + "loss": 13.0007, + "step": 7280 + }, + { + "epoch": 0.30348880830311364, + "grad_norm": 138.0, + "learning_rate": 8.163594159854757e-05, + "loss": 9.6878, + "step": 7281 + }, + { + "epoch": 0.3035304906006419, + "grad_norm": 548.0, + "learning_rate": 8.163071422167004e-05, + "loss": 17.2502, + "step": 7282 + }, + { + "epoch": 0.3035721728981701, + "grad_norm": 158.0, + "learning_rate": 8.162548626831993e-05, + "loss": 8.5628, + "step": 7283 + }, + { + "epoch": 0.3036138551956984, + "grad_norm": 696.0, + "learning_rate": 8.162025773859252e-05, + "loss": 20.7503, + "step": 7284 + }, + { + "epoch": 0.3036555374932266, + "grad_norm": 362.0, + "learning_rate": 8.161502863258312e-05, + "loss": 16.2506, + "step": 7285 + }, + { + "epoch": 0.30369721979075487, + "grad_norm": 552.0, + "learning_rate": 8.160979895038702e-05, + "loss": 16.5036, + "step": 7286 + }, + { + "epoch": 0.3037389020882831, + "grad_norm": 318.0, + "learning_rate": 8.160456869209952e-05, + "loss": 14.0001, + "step": 7287 + }, + { + "epoch": 0.30378058438581135, + "grad_norm": 140.0, + "learning_rate": 8.159933785781595e-05, + "loss": 8.5632, + "step": 7288 + }, + { + "epoch": 0.30382226668333956, + "grad_norm": 239.0, + "learning_rate": 8.159410644763164e-05, + "loss": 12.0002, + "step": 7289 + }, + { + "epoch": 0.3038639489808678, + "grad_norm": 580.0, + "learning_rate": 8.158887446164192e-05, + "loss": 17.1254, + "step": 7290 + }, + { + "epoch": 0.30390563127839604, + "grad_norm": 235.0, + "learning_rate": 8.158364189994218e-05, + "loss": 9.8753, + "step": 7291 + }, + { + "epoch": 0.3039473135759243, + "grad_norm": 111.0, + "learning_rate": 8.157840876262776e-05, + "loss": 8.6881, + "step": 7292 + }, + { + "epoch": 0.30398899587345257, + "grad_norm": 258.0, + "learning_rate": 8.157317504979405e-05, + "loss": 11.8141, + "step": 7293 + }, + { + "epoch": 0.3040306781709808, + "grad_norm": 434.0, + "learning_rate": 8.15679407615364e-05, + "loss": 16.2502, + "step": 7294 + }, + { + "epoch": 0.30407236046850905, + "grad_norm": 564.0, + "learning_rate": 8.156270589795023e-05, + "loss": 17.8751, + "step": 7295 + }, + { + "epoch": 0.30411404276603726, + "grad_norm": 158.0, + "learning_rate": 8.155747045913094e-05, + "loss": 9.8755, + "step": 7296 + }, + { + "epoch": 0.30415572506356553, + "grad_norm": 348.0, + "learning_rate": 8.155223444517398e-05, + "loss": 14.0005, + "step": 7297 + }, + { + "epoch": 0.30419740736109374, + "grad_norm": 260.0, + "learning_rate": 8.15469978561747e-05, + "loss": 13.3755, + "step": 7298 + }, + { + "epoch": 0.304239089658622, + "grad_norm": 928.0, + "learning_rate": 8.154176069222862e-05, + "loss": 26.2503, + "step": 7299 + }, + { + "epoch": 0.3042807719561502, + "grad_norm": 904.0, + "learning_rate": 8.153652295343114e-05, + "loss": 22.5046, + "step": 7300 + }, + { + "epoch": 0.3043224542536785, + "grad_norm": 816.0, + "learning_rate": 8.153128463987772e-05, + "loss": 22.5002, + "step": 7301 + }, + { + "epoch": 0.3043641365512067, + "grad_norm": 148.0, + "learning_rate": 8.152604575166384e-05, + "loss": 12.5632, + "step": 7302 + }, + { + "epoch": 0.30440581884873497, + "grad_norm": 231.0, + "learning_rate": 8.152080628888499e-05, + "loss": 8.4379, + "step": 7303 + }, + { + "epoch": 0.3044475011462632, + "grad_norm": 85.0, + "learning_rate": 8.151556625163664e-05, + "loss": 8.0627, + "step": 7304 + }, + { + "epoch": 0.30448918344379144, + "grad_norm": 241.0, + "learning_rate": 8.151032564001431e-05, + "loss": 13.3763, + "step": 7305 + }, + { + "epoch": 0.30453086574131966, + "grad_norm": 548.0, + "learning_rate": 8.150508445411348e-05, + "loss": 17.8752, + "step": 7306 + }, + { + "epoch": 0.3045725480388479, + "grad_norm": 1792.0, + "learning_rate": 8.14998426940297e-05, + "loss": 38.7507, + "step": 7307 + }, + { + "epoch": 0.30461423033637613, + "grad_norm": 167.0, + "learning_rate": 8.149460035985847e-05, + "loss": 11.063, + "step": 7308 + }, + { + "epoch": 0.3046559126339044, + "grad_norm": 334.0, + "learning_rate": 8.148935745169536e-05, + "loss": 12.5009, + "step": 7309 + }, + { + "epoch": 0.3046975949314326, + "grad_norm": 115.5, + "learning_rate": 8.148411396963593e-05, + "loss": 8.8754, + "step": 7310 + }, + { + "epoch": 0.3047392772289609, + "grad_norm": 76.0, + "learning_rate": 8.147886991377573e-05, + "loss": 6.344, + "step": 7311 + }, + { + "epoch": 0.3047809595264891, + "grad_norm": 227.0, + "learning_rate": 8.147362528421033e-05, + "loss": 11.8132, + "step": 7312 + }, + { + "epoch": 0.30482264182401736, + "grad_norm": 444.0, + "learning_rate": 8.14683800810353e-05, + "loss": 16.8755, + "step": 7313 + }, + { + "epoch": 0.30486432412154557, + "grad_norm": 253.0, + "learning_rate": 8.146313430434627e-05, + "loss": 12.6885, + "step": 7314 + }, + { + "epoch": 0.30490600641907384, + "grad_norm": 848.0, + "learning_rate": 8.14578879542388e-05, + "loss": 23.8753, + "step": 7315 + }, + { + "epoch": 0.30494768871660205, + "grad_norm": 484.0, + "learning_rate": 8.145264103080855e-05, + "loss": 18.3765, + "step": 7316 + }, + { + "epoch": 0.3049893710141303, + "grad_norm": 147.0, + "learning_rate": 8.14473935341511e-05, + "loss": 11.1877, + "step": 7317 + }, + { + "epoch": 0.30503105331165853, + "grad_norm": 688.0, + "learning_rate": 8.144214546436212e-05, + "loss": 19.2548, + "step": 7318 + }, + { + "epoch": 0.3050727356091868, + "grad_norm": 181.0, + "learning_rate": 8.143689682153728e-05, + "loss": 10.0628, + "step": 7319 + }, + { + "epoch": 0.305114417906715, + "grad_norm": 512.0, + "learning_rate": 8.143164760577218e-05, + "loss": 17.5009, + "step": 7320 + }, + { + "epoch": 0.3051561002042433, + "grad_norm": 352.0, + "learning_rate": 8.14263978171625e-05, + "loss": 12.7507, + "step": 7321 + }, + { + "epoch": 0.3051977825017715, + "grad_norm": 424.0, + "learning_rate": 8.142114745580393e-05, + "loss": 17.2507, + "step": 7322 + }, + { + "epoch": 0.30523946479929975, + "grad_norm": 1696.0, + "learning_rate": 8.141589652179218e-05, + "loss": 31.5018, + "step": 7323 + }, + { + "epoch": 0.30528114709682797, + "grad_norm": 1064.0, + "learning_rate": 8.141064501522294e-05, + "loss": 19.8811, + "step": 7324 + }, + { + "epoch": 0.30532282939435623, + "grad_norm": 322.0, + "learning_rate": 8.140539293619187e-05, + "loss": 13.3763, + "step": 7325 + }, + { + "epoch": 0.30536451169188444, + "grad_norm": 492.0, + "learning_rate": 8.140014028479474e-05, + "loss": 16.7505, + "step": 7326 + }, + { + "epoch": 0.3054061939894127, + "grad_norm": 104.0, + "learning_rate": 8.139488706112726e-05, + "loss": 8.6264, + "step": 7327 + }, + { + "epoch": 0.3054478762869409, + "grad_norm": 370.0, + "learning_rate": 8.138963326528518e-05, + "loss": 13.5627, + "step": 7328 + }, + { + "epoch": 0.3054895585844692, + "grad_norm": 432.0, + "learning_rate": 8.138437889736425e-05, + "loss": 16.3752, + "step": 7329 + }, + { + "epoch": 0.3055312408819974, + "grad_norm": 552.0, + "learning_rate": 8.137912395746023e-05, + "loss": 19.8753, + "step": 7330 + }, + { + "epoch": 0.30557292317952567, + "grad_norm": 732.0, + "learning_rate": 8.137386844566887e-05, + "loss": 19.0033, + "step": 7331 + }, + { + "epoch": 0.3056146054770539, + "grad_norm": 752.0, + "learning_rate": 8.1368612362086e-05, + "loss": 24.0004, + "step": 7332 + }, + { + "epoch": 0.30565628777458215, + "grad_norm": 366.0, + "learning_rate": 8.136335570680738e-05, + "loss": 12.8752, + "step": 7333 + }, + { + "epoch": 0.30569797007211036, + "grad_norm": 149.0, + "learning_rate": 8.135809847992882e-05, + "loss": 9.4379, + "step": 7334 + }, + { + "epoch": 0.3057396523696386, + "grad_norm": 1004.0, + "learning_rate": 8.135284068154612e-05, + "loss": 26.3764, + "step": 7335 + }, + { + "epoch": 0.30578133466716684, + "grad_norm": 478.0, + "learning_rate": 8.134758231175512e-05, + "loss": 17.5005, + "step": 7336 + }, + { + "epoch": 0.3058230169646951, + "grad_norm": 328.0, + "learning_rate": 8.134232337065163e-05, + "loss": 14.7501, + "step": 7337 + }, + { + "epoch": 0.3058646992622233, + "grad_norm": 214.0, + "learning_rate": 8.133706385833156e-05, + "loss": 11.4381, + "step": 7338 + }, + { + "epoch": 0.3059063815597516, + "grad_norm": 868.0, + "learning_rate": 8.133180377489068e-05, + "loss": 22.0013, + "step": 7339 + }, + { + "epoch": 0.3059480638572798, + "grad_norm": 892.0, + "learning_rate": 8.132654312042491e-05, + "loss": 19.631, + "step": 7340 + }, + { + "epoch": 0.30598974615480806, + "grad_norm": 61.0, + "learning_rate": 8.13212818950301e-05, + "loss": 8.1881, + "step": 7341 + }, + { + "epoch": 0.3060314284523363, + "grad_norm": 752.0, + "learning_rate": 8.131602009880216e-05, + "loss": 22.7507, + "step": 7342 + }, + { + "epoch": 0.30607311074986454, + "grad_norm": 248.0, + "learning_rate": 8.131075773183696e-05, + "loss": 13.3752, + "step": 7343 + }, + { + "epoch": 0.30611479304739275, + "grad_norm": 292.0, + "learning_rate": 8.130549479423044e-05, + "loss": 12.5626, + "step": 7344 + }, + { + "epoch": 0.306156475344921, + "grad_norm": 203.0, + "learning_rate": 8.130023128607849e-05, + "loss": 9.6255, + "step": 7345 + }, + { + "epoch": 0.30619815764244923, + "grad_norm": 358.0, + "learning_rate": 8.129496720747703e-05, + "loss": 13.1878, + "step": 7346 + }, + { + "epoch": 0.3062398399399775, + "grad_norm": 55.75, + "learning_rate": 8.128970255852205e-05, + "loss": 6.7503, + "step": 7347 + }, + { + "epoch": 0.3062815222375057, + "grad_norm": 213.0, + "learning_rate": 8.128443733930942e-05, + "loss": 10.502, + "step": 7348 + }, + { + "epoch": 0.306323204535034, + "grad_norm": 135.0, + "learning_rate": 8.127917154993518e-05, + "loss": 6.6566, + "step": 7349 + }, + { + "epoch": 0.3063648868325622, + "grad_norm": 162.0, + "learning_rate": 8.127390519049525e-05, + "loss": 11.0002, + "step": 7350 + }, + { + "epoch": 0.30640656913009046, + "grad_norm": 133.0, + "learning_rate": 8.126863826108562e-05, + "loss": 9.0628, + "step": 7351 + }, + { + "epoch": 0.30644825142761867, + "grad_norm": 238.0, + "learning_rate": 8.126337076180227e-05, + "loss": 10.5628, + "step": 7352 + }, + { + "epoch": 0.30648993372514693, + "grad_norm": 888.0, + "learning_rate": 8.12581026927412e-05, + "loss": 24.2503, + "step": 7353 + }, + { + "epoch": 0.30653161602267515, + "grad_norm": 135.0, + "learning_rate": 8.125283405399847e-05, + "loss": 9.6877, + "step": 7354 + }, + { + "epoch": 0.3065732983202034, + "grad_norm": 188.0, + "learning_rate": 8.124756484567005e-05, + "loss": 10.813, + "step": 7355 + }, + { + "epoch": 0.3066149806177316, + "grad_norm": 330.0, + "learning_rate": 8.1242295067852e-05, + "loss": 12.8131, + "step": 7356 + }, + { + "epoch": 0.3066566629152599, + "grad_norm": 215.0, + "learning_rate": 8.123702472064032e-05, + "loss": 12.0004, + "step": 7357 + }, + { + "epoch": 0.3066983452127881, + "grad_norm": 304.0, + "learning_rate": 8.123175380413112e-05, + "loss": 13.8757, + "step": 7358 + }, + { + "epoch": 0.30674002751031637, + "grad_norm": 264.0, + "learning_rate": 8.122648231842042e-05, + "loss": 12.8127, + "step": 7359 + }, + { + "epoch": 0.3067817098078446, + "grad_norm": 205.0, + "learning_rate": 8.122121026360431e-05, + "loss": 11.7509, + "step": 7360 + }, + { + "epoch": 0.30682339210537285, + "grad_norm": 482.0, + "learning_rate": 8.121593763977886e-05, + "loss": 17.0054, + "step": 7361 + }, + { + "epoch": 0.30686507440290106, + "grad_norm": 560.0, + "learning_rate": 8.12106644470402e-05, + "loss": 19.1255, + "step": 7362 + }, + { + "epoch": 0.30690675670042933, + "grad_norm": 446.0, + "learning_rate": 8.120539068548439e-05, + "loss": 16.7504, + "step": 7363 + }, + { + "epoch": 0.30694843899795754, + "grad_norm": 58.0, + "learning_rate": 8.120011635520757e-05, + "loss": 8.0003, + "step": 7364 + }, + { + "epoch": 0.3069901212954858, + "grad_norm": 700.0, + "learning_rate": 8.119484145630586e-05, + "loss": 21.0009, + "step": 7365 + }, + { + "epoch": 0.3070318035930141, + "grad_norm": 113.5, + "learning_rate": 8.11895659888754e-05, + "loss": 4.8128, + "step": 7366 + }, + { + "epoch": 0.3070734858905423, + "grad_norm": 556.0, + "learning_rate": 8.118428995301233e-05, + "loss": 16.1252, + "step": 7367 + }, + { + "epoch": 0.30711516818807055, + "grad_norm": 308.0, + "learning_rate": 8.11790133488128e-05, + "loss": 11.9384, + "step": 7368 + }, + { + "epoch": 0.30715685048559876, + "grad_norm": 338.0, + "learning_rate": 8.117373617637299e-05, + "loss": 14.3136, + "step": 7369 + }, + { + "epoch": 0.30719853278312703, + "grad_norm": 168.0, + "learning_rate": 8.116845843578907e-05, + "loss": 10.6252, + "step": 7370 + }, + { + "epoch": 0.30724021508065524, + "grad_norm": 245.0, + "learning_rate": 8.116318012715722e-05, + "loss": 12.313, + "step": 7371 + }, + { + "epoch": 0.3072818973781835, + "grad_norm": 332.0, + "learning_rate": 8.115790125057365e-05, + "loss": 13.4395, + "step": 7372 + }, + { + "epoch": 0.3073235796757117, + "grad_norm": 404.0, + "learning_rate": 8.115262180613456e-05, + "loss": 14.5003, + "step": 7373 + }, + { + "epoch": 0.30736526197324, + "grad_norm": 1032.0, + "learning_rate": 8.11473417939362e-05, + "loss": 22.8799, + "step": 7374 + }, + { + "epoch": 0.3074069442707682, + "grad_norm": 992.0, + "learning_rate": 8.114206121407473e-05, + "loss": 27.6257, + "step": 7375 + }, + { + "epoch": 0.30744862656829647, + "grad_norm": 552.0, + "learning_rate": 8.113678006664647e-05, + "loss": 19.7503, + "step": 7376 + }, + { + "epoch": 0.3074903088658247, + "grad_norm": 169.0, + "learning_rate": 8.11314983517476e-05, + "loss": 10.6265, + "step": 7377 + }, + { + "epoch": 0.30753199116335295, + "grad_norm": 105.5, + "learning_rate": 8.112621606947441e-05, + "loss": 8.3754, + "step": 7378 + }, + { + "epoch": 0.30757367346088116, + "grad_norm": 428.0, + "learning_rate": 8.112093321992318e-05, + "loss": 16.6254, + "step": 7379 + }, + { + "epoch": 0.3076153557584094, + "grad_norm": 192.0, + "learning_rate": 8.111564980319018e-05, + "loss": 9.8135, + "step": 7380 + }, + { + "epoch": 0.30765703805593764, + "grad_norm": 370.0, + "learning_rate": 8.111036581937169e-05, + "loss": 10.8768, + "step": 7381 + }, + { + "epoch": 0.3076987203534659, + "grad_norm": 227.0, + "learning_rate": 8.110508126856403e-05, + "loss": 11.4383, + "step": 7382 + }, + { + "epoch": 0.3077404026509941, + "grad_norm": 512.0, + "learning_rate": 8.109979615086349e-05, + "loss": 17.2505, + "step": 7383 + }, + { + "epoch": 0.3077820849485224, + "grad_norm": 217.0, + "learning_rate": 8.109451046636642e-05, + "loss": 11.8129, + "step": 7384 + }, + { + "epoch": 0.3078237672460506, + "grad_norm": 348.0, + "learning_rate": 8.108922421516913e-05, + "loss": 13.8127, + "step": 7385 + }, + { + "epoch": 0.30786544954357886, + "grad_norm": 322.0, + "learning_rate": 8.108393739736798e-05, + "loss": 14.1878, + "step": 7386 + }, + { + "epoch": 0.3079071318411071, + "grad_norm": 368.0, + "learning_rate": 8.10786500130593e-05, + "loss": 12.7503, + "step": 7387 + }, + { + "epoch": 0.30794881413863534, + "grad_norm": 564.0, + "learning_rate": 8.107336206233946e-05, + "loss": 18.6253, + "step": 7388 + }, + { + "epoch": 0.30799049643616355, + "grad_norm": 366.0, + "learning_rate": 8.106807354530483e-05, + "loss": 14.0663, + "step": 7389 + }, + { + "epoch": 0.3080321787336918, + "grad_norm": 560.0, + "learning_rate": 8.106278446205183e-05, + "loss": 17.3755, + "step": 7390 + }, + { + "epoch": 0.30807386103122003, + "grad_norm": 368.0, + "learning_rate": 8.10574948126768e-05, + "loss": 14.3755, + "step": 7391 + }, + { + "epoch": 0.3081155433287483, + "grad_norm": 632.0, + "learning_rate": 8.105220459727618e-05, + "loss": 16.7504, + "step": 7392 + }, + { + "epoch": 0.3081572256262765, + "grad_norm": 155.0, + "learning_rate": 8.104691381594638e-05, + "loss": 11.2505, + "step": 7393 + }, + { + "epoch": 0.3081989079238048, + "grad_norm": 382.0, + "learning_rate": 8.104162246878382e-05, + "loss": 14.8129, + "step": 7394 + }, + { + "epoch": 0.308240590221333, + "grad_norm": 348.0, + "learning_rate": 8.103633055588493e-05, + "loss": 14.1877, + "step": 7395 + }, + { + "epoch": 0.30828227251886126, + "grad_norm": 408.0, + "learning_rate": 8.103103807734616e-05, + "loss": 17.6253, + "step": 7396 + }, + { + "epoch": 0.30832395481638947, + "grad_norm": 330.0, + "learning_rate": 8.102574503326396e-05, + "loss": 14.3128, + "step": 7397 + }, + { + "epoch": 0.30836563711391773, + "grad_norm": 418.0, + "learning_rate": 8.10204514237348e-05, + "loss": 14.9384, + "step": 7398 + }, + { + "epoch": 0.30840731941144595, + "grad_norm": 117.5, + "learning_rate": 8.101515724885518e-05, + "loss": 11.2513, + "step": 7399 + }, + { + "epoch": 0.3084490017089742, + "grad_norm": 294.0, + "learning_rate": 8.100986250872156e-05, + "loss": 12.6253, + "step": 7400 + }, + { + "epoch": 0.3084906840065024, + "grad_norm": 1096.0, + "learning_rate": 8.100456720343042e-05, + "loss": 26.2512, + "step": 7401 + }, + { + "epoch": 0.3085323663040307, + "grad_norm": 232.0, + "learning_rate": 8.099927133307832e-05, + "loss": 13.3129, + "step": 7402 + }, + { + "epoch": 0.3085740486015589, + "grad_norm": 91.5, + "learning_rate": 8.099397489776172e-05, + "loss": 8.9377, + "step": 7403 + }, + { + "epoch": 0.30861573089908717, + "grad_norm": 108.0, + "learning_rate": 8.09886778975772e-05, + "loss": 9.6257, + "step": 7404 + }, + { + "epoch": 0.3086574131966154, + "grad_norm": 338.0, + "learning_rate": 8.098338033262127e-05, + "loss": 14.5639, + "step": 7405 + }, + { + "epoch": 0.30869909549414365, + "grad_norm": 107.5, + "learning_rate": 8.097808220299048e-05, + "loss": 6.469, + "step": 7406 + }, + { + "epoch": 0.30874077779167186, + "grad_norm": 448.0, + "learning_rate": 8.09727835087814e-05, + "loss": 15.3751, + "step": 7407 + }, + { + "epoch": 0.30878246008920013, + "grad_norm": 392.0, + "learning_rate": 8.096748425009056e-05, + "loss": 14.7503, + "step": 7408 + }, + { + "epoch": 0.30882414238672834, + "grad_norm": 147.0, + "learning_rate": 8.09621844270146e-05, + "loss": 10.0021, + "step": 7409 + }, + { + "epoch": 0.3088658246842566, + "grad_norm": 344.0, + "learning_rate": 8.095688403965007e-05, + "loss": 13.6252, + "step": 7410 + }, + { + "epoch": 0.3089075069817848, + "grad_norm": 524.0, + "learning_rate": 8.095158308809359e-05, + "loss": 17.6254, + "step": 7411 + }, + { + "epoch": 0.3089491892793131, + "grad_norm": 448.0, + "learning_rate": 8.094628157244175e-05, + "loss": 14.5642, + "step": 7412 + }, + { + "epoch": 0.3089908715768413, + "grad_norm": 486.0, + "learning_rate": 8.094097949279118e-05, + "loss": 17.7515, + "step": 7413 + }, + { + "epoch": 0.30903255387436956, + "grad_norm": 328.0, + "learning_rate": 8.09356768492385e-05, + "loss": 12.1887, + "step": 7414 + }, + { + "epoch": 0.3090742361718978, + "grad_norm": 504.0, + "learning_rate": 8.093037364188039e-05, + "loss": 17.6251, + "step": 7415 + }, + { + "epoch": 0.30911591846942604, + "grad_norm": 192.0, + "learning_rate": 8.092506987081347e-05, + "loss": 9.8758, + "step": 7416 + }, + { + "epoch": 0.30915760076695425, + "grad_norm": 288.0, + "learning_rate": 8.091976553613439e-05, + "loss": 13.6262, + "step": 7417 + }, + { + "epoch": 0.3091992830644825, + "grad_norm": 284.0, + "learning_rate": 8.091446063793987e-05, + "loss": 9.8753, + "step": 7418 + }, + { + "epoch": 0.30924096536201073, + "grad_norm": 608.0, + "learning_rate": 8.090915517632652e-05, + "loss": 21.1253, + "step": 7419 + }, + { + "epoch": 0.309282647659539, + "grad_norm": 83.0, + "learning_rate": 8.09038491513911e-05, + "loss": 6.7817, + "step": 7420 + }, + { + "epoch": 0.3093243299570672, + "grad_norm": 936.0, + "learning_rate": 8.089854256323028e-05, + "loss": 20.5056, + "step": 7421 + }, + { + "epoch": 0.3093660122545955, + "grad_norm": 544.0, + "learning_rate": 8.089323541194075e-05, + "loss": 16.8787, + "step": 7422 + }, + { + "epoch": 0.3094076945521237, + "grad_norm": 118.5, + "learning_rate": 8.08879276976193e-05, + "loss": 9.0005, + "step": 7423 + }, + { + "epoch": 0.30944937684965196, + "grad_norm": 108.5, + "learning_rate": 8.088261942036262e-05, + "loss": 8.7504, + "step": 7424 + }, + { + "epoch": 0.30949105914718017, + "grad_norm": 226.0, + "learning_rate": 8.087731058026747e-05, + "loss": 11.9377, + "step": 7425 + }, + { + "epoch": 0.30953274144470844, + "grad_norm": 346.0, + "learning_rate": 8.087200117743057e-05, + "loss": 14.1876, + "step": 7426 + }, + { + "epoch": 0.30957442374223665, + "grad_norm": 155.0, + "learning_rate": 8.08666912119487e-05, + "loss": 9.5643, + "step": 7427 + }, + { + "epoch": 0.3096161060397649, + "grad_norm": 172.0, + "learning_rate": 8.086138068391866e-05, + "loss": 10.6879, + "step": 7428 + }, + { + "epoch": 0.3096577883372931, + "grad_norm": 282.0, + "learning_rate": 8.085606959343723e-05, + "loss": 12.8758, + "step": 7429 + }, + { + "epoch": 0.3096994706348214, + "grad_norm": 148.0, + "learning_rate": 8.085075794060118e-05, + "loss": 10.5011, + "step": 7430 + }, + { + "epoch": 0.3097411529323496, + "grad_norm": 440.0, + "learning_rate": 8.084544572550731e-05, + "loss": 16.0011, + "step": 7431 + }, + { + "epoch": 0.3097828352298779, + "grad_norm": 308.0, + "learning_rate": 8.084013294825248e-05, + "loss": 12.6883, + "step": 7432 + }, + { + "epoch": 0.3098245175274061, + "grad_norm": 470.0, + "learning_rate": 8.083481960893348e-05, + "loss": 17.3753, + "step": 7433 + }, + { + "epoch": 0.30986619982493435, + "grad_norm": 234.0, + "learning_rate": 8.082950570764714e-05, + "loss": 11.6253, + "step": 7434 + }, + { + "epoch": 0.30990788212246256, + "grad_norm": 366.0, + "learning_rate": 8.082419124449035e-05, + "loss": 14.0629, + "step": 7435 + }, + { + "epoch": 0.30994956441999083, + "grad_norm": 296.0, + "learning_rate": 8.081887621955992e-05, + "loss": 13.5002, + "step": 7436 + }, + { + "epoch": 0.30999124671751904, + "grad_norm": 320.0, + "learning_rate": 8.081356063295273e-05, + "loss": 13.5042, + "step": 7437 + }, + { + "epoch": 0.3100329290150473, + "grad_norm": 510.0, + "learning_rate": 8.080824448476567e-05, + "loss": 15.1297, + "step": 7438 + }, + { + "epoch": 0.3100746113125756, + "grad_norm": 368.0, + "learning_rate": 8.080292777509563e-05, + "loss": 15.8132, + "step": 7439 + }, + { + "epoch": 0.3101162936101038, + "grad_norm": 203.0, + "learning_rate": 8.079761050403949e-05, + "loss": 12.1253, + "step": 7440 + }, + { + "epoch": 0.31015797590763206, + "grad_norm": 190.0, + "learning_rate": 8.079229267169415e-05, + "loss": 9.7503, + "step": 7441 + }, + { + "epoch": 0.31019965820516027, + "grad_norm": 119.5, + "learning_rate": 8.078697427815656e-05, + "loss": 8.4377, + "step": 7442 + }, + { + "epoch": 0.31024134050268853, + "grad_norm": 856.0, + "learning_rate": 8.078165532352362e-05, + "loss": 18.38, + "step": 7443 + }, + { + "epoch": 0.31028302280021675, + "grad_norm": 580.0, + "learning_rate": 8.077633580789229e-05, + "loss": 19.1255, + "step": 7444 + }, + { + "epoch": 0.310324705097745, + "grad_norm": 426.0, + "learning_rate": 8.07710157313595e-05, + "loss": 16.1256, + "step": 7445 + }, + { + "epoch": 0.3103663873952732, + "grad_norm": 556.0, + "learning_rate": 8.076569509402222e-05, + "loss": 17.6253, + "step": 7446 + }, + { + "epoch": 0.3104080696928015, + "grad_norm": 460.0, + "learning_rate": 8.076037389597742e-05, + "loss": 15.0677, + "step": 7447 + }, + { + "epoch": 0.3104497519903297, + "grad_norm": 398.0, + "learning_rate": 8.075505213732206e-05, + "loss": 14.3752, + "step": 7448 + }, + { + "epoch": 0.31049143428785797, + "grad_norm": 468.0, + "learning_rate": 8.074972981815316e-05, + "loss": 15.3753, + "step": 7449 + }, + { + "epoch": 0.3105331165853862, + "grad_norm": 318.0, + "learning_rate": 8.074440693856768e-05, + "loss": 12.8129, + "step": 7450 + }, + { + "epoch": 0.31057479888291445, + "grad_norm": 350.0, + "learning_rate": 8.073908349866268e-05, + "loss": 14.5003, + "step": 7451 + }, + { + "epoch": 0.31061648118044266, + "grad_norm": 608.0, + "learning_rate": 8.073375949853515e-05, + "loss": 15.6878, + "step": 7452 + }, + { + "epoch": 0.31065816347797093, + "grad_norm": 183.0, + "learning_rate": 8.072843493828213e-05, + "loss": 11.8756, + "step": 7453 + }, + { + "epoch": 0.31069984577549914, + "grad_norm": 180.0, + "learning_rate": 8.072310981800065e-05, + "loss": 10.0626, + "step": 7454 + }, + { + "epoch": 0.3107415280730274, + "grad_norm": 384.0, + "learning_rate": 8.071778413778775e-05, + "loss": 12.7502, + "step": 7455 + }, + { + "epoch": 0.3107832103705556, + "grad_norm": 205.0, + "learning_rate": 8.071245789774053e-05, + "loss": 10.0003, + "step": 7456 + }, + { + "epoch": 0.3108248926680839, + "grad_norm": 154.0, + "learning_rate": 8.070713109795603e-05, + "loss": 11.8131, + "step": 7457 + }, + { + "epoch": 0.3108665749656121, + "grad_norm": 364.0, + "learning_rate": 8.070180373853134e-05, + "loss": 14.8126, + "step": 7458 + }, + { + "epoch": 0.31090825726314036, + "grad_norm": 772.0, + "learning_rate": 8.069647581956355e-05, + "loss": 22.0009, + "step": 7459 + }, + { + "epoch": 0.3109499395606686, + "grad_norm": 456.0, + "learning_rate": 8.069114734114976e-05, + "loss": 17.1251, + "step": 7460 + }, + { + "epoch": 0.31099162185819684, + "grad_norm": 712.0, + "learning_rate": 8.068581830338708e-05, + "loss": 21.3757, + "step": 7461 + }, + { + "epoch": 0.31103330415572505, + "grad_norm": 296.0, + "learning_rate": 8.068048870637265e-05, + "loss": 12.6877, + "step": 7462 + }, + { + "epoch": 0.3110749864532533, + "grad_norm": 684.0, + "learning_rate": 8.067515855020357e-05, + "loss": 19.6252, + "step": 7463 + }, + { + "epoch": 0.31111666875078153, + "grad_norm": 388.0, + "learning_rate": 8.066982783497702e-05, + "loss": 15.5003, + "step": 7464 + }, + { + "epoch": 0.3111583510483098, + "grad_norm": 462.0, + "learning_rate": 8.066449656079015e-05, + "loss": 16.1254, + "step": 7465 + }, + { + "epoch": 0.311200033345838, + "grad_norm": 245.0, + "learning_rate": 8.065916472774009e-05, + "loss": 11.3127, + "step": 7466 + }, + { + "epoch": 0.3112417156433663, + "grad_norm": 106.0, + "learning_rate": 8.065383233592404e-05, + "loss": 8.6894, + "step": 7467 + }, + { + "epoch": 0.3112833979408945, + "grad_norm": 1176.0, + "learning_rate": 8.064849938543915e-05, + "loss": 25.38, + "step": 7468 + }, + { + "epoch": 0.31132508023842276, + "grad_norm": 160.0, + "learning_rate": 8.064316587638265e-05, + "loss": 10.1878, + "step": 7469 + }, + { + "epoch": 0.31136676253595097, + "grad_norm": 96.0, + "learning_rate": 8.063783180885173e-05, + "loss": 8.6879, + "step": 7470 + }, + { + "epoch": 0.31140844483347924, + "grad_norm": 276.0, + "learning_rate": 8.063249718294363e-05, + "loss": 9.8768, + "step": 7471 + }, + { + "epoch": 0.31145012713100745, + "grad_norm": 396.0, + "learning_rate": 8.062716199875553e-05, + "loss": 16.1264, + "step": 7472 + }, + { + "epoch": 0.3114918094285357, + "grad_norm": 644.0, + "learning_rate": 8.062182625638468e-05, + "loss": 20.0004, + "step": 7473 + }, + { + "epoch": 0.3115334917260639, + "grad_norm": 422.0, + "learning_rate": 8.061648995592833e-05, + "loss": 15.6878, + "step": 7474 + }, + { + "epoch": 0.3115751740235922, + "grad_norm": 270.0, + "learning_rate": 8.061115309748374e-05, + "loss": 11.3127, + "step": 7475 + }, + { + "epoch": 0.3116168563211204, + "grad_norm": 308.0, + "learning_rate": 8.060581568114816e-05, + "loss": 14.313, + "step": 7476 + }, + { + "epoch": 0.3116585386186487, + "grad_norm": 302.0, + "learning_rate": 8.060047770701889e-05, + "loss": 12.0007, + "step": 7477 + }, + { + "epoch": 0.3117002209161769, + "grad_norm": 326.0, + "learning_rate": 8.059513917519316e-05, + "loss": 13.3128, + "step": 7478 + }, + { + "epoch": 0.31174190321370515, + "grad_norm": 189.0, + "learning_rate": 8.058980008576833e-05, + "loss": 12.1254, + "step": 7479 + }, + { + "epoch": 0.31178358551123336, + "grad_norm": 342.0, + "learning_rate": 8.058446043884168e-05, + "loss": 14.5006, + "step": 7480 + }, + { + "epoch": 0.31182526780876163, + "grad_norm": 43.5, + "learning_rate": 8.057912023451051e-05, + "loss": 7.5003, + "step": 7481 + }, + { + "epoch": 0.31186695010628984, + "grad_norm": 101.0, + "learning_rate": 8.057377947287217e-05, + "loss": 8.8126, + "step": 7482 + }, + { + "epoch": 0.3119086324038181, + "grad_norm": 284.0, + "learning_rate": 8.056843815402399e-05, + "loss": 11.1882, + "step": 7483 + }, + { + "epoch": 0.3119503147013463, + "grad_norm": 204.0, + "learning_rate": 8.056309627806329e-05, + "loss": 11.0641, + "step": 7484 + }, + { + "epoch": 0.3119919969988746, + "grad_norm": 330.0, + "learning_rate": 8.055775384508746e-05, + "loss": 14.501, + "step": 7485 + }, + { + "epoch": 0.3120336792964028, + "grad_norm": 286.0, + "learning_rate": 8.055241085519384e-05, + "loss": 12.8752, + "step": 7486 + }, + { + "epoch": 0.31207536159393107, + "grad_norm": 968.0, + "learning_rate": 8.054706730847985e-05, + "loss": 26.1251, + "step": 7487 + }, + { + "epoch": 0.3121170438914593, + "grad_norm": 316.0, + "learning_rate": 8.054172320504284e-05, + "loss": 13.1255, + "step": 7488 + }, + { + "epoch": 0.31215872618898755, + "grad_norm": 216.0, + "learning_rate": 8.053637854498018e-05, + "loss": 12.0628, + "step": 7489 + }, + { + "epoch": 0.31220040848651576, + "grad_norm": 111.5, + "learning_rate": 8.053103332838934e-05, + "loss": 10.2504, + "step": 7490 + }, + { + "epoch": 0.312242090784044, + "grad_norm": 480.0, + "learning_rate": 8.05256875553677e-05, + "loss": 17.1253, + "step": 7491 + }, + { + "epoch": 0.31228377308157224, + "grad_norm": 448.0, + "learning_rate": 8.052034122601269e-05, + "loss": 16.0004, + "step": 7492 + }, + { + "epoch": 0.3123254553791005, + "grad_norm": 592.0, + "learning_rate": 8.051499434042176e-05, + "loss": 18.8755, + "step": 7493 + }, + { + "epoch": 0.3123671376766287, + "grad_norm": 336.0, + "learning_rate": 8.050964689869234e-05, + "loss": 14.3755, + "step": 7494 + }, + { + "epoch": 0.312408819974157, + "grad_norm": 222.0, + "learning_rate": 8.05042989009219e-05, + "loss": 12.3754, + "step": 7495 + }, + { + "epoch": 0.3124505022716852, + "grad_norm": 324.0, + "learning_rate": 8.049895034720791e-05, + "loss": 11.5004, + "step": 7496 + }, + { + "epoch": 0.31249218456921346, + "grad_norm": 394.0, + "learning_rate": 8.049360123764785e-05, + "loss": 14.5001, + "step": 7497 + }, + { + "epoch": 0.31253386686674167, + "grad_norm": 316.0, + "learning_rate": 8.048825157233917e-05, + "loss": 12.3128, + "step": 7498 + }, + { + "epoch": 0.31257554916426994, + "grad_norm": 524.0, + "learning_rate": 8.048290135137942e-05, + "loss": 17.1253, + "step": 7499 + }, + { + "epoch": 0.31261723146179815, + "grad_norm": 332.0, + "learning_rate": 8.047755057486609e-05, + "loss": 13.9381, + "step": 7500 + }, + { + "epoch": 0.3126589137593264, + "grad_norm": 159.0, + "learning_rate": 8.047219924289669e-05, + "loss": 6.2816, + "step": 7501 + }, + { + "epoch": 0.31270059605685463, + "grad_norm": 84.0, + "learning_rate": 8.046684735556875e-05, + "loss": 5.8441, + "step": 7502 + }, + { + "epoch": 0.3127422783543829, + "grad_norm": 410.0, + "learning_rate": 8.046149491297983e-05, + "loss": 13.0631, + "step": 7503 + }, + { + "epoch": 0.3127839606519111, + "grad_norm": 221.0, + "learning_rate": 8.045614191522743e-05, + "loss": 12.438, + "step": 7504 + }, + { + "epoch": 0.3128256429494394, + "grad_norm": 406.0, + "learning_rate": 8.045078836240916e-05, + "loss": 14.8758, + "step": 7505 + }, + { + "epoch": 0.3128673252469676, + "grad_norm": 266.0, + "learning_rate": 8.044543425462257e-05, + "loss": 12.3755, + "step": 7506 + }, + { + "epoch": 0.31290900754449585, + "grad_norm": 189.0, + "learning_rate": 8.044007959196523e-05, + "loss": 12.0006, + "step": 7507 + }, + { + "epoch": 0.31295068984202407, + "grad_norm": 82.0, + "learning_rate": 8.043472437453474e-05, + "loss": 9.126, + "step": 7508 + }, + { + "epoch": 0.31299237213955233, + "grad_norm": 140.0, + "learning_rate": 8.04293686024287e-05, + "loss": 7.844, + "step": 7509 + }, + { + "epoch": 0.31303405443708054, + "grad_norm": 116.0, + "learning_rate": 8.042401227574473e-05, + "loss": 8.1257, + "step": 7510 + }, + { + "epoch": 0.3130757367346088, + "grad_norm": 172.0, + "learning_rate": 8.04186553945804e-05, + "loss": 10.5629, + "step": 7511 + }, + { + "epoch": 0.3131174190321371, + "grad_norm": 296.0, + "learning_rate": 8.04132979590334e-05, + "loss": 13.5627, + "step": 7512 + }, + { + "epoch": 0.3131591013296653, + "grad_norm": 247.0, + "learning_rate": 8.040793996920133e-05, + "loss": 12.9381, + "step": 7513 + }, + { + "epoch": 0.31320078362719356, + "grad_norm": 182.0, + "learning_rate": 8.040258142518187e-05, + "loss": 10.1886, + "step": 7514 + }, + { + "epoch": 0.31324246592472177, + "grad_norm": 328.0, + "learning_rate": 8.039722232707266e-05, + "loss": 15.3129, + "step": 7515 + }, + { + "epoch": 0.31328414822225004, + "grad_norm": 296.0, + "learning_rate": 8.039186267497136e-05, + "loss": 14.0635, + "step": 7516 + }, + { + "epoch": 0.31332583051977825, + "grad_norm": 354.0, + "learning_rate": 8.038650246897567e-05, + "loss": 13.5634, + "step": 7517 + }, + { + "epoch": 0.3133675128173065, + "grad_norm": 468.0, + "learning_rate": 8.038114170918329e-05, + "loss": 17.1251, + "step": 7518 + }, + { + "epoch": 0.3134091951148347, + "grad_norm": 237.0, + "learning_rate": 8.037578039569192e-05, + "loss": 11.5003, + "step": 7519 + }, + { + "epoch": 0.313450877412363, + "grad_norm": 350.0, + "learning_rate": 8.037041852859922e-05, + "loss": 15.5629, + "step": 7520 + }, + { + "epoch": 0.3134925597098912, + "grad_norm": 89.0, + "learning_rate": 8.036505610800296e-05, + "loss": 9.2506, + "step": 7521 + }, + { + "epoch": 0.3135342420074195, + "grad_norm": 556.0, + "learning_rate": 8.035969313400086e-05, + "loss": 19.6255, + "step": 7522 + }, + { + "epoch": 0.3135759243049477, + "grad_norm": 416.0, + "learning_rate": 8.035432960669065e-05, + "loss": 15.6252, + "step": 7523 + }, + { + "epoch": 0.31361760660247595, + "grad_norm": 486.0, + "learning_rate": 8.03489655261701e-05, + "loss": 17.1252, + "step": 7524 + }, + { + "epoch": 0.31365928890000416, + "grad_norm": 358.0, + "learning_rate": 8.034360089253694e-05, + "loss": 14.688, + "step": 7525 + }, + { + "epoch": 0.31370097119753243, + "grad_norm": 560.0, + "learning_rate": 8.033823570588897e-05, + "loss": 16.8757, + "step": 7526 + }, + { + "epoch": 0.31374265349506064, + "grad_norm": 290.0, + "learning_rate": 8.033286996632396e-05, + "loss": 12.1253, + "step": 7527 + }, + { + "epoch": 0.3137843357925889, + "grad_norm": 368.0, + "learning_rate": 8.03275036739397e-05, + "loss": 14.0646, + "step": 7528 + }, + { + "epoch": 0.3138260180901171, + "grad_norm": 652.0, + "learning_rate": 8.032213682883401e-05, + "loss": 19.5003, + "step": 7529 + }, + { + "epoch": 0.3138677003876454, + "grad_norm": 344.0, + "learning_rate": 8.031676943110467e-05, + "loss": 14.4377, + "step": 7530 + }, + { + "epoch": 0.3139093826851736, + "grad_norm": 320.0, + "learning_rate": 8.031140148084953e-05, + "loss": 14.0002, + "step": 7531 + }, + { + "epoch": 0.31395106498270187, + "grad_norm": 410.0, + "learning_rate": 8.03060329781664e-05, + "loss": 15.5627, + "step": 7532 + }, + { + "epoch": 0.3139927472802301, + "grad_norm": 254.0, + "learning_rate": 8.030066392315312e-05, + "loss": 10.6905, + "step": 7533 + }, + { + "epoch": 0.31403442957775834, + "grad_norm": 324.0, + "learning_rate": 8.029529431590754e-05, + "loss": 13.0002, + "step": 7534 + }, + { + "epoch": 0.31407611187528656, + "grad_norm": 430.0, + "learning_rate": 8.028992415652755e-05, + "loss": 17.2502, + "step": 7535 + }, + { + "epoch": 0.3141177941728148, + "grad_norm": 406.0, + "learning_rate": 8.0284553445111e-05, + "loss": 17.7505, + "step": 7536 + }, + { + "epoch": 0.31415947647034304, + "grad_norm": 640.0, + "learning_rate": 8.027918218175579e-05, + "loss": 19.7505, + "step": 7537 + }, + { + "epoch": 0.3142011587678713, + "grad_norm": 484.0, + "learning_rate": 8.027381036655977e-05, + "loss": 15.9379, + "step": 7538 + }, + { + "epoch": 0.3142428410653995, + "grad_norm": 548.0, + "learning_rate": 8.026843799962088e-05, + "loss": 16.6257, + "step": 7539 + }, + { + "epoch": 0.3142845233629278, + "grad_norm": 310.0, + "learning_rate": 8.026306508103702e-05, + "loss": 13.7504, + "step": 7540 + }, + { + "epoch": 0.314326205660456, + "grad_norm": 247.0, + "learning_rate": 8.02576916109061e-05, + "loss": 9.0627, + "step": 7541 + }, + { + "epoch": 0.31436788795798426, + "grad_norm": 612.0, + "learning_rate": 8.025231758932608e-05, + "loss": 19.1253, + "step": 7542 + }, + { + "epoch": 0.31440957025551247, + "grad_norm": 412.0, + "learning_rate": 8.024694301639489e-05, + "loss": 12.7515, + "step": 7543 + }, + { + "epoch": 0.31445125255304074, + "grad_norm": 1048.0, + "learning_rate": 8.024156789221046e-05, + "loss": 23.5049, + "step": 7544 + }, + { + "epoch": 0.31449293485056895, + "grad_norm": 400.0, + "learning_rate": 8.023619221687079e-05, + "loss": 15.0002, + "step": 7545 + }, + { + "epoch": 0.3145346171480972, + "grad_norm": 828.0, + "learning_rate": 8.023081599047384e-05, + "loss": 22.3753, + "step": 7546 + }, + { + "epoch": 0.31457629944562543, + "grad_norm": 496.0, + "learning_rate": 8.022543921311756e-05, + "loss": 18.0002, + "step": 7547 + }, + { + "epoch": 0.3146179817431537, + "grad_norm": 300.0, + "learning_rate": 8.022006188489998e-05, + "loss": 13.5628, + "step": 7548 + }, + { + "epoch": 0.3146596640406819, + "grad_norm": 223.0, + "learning_rate": 8.021468400591909e-05, + "loss": 12.188, + "step": 7549 + }, + { + "epoch": 0.3147013463382102, + "grad_norm": 56.0, + "learning_rate": 8.020930557627288e-05, + "loss": 8.1253, + "step": 7550 + }, + { + "epoch": 0.3147430286357384, + "grad_norm": 221.0, + "learning_rate": 8.020392659605942e-05, + "loss": 11.4377, + "step": 7551 + }, + { + "epoch": 0.31478471093326665, + "grad_norm": 354.0, + "learning_rate": 8.019854706537672e-05, + "loss": 13.3753, + "step": 7552 + }, + { + "epoch": 0.31482639323079487, + "grad_norm": 213.0, + "learning_rate": 8.01931669843228e-05, + "loss": 11.5002, + "step": 7553 + }, + { + "epoch": 0.31486807552832313, + "grad_norm": 155.0, + "learning_rate": 8.018778635299574e-05, + "loss": 11.7502, + "step": 7554 + }, + { + "epoch": 0.31490975782585134, + "grad_norm": 124.5, + "learning_rate": 8.018240517149359e-05, + "loss": 10.3756, + "step": 7555 + }, + { + "epoch": 0.3149514401233796, + "grad_norm": 62.5, + "learning_rate": 8.017702343991444e-05, + "loss": 7.8138, + "step": 7556 + }, + { + "epoch": 0.3149931224209078, + "grad_norm": 308.0, + "learning_rate": 8.017164115835634e-05, + "loss": 12.9381, + "step": 7557 + }, + { + "epoch": 0.3150348047184361, + "grad_norm": 192.0, + "learning_rate": 8.016625832691741e-05, + "loss": 10.1881, + "step": 7558 + }, + { + "epoch": 0.3150764870159643, + "grad_norm": 784.0, + "learning_rate": 8.016087494569575e-05, + "loss": 22.6253, + "step": 7559 + }, + { + "epoch": 0.31511816931349257, + "grad_norm": 2064.0, + "learning_rate": 8.015549101478945e-05, + "loss": 44.5008, + "step": 7560 + }, + { + "epoch": 0.3151598516110208, + "grad_norm": 402.0, + "learning_rate": 8.015010653429667e-05, + "loss": 14.8752, + "step": 7561 + }, + { + "epoch": 0.31520153390854905, + "grad_norm": 197.0, + "learning_rate": 8.014472150431552e-05, + "loss": 10.5002, + "step": 7562 + }, + { + "epoch": 0.31524321620607726, + "grad_norm": 396.0, + "learning_rate": 8.013933592494412e-05, + "loss": 12.2503, + "step": 7563 + }, + { + "epoch": 0.3152848985036055, + "grad_norm": 796.0, + "learning_rate": 8.013394979628066e-05, + "loss": 22.7522, + "step": 7564 + }, + { + "epoch": 0.31532658080113374, + "grad_norm": 392.0, + "learning_rate": 8.01285631184233e-05, + "loss": 16.3751, + "step": 7565 + }, + { + "epoch": 0.315368263098662, + "grad_norm": 242.0, + "learning_rate": 8.01231758914702e-05, + "loss": 11.6882, + "step": 7566 + }, + { + "epoch": 0.3154099453961902, + "grad_norm": 450.0, + "learning_rate": 8.011778811551953e-05, + "loss": 14.8754, + "step": 7567 + }, + { + "epoch": 0.3154516276937185, + "grad_norm": 416.0, + "learning_rate": 8.011239979066952e-05, + "loss": 13.9394, + "step": 7568 + }, + { + "epoch": 0.3154933099912467, + "grad_norm": 55.25, + "learning_rate": 8.010701091701833e-05, + "loss": 8.1253, + "step": 7569 + }, + { + "epoch": 0.31553499228877496, + "grad_norm": 312.0, + "learning_rate": 8.01016214946642e-05, + "loss": 13.4379, + "step": 7570 + }, + { + "epoch": 0.3155766745863032, + "grad_norm": 482.0, + "learning_rate": 8.009623152370536e-05, + "loss": 17.0004, + "step": 7571 + }, + { + "epoch": 0.31561835688383144, + "grad_norm": 720.0, + "learning_rate": 8.009084100424003e-05, + "loss": 21.2504, + "step": 7572 + }, + { + "epoch": 0.31566003918135965, + "grad_norm": 444.0, + "learning_rate": 8.008544993636642e-05, + "loss": 17.1253, + "step": 7573 + }, + { + "epoch": 0.3157017214788879, + "grad_norm": 544.0, + "learning_rate": 8.008005832018284e-05, + "loss": 17.5002, + "step": 7574 + }, + { + "epoch": 0.31574340377641613, + "grad_norm": 438.0, + "learning_rate": 8.007466615578752e-05, + "loss": 15.0628, + "step": 7575 + }, + { + "epoch": 0.3157850860739444, + "grad_norm": 231.0, + "learning_rate": 8.006927344327874e-05, + "loss": 9.5005, + "step": 7576 + }, + { + "epoch": 0.3158267683714726, + "grad_norm": 382.0, + "learning_rate": 8.006388018275477e-05, + "loss": 15.6255, + "step": 7577 + }, + { + "epoch": 0.3158684506690009, + "grad_norm": 472.0, + "learning_rate": 8.005848637431395e-05, + "loss": 17.2503, + "step": 7578 + }, + { + "epoch": 0.3159101329665291, + "grad_norm": 506.0, + "learning_rate": 8.005309201805451e-05, + "loss": 18.3753, + "step": 7579 + }, + { + "epoch": 0.31595181526405736, + "grad_norm": 156.0, + "learning_rate": 8.004769711407481e-05, + "loss": 11.1253, + "step": 7580 + }, + { + "epoch": 0.31599349756158557, + "grad_norm": 223.0, + "learning_rate": 8.004230166247318e-05, + "loss": 11.1876, + "step": 7581 + }, + { + "epoch": 0.31603517985911384, + "grad_norm": 346.0, + "learning_rate": 8.003690566334792e-05, + "loss": 14.7504, + "step": 7582 + }, + { + "epoch": 0.31607686215664205, + "grad_norm": 124.0, + "learning_rate": 8.003150911679739e-05, + "loss": 9.7503, + "step": 7583 + }, + { + "epoch": 0.3161185444541703, + "grad_norm": 370.0, + "learning_rate": 8.002611202291993e-05, + "loss": 14.5003, + "step": 7584 + }, + { + "epoch": 0.3161602267516986, + "grad_norm": 294.0, + "learning_rate": 8.002071438181393e-05, + "loss": 12.1252, + "step": 7585 + }, + { + "epoch": 0.3162019090492268, + "grad_norm": 632.0, + "learning_rate": 8.001531619357773e-05, + "loss": 19.5026, + "step": 7586 + }, + { + "epoch": 0.31624359134675506, + "grad_norm": 406.0, + "learning_rate": 8.000991745830975e-05, + "loss": 14.5627, + "step": 7587 + }, + { + "epoch": 0.31628527364428327, + "grad_norm": 1408.0, + "learning_rate": 8.000451817610835e-05, + "loss": 33.0003, + "step": 7588 + }, + { + "epoch": 0.31632695594181154, + "grad_norm": 472.0, + "learning_rate": 7.999911834707193e-05, + "loss": 15.8127, + "step": 7589 + }, + { + "epoch": 0.31636863823933975, + "grad_norm": 928.0, + "learning_rate": 7.999371797129893e-05, + "loss": 21.8802, + "step": 7590 + }, + { + "epoch": 0.316410320536868, + "grad_norm": 288.0, + "learning_rate": 7.998831704888775e-05, + "loss": 12.2503, + "step": 7591 + }, + { + "epoch": 0.31645200283439623, + "grad_norm": 324.0, + "learning_rate": 7.998291557993683e-05, + "loss": 13.3752, + "step": 7592 + }, + { + "epoch": 0.3164936851319245, + "grad_norm": 268.0, + "learning_rate": 7.997751356454462e-05, + "loss": 12.4377, + "step": 7593 + }, + { + "epoch": 0.3165353674294527, + "grad_norm": 326.0, + "learning_rate": 7.997211100280955e-05, + "loss": 11.1255, + "step": 7594 + }, + { + "epoch": 0.316577049726981, + "grad_norm": 324.0, + "learning_rate": 7.99667078948301e-05, + "loss": 14.2502, + "step": 7595 + }, + { + "epoch": 0.3166187320245092, + "grad_norm": 326.0, + "learning_rate": 7.996130424070475e-05, + "loss": 13.1252, + "step": 7596 + }, + { + "epoch": 0.31666041432203745, + "grad_norm": 199.0, + "learning_rate": 7.995590004053196e-05, + "loss": 10.8134, + "step": 7597 + }, + { + "epoch": 0.31670209661956567, + "grad_norm": 170.0, + "learning_rate": 7.995049529441023e-05, + "loss": 11.0008, + "step": 7598 + }, + { + "epoch": 0.31674377891709393, + "grad_norm": 266.0, + "learning_rate": 7.994509000243809e-05, + "loss": 11.7504, + "step": 7599 + }, + { + "epoch": 0.31678546121462214, + "grad_norm": 253.0, + "learning_rate": 7.993968416471399e-05, + "loss": 11.6877, + "step": 7600 + }, + { + "epoch": 0.3168271435121504, + "grad_norm": 290.0, + "learning_rate": 7.99342777813365e-05, + "loss": 13.6254, + "step": 7601 + }, + { + "epoch": 0.3168688258096786, + "grad_norm": 219.0, + "learning_rate": 7.992887085240414e-05, + "loss": 10.6879, + "step": 7602 + }, + { + "epoch": 0.3169105081072069, + "grad_norm": 330.0, + "learning_rate": 7.992346337801546e-05, + "loss": 13.3757, + "step": 7603 + }, + { + "epoch": 0.3169521904047351, + "grad_norm": 312.0, + "learning_rate": 7.991805535826901e-05, + "loss": 14.3128, + "step": 7604 + }, + { + "epoch": 0.31699387270226337, + "grad_norm": 564.0, + "learning_rate": 7.991264679326333e-05, + "loss": 17.7505, + "step": 7605 + }, + { + "epoch": 0.3170355549997916, + "grad_norm": 156.0, + "learning_rate": 7.990723768309702e-05, + "loss": 10.4384, + "step": 7606 + }, + { + "epoch": 0.31707723729731985, + "grad_norm": 328.0, + "learning_rate": 7.990182802786864e-05, + "loss": 13.8128, + "step": 7607 + }, + { + "epoch": 0.31711891959484806, + "grad_norm": 584.0, + "learning_rate": 7.989641782767679e-05, + "loss": 18.3751, + "step": 7608 + }, + { + "epoch": 0.3171606018923763, + "grad_norm": 524.0, + "learning_rate": 7.989100708262008e-05, + "loss": 15.6877, + "step": 7609 + }, + { + "epoch": 0.31720228418990454, + "grad_norm": 97.0, + "learning_rate": 7.988559579279708e-05, + "loss": 8.9377, + "step": 7610 + }, + { + "epoch": 0.3172439664874328, + "grad_norm": 208.0, + "learning_rate": 7.988018395830647e-05, + "loss": 11.1882, + "step": 7611 + }, + { + "epoch": 0.317285648784961, + "grad_norm": 262.0, + "learning_rate": 7.987477157924685e-05, + "loss": 13.1253, + "step": 7612 + }, + { + "epoch": 0.3173273310824893, + "grad_norm": 182.0, + "learning_rate": 7.986935865571688e-05, + "loss": 10.0629, + "step": 7613 + }, + { + "epoch": 0.3173690133800175, + "grad_norm": 170.0, + "learning_rate": 7.986394518781519e-05, + "loss": 9.8753, + "step": 7614 + }, + { + "epoch": 0.31741069567754576, + "grad_norm": 188.0, + "learning_rate": 7.985853117564044e-05, + "loss": 11.3756, + "step": 7615 + }, + { + "epoch": 0.317452377975074, + "grad_norm": 268.0, + "learning_rate": 7.985311661929131e-05, + "loss": 10.7514, + "step": 7616 + }, + { + "epoch": 0.31749406027260224, + "grad_norm": 380.0, + "learning_rate": 7.984770151886647e-05, + "loss": 13.0002, + "step": 7617 + }, + { + "epoch": 0.31753574257013045, + "grad_norm": 165.0, + "learning_rate": 7.984228587446463e-05, + "loss": 5.2816, + "step": 7618 + }, + { + "epoch": 0.3175774248676587, + "grad_norm": 988.0, + "learning_rate": 7.983686968618449e-05, + "loss": 21.0048, + "step": 7619 + }, + { + "epoch": 0.31761910716518693, + "grad_norm": 67.0, + "learning_rate": 7.983145295412474e-05, + "loss": 6.8751, + "step": 7620 + }, + { + "epoch": 0.3176607894627152, + "grad_norm": 458.0, + "learning_rate": 7.982603567838412e-05, + "loss": 17.3752, + "step": 7621 + }, + { + "epoch": 0.3177024717602434, + "grad_norm": 280.0, + "learning_rate": 7.982061785906134e-05, + "loss": 12.1877, + "step": 7622 + }, + { + "epoch": 0.3177441540577717, + "grad_norm": 306.0, + "learning_rate": 7.981519949625515e-05, + "loss": 10.6885, + "step": 7623 + }, + { + "epoch": 0.3177858363552999, + "grad_norm": 430.0, + "learning_rate": 7.980978059006431e-05, + "loss": 14.7503, + "step": 7624 + }, + { + "epoch": 0.31782751865282816, + "grad_norm": 324.0, + "learning_rate": 7.980436114058758e-05, + "loss": 12.6879, + "step": 7625 + }, + { + "epoch": 0.31786920095035637, + "grad_norm": 268.0, + "learning_rate": 7.979894114792372e-05, + "loss": 11.5627, + "step": 7626 + }, + { + "epoch": 0.31791088324788463, + "grad_norm": 1272.0, + "learning_rate": 7.979352061217151e-05, + "loss": 35.5001, + "step": 7627 + }, + { + "epoch": 0.31795256554541285, + "grad_norm": 1120.0, + "learning_rate": 7.978809953342973e-05, + "loss": 29.5001, + "step": 7628 + }, + { + "epoch": 0.3179942478429411, + "grad_norm": 352.0, + "learning_rate": 7.978267791179722e-05, + "loss": 14.3127, + "step": 7629 + }, + { + "epoch": 0.3180359301404693, + "grad_norm": 520.0, + "learning_rate": 7.977725574737273e-05, + "loss": 17.2537, + "step": 7630 + }, + { + "epoch": 0.3180776124379976, + "grad_norm": 704.0, + "learning_rate": 7.977183304025512e-05, + "loss": 21.1253, + "step": 7631 + }, + { + "epoch": 0.3181192947355258, + "grad_norm": 255.0, + "learning_rate": 7.976640979054322e-05, + "loss": 12.4379, + "step": 7632 + }, + { + "epoch": 0.31816097703305407, + "grad_norm": 688.0, + "learning_rate": 7.976098599833586e-05, + "loss": 19.8769, + "step": 7633 + }, + { + "epoch": 0.3182026593305823, + "grad_norm": 121.0, + "learning_rate": 7.975556166373188e-05, + "loss": 9.5627, + "step": 7634 + }, + { + "epoch": 0.31824434162811055, + "grad_norm": 350.0, + "learning_rate": 7.975013678683014e-05, + "loss": 13.9378, + "step": 7635 + }, + { + "epoch": 0.31828602392563876, + "grad_norm": 430.0, + "learning_rate": 7.974471136772953e-05, + "loss": 15.5646, + "step": 7636 + }, + { + "epoch": 0.31832770622316703, + "grad_norm": 516.0, + "learning_rate": 7.973928540652891e-05, + "loss": 16.5025, + "step": 7637 + }, + { + "epoch": 0.31836938852069524, + "grad_norm": 171.0, + "learning_rate": 7.973385890332717e-05, + "loss": 8.9384, + "step": 7638 + }, + { + "epoch": 0.3184110708182235, + "grad_norm": 494.0, + "learning_rate": 7.972843185822322e-05, + "loss": 18.0002, + "step": 7639 + }, + { + "epoch": 0.3184527531157517, + "grad_norm": 366.0, + "learning_rate": 7.972300427131596e-05, + "loss": 14.0629, + "step": 7640 + }, + { + "epoch": 0.31849443541328, + "grad_norm": 964.0, + "learning_rate": 7.97175761427043e-05, + "loss": 24.1295, + "step": 7641 + }, + { + "epoch": 0.3185361177108082, + "grad_norm": 580.0, + "learning_rate": 7.971214747248717e-05, + "loss": 18.0004, + "step": 7642 + }, + { + "epoch": 0.31857780000833646, + "grad_norm": 314.0, + "learning_rate": 7.970671826076353e-05, + "loss": 14.0628, + "step": 7643 + }, + { + "epoch": 0.3186194823058647, + "grad_norm": 584.0, + "learning_rate": 7.97012885076323e-05, + "loss": 20.0002, + "step": 7644 + }, + { + "epoch": 0.31866116460339294, + "grad_norm": 90.0, + "learning_rate": 7.969585821319246e-05, + "loss": 8.0004, + "step": 7645 + }, + { + "epoch": 0.31870284690092116, + "grad_norm": 328.0, + "learning_rate": 7.969042737754297e-05, + "loss": 13.7503, + "step": 7646 + }, + { + "epoch": 0.3187445291984494, + "grad_norm": 356.0, + "learning_rate": 7.96849960007828e-05, + "loss": 15.4378, + "step": 7647 + }, + { + "epoch": 0.31878621149597763, + "grad_norm": 640.0, + "learning_rate": 7.967956408301095e-05, + "loss": 19.0002, + "step": 7648 + }, + { + "epoch": 0.3188278937935059, + "grad_norm": 576.0, + "learning_rate": 7.96741316243264e-05, + "loss": 16.5005, + "step": 7649 + }, + { + "epoch": 0.3188695760910341, + "grad_norm": 876.0, + "learning_rate": 7.966869862482818e-05, + "loss": 24.379, + "step": 7650 + }, + { + "epoch": 0.3189112583885624, + "grad_norm": 430.0, + "learning_rate": 7.966326508461528e-05, + "loss": 15.1253, + "step": 7651 + }, + { + "epoch": 0.3189529406860906, + "grad_norm": 296.0, + "learning_rate": 7.965783100378674e-05, + "loss": 10.9378, + "step": 7652 + }, + { + "epoch": 0.31899462298361886, + "grad_norm": 162.0, + "learning_rate": 7.96523963824416e-05, + "loss": 10.1255, + "step": 7653 + }, + { + "epoch": 0.31903630528114707, + "grad_norm": 143.0, + "learning_rate": 7.96469612206789e-05, + "loss": 8.7502, + "step": 7654 + }, + { + "epoch": 0.31907798757867534, + "grad_norm": 450.0, + "learning_rate": 7.964152551859772e-05, + "loss": 15.3155, + "step": 7655 + }, + { + "epoch": 0.31911966987620355, + "grad_norm": 52.75, + "learning_rate": 7.963608927629708e-05, + "loss": 7.7503, + "step": 7656 + }, + { + "epoch": 0.3191613521737318, + "grad_norm": 504.0, + "learning_rate": 7.963065249387609e-05, + "loss": 18.0011, + "step": 7657 + }, + { + "epoch": 0.3192030344712601, + "grad_norm": 712.0, + "learning_rate": 7.962521517143384e-05, + "loss": 21.3755, + "step": 7658 + }, + { + "epoch": 0.3192447167687883, + "grad_norm": 446.0, + "learning_rate": 7.96197773090694e-05, + "loss": 16.8754, + "step": 7659 + }, + { + "epoch": 0.31928639906631656, + "grad_norm": 302.0, + "learning_rate": 7.96143389068819e-05, + "loss": 13.6263, + "step": 7660 + }, + { + "epoch": 0.3193280813638448, + "grad_norm": 320.0, + "learning_rate": 7.960889996497043e-05, + "loss": 14.4377, + "step": 7661 + }, + { + "epoch": 0.31936976366137304, + "grad_norm": 684.0, + "learning_rate": 7.960346048343414e-05, + "loss": 19.0023, + "step": 7662 + }, + { + "epoch": 0.31941144595890125, + "grad_norm": 217.0, + "learning_rate": 7.959802046237215e-05, + "loss": 12.0628, + "step": 7663 + }, + { + "epoch": 0.3194531282564295, + "grad_norm": 510.0, + "learning_rate": 7.959257990188363e-05, + "loss": 17.1253, + "step": 7664 + }, + { + "epoch": 0.31949481055395773, + "grad_norm": 149.0, + "learning_rate": 7.958713880206768e-05, + "loss": 9.8127, + "step": 7665 + }, + { + "epoch": 0.319536492851486, + "grad_norm": 394.0, + "learning_rate": 7.958169716302353e-05, + "loss": 15.4378, + "step": 7666 + }, + { + "epoch": 0.3195781751490142, + "grad_norm": 214.0, + "learning_rate": 7.957625498485029e-05, + "loss": 12.1879, + "step": 7667 + }, + { + "epoch": 0.3196198574465425, + "grad_norm": 70.0, + "learning_rate": 7.95708122676472e-05, + "loss": 9.126, + "step": 7668 + }, + { + "epoch": 0.3196615397440707, + "grad_norm": 348.0, + "learning_rate": 7.956536901151343e-05, + "loss": 14.3132, + "step": 7669 + }, + { + "epoch": 0.31970322204159896, + "grad_norm": 304.0, + "learning_rate": 7.955992521654818e-05, + "loss": 13.6254, + "step": 7670 + }, + { + "epoch": 0.31974490433912717, + "grad_norm": 812.0, + "learning_rate": 7.955448088285067e-05, + "loss": 21.2553, + "step": 7671 + }, + { + "epoch": 0.31978658663665543, + "grad_norm": 1200.0, + "learning_rate": 7.954903601052013e-05, + "loss": 27.2555, + "step": 7672 + }, + { + "epoch": 0.31982826893418365, + "grad_norm": 280.0, + "learning_rate": 7.954359059965578e-05, + "loss": 13.2504, + "step": 7673 + }, + { + "epoch": 0.3198699512317119, + "grad_norm": 330.0, + "learning_rate": 7.953814465035687e-05, + "loss": 13.1879, + "step": 7674 + }, + { + "epoch": 0.3199116335292401, + "grad_norm": 226.0, + "learning_rate": 7.953269816272265e-05, + "loss": 11.563, + "step": 7675 + }, + { + "epoch": 0.3199533158267684, + "grad_norm": 2160.0, + "learning_rate": 7.952725113685238e-05, + "loss": 43.0003, + "step": 7676 + }, + { + "epoch": 0.3199949981242966, + "grad_norm": 412.0, + "learning_rate": 7.952180357284534e-05, + "loss": 15.0014, + "step": 7677 + }, + { + "epoch": 0.32003668042182487, + "grad_norm": 134.0, + "learning_rate": 7.951635547080081e-05, + "loss": 9.5627, + "step": 7678 + }, + { + "epoch": 0.3200783627193531, + "grad_norm": 352.0, + "learning_rate": 7.951090683081808e-05, + "loss": 14.7506, + "step": 7679 + }, + { + "epoch": 0.32012004501688135, + "grad_norm": 390.0, + "learning_rate": 7.950545765299645e-05, + "loss": 13.6906, + "step": 7680 + }, + { + "epoch": 0.32016172731440956, + "grad_norm": 346.0, + "learning_rate": 7.950000793743524e-05, + "loss": 13.688, + "step": 7681 + }, + { + "epoch": 0.32020340961193783, + "grad_norm": 211.0, + "learning_rate": 7.949455768423378e-05, + "loss": 11.0629, + "step": 7682 + }, + { + "epoch": 0.32024509190946604, + "grad_norm": 892.0, + "learning_rate": 7.948910689349136e-05, + "loss": 23.6275, + "step": 7683 + }, + { + "epoch": 0.3202867742069943, + "grad_norm": 410.0, + "learning_rate": 7.948365556530737e-05, + "loss": 14.6266, + "step": 7684 + }, + { + "epoch": 0.3203284565045225, + "grad_norm": 246.0, + "learning_rate": 7.947820369978112e-05, + "loss": 9.813, + "step": 7685 + }, + { + "epoch": 0.3203701388020508, + "grad_norm": 948.0, + "learning_rate": 7.947275129701202e-05, + "loss": 25.1256, + "step": 7686 + }, + { + "epoch": 0.320411821099579, + "grad_norm": 424.0, + "learning_rate": 7.946729835709938e-05, + "loss": 14.5003, + "step": 7687 + }, + { + "epoch": 0.32045350339710726, + "grad_norm": 226.0, + "learning_rate": 7.946184488014263e-05, + "loss": 10.2529, + "step": 7688 + }, + { + "epoch": 0.3204951856946355, + "grad_norm": 139.0, + "learning_rate": 7.945639086624115e-05, + "loss": 10.1253, + "step": 7689 + }, + { + "epoch": 0.32053686799216374, + "grad_norm": 474.0, + "learning_rate": 7.945093631549431e-05, + "loss": 17.5004, + "step": 7690 + }, + { + "epoch": 0.32057855028969195, + "grad_norm": 195.0, + "learning_rate": 7.944548122800155e-05, + "loss": 9.813, + "step": 7691 + }, + { + "epoch": 0.3206202325872202, + "grad_norm": 270.0, + "learning_rate": 7.944002560386228e-05, + "loss": 14.4378, + "step": 7692 + }, + { + "epoch": 0.32066191488474843, + "grad_norm": 124.0, + "learning_rate": 7.943456944317593e-05, + "loss": 9.0005, + "step": 7693 + }, + { + "epoch": 0.3207035971822767, + "grad_norm": 145.0, + "learning_rate": 7.942911274604194e-05, + "loss": 10.001, + "step": 7694 + }, + { + "epoch": 0.3207452794798049, + "grad_norm": 460.0, + "learning_rate": 7.942365551255978e-05, + "loss": 17.0013, + "step": 7695 + }, + { + "epoch": 0.3207869617773332, + "grad_norm": 364.0, + "learning_rate": 7.941819774282884e-05, + "loss": 13.6253, + "step": 7696 + }, + { + "epoch": 0.3208286440748614, + "grad_norm": 228.0, + "learning_rate": 7.941273943694867e-05, + "loss": 12.5003, + "step": 7697 + }, + { + "epoch": 0.32087032637238966, + "grad_norm": 151.0, + "learning_rate": 7.940728059501869e-05, + "loss": 9.5004, + "step": 7698 + }, + { + "epoch": 0.32091200866991787, + "grad_norm": 440.0, + "learning_rate": 7.940182121713843e-05, + "loss": 15.1272, + "step": 7699 + }, + { + "epoch": 0.32095369096744614, + "grad_norm": 354.0, + "learning_rate": 7.939636130340736e-05, + "loss": 15.4377, + "step": 7700 + }, + { + "epoch": 0.32099537326497435, + "grad_norm": 1456.0, + "learning_rate": 7.9390900853925e-05, + "loss": 35.2512, + "step": 7701 + }, + { + "epoch": 0.3210370555625026, + "grad_norm": 472.0, + "learning_rate": 7.938543986879086e-05, + "loss": 16.5002, + "step": 7702 + }, + { + "epoch": 0.3210787378600308, + "grad_norm": 239.0, + "learning_rate": 7.937997834810446e-05, + "loss": 11.8754, + "step": 7703 + }, + { + "epoch": 0.3211204201575591, + "grad_norm": 106.5, + "learning_rate": 7.937451629196536e-05, + "loss": 5.938, + "step": 7704 + }, + { + "epoch": 0.3211621024550873, + "grad_norm": 107.5, + "learning_rate": 7.936905370047308e-05, + "loss": 8.5002, + "step": 7705 + }, + { + "epoch": 0.3212037847526156, + "grad_norm": 181.0, + "learning_rate": 7.93635905737272e-05, + "loss": 11.3127, + "step": 7706 + }, + { + "epoch": 0.3212454670501438, + "grad_norm": 572.0, + "learning_rate": 7.935812691182727e-05, + "loss": 19.2503, + "step": 7707 + }, + { + "epoch": 0.32128714934767205, + "grad_norm": 548.0, + "learning_rate": 7.935266271487287e-05, + "loss": 18.6274, + "step": 7708 + }, + { + "epoch": 0.32132883164520026, + "grad_norm": 428.0, + "learning_rate": 7.93471979829636e-05, + "loss": 16.6259, + "step": 7709 + }, + { + "epoch": 0.32137051394272853, + "grad_norm": 308.0, + "learning_rate": 7.934173271619902e-05, + "loss": 13.8759, + "step": 7710 + }, + { + "epoch": 0.32141219624025674, + "grad_norm": 416.0, + "learning_rate": 7.933626691467877e-05, + "loss": 16.3757, + "step": 7711 + }, + { + "epoch": 0.321453878537785, + "grad_norm": 656.0, + "learning_rate": 7.933080057850245e-05, + "loss": 20.2504, + "step": 7712 + }, + { + "epoch": 0.3214955608353132, + "grad_norm": 382.0, + "learning_rate": 7.932533370776969e-05, + "loss": 13.8767, + "step": 7713 + }, + { + "epoch": 0.3215372431328415, + "grad_norm": 556.0, + "learning_rate": 7.931986630258012e-05, + "loss": 20.8754, + "step": 7714 + }, + { + "epoch": 0.3215789254303697, + "grad_norm": 1048.0, + "learning_rate": 7.931439836303338e-05, + "loss": 24.2551, + "step": 7715 + }, + { + "epoch": 0.32162060772789797, + "grad_norm": 288.0, + "learning_rate": 7.930892988922911e-05, + "loss": 12.8753, + "step": 7716 + }, + { + "epoch": 0.3216622900254262, + "grad_norm": 418.0, + "learning_rate": 7.930346088126701e-05, + "loss": 15.1252, + "step": 7717 + }, + { + "epoch": 0.32170397232295445, + "grad_norm": 444.0, + "learning_rate": 7.929799133924673e-05, + "loss": 16.7503, + "step": 7718 + }, + { + "epoch": 0.32174565462048266, + "grad_norm": 306.0, + "learning_rate": 7.929252126326795e-05, + "loss": 11.5006, + "step": 7719 + }, + { + "epoch": 0.3217873369180109, + "grad_norm": 428.0, + "learning_rate": 7.928705065343039e-05, + "loss": 13.6259, + "step": 7720 + }, + { + "epoch": 0.32182901921553914, + "grad_norm": 125.0, + "learning_rate": 7.928157950983372e-05, + "loss": 7.563, + "step": 7721 + }, + { + "epoch": 0.3218707015130674, + "grad_norm": 358.0, + "learning_rate": 7.927610783257766e-05, + "loss": 13.3127, + "step": 7722 + }, + { + "epoch": 0.3219123838105956, + "grad_norm": 172.0, + "learning_rate": 7.927063562176193e-05, + "loss": 7.8754, + "step": 7723 + }, + { + "epoch": 0.3219540661081239, + "grad_norm": 668.0, + "learning_rate": 7.926516287748629e-05, + "loss": 20.3752, + "step": 7724 + }, + { + "epoch": 0.3219957484056521, + "grad_norm": 692.0, + "learning_rate": 7.925968959985044e-05, + "loss": 21.1281, + "step": 7725 + }, + { + "epoch": 0.32203743070318036, + "grad_norm": 245.0, + "learning_rate": 7.925421578895415e-05, + "loss": 11.2506, + "step": 7726 + }, + { + "epoch": 0.3220791130007086, + "grad_norm": 420.0, + "learning_rate": 7.924874144489719e-05, + "loss": 14.8754, + "step": 7727 + }, + { + "epoch": 0.32212079529823684, + "grad_norm": 154.0, + "learning_rate": 7.924326656777931e-05, + "loss": 10.2502, + "step": 7728 + }, + { + "epoch": 0.32216247759576505, + "grad_norm": 812.0, + "learning_rate": 7.923779115770032e-05, + "loss": 21.3778, + "step": 7729 + }, + { + "epoch": 0.3222041598932933, + "grad_norm": 252.0, + "learning_rate": 7.923231521475996e-05, + "loss": 10.9378, + "step": 7730 + }, + { + "epoch": 0.3222458421908216, + "grad_norm": 592.0, + "learning_rate": 7.922683873905808e-05, + "loss": 18.6252, + "step": 7731 + }, + { + "epoch": 0.3222875244883498, + "grad_norm": 456.0, + "learning_rate": 7.922136173069448e-05, + "loss": 15.2504, + "step": 7732 + }, + { + "epoch": 0.32232920678587806, + "grad_norm": 173.0, + "learning_rate": 7.921588418976895e-05, + "loss": 11.6258, + "step": 7733 + }, + { + "epoch": 0.3223708890834063, + "grad_norm": 356.0, + "learning_rate": 7.921040611638134e-05, + "loss": 13.7503, + "step": 7734 + }, + { + "epoch": 0.32241257138093454, + "grad_norm": 86.0, + "learning_rate": 7.920492751063149e-05, + "loss": 7.8755, + "step": 7735 + }, + { + "epoch": 0.32245425367846275, + "grad_norm": 596.0, + "learning_rate": 7.919944837261924e-05, + "loss": 15.0661, + "step": 7736 + }, + { + "epoch": 0.322495935975991, + "grad_norm": 179.0, + "learning_rate": 7.919396870244444e-05, + "loss": 10.438, + "step": 7737 + }, + { + "epoch": 0.32253761827351923, + "grad_norm": 768.0, + "learning_rate": 7.918848850020699e-05, + "loss": 22.5006, + "step": 7738 + }, + { + "epoch": 0.3225793005710475, + "grad_norm": 410.0, + "learning_rate": 7.918300776600673e-05, + "loss": 14.8133, + "step": 7739 + }, + { + "epoch": 0.3226209828685757, + "grad_norm": 700.0, + "learning_rate": 7.917752649994358e-05, + "loss": 19.3752, + "step": 7740 + }, + { + "epoch": 0.322662665166104, + "grad_norm": 181.0, + "learning_rate": 7.917204470211741e-05, + "loss": 11.0002, + "step": 7741 + }, + { + "epoch": 0.3227043474636322, + "grad_norm": 376.0, + "learning_rate": 7.916656237262814e-05, + "loss": 14.3757, + "step": 7742 + }, + { + "epoch": 0.32274602976116046, + "grad_norm": 424.0, + "learning_rate": 7.916107951157566e-05, + "loss": 14.3752, + "step": 7743 + }, + { + "epoch": 0.32278771205868867, + "grad_norm": 268.0, + "learning_rate": 7.915559611905994e-05, + "loss": 9.2506, + "step": 7744 + }, + { + "epoch": 0.32282939435621694, + "grad_norm": 588.0, + "learning_rate": 7.915011219518089e-05, + "loss": 16.0026, + "step": 7745 + }, + { + "epoch": 0.32287107665374515, + "grad_norm": 788.0, + "learning_rate": 7.914462774003846e-05, + "loss": 23.376, + "step": 7746 + }, + { + "epoch": 0.3229127589512734, + "grad_norm": 360.0, + "learning_rate": 7.913914275373258e-05, + "loss": 15.2504, + "step": 7747 + }, + { + "epoch": 0.3229544412488016, + "grad_norm": 1256.0, + "learning_rate": 7.913365723636326e-05, + "loss": 26.7505, + "step": 7748 + }, + { + "epoch": 0.3229961235463299, + "grad_norm": 528.0, + "learning_rate": 7.912817118803044e-05, + "loss": 14.3147, + "step": 7749 + }, + { + "epoch": 0.3230378058438581, + "grad_norm": 420.0, + "learning_rate": 7.912268460883412e-05, + "loss": 16.2503, + "step": 7750 + }, + { + "epoch": 0.3230794881413864, + "grad_norm": 302.0, + "learning_rate": 7.911719749887428e-05, + "loss": 7.5653, + "step": 7751 + }, + { + "epoch": 0.3231211704389146, + "grad_norm": 310.0, + "learning_rate": 7.911170985825094e-05, + "loss": 13.2512, + "step": 7752 + }, + { + "epoch": 0.32316285273644285, + "grad_norm": 624.0, + "learning_rate": 7.91062216870641e-05, + "loss": 17.3752, + "step": 7753 + }, + { + "epoch": 0.32320453503397106, + "grad_norm": 1064.0, + "learning_rate": 7.910073298541378e-05, + "loss": 26.7502, + "step": 7754 + }, + { + "epoch": 0.32324621733149933, + "grad_norm": 1416.0, + "learning_rate": 7.909524375340003e-05, + "loss": 30.2504, + "step": 7755 + }, + { + "epoch": 0.32328789962902754, + "grad_norm": 442.0, + "learning_rate": 7.908975399112286e-05, + "loss": 15.9381, + "step": 7756 + }, + { + "epoch": 0.3233295819265558, + "grad_norm": 270.0, + "learning_rate": 7.908426369868236e-05, + "loss": 12.5003, + "step": 7757 + }, + { + "epoch": 0.323371264224084, + "grad_norm": 482.0, + "learning_rate": 7.907877287617857e-05, + "loss": 16.6275, + "step": 7758 + }, + { + "epoch": 0.3234129465216123, + "grad_norm": 444.0, + "learning_rate": 7.907328152371156e-05, + "loss": 15.938, + "step": 7759 + }, + { + "epoch": 0.3234546288191405, + "grad_norm": 250.0, + "learning_rate": 7.906778964138142e-05, + "loss": 11.4411, + "step": 7760 + }, + { + "epoch": 0.32349631111666877, + "grad_norm": 191.0, + "learning_rate": 7.906229722928822e-05, + "loss": 9.1896, + "step": 7761 + }, + { + "epoch": 0.323537993414197, + "grad_norm": 398.0, + "learning_rate": 7.905680428753207e-05, + "loss": 15.5002, + "step": 7762 + }, + { + "epoch": 0.32357967571172525, + "grad_norm": 398.0, + "learning_rate": 7.90513108162131e-05, + "loss": 14.7502, + "step": 7763 + }, + { + "epoch": 0.32362135800925346, + "grad_norm": 354.0, + "learning_rate": 7.90458168154314e-05, + "loss": 13.6252, + "step": 7764 + }, + { + "epoch": 0.3236630403067817, + "grad_norm": 680.0, + "learning_rate": 7.904032228528711e-05, + "loss": 21.0001, + "step": 7765 + }, + { + "epoch": 0.32370472260430994, + "grad_norm": 1080.0, + "learning_rate": 7.903482722588038e-05, + "loss": 30.1257, + "step": 7766 + }, + { + "epoch": 0.3237464049018382, + "grad_norm": 174.0, + "learning_rate": 7.902933163731133e-05, + "loss": 10.2507, + "step": 7767 + }, + { + "epoch": 0.3237880871993664, + "grad_norm": 768.0, + "learning_rate": 7.902383551968013e-05, + "loss": 21.7502, + "step": 7768 + }, + { + "epoch": 0.3238297694968947, + "grad_norm": 216.0, + "learning_rate": 7.901833887308698e-05, + "loss": 10.9377, + "step": 7769 + }, + { + "epoch": 0.3238714517944229, + "grad_norm": 63.0, + "learning_rate": 7.901284169763201e-05, + "loss": 7.6878, + "step": 7770 + }, + { + "epoch": 0.32391313409195116, + "grad_norm": 434.0, + "learning_rate": 7.900734399341543e-05, + "loss": 15.1255, + "step": 7771 + }, + { + "epoch": 0.32395481638947937, + "grad_norm": 800.0, + "learning_rate": 7.900184576053742e-05, + "loss": 20.7502, + "step": 7772 + }, + { + "epoch": 0.32399649868700764, + "grad_norm": 458.0, + "learning_rate": 7.89963469990982e-05, + "loss": 16.5006, + "step": 7773 + }, + { + "epoch": 0.32403818098453585, + "grad_norm": 720.0, + "learning_rate": 7.899084770919798e-05, + "loss": 19.2539, + "step": 7774 + }, + { + "epoch": 0.3240798632820641, + "grad_norm": 310.0, + "learning_rate": 7.8985347890937e-05, + "loss": 12.3754, + "step": 7775 + }, + { + "epoch": 0.32412154557959233, + "grad_norm": 326.0, + "learning_rate": 7.897984754441546e-05, + "loss": 11.5629, + "step": 7776 + }, + { + "epoch": 0.3241632278771206, + "grad_norm": 235.0, + "learning_rate": 7.897434666973364e-05, + "loss": 10.8133, + "step": 7777 + }, + { + "epoch": 0.3242049101746488, + "grad_norm": 680.0, + "learning_rate": 7.896884526699177e-05, + "loss": 16.8756, + "step": 7778 + }, + { + "epoch": 0.3242465924721771, + "grad_norm": 400.0, + "learning_rate": 7.896334333629014e-05, + "loss": 14.2506, + "step": 7779 + }, + { + "epoch": 0.3242882747697053, + "grad_norm": 1400.0, + "learning_rate": 7.895784087772899e-05, + "loss": 30.2541, + "step": 7780 + }, + { + "epoch": 0.32432995706723355, + "grad_norm": 960.0, + "learning_rate": 7.895233789140863e-05, + "loss": 22.2555, + "step": 7781 + }, + { + "epoch": 0.32437163936476177, + "grad_norm": 356.0, + "learning_rate": 7.894683437742934e-05, + "loss": 12.6885, + "step": 7782 + }, + { + "epoch": 0.32441332166229003, + "grad_norm": 436.0, + "learning_rate": 7.894133033589143e-05, + "loss": 16.8753, + "step": 7783 + }, + { + "epoch": 0.32445500395981824, + "grad_norm": 720.0, + "learning_rate": 7.89358257668952e-05, + "loss": 22.1252, + "step": 7784 + }, + { + "epoch": 0.3244966862573465, + "grad_norm": 264.0, + "learning_rate": 7.893032067054097e-05, + "loss": 12.1882, + "step": 7785 + }, + { + "epoch": 0.3245383685548747, + "grad_norm": 239.0, + "learning_rate": 7.89248150469291e-05, + "loss": 11.6888, + "step": 7786 + }, + { + "epoch": 0.324580050852403, + "grad_norm": 236.0, + "learning_rate": 7.891930889615988e-05, + "loss": 12.8752, + "step": 7787 + }, + { + "epoch": 0.3246217331499312, + "grad_norm": 346.0, + "learning_rate": 7.891380221833368e-05, + "loss": 11.3135, + "step": 7788 + }, + { + "epoch": 0.32466341544745947, + "grad_norm": 404.0, + "learning_rate": 7.89082950135509e-05, + "loss": 14.7508, + "step": 7789 + }, + { + "epoch": 0.3247050977449877, + "grad_norm": 338.0, + "learning_rate": 7.890278728191187e-05, + "loss": 14.5003, + "step": 7790 + }, + { + "epoch": 0.32474678004251595, + "grad_norm": 322.0, + "learning_rate": 7.889727902351697e-05, + "loss": 9.8129, + "step": 7791 + }, + { + "epoch": 0.32478846234004416, + "grad_norm": 270.0, + "learning_rate": 7.88917702384666e-05, + "loss": 12.9384, + "step": 7792 + }, + { + "epoch": 0.3248301446375724, + "grad_norm": 1704.0, + "learning_rate": 7.888626092686113e-05, + "loss": 31.5048, + "step": 7793 + }, + { + "epoch": 0.32487182693510064, + "grad_norm": 184.0, + "learning_rate": 7.888075108880102e-05, + "loss": 10.438, + "step": 7794 + }, + { + "epoch": 0.3249135092326289, + "grad_norm": 424.0, + "learning_rate": 7.887524072438664e-05, + "loss": 16.2515, + "step": 7795 + }, + { + "epoch": 0.3249551915301571, + "grad_norm": 364.0, + "learning_rate": 7.886972983371844e-05, + "loss": 15.1878, + "step": 7796 + }, + { + "epoch": 0.3249968738276854, + "grad_norm": 276.0, + "learning_rate": 7.886421841689686e-05, + "loss": 13.3756, + "step": 7797 + }, + { + "epoch": 0.3250385561252136, + "grad_norm": 338.0, + "learning_rate": 7.885870647402232e-05, + "loss": 14.6253, + "step": 7798 + }, + { + "epoch": 0.32508023842274186, + "grad_norm": 368.0, + "learning_rate": 7.88531940051953e-05, + "loss": 12.0639, + "step": 7799 + }, + { + "epoch": 0.3251219207202701, + "grad_norm": 262.0, + "learning_rate": 7.884768101051625e-05, + "loss": 11.8128, + "step": 7800 + }, + { + "epoch": 0.32516360301779834, + "grad_norm": 332.0, + "learning_rate": 7.884216749008566e-05, + "loss": 13.0629, + "step": 7801 + }, + { + "epoch": 0.32520528531532655, + "grad_norm": 672.0, + "learning_rate": 7.883665344400401e-05, + "loss": 21.3755, + "step": 7802 + }, + { + "epoch": 0.3252469676128548, + "grad_norm": 366.0, + "learning_rate": 7.883113887237179e-05, + "loss": 14.9377, + "step": 7803 + }, + { + "epoch": 0.3252886499103831, + "grad_norm": 536.0, + "learning_rate": 7.882562377528951e-05, + "loss": 19.1253, + "step": 7804 + }, + { + "epoch": 0.3253303322079113, + "grad_norm": 166.0, + "learning_rate": 7.882010815285766e-05, + "loss": 10.0005, + "step": 7805 + }, + { + "epoch": 0.32537201450543957, + "grad_norm": 462.0, + "learning_rate": 7.88145920051768e-05, + "loss": 15.3751, + "step": 7806 + }, + { + "epoch": 0.3254136968029678, + "grad_norm": 83.0, + "learning_rate": 7.880907533234743e-05, + "loss": 5.9692, + "step": 7807 + }, + { + "epoch": 0.32545537910049605, + "grad_norm": 564.0, + "learning_rate": 7.880355813447012e-05, + "loss": 19.1252, + "step": 7808 + }, + { + "epoch": 0.32549706139802426, + "grad_norm": 748.0, + "learning_rate": 7.879804041164538e-05, + "loss": 22.3753, + "step": 7809 + }, + { + "epoch": 0.3255387436955525, + "grad_norm": 356.0, + "learning_rate": 7.879252216397382e-05, + "loss": 13.9377, + "step": 7810 + }, + { + "epoch": 0.32558042599308074, + "grad_norm": 91.5, + "learning_rate": 7.878700339155597e-05, + "loss": 9.7503, + "step": 7811 + }, + { + "epoch": 0.325622108290609, + "grad_norm": 360.0, + "learning_rate": 7.878148409449244e-05, + "loss": 16.0003, + "step": 7812 + }, + { + "epoch": 0.3256637905881372, + "grad_norm": 436.0, + "learning_rate": 7.877596427288381e-05, + "loss": 16.5007, + "step": 7813 + }, + { + "epoch": 0.3257054728856655, + "grad_norm": 492.0, + "learning_rate": 7.877044392683066e-05, + "loss": 17.6253, + "step": 7814 + }, + { + "epoch": 0.3257471551831937, + "grad_norm": 90.5, + "learning_rate": 7.876492305643364e-05, + "loss": 6.2504, + "step": 7815 + }, + { + "epoch": 0.32578883748072196, + "grad_norm": 156.0, + "learning_rate": 7.875940166179333e-05, + "loss": 11.0632, + "step": 7816 + }, + { + "epoch": 0.32583051977825017, + "grad_norm": 156.0, + "learning_rate": 7.875387974301036e-05, + "loss": 9.127, + "step": 7817 + }, + { + "epoch": 0.32587220207577844, + "grad_norm": 75.0, + "learning_rate": 7.87483573001854e-05, + "loss": 8.5632, + "step": 7818 + }, + { + "epoch": 0.32591388437330665, + "grad_norm": 212.0, + "learning_rate": 7.874283433341907e-05, + "loss": 9.1269, + "step": 7819 + }, + { + "epoch": 0.3259555666708349, + "grad_norm": 226.0, + "learning_rate": 7.873731084281202e-05, + "loss": 10.7504, + "step": 7820 + }, + { + "epoch": 0.32599724896836313, + "grad_norm": 1216.0, + "learning_rate": 7.873178682846493e-05, + "loss": 27.2509, + "step": 7821 + }, + { + "epoch": 0.3260389312658914, + "grad_norm": 264.0, + "learning_rate": 7.87262622904785e-05, + "loss": 11.125, + "step": 7822 + }, + { + "epoch": 0.3260806135634196, + "grad_norm": 117.5, + "learning_rate": 7.872073722895337e-05, + "loss": 10.2503, + "step": 7823 + }, + { + "epoch": 0.3261222958609479, + "grad_norm": 382.0, + "learning_rate": 7.871521164399025e-05, + "loss": 14.5005, + "step": 7824 + }, + { + "epoch": 0.3261639781584761, + "grad_norm": 384.0, + "learning_rate": 7.870968553568986e-05, + "loss": 15.7503, + "step": 7825 + }, + { + "epoch": 0.32620566045600435, + "grad_norm": 664.0, + "learning_rate": 7.870415890415291e-05, + "loss": 21.3755, + "step": 7826 + }, + { + "epoch": 0.32624734275353257, + "grad_norm": 197.0, + "learning_rate": 7.869863174948009e-05, + "loss": 12.2503, + "step": 7827 + }, + { + "epoch": 0.32628902505106083, + "grad_norm": 224.0, + "learning_rate": 7.869310407177217e-05, + "loss": 10.0643, + "step": 7828 + }, + { + "epoch": 0.32633070734858904, + "grad_norm": 580.0, + "learning_rate": 7.868757587112989e-05, + "loss": 16.3786, + "step": 7829 + }, + { + "epoch": 0.3263723896461173, + "grad_norm": 320.0, + "learning_rate": 7.868204714765399e-05, + "loss": 13.8752, + "step": 7830 + }, + { + "epoch": 0.3264140719436455, + "grad_norm": 264.0, + "learning_rate": 7.867651790144523e-05, + "loss": 12.251, + "step": 7831 + }, + { + "epoch": 0.3264557542411738, + "grad_norm": 241.0, + "learning_rate": 7.867098813260439e-05, + "loss": 13.0008, + "step": 7832 + }, + { + "epoch": 0.326497436538702, + "grad_norm": 1552.0, + "learning_rate": 7.866545784123223e-05, + "loss": 31.7545, + "step": 7833 + }, + { + "epoch": 0.32653911883623027, + "grad_norm": 270.0, + "learning_rate": 7.865992702742959e-05, + "loss": 12.3128, + "step": 7834 + }, + { + "epoch": 0.3265808011337585, + "grad_norm": 247.0, + "learning_rate": 7.86543956912972e-05, + "loss": 11.7511, + "step": 7835 + }, + { + "epoch": 0.32662248343128675, + "grad_norm": 440.0, + "learning_rate": 7.864886383293592e-05, + "loss": 15.938, + "step": 7836 + }, + { + "epoch": 0.32666416572881496, + "grad_norm": 532.0, + "learning_rate": 7.864333145244656e-05, + "loss": 16.8783, + "step": 7837 + }, + { + "epoch": 0.3267058480263432, + "grad_norm": 286.0, + "learning_rate": 7.863779854992993e-05, + "loss": 12.188, + "step": 7838 + }, + { + "epoch": 0.32674753032387144, + "grad_norm": 134.0, + "learning_rate": 7.863226512548689e-05, + "loss": 10.6254, + "step": 7839 + }, + { + "epoch": 0.3267892126213997, + "grad_norm": 456.0, + "learning_rate": 7.862673117921826e-05, + "loss": 12.2545, + "step": 7840 + }, + { + "epoch": 0.3268308949189279, + "grad_norm": 280.0, + "learning_rate": 7.862119671122494e-05, + "loss": 13.6252, + "step": 7841 + }, + { + "epoch": 0.3268725772164562, + "grad_norm": 408.0, + "learning_rate": 7.861566172160774e-05, + "loss": 16.1252, + "step": 7842 + }, + { + "epoch": 0.3269142595139844, + "grad_norm": 300.0, + "learning_rate": 7.861012621046758e-05, + "loss": 14.1258, + "step": 7843 + }, + { + "epoch": 0.32695594181151266, + "grad_norm": 121.0, + "learning_rate": 7.860459017790532e-05, + "loss": 8.8128, + "step": 7844 + }, + { + "epoch": 0.3269976241090409, + "grad_norm": 255.0, + "learning_rate": 7.859905362402187e-05, + "loss": 12.6878, + "step": 7845 + }, + { + "epoch": 0.32703930640656914, + "grad_norm": 1440.0, + "learning_rate": 7.859351654891814e-05, + "loss": 26.754, + "step": 7846 + }, + { + "epoch": 0.32708098870409735, + "grad_norm": 352.0, + "learning_rate": 7.858797895269503e-05, + "loss": 13.314, + "step": 7847 + }, + { + "epoch": 0.3271226710016256, + "grad_norm": 296.0, + "learning_rate": 7.858244083545346e-05, + "loss": 12.9377, + "step": 7848 + }, + { + "epoch": 0.32716435329915383, + "grad_norm": 1152.0, + "learning_rate": 7.857690219729437e-05, + "loss": 25.3793, + "step": 7849 + }, + { + "epoch": 0.3272060355966821, + "grad_norm": 228.0, + "learning_rate": 7.857136303831869e-05, + "loss": 12.6255, + "step": 7850 + }, + { + "epoch": 0.3272477178942103, + "grad_norm": 306.0, + "learning_rate": 7.856582335862739e-05, + "loss": 13.0002, + "step": 7851 + }, + { + "epoch": 0.3272894001917386, + "grad_norm": 1024.0, + "learning_rate": 7.856028315832142e-05, + "loss": 26.6253, + "step": 7852 + }, + { + "epoch": 0.3273310824892668, + "grad_norm": 502.0, + "learning_rate": 7.855474243750176e-05, + "loss": 19.0004, + "step": 7853 + }, + { + "epoch": 0.32737276478679506, + "grad_norm": 310.0, + "learning_rate": 7.854920119626938e-05, + "loss": 12.5023, + "step": 7854 + }, + { + "epoch": 0.32741444708432327, + "grad_norm": 157.0, + "learning_rate": 7.854365943472529e-05, + "loss": 6.5321, + "step": 7855 + }, + { + "epoch": 0.32745612938185154, + "grad_norm": 233.0, + "learning_rate": 7.853811715297044e-05, + "loss": 11.1878, + "step": 7856 + }, + { + "epoch": 0.32749781167937975, + "grad_norm": 624.0, + "learning_rate": 7.85325743511059e-05, + "loss": 19.7504, + "step": 7857 + }, + { + "epoch": 0.327539493976908, + "grad_norm": 456.0, + "learning_rate": 7.852703102923264e-05, + "loss": 15.8127, + "step": 7858 + }, + { + "epoch": 0.3275811762744362, + "grad_norm": 316.0, + "learning_rate": 7.852148718745172e-05, + "loss": 13.8756, + "step": 7859 + }, + { + "epoch": 0.3276228585719645, + "grad_norm": 370.0, + "learning_rate": 7.851594282586416e-05, + "loss": 14.4381, + "step": 7860 + }, + { + "epoch": 0.3276645408694927, + "grad_norm": 820.0, + "learning_rate": 7.851039794457102e-05, + "loss": 21.3753, + "step": 7861 + }, + { + "epoch": 0.32770622316702097, + "grad_norm": 2176.0, + "learning_rate": 7.850485254367335e-05, + "loss": 37.7545, + "step": 7862 + }, + { + "epoch": 0.3277479054645492, + "grad_norm": 984.0, + "learning_rate": 7.849930662327218e-05, + "loss": 23.6288, + "step": 7863 + }, + { + "epoch": 0.32778958776207745, + "grad_norm": 74.5, + "learning_rate": 7.849376018346865e-05, + "loss": 8.3126, + "step": 7864 + }, + { + "epoch": 0.32783127005960566, + "grad_norm": 412.0, + "learning_rate": 7.84882132243638e-05, + "loss": 14.5634, + "step": 7865 + }, + { + "epoch": 0.32787295235713393, + "grad_norm": 744.0, + "learning_rate": 7.848266574605873e-05, + "loss": 21.7511, + "step": 7866 + }, + { + "epoch": 0.32791463465466214, + "grad_norm": 231.0, + "learning_rate": 7.847711774865455e-05, + "loss": 11.2507, + "step": 7867 + }, + { + "epoch": 0.3279563169521904, + "grad_norm": 1208.0, + "learning_rate": 7.847156923225237e-05, + "loss": 31.1251, + "step": 7868 + }, + { + "epoch": 0.3279979992497186, + "grad_norm": 268.0, + "learning_rate": 7.84660201969533e-05, + "loss": 14.0665, + "step": 7869 + }, + { + "epoch": 0.3280396815472469, + "grad_norm": 448.0, + "learning_rate": 7.846047064285851e-05, + "loss": 15.6881, + "step": 7870 + }, + { + "epoch": 0.3280813638447751, + "grad_norm": 552.0, + "learning_rate": 7.845492057006911e-05, + "loss": 18.5007, + "step": 7871 + }, + { + "epoch": 0.32812304614230337, + "grad_norm": 332.0, + "learning_rate": 7.844936997868626e-05, + "loss": 13.8752, + "step": 7872 + }, + { + "epoch": 0.3281647284398316, + "grad_norm": 258.0, + "learning_rate": 7.844381886881112e-05, + "loss": 11.8128, + "step": 7873 + }, + { + "epoch": 0.32820641073735984, + "grad_norm": 584.0, + "learning_rate": 7.843826724054484e-05, + "loss": 18.0009, + "step": 7874 + }, + { + "epoch": 0.32824809303488806, + "grad_norm": 376.0, + "learning_rate": 7.843271509398862e-05, + "loss": 14.4377, + "step": 7875 + }, + { + "epoch": 0.3282897753324163, + "grad_norm": 390.0, + "learning_rate": 7.842716242924364e-05, + "loss": 14.5002, + "step": 7876 + }, + { + "epoch": 0.3283314576299446, + "grad_norm": 162.0, + "learning_rate": 7.84216092464111e-05, + "loss": 9.5004, + "step": 7877 + }, + { + "epoch": 0.3283731399274728, + "grad_norm": 60.25, + "learning_rate": 7.841605554559222e-05, + "loss": 8.9382, + "step": 7878 + }, + { + "epoch": 0.32841482222500107, + "grad_norm": 62.75, + "learning_rate": 7.84105013268882e-05, + "loss": 8.3127, + "step": 7879 + }, + { + "epoch": 0.3284565045225293, + "grad_norm": 696.0, + "learning_rate": 7.840494659040028e-05, + "loss": 15.6885, + "step": 7880 + }, + { + "epoch": 0.32849818682005755, + "grad_norm": 153.0, + "learning_rate": 7.839939133622966e-05, + "loss": 10.6253, + "step": 7881 + }, + { + "epoch": 0.32853986911758576, + "grad_norm": 458.0, + "learning_rate": 7.839383556447764e-05, + "loss": 16.3754, + "step": 7882 + }, + { + "epoch": 0.328581551415114, + "grad_norm": 708.0, + "learning_rate": 7.838827927524542e-05, + "loss": 21.0014, + "step": 7883 + }, + { + "epoch": 0.32862323371264224, + "grad_norm": 238.0, + "learning_rate": 7.838272246863431e-05, + "loss": 11.4382, + "step": 7884 + }, + { + "epoch": 0.3286649160101705, + "grad_norm": 712.0, + "learning_rate": 7.837716514474556e-05, + "loss": 20.2503, + "step": 7885 + }, + { + "epoch": 0.3287065983076987, + "grad_norm": 508.0, + "learning_rate": 7.837160730368045e-05, + "loss": 13.8755, + "step": 7886 + }, + { + "epoch": 0.328748280605227, + "grad_norm": 644.0, + "learning_rate": 7.836604894554029e-05, + "loss": 16.8799, + "step": 7887 + }, + { + "epoch": 0.3287899629027552, + "grad_norm": 288.0, + "learning_rate": 7.836049007042637e-05, + "loss": 13.3762, + "step": 7888 + }, + { + "epoch": 0.32883164520028346, + "grad_norm": 356.0, + "learning_rate": 7.835493067843998e-05, + "loss": 14.7509, + "step": 7889 + }, + { + "epoch": 0.3288733274978117, + "grad_norm": 255.0, + "learning_rate": 7.834937076968247e-05, + "loss": 13.1254, + "step": 7890 + }, + { + "epoch": 0.32891500979533994, + "grad_norm": 752.0, + "learning_rate": 7.834381034425518e-05, + "loss": 22.2501, + "step": 7891 + }, + { + "epoch": 0.32895669209286815, + "grad_norm": 532.0, + "learning_rate": 7.833824940225942e-05, + "loss": 18.0005, + "step": 7892 + }, + { + "epoch": 0.3289983743903964, + "grad_norm": 171.0, + "learning_rate": 7.833268794379653e-05, + "loss": 10.0627, + "step": 7893 + }, + { + "epoch": 0.32904005668792463, + "grad_norm": 468.0, + "learning_rate": 7.83271259689679e-05, + "loss": 15.4379, + "step": 7894 + }, + { + "epoch": 0.3290817389854529, + "grad_norm": 236.0, + "learning_rate": 7.83215634778749e-05, + "loss": 12.1287, + "step": 7895 + }, + { + "epoch": 0.3291234212829811, + "grad_norm": 368.0, + "learning_rate": 7.831600047061888e-05, + "loss": 14.7503, + "step": 7896 + }, + { + "epoch": 0.3291651035805094, + "grad_norm": 362.0, + "learning_rate": 7.831043694730123e-05, + "loss": 14.3752, + "step": 7897 + }, + { + "epoch": 0.3292067858780376, + "grad_norm": 324.0, + "learning_rate": 7.830487290802336e-05, + "loss": 12.5627, + "step": 7898 + }, + { + "epoch": 0.32924846817556586, + "grad_norm": 382.0, + "learning_rate": 7.829930835288669e-05, + "loss": 14.6263, + "step": 7899 + }, + { + "epoch": 0.32929015047309407, + "grad_norm": 568.0, + "learning_rate": 7.829374328199257e-05, + "loss": 18.1257, + "step": 7900 + }, + { + "epoch": 0.32933183277062233, + "grad_norm": 924.0, + "learning_rate": 7.828817769544249e-05, + "loss": 23.7508, + "step": 7901 + }, + { + "epoch": 0.32937351506815055, + "grad_norm": 532.0, + "learning_rate": 7.828261159333786e-05, + "loss": 18.7512, + "step": 7902 + }, + { + "epoch": 0.3294151973656788, + "grad_norm": 454.0, + "learning_rate": 7.827704497578012e-05, + "loss": 16.7502, + "step": 7903 + }, + { + "epoch": 0.329456879663207, + "grad_norm": 99.0, + "learning_rate": 7.827147784287072e-05, + "loss": 9.0003, + "step": 7904 + }, + { + "epoch": 0.3294985619607353, + "grad_norm": 149.0, + "learning_rate": 7.826591019471114e-05, + "loss": 8.4386, + "step": 7905 + }, + { + "epoch": 0.3295402442582635, + "grad_norm": 241.0, + "learning_rate": 7.826034203140283e-05, + "loss": 11.1254, + "step": 7906 + }, + { + "epoch": 0.32958192655579177, + "grad_norm": 47.25, + "learning_rate": 7.825477335304728e-05, + "loss": 7.0002, + "step": 7907 + }, + { + "epoch": 0.32962360885332, + "grad_norm": 318.0, + "learning_rate": 7.824920415974597e-05, + "loss": 12.4381, + "step": 7908 + }, + { + "epoch": 0.32966529115084825, + "grad_norm": 448.0, + "learning_rate": 7.824363445160042e-05, + "loss": 16.3756, + "step": 7909 + }, + { + "epoch": 0.32970697344837646, + "grad_norm": 1216.0, + "learning_rate": 7.823806422871212e-05, + "loss": 27.2527, + "step": 7910 + }, + { + "epoch": 0.32974865574590473, + "grad_norm": 306.0, + "learning_rate": 7.823249349118258e-05, + "loss": 14.438, + "step": 7911 + }, + { + "epoch": 0.32979033804343294, + "grad_norm": 104.0, + "learning_rate": 7.822692223911336e-05, + "loss": 9.2503, + "step": 7912 + }, + { + "epoch": 0.3298320203409612, + "grad_norm": 344.0, + "learning_rate": 7.822135047260596e-05, + "loss": 13.1252, + "step": 7913 + }, + { + "epoch": 0.3298737026384894, + "grad_norm": 195.0, + "learning_rate": 7.821577819176195e-05, + "loss": 11.0006, + "step": 7914 + }, + { + "epoch": 0.3299153849360177, + "grad_norm": 276.0, + "learning_rate": 7.821020539668287e-05, + "loss": 12.813, + "step": 7915 + }, + { + "epoch": 0.3299570672335459, + "grad_norm": 350.0, + "learning_rate": 7.820463208747031e-05, + "loss": 15.2502, + "step": 7916 + }, + { + "epoch": 0.32999874953107416, + "grad_norm": 474.0, + "learning_rate": 7.819905826422582e-05, + "loss": 16.2503, + "step": 7917 + }, + { + "epoch": 0.3300404318286024, + "grad_norm": 165.0, + "learning_rate": 7.819348392705097e-05, + "loss": 10.4381, + "step": 7918 + }, + { + "epoch": 0.33008211412613064, + "grad_norm": 450.0, + "learning_rate": 7.818790907604738e-05, + "loss": 15.2502, + "step": 7919 + }, + { + "epoch": 0.33012379642365886, + "grad_norm": 460.0, + "learning_rate": 7.818233371131666e-05, + "loss": 15.6253, + "step": 7920 + }, + { + "epoch": 0.3301654787211871, + "grad_norm": 624.0, + "learning_rate": 7.81767578329604e-05, + "loss": 19.1256, + "step": 7921 + }, + { + "epoch": 0.33020716101871533, + "grad_norm": 116.5, + "learning_rate": 7.817118144108023e-05, + "loss": 8.563, + "step": 7922 + }, + { + "epoch": 0.3302488433162436, + "grad_norm": 158.0, + "learning_rate": 7.816560453577777e-05, + "loss": 8.4381, + "step": 7923 + }, + { + "epoch": 0.3302905256137718, + "grad_norm": 716.0, + "learning_rate": 7.816002711715467e-05, + "loss": 17.7552, + "step": 7924 + }, + { + "epoch": 0.3303322079113001, + "grad_norm": 78.0, + "learning_rate": 7.815444918531257e-05, + "loss": 7.4689, + "step": 7925 + }, + { + "epoch": 0.3303738902088283, + "grad_norm": 1328.0, + "learning_rate": 7.814887074035314e-05, + "loss": 31.7516, + "step": 7926 + }, + { + "epoch": 0.33041557250635656, + "grad_norm": 232.0, + "learning_rate": 7.814329178237804e-05, + "loss": 11.9382, + "step": 7927 + }, + { + "epoch": 0.33045725480388477, + "grad_norm": 536.0, + "learning_rate": 7.813771231148895e-05, + "loss": 17.0005, + "step": 7928 + }, + { + "epoch": 0.33049893710141304, + "grad_norm": 171.0, + "learning_rate": 7.813213232778755e-05, + "loss": 10.7508, + "step": 7929 + }, + { + "epoch": 0.33054061939894125, + "grad_norm": 548.0, + "learning_rate": 7.812655183137556e-05, + "loss": 16.1257, + "step": 7930 + }, + { + "epoch": 0.3305823016964695, + "grad_norm": 516.0, + "learning_rate": 7.812097082235465e-05, + "loss": 15.7504, + "step": 7931 + }, + { + "epoch": 0.3306239839939977, + "grad_norm": 246.0, + "learning_rate": 7.811538930082655e-05, + "loss": 12.313, + "step": 7932 + }, + { + "epoch": 0.330665666291526, + "grad_norm": 54.75, + "learning_rate": 7.810980726689299e-05, + "loss": 7.6262, + "step": 7933 + }, + { + "epoch": 0.3307073485890542, + "grad_norm": 330.0, + "learning_rate": 7.810422472065571e-05, + "loss": 12.9377, + "step": 7934 + }, + { + "epoch": 0.3307490308865825, + "grad_norm": 286.0, + "learning_rate": 7.809864166221641e-05, + "loss": 12.1883, + "step": 7935 + }, + { + "epoch": 0.3307907131841107, + "grad_norm": 644.0, + "learning_rate": 7.809305809167688e-05, + "loss": 19.8763, + "step": 7936 + }, + { + "epoch": 0.33083239548163895, + "grad_norm": 346.0, + "learning_rate": 7.808747400913889e-05, + "loss": 14.9378, + "step": 7937 + }, + { + "epoch": 0.33087407777916716, + "grad_norm": 756.0, + "learning_rate": 7.808188941470419e-05, + "loss": 22.6252, + "step": 7938 + }, + { + "epoch": 0.33091576007669543, + "grad_norm": 588.0, + "learning_rate": 7.807630430847454e-05, + "loss": 16.5002, + "step": 7939 + }, + { + "epoch": 0.33095744237422364, + "grad_norm": 600.0, + "learning_rate": 7.807071869055176e-05, + "loss": 17.3753, + "step": 7940 + }, + { + "epoch": 0.3309991246717519, + "grad_norm": 164.0, + "learning_rate": 7.806513256103765e-05, + "loss": 10.6252, + "step": 7941 + }, + { + "epoch": 0.3310408069692801, + "grad_norm": 512.0, + "learning_rate": 7.805954592003401e-05, + "loss": 17.0002, + "step": 7942 + }, + { + "epoch": 0.3310824892668084, + "grad_norm": 264.0, + "learning_rate": 7.805395876764264e-05, + "loss": 13.5627, + "step": 7943 + }, + { + "epoch": 0.3311241715643366, + "grad_norm": 528.0, + "learning_rate": 7.804837110396538e-05, + "loss": 15.8778, + "step": 7944 + }, + { + "epoch": 0.33116585386186487, + "grad_norm": 78.0, + "learning_rate": 7.804278292910407e-05, + "loss": 8.8128, + "step": 7945 + }, + { + "epoch": 0.3312075361593931, + "grad_norm": 150.0, + "learning_rate": 7.803719424316058e-05, + "loss": 10.1254, + "step": 7946 + }, + { + "epoch": 0.33124921845692135, + "grad_norm": 418.0, + "learning_rate": 7.80316050462367e-05, + "loss": 15.5629, + "step": 7947 + }, + { + "epoch": 0.33129090075444956, + "grad_norm": 296.0, + "learning_rate": 7.802601533843434e-05, + "loss": 12.6258, + "step": 7948 + }, + { + "epoch": 0.3313325830519778, + "grad_norm": 390.0, + "learning_rate": 7.802042511985536e-05, + "loss": 15.3128, + "step": 7949 + }, + { + "epoch": 0.3313742653495061, + "grad_norm": 302.0, + "learning_rate": 7.801483439060167e-05, + "loss": 12.0002, + "step": 7950 + }, + { + "epoch": 0.3314159476470343, + "grad_norm": 560.0, + "learning_rate": 7.80092431507751e-05, + "loss": 19.0003, + "step": 7951 + }, + { + "epoch": 0.33145762994456257, + "grad_norm": 314.0, + "learning_rate": 7.80036514004776e-05, + "loss": 12.5002, + "step": 7952 + }, + { + "epoch": 0.3314993122420908, + "grad_norm": 175.0, + "learning_rate": 7.799805913981107e-05, + "loss": 6.9067, + "step": 7953 + }, + { + "epoch": 0.33154099453961905, + "grad_norm": 442.0, + "learning_rate": 7.799246636887743e-05, + "loss": 15.7505, + "step": 7954 + }, + { + "epoch": 0.33158267683714726, + "grad_norm": 456.0, + "learning_rate": 7.798687308777861e-05, + "loss": 15.8788, + "step": 7955 + }, + { + "epoch": 0.33162435913467553, + "grad_norm": 404.0, + "learning_rate": 7.798127929661654e-05, + "loss": 15.8754, + "step": 7956 + }, + { + "epoch": 0.33166604143220374, + "grad_norm": 195.0, + "learning_rate": 7.797568499549316e-05, + "loss": 12.6879, + "step": 7957 + }, + { + "epoch": 0.331707723729732, + "grad_norm": 199.0, + "learning_rate": 7.797009018451044e-05, + "loss": 11.3127, + "step": 7958 + }, + { + "epoch": 0.3317494060272602, + "grad_norm": 720.0, + "learning_rate": 7.796449486377035e-05, + "loss": 21.6282, + "step": 7959 + }, + { + "epoch": 0.3317910883247885, + "grad_norm": 151.0, + "learning_rate": 7.795889903337486e-05, + "loss": 9.7502, + "step": 7960 + }, + { + "epoch": 0.3318327706223167, + "grad_norm": 464.0, + "learning_rate": 7.795330269342595e-05, + "loss": 15.438, + "step": 7961 + }, + { + "epoch": 0.33187445291984496, + "grad_norm": 488.0, + "learning_rate": 7.794770584402562e-05, + "loss": 16.1255, + "step": 7962 + }, + { + "epoch": 0.3319161352173732, + "grad_norm": 300.0, + "learning_rate": 7.794210848527585e-05, + "loss": 11.0628, + "step": 7963 + }, + { + "epoch": 0.33195781751490144, + "grad_norm": 376.0, + "learning_rate": 7.793651061727869e-05, + "loss": 14.1876, + "step": 7964 + }, + { + "epoch": 0.33199949981242965, + "grad_norm": 130.0, + "learning_rate": 7.793091224013615e-05, + "loss": 9.1252, + "step": 7965 + }, + { + "epoch": 0.3320411821099579, + "grad_norm": 852.0, + "learning_rate": 7.792531335395025e-05, + "loss": 24.7506, + "step": 7966 + }, + { + "epoch": 0.33208286440748613, + "grad_norm": 228.0, + "learning_rate": 7.791971395882302e-05, + "loss": 11.8127, + "step": 7967 + }, + { + "epoch": 0.3321245467050144, + "grad_norm": 62.25, + "learning_rate": 7.791411405485656e-05, + "loss": 7.1881, + "step": 7968 + }, + { + "epoch": 0.3321662290025426, + "grad_norm": 366.0, + "learning_rate": 7.790851364215286e-05, + "loss": 14.0628, + "step": 7969 + }, + { + "epoch": 0.3322079113000709, + "grad_norm": 444.0, + "learning_rate": 7.790291272081402e-05, + "loss": 15.6253, + "step": 7970 + }, + { + "epoch": 0.3322495935975991, + "grad_norm": 158.0, + "learning_rate": 7.789731129094214e-05, + "loss": 7.6877, + "step": 7971 + }, + { + "epoch": 0.33229127589512736, + "grad_norm": 336.0, + "learning_rate": 7.789170935263928e-05, + "loss": 14.2506, + "step": 7972 + }, + { + "epoch": 0.33233295819265557, + "grad_norm": 135.0, + "learning_rate": 7.788610690600753e-05, + "loss": 9.3131, + "step": 7973 + }, + { + "epoch": 0.33237464049018384, + "grad_norm": 376.0, + "learning_rate": 7.788050395114902e-05, + "loss": 13.2507, + "step": 7974 + }, + { + "epoch": 0.33241632278771205, + "grad_norm": 169.0, + "learning_rate": 7.787490048816584e-05, + "loss": 8.4377, + "step": 7975 + }, + { + "epoch": 0.3324580050852403, + "grad_norm": 84.5, + "learning_rate": 7.786929651716013e-05, + "loss": 7.4065, + "step": 7976 + }, + { + "epoch": 0.3324996873827685, + "grad_norm": 208.0, + "learning_rate": 7.7863692038234e-05, + "loss": 11.8129, + "step": 7977 + }, + { + "epoch": 0.3325413696802968, + "grad_norm": 74.0, + "learning_rate": 7.785808705148963e-05, + "loss": 6.9385, + "step": 7978 + }, + { + "epoch": 0.332583051977825, + "grad_norm": 436.0, + "learning_rate": 7.785248155702916e-05, + "loss": 15.8128, + "step": 7979 + }, + { + "epoch": 0.3326247342753533, + "grad_norm": 366.0, + "learning_rate": 7.784687555495471e-05, + "loss": 11.0675, + "step": 7980 + }, + { + "epoch": 0.3326664165728815, + "grad_norm": 298.0, + "learning_rate": 7.78412690453685e-05, + "loss": 12.501, + "step": 7981 + }, + { + "epoch": 0.33270809887040975, + "grad_norm": 164.0, + "learning_rate": 7.783566202837269e-05, + "loss": 9.2501, + "step": 7982 + }, + { + "epoch": 0.33274978116793796, + "grad_norm": 314.0, + "learning_rate": 7.783005450406946e-05, + "loss": 13.2506, + "step": 7983 + }, + { + "epoch": 0.33279146346546623, + "grad_norm": 376.0, + "learning_rate": 7.782444647256102e-05, + "loss": 15.2502, + "step": 7984 + }, + { + "epoch": 0.33283314576299444, + "grad_norm": 536.0, + "learning_rate": 7.781883793394957e-05, + "loss": 16.8752, + "step": 7985 + }, + { + "epoch": 0.3328748280605227, + "grad_norm": 888.0, + "learning_rate": 7.781322888833734e-05, + "loss": 22.003, + "step": 7986 + }, + { + "epoch": 0.3329165103580509, + "grad_norm": 804.0, + "learning_rate": 7.780761933582654e-05, + "loss": 21.007, + "step": 7987 + }, + { + "epoch": 0.3329581926555792, + "grad_norm": 1200.0, + "learning_rate": 7.780200927651941e-05, + "loss": 24.2549, + "step": 7988 + }, + { + "epoch": 0.3329998749531074, + "grad_norm": 444.0, + "learning_rate": 7.779639871051819e-05, + "loss": 16.1259, + "step": 7989 + }, + { + "epoch": 0.33304155725063567, + "grad_norm": 223.0, + "learning_rate": 7.779078763792514e-05, + "loss": 11.9378, + "step": 7990 + }, + { + "epoch": 0.3330832395481639, + "grad_norm": 512.0, + "learning_rate": 7.77851760588425e-05, + "loss": 17.3754, + "step": 7991 + }, + { + "epoch": 0.33312492184569215, + "grad_norm": 1312.0, + "learning_rate": 7.777956397337259e-05, + "loss": 28.2503, + "step": 7992 + }, + { + "epoch": 0.33316660414322036, + "grad_norm": 350.0, + "learning_rate": 7.777395138161763e-05, + "loss": 13.9376, + "step": 7993 + }, + { + "epoch": 0.3332082864407486, + "grad_norm": 211.0, + "learning_rate": 7.776833828367995e-05, + "loss": 10.1877, + "step": 7994 + }, + { + "epoch": 0.33324996873827684, + "grad_norm": 233.0, + "learning_rate": 7.776272467966185e-05, + "loss": 12.8752, + "step": 7995 + }, + { + "epoch": 0.3332916510358051, + "grad_norm": 440.0, + "learning_rate": 7.775711056966561e-05, + "loss": 12.8136, + "step": 7996 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 1280.0, + "learning_rate": 7.775149595379359e-05, + "loss": 27.5039, + "step": 7997 + }, + { + "epoch": 0.3333750156308616, + "grad_norm": 276.0, + "learning_rate": 7.774588083214807e-05, + "loss": 12.8752, + "step": 7998 + }, + { + "epoch": 0.3334166979283898, + "grad_norm": 113.0, + "learning_rate": 7.774026520483141e-05, + "loss": 10.1878, + "step": 7999 + }, + { + "epoch": 0.33345838022591806, + "grad_norm": 243.0, + "learning_rate": 7.773464907194598e-05, + "loss": 12.2503, + "step": 8000 + }, + { + "epoch": 0.3335000625234463, + "grad_norm": 125.5, + "learning_rate": 7.772903243359409e-05, + "loss": 10.3129, + "step": 8001 + }, + { + "epoch": 0.33354174482097454, + "grad_norm": 292.0, + "learning_rate": 7.772341528987812e-05, + "loss": 14.2501, + "step": 8002 + }, + { + "epoch": 0.33358342711850275, + "grad_norm": 442.0, + "learning_rate": 7.771779764090046e-05, + "loss": 15.5021, + "step": 8003 + }, + { + "epoch": 0.333625109416031, + "grad_norm": 584.0, + "learning_rate": 7.771217948676346e-05, + "loss": 17.5002, + "step": 8004 + }, + { + "epoch": 0.33366679171355923, + "grad_norm": 196.0, + "learning_rate": 7.770656082756953e-05, + "loss": 11.3129, + "step": 8005 + }, + { + "epoch": 0.3337084740110875, + "grad_norm": 322.0, + "learning_rate": 7.770094166342107e-05, + "loss": 13.438, + "step": 8006 + }, + { + "epoch": 0.3337501563086157, + "grad_norm": 134.0, + "learning_rate": 7.769532199442051e-05, + "loss": 8.3756, + "step": 8007 + }, + { + "epoch": 0.333791838606144, + "grad_norm": 232.0, + "learning_rate": 7.768970182067023e-05, + "loss": 11.9378, + "step": 8008 + }, + { + "epoch": 0.3338335209036722, + "grad_norm": 117.0, + "learning_rate": 7.768408114227268e-05, + "loss": 10.1882, + "step": 8009 + }, + { + "epoch": 0.33387520320120045, + "grad_norm": 242.0, + "learning_rate": 7.767845995933029e-05, + "loss": 10.0004, + "step": 8010 + }, + { + "epoch": 0.33391688549872867, + "grad_norm": 326.0, + "learning_rate": 7.767283827194551e-05, + "loss": 13.4379, + "step": 8011 + }, + { + "epoch": 0.33395856779625693, + "grad_norm": 276.0, + "learning_rate": 7.76672160802208e-05, + "loss": 12.4381, + "step": 8012 + }, + { + "epoch": 0.33400025009378514, + "grad_norm": 274.0, + "learning_rate": 7.766159338425863e-05, + "loss": 10.6256, + "step": 8013 + }, + { + "epoch": 0.3340419323913134, + "grad_norm": 155.0, + "learning_rate": 7.765597018416144e-05, + "loss": 7.5633, + "step": 8014 + }, + { + "epoch": 0.3340836146888416, + "grad_norm": 450.0, + "learning_rate": 7.765034648003175e-05, + "loss": 12.6882, + "step": 8015 + }, + { + "epoch": 0.3341252969863699, + "grad_norm": 298.0, + "learning_rate": 7.764472227197205e-05, + "loss": 13.3126, + "step": 8016 + }, + { + "epoch": 0.3341669792838981, + "grad_norm": 424.0, + "learning_rate": 7.763909756008483e-05, + "loss": 15.6877, + "step": 8017 + }, + { + "epoch": 0.33420866158142637, + "grad_norm": 380.0, + "learning_rate": 7.76334723444726e-05, + "loss": 16.0006, + "step": 8018 + }, + { + "epoch": 0.3342503438789546, + "grad_norm": 346.0, + "learning_rate": 7.762784662523787e-05, + "loss": 13.6883, + "step": 8019 + }, + { + "epoch": 0.33429202617648285, + "grad_norm": 732.0, + "learning_rate": 7.76222204024832e-05, + "loss": 20.6256, + "step": 8020 + }, + { + "epoch": 0.33433370847401106, + "grad_norm": 82.0, + "learning_rate": 7.761659367631111e-05, + "loss": 6.3449, + "step": 8021 + }, + { + "epoch": 0.3343753907715393, + "grad_norm": 544.0, + "learning_rate": 7.761096644682414e-05, + "loss": 17.6252, + "step": 8022 + }, + { + "epoch": 0.3344170730690676, + "grad_norm": 314.0, + "learning_rate": 7.760533871412485e-05, + "loss": 12.2524, + "step": 8023 + }, + { + "epoch": 0.3344587553665958, + "grad_norm": 392.0, + "learning_rate": 7.759971047831583e-05, + "loss": 14.9379, + "step": 8024 + }, + { + "epoch": 0.3345004376641241, + "grad_norm": 464.0, + "learning_rate": 7.759408173949963e-05, + "loss": 15.0006, + "step": 8025 + }, + { + "epoch": 0.3345421199616523, + "grad_norm": 258.0, + "learning_rate": 7.758845249777885e-05, + "loss": 12.3759, + "step": 8026 + }, + { + "epoch": 0.33458380225918055, + "grad_norm": 219.0, + "learning_rate": 7.758282275325606e-05, + "loss": 11.8127, + "step": 8027 + }, + { + "epoch": 0.33462548455670876, + "grad_norm": 286.0, + "learning_rate": 7.757719250603387e-05, + "loss": 13.6879, + "step": 8028 + }, + { + "epoch": 0.33466716685423703, + "grad_norm": 386.0, + "learning_rate": 7.75715617562149e-05, + "loss": 14.7503, + "step": 8029 + }, + { + "epoch": 0.33470884915176524, + "grad_norm": 237.0, + "learning_rate": 7.756593050390179e-05, + "loss": 12.9379, + "step": 8030 + }, + { + "epoch": 0.3347505314492935, + "grad_norm": 516.0, + "learning_rate": 7.756029874919713e-05, + "loss": 17.6256, + "step": 8031 + }, + { + "epoch": 0.3347922137468217, + "grad_norm": 368.0, + "learning_rate": 7.755466649220359e-05, + "loss": 14.2502, + "step": 8032 + }, + { + "epoch": 0.33483389604435, + "grad_norm": 202.0, + "learning_rate": 7.75490337330238e-05, + "loss": 12.6254, + "step": 8033 + }, + { + "epoch": 0.3348755783418782, + "grad_norm": 418.0, + "learning_rate": 7.754340047176043e-05, + "loss": 16.5001, + "step": 8034 + }, + { + "epoch": 0.33491726063940647, + "grad_norm": 380.0, + "learning_rate": 7.753776670851613e-05, + "loss": 14.6878, + "step": 8035 + }, + { + "epoch": 0.3349589429369347, + "grad_norm": 172.0, + "learning_rate": 7.75321324433936e-05, + "loss": 11.2505, + "step": 8036 + }, + { + "epoch": 0.33500062523446295, + "grad_norm": 796.0, + "learning_rate": 7.752649767649552e-05, + "loss": 18.878, + "step": 8037 + }, + { + "epoch": 0.33504230753199116, + "grad_norm": 1464.0, + "learning_rate": 7.752086240792455e-05, + "loss": 42.0007, + "step": 8038 + }, + { + "epoch": 0.3350839898295194, + "grad_norm": 410.0, + "learning_rate": 7.751522663778343e-05, + "loss": 15.4377, + "step": 8039 + }, + { + "epoch": 0.33512567212704764, + "grad_norm": 236.0, + "learning_rate": 7.750959036617487e-05, + "loss": 12.7503, + "step": 8040 + }, + { + "epoch": 0.3351673544245759, + "grad_norm": 215.0, + "learning_rate": 7.750395359320157e-05, + "loss": 12.0035, + "step": 8041 + }, + { + "epoch": 0.3352090367221041, + "grad_norm": 134.0, + "learning_rate": 7.74983163189663e-05, + "loss": 9.6878, + "step": 8042 + }, + { + "epoch": 0.3352507190196324, + "grad_norm": 94.0, + "learning_rate": 7.749267854357175e-05, + "loss": 8.2506, + "step": 8043 + }, + { + "epoch": 0.3352924013171606, + "grad_norm": 330.0, + "learning_rate": 7.74870402671207e-05, + "loss": 14.5004, + "step": 8044 + }, + { + "epoch": 0.33533408361468886, + "grad_norm": 458.0, + "learning_rate": 7.74814014897159e-05, + "loss": 17.0002, + "step": 8045 + }, + { + "epoch": 0.33537576591221707, + "grad_norm": 314.0, + "learning_rate": 7.747576221146011e-05, + "loss": 13.1253, + "step": 8046 + }, + { + "epoch": 0.33541744820974534, + "grad_norm": 318.0, + "learning_rate": 7.747012243245613e-05, + "loss": 13.7503, + "step": 8047 + }, + { + "epoch": 0.33545913050727355, + "grad_norm": 344.0, + "learning_rate": 7.746448215280672e-05, + "loss": 14.1252, + "step": 8048 + }, + { + "epoch": 0.3355008128048018, + "grad_norm": 276.0, + "learning_rate": 7.74588413726147e-05, + "loss": 10.8753, + "step": 8049 + }, + { + "epoch": 0.33554249510233003, + "grad_norm": 400.0, + "learning_rate": 7.745320009198285e-05, + "loss": 13.063, + "step": 8050 + }, + { + "epoch": 0.3355841773998583, + "grad_norm": 190.0, + "learning_rate": 7.744755831101398e-05, + "loss": 8.7508, + "step": 8051 + }, + { + "epoch": 0.3356258596973865, + "grad_norm": 143.0, + "learning_rate": 7.744191602981093e-05, + "loss": 7.6565, + "step": 8052 + }, + { + "epoch": 0.3356675419949148, + "grad_norm": 444.0, + "learning_rate": 7.743627324847653e-05, + "loss": 15.6877, + "step": 8053 + }, + { + "epoch": 0.335709224292443, + "grad_norm": 66.5, + "learning_rate": 7.743062996711361e-05, + "loss": 7.4379, + "step": 8054 + }, + { + "epoch": 0.33575090658997125, + "grad_norm": 428.0, + "learning_rate": 7.742498618582502e-05, + "loss": 14.6906, + "step": 8055 + }, + { + "epoch": 0.33579258888749947, + "grad_norm": 117.5, + "learning_rate": 7.741934190471363e-05, + "loss": 9.3145, + "step": 8056 + }, + { + "epoch": 0.33583427118502773, + "grad_norm": 394.0, + "learning_rate": 7.741369712388229e-05, + "loss": 14.1885, + "step": 8057 + }, + { + "epoch": 0.33587595348255594, + "grad_norm": 171.0, + "learning_rate": 7.74080518434339e-05, + "loss": 10.6253, + "step": 8058 + }, + { + "epoch": 0.3359176357800842, + "grad_norm": 318.0, + "learning_rate": 7.740240606347133e-05, + "loss": 11.0627, + "step": 8059 + }, + { + "epoch": 0.3359593180776124, + "grad_norm": 520.0, + "learning_rate": 7.739675978409746e-05, + "loss": 16.5005, + "step": 8060 + }, + { + "epoch": 0.3360010003751407, + "grad_norm": 173.0, + "learning_rate": 7.739111300541522e-05, + "loss": 9.8127, + "step": 8061 + }, + { + "epoch": 0.3360426826726689, + "grad_norm": 568.0, + "learning_rate": 7.738546572752751e-05, + "loss": 18.8759, + "step": 8062 + }, + { + "epoch": 0.33608436497019717, + "grad_norm": 320.0, + "learning_rate": 7.737981795053728e-05, + "loss": 12.1266, + "step": 8063 + }, + { + "epoch": 0.3361260472677254, + "grad_norm": 292.0, + "learning_rate": 7.73741696745474e-05, + "loss": 13.3127, + "step": 8064 + }, + { + "epoch": 0.33616772956525365, + "grad_norm": 178.0, + "learning_rate": 7.736852089966088e-05, + "loss": 10.5011, + "step": 8065 + }, + { + "epoch": 0.33620941186278186, + "grad_norm": 544.0, + "learning_rate": 7.736287162598062e-05, + "loss": 19.0002, + "step": 8066 + }, + { + "epoch": 0.3362510941603101, + "grad_norm": 290.0, + "learning_rate": 7.73572218536096e-05, + "loss": 12.4384, + "step": 8067 + }, + { + "epoch": 0.33629277645783834, + "grad_norm": 202.0, + "learning_rate": 7.735157158265078e-05, + "loss": 11.2501, + "step": 8068 + }, + { + "epoch": 0.3363344587553666, + "grad_norm": 434.0, + "learning_rate": 7.734592081320715e-05, + "loss": 16.5009, + "step": 8069 + }, + { + "epoch": 0.3363761410528948, + "grad_norm": 568.0, + "learning_rate": 7.734026954538168e-05, + "loss": 19.2503, + "step": 8070 + }, + { + "epoch": 0.3364178233504231, + "grad_norm": 498.0, + "learning_rate": 7.733461777927736e-05, + "loss": 16.5002, + "step": 8071 + }, + { + "epoch": 0.3364595056479513, + "grad_norm": 156.0, + "learning_rate": 7.732896551499722e-05, + "loss": 10.6255, + "step": 8072 + }, + { + "epoch": 0.33650118794547956, + "grad_norm": 720.0, + "learning_rate": 7.732331275264424e-05, + "loss": 18.88, + "step": 8073 + }, + { + "epoch": 0.3365428702430078, + "grad_norm": 150.0, + "learning_rate": 7.731765949232148e-05, + "loss": 8.3129, + "step": 8074 + }, + { + "epoch": 0.33658455254053604, + "grad_norm": 564.0, + "learning_rate": 7.731200573413193e-05, + "loss": 18.0005, + "step": 8075 + }, + { + "epoch": 0.33662623483806425, + "grad_norm": 288.0, + "learning_rate": 7.730635147817867e-05, + "loss": 10.3766, + "step": 8076 + }, + { + "epoch": 0.3366679171355925, + "grad_norm": 168.0, + "learning_rate": 7.730069672456473e-05, + "loss": 10.1258, + "step": 8077 + }, + { + "epoch": 0.33670959943312073, + "grad_norm": 418.0, + "learning_rate": 7.729504147339314e-05, + "loss": 15.1259, + "step": 8078 + }, + { + "epoch": 0.336751281730649, + "grad_norm": 612.0, + "learning_rate": 7.728938572476703e-05, + "loss": 17.5006, + "step": 8079 + }, + { + "epoch": 0.3367929640281772, + "grad_norm": 249.0, + "learning_rate": 7.728372947878943e-05, + "loss": 11.6251, + "step": 8080 + }, + { + "epoch": 0.3368346463257055, + "grad_norm": 272.0, + "learning_rate": 7.727807273556344e-05, + "loss": 12.8752, + "step": 8081 + }, + { + "epoch": 0.3368763286232337, + "grad_norm": 572.0, + "learning_rate": 7.727241549519214e-05, + "loss": 18.5044, + "step": 8082 + }, + { + "epoch": 0.33691801092076196, + "grad_norm": 251.0, + "learning_rate": 7.726675775777865e-05, + "loss": 12.813, + "step": 8083 + }, + { + "epoch": 0.33695969321829017, + "grad_norm": 536.0, + "learning_rate": 7.72610995234261e-05, + "loss": 17.3787, + "step": 8084 + }, + { + "epoch": 0.33700137551581844, + "grad_norm": 318.0, + "learning_rate": 7.725544079223757e-05, + "loss": 13.126, + "step": 8085 + }, + { + "epoch": 0.33704305781334665, + "grad_norm": 256.0, + "learning_rate": 7.724978156431621e-05, + "loss": 12.5003, + "step": 8086 + }, + { + "epoch": 0.3370847401108749, + "grad_norm": 180.0, + "learning_rate": 7.724412183976519e-05, + "loss": 9.8756, + "step": 8087 + }, + { + "epoch": 0.3371264224084031, + "grad_norm": 472.0, + "learning_rate": 7.723846161868759e-05, + "loss": 16.7506, + "step": 8088 + }, + { + "epoch": 0.3371681047059314, + "grad_norm": 572.0, + "learning_rate": 7.723280090118665e-05, + "loss": 18.5003, + "step": 8089 + }, + { + "epoch": 0.3372097870034596, + "grad_norm": 296.0, + "learning_rate": 7.722713968736547e-05, + "loss": 13.5636, + "step": 8090 + }, + { + "epoch": 0.33725146930098787, + "grad_norm": 360.0, + "learning_rate": 7.722147797732726e-05, + "loss": 15.1256, + "step": 8091 + }, + { + "epoch": 0.3372931515985161, + "grad_norm": 61.0, + "learning_rate": 7.72158157711752e-05, + "loss": 7.469, + "step": 8092 + }, + { + "epoch": 0.33733483389604435, + "grad_norm": 292.0, + "learning_rate": 7.721015306901246e-05, + "loss": 13.938, + "step": 8093 + }, + { + "epoch": 0.33737651619357256, + "grad_norm": 314.0, + "learning_rate": 7.72044898709423e-05, + "loss": 13.3751, + "step": 8094 + }, + { + "epoch": 0.33741819849110083, + "grad_norm": 200.0, + "learning_rate": 7.719882617706788e-05, + "loss": 11.5627, + "step": 8095 + }, + { + "epoch": 0.3374598807886291, + "grad_norm": 322.0, + "learning_rate": 7.719316198749243e-05, + "loss": 13.2507, + "step": 8096 + }, + { + "epoch": 0.3375015630861573, + "grad_norm": 163.0, + "learning_rate": 7.71874973023192e-05, + "loss": 9.4382, + "step": 8097 + }, + { + "epoch": 0.3375432453836856, + "grad_norm": 604.0, + "learning_rate": 7.718183212165142e-05, + "loss": 19.7505, + "step": 8098 + }, + { + "epoch": 0.3375849276812138, + "grad_norm": 318.0, + "learning_rate": 7.717616644559233e-05, + "loss": 13.1878, + "step": 8099 + }, + { + "epoch": 0.33762660997874205, + "grad_norm": 524.0, + "learning_rate": 7.717050027424519e-05, + "loss": 17.6253, + "step": 8100 + }, + { + "epoch": 0.33766829227627027, + "grad_norm": 178.0, + "learning_rate": 7.716483360771329e-05, + "loss": 10.7508, + "step": 8101 + }, + { + "epoch": 0.33770997457379853, + "grad_norm": 1328.0, + "learning_rate": 7.715916644609986e-05, + "loss": 30.256, + "step": 8102 + }, + { + "epoch": 0.33775165687132674, + "grad_norm": 286.0, + "learning_rate": 7.715349878950823e-05, + "loss": 12.9377, + "step": 8103 + }, + { + "epoch": 0.337793339168855, + "grad_norm": 560.0, + "learning_rate": 7.714783063804166e-05, + "loss": 17.7508, + "step": 8104 + }, + { + "epoch": 0.3378350214663832, + "grad_norm": 1000.0, + "learning_rate": 7.714216199180349e-05, + "loss": 26.2502, + "step": 8105 + }, + { + "epoch": 0.3378767037639115, + "grad_norm": 468.0, + "learning_rate": 7.713649285089698e-05, + "loss": 16.6266, + "step": 8106 + }, + { + "epoch": 0.3379183860614397, + "grad_norm": 334.0, + "learning_rate": 7.713082321542549e-05, + "loss": 13.3132, + "step": 8107 + }, + { + "epoch": 0.33796006835896797, + "grad_norm": 1752.0, + "learning_rate": 7.712515308549233e-05, + "loss": 33.5033, + "step": 8108 + }, + { + "epoch": 0.3380017506564962, + "grad_norm": 454.0, + "learning_rate": 7.711948246120086e-05, + "loss": 12.314, + "step": 8109 + }, + { + "epoch": 0.33804343295402445, + "grad_norm": 326.0, + "learning_rate": 7.711381134265442e-05, + "loss": 14.2503, + "step": 8110 + }, + { + "epoch": 0.33808511525155266, + "grad_norm": 422.0, + "learning_rate": 7.710813972995635e-05, + "loss": 12.7509, + "step": 8111 + }, + { + "epoch": 0.3381267975490809, + "grad_norm": 392.0, + "learning_rate": 7.710246762321003e-05, + "loss": 15.3752, + "step": 8112 + }, + { + "epoch": 0.33816847984660914, + "grad_norm": 190.0, + "learning_rate": 7.709679502251883e-05, + "loss": 10.5628, + "step": 8113 + }, + { + "epoch": 0.3382101621441374, + "grad_norm": 346.0, + "learning_rate": 7.709112192798614e-05, + "loss": 14.1887, + "step": 8114 + }, + { + "epoch": 0.3382518444416656, + "grad_norm": 322.0, + "learning_rate": 7.708544833971534e-05, + "loss": 13.0627, + "step": 8115 + }, + { + "epoch": 0.3382935267391939, + "grad_norm": 97.5, + "learning_rate": 7.707977425780983e-05, + "loss": 10.1891, + "step": 8116 + }, + { + "epoch": 0.3383352090367221, + "grad_norm": 154.0, + "learning_rate": 7.707409968237306e-05, + "loss": 10.1254, + "step": 8117 + }, + { + "epoch": 0.33837689133425036, + "grad_norm": 378.0, + "learning_rate": 7.70684246135084e-05, + "loss": 15.0002, + "step": 8118 + }, + { + "epoch": 0.3384185736317786, + "grad_norm": 254.0, + "learning_rate": 7.70627490513193e-05, + "loss": 12.5004, + "step": 8119 + }, + { + "epoch": 0.33846025592930684, + "grad_norm": 400.0, + "learning_rate": 7.705707299590921e-05, + "loss": 16.1254, + "step": 8120 + }, + { + "epoch": 0.33850193822683505, + "grad_norm": 420.0, + "learning_rate": 7.705139644738155e-05, + "loss": 14.1251, + "step": 8121 + }, + { + "epoch": 0.3385436205243633, + "grad_norm": 214.0, + "learning_rate": 7.704571940583978e-05, + "loss": 12.0003, + "step": 8122 + }, + { + "epoch": 0.33858530282189153, + "grad_norm": 48.75, + "learning_rate": 7.704004187138739e-05, + "loss": 6.3129, + "step": 8123 + }, + { + "epoch": 0.3386269851194198, + "grad_norm": 82.5, + "learning_rate": 7.703436384412782e-05, + "loss": 8.438, + "step": 8124 + }, + { + "epoch": 0.338668667416948, + "grad_norm": 640.0, + "learning_rate": 7.702868532416459e-05, + "loss": 18.5014, + "step": 8125 + }, + { + "epoch": 0.3387103497144763, + "grad_norm": 53.75, + "learning_rate": 7.702300631160116e-05, + "loss": 8.4383, + "step": 8126 + }, + { + "epoch": 0.3387520320120045, + "grad_norm": 256.0, + "learning_rate": 7.701732680654103e-05, + "loss": 12.688, + "step": 8127 + }, + { + "epoch": 0.33879371430953276, + "grad_norm": 316.0, + "learning_rate": 7.701164680908772e-05, + "loss": 12.6879, + "step": 8128 + }, + { + "epoch": 0.33883539660706097, + "grad_norm": 253.0, + "learning_rate": 7.700596631934477e-05, + "loss": 10.1251, + "step": 8129 + }, + { + "epoch": 0.33887707890458924, + "grad_norm": 588.0, + "learning_rate": 7.700028533741566e-05, + "loss": 18.502, + "step": 8130 + }, + { + "epoch": 0.33891876120211745, + "grad_norm": 568.0, + "learning_rate": 7.699460386340398e-05, + "loss": 20.6253, + "step": 8131 + }, + { + "epoch": 0.3389604434996457, + "grad_norm": 414.0, + "learning_rate": 7.698892189741323e-05, + "loss": 14.5004, + "step": 8132 + }, + { + "epoch": 0.3390021257971739, + "grad_norm": 173.0, + "learning_rate": 7.6983239439547e-05, + "loss": 10.6879, + "step": 8133 + }, + { + "epoch": 0.3390438080947022, + "grad_norm": 112.5, + "learning_rate": 7.69775564899088e-05, + "loss": 6.6878, + "step": 8134 + }, + { + "epoch": 0.3390854903922304, + "grad_norm": 130.0, + "learning_rate": 7.697187304860228e-05, + "loss": 9.1891, + "step": 8135 + }, + { + "epoch": 0.33912717268975867, + "grad_norm": 1000.0, + "learning_rate": 7.696618911573096e-05, + "loss": 23.1307, + "step": 8136 + }, + { + "epoch": 0.3391688549872869, + "grad_norm": 584.0, + "learning_rate": 7.696050469139846e-05, + "loss": 19.5004, + "step": 8137 + }, + { + "epoch": 0.33921053728481515, + "grad_norm": 332.0, + "learning_rate": 7.695481977570836e-05, + "loss": 13.2507, + "step": 8138 + }, + { + "epoch": 0.33925221958234336, + "grad_norm": 1080.0, + "learning_rate": 7.694913436876427e-05, + "loss": 21.3814, + "step": 8139 + }, + { + "epoch": 0.33929390187987163, + "grad_norm": 62.0, + "learning_rate": 7.694344847066982e-05, + "loss": 8.5631, + "step": 8140 + }, + { + "epoch": 0.33933558417739984, + "grad_norm": 312.0, + "learning_rate": 7.693776208152863e-05, + "loss": 10.4403, + "step": 8141 + }, + { + "epoch": 0.3393772664749281, + "grad_norm": 428.0, + "learning_rate": 7.693207520144434e-05, + "loss": 14.3751, + "step": 8142 + }, + { + "epoch": 0.3394189487724563, + "grad_norm": 184.0, + "learning_rate": 7.692638783052058e-05, + "loss": 9.438, + "step": 8143 + }, + { + "epoch": 0.3394606310699846, + "grad_norm": 48.25, + "learning_rate": 7.6920699968861e-05, + "loss": 6.719, + "step": 8144 + }, + { + "epoch": 0.3395023133675128, + "grad_norm": 1128.0, + "learning_rate": 7.69150116165693e-05, + "loss": 27.1293, + "step": 8145 + }, + { + "epoch": 0.33954399566504107, + "grad_norm": 344.0, + "learning_rate": 7.690932277374911e-05, + "loss": 13.3752, + "step": 8146 + }, + { + "epoch": 0.3395856779625693, + "grad_norm": 278.0, + "learning_rate": 7.690363344050413e-05, + "loss": 12.3754, + "step": 8147 + }, + { + "epoch": 0.33962736026009754, + "grad_norm": 748.0, + "learning_rate": 7.689794361693804e-05, + "loss": 24.0002, + "step": 8148 + }, + { + "epoch": 0.33966904255762576, + "grad_norm": 180.0, + "learning_rate": 7.689225330315454e-05, + "loss": 9.3752, + "step": 8149 + }, + { + "epoch": 0.339710724855154, + "grad_norm": 420.0, + "learning_rate": 7.688656249925735e-05, + "loss": 14.8753, + "step": 8150 + }, + { + "epoch": 0.33975240715268223, + "grad_norm": 338.0, + "learning_rate": 7.688087120535015e-05, + "loss": 14.1879, + "step": 8151 + }, + { + "epoch": 0.3397940894502105, + "grad_norm": 544.0, + "learning_rate": 7.687517942153668e-05, + "loss": 18.0001, + "step": 8152 + }, + { + "epoch": 0.3398357717477387, + "grad_norm": 236.0, + "learning_rate": 7.686948714792069e-05, + "loss": 9.8756, + "step": 8153 + }, + { + "epoch": 0.339877454045267, + "grad_norm": 456.0, + "learning_rate": 7.686379438460592e-05, + "loss": 16.0006, + "step": 8154 + }, + { + "epoch": 0.3399191363427952, + "grad_norm": 240.0, + "learning_rate": 7.685810113169609e-05, + "loss": 8.0643, + "step": 8155 + }, + { + "epoch": 0.33996081864032346, + "grad_norm": 364.0, + "learning_rate": 7.6852407389295e-05, + "loss": 15.188, + "step": 8156 + }, + { + "epoch": 0.34000250093785167, + "grad_norm": 446.0, + "learning_rate": 7.68467131575064e-05, + "loss": 17.1258, + "step": 8157 + }, + { + "epoch": 0.34004418323537994, + "grad_norm": 184.0, + "learning_rate": 7.684101843643408e-05, + "loss": 11.0633, + "step": 8158 + }, + { + "epoch": 0.34008586553290815, + "grad_norm": 484.0, + "learning_rate": 7.68353232261818e-05, + "loss": 17.8773, + "step": 8159 + }, + { + "epoch": 0.3401275478304364, + "grad_norm": 61.0, + "learning_rate": 7.682962752685339e-05, + "loss": 7.7191, + "step": 8160 + }, + { + "epoch": 0.34016923012796463, + "grad_norm": 336.0, + "learning_rate": 7.682393133855262e-05, + "loss": 12.938, + "step": 8161 + }, + { + "epoch": 0.3402109124254929, + "grad_norm": 118.5, + "learning_rate": 7.681823466138331e-05, + "loss": 9.188, + "step": 8162 + }, + { + "epoch": 0.3402525947230211, + "grad_norm": 102.5, + "learning_rate": 7.681253749544932e-05, + "loss": 5.5004, + "step": 8163 + }, + { + "epoch": 0.3402942770205494, + "grad_norm": 216.0, + "learning_rate": 7.680683984085444e-05, + "loss": 10.813, + "step": 8164 + }, + { + "epoch": 0.3403359593180776, + "grad_norm": 516.0, + "learning_rate": 7.680114169770252e-05, + "loss": 16.8771, + "step": 8165 + }, + { + "epoch": 0.34037764161560585, + "grad_norm": 356.0, + "learning_rate": 7.679544306609743e-05, + "loss": 14.5008, + "step": 8166 + }, + { + "epoch": 0.34041932391313406, + "grad_norm": 408.0, + "learning_rate": 7.678974394614298e-05, + "loss": 14.6891, + "step": 8167 + }, + { + "epoch": 0.34046100621066233, + "grad_norm": 117.0, + "learning_rate": 7.678404433794308e-05, + "loss": 9.3753, + "step": 8168 + }, + { + "epoch": 0.3405026885081906, + "grad_norm": 213.0, + "learning_rate": 7.677834424160162e-05, + "loss": 11.5003, + "step": 8169 + }, + { + "epoch": 0.3405443708057188, + "grad_norm": 498.0, + "learning_rate": 7.677264365722243e-05, + "loss": 17.2502, + "step": 8170 + }, + { + "epoch": 0.3405860531032471, + "grad_norm": 270.0, + "learning_rate": 7.676694258490945e-05, + "loss": 7.7508, + "step": 8171 + }, + { + "epoch": 0.3406277354007753, + "grad_norm": 240.0, + "learning_rate": 7.676124102476656e-05, + "loss": 11.8754, + "step": 8172 + }, + { + "epoch": 0.34066941769830356, + "grad_norm": 344.0, + "learning_rate": 7.675553897689766e-05, + "loss": 13.8128, + "step": 8173 + }, + { + "epoch": 0.34071109999583177, + "grad_norm": 131.0, + "learning_rate": 7.674983644140672e-05, + "loss": 8.3128, + "step": 8174 + }, + { + "epoch": 0.34075278229336003, + "grad_norm": 187.0, + "learning_rate": 7.674413341839761e-05, + "loss": 9.5628, + "step": 8175 + }, + { + "epoch": 0.34079446459088825, + "grad_norm": 390.0, + "learning_rate": 7.67384299079743e-05, + "loss": 15.7505, + "step": 8176 + }, + { + "epoch": 0.3408361468884165, + "grad_norm": 170.0, + "learning_rate": 7.673272591024074e-05, + "loss": 10.7504, + "step": 8177 + }, + { + "epoch": 0.3408778291859447, + "grad_norm": 184.0, + "learning_rate": 7.672702142530088e-05, + "loss": 10.9376, + "step": 8178 + }, + { + "epoch": 0.340919511483473, + "grad_norm": 532.0, + "learning_rate": 7.672131645325867e-05, + "loss": 18.6255, + "step": 8179 + }, + { + "epoch": 0.3409611937810012, + "grad_norm": 316.0, + "learning_rate": 7.67156109942181e-05, + "loss": 14.563, + "step": 8180 + }, + { + "epoch": 0.34100287607852947, + "grad_norm": 234.0, + "learning_rate": 7.670990504828314e-05, + "loss": 11.5003, + "step": 8181 + }, + { + "epoch": 0.3410445583760577, + "grad_norm": 352.0, + "learning_rate": 7.67041986155578e-05, + "loss": 14.813, + "step": 8182 + }, + { + "epoch": 0.34108624067358595, + "grad_norm": 266.0, + "learning_rate": 7.669849169614607e-05, + "loss": 11.938, + "step": 8183 + }, + { + "epoch": 0.34112792297111416, + "grad_norm": 668.0, + "learning_rate": 7.669278429015195e-05, + "loss": 20.2509, + "step": 8184 + }, + { + "epoch": 0.34116960526864243, + "grad_norm": 600.0, + "learning_rate": 7.668707639767948e-05, + "loss": 19.1281, + "step": 8185 + }, + { + "epoch": 0.34121128756617064, + "grad_norm": 209.0, + "learning_rate": 7.668136801883266e-05, + "loss": 10.8752, + "step": 8186 + }, + { + "epoch": 0.3412529698636989, + "grad_norm": 656.0, + "learning_rate": 7.667565915371556e-05, + "loss": 16.8796, + "step": 8187 + }, + { + "epoch": 0.3412946521612271, + "grad_norm": 768.0, + "learning_rate": 7.666994980243217e-05, + "loss": 23.5002, + "step": 8188 + }, + { + "epoch": 0.3413363344587554, + "grad_norm": 692.0, + "learning_rate": 7.66642399650866e-05, + "loss": 15.5003, + "step": 8189 + }, + { + "epoch": 0.3413780167562836, + "grad_norm": 252.0, + "learning_rate": 7.665852964178289e-05, + "loss": 12.6259, + "step": 8190 + }, + { + "epoch": 0.34141969905381186, + "grad_norm": 250.0, + "learning_rate": 7.66528188326251e-05, + "loss": 12.5003, + "step": 8191 + }, + { + "epoch": 0.3414613813513401, + "grad_norm": 191.0, + "learning_rate": 7.664710753771734e-05, + "loss": 9.8134, + "step": 8192 + }, + { + "epoch": 0.34150306364886834, + "grad_norm": 207.0, + "learning_rate": 7.664139575716365e-05, + "loss": 11.0627, + "step": 8193 + }, + { + "epoch": 0.34154474594639656, + "grad_norm": 616.0, + "learning_rate": 7.663568349106817e-05, + "loss": 15.3786, + "step": 8194 + }, + { + "epoch": 0.3415864282439248, + "grad_norm": 460.0, + "learning_rate": 7.6629970739535e-05, + "loss": 13.8127, + "step": 8195 + }, + { + "epoch": 0.34162811054145303, + "grad_norm": 182.0, + "learning_rate": 7.662425750266824e-05, + "loss": 10.3127, + "step": 8196 + }, + { + "epoch": 0.3416697928389813, + "grad_norm": 199.0, + "learning_rate": 7.661854378057203e-05, + "loss": 9.8133, + "step": 8197 + }, + { + "epoch": 0.3417114751365095, + "grad_norm": 564.0, + "learning_rate": 7.66128295733505e-05, + "loss": 19.8753, + "step": 8198 + }, + { + "epoch": 0.3417531574340378, + "grad_norm": 316.0, + "learning_rate": 7.66071148811078e-05, + "loss": 15.3129, + "step": 8199 + }, + { + "epoch": 0.341794839731566, + "grad_norm": 328.0, + "learning_rate": 7.660139970394803e-05, + "loss": 13.5628, + "step": 8200 + }, + { + "epoch": 0.34183652202909426, + "grad_norm": 214.0, + "learning_rate": 7.659568404197544e-05, + "loss": 11.7512, + "step": 8201 + }, + { + "epoch": 0.34187820432662247, + "grad_norm": 1176.0, + "learning_rate": 7.658996789529411e-05, + "loss": 26.6297, + "step": 8202 + }, + { + "epoch": 0.34191988662415074, + "grad_norm": 192.0, + "learning_rate": 7.658425126400827e-05, + "loss": 8.7503, + "step": 8203 + }, + { + "epoch": 0.34196156892167895, + "grad_norm": 528.0, + "learning_rate": 7.657853414822208e-05, + "loss": 17.5001, + "step": 8204 + }, + { + "epoch": 0.3420032512192072, + "grad_norm": 197.0, + "learning_rate": 7.657281654803977e-05, + "loss": 11.1878, + "step": 8205 + }, + { + "epoch": 0.3420449335167354, + "grad_norm": 552.0, + "learning_rate": 7.656709846356548e-05, + "loss": 17.7512, + "step": 8206 + }, + { + "epoch": 0.3420866158142637, + "grad_norm": 356.0, + "learning_rate": 7.656137989490349e-05, + "loss": 13.6878, + "step": 8207 + }, + { + "epoch": 0.3421282981117919, + "grad_norm": 69.0, + "learning_rate": 7.655566084215797e-05, + "loss": 5.6879, + "step": 8208 + }, + { + "epoch": 0.3421699804093202, + "grad_norm": 446.0, + "learning_rate": 7.65499413054332e-05, + "loss": 14.7508, + "step": 8209 + }, + { + "epoch": 0.3422116627068484, + "grad_norm": 138.0, + "learning_rate": 7.654422128483338e-05, + "loss": 9.9379, + "step": 8210 + }, + { + "epoch": 0.34225334500437665, + "grad_norm": 608.0, + "learning_rate": 7.653850078046278e-05, + "loss": 17.3752, + "step": 8211 + }, + { + "epoch": 0.34229502730190486, + "grad_norm": 524.0, + "learning_rate": 7.653277979242564e-05, + "loss": 17.3757, + "step": 8212 + }, + { + "epoch": 0.34233670959943313, + "grad_norm": 336.0, + "learning_rate": 7.652705832082624e-05, + "loss": 14.0004, + "step": 8213 + }, + { + "epoch": 0.34237839189696134, + "grad_norm": 456.0, + "learning_rate": 7.652133636576884e-05, + "loss": 14.5027, + "step": 8214 + }, + { + "epoch": 0.3424200741944896, + "grad_norm": 292.0, + "learning_rate": 7.651561392735773e-05, + "loss": 12.938, + "step": 8215 + }, + { + "epoch": 0.3424617564920178, + "grad_norm": 282.0, + "learning_rate": 7.650989100569721e-05, + "loss": 8.063, + "step": 8216 + }, + { + "epoch": 0.3425034387895461, + "grad_norm": 920.0, + "learning_rate": 7.650416760089156e-05, + "loss": 27.2502, + "step": 8217 + }, + { + "epoch": 0.3425451210870743, + "grad_norm": 215.0, + "learning_rate": 7.64984437130451e-05, + "loss": 11.1254, + "step": 8218 + }, + { + "epoch": 0.34258680338460257, + "grad_norm": 724.0, + "learning_rate": 7.649271934226216e-05, + "loss": 22.2504, + "step": 8219 + }, + { + "epoch": 0.3426284856821308, + "grad_norm": 211.0, + "learning_rate": 7.648699448864707e-05, + "loss": 11.1877, + "step": 8220 + }, + { + "epoch": 0.34267016797965905, + "grad_norm": 1616.0, + "learning_rate": 7.648126915230414e-05, + "loss": 27.135, + "step": 8221 + }, + { + "epoch": 0.34271185027718726, + "grad_norm": 104.5, + "learning_rate": 7.647554333333773e-05, + "loss": 7.594, + "step": 8222 + }, + { + "epoch": 0.3427535325747155, + "grad_norm": 342.0, + "learning_rate": 7.646981703185221e-05, + "loss": 13.4379, + "step": 8223 + }, + { + "epoch": 0.34279521487224374, + "grad_norm": 244.0, + "learning_rate": 7.64640902479519e-05, + "loss": 12.3757, + "step": 8224 + }, + { + "epoch": 0.342836897169772, + "grad_norm": 300.0, + "learning_rate": 7.64583629817412e-05, + "loss": 11.2505, + "step": 8225 + }, + { + "epoch": 0.3428785794673002, + "grad_norm": 390.0, + "learning_rate": 7.64526352333245e-05, + "loss": 15.1252, + "step": 8226 + }, + { + "epoch": 0.3429202617648285, + "grad_norm": 520.0, + "learning_rate": 7.644690700280617e-05, + "loss": 18.5003, + "step": 8227 + }, + { + "epoch": 0.3429619440623567, + "grad_norm": 636.0, + "learning_rate": 7.644117829029061e-05, + "loss": 19.8752, + "step": 8228 + }, + { + "epoch": 0.34300362635988496, + "grad_norm": 136.0, + "learning_rate": 7.643544909588222e-05, + "loss": 10.6879, + "step": 8229 + }, + { + "epoch": 0.3430453086574132, + "grad_norm": 452.0, + "learning_rate": 7.642971941968543e-05, + "loss": 15.7503, + "step": 8230 + }, + { + "epoch": 0.34308699095494144, + "grad_norm": 402.0, + "learning_rate": 7.642398926180465e-05, + "loss": 15.4394, + "step": 8231 + }, + { + "epoch": 0.34312867325246965, + "grad_norm": 264.0, + "learning_rate": 7.641825862234432e-05, + "loss": 12.3754, + "step": 8232 + }, + { + "epoch": 0.3431703555499979, + "grad_norm": 452.0, + "learning_rate": 7.64125275014089e-05, + "loss": 17.0003, + "step": 8233 + }, + { + "epoch": 0.34321203784752613, + "grad_norm": 116.0, + "learning_rate": 7.64067958991028e-05, + "loss": 10.0628, + "step": 8234 + }, + { + "epoch": 0.3432537201450544, + "grad_norm": 252.0, + "learning_rate": 7.640106381553051e-05, + "loss": 12.3761, + "step": 8235 + }, + { + "epoch": 0.3432954024425826, + "grad_norm": 724.0, + "learning_rate": 7.63953312507965e-05, + "loss": 21.3752, + "step": 8236 + }, + { + "epoch": 0.3433370847401109, + "grad_norm": 324.0, + "learning_rate": 7.638959820500521e-05, + "loss": 12.3751, + "step": 8237 + }, + { + "epoch": 0.3433787670376391, + "grad_norm": 348.0, + "learning_rate": 7.638386467826118e-05, + "loss": 14.6252, + "step": 8238 + }, + { + "epoch": 0.34342044933516735, + "grad_norm": 656.0, + "learning_rate": 7.637813067066886e-05, + "loss": 19.5008, + "step": 8239 + }, + { + "epoch": 0.34346213163269557, + "grad_norm": 422.0, + "learning_rate": 7.637239618233276e-05, + "loss": 15.3129, + "step": 8240 + }, + { + "epoch": 0.34350381393022383, + "grad_norm": 115.5, + "learning_rate": 7.63666612133574e-05, + "loss": 11.3128, + "step": 8241 + }, + { + "epoch": 0.3435454962277521, + "grad_norm": 476.0, + "learning_rate": 7.63609257638473e-05, + "loss": 16.3754, + "step": 8242 + }, + { + "epoch": 0.3435871785252803, + "grad_norm": 238.0, + "learning_rate": 7.635518983390699e-05, + "loss": 12.1889, + "step": 8243 + }, + { + "epoch": 0.3436288608228086, + "grad_norm": 478.0, + "learning_rate": 7.634945342364101e-05, + "loss": 14.6253, + "step": 8244 + }, + { + "epoch": 0.3436705431203368, + "grad_norm": 276.0, + "learning_rate": 7.63437165331539e-05, + "loss": 11.6274, + "step": 8245 + }, + { + "epoch": 0.34371222541786506, + "grad_norm": 572.0, + "learning_rate": 7.63379791625502e-05, + "loss": 17.8795, + "step": 8246 + }, + { + "epoch": 0.34375390771539327, + "grad_norm": 416.0, + "learning_rate": 7.633224131193452e-05, + "loss": 15.1251, + "step": 8247 + }, + { + "epoch": 0.34379559001292154, + "grad_norm": 223.0, + "learning_rate": 7.632650298141139e-05, + "loss": 12.5005, + "step": 8248 + }, + { + "epoch": 0.34383727231044975, + "grad_norm": 221.0, + "learning_rate": 7.63207641710854e-05, + "loss": 11.9378, + "step": 8249 + }, + { + "epoch": 0.343878954607978, + "grad_norm": 600.0, + "learning_rate": 7.631502488106116e-05, + "loss": 19.5007, + "step": 8250 + }, + { + "epoch": 0.3439206369055062, + "grad_norm": 672.0, + "learning_rate": 7.630928511144325e-05, + "loss": 20.6251, + "step": 8251 + }, + { + "epoch": 0.3439623192030345, + "grad_norm": 1632.0, + "learning_rate": 7.630354486233628e-05, + "loss": 31.5046, + "step": 8252 + }, + { + "epoch": 0.3440040015005627, + "grad_norm": 372.0, + "learning_rate": 7.629780413384488e-05, + "loss": 16.0004, + "step": 8253 + }, + { + "epoch": 0.344045683798091, + "grad_norm": 139.0, + "learning_rate": 7.629206292607366e-05, + "loss": 9.8129, + "step": 8254 + }, + { + "epoch": 0.3440873660956192, + "grad_norm": 358.0, + "learning_rate": 7.628632123912725e-05, + "loss": 15.5004, + "step": 8255 + }, + { + "epoch": 0.34412904839314745, + "grad_norm": 454.0, + "learning_rate": 7.62805790731103e-05, + "loss": 15.6885, + "step": 8256 + }, + { + "epoch": 0.34417073069067566, + "grad_norm": 480.0, + "learning_rate": 7.627483642812747e-05, + "loss": 17.0008, + "step": 8257 + }, + { + "epoch": 0.34421241298820393, + "grad_norm": 193.0, + "learning_rate": 7.626909330428342e-05, + "loss": 10.8127, + "step": 8258 + }, + { + "epoch": 0.34425409528573214, + "grad_norm": 152.0, + "learning_rate": 7.626334970168281e-05, + "loss": 9.814, + "step": 8259 + }, + { + "epoch": 0.3442957775832604, + "grad_norm": 808.0, + "learning_rate": 7.625760562043032e-05, + "loss": 22.3759, + "step": 8260 + }, + { + "epoch": 0.3443374598807886, + "grad_norm": 358.0, + "learning_rate": 7.625186106063065e-05, + "loss": 15.3762, + "step": 8261 + }, + { + "epoch": 0.3443791421783169, + "grad_norm": 302.0, + "learning_rate": 7.624611602238846e-05, + "loss": 12.5005, + "step": 8262 + }, + { + "epoch": 0.3444208244758451, + "grad_norm": 136.0, + "learning_rate": 7.624037050580848e-05, + "loss": 9.8759, + "step": 8263 + }, + { + "epoch": 0.34446250677337337, + "grad_norm": 556.0, + "learning_rate": 7.623462451099542e-05, + "loss": 18.1253, + "step": 8264 + }, + { + "epoch": 0.3445041890709016, + "grad_norm": 492.0, + "learning_rate": 7.622887803805401e-05, + "loss": 16.3751, + "step": 8265 + }, + { + "epoch": 0.34454587136842985, + "grad_norm": 552.0, + "learning_rate": 7.622313108708896e-05, + "loss": 17.3755, + "step": 8266 + }, + { + "epoch": 0.34458755366595806, + "grad_norm": 260.0, + "learning_rate": 7.621738365820501e-05, + "loss": 11.501, + "step": 8267 + }, + { + "epoch": 0.3446292359634863, + "grad_norm": 444.0, + "learning_rate": 7.621163575150692e-05, + "loss": 15.9383, + "step": 8268 + }, + { + "epoch": 0.34467091826101454, + "grad_norm": 217.0, + "learning_rate": 7.620588736709944e-05, + "loss": 11.6253, + "step": 8269 + }, + { + "epoch": 0.3447126005585428, + "grad_norm": 1240.0, + "learning_rate": 7.620013850508733e-05, + "loss": 29.1253, + "step": 8270 + }, + { + "epoch": 0.344754282856071, + "grad_norm": 352.0, + "learning_rate": 7.61943891655754e-05, + "loss": 13.0003, + "step": 8271 + }, + { + "epoch": 0.3447959651535993, + "grad_norm": 632.0, + "learning_rate": 7.618863934866838e-05, + "loss": 19.8767, + "step": 8272 + }, + { + "epoch": 0.3448376474511275, + "grad_norm": 532.0, + "learning_rate": 7.618288905447108e-05, + "loss": 17.6253, + "step": 8273 + }, + { + "epoch": 0.34487932974865576, + "grad_norm": 584.0, + "learning_rate": 7.617713828308831e-05, + "loss": 18.2502, + "step": 8274 + }, + { + "epoch": 0.344921012046184, + "grad_norm": 316.0, + "learning_rate": 7.617138703462488e-05, + "loss": 12.9388, + "step": 8275 + }, + { + "epoch": 0.34496269434371224, + "grad_norm": 304.0, + "learning_rate": 7.616563530918558e-05, + "loss": 12.5003, + "step": 8276 + }, + { + "epoch": 0.34500437664124045, + "grad_norm": 332.0, + "learning_rate": 7.615988310687525e-05, + "loss": 13.8128, + "step": 8277 + }, + { + "epoch": 0.3450460589387687, + "grad_norm": 270.0, + "learning_rate": 7.615413042779876e-05, + "loss": 12.0002, + "step": 8278 + }, + { + "epoch": 0.34508774123629693, + "grad_norm": 644.0, + "learning_rate": 7.614837727206089e-05, + "loss": 18.8752, + "step": 8279 + }, + { + "epoch": 0.3451294235338252, + "grad_norm": 374.0, + "learning_rate": 7.614262363976654e-05, + "loss": 14.9379, + "step": 8280 + }, + { + "epoch": 0.3451711058313534, + "grad_norm": 43.75, + "learning_rate": 7.613686953102053e-05, + "loss": 7.1876, + "step": 8281 + }, + { + "epoch": 0.3452127881288817, + "grad_norm": 632.0, + "learning_rate": 7.613111494592777e-05, + "loss": 19.1277, + "step": 8282 + }, + { + "epoch": 0.3452544704264099, + "grad_norm": 143.0, + "learning_rate": 7.612535988459312e-05, + "loss": 9.1256, + "step": 8283 + }, + { + "epoch": 0.34529615272393815, + "grad_norm": 172.0, + "learning_rate": 7.611960434712147e-05, + "loss": 11.3148, + "step": 8284 + }, + { + "epoch": 0.34533783502146637, + "grad_norm": 159.0, + "learning_rate": 7.61138483336177e-05, + "loss": 11.5636, + "step": 8285 + }, + { + "epoch": 0.34537951731899463, + "grad_norm": 374.0, + "learning_rate": 7.610809184418674e-05, + "loss": 15.0001, + "step": 8286 + }, + { + "epoch": 0.34542119961652284, + "grad_norm": 532.0, + "learning_rate": 7.610233487893349e-05, + "loss": 17.2506, + "step": 8287 + }, + { + "epoch": 0.3454628819140511, + "grad_norm": 310.0, + "learning_rate": 7.609657743796285e-05, + "loss": 11.9381, + "step": 8288 + }, + { + "epoch": 0.3455045642115793, + "grad_norm": 488.0, + "learning_rate": 7.609081952137979e-05, + "loss": 14.7506, + "step": 8289 + }, + { + "epoch": 0.3455462465091076, + "grad_norm": 330.0, + "learning_rate": 7.608506112928923e-05, + "loss": 12.8128, + "step": 8290 + }, + { + "epoch": 0.3455879288066358, + "grad_norm": 181.0, + "learning_rate": 7.60793022617961e-05, + "loss": 8.8756, + "step": 8291 + }, + { + "epoch": 0.34562961110416407, + "grad_norm": 414.0, + "learning_rate": 7.607354291900538e-05, + "loss": 15.0627, + "step": 8292 + }, + { + "epoch": 0.3456712934016923, + "grad_norm": 484.0, + "learning_rate": 7.606778310102203e-05, + "loss": 16.3757, + "step": 8293 + }, + { + "epoch": 0.34571297569922055, + "grad_norm": 342.0, + "learning_rate": 7.6062022807951e-05, + "loss": 13.7502, + "step": 8294 + }, + { + "epoch": 0.34575465799674876, + "grad_norm": 430.0, + "learning_rate": 7.60562620398973e-05, + "loss": 15.4377, + "step": 8295 + }, + { + "epoch": 0.345796340294277, + "grad_norm": 358.0, + "learning_rate": 7.605050079696591e-05, + "loss": 15.1253, + "step": 8296 + }, + { + "epoch": 0.34583802259180524, + "grad_norm": 348.0, + "learning_rate": 7.604473907926184e-05, + "loss": 14.0632, + "step": 8297 + }, + { + "epoch": 0.3458797048893335, + "grad_norm": 1224.0, + "learning_rate": 7.603897688689009e-05, + "loss": 27.7505, + "step": 8298 + }, + { + "epoch": 0.3459213871868617, + "grad_norm": 262.0, + "learning_rate": 7.603321421995567e-05, + "loss": 12.6252, + "step": 8299 + }, + { + "epoch": 0.34596306948439, + "grad_norm": 402.0, + "learning_rate": 7.602745107856359e-05, + "loss": 15.0006, + "step": 8300 + }, + { + "epoch": 0.3460047517819182, + "grad_norm": 364.0, + "learning_rate": 7.602168746281893e-05, + "loss": 15.0009, + "step": 8301 + }, + { + "epoch": 0.34604643407944646, + "grad_norm": 260.0, + "learning_rate": 7.601592337282668e-05, + "loss": 13.1253, + "step": 8302 + }, + { + "epoch": 0.3460881163769747, + "grad_norm": 306.0, + "learning_rate": 7.601015880869194e-05, + "loss": 12.4394, + "step": 8303 + }, + { + "epoch": 0.34612979867450294, + "grad_norm": 228.0, + "learning_rate": 7.600439377051974e-05, + "loss": 11.5011, + "step": 8304 + }, + { + "epoch": 0.34617148097203115, + "grad_norm": 241.0, + "learning_rate": 7.599862825841515e-05, + "loss": 12.0629, + "step": 8305 + }, + { + "epoch": 0.3462131632695594, + "grad_norm": 376.0, + "learning_rate": 7.599286227248327e-05, + "loss": 14.751, + "step": 8306 + }, + { + "epoch": 0.34625484556708763, + "grad_norm": 438.0, + "learning_rate": 7.598709581282915e-05, + "loss": 15.813, + "step": 8307 + }, + { + "epoch": 0.3462965278646159, + "grad_norm": 76.5, + "learning_rate": 7.59813288795579e-05, + "loss": 8.6255, + "step": 8308 + }, + { + "epoch": 0.3463382101621441, + "grad_norm": 162.0, + "learning_rate": 7.597556147277462e-05, + "loss": 7.7512, + "step": 8309 + }, + { + "epoch": 0.3463798924596724, + "grad_norm": 740.0, + "learning_rate": 7.596979359258445e-05, + "loss": 21.8755, + "step": 8310 + }, + { + "epoch": 0.3464215747572006, + "grad_norm": 215.0, + "learning_rate": 7.596402523909248e-05, + "loss": 12.1254, + "step": 8311 + }, + { + "epoch": 0.34646325705472886, + "grad_norm": 1256.0, + "learning_rate": 7.595825641240384e-05, + "loss": 29.7504, + "step": 8312 + }, + { + "epoch": 0.3465049393522571, + "grad_norm": 1312.0, + "learning_rate": 7.595248711262365e-05, + "loss": 28.0007, + "step": 8313 + }, + { + "epoch": 0.34654662164978534, + "grad_norm": 174.0, + "learning_rate": 7.594671733985712e-05, + "loss": 11.2504, + "step": 8314 + }, + { + "epoch": 0.3465883039473136, + "grad_norm": 1160.0, + "learning_rate": 7.594094709420934e-05, + "loss": 30.7525, + "step": 8315 + }, + { + "epoch": 0.3466299862448418, + "grad_norm": 348.0, + "learning_rate": 7.593517637578549e-05, + "loss": 14.0628, + "step": 8316 + }, + { + "epoch": 0.3466716685423701, + "grad_norm": 308.0, + "learning_rate": 7.592940518469076e-05, + "loss": 13.6879, + "step": 8317 + }, + { + "epoch": 0.3467133508398983, + "grad_norm": 215.0, + "learning_rate": 7.592363352103031e-05, + "loss": 11.7509, + "step": 8318 + }, + { + "epoch": 0.34675503313742656, + "grad_norm": 1016.0, + "learning_rate": 7.591786138490935e-05, + "loss": 23.6312, + "step": 8319 + }, + { + "epoch": 0.34679671543495477, + "grad_norm": 348.0, + "learning_rate": 7.591208877643305e-05, + "loss": 14.7519, + "step": 8320 + }, + { + "epoch": 0.34683839773248304, + "grad_norm": 354.0, + "learning_rate": 7.590631569570665e-05, + "loss": 13.5628, + "step": 8321 + }, + { + "epoch": 0.34688008003001125, + "grad_norm": 342.0, + "learning_rate": 7.590054214283533e-05, + "loss": 13.9376, + "step": 8322 + }, + { + "epoch": 0.3469217623275395, + "grad_norm": 80.0, + "learning_rate": 7.589476811792434e-05, + "loss": 7.5941, + "step": 8323 + }, + { + "epoch": 0.34696344462506773, + "grad_norm": 58.25, + "learning_rate": 7.58889936210789e-05, + "loss": 7.688, + "step": 8324 + }, + { + "epoch": 0.347005126922596, + "grad_norm": 398.0, + "learning_rate": 7.588321865240427e-05, + "loss": 14.3752, + "step": 8325 + }, + { + "epoch": 0.3470468092201242, + "grad_norm": 197.0, + "learning_rate": 7.587744321200565e-05, + "loss": 11.2502, + "step": 8326 + }, + { + "epoch": 0.3470884915176525, + "grad_norm": 200.0, + "learning_rate": 7.587166729998836e-05, + "loss": 11.9377, + "step": 8327 + }, + { + "epoch": 0.3471301738151807, + "grad_norm": 568.0, + "learning_rate": 7.586589091645761e-05, + "loss": 16.8769, + "step": 8328 + }, + { + "epoch": 0.34717185611270895, + "grad_norm": 358.0, + "learning_rate": 7.586011406151872e-05, + "loss": 14.6254, + "step": 8329 + }, + { + "epoch": 0.34721353841023717, + "grad_norm": 596.0, + "learning_rate": 7.585433673527696e-05, + "loss": 19.8754, + "step": 8330 + }, + { + "epoch": 0.34725522070776543, + "grad_norm": 116.5, + "learning_rate": 7.58485589378376e-05, + "loss": 8.7502, + "step": 8331 + }, + { + "epoch": 0.34729690300529364, + "grad_norm": 552.0, + "learning_rate": 7.584278066930596e-05, + "loss": 17.7504, + "step": 8332 + }, + { + "epoch": 0.3473385853028219, + "grad_norm": 237.0, + "learning_rate": 7.583700192978736e-05, + "loss": 12.189, + "step": 8333 + }, + { + "epoch": 0.3473802676003501, + "grad_norm": 284.0, + "learning_rate": 7.58312227193871e-05, + "loss": 13.0004, + "step": 8334 + }, + { + "epoch": 0.3474219498978784, + "grad_norm": 490.0, + "learning_rate": 7.582544303821052e-05, + "loss": 18.3754, + "step": 8335 + }, + { + "epoch": 0.3474636321954066, + "grad_norm": 177.0, + "learning_rate": 7.581966288636293e-05, + "loss": 10.1877, + "step": 8336 + }, + { + "epoch": 0.34750531449293487, + "grad_norm": 179.0, + "learning_rate": 7.58138822639497e-05, + "loss": 10.6253, + "step": 8337 + }, + { + "epoch": 0.3475469967904631, + "grad_norm": 234.0, + "learning_rate": 7.580810117107619e-05, + "loss": 13.9383, + "step": 8338 + }, + { + "epoch": 0.34758867908799135, + "grad_norm": 406.0, + "learning_rate": 7.580231960784773e-05, + "loss": 15.0006, + "step": 8339 + }, + { + "epoch": 0.34763036138551956, + "grad_norm": 580.0, + "learning_rate": 7.579653757436971e-05, + "loss": 19.5002, + "step": 8340 + }, + { + "epoch": 0.3476720436830478, + "grad_norm": 612.0, + "learning_rate": 7.57907550707475e-05, + "loss": 19.3778, + "step": 8341 + }, + { + "epoch": 0.34771372598057604, + "grad_norm": 460.0, + "learning_rate": 7.578497209708648e-05, + "loss": 15.8752, + "step": 8342 + }, + { + "epoch": 0.3477554082781043, + "grad_norm": 78.0, + "learning_rate": 7.577918865349207e-05, + "loss": 8.813, + "step": 8343 + }, + { + "epoch": 0.3477970905756325, + "grad_norm": 324.0, + "learning_rate": 7.577340474006965e-05, + "loss": 15.0628, + "step": 8344 + }, + { + "epoch": 0.3478387728731608, + "grad_norm": 326.0, + "learning_rate": 7.576762035692464e-05, + "loss": 13.0017, + "step": 8345 + }, + { + "epoch": 0.347880455170689, + "grad_norm": 198.0, + "learning_rate": 7.576183550416247e-05, + "loss": 11.0637, + "step": 8346 + }, + { + "epoch": 0.34792213746821726, + "grad_norm": 270.0, + "learning_rate": 7.575605018188855e-05, + "loss": 10.7501, + "step": 8347 + }, + { + "epoch": 0.3479638197657455, + "grad_norm": 324.0, + "learning_rate": 7.575026439020833e-05, + "loss": 12.5627, + "step": 8348 + }, + { + "epoch": 0.34800550206327374, + "grad_norm": 149.0, + "learning_rate": 7.574447812922728e-05, + "loss": 9.6879, + "step": 8349 + }, + { + "epoch": 0.34804718436080195, + "grad_norm": 312.0, + "learning_rate": 7.57386913990508e-05, + "loss": 14.3751, + "step": 8350 + }, + { + "epoch": 0.3480888666583302, + "grad_norm": 120.0, + "learning_rate": 7.57329041997844e-05, + "loss": 9.2512, + "step": 8351 + }, + { + "epoch": 0.34813054895585843, + "grad_norm": 346.0, + "learning_rate": 7.572711653153353e-05, + "loss": 13.3753, + "step": 8352 + }, + { + "epoch": 0.3481722312533867, + "grad_norm": 384.0, + "learning_rate": 7.572132839440367e-05, + "loss": 14.379, + "step": 8353 + }, + { + "epoch": 0.3482139135509149, + "grad_norm": 174.0, + "learning_rate": 7.571553978850033e-05, + "loss": 10.188, + "step": 8354 + }, + { + "epoch": 0.3482555958484432, + "grad_norm": 220.0, + "learning_rate": 7.5709750713929e-05, + "loss": 13.9378, + "step": 8355 + }, + { + "epoch": 0.3482972781459714, + "grad_norm": 190.0, + "learning_rate": 7.570396117079517e-05, + "loss": 8.563, + "step": 8356 + }, + { + "epoch": 0.34833896044349966, + "grad_norm": 380.0, + "learning_rate": 7.569817115920434e-05, + "loss": 15.1253, + "step": 8357 + }, + { + "epoch": 0.34838064274102787, + "grad_norm": 406.0, + "learning_rate": 7.56923806792621e-05, + "loss": 15.751, + "step": 8358 + }, + { + "epoch": 0.34842232503855614, + "grad_norm": 188.0, + "learning_rate": 7.568658973107393e-05, + "loss": 11.2509, + "step": 8359 + }, + { + "epoch": 0.34846400733608435, + "grad_norm": 422.0, + "learning_rate": 7.568079831474537e-05, + "loss": 16.1254, + "step": 8360 + }, + { + "epoch": 0.3485056896336126, + "grad_norm": 75.5, + "learning_rate": 7.567500643038201e-05, + "loss": 9.4382, + "step": 8361 + }, + { + "epoch": 0.3485473719311408, + "grad_norm": 264.0, + "learning_rate": 7.566921407808936e-05, + "loss": 12.8127, + "step": 8362 + }, + { + "epoch": 0.3485890542286691, + "grad_norm": 488.0, + "learning_rate": 7.566342125797299e-05, + "loss": 15.6879, + "step": 8363 + }, + { + "epoch": 0.3486307365261973, + "grad_norm": 239.0, + "learning_rate": 7.56576279701385e-05, + "loss": 10.0008, + "step": 8364 + }, + { + "epoch": 0.34867241882372557, + "grad_norm": 362.0, + "learning_rate": 7.565183421469148e-05, + "loss": 14.8753, + "step": 8365 + }, + { + "epoch": 0.3487141011212538, + "grad_norm": 91.0, + "learning_rate": 7.564603999173749e-05, + "loss": 7.8757, + "step": 8366 + }, + { + "epoch": 0.34875578341878205, + "grad_norm": 159.0, + "learning_rate": 7.564024530138214e-05, + "loss": 6.5942, + "step": 8367 + }, + { + "epoch": 0.34879746571631026, + "grad_norm": 147.0, + "learning_rate": 7.563445014373105e-05, + "loss": 10.2505, + "step": 8368 + }, + { + "epoch": 0.34883914801383853, + "grad_norm": 164.0, + "learning_rate": 7.562865451888983e-05, + "loss": 10.2505, + "step": 8369 + }, + { + "epoch": 0.34888083031136674, + "grad_norm": 624.0, + "learning_rate": 7.562285842696409e-05, + "loss": 20.8755, + "step": 8370 + }, + { + "epoch": 0.348922512608895, + "grad_norm": 202.0, + "learning_rate": 7.561706186805951e-05, + "loss": 11.6252, + "step": 8371 + }, + { + "epoch": 0.3489641949064232, + "grad_norm": 1016.0, + "learning_rate": 7.561126484228167e-05, + "loss": 24.5041, + "step": 8372 + }, + { + "epoch": 0.3490058772039515, + "grad_norm": 394.0, + "learning_rate": 7.560546734973628e-05, + "loss": 13.3126, + "step": 8373 + }, + { + "epoch": 0.3490475595014797, + "grad_norm": 452.0, + "learning_rate": 7.559966939052897e-05, + "loss": 15.0631, + "step": 8374 + }, + { + "epoch": 0.34908924179900797, + "grad_norm": 204.0, + "learning_rate": 7.559387096476542e-05, + "loss": 10.6255, + "step": 8375 + }, + { + "epoch": 0.3491309240965362, + "grad_norm": 67.0, + "learning_rate": 7.558807207255128e-05, + "loss": 8.3131, + "step": 8376 + }, + { + "epoch": 0.34917260639406444, + "grad_norm": 266.0, + "learning_rate": 7.558227271399228e-05, + "loss": 12.0003, + "step": 8377 + }, + { + "epoch": 0.34921428869159266, + "grad_norm": 225.0, + "learning_rate": 7.557647288919407e-05, + "loss": 14.3753, + "step": 8378 + }, + { + "epoch": 0.3492559709891209, + "grad_norm": 296.0, + "learning_rate": 7.557067259826236e-05, + "loss": 12.5631, + "step": 8379 + }, + { + "epoch": 0.34929765328664913, + "grad_norm": 1152.0, + "learning_rate": 7.55648718413029e-05, + "loss": 26.8788, + "step": 8380 + }, + { + "epoch": 0.3493393355841774, + "grad_norm": 292.0, + "learning_rate": 7.555907061842136e-05, + "loss": 13.9377, + "step": 8381 + }, + { + "epoch": 0.3493810178817056, + "grad_norm": 644.0, + "learning_rate": 7.55532689297235e-05, + "loss": 19.1258, + "step": 8382 + }, + { + "epoch": 0.3494227001792339, + "grad_norm": 268.0, + "learning_rate": 7.554746677531503e-05, + "loss": 13.8129, + "step": 8383 + }, + { + "epoch": 0.3494643824767621, + "grad_norm": 211.0, + "learning_rate": 7.554166415530173e-05, + "loss": 11.0005, + "step": 8384 + }, + { + "epoch": 0.34950606477429036, + "grad_norm": 544.0, + "learning_rate": 7.553586106978932e-05, + "loss": 18.2505, + "step": 8385 + }, + { + "epoch": 0.3495477470718186, + "grad_norm": 136.0, + "learning_rate": 7.553005751888358e-05, + "loss": 10.0002, + "step": 8386 + }, + { + "epoch": 0.34958942936934684, + "grad_norm": 524.0, + "learning_rate": 7.552425350269028e-05, + "loss": 18.0012, + "step": 8387 + }, + { + "epoch": 0.3496311116668751, + "grad_norm": 432.0, + "learning_rate": 7.55184490213152e-05, + "loss": 15.3752, + "step": 8388 + }, + { + "epoch": 0.3496727939644033, + "grad_norm": 684.0, + "learning_rate": 7.551264407486411e-05, + "loss": 21.8765, + "step": 8389 + }, + { + "epoch": 0.3497144762619316, + "grad_norm": 596.0, + "learning_rate": 7.550683866344282e-05, + "loss": 17.3752, + "step": 8390 + }, + { + "epoch": 0.3497561585594598, + "grad_norm": 468.0, + "learning_rate": 7.550103278715713e-05, + "loss": 16.3752, + "step": 8391 + }, + { + "epoch": 0.34979784085698806, + "grad_norm": 224.0, + "learning_rate": 7.549522644611285e-05, + "loss": 9.0007, + "step": 8392 + }, + { + "epoch": 0.3498395231545163, + "grad_norm": 532.0, + "learning_rate": 7.548941964041581e-05, + "loss": 13.1267, + "step": 8393 + }, + { + "epoch": 0.34988120545204454, + "grad_norm": 254.0, + "learning_rate": 7.548361237017183e-05, + "loss": 11.5629, + "step": 8394 + }, + { + "epoch": 0.34992288774957275, + "grad_norm": 62.5, + "learning_rate": 7.547780463548675e-05, + "loss": 7.5314, + "step": 8395 + }, + { + "epoch": 0.349964570047101, + "grad_norm": 135.0, + "learning_rate": 7.547199643646642e-05, + "loss": 10.7517, + "step": 8396 + }, + { + "epoch": 0.35000625234462923, + "grad_norm": 135.0, + "learning_rate": 7.54661877732167e-05, + "loss": 8.8126, + "step": 8397 + }, + { + "epoch": 0.3500479346421575, + "grad_norm": 548.0, + "learning_rate": 7.546037864584344e-05, + "loss": 17.6253, + "step": 8398 + }, + { + "epoch": 0.3500896169396857, + "grad_norm": 234.0, + "learning_rate": 7.545456905445253e-05, + "loss": 12.6882, + "step": 8399 + }, + { + "epoch": 0.350131299237214, + "grad_norm": 152.0, + "learning_rate": 7.544875899914983e-05, + "loss": 10.9401, + "step": 8400 + }, + { + "epoch": 0.3501729815347422, + "grad_norm": 560.0, + "learning_rate": 7.544294848004124e-05, + "loss": 18.3763, + "step": 8401 + }, + { + "epoch": 0.35021466383227046, + "grad_norm": 524.0, + "learning_rate": 7.543713749723265e-05, + "loss": 18.0004, + "step": 8402 + }, + { + "epoch": 0.35025634612979867, + "grad_norm": 1104.0, + "learning_rate": 7.543132605082999e-05, + "loss": 24.132, + "step": 8403 + }, + { + "epoch": 0.35029802842732694, + "grad_norm": 372.0, + "learning_rate": 7.542551414093915e-05, + "loss": 14.9377, + "step": 8404 + }, + { + "epoch": 0.35033971072485515, + "grad_norm": 276.0, + "learning_rate": 7.541970176766604e-05, + "loss": 13.0005, + "step": 8405 + }, + { + "epoch": 0.3503813930223834, + "grad_norm": 145.0, + "learning_rate": 7.541388893111661e-05, + "loss": 10.2505, + "step": 8406 + }, + { + "epoch": 0.3504230753199116, + "grad_norm": 151.0, + "learning_rate": 7.54080756313968e-05, + "loss": 10.3757, + "step": 8407 + }, + { + "epoch": 0.3504647576174399, + "grad_norm": 110.0, + "learning_rate": 7.540226186861256e-05, + "loss": 5.0629, + "step": 8408 + }, + { + "epoch": 0.3505064399149681, + "grad_norm": 213.0, + "learning_rate": 7.539644764286986e-05, + "loss": 11.0631, + "step": 8409 + }, + { + "epoch": 0.35054812221249637, + "grad_norm": 1136.0, + "learning_rate": 7.539063295427463e-05, + "loss": 29.5002, + "step": 8410 + }, + { + "epoch": 0.3505898045100246, + "grad_norm": 426.0, + "learning_rate": 7.538481780293286e-05, + "loss": 15.5004, + "step": 8411 + }, + { + "epoch": 0.35063148680755285, + "grad_norm": 414.0, + "learning_rate": 7.537900218895053e-05, + "loss": 14.3753, + "step": 8412 + }, + { + "epoch": 0.35067316910508106, + "grad_norm": 588.0, + "learning_rate": 7.537318611243365e-05, + "loss": 15.0631, + "step": 8413 + }, + { + "epoch": 0.35071485140260933, + "grad_norm": 884.0, + "learning_rate": 7.536736957348817e-05, + "loss": 22.1251, + "step": 8414 + }, + { + "epoch": 0.35075653370013754, + "grad_norm": 332.0, + "learning_rate": 7.536155257222016e-05, + "loss": 15.5005, + "step": 8415 + }, + { + "epoch": 0.3507982159976658, + "grad_norm": 197.0, + "learning_rate": 7.535573510873559e-05, + "loss": 11.0003, + "step": 8416 + }, + { + "epoch": 0.350839898295194, + "grad_norm": 237.0, + "learning_rate": 7.53499171831405e-05, + "loss": 12.8127, + "step": 8417 + }, + { + "epoch": 0.3508815805927223, + "grad_norm": 434.0, + "learning_rate": 7.534409879554091e-05, + "loss": 16.5002, + "step": 8418 + }, + { + "epoch": 0.3509232628902505, + "grad_norm": 160.0, + "learning_rate": 7.533827994604287e-05, + "loss": 11.0627, + "step": 8419 + }, + { + "epoch": 0.35096494518777877, + "grad_norm": 716.0, + "learning_rate": 7.533246063475243e-05, + "loss": 20.3762, + "step": 8420 + }, + { + "epoch": 0.351006627485307, + "grad_norm": 466.0, + "learning_rate": 7.532664086177564e-05, + "loss": 16.5002, + "step": 8421 + }, + { + "epoch": 0.35104830978283524, + "grad_norm": 304.0, + "learning_rate": 7.53208206272186e-05, + "loss": 13.7506, + "step": 8422 + }, + { + "epoch": 0.35108999208036346, + "grad_norm": 536.0, + "learning_rate": 7.531499993118732e-05, + "loss": 17.2504, + "step": 8423 + }, + { + "epoch": 0.3511316743778917, + "grad_norm": 221.0, + "learning_rate": 7.530917877378794e-05, + "loss": 10.8751, + "step": 8424 + }, + { + "epoch": 0.35117335667541993, + "grad_norm": 984.0, + "learning_rate": 7.530335715512653e-05, + "loss": 22.8794, + "step": 8425 + }, + { + "epoch": 0.3512150389729482, + "grad_norm": 179.0, + "learning_rate": 7.529753507530918e-05, + "loss": 10.2503, + "step": 8426 + }, + { + "epoch": 0.3512567212704764, + "grad_norm": 516.0, + "learning_rate": 7.529171253444202e-05, + "loss": 18.0026, + "step": 8427 + }, + { + "epoch": 0.3512984035680047, + "grad_norm": 370.0, + "learning_rate": 7.528588953263114e-05, + "loss": 15.0626, + "step": 8428 + }, + { + "epoch": 0.3513400858655329, + "grad_norm": 384.0, + "learning_rate": 7.528006606998268e-05, + "loss": 14.7503, + "step": 8429 + }, + { + "epoch": 0.35138176816306116, + "grad_norm": 412.0, + "learning_rate": 7.527424214660279e-05, + "loss": 14.5003, + "step": 8430 + }, + { + "epoch": 0.35142345046058937, + "grad_norm": 176.0, + "learning_rate": 7.526841776259757e-05, + "loss": 10.563, + "step": 8431 + }, + { + "epoch": 0.35146513275811764, + "grad_norm": 844.0, + "learning_rate": 7.52625929180732e-05, + "loss": 19.2545, + "step": 8432 + }, + { + "epoch": 0.35150681505564585, + "grad_norm": 664.0, + "learning_rate": 7.525676761313584e-05, + "loss": 18.6255, + "step": 8433 + }, + { + "epoch": 0.3515484973531741, + "grad_norm": 1136.0, + "learning_rate": 7.525094184789163e-05, + "loss": 28.6264, + "step": 8434 + }, + { + "epoch": 0.35159017965070233, + "grad_norm": 173.0, + "learning_rate": 7.524511562244679e-05, + "loss": 10.8132, + "step": 8435 + }, + { + "epoch": 0.3516318619482306, + "grad_norm": 390.0, + "learning_rate": 7.523928893690746e-05, + "loss": 14.1253, + "step": 8436 + }, + { + "epoch": 0.3516735442457588, + "grad_norm": 136.0, + "learning_rate": 7.523346179137985e-05, + "loss": 9.688, + "step": 8437 + }, + { + "epoch": 0.3517152265432871, + "grad_norm": 144.0, + "learning_rate": 7.522763418597017e-05, + "loss": 10.1881, + "step": 8438 + }, + { + "epoch": 0.3517569088408153, + "grad_norm": 126.0, + "learning_rate": 7.52218061207846e-05, + "loss": 9.8754, + "step": 8439 + }, + { + "epoch": 0.35179859113834355, + "grad_norm": 360.0, + "learning_rate": 7.521597759592938e-05, + "loss": 15.5627, + "step": 8440 + }, + { + "epoch": 0.35184027343587176, + "grad_norm": 456.0, + "learning_rate": 7.521014861151073e-05, + "loss": 14.8131, + "step": 8441 + }, + { + "epoch": 0.35188195573340003, + "grad_norm": 410.0, + "learning_rate": 7.520431916763489e-05, + "loss": 15.0634, + "step": 8442 + }, + { + "epoch": 0.35192363803092824, + "grad_norm": 173.0, + "learning_rate": 7.519848926440809e-05, + "loss": 8.6254, + "step": 8443 + }, + { + "epoch": 0.3519653203284565, + "grad_norm": 274.0, + "learning_rate": 7.519265890193659e-05, + "loss": 12.8753, + "step": 8444 + }, + { + "epoch": 0.3520070026259847, + "grad_norm": 177.0, + "learning_rate": 7.518682808032663e-05, + "loss": 10.0014, + "step": 8445 + }, + { + "epoch": 0.352048684923513, + "grad_norm": 120.0, + "learning_rate": 7.518099679968451e-05, + "loss": 7.2506, + "step": 8446 + }, + { + "epoch": 0.3520903672210412, + "grad_norm": 237.0, + "learning_rate": 7.517516506011648e-05, + "loss": 11.5631, + "step": 8447 + }, + { + "epoch": 0.35213204951856947, + "grad_norm": 364.0, + "learning_rate": 7.516933286172883e-05, + "loss": 14.0627, + "step": 8448 + }, + { + "epoch": 0.3521737318160977, + "grad_norm": 270.0, + "learning_rate": 7.516350020462785e-05, + "loss": 11.0003, + "step": 8449 + }, + { + "epoch": 0.35221541411362595, + "grad_norm": 496.0, + "learning_rate": 7.515766708891987e-05, + "loss": 15.8757, + "step": 8450 + }, + { + "epoch": 0.35225709641115416, + "grad_norm": 264.0, + "learning_rate": 7.515183351471116e-05, + "loss": 11.7503, + "step": 8451 + }, + { + "epoch": 0.3522987787086824, + "grad_norm": 193.0, + "learning_rate": 7.514599948210805e-05, + "loss": 9.001, + "step": 8452 + }, + { + "epoch": 0.35234046100621064, + "grad_norm": 227.0, + "learning_rate": 7.514016499121687e-05, + "loss": 11.0628, + "step": 8453 + }, + { + "epoch": 0.3523821433037389, + "grad_norm": 207.0, + "learning_rate": 7.513433004214394e-05, + "loss": 11.2501, + "step": 8454 + }, + { + "epoch": 0.3524238256012671, + "grad_norm": 322.0, + "learning_rate": 7.512849463499563e-05, + "loss": 14.6252, + "step": 8455 + }, + { + "epoch": 0.3524655078987954, + "grad_norm": 260.0, + "learning_rate": 7.512265876987826e-05, + "loss": 11.8752, + "step": 8456 + }, + { + "epoch": 0.3525071901963236, + "grad_norm": 1112.0, + "learning_rate": 7.511682244689821e-05, + "loss": 25.5113, + "step": 8457 + }, + { + "epoch": 0.35254887249385186, + "grad_norm": 1020.0, + "learning_rate": 7.511098566616184e-05, + "loss": 25.7519, + "step": 8458 + }, + { + "epoch": 0.35259055479138013, + "grad_norm": 624.0, + "learning_rate": 7.510514842777552e-05, + "loss": 20.8764, + "step": 8459 + }, + { + "epoch": 0.35263223708890834, + "grad_norm": 402.0, + "learning_rate": 7.509931073184566e-05, + "loss": 14.6253, + "step": 8460 + }, + { + "epoch": 0.3526739193864366, + "grad_norm": 316.0, + "learning_rate": 7.509347257847861e-05, + "loss": 13.563, + "step": 8461 + }, + { + "epoch": 0.3527156016839648, + "grad_norm": 340.0, + "learning_rate": 7.508763396778081e-05, + "loss": 14.3753, + "step": 8462 + }, + { + "epoch": 0.3527572839814931, + "grad_norm": 255.0, + "learning_rate": 7.508179489985865e-05, + "loss": 12.6879, + "step": 8463 + }, + { + "epoch": 0.3527989662790213, + "grad_norm": 608.0, + "learning_rate": 7.507595537481856e-05, + "loss": 18.751, + "step": 8464 + }, + { + "epoch": 0.35284064857654956, + "grad_norm": 688.0, + "learning_rate": 7.507011539276695e-05, + "loss": 19.7525, + "step": 8465 + }, + { + "epoch": 0.3528823308740778, + "grad_norm": 218.0, + "learning_rate": 7.506427495381026e-05, + "loss": 11.5627, + "step": 8466 + }, + { + "epoch": 0.35292401317160604, + "grad_norm": 278.0, + "learning_rate": 7.505843405805493e-05, + "loss": 11.2502, + "step": 8467 + }, + { + "epoch": 0.35296569546913426, + "grad_norm": 416.0, + "learning_rate": 7.505259270560743e-05, + "loss": 14.3128, + "step": 8468 + }, + { + "epoch": 0.3530073777666625, + "grad_norm": 78.5, + "learning_rate": 7.50467508965742e-05, + "loss": 8.5633, + "step": 8469 + }, + { + "epoch": 0.35304906006419073, + "grad_norm": 664.0, + "learning_rate": 7.50409086310617e-05, + "loss": 18.6265, + "step": 8470 + }, + { + "epoch": 0.353090742361719, + "grad_norm": 148.0, + "learning_rate": 7.503506590917642e-05, + "loss": 8.5009, + "step": 8471 + }, + { + "epoch": 0.3531324246592472, + "grad_norm": 912.0, + "learning_rate": 7.502922273102484e-05, + "loss": 21.5054, + "step": 8472 + }, + { + "epoch": 0.3531741069567755, + "grad_norm": 157.0, + "learning_rate": 7.502337909671347e-05, + "loss": 9.1878, + "step": 8473 + }, + { + "epoch": 0.3532157892543037, + "grad_norm": 330.0, + "learning_rate": 7.501753500634877e-05, + "loss": 13.2503, + "step": 8474 + }, + { + "epoch": 0.35325747155183196, + "grad_norm": 358.0, + "learning_rate": 7.50116904600373e-05, + "loss": 15.0002, + "step": 8475 + }, + { + "epoch": 0.35329915384936017, + "grad_norm": 195.0, + "learning_rate": 7.500584545788552e-05, + "loss": 10.8753, + "step": 8476 + }, + { + "epoch": 0.35334083614688844, + "grad_norm": 604.0, + "learning_rate": 7.500000000000001e-05, + "loss": 19.2507, + "step": 8477 + }, + { + "epoch": 0.35338251844441665, + "grad_norm": 1256.0, + "learning_rate": 7.499415408648727e-05, + "loss": 26.8756, + "step": 8478 + }, + { + "epoch": 0.3534242007419449, + "grad_norm": 548.0, + "learning_rate": 7.498830771745386e-05, + "loss": 18.3753, + "step": 8479 + }, + { + "epoch": 0.3534658830394731, + "grad_norm": 247.0, + "learning_rate": 7.498246089300632e-05, + "loss": 11.8128, + "step": 8480 + }, + { + "epoch": 0.3535075653370014, + "grad_norm": 380.0, + "learning_rate": 7.497661361325122e-05, + "loss": 14.8753, + "step": 8481 + }, + { + "epoch": 0.3535492476345296, + "grad_norm": 492.0, + "learning_rate": 7.497076587829512e-05, + "loss": 19.0003, + "step": 8482 + }, + { + "epoch": 0.3535909299320579, + "grad_norm": 136.0, + "learning_rate": 7.496491768824458e-05, + "loss": 9.8128, + "step": 8483 + }, + { + "epoch": 0.3536326122295861, + "grad_norm": 450.0, + "learning_rate": 7.49590690432062e-05, + "loss": 15.3128, + "step": 8484 + }, + { + "epoch": 0.35367429452711435, + "grad_norm": 182.0, + "learning_rate": 7.495321994328658e-05, + "loss": 11.3753, + "step": 8485 + }, + { + "epoch": 0.35371597682464256, + "grad_norm": 1160.0, + "learning_rate": 7.494737038859231e-05, + "loss": 31.8761, + "step": 8486 + }, + { + "epoch": 0.35375765912217083, + "grad_norm": 1296.0, + "learning_rate": 7.494152037923e-05, + "loss": 26.7548, + "step": 8487 + }, + { + "epoch": 0.35379934141969904, + "grad_norm": 152.0, + "learning_rate": 7.493566991530627e-05, + "loss": 9.6254, + "step": 8488 + }, + { + "epoch": 0.3538410237172273, + "grad_norm": 604.0, + "learning_rate": 7.492981899692773e-05, + "loss": 19.5004, + "step": 8489 + }, + { + "epoch": 0.3538827060147555, + "grad_norm": 362.0, + "learning_rate": 7.492396762420103e-05, + "loss": 15.0004, + "step": 8490 + }, + { + "epoch": 0.3539243883122838, + "grad_norm": 568.0, + "learning_rate": 7.491811579723282e-05, + "loss": 18.8753, + "step": 8491 + }, + { + "epoch": 0.353966070609812, + "grad_norm": 74.0, + "learning_rate": 7.491226351612974e-05, + "loss": 7.2504, + "step": 8492 + }, + { + "epoch": 0.35400775290734027, + "grad_norm": 604.0, + "learning_rate": 7.490641078099843e-05, + "loss": 19.1256, + "step": 8493 + }, + { + "epoch": 0.3540494352048685, + "grad_norm": 227.0, + "learning_rate": 7.490055759194559e-05, + "loss": 12.6879, + "step": 8494 + }, + { + "epoch": 0.35409111750239675, + "grad_norm": 724.0, + "learning_rate": 7.489470394907785e-05, + "loss": 17.7542, + "step": 8495 + }, + { + "epoch": 0.35413279979992496, + "grad_norm": 392.0, + "learning_rate": 7.488884985250194e-05, + "loss": 14.8129, + "step": 8496 + }, + { + "epoch": 0.3541744820974532, + "grad_norm": 344.0, + "learning_rate": 7.488299530232452e-05, + "loss": 12.4379, + "step": 8497 + }, + { + "epoch": 0.35421616439498144, + "grad_norm": 243.0, + "learning_rate": 7.487714029865232e-05, + "loss": 10.9376, + "step": 8498 + }, + { + "epoch": 0.3542578466925097, + "grad_norm": 268.0, + "learning_rate": 7.4871284841592e-05, + "loss": 12.8137, + "step": 8499 + }, + { + "epoch": 0.3542995289900379, + "grad_norm": 428.0, + "learning_rate": 7.486542893125034e-05, + "loss": 16.7503, + "step": 8500 + }, + { + "epoch": 0.3543412112875662, + "grad_norm": 432.0, + "learning_rate": 7.485957256773401e-05, + "loss": 16.3752, + "step": 8501 + }, + { + "epoch": 0.3543828935850944, + "grad_norm": 245.0, + "learning_rate": 7.485371575114977e-05, + "loss": 11.8131, + "step": 8502 + }, + { + "epoch": 0.35442457588262266, + "grad_norm": 248.0, + "learning_rate": 7.484785848160436e-05, + "loss": 12.8752, + "step": 8503 + }, + { + "epoch": 0.3544662581801509, + "grad_norm": 1640.0, + "learning_rate": 7.484200075920451e-05, + "loss": 32.7518, + "step": 8504 + }, + { + "epoch": 0.35450794047767914, + "grad_norm": 544.0, + "learning_rate": 7.4836142584057e-05, + "loss": 16.5028, + "step": 8505 + }, + { + "epoch": 0.35454962277520735, + "grad_norm": 63.75, + "learning_rate": 7.483028395626859e-05, + "loss": 6.6253, + "step": 8506 + }, + { + "epoch": 0.3545913050727356, + "grad_norm": 47.25, + "learning_rate": 7.482442487594604e-05, + "loss": 7.6255, + "step": 8507 + }, + { + "epoch": 0.35463298737026383, + "grad_norm": 628.0, + "learning_rate": 7.481856534319614e-05, + "loss": 20.0002, + "step": 8508 + }, + { + "epoch": 0.3546746696677921, + "grad_norm": 576.0, + "learning_rate": 7.48127053581257e-05, + "loss": 17.5022, + "step": 8509 + }, + { + "epoch": 0.3547163519653203, + "grad_norm": 308.0, + "learning_rate": 7.480684492084148e-05, + "loss": 13.0002, + "step": 8510 + }, + { + "epoch": 0.3547580342628486, + "grad_norm": 328.0, + "learning_rate": 7.480098403145033e-05, + "loss": 13.6271, + "step": 8511 + }, + { + "epoch": 0.3547997165603768, + "grad_norm": 270.0, + "learning_rate": 7.479512269005904e-05, + "loss": 13.3757, + "step": 8512 + }, + { + "epoch": 0.35484139885790505, + "grad_norm": 179.0, + "learning_rate": 7.478926089677443e-05, + "loss": 10.0627, + "step": 8513 + }, + { + "epoch": 0.35488308115543327, + "grad_norm": 764.0, + "learning_rate": 7.478339865170334e-05, + "loss": 23.0011, + "step": 8514 + }, + { + "epoch": 0.35492476345296153, + "grad_norm": 466.0, + "learning_rate": 7.477753595495261e-05, + "loss": 15.8128, + "step": 8515 + }, + { + "epoch": 0.35496644575048975, + "grad_norm": 274.0, + "learning_rate": 7.477167280662909e-05, + "loss": 12.2501, + "step": 8516 + }, + { + "epoch": 0.355008128048018, + "grad_norm": 382.0, + "learning_rate": 7.476580920683964e-05, + "loss": 12.6276, + "step": 8517 + }, + { + "epoch": 0.3550498103455462, + "grad_norm": 452.0, + "learning_rate": 7.475994515569112e-05, + "loss": 17.8755, + "step": 8518 + }, + { + "epoch": 0.3550914926430745, + "grad_norm": 150.0, + "learning_rate": 7.47540806532904e-05, + "loss": 9.0002, + "step": 8519 + }, + { + "epoch": 0.3551331749406027, + "grad_norm": 568.0, + "learning_rate": 7.474821569974434e-05, + "loss": 18.0001, + "step": 8520 + }, + { + "epoch": 0.35517485723813097, + "grad_norm": 944.0, + "learning_rate": 7.474235029515987e-05, + "loss": 26.2511, + "step": 8521 + }, + { + "epoch": 0.3552165395356592, + "grad_norm": 1080.0, + "learning_rate": 7.473648443964387e-05, + "loss": 26.7511, + "step": 8522 + }, + { + "epoch": 0.35525822183318745, + "grad_norm": 492.0, + "learning_rate": 7.473061813330325e-05, + "loss": 12.8145, + "step": 8523 + }, + { + "epoch": 0.35529990413071566, + "grad_norm": 732.0, + "learning_rate": 7.472475137624491e-05, + "loss": 21.5001, + "step": 8524 + }, + { + "epoch": 0.3553415864282439, + "grad_norm": 69.5, + "learning_rate": 7.471888416857578e-05, + "loss": 8.1877, + "step": 8525 + }, + { + "epoch": 0.35538326872577214, + "grad_norm": 474.0, + "learning_rate": 7.47130165104028e-05, + "loss": 17.1252, + "step": 8526 + }, + { + "epoch": 0.3554249510233004, + "grad_norm": 164.0, + "learning_rate": 7.470714840183291e-05, + "loss": 10.3136, + "step": 8527 + }, + { + "epoch": 0.3554666333208286, + "grad_norm": 115.5, + "learning_rate": 7.470127984297303e-05, + "loss": 9.252, + "step": 8528 + }, + { + "epoch": 0.3555083156183569, + "grad_norm": 288.0, + "learning_rate": 7.469541083393012e-05, + "loss": 13.8127, + "step": 8529 + }, + { + "epoch": 0.3555499979158851, + "grad_norm": 300.0, + "learning_rate": 7.468954137481118e-05, + "loss": 13.188, + "step": 8530 + }, + { + "epoch": 0.35559168021341336, + "grad_norm": 712.0, + "learning_rate": 7.468367146572315e-05, + "loss": 20.0003, + "step": 8531 + }, + { + "epoch": 0.35563336251094163, + "grad_norm": 352.0, + "learning_rate": 7.467780110677302e-05, + "loss": 12.5008, + "step": 8532 + }, + { + "epoch": 0.35567504480846984, + "grad_norm": 284.0, + "learning_rate": 7.467193029806775e-05, + "loss": 12.5005, + "step": 8533 + }, + { + "epoch": 0.3557167271059981, + "grad_norm": 106.5, + "learning_rate": 7.466605903971438e-05, + "loss": 10.813, + "step": 8534 + }, + { + "epoch": 0.3557584094035263, + "grad_norm": 184.0, + "learning_rate": 7.46601873318199e-05, + "loss": 11.5002, + "step": 8535 + }, + { + "epoch": 0.3558000917010546, + "grad_norm": 214.0, + "learning_rate": 7.46543151744913e-05, + "loss": 10.4377, + "step": 8536 + }, + { + "epoch": 0.3558417739985828, + "grad_norm": 600.0, + "learning_rate": 7.464844256783563e-05, + "loss": 19.5009, + "step": 8537 + }, + { + "epoch": 0.35588345629611107, + "grad_norm": 255.0, + "learning_rate": 7.46425695119599e-05, + "loss": 12.2501, + "step": 8538 + }, + { + "epoch": 0.3559251385936393, + "grad_norm": 428.0, + "learning_rate": 7.463669600697117e-05, + "loss": 15.5627, + "step": 8539 + }, + { + "epoch": 0.35596682089116755, + "grad_norm": 744.0, + "learning_rate": 7.463082205297644e-05, + "loss": 22.2503, + "step": 8540 + }, + { + "epoch": 0.35600850318869576, + "grad_norm": 524.0, + "learning_rate": 7.46249476500828e-05, + "loss": 17.253, + "step": 8541 + }, + { + "epoch": 0.356050185486224, + "grad_norm": 98.0, + "learning_rate": 7.461907279839733e-05, + "loss": 9.0632, + "step": 8542 + }, + { + "epoch": 0.35609186778375224, + "grad_norm": 220.0, + "learning_rate": 7.461319749802705e-05, + "loss": 11.1252, + "step": 8543 + }, + { + "epoch": 0.3561335500812805, + "grad_norm": 448.0, + "learning_rate": 7.460732174907905e-05, + "loss": 16.2506, + "step": 8544 + }, + { + "epoch": 0.3561752323788087, + "grad_norm": 624.0, + "learning_rate": 7.460144555166044e-05, + "loss": 18.0004, + "step": 8545 + }, + { + "epoch": 0.356216914676337, + "grad_norm": 199.0, + "learning_rate": 7.459556890587828e-05, + "loss": 11.1879, + "step": 8546 + }, + { + "epoch": 0.3562585969738652, + "grad_norm": 100.5, + "learning_rate": 7.458969181183972e-05, + "loss": 8.4377, + "step": 8547 + }, + { + "epoch": 0.35630027927139346, + "grad_norm": 73.0, + "learning_rate": 7.458381426965184e-05, + "loss": 7.7502, + "step": 8548 + }, + { + "epoch": 0.3563419615689217, + "grad_norm": 262.0, + "learning_rate": 7.457793627942176e-05, + "loss": 13.2508, + "step": 8549 + }, + { + "epoch": 0.35638364386644994, + "grad_norm": 1000.0, + "learning_rate": 7.457205784125661e-05, + "loss": 26.1254, + "step": 8550 + }, + { + "epoch": 0.35642532616397815, + "grad_norm": 480.0, + "learning_rate": 7.456617895526352e-05, + "loss": 16.251, + "step": 8551 + }, + { + "epoch": 0.3564670084615064, + "grad_norm": 456.0, + "learning_rate": 7.456029962154965e-05, + "loss": 16.2526, + "step": 8552 + }, + { + "epoch": 0.35650869075903463, + "grad_norm": 54.75, + "learning_rate": 7.455441984022214e-05, + "loss": 6.0317, + "step": 8553 + }, + { + "epoch": 0.3565503730565629, + "grad_norm": 516.0, + "learning_rate": 7.454853961138813e-05, + "loss": 16.7505, + "step": 8554 + }, + { + "epoch": 0.3565920553540911, + "grad_norm": 696.0, + "learning_rate": 7.454265893515482e-05, + "loss": 20.2508, + "step": 8555 + }, + { + "epoch": 0.3566337376516194, + "grad_norm": 302.0, + "learning_rate": 7.453677781162936e-05, + "loss": 13.4377, + "step": 8556 + }, + { + "epoch": 0.3566754199491476, + "grad_norm": 179.0, + "learning_rate": 7.453089624091896e-05, + "loss": 11.5628, + "step": 8557 + }, + { + "epoch": 0.35671710224667585, + "grad_norm": 280.0, + "learning_rate": 7.452501422313078e-05, + "loss": 11.6282, + "step": 8558 + }, + { + "epoch": 0.35675878454420407, + "grad_norm": 258.0, + "learning_rate": 7.451913175837206e-05, + "loss": 12.9386, + "step": 8559 + }, + { + "epoch": 0.35680046684173233, + "grad_norm": 332.0, + "learning_rate": 7.451324884674998e-05, + "loss": 15.6878, + "step": 8560 + }, + { + "epoch": 0.35684214913926054, + "grad_norm": 412.0, + "learning_rate": 7.450736548837176e-05, + "loss": 16.1255, + "step": 8561 + }, + { + "epoch": 0.3568838314367888, + "grad_norm": 482.0, + "learning_rate": 7.450148168334462e-05, + "loss": 17.0001, + "step": 8562 + }, + { + "epoch": 0.356925513734317, + "grad_norm": 272.0, + "learning_rate": 7.449559743177581e-05, + "loss": 11.3763, + "step": 8563 + }, + { + "epoch": 0.3569671960318453, + "grad_norm": 220.0, + "learning_rate": 7.448971273377257e-05, + "loss": 10.0634, + "step": 8564 + }, + { + "epoch": 0.3570088783293735, + "grad_norm": 952.0, + "learning_rate": 7.448382758944212e-05, + "loss": 22.3766, + "step": 8565 + }, + { + "epoch": 0.35705056062690177, + "grad_norm": 188.0, + "learning_rate": 7.447794199889177e-05, + "loss": 9.8128, + "step": 8566 + }, + { + "epoch": 0.35709224292443, + "grad_norm": 100.0, + "learning_rate": 7.447205596222872e-05, + "loss": 8.4378, + "step": 8567 + }, + { + "epoch": 0.35713392522195825, + "grad_norm": 120.5, + "learning_rate": 7.446616947956031e-05, + "loss": 7.844, + "step": 8568 + }, + { + "epoch": 0.35717560751948646, + "grad_norm": 436.0, + "learning_rate": 7.446028255099378e-05, + "loss": 16.2501, + "step": 8569 + }, + { + "epoch": 0.3572172898170147, + "grad_norm": 195.0, + "learning_rate": 7.445439517663641e-05, + "loss": 10.2503, + "step": 8570 + }, + { + "epoch": 0.35725897211454294, + "grad_norm": 187.0, + "learning_rate": 7.444850735659551e-05, + "loss": 9.1879, + "step": 8571 + }, + { + "epoch": 0.3573006544120712, + "grad_norm": 262.0, + "learning_rate": 7.444261909097842e-05, + "loss": 13.4379, + "step": 8572 + }, + { + "epoch": 0.3573423367095994, + "grad_norm": 1496.0, + "learning_rate": 7.443673037989243e-05, + "loss": 27.3797, + "step": 8573 + }, + { + "epoch": 0.3573840190071277, + "grad_norm": 253.0, + "learning_rate": 7.443084122344486e-05, + "loss": 13.8132, + "step": 8574 + }, + { + "epoch": 0.3574257013046559, + "grad_norm": 372.0, + "learning_rate": 7.442495162174303e-05, + "loss": 14.0628, + "step": 8575 + }, + { + "epoch": 0.35746738360218416, + "grad_norm": 148.0, + "learning_rate": 7.441906157489428e-05, + "loss": 11.0011, + "step": 8576 + }, + { + "epoch": 0.3575090658997124, + "grad_norm": 352.0, + "learning_rate": 7.441317108300598e-05, + "loss": 11.6881, + "step": 8577 + }, + { + "epoch": 0.35755074819724064, + "grad_norm": 332.0, + "learning_rate": 7.440728014618546e-05, + "loss": 13.6272, + "step": 8578 + }, + { + "epoch": 0.35759243049476885, + "grad_norm": 234.0, + "learning_rate": 7.440138876454009e-05, + "loss": 12.0003, + "step": 8579 + }, + { + "epoch": 0.3576341127922971, + "grad_norm": 788.0, + "learning_rate": 7.439549693817726e-05, + "loss": 19.2543, + "step": 8580 + }, + { + "epoch": 0.35767579508982533, + "grad_norm": 360.0, + "learning_rate": 7.438960466720431e-05, + "loss": 15.3128, + "step": 8581 + }, + { + "epoch": 0.3577174773873536, + "grad_norm": 436.0, + "learning_rate": 7.438371195172867e-05, + "loss": 16.2503, + "step": 8582 + }, + { + "epoch": 0.3577591596848818, + "grad_norm": 206.0, + "learning_rate": 7.437781879185772e-05, + "loss": 8.2502, + "step": 8583 + }, + { + "epoch": 0.3578008419824101, + "grad_norm": 338.0, + "learning_rate": 7.437192518769882e-05, + "loss": 13.8128, + "step": 8584 + }, + { + "epoch": 0.3578425242799383, + "grad_norm": 1024.0, + "learning_rate": 7.436603113935945e-05, + "loss": 22.1297, + "step": 8585 + }, + { + "epoch": 0.35788420657746656, + "grad_norm": 266.0, + "learning_rate": 7.4360136646947e-05, + "loss": 11.3757, + "step": 8586 + }, + { + "epoch": 0.35792588887499477, + "grad_norm": 215.0, + "learning_rate": 7.435424171056888e-05, + "loss": 10.6895, + "step": 8587 + }, + { + "epoch": 0.35796757117252304, + "grad_norm": 668.0, + "learning_rate": 7.434834633033256e-05, + "loss": 19.2512, + "step": 8588 + }, + { + "epoch": 0.35800925347005125, + "grad_norm": 288.0, + "learning_rate": 7.434245050634546e-05, + "loss": 12.0004, + "step": 8589 + }, + { + "epoch": 0.3580509357675795, + "grad_norm": 91.0, + "learning_rate": 7.433655423871505e-05, + "loss": 9.5002, + "step": 8590 + }, + { + "epoch": 0.3580926180651077, + "grad_norm": 516.0, + "learning_rate": 7.433065752754877e-05, + "loss": 16.7502, + "step": 8591 + }, + { + "epoch": 0.358134300362636, + "grad_norm": 172.0, + "learning_rate": 7.43247603729541e-05, + "loss": 11.1253, + "step": 8592 + }, + { + "epoch": 0.3581759826601642, + "grad_norm": 132.0, + "learning_rate": 7.43188627750385e-05, + "loss": 9.3754, + "step": 8593 + }, + { + "epoch": 0.35821766495769247, + "grad_norm": 163.0, + "learning_rate": 7.431296473390947e-05, + "loss": 9.8752, + "step": 8594 + }, + { + "epoch": 0.3582593472552207, + "grad_norm": 696.0, + "learning_rate": 7.430706624967452e-05, + "loss": 20.8753, + "step": 8595 + }, + { + "epoch": 0.35830102955274895, + "grad_norm": 920.0, + "learning_rate": 7.43011673224411e-05, + "loss": 20.3796, + "step": 8596 + }, + { + "epoch": 0.35834271185027716, + "grad_norm": 418.0, + "learning_rate": 7.429526795231677e-05, + "loss": 15.0003, + "step": 8597 + }, + { + "epoch": 0.35838439414780543, + "grad_norm": 95.5, + "learning_rate": 7.4289368139409e-05, + "loss": 8.9376, + "step": 8598 + }, + { + "epoch": 0.35842607644533364, + "grad_norm": 664.0, + "learning_rate": 7.428346788382536e-05, + "loss": 19.7507, + "step": 8599 + }, + { + "epoch": 0.3584677587428619, + "grad_norm": 225.0, + "learning_rate": 7.427756718567335e-05, + "loss": 12.1882, + "step": 8600 + }, + { + "epoch": 0.3585094410403901, + "grad_norm": 188.0, + "learning_rate": 7.427166604506056e-05, + "loss": 10.6255, + "step": 8601 + }, + { + "epoch": 0.3585511233379184, + "grad_norm": 130.0, + "learning_rate": 7.426576446209445e-05, + "loss": 9.2502, + "step": 8602 + }, + { + "epoch": 0.3585928056354466, + "grad_norm": 366.0, + "learning_rate": 7.425986243688266e-05, + "loss": 13.9381, + "step": 8603 + }, + { + "epoch": 0.35863448793297487, + "grad_norm": 245.0, + "learning_rate": 7.425395996953272e-05, + "loss": 13.3127, + "step": 8604 + }, + { + "epoch": 0.35867617023050313, + "grad_norm": 340.0, + "learning_rate": 7.42480570601522e-05, + "loss": 13.6252, + "step": 8605 + }, + { + "epoch": 0.35871785252803134, + "grad_norm": 196.0, + "learning_rate": 7.42421537088487e-05, + "loss": 10.0632, + "step": 8606 + }, + { + "epoch": 0.3587595348255596, + "grad_norm": 63.75, + "learning_rate": 7.423624991572977e-05, + "loss": 7.2212, + "step": 8607 + }, + { + "epoch": 0.3588012171230878, + "grad_norm": 478.0, + "learning_rate": 7.423034568090306e-05, + "loss": 18.0002, + "step": 8608 + }, + { + "epoch": 0.3588428994206161, + "grad_norm": 1480.0, + "learning_rate": 7.422444100447613e-05, + "loss": 36.0005, + "step": 8609 + }, + { + "epoch": 0.3588845817181443, + "grad_norm": 632.0, + "learning_rate": 7.421853588655662e-05, + "loss": 20.6254, + "step": 8610 + }, + { + "epoch": 0.35892626401567257, + "grad_norm": 153.0, + "learning_rate": 7.421263032725215e-05, + "loss": 10.3753, + "step": 8611 + }, + { + "epoch": 0.3589679463132008, + "grad_norm": 207.0, + "learning_rate": 7.420672432667033e-05, + "loss": 11.0005, + "step": 8612 + }, + { + "epoch": 0.35900962861072905, + "grad_norm": 2208.0, + "learning_rate": 7.420081788491882e-05, + "loss": 41.7503, + "step": 8613 + }, + { + "epoch": 0.35905131090825726, + "grad_norm": 976.0, + "learning_rate": 7.419491100210524e-05, + "loss": 20.6301, + "step": 8614 + }, + { + "epoch": 0.3590929932057855, + "grad_norm": 338.0, + "learning_rate": 7.418900367833729e-05, + "loss": 14.0627, + "step": 8615 + }, + { + "epoch": 0.35913467550331374, + "grad_norm": 129.0, + "learning_rate": 7.418309591372259e-05, + "loss": 9.8752, + "step": 8616 + }, + { + "epoch": 0.359176357800842, + "grad_norm": 239.0, + "learning_rate": 7.41771877083688e-05, + "loss": 12.2506, + "step": 8617 + }, + { + "epoch": 0.3592180400983702, + "grad_norm": 224.0, + "learning_rate": 7.417127906238364e-05, + "loss": 11.6252, + "step": 8618 + }, + { + "epoch": 0.3592597223958985, + "grad_norm": 256.0, + "learning_rate": 7.416536997587477e-05, + "loss": 10.5005, + "step": 8619 + }, + { + "epoch": 0.3593014046934267, + "grad_norm": 235.0, + "learning_rate": 7.415946044894988e-05, + "loss": 10.9378, + "step": 8620 + }, + { + "epoch": 0.35934308699095496, + "grad_norm": 540.0, + "learning_rate": 7.415355048171665e-05, + "loss": 18.1296, + "step": 8621 + }, + { + "epoch": 0.3593847692884832, + "grad_norm": 282.0, + "learning_rate": 7.414764007428286e-05, + "loss": 12.751, + "step": 8622 + }, + { + "epoch": 0.35942645158601144, + "grad_norm": 708.0, + "learning_rate": 7.414172922675618e-05, + "loss": 21.7502, + "step": 8623 + }, + { + "epoch": 0.35946813388353965, + "grad_norm": 466.0, + "learning_rate": 7.413581793924434e-05, + "loss": 16.7502, + "step": 8624 + }, + { + "epoch": 0.3595098161810679, + "grad_norm": 442.0, + "learning_rate": 7.412990621185508e-05, + "loss": 15.8755, + "step": 8625 + }, + { + "epoch": 0.35955149847859613, + "grad_norm": 93.0, + "learning_rate": 7.412399404469612e-05, + "loss": 9.2503, + "step": 8626 + }, + { + "epoch": 0.3595931807761244, + "grad_norm": 358.0, + "learning_rate": 7.411808143787523e-05, + "loss": 15.0012, + "step": 8627 + }, + { + "epoch": 0.3596348630736526, + "grad_norm": 292.0, + "learning_rate": 7.411216839150016e-05, + "loss": 11.5647, + "step": 8628 + }, + { + "epoch": 0.3596765453711809, + "grad_norm": 198.0, + "learning_rate": 7.41062549056787e-05, + "loss": 11.2505, + "step": 8629 + }, + { + "epoch": 0.3597182276687091, + "grad_norm": 532.0, + "learning_rate": 7.41003409805186e-05, + "loss": 18.6253, + "step": 8630 + }, + { + "epoch": 0.35975990996623736, + "grad_norm": 552.0, + "learning_rate": 7.409442661612764e-05, + "loss": 16.8753, + "step": 8631 + }, + { + "epoch": 0.35980159226376557, + "grad_norm": 596.0, + "learning_rate": 7.408851181261363e-05, + "loss": 19.1252, + "step": 8632 + }, + { + "epoch": 0.35984327456129384, + "grad_norm": 644.0, + "learning_rate": 7.408259657008433e-05, + "loss": 19.2505, + "step": 8633 + }, + { + "epoch": 0.35988495685882205, + "grad_norm": 370.0, + "learning_rate": 7.407668088864757e-05, + "loss": 15.0629, + "step": 8634 + }, + { + "epoch": 0.3599266391563503, + "grad_norm": 406.0, + "learning_rate": 7.407076476841118e-05, + "loss": 15.8126, + "step": 8635 + }, + { + "epoch": 0.3599683214538785, + "grad_norm": 243.0, + "learning_rate": 7.406484820948296e-05, + "loss": 11.7508, + "step": 8636 + }, + { + "epoch": 0.3600100037514068, + "grad_norm": 264.0, + "learning_rate": 7.405893121197075e-05, + "loss": 11.9382, + "step": 8637 + }, + { + "epoch": 0.360051686048935, + "grad_norm": 186.0, + "learning_rate": 7.405301377598237e-05, + "loss": 11.0004, + "step": 8638 + }, + { + "epoch": 0.36009336834646327, + "grad_norm": 157.0, + "learning_rate": 7.40470959016257e-05, + "loss": 10.7508, + "step": 8639 + }, + { + "epoch": 0.3601350506439915, + "grad_norm": 398.0, + "learning_rate": 7.404117758900855e-05, + "loss": 12.5635, + "step": 8640 + }, + { + "epoch": 0.36017673294151975, + "grad_norm": 504.0, + "learning_rate": 7.403525883823883e-05, + "loss": 15.6259, + "step": 8641 + }, + { + "epoch": 0.36021841523904796, + "grad_norm": 436.0, + "learning_rate": 7.402933964942435e-05, + "loss": 17.1253, + "step": 8642 + }, + { + "epoch": 0.36026009753657623, + "grad_norm": 576.0, + "learning_rate": 7.402342002267303e-05, + "loss": 18.8759, + "step": 8643 + }, + { + "epoch": 0.36030177983410444, + "grad_norm": 564.0, + "learning_rate": 7.401749995809277e-05, + "loss": 18.7502, + "step": 8644 + }, + { + "epoch": 0.3603434621316327, + "grad_norm": 300.0, + "learning_rate": 7.401157945579142e-05, + "loss": 13.5629, + "step": 8645 + }, + { + "epoch": 0.3603851444291609, + "grad_norm": 494.0, + "learning_rate": 7.400565851587691e-05, + "loss": 15.7507, + "step": 8646 + }, + { + "epoch": 0.3604268267266892, + "grad_norm": 161.0, + "learning_rate": 7.399973713845713e-05, + "loss": 11.1253, + "step": 8647 + }, + { + "epoch": 0.3604685090242174, + "grad_norm": 362.0, + "learning_rate": 7.399381532364003e-05, + "loss": 14.3752, + "step": 8648 + }, + { + "epoch": 0.36051019132174567, + "grad_norm": 290.0, + "learning_rate": 7.398789307153351e-05, + "loss": 13.2503, + "step": 8649 + }, + { + "epoch": 0.3605518736192739, + "grad_norm": 216.0, + "learning_rate": 7.398197038224551e-05, + "loss": 11.6878, + "step": 8650 + }, + { + "epoch": 0.36059355591680214, + "grad_norm": 270.0, + "learning_rate": 7.397604725588398e-05, + "loss": 12.8755, + "step": 8651 + }, + { + "epoch": 0.36063523821433036, + "grad_norm": 496.0, + "learning_rate": 7.397012369255685e-05, + "loss": 15.1879, + "step": 8652 + }, + { + "epoch": 0.3606769205118586, + "grad_norm": 468.0, + "learning_rate": 7.396419969237208e-05, + "loss": 14.8753, + "step": 8653 + }, + { + "epoch": 0.36071860280938683, + "grad_norm": 588.0, + "learning_rate": 7.395827525543766e-05, + "loss": 18.7502, + "step": 8654 + }, + { + "epoch": 0.3607602851069151, + "grad_norm": 506.0, + "learning_rate": 7.395235038186152e-05, + "loss": 17.5019, + "step": 8655 + }, + { + "epoch": 0.3608019674044433, + "grad_norm": 272.0, + "learning_rate": 7.39464250717517e-05, + "loss": 12.0628, + "step": 8656 + }, + { + "epoch": 0.3608436497019716, + "grad_norm": 346.0, + "learning_rate": 7.394049932521614e-05, + "loss": 13.9379, + "step": 8657 + }, + { + "epoch": 0.3608853319994998, + "grad_norm": 358.0, + "learning_rate": 7.393457314236285e-05, + "loss": 14.0646, + "step": 8658 + }, + { + "epoch": 0.36092701429702806, + "grad_norm": 448.0, + "learning_rate": 7.392864652329984e-05, + "loss": 15.0002, + "step": 8659 + }, + { + "epoch": 0.36096869659455627, + "grad_norm": 506.0, + "learning_rate": 7.392271946813514e-05, + "loss": 15.5633, + "step": 8660 + }, + { + "epoch": 0.36101037889208454, + "grad_norm": 528.0, + "learning_rate": 7.391679197697674e-05, + "loss": 17.5002, + "step": 8661 + }, + { + "epoch": 0.36105206118961275, + "grad_norm": 272.0, + "learning_rate": 7.391086404993268e-05, + "loss": 12.5666, + "step": 8662 + }, + { + "epoch": 0.361093743487141, + "grad_norm": 490.0, + "learning_rate": 7.3904935687111e-05, + "loss": 16.6254, + "step": 8663 + }, + { + "epoch": 0.36113542578466923, + "grad_norm": 452.0, + "learning_rate": 7.389900688861977e-05, + "loss": 15.5627, + "step": 8664 + }, + { + "epoch": 0.3611771080821975, + "grad_norm": 312.0, + "learning_rate": 7.389307765456699e-05, + "loss": 13.3128, + "step": 8665 + }, + { + "epoch": 0.3612187903797257, + "grad_norm": 88.5, + "learning_rate": 7.388714798506076e-05, + "loss": 8.0631, + "step": 8666 + }, + { + "epoch": 0.361260472677254, + "grad_norm": 151.0, + "learning_rate": 7.388121788020915e-05, + "loss": 9.7503, + "step": 8667 + }, + { + "epoch": 0.3613021549747822, + "grad_norm": 96.0, + "learning_rate": 7.38752873401202e-05, + "loss": 10.3753, + "step": 8668 + }, + { + "epoch": 0.36134383727231045, + "grad_norm": 444.0, + "learning_rate": 7.386935636490202e-05, + "loss": 15.6252, + "step": 8669 + }, + { + "epoch": 0.36138551956983866, + "grad_norm": 458.0, + "learning_rate": 7.386342495466272e-05, + "loss": 15.1255, + "step": 8670 + }, + { + "epoch": 0.36142720186736693, + "grad_norm": 234.0, + "learning_rate": 7.385749310951037e-05, + "loss": 13.5009, + "step": 8671 + }, + { + "epoch": 0.36146888416489514, + "grad_norm": 452.0, + "learning_rate": 7.385156082955308e-05, + "loss": 16.6261, + "step": 8672 + }, + { + "epoch": 0.3615105664624234, + "grad_norm": 163.0, + "learning_rate": 7.3845628114899e-05, + "loss": 9.8126, + "step": 8673 + }, + { + "epoch": 0.3615522487599516, + "grad_norm": 804.0, + "learning_rate": 7.383969496565621e-05, + "loss": 20.8776, + "step": 8674 + }, + { + "epoch": 0.3615939310574799, + "grad_norm": 89.5, + "learning_rate": 7.383376138193288e-05, + "loss": 8.7503, + "step": 8675 + }, + { + "epoch": 0.3616356133550081, + "grad_norm": 426.0, + "learning_rate": 7.38278273638371e-05, + "loss": 15.1258, + "step": 8676 + }, + { + "epoch": 0.36167729565253637, + "grad_norm": 752.0, + "learning_rate": 7.38218929114771e-05, + "loss": 20.2515, + "step": 8677 + }, + { + "epoch": 0.36171897795006464, + "grad_norm": 174.0, + "learning_rate": 7.381595802496095e-05, + "loss": 10.376, + "step": 8678 + }, + { + "epoch": 0.36176066024759285, + "grad_norm": 430.0, + "learning_rate": 7.381002270439687e-05, + "loss": 15.5031, + "step": 8679 + }, + { + "epoch": 0.3618023425451211, + "grad_norm": 168.0, + "learning_rate": 7.3804086949893e-05, + "loss": 11.2504, + "step": 8680 + }, + { + "epoch": 0.3618440248426493, + "grad_norm": 652.0, + "learning_rate": 7.379815076155755e-05, + "loss": 18.2547, + "step": 8681 + }, + { + "epoch": 0.3618857071401776, + "grad_norm": 226.0, + "learning_rate": 7.379221413949867e-05, + "loss": 11.1256, + "step": 8682 + }, + { + "epoch": 0.3619273894377058, + "grad_norm": 316.0, + "learning_rate": 7.37862770838246e-05, + "loss": 13.5665, + "step": 8683 + }, + { + "epoch": 0.36196907173523407, + "grad_norm": 234.0, + "learning_rate": 7.378033959464348e-05, + "loss": 12.2503, + "step": 8684 + }, + { + "epoch": 0.3620107540327623, + "grad_norm": 324.0, + "learning_rate": 7.377440167206359e-05, + "loss": 13.8757, + "step": 8685 + }, + { + "epoch": 0.36205243633029055, + "grad_norm": 380.0, + "learning_rate": 7.37684633161931e-05, + "loss": 13.3755, + "step": 8686 + }, + { + "epoch": 0.36209411862781876, + "grad_norm": 286.0, + "learning_rate": 7.376252452714027e-05, + "loss": 11.5002, + "step": 8687 + }, + { + "epoch": 0.36213580092534703, + "grad_norm": 576.0, + "learning_rate": 7.375658530501332e-05, + "loss": 19.3752, + "step": 8688 + }, + { + "epoch": 0.36217748322287524, + "grad_norm": 40.25, + "learning_rate": 7.37506456499205e-05, + "loss": 7.0945, + "step": 8689 + }, + { + "epoch": 0.3622191655204035, + "grad_norm": 262.0, + "learning_rate": 7.374470556197005e-05, + "loss": 11.876, + "step": 8690 + }, + { + "epoch": 0.3622608478179317, + "grad_norm": 824.0, + "learning_rate": 7.373876504127022e-05, + "loss": 21.3785, + "step": 8691 + }, + { + "epoch": 0.36230253011546, + "grad_norm": 119.0, + "learning_rate": 7.37328240879293e-05, + "loss": 10.063, + "step": 8692 + }, + { + "epoch": 0.3623442124129882, + "grad_norm": 172.0, + "learning_rate": 7.372688270205555e-05, + "loss": 10.0627, + "step": 8693 + }, + { + "epoch": 0.36238589471051647, + "grad_norm": 380.0, + "learning_rate": 7.372094088375728e-05, + "loss": 12.44, + "step": 8694 + }, + { + "epoch": 0.3624275770080447, + "grad_norm": 278.0, + "learning_rate": 7.371499863314272e-05, + "loss": 13.3752, + "step": 8695 + }, + { + "epoch": 0.36246925930557294, + "grad_norm": 386.0, + "learning_rate": 7.370905595032023e-05, + "loss": 14.6253, + "step": 8696 + }, + { + "epoch": 0.36251094160310116, + "grad_norm": 258.0, + "learning_rate": 7.370311283539807e-05, + "loss": 12.8127, + "step": 8697 + }, + { + "epoch": 0.3625526239006294, + "grad_norm": 408.0, + "learning_rate": 7.369716928848458e-05, + "loss": 14.1882, + "step": 8698 + }, + { + "epoch": 0.36259430619815763, + "grad_norm": 406.0, + "learning_rate": 7.369122530968809e-05, + "loss": 14.1252, + "step": 8699 + }, + { + "epoch": 0.3626359884956859, + "grad_norm": 380.0, + "learning_rate": 7.36852808991169e-05, + "loss": 14.6253, + "step": 8700 + }, + { + "epoch": 0.3626776707932141, + "grad_norm": 374.0, + "learning_rate": 7.367933605687938e-05, + "loss": 14.6877, + "step": 8701 + }, + { + "epoch": 0.3627193530907424, + "grad_norm": 278.0, + "learning_rate": 7.367339078308385e-05, + "loss": 12.7503, + "step": 8702 + }, + { + "epoch": 0.3627610353882706, + "grad_norm": 456.0, + "learning_rate": 7.366744507783866e-05, + "loss": 15.3758, + "step": 8703 + }, + { + "epoch": 0.36280271768579886, + "grad_norm": 430.0, + "learning_rate": 7.36614989412522e-05, + "loss": 16.0021, + "step": 8704 + }, + { + "epoch": 0.36284439998332707, + "grad_norm": 680.0, + "learning_rate": 7.365555237343283e-05, + "loss": 20.0003, + "step": 8705 + }, + { + "epoch": 0.36288608228085534, + "grad_norm": 282.0, + "learning_rate": 7.36496053744889e-05, + "loss": 13.2505, + "step": 8706 + }, + { + "epoch": 0.36292776457838355, + "grad_norm": 294.0, + "learning_rate": 7.364365794452881e-05, + "loss": 9.3768, + "step": 8707 + }, + { + "epoch": 0.3629694468759118, + "grad_norm": 278.0, + "learning_rate": 7.363771008366097e-05, + "loss": 12.8758, + "step": 8708 + }, + { + "epoch": 0.36301112917344003, + "grad_norm": 227.0, + "learning_rate": 7.363176179199373e-05, + "loss": 11.9379, + "step": 8709 + }, + { + "epoch": 0.3630528114709683, + "grad_norm": 700.0, + "learning_rate": 7.362581306963556e-05, + "loss": 19.2501, + "step": 8710 + }, + { + "epoch": 0.3630944937684965, + "grad_norm": 348.0, + "learning_rate": 7.361986391669486e-05, + "loss": 13.8129, + "step": 8711 + }, + { + "epoch": 0.3631361760660248, + "grad_norm": 215.0, + "learning_rate": 7.361391433328002e-05, + "loss": 11.188, + "step": 8712 + }, + { + "epoch": 0.363177858363553, + "grad_norm": 183.0, + "learning_rate": 7.360796431949951e-05, + "loss": 11.2503, + "step": 8713 + }, + { + "epoch": 0.36321954066108125, + "grad_norm": 248.0, + "learning_rate": 7.360201387546175e-05, + "loss": 12.4377, + "step": 8714 + }, + { + "epoch": 0.36326122295860946, + "grad_norm": 250.0, + "learning_rate": 7.35960630012752e-05, + "loss": 9.8136, + "step": 8715 + }, + { + "epoch": 0.36330290525613773, + "grad_norm": 812.0, + "learning_rate": 7.35901116970483e-05, + "loss": 21.8754, + "step": 8716 + }, + { + "epoch": 0.36334458755366594, + "grad_norm": 306.0, + "learning_rate": 7.358415996288952e-05, + "loss": 13.8781, + "step": 8717 + }, + { + "epoch": 0.3633862698511942, + "grad_norm": 744.0, + "learning_rate": 7.357820779890733e-05, + "loss": 21.1252, + "step": 8718 + }, + { + "epoch": 0.3634279521487224, + "grad_norm": 340.0, + "learning_rate": 7.357225520521022e-05, + "loss": 12.938, + "step": 8719 + }, + { + "epoch": 0.3634696344462507, + "grad_norm": 348.0, + "learning_rate": 7.356630218190666e-05, + "loss": 15.2503, + "step": 8720 + }, + { + "epoch": 0.3635113167437789, + "grad_norm": 212.0, + "learning_rate": 7.356034872910514e-05, + "loss": 11.5632, + "step": 8721 + }, + { + "epoch": 0.36355299904130717, + "grad_norm": 330.0, + "learning_rate": 7.355439484691418e-05, + "loss": 14.3767, + "step": 8722 + }, + { + "epoch": 0.3635946813388354, + "grad_norm": 932.0, + "learning_rate": 7.354844053544228e-05, + "loss": 21.0054, + "step": 8723 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 207.0, + "learning_rate": 7.354248579479797e-05, + "loss": 11.6252, + "step": 8724 + }, + { + "epoch": 0.36367804593389186, + "grad_norm": 96.5, + "learning_rate": 7.353653062508976e-05, + "loss": 8.1878, + "step": 8725 + }, + { + "epoch": 0.3637197282314201, + "grad_norm": 852.0, + "learning_rate": 7.353057502642618e-05, + "loss": 23.7504, + "step": 8726 + }, + { + "epoch": 0.36376141052894834, + "grad_norm": 454.0, + "learning_rate": 7.35246189989158e-05, + "loss": 16.5003, + "step": 8727 + }, + { + "epoch": 0.3638030928264766, + "grad_norm": 584.0, + "learning_rate": 7.351866254266716e-05, + "loss": 18.6254, + "step": 8728 + }, + { + "epoch": 0.3638447751240048, + "grad_norm": 185.0, + "learning_rate": 7.351270565778878e-05, + "loss": 10.1897, + "step": 8729 + }, + { + "epoch": 0.3638864574215331, + "grad_norm": 436.0, + "learning_rate": 7.350674834438925e-05, + "loss": 15.1889, + "step": 8730 + }, + { + "epoch": 0.3639281397190613, + "grad_norm": 86.0, + "learning_rate": 7.350079060257716e-05, + "loss": 8.1878, + "step": 8731 + }, + { + "epoch": 0.36396982201658956, + "grad_norm": 158.0, + "learning_rate": 7.349483243246108e-05, + "loss": 10.2505, + "step": 8732 + }, + { + "epoch": 0.3640115043141178, + "grad_norm": 97.5, + "learning_rate": 7.348887383414958e-05, + "loss": 7.969, + "step": 8733 + }, + { + "epoch": 0.36405318661164604, + "grad_norm": 296.0, + "learning_rate": 7.348291480775128e-05, + "loss": 12.8127, + "step": 8734 + }, + { + "epoch": 0.36409486890917425, + "grad_norm": 604.0, + "learning_rate": 7.347695535337475e-05, + "loss": 16.7512, + "step": 8735 + }, + { + "epoch": 0.3641365512067025, + "grad_norm": 1632.0, + "learning_rate": 7.347099547112865e-05, + "loss": 35.5003, + "step": 8736 + }, + { + "epoch": 0.36417823350423073, + "grad_norm": 167.0, + "learning_rate": 7.346503516112156e-05, + "loss": 10.4382, + "step": 8737 + }, + { + "epoch": 0.364219915801759, + "grad_norm": 352.0, + "learning_rate": 7.345907442346213e-05, + "loss": 13.5629, + "step": 8738 + }, + { + "epoch": 0.3642615980992872, + "grad_norm": 358.0, + "learning_rate": 7.345311325825899e-05, + "loss": 14.5627, + "step": 8739 + }, + { + "epoch": 0.3643032803968155, + "grad_norm": 368.0, + "learning_rate": 7.344715166562077e-05, + "loss": 13.9386, + "step": 8740 + }, + { + "epoch": 0.3643449626943437, + "grad_norm": 438.0, + "learning_rate": 7.344118964565614e-05, + "loss": 16.3754, + "step": 8741 + }, + { + "epoch": 0.36438664499187196, + "grad_norm": 436.0, + "learning_rate": 7.343522719847373e-05, + "loss": 15.1878, + "step": 8742 + }, + { + "epoch": 0.36442832728940017, + "grad_norm": 102.0, + "learning_rate": 7.342926432418223e-05, + "loss": 8.9377, + "step": 8743 + }, + { + "epoch": 0.36447000958692843, + "grad_norm": 207.0, + "learning_rate": 7.342330102289032e-05, + "loss": 10.6251, + "step": 8744 + }, + { + "epoch": 0.36451169188445665, + "grad_norm": 280.0, + "learning_rate": 7.341733729470666e-05, + "loss": 13.1899, + "step": 8745 + }, + { + "epoch": 0.3645533741819849, + "grad_norm": 548.0, + "learning_rate": 7.341137313973995e-05, + "loss": 18.0005, + "step": 8746 + }, + { + "epoch": 0.3645950564795131, + "grad_norm": 474.0, + "learning_rate": 7.340540855809887e-05, + "loss": 16.7503, + "step": 8747 + }, + { + "epoch": 0.3646367387770414, + "grad_norm": 452.0, + "learning_rate": 7.339944354989216e-05, + "loss": 16.3755, + "step": 8748 + }, + { + "epoch": 0.3646784210745696, + "grad_norm": 1056.0, + "learning_rate": 7.339347811522851e-05, + "loss": 28.8754, + "step": 8749 + }, + { + "epoch": 0.36472010337209787, + "grad_norm": 356.0, + "learning_rate": 7.338751225421665e-05, + "loss": 13.314, + "step": 8750 + }, + { + "epoch": 0.36476178566962614, + "grad_norm": 378.0, + "learning_rate": 7.33815459669653e-05, + "loss": 15.1876, + "step": 8751 + }, + { + "epoch": 0.36480346796715435, + "grad_norm": 118.0, + "learning_rate": 7.33755792535832e-05, + "loss": 7.8439, + "step": 8752 + }, + { + "epoch": 0.3648451502646826, + "grad_norm": 508.0, + "learning_rate": 7.33696121141791e-05, + "loss": 18.1251, + "step": 8753 + }, + { + "epoch": 0.3648868325622108, + "grad_norm": 350.0, + "learning_rate": 7.336364454886173e-05, + "loss": 14.3127, + "step": 8754 + }, + { + "epoch": 0.3649285148597391, + "grad_norm": 412.0, + "learning_rate": 7.335767655773987e-05, + "loss": 14.9377, + "step": 8755 + }, + { + "epoch": 0.3649701971572673, + "grad_norm": 376.0, + "learning_rate": 7.335170814092228e-05, + "loss": 13.7502, + "step": 8756 + }, + { + "epoch": 0.3650118794547956, + "grad_norm": 290.0, + "learning_rate": 7.334573929851773e-05, + "loss": 11.8753, + "step": 8757 + }, + { + "epoch": 0.3650535617523238, + "grad_norm": 368.0, + "learning_rate": 7.333977003063502e-05, + "loss": 14.8753, + "step": 8758 + }, + { + "epoch": 0.36509524404985205, + "grad_norm": 528.0, + "learning_rate": 7.333380033738293e-05, + "loss": 18.0021, + "step": 8759 + }, + { + "epoch": 0.36513692634738026, + "grad_norm": 348.0, + "learning_rate": 7.332783021887024e-05, + "loss": 15.4378, + "step": 8760 + }, + { + "epoch": 0.36517860864490853, + "grad_norm": 988.0, + "learning_rate": 7.332185967520579e-05, + "loss": 23.0002, + "step": 8761 + }, + { + "epoch": 0.36522029094243674, + "grad_norm": 314.0, + "learning_rate": 7.331588870649836e-05, + "loss": 13.6256, + "step": 8762 + }, + { + "epoch": 0.365261973239965, + "grad_norm": 470.0, + "learning_rate": 7.330991731285682e-05, + "loss": 17.7513, + "step": 8763 + }, + { + "epoch": 0.3653036555374932, + "grad_norm": 300.0, + "learning_rate": 7.330394549438995e-05, + "loss": 11.2502, + "step": 8764 + }, + { + "epoch": 0.3653453378350215, + "grad_norm": 156.0, + "learning_rate": 7.32979732512066e-05, + "loss": 11.5002, + "step": 8765 + }, + { + "epoch": 0.3653870201325497, + "grad_norm": 284.0, + "learning_rate": 7.329200058341562e-05, + "loss": 13.1254, + "step": 8766 + }, + { + "epoch": 0.36542870243007797, + "grad_norm": 418.0, + "learning_rate": 7.328602749112587e-05, + "loss": 15.2502, + "step": 8767 + }, + { + "epoch": 0.3654703847276062, + "grad_norm": 338.0, + "learning_rate": 7.32800539744462e-05, + "loss": 14.7502, + "step": 8768 + }, + { + "epoch": 0.36551206702513445, + "grad_norm": 370.0, + "learning_rate": 7.327408003348547e-05, + "loss": 12.9377, + "step": 8769 + }, + { + "epoch": 0.36555374932266266, + "grad_norm": 1008.0, + "learning_rate": 7.326810566835258e-05, + "loss": 25.1256, + "step": 8770 + }, + { + "epoch": 0.3655954316201909, + "grad_norm": 556.0, + "learning_rate": 7.32621308791564e-05, + "loss": 18.0036, + "step": 8771 + }, + { + "epoch": 0.36563711391771914, + "grad_norm": 318.0, + "learning_rate": 7.325615566600579e-05, + "loss": 13.1877, + "step": 8772 + }, + { + "epoch": 0.3656787962152474, + "grad_norm": 280.0, + "learning_rate": 7.325018002900969e-05, + "loss": 12.7504, + "step": 8773 + }, + { + "epoch": 0.3657204785127756, + "grad_norm": 748.0, + "learning_rate": 7.324420396827702e-05, + "loss": 23.5001, + "step": 8774 + }, + { + "epoch": 0.3657621608103039, + "grad_norm": 368.0, + "learning_rate": 7.323822748391664e-05, + "loss": 15.8129, + "step": 8775 + }, + { + "epoch": 0.3658038431078321, + "grad_norm": 1360.0, + "learning_rate": 7.32322505760375e-05, + "loss": 34.5005, + "step": 8776 + }, + { + "epoch": 0.36584552540536036, + "grad_norm": 432.0, + "learning_rate": 7.322627324474856e-05, + "loss": 14.6262, + "step": 8777 + }, + { + "epoch": 0.3658872077028886, + "grad_norm": 472.0, + "learning_rate": 7.32202954901587e-05, + "loss": 17.5008, + "step": 8778 + }, + { + "epoch": 0.36592889000041684, + "grad_norm": 272.0, + "learning_rate": 7.321431731237692e-05, + "loss": 12.563, + "step": 8779 + }, + { + "epoch": 0.36597057229794505, + "grad_norm": 384.0, + "learning_rate": 7.320833871151212e-05, + "loss": 14.0633, + "step": 8780 + }, + { + "epoch": 0.3660122545954733, + "grad_norm": 490.0, + "learning_rate": 7.32023596876733e-05, + "loss": 17.0001, + "step": 8781 + }, + { + "epoch": 0.36605393689300153, + "grad_norm": 245.0, + "learning_rate": 7.319638024096942e-05, + "loss": 11.5002, + "step": 8782 + }, + { + "epoch": 0.3660956191905298, + "grad_norm": 159.0, + "learning_rate": 7.319040037150945e-05, + "loss": 9.8134, + "step": 8783 + }, + { + "epoch": 0.366137301488058, + "grad_norm": 548.0, + "learning_rate": 7.318442007940236e-05, + "loss": 19.0005, + "step": 8784 + }, + { + "epoch": 0.3661789837855863, + "grad_norm": 222.0, + "learning_rate": 7.317843936475717e-05, + "loss": 12.0634, + "step": 8785 + }, + { + "epoch": 0.3662206660831145, + "grad_norm": 100.5, + "learning_rate": 7.317245822768286e-05, + "loss": 8.6265, + "step": 8786 + }, + { + "epoch": 0.36626234838064275, + "grad_norm": 764.0, + "learning_rate": 7.316647666828844e-05, + "loss": 22.1256, + "step": 8787 + }, + { + "epoch": 0.36630403067817097, + "grad_norm": 203.0, + "learning_rate": 7.316049468668292e-05, + "loss": 11.9379, + "step": 8788 + }, + { + "epoch": 0.36634571297569923, + "grad_norm": 284.0, + "learning_rate": 7.315451228297534e-05, + "loss": 10.6879, + "step": 8789 + }, + { + "epoch": 0.36638739527322745, + "grad_norm": 264.0, + "learning_rate": 7.314852945727472e-05, + "loss": 12.5006, + "step": 8790 + }, + { + "epoch": 0.3664290775707557, + "grad_norm": 412.0, + "learning_rate": 7.314254620969009e-05, + "loss": 15.3129, + "step": 8791 + }, + { + "epoch": 0.3664707598682839, + "grad_norm": 568.0, + "learning_rate": 7.31365625403305e-05, + "loss": 16.3786, + "step": 8792 + }, + { + "epoch": 0.3665124421658122, + "grad_norm": 174.0, + "learning_rate": 7.313057844930502e-05, + "loss": 10.3752, + "step": 8793 + }, + { + "epoch": 0.3665541244633404, + "grad_norm": 300.0, + "learning_rate": 7.312459393672267e-05, + "loss": 14.2508, + "step": 8794 + }, + { + "epoch": 0.36659580676086867, + "grad_norm": 245.0, + "learning_rate": 7.311860900269255e-05, + "loss": 12.5627, + "step": 8795 + }, + { + "epoch": 0.3666374890583969, + "grad_norm": 812.0, + "learning_rate": 7.311262364732373e-05, + "loss": 20.2515, + "step": 8796 + }, + { + "epoch": 0.36667917135592515, + "grad_norm": 178.0, + "learning_rate": 7.310663787072529e-05, + "loss": 9.1252, + "step": 8797 + }, + { + "epoch": 0.36672085365345336, + "grad_norm": 234.0, + "learning_rate": 7.310065167300633e-05, + "loss": 8.9388, + "step": 8798 + }, + { + "epoch": 0.3667625359509816, + "grad_norm": 648.0, + "learning_rate": 7.309466505427595e-05, + "loss": 19.2504, + "step": 8799 + }, + { + "epoch": 0.36680421824850984, + "grad_norm": 1104.0, + "learning_rate": 7.308867801464322e-05, + "loss": 29.3777, + "step": 8800 + }, + { + "epoch": 0.3668459005460381, + "grad_norm": 266.0, + "learning_rate": 7.30826905542173e-05, + "loss": 13.438, + "step": 8801 + }, + { + "epoch": 0.3668875828435663, + "grad_norm": 156.0, + "learning_rate": 7.30767026731073e-05, + "loss": 10.3753, + "step": 8802 + }, + { + "epoch": 0.3669292651410946, + "grad_norm": 133.0, + "learning_rate": 7.307071437142234e-05, + "loss": 10.064, + "step": 8803 + }, + { + "epoch": 0.3669709474386228, + "grad_norm": 1336.0, + "learning_rate": 7.306472564927156e-05, + "loss": 34.0002, + "step": 8804 + }, + { + "epoch": 0.36701262973615106, + "grad_norm": 952.0, + "learning_rate": 7.305873650676412e-05, + "loss": 26.7502, + "step": 8805 + }, + { + "epoch": 0.3670543120336793, + "grad_norm": 498.0, + "learning_rate": 7.305274694400916e-05, + "loss": 16.3754, + "step": 8806 + }, + { + "epoch": 0.36709599433120754, + "grad_norm": 204.0, + "learning_rate": 7.304675696111584e-05, + "loss": 11.4377, + "step": 8807 + }, + { + "epoch": 0.36713767662873575, + "grad_norm": 288.0, + "learning_rate": 7.304076655819332e-05, + "loss": 10.8755, + "step": 8808 + }, + { + "epoch": 0.367179358926264, + "grad_norm": 544.0, + "learning_rate": 7.303477573535078e-05, + "loss": 18.2504, + "step": 8809 + }, + { + "epoch": 0.36722104122379223, + "grad_norm": 560.0, + "learning_rate": 7.302878449269741e-05, + "loss": 18.7509, + "step": 8810 + }, + { + "epoch": 0.3672627235213205, + "grad_norm": 210.0, + "learning_rate": 7.302279283034241e-05, + "loss": 5.2505, + "step": 8811 + }, + { + "epoch": 0.3673044058188487, + "grad_norm": 330.0, + "learning_rate": 7.301680074839497e-05, + "loss": 14.8754, + "step": 8812 + }, + { + "epoch": 0.367346088116377, + "grad_norm": 326.0, + "learning_rate": 7.301080824696425e-05, + "loss": 13.5006, + "step": 8813 + }, + { + "epoch": 0.3673877704139052, + "grad_norm": 466.0, + "learning_rate": 7.300481532615955e-05, + "loss": 16.0003, + "step": 8814 + }, + { + "epoch": 0.36742945271143346, + "grad_norm": 74.5, + "learning_rate": 7.299882198609004e-05, + "loss": 8.5629, + "step": 8815 + }, + { + "epoch": 0.36747113500896167, + "grad_norm": 544.0, + "learning_rate": 7.299282822686495e-05, + "loss": 19.6253, + "step": 8816 + }, + { + "epoch": 0.36751281730648994, + "grad_norm": 416.0, + "learning_rate": 7.298683404859353e-05, + "loss": 14.5632, + "step": 8817 + }, + { + "epoch": 0.36755449960401815, + "grad_norm": 328.0, + "learning_rate": 7.298083945138501e-05, + "loss": 12.2505, + "step": 8818 + }, + { + "epoch": 0.3675961819015464, + "grad_norm": 632.0, + "learning_rate": 7.297484443534868e-05, + "loss": 19.8753, + "step": 8819 + }, + { + "epoch": 0.3676378641990746, + "grad_norm": 328.0, + "learning_rate": 7.296884900059374e-05, + "loss": 14.1253, + "step": 8820 + }, + { + "epoch": 0.3676795464966029, + "grad_norm": 328.0, + "learning_rate": 7.296285314722951e-05, + "loss": 13.8137, + "step": 8821 + }, + { + "epoch": 0.3677212287941311, + "grad_norm": 107.0, + "learning_rate": 7.295685687536522e-05, + "loss": 8.8754, + "step": 8822 + }, + { + "epoch": 0.3677629110916594, + "grad_norm": 500.0, + "learning_rate": 7.295086018511019e-05, + "loss": 16.5006, + "step": 8823 + }, + { + "epoch": 0.36780459338918764, + "grad_norm": 300.0, + "learning_rate": 7.29448630765737e-05, + "loss": 13.6291, + "step": 8824 + }, + { + "epoch": 0.36784627568671585, + "grad_norm": 370.0, + "learning_rate": 7.293886554986502e-05, + "loss": 15.1877, + "step": 8825 + }, + { + "epoch": 0.3678879579842441, + "grad_norm": 240.0, + "learning_rate": 7.29328676050935e-05, + "loss": 12.0629, + "step": 8826 + }, + { + "epoch": 0.36792964028177233, + "grad_norm": 258.0, + "learning_rate": 7.292686924236842e-05, + "loss": 11.8127, + "step": 8827 + }, + { + "epoch": 0.3679713225793006, + "grad_norm": 608.0, + "learning_rate": 7.292087046179912e-05, + "loss": 19.0003, + "step": 8828 + }, + { + "epoch": 0.3680130048768288, + "grad_norm": 1288.0, + "learning_rate": 7.291487126349492e-05, + "loss": 27.0039, + "step": 8829 + }, + { + "epoch": 0.3680546871743571, + "grad_norm": 286.0, + "learning_rate": 7.290887164756516e-05, + "loss": 12.2507, + "step": 8830 + }, + { + "epoch": 0.3680963694718853, + "grad_norm": 195.0, + "learning_rate": 7.290287161411918e-05, + "loss": 11.7502, + "step": 8831 + }, + { + "epoch": 0.36813805176941355, + "grad_norm": 668.0, + "learning_rate": 7.289687116326632e-05, + "loss": 22.6258, + "step": 8832 + }, + { + "epoch": 0.36817973406694177, + "grad_norm": 868.0, + "learning_rate": 7.289087029511596e-05, + "loss": 21.1296, + "step": 8833 + }, + { + "epoch": 0.36822141636447003, + "grad_norm": 229.0, + "learning_rate": 7.288486900977747e-05, + "loss": 11.6252, + "step": 8834 + }, + { + "epoch": 0.36826309866199824, + "grad_norm": 224.0, + "learning_rate": 7.287886730736018e-05, + "loss": 11.5003, + "step": 8835 + }, + { + "epoch": 0.3683047809595265, + "grad_norm": 208.0, + "learning_rate": 7.287286518797353e-05, + "loss": 10.8138, + "step": 8836 + }, + { + "epoch": 0.3683464632570547, + "grad_norm": 636.0, + "learning_rate": 7.286686265172687e-05, + "loss": 20.6256, + "step": 8837 + }, + { + "epoch": 0.368388145554583, + "grad_norm": 644.0, + "learning_rate": 7.28608596987296e-05, + "loss": 20.5015, + "step": 8838 + }, + { + "epoch": 0.3684298278521112, + "grad_norm": 167.0, + "learning_rate": 7.285485632909115e-05, + "loss": 8.3756, + "step": 8839 + }, + { + "epoch": 0.36847151014963947, + "grad_norm": 156.0, + "learning_rate": 7.284885254292091e-05, + "loss": 11.0001, + "step": 8840 + }, + { + "epoch": 0.3685131924471677, + "grad_norm": 1544.0, + "learning_rate": 7.28428483403283e-05, + "loss": 36.7503, + "step": 8841 + }, + { + "epoch": 0.36855487474469595, + "grad_norm": 280.0, + "learning_rate": 7.283684372142274e-05, + "loss": 11.8755, + "step": 8842 + }, + { + "epoch": 0.36859655704222416, + "grad_norm": 576.0, + "learning_rate": 7.28308386863137e-05, + "loss": 18.5005, + "step": 8843 + }, + { + "epoch": 0.3686382393397524, + "grad_norm": 131.0, + "learning_rate": 7.282483323511058e-05, + "loss": 7.2815, + "step": 8844 + }, + { + "epoch": 0.36867992163728064, + "grad_norm": 338.0, + "learning_rate": 7.281882736792285e-05, + "loss": 14.6253, + "step": 8845 + }, + { + "epoch": 0.3687216039348089, + "grad_norm": 174.0, + "learning_rate": 7.281282108485997e-05, + "loss": 11.9376, + "step": 8846 + }, + { + "epoch": 0.3687632862323371, + "grad_norm": 187.0, + "learning_rate": 7.280681438603141e-05, + "loss": 5.8446, + "step": 8847 + }, + { + "epoch": 0.3688049685298654, + "grad_norm": 334.0, + "learning_rate": 7.280080727154663e-05, + "loss": 15.3774, + "step": 8848 + }, + { + "epoch": 0.3688466508273936, + "grad_norm": 552.0, + "learning_rate": 7.27947997415151e-05, + "loss": 16.6278, + "step": 8849 + }, + { + "epoch": 0.36888833312492186, + "grad_norm": 256.0, + "learning_rate": 7.278879179604631e-05, + "loss": 13.2503, + "step": 8850 + }, + { + "epoch": 0.3689300154224501, + "grad_norm": 196.0, + "learning_rate": 7.278278343524979e-05, + "loss": 10.1877, + "step": 8851 + }, + { + "epoch": 0.36897169771997834, + "grad_norm": 266.0, + "learning_rate": 7.277677465923502e-05, + "loss": 13.6883, + "step": 8852 + }, + { + "epoch": 0.36901338001750655, + "grad_norm": 202.0, + "learning_rate": 7.27707654681115e-05, + "loss": 10.8753, + "step": 8853 + }, + { + "epoch": 0.3690550623150348, + "grad_norm": 328.0, + "learning_rate": 7.276475586198876e-05, + "loss": 13.0004, + "step": 8854 + }, + { + "epoch": 0.36909674461256303, + "grad_norm": 236.0, + "learning_rate": 7.275874584097632e-05, + "loss": 8.6882, + "step": 8855 + }, + { + "epoch": 0.3691384269100913, + "grad_norm": 482.0, + "learning_rate": 7.275273540518373e-05, + "loss": 15.8752, + "step": 8856 + }, + { + "epoch": 0.3691801092076195, + "grad_norm": 1016.0, + "learning_rate": 7.274672455472051e-05, + "loss": 23.0056, + "step": 8857 + }, + { + "epoch": 0.3692217915051478, + "grad_norm": 175.0, + "learning_rate": 7.274071328969621e-05, + "loss": 10.6253, + "step": 8858 + }, + { + "epoch": 0.369263473802676, + "grad_norm": 174.0, + "learning_rate": 7.27347016102204e-05, + "loss": 9.7503, + "step": 8859 + }, + { + "epoch": 0.36930515610020426, + "grad_norm": 460.0, + "learning_rate": 7.272868951640263e-05, + "loss": 16.0006, + "step": 8860 + }, + { + "epoch": 0.36934683839773247, + "grad_norm": 322.0, + "learning_rate": 7.272267700835248e-05, + "loss": 13.0002, + "step": 8861 + }, + { + "epoch": 0.36938852069526074, + "grad_norm": 81.0, + "learning_rate": 7.271666408617952e-05, + "loss": 7.5942, + "step": 8862 + }, + { + "epoch": 0.36943020299278895, + "grad_norm": 83.5, + "learning_rate": 7.271065074999333e-05, + "loss": 8.9384, + "step": 8863 + }, + { + "epoch": 0.3694718852903172, + "grad_norm": 608.0, + "learning_rate": 7.270463699990354e-05, + "loss": 18.6251, + "step": 8864 + }, + { + "epoch": 0.3695135675878454, + "grad_norm": 294.0, + "learning_rate": 7.26986228360197e-05, + "loss": 12.2506, + "step": 8865 + }, + { + "epoch": 0.3695552498853737, + "grad_norm": 364.0, + "learning_rate": 7.269260825845146e-05, + "loss": 13.6256, + "step": 8866 + }, + { + "epoch": 0.3695969321829019, + "grad_norm": 290.0, + "learning_rate": 7.268659326730841e-05, + "loss": 13.1884, + "step": 8867 + }, + { + "epoch": 0.36963861448043017, + "grad_norm": 236.0, + "learning_rate": 7.26805778627002e-05, + "loss": 12.1252, + "step": 8868 + }, + { + "epoch": 0.3696802967779584, + "grad_norm": 340.0, + "learning_rate": 7.267456204473642e-05, + "loss": 14.1267, + "step": 8869 + }, + { + "epoch": 0.36972197907548665, + "grad_norm": 92.0, + "learning_rate": 7.266854581352676e-05, + "loss": 8.5628, + "step": 8870 + }, + { + "epoch": 0.36976366137301486, + "grad_norm": 418.0, + "learning_rate": 7.266252916918082e-05, + "loss": 14.441, + "step": 8871 + }, + { + "epoch": 0.36980534367054313, + "grad_norm": 528.0, + "learning_rate": 7.265651211180829e-05, + "loss": 19.3753, + "step": 8872 + }, + { + "epoch": 0.36984702596807134, + "grad_norm": 472.0, + "learning_rate": 7.26504946415188e-05, + "loss": 16.5002, + "step": 8873 + }, + { + "epoch": 0.3698887082655996, + "grad_norm": 608.0, + "learning_rate": 7.264447675842205e-05, + "loss": 19.1251, + "step": 8874 + }, + { + "epoch": 0.3699303905631278, + "grad_norm": 242.0, + "learning_rate": 7.263845846262769e-05, + "loss": 10.1253, + "step": 8875 + }, + { + "epoch": 0.3699720728606561, + "grad_norm": 256.0, + "learning_rate": 7.263243975424541e-05, + "loss": 11.4378, + "step": 8876 + }, + { + "epoch": 0.3700137551581843, + "grad_norm": 126.5, + "learning_rate": 7.262642063338491e-05, + "loss": 9.7503, + "step": 8877 + }, + { + "epoch": 0.37005543745571257, + "grad_norm": 251.0, + "learning_rate": 7.262040110015589e-05, + "loss": 12.5629, + "step": 8878 + }, + { + "epoch": 0.3700971197532408, + "grad_norm": 158.0, + "learning_rate": 7.261438115466806e-05, + "loss": 6.6889, + "step": 8879 + }, + { + "epoch": 0.37013880205076904, + "grad_norm": 245.0, + "learning_rate": 7.26083607970311e-05, + "loss": 12.8128, + "step": 8880 + }, + { + "epoch": 0.37018048434829726, + "grad_norm": 320.0, + "learning_rate": 7.260234002735479e-05, + "loss": 12.7504, + "step": 8881 + }, + { + "epoch": 0.3702221666458255, + "grad_norm": 724.0, + "learning_rate": 7.259631884574882e-05, + "loss": 21.0008, + "step": 8882 + }, + { + "epoch": 0.37026384894335373, + "grad_norm": 77.0, + "learning_rate": 7.259029725232292e-05, + "loss": 8.5633, + "step": 8883 + }, + { + "epoch": 0.370305531240882, + "grad_norm": 183.0, + "learning_rate": 7.258427524718685e-05, + "loss": 11.6254, + "step": 8884 + }, + { + "epoch": 0.3703472135384102, + "grad_norm": 468.0, + "learning_rate": 7.257825283045035e-05, + "loss": 16.0001, + "step": 8885 + }, + { + "epoch": 0.3703888958359385, + "grad_norm": 356.0, + "learning_rate": 7.257223000222321e-05, + "loss": 14.3757, + "step": 8886 + }, + { + "epoch": 0.3704305781334667, + "grad_norm": 239.0, + "learning_rate": 7.256620676261516e-05, + "loss": 12.2503, + "step": 8887 + }, + { + "epoch": 0.37047226043099496, + "grad_norm": 107.0, + "learning_rate": 7.256018311173599e-05, + "loss": 8.6882, + "step": 8888 + }, + { + "epoch": 0.37051394272852317, + "grad_norm": 466.0, + "learning_rate": 7.255415904969548e-05, + "loss": 15.5005, + "step": 8889 + }, + { + "epoch": 0.37055562502605144, + "grad_norm": 500.0, + "learning_rate": 7.254813457660341e-05, + "loss": 13.6261, + "step": 8890 + }, + { + "epoch": 0.37059730732357965, + "grad_norm": 844.0, + "learning_rate": 7.254210969256959e-05, + "loss": 21.1264, + "step": 8891 + }, + { + "epoch": 0.3706389896211079, + "grad_norm": 1664.0, + "learning_rate": 7.253608439770383e-05, + "loss": 34.7547, + "step": 8892 + }, + { + "epoch": 0.37068067191863613, + "grad_norm": 888.0, + "learning_rate": 7.253005869211593e-05, + "loss": 25.3767, + "step": 8893 + }, + { + "epoch": 0.3707223542161644, + "grad_norm": 564.0, + "learning_rate": 7.252403257591572e-05, + "loss": 17.3753, + "step": 8894 + }, + { + "epoch": 0.3707640365136926, + "grad_norm": 191.0, + "learning_rate": 7.2518006049213e-05, + "loss": 11.5005, + "step": 8895 + }, + { + "epoch": 0.3708057188112209, + "grad_norm": 764.0, + "learning_rate": 7.25119791121176e-05, + "loss": 22.1254, + "step": 8896 + }, + { + "epoch": 0.37084740110874914, + "grad_norm": 235.0, + "learning_rate": 7.250595176473942e-05, + "loss": 13.3133, + "step": 8897 + }, + { + "epoch": 0.37088908340627735, + "grad_norm": 342.0, + "learning_rate": 7.249992400718825e-05, + "loss": 13.8127, + "step": 8898 + }, + { + "epoch": 0.3709307657038056, + "grad_norm": 354.0, + "learning_rate": 7.249389583957399e-05, + "loss": 14.7504, + "step": 8899 + }, + { + "epoch": 0.37097244800133383, + "grad_norm": 844.0, + "learning_rate": 7.248786726200647e-05, + "loss": 19.005, + "step": 8900 + }, + { + "epoch": 0.3710141302988621, + "grad_norm": 696.0, + "learning_rate": 7.248183827459556e-05, + "loss": 17.8761, + "step": 8901 + }, + { + "epoch": 0.3710558125963903, + "grad_norm": 498.0, + "learning_rate": 7.247580887745116e-05, + "loss": 18.7505, + "step": 8902 + }, + { + "epoch": 0.3710974948939186, + "grad_norm": 228.0, + "learning_rate": 7.246977907068316e-05, + "loss": 11.6877, + "step": 8903 + }, + { + "epoch": 0.3711391771914468, + "grad_norm": 496.0, + "learning_rate": 7.246374885440141e-05, + "loss": 18.3753, + "step": 8904 + }, + { + "epoch": 0.37118085948897506, + "grad_norm": 410.0, + "learning_rate": 7.245771822871588e-05, + "loss": 15.8753, + "step": 8905 + }, + { + "epoch": 0.37122254178650327, + "grad_norm": 464.0, + "learning_rate": 7.245168719373642e-05, + "loss": 17.2504, + "step": 8906 + }, + { + "epoch": 0.37126422408403154, + "grad_norm": 442.0, + "learning_rate": 7.244565574957297e-05, + "loss": 16.1252, + "step": 8907 + }, + { + "epoch": 0.37130590638155975, + "grad_norm": 288.0, + "learning_rate": 7.243962389633546e-05, + "loss": 13.1878, + "step": 8908 + }, + { + "epoch": 0.371347588679088, + "grad_norm": 362.0, + "learning_rate": 7.243359163413382e-05, + "loss": 15.1877, + "step": 8909 + }, + { + "epoch": 0.3713892709766162, + "grad_norm": 378.0, + "learning_rate": 7.242755896307796e-05, + "loss": 13.6881, + "step": 8910 + }, + { + "epoch": 0.3714309532741445, + "grad_norm": 123.0, + "learning_rate": 7.242152588327785e-05, + "loss": 9.8758, + "step": 8911 + }, + { + "epoch": 0.3714726355716727, + "grad_norm": 62.5, + "learning_rate": 7.241549239484345e-05, + "loss": 7.0628, + "step": 8912 + }, + { + "epoch": 0.37151431786920097, + "grad_norm": 175.0, + "learning_rate": 7.240945849788471e-05, + "loss": 10.5009, + "step": 8913 + }, + { + "epoch": 0.3715560001667292, + "grad_norm": 216.0, + "learning_rate": 7.240342419251158e-05, + "loss": 11.0629, + "step": 8914 + }, + { + "epoch": 0.37159768246425745, + "grad_norm": 362.0, + "learning_rate": 7.239738947883408e-05, + "loss": 15.1252, + "step": 8915 + }, + { + "epoch": 0.37163936476178566, + "grad_norm": 512.0, + "learning_rate": 7.239135435696215e-05, + "loss": 15.8133, + "step": 8916 + }, + { + "epoch": 0.37168104705931393, + "grad_norm": 648.0, + "learning_rate": 7.23853188270058e-05, + "loss": 19.5002, + "step": 8917 + }, + { + "epoch": 0.37172272935684214, + "grad_norm": 164.0, + "learning_rate": 7.237928288907505e-05, + "loss": 9.4377, + "step": 8918 + }, + { + "epoch": 0.3717644116543704, + "grad_norm": 221.0, + "learning_rate": 7.237324654327987e-05, + "loss": 12.3764, + "step": 8919 + }, + { + "epoch": 0.3718060939518986, + "grad_norm": 172.0, + "learning_rate": 7.236720978973029e-05, + "loss": 10.8129, + "step": 8920 + }, + { + "epoch": 0.3718477762494269, + "grad_norm": 204.0, + "learning_rate": 7.236117262853632e-05, + "loss": 10.7503, + "step": 8921 + }, + { + "epoch": 0.3718894585469551, + "grad_norm": 560.0, + "learning_rate": 7.2355135059808e-05, + "loss": 17.8754, + "step": 8922 + }, + { + "epoch": 0.37193114084448337, + "grad_norm": 266.0, + "learning_rate": 7.234909708365535e-05, + "loss": 6.4378, + "step": 8923 + }, + { + "epoch": 0.3719728231420116, + "grad_norm": 232.0, + "learning_rate": 7.234305870018842e-05, + "loss": 12.1258, + "step": 8924 + }, + { + "epoch": 0.37201450543953984, + "grad_norm": 88.5, + "learning_rate": 7.233701990951727e-05, + "loss": 8.3127, + "step": 8925 + }, + { + "epoch": 0.37205618773706806, + "grad_norm": 800.0, + "learning_rate": 7.233098071175194e-05, + "loss": 20.8796, + "step": 8926 + }, + { + "epoch": 0.3720978700345963, + "grad_norm": 346.0, + "learning_rate": 7.23249411070025e-05, + "loss": 15.2509, + "step": 8927 + }, + { + "epoch": 0.37213955233212453, + "grad_norm": 113.0, + "learning_rate": 7.231890109537904e-05, + "loss": 7.7189, + "step": 8928 + }, + { + "epoch": 0.3721812346296528, + "grad_norm": 478.0, + "learning_rate": 7.231286067699164e-05, + "loss": 16.8753, + "step": 8929 + }, + { + "epoch": 0.372222916927181, + "grad_norm": 668.0, + "learning_rate": 7.230681985195035e-05, + "loss": 20.1252, + "step": 8930 + }, + { + "epoch": 0.3722645992247093, + "grad_norm": 516.0, + "learning_rate": 7.23007786203653e-05, + "loss": 16.3782, + "step": 8931 + }, + { + "epoch": 0.3723062815222375, + "grad_norm": 394.0, + "learning_rate": 7.229473698234656e-05, + "loss": 14.2506, + "step": 8932 + }, + { + "epoch": 0.37234796381976576, + "grad_norm": 264.0, + "learning_rate": 7.228869493800429e-05, + "loss": 12.563, + "step": 8933 + }, + { + "epoch": 0.37238964611729397, + "grad_norm": 115.0, + "learning_rate": 7.228265248744855e-05, + "loss": 8.9378, + "step": 8934 + }, + { + "epoch": 0.37243132841482224, + "grad_norm": 466.0, + "learning_rate": 7.22766096307895e-05, + "loss": 16.1253, + "step": 8935 + }, + { + "epoch": 0.37247301071235045, + "grad_norm": 398.0, + "learning_rate": 7.227056636813727e-05, + "loss": 15.2512, + "step": 8936 + }, + { + "epoch": 0.3725146930098787, + "grad_norm": 386.0, + "learning_rate": 7.226452269960198e-05, + "loss": 14.5627, + "step": 8937 + }, + { + "epoch": 0.37255637530740693, + "grad_norm": 332.0, + "learning_rate": 7.225847862529379e-05, + "loss": 12.0033, + "step": 8938 + }, + { + "epoch": 0.3725980576049352, + "grad_norm": 740.0, + "learning_rate": 7.225243414532284e-05, + "loss": 21.1252, + "step": 8939 + }, + { + "epoch": 0.3726397399024634, + "grad_norm": 100.5, + "learning_rate": 7.224638925979932e-05, + "loss": 9.0627, + "step": 8940 + }, + { + "epoch": 0.3726814221999917, + "grad_norm": 386.0, + "learning_rate": 7.224034396883337e-05, + "loss": 16.0004, + "step": 8941 + }, + { + "epoch": 0.3727231044975199, + "grad_norm": 364.0, + "learning_rate": 7.223429827253518e-05, + "loss": 14.1252, + "step": 8942 + }, + { + "epoch": 0.37276478679504815, + "grad_norm": 124.0, + "learning_rate": 7.222825217101495e-05, + "loss": 8.6879, + "step": 8943 + }, + { + "epoch": 0.37280646909257636, + "grad_norm": 438.0, + "learning_rate": 7.222220566438282e-05, + "loss": 9.6883, + "step": 8944 + }, + { + "epoch": 0.37284815139010463, + "grad_norm": 274.0, + "learning_rate": 7.221615875274903e-05, + "loss": 12.4398, + "step": 8945 + }, + { + "epoch": 0.37288983368763284, + "grad_norm": 1744.0, + "learning_rate": 7.221011143622378e-05, + "loss": 34.2512, + "step": 8946 + }, + { + "epoch": 0.3729315159851611, + "grad_norm": 132.0, + "learning_rate": 7.220406371491727e-05, + "loss": 9.8128, + "step": 8947 + }, + { + "epoch": 0.3729731982826893, + "grad_norm": 166.0, + "learning_rate": 7.219801558893974e-05, + "loss": 9.0002, + "step": 8948 + }, + { + "epoch": 0.3730148805802176, + "grad_norm": 516.0, + "learning_rate": 7.219196705840139e-05, + "loss": 18.3752, + "step": 8949 + }, + { + "epoch": 0.3730565628777458, + "grad_norm": 832.0, + "learning_rate": 7.218591812341248e-05, + "loss": 21.7507, + "step": 8950 + }, + { + "epoch": 0.37309824517527407, + "grad_norm": 406.0, + "learning_rate": 7.217986878408324e-05, + "loss": 15.2505, + "step": 8951 + }, + { + "epoch": 0.3731399274728023, + "grad_norm": 298.0, + "learning_rate": 7.217381904052392e-05, + "loss": 12.0629, + "step": 8952 + }, + { + "epoch": 0.37318160977033055, + "grad_norm": 848.0, + "learning_rate": 7.216776889284478e-05, + "loss": 24.7503, + "step": 8953 + }, + { + "epoch": 0.37322329206785876, + "grad_norm": 408.0, + "learning_rate": 7.216171834115608e-05, + "loss": 15.5002, + "step": 8954 + }, + { + "epoch": 0.373264974365387, + "grad_norm": 536.0, + "learning_rate": 7.21556673855681e-05, + "loss": 17.8757, + "step": 8955 + }, + { + "epoch": 0.37330665666291524, + "grad_norm": 69.5, + "learning_rate": 7.214961602619112e-05, + "loss": 9.7512, + "step": 8956 + }, + { + "epoch": 0.3733483389604435, + "grad_norm": 704.0, + "learning_rate": 7.214356426313542e-05, + "loss": 19.7541, + "step": 8957 + }, + { + "epoch": 0.3733900212579717, + "grad_norm": 398.0, + "learning_rate": 7.213751209651129e-05, + "loss": 13.6879, + "step": 8958 + }, + { + "epoch": 0.3734317035555, + "grad_norm": 1160.0, + "learning_rate": 7.213145952642905e-05, + "loss": 26.0003, + "step": 8959 + }, + { + "epoch": 0.3734733858530282, + "grad_norm": 181.0, + "learning_rate": 7.212540655299898e-05, + "loss": 9.6877, + "step": 8960 + }, + { + "epoch": 0.37351506815055646, + "grad_norm": 406.0, + "learning_rate": 7.211935317633142e-05, + "loss": 15.8131, + "step": 8961 + }, + { + "epoch": 0.3735567504480847, + "grad_norm": 420.0, + "learning_rate": 7.211329939653669e-05, + "loss": 14.1253, + "step": 8962 + }, + { + "epoch": 0.37359843274561294, + "grad_norm": 142.0, + "learning_rate": 7.210724521372509e-05, + "loss": 9.9379, + "step": 8963 + }, + { + "epoch": 0.37364011504314115, + "grad_norm": 314.0, + "learning_rate": 7.2101190628007e-05, + "loss": 11.8757, + "step": 8964 + }, + { + "epoch": 0.3736817973406694, + "grad_norm": 328.0, + "learning_rate": 7.209513563949275e-05, + "loss": 12.9377, + "step": 8965 + }, + { + "epoch": 0.37372347963819763, + "grad_norm": 360.0, + "learning_rate": 7.208908024829269e-05, + "loss": 14.8759, + "step": 8966 + }, + { + "epoch": 0.3737651619357259, + "grad_norm": 380.0, + "learning_rate": 7.20830244545172e-05, + "loss": 14.8752, + "step": 8967 + }, + { + "epoch": 0.3738068442332541, + "grad_norm": 140.0, + "learning_rate": 7.20769682582766e-05, + "loss": 9.7502, + "step": 8968 + }, + { + "epoch": 0.3738485265307824, + "grad_norm": 422.0, + "learning_rate": 7.20709116596813e-05, + "loss": 15.4377, + "step": 8969 + }, + { + "epoch": 0.37389020882831064, + "grad_norm": 992.0, + "learning_rate": 7.206485465884168e-05, + "loss": 25.6255, + "step": 8970 + }, + { + "epoch": 0.37393189112583886, + "grad_norm": 516.0, + "learning_rate": 7.205879725586814e-05, + "loss": 18.2501, + "step": 8971 + }, + { + "epoch": 0.3739735734233671, + "grad_norm": 237.0, + "learning_rate": 7.205273945087104e-05, + "loss": 11.4379, + "step": 8972 + }, + { + "epoch": 0.37401525572089533, + "grad_norm": 540.0, + "learning_rate": 7.204668124396083e-05, + "loss": 17.5005, + "step": 8973 + }, + { + "epoch": 0.3740569380184236, + "grad_norm": 292.0, + "learning_rate": 7.204062263524787e-05, + "loss": 12.313, + "step": 8974 + }, + { + "epoch": 0.3740986203159518, + "grad_norm": 122.5, + "learning_rate": 7.203456362484262e-05, + "loss": 8.9379, + "step": 8975 + }, + { + "epoch": 0.3741403026134801, + "grad_norm": 1296.0, + "learning_rate": 7.202850421285549e-05, + "loss": 30.3771, + "step": 8976 + }, + { + "epoch": 0.3741819849110083, + "grad_norm": 764.0, + "learning_rate": 7.20224443993969e-05, + "loss": 20.7503, + "step": 8977 + }, + { + "epoch": 0.37422366720853656, + "grad_norm": 180.0, + "learning_rate": 7.201638418457732e-05, + "loss": 12.1255, + "step": 8978 + }, + { + "epoch": 0.37426534950606477, + "grad_norm": 326.0, + "learning_rate": 7.201032356850719e-05, + "loss": 14.3129, + "step": 8979 + }, + { + "epoch": 0.37430703180359304, + "grad_norm": 264.0, + "learning_rate": 7.200426255129696e-05, + "loss": 11.5629, + "step": 8980 + }, + { + "epoch": 0.37434871410112125, + "grad_norm": 137.0, + "learning_rate": 7.199820113305708e-05, + "loss": 8.4379, + "step": 8981 + }, + { + "epoch": 0.3743903963986495, + "grad_norm": 406.0, + "learning_rate": 7.199213931389803e-05, + "loss": 14.5001, + "step": 8982 + }, + { + "epoch": 0.37443207869617773, + "grad_norm": 620.0, + "learning_rate": 7.19860770939303e-05, + "loss": 20.5004, + "step": 8983 + }, + { + "epoch": 0.374473760993706, + "grad_norm": 146.0, + "learning_rate": 7.198001447326436e-05, + "loss": 9.5631, + "step": 8984 + }, + { + "epoch": 0.3745154432912342, + "grad_norm": 328.0, + "learning_rate": 7.197395145201071e-05, + "loss": 13.9379, + "step": 8985 + }, + { + "epoch": 0.3745571255887625, + "grad_norm": 438.0, + "learning_rate": 7.196788803027983e-05, + "loss": 16.6259, + "step": 8986 + }, + { + "epoch": 0.3745988078862907, + "grad_norm": 2816.0, + "learning_rate": 7.196182420818225e-05, + "loss": 45.2618, + "step": 8987 + }, + { + "epoch": 0.37464049018381895, + "grad_norm": 402.0, + "learning_rate": 7.195575998582846e-05, + "loss": 14.0002, + "step": 8988 + }, + { + "epoch": 0.37468217248134716, + "grad_norm": 544.0, + "learning_rate": 7.194969536332901e-05, + "loss": 18.0002, + "step": 8989 + }, + { + "epoch": 0.37472385477887543, + "grad_norm": 280.0, + "learning_rate": 7.194363034079441e-05, + "loss": 13.5628, + "step": 8990 + }, + { + "epoch": 0.37476553707640364, + "grad_norm": 424.0, + "learning_rate": 7.19375649183352e-05, + "loss": 15.0003, + "step": 8991 + }, + { + "epoch": 0.3748072193739319, + "grad_norm": 276.0, + "learning_rate": 7.193149909606191e-05, + "loss": 12.3754, + "step": 8992 + }, + { + "epoch": 0.3748489016714601, + "grad_norm": 560.0, + "learning_rate": 7.192543287408513e-05, + "loss": 17.7505, + "step": 8993 + }, + { + "epoch": 0.3748905839689884, + "grad_norm": 192.0, + "learning_rate": 7.191936625251536e-05, + "loss": 12.2502, + "step": 8994 + }, + { + "epoch": 0.3749322662665166, + "grad_norm": 560.0, + "learning_rate": 7.191329923146322e-05, + "loss": 17.1274, + "step": 8995 + }, + { + "epoch": 0.37497394856404487, + "grad_norm": 186.0, + "learning_rate": 7.190723181103924e-05, + "loss": 11.3752, + "step": 8996 + }, + { + "epoch": 0.3750156308615731, + "grad_norm": 576.0, + "learning_rate": 7.190116399135401e-05, + "loss": 20.0002, + "step": 8997 + }, + { + "epoch": 0.37505731315910135, + "grad_norm": 320.0, + "learning_rate": 7.189509577251815e-05, + "loss": 13.9379, + "step": 8998 + }, + { + "epoch": 0.37509899545662956, + "grad_norm": 354.0, + "learning_rate": 7.18890271546422e-05, + "loss": 13.3758, + "step": 8999 + }, + { + "epoch": 0.3751406777541578, + "grad_norm": 484.0, + "learning_rate": 7.188295813783679e-05, + "loss": 16.7503, + "step": 9000 + }, + { + "epoch": 0.37518236005168604, + "grad_norm": 272.0, + "learning_rate": 7.187688872221254e-05, + "loss": 13.2502, + "step": 9001 + }, + { + "epoch": 0.3752240423492143, + "grad_norm": 296.0, + "learning_rate": 7.187081890788003e-05, + "loss": 13.0629, + "step": 9002 + }, + { + "epoch": 0.3752657246467425, + "grad_norm": 648.0, + "learning_rate": 7.18647486949499e-05, + "loss": 20.6252, + "step": 9003 + }, + { + "epoch": 0.3753074069442708, + "grad_norm": 280.0, + "learning_rate": 7.185867808353281e-05, + "loss": 13.3129, + "step": 9004 + }, + { + "epoch": 0.375349089241799, + "grad_norm": 462.0, + "learning_rate": 7.185260707373936e-05, + "loss": 15.7511, + "step": 9005 + }, + { + "epoch": 0.37539077153932726, + "grad_norm": 243.0, + "learning_rate": 7.18465356656802e-05, + "loss": 12.1876, + "step": 9006 + }, + { + "epoch": 0.3754324538368555, + "grad_norm": 760.0, + "learning_rate": 7.184046385946599e-05, + "loss": 21.7502, + "step": 9007 + }, + { + "epoch": 0.37547413613438374, + "grad_norm": 576.0, + "learning_rate": 7.18343916552074e-05, + "loss": 17.7503, + "step": 9008 + }, + { + "epoch": 0.37551581843191195, + "grad_norm": 458.0, + "learning_rate": 7.182831905301506e-05, + "loss": 14.3162, + "step": 9009 + }, + { + "epoch": 0.3755575007294402, + "grad_norm": 600.0, + "learning_rate": 7.18222460529997e-05, + "loss": 22.2502, + "step": 9010 + }, + { + "epoch": 0.37559918302696843, + "grad_norm": 109.5, + "learning_rate": 7.181617265527193e-05, + "loss": 9.1252, + "step": 9011 + }, + { + "epoch": 0.3756408653244967, + "grad_norm": 102.0, + "learning_rate": 7.181009885994252e-05, + "loss": 7.6254, + "step": 9012 + }, + { + "epoch": 0.3756825476220249, + "grad_norm": 256.0, + "learning_rate": 7.18040246671221e-05, + "loss": 11.7504, + "step": 9013 + }, + { + "epoch": 0.3757242299195532, + "grad_norm": 350.0, + "learning_rate": 7.179795007692138e-05, + "loss": 14.2503, + "step": 9014 + }, + { + "epoch": 0.3757659122170814, + "grad_norm": 172.0, + "learning_rate": 7.17918750894511e-05, + "loss": 11.4381, + "step": 9015 + }, + { + "epoch": 0.37580759451460966, + "grad_norm": 229.0, + "learning_rate": 7.178579970482195e-05, + "loss": 12.0005, + "step": 9016 + }, + { + "epoch": 0.37584927681213787, + "grad_norm": 434.0, + "learning_rate": 7.177972392314469e-05, + "loss": 16.2505, + "step": 9017 + }, + { + "epoch": 0.37589095910966613, + "grad_norm": 290.0, + "learning_rate": 7.177364774453002e-05, + "loss": 12.5628, + "step": 9018 + }, + { + "epoch": 0.37593264140719435, + "grad_norm": 232.0, + "learning_rate": 7.176757116908868e-05, + "loss": 11.1253, + "step": 9019 + }, + { + "epoch": 0.3759743237047226, + "grad_norm": 126.0, + "learning_rate": 7.176149419693142e-05, + "loss": 9.1252, + "step": 9020 + }, + { + "epoch": 0.3760160060022508, + "grad_norm": 128.0, + "learning_rate": 7.1755416828169e-05, + "loss": 8.8127, + "step": 9021 + }, + { + "epoch": 0.3760576882997791, + "grad_norm": 568.0, + "learning_rate": 7.17493390629122e-05, + "loss": 18.6251, + "step": 9022 + }, + { + "epoch": 0.3760993705973073, + "grad_norm": 2368.0, + "learning_rate": 7.174326090127173e-05, + "loss": 46.504, + "step": 9023 + }, + { + "epoch": 0.37614105289483557, + "grad_norm": 816.0, + "learning_rate": 7.173718234335842e-05, + "loss": 22.7524, + "step": 9024 + }, + { + "epoch": 0.3761827351923638, + "grad_norm": 374.0, + "learning_rate": 7.173110338928301e-05, + "loss": 14.0626, + "step": 9025 + }, + { + "epoch": 0.37622441748989205, + "grad_norm": 532.0, + "learning_rate": 7.172502403915634e-05, + "loss": 18.1256, + "step": 9026 + }, + { + "epoch": 0.37626609978742026, + "grad_norm": 212.0, + "learning_rate": 7.171894429308916e-05, + "loss": 11.4379, + "step": 9027 + }, + { + "epoch": 0.37630778208494853, + "grad_norm": 330.0, + "learning_rate": 7.171286415119229e-05, + "loss": 13.8772, + "step": 9028 + }, + { + "epoch": 0.37634946438247674, + "grad_norm": 896.0, + "learning_rate": 7.170678361357655e-05, + "loss": 22.6297, + "step": 9029 + }, + { + "epoch": 0.376391146680005, + "grad_norm": 215.0, + "learning_rate": 7.170070268035276e-05, + "loss": 11.5007, + "step": 9030 + }, + { + "epoch": 0.3764328289775332, + "grad_norm": 255.0, + "learning_rate": 7.169462135163174e-05, + "loss": 12.3752, + "step": 9031 + }, + { + "epoch": 0.3764745112750615, + "grad_norm": 51.25, + "learning_rate": 7.168853962752431e-05, + "loss": 6.6252, + "step": 9032 + }, + { + "epoch": 0.3765161935725897, + "grad_norm": 302.0, + "learning_rate": 7.168245750814131e-05, + "loss": 12.1877, + "step": 9033 + }, + { + "epoch": 0.37655787587011796, + "grad_norm": 115.5, + "learning_rate": 7.167637499359361e-05, + "loss": 8.1259, + "step": 9034 + }, + { + "epoch": 0.3765995581676462, + "grad_norm": 496.0, + "learning_rate": 7.167029208399205e-05, + "loss": 17.5003, + "step": 9035 + }, + { + "epoch": 0.37664124046517444, + "grad_norm": 134.0, + "learning_rate": 7.166420877944749e-05, + "loss": 8.9377, + "step": 9036 + }, + { + "epoch": 0.37668292276270265, + "grad_norm": 560.0, + "learning_rate": 7.165812508007081e-05, + "loss": 18.5002, + "step": 9037 + }, + { + "epoch": 0.3767246050602309, + "grad_norm": 118.0, + "learning_rate": 7.165204098597287e-05, + "loss": 9.9388, + "step": 9038 + }, + { + "epoch": 0.37676628735775913, + "grad_norm": 404.0, + "learning_rate": 7.164595649726457e-05, + "loss": 13.8756, + "step": 9039 + }, + { + "epoch": 0.3768079696552874, + "grad_norm": 336.0, + "learning_rate": 7.163987161405676e-05, + "loss": 14.5005, + "step": 9040 + }, + { + "epoch": 0.3768496519528156, + "grad_norm": 468.0, + "learning_rate": 7.163378633646039e-05, + "loss": 16.8753, + "step": 9041 + }, + { + "epoch": 0.3768913342503439, + "grad_norm": 75.5, + "learning_rate": 7.162770066458635e-05, + "loss": 7.3128, + "step": 9042 + }, + { + "epoch": 0.37693301654787215, + "grad_norm": 502.0, + "learning_rate": 7.162161459854556e-05, + "loss": 16.2526, + "step": 9043 + }, + { + "epoch": 0.37697469884540036, + "grad_norm": 516.0, + "learning_rate": 7.161552813844889e-05, + "loss": 17.6252, + "step": 9044 + }, + { + "epoch": 0.3770163811429286, + "grad_norm": 125.5, + "learning_rate": 7.160944128440732e-05, + "loss": 8.5003, + "step": 9045 + }, + { + "epoch": 0.37705806344045684, + "grad_norm": 506.0, + "learning_rate": 7.160335403653177e-05, + "loss": 17.2502, + "step": 9046 + }, + { + "epoch": 0.3770997457379851, + "grad_norm": 320.0, + "learning_rate": 7.159726639493316e-05, + "loss": 13.3758, + "step": 9047 + }, + { + "epoch": 0.3771414280355133, + "grad_norm": 120.0, + "learning_rate": 7.159117835972246e-05, + "loss": 8.5628, + "step": 9048 + }, + { + "epoch": 0.3771831103330416, + "grad_norm": 314.0, + "learning_rate": 7.158508993101062e-05, + "loss": 12.0629, + "step": 9049 + }, + { + "epoch": 0.3772247926305698, + "grad_norm": 332.0, + "learning_rate": 7.157900110890859e-05, + "loss": 12.6879, + "step": 9050 + }, + { + "epoch": 0.37726647492809806, + "grad_norm": 290.0, + "learning_rate": 7.157291189352736e-05, + "loss": 12.6251, + "step": 9051 + }, + { + "epoch": 0.3773081572256263, + "grad_norm": 104.5, + "learning_rate": 7.156682228497789e-05, + "loss": 9.6255, + "step": 9052 + }, + { + "epoch": 0.37734983952315454, + "grad_norm": 362.0, + "learning_rate": 7.156073228337116e-05, + "loss": 15.0003, + "step": 9053 + }, + { + "epoch": 0.37739152182068275, + "grad_norm": 1448.0, + "learning_rate": 7.155464188881818e-05, + "loss": 32.5049, + "step": 9054 + }, + { + "epoch": 0.377433204118211, + "grad_norm": 282.0, + "learning_rate": 7.154855110142994e-05, + "loss": 13.2506, + "step": 9055 + }, + { + "epoch": 0.37747488641573923, + "grad_norm": 260.0, + "learning_rate": 7.154245992131743e-05, + "loss": 14.5634, + "step": 9056 + }, + { + "epoch": 0.3775165687132675, + "grad_norm": 90.0, + "learning_rate": 7.15363683485917e-05, + "loss": 6.3759, + "step": 9057 + }, + { + "epoch": 0.3775582510107957, + "grad_norm": 380.0, + "learning_rate": 7.153027638336373e-05, + "loss": 15.1252, + "step": 9058 + }, + { + "epoch": 0.377599933308324, + "grad_norm": 186.0, + "learning_rate": 7.152418402574457e-05, + "loss": 11.8132, + "step": 9059 + }, + { + "epoch": 0.3776416156058522, + "grad_norm": 100.5, + "learning_rate": 7.151809127584523e-05, + "loss": 10.0642, + "step": 9060 + }, + { + "epoch": 0.37768329790338045, + "grad_norm": 193.0, + "learning_rate": 7.151199813377678e-05, + "loss": 11.5018, + "step": 9061 + }, + { + "epoch": 0.37772498020090867, + "grad_norm": 237.0, + "learning_rate": 7.150590459965025e-05, + "loss": 11.5006, + "step": 9062 + }, + { + "epoch": 0.37776666249843693, + "grad_norm": 264.0, + "learning_rate": 7.14998106735767e-05, + "loss": 13.1891, + "step": 9063 + }, + { + "epoch": 0.37780834479596515, + "grad_norm": 512.0, + "learning_rate": 7.14937163556672e-05, + "loss": 17.3768, + "step": 9064 + }, + { + "epoch": 0.3778500270934934, + "grad_norm": 426.0, + "learning_rate": 7.148762164603279e-05, + "loss": 14.8755, + "step": 9065 + }, + { + "epoch": 0.3778917093910216, + "grad_norm": 205.0, + "learning_rate": 7.148152654478458e-05, + "loss": 11.3755, + "step": 9066 + }, + { + "epoch": 0.3779333916885499, + "grad_norm": 556.0, + "learning_rate": 7.147543105203364e-05, + "loss": 18.2524, + "step": 9067 + }, + { + "epoch": 0.3779750739860781, + "grad_norm": 484.0, + "learning_rate": 7.146933516789107e-05, + "loss": 17.1258, + "step": 9068 + }, + { + "epoch": 0.37801675628360637, + "grad_norm": 552.0, + "learning_rate": 7.146323889246796e-05, + "loss": 18.6252, + "step": 9069 + }, + { + "epoch": 0.3780584385811346, + "grad_norm": 226.0, + "learning_rate": 7.145714222587541e-05, + "loss": 11.3127, + "step": 9070 + }, + { + "epoch": 0.37810012087866285, + "grad_norm": 588.0, + "learning_rate": 7.145104516822454e-05, + "loss": 19.3752, + "step": 9071 + }, + { + "epoch": 0.37814180317619106, + "grad_norm": 231.0, + "learning_rate": 7.144494771962647e-05, + "loss": 9.627, + "step": 9072 + }, + { + "epoch": 0.3781834854737193, + "grad_norm": 402.0, + "learning_rate": 7.143884988019232e-05, + "loss": 12.7503, + "step": 9073 + }, + { + "epoch": 0.37822516777124754, + "grad_norm": 382.0, + "learning_rate": 7.143275165003322e-05, + "loss": 14.8754, + "step": 9074 + }, + { + "epoch": 0.3782668500687758, + "grad_norm": 480.0, + "learning_rate": 7.142665302926034e-05, + "loss": 15.2507, + "step": 9075 + }, + { + "epoch": 0.378308532366304, + "grad_norm": 358.0, + "learning_rate": 7.14205540179848e-05, + "loss": 14.3757, + "step": 9076 + }, + { + "epoch": 0.3783502146638323, + "grad_norm": 154.0, + "learning_rate": 7.141445461631775e-05, + "loss": 10.0626, + "step": 9077 + }, + { + "epoch": 0.3783918969613605, + "grad_norm": 552.0, + "learning_rate": 7.140835482437036e-05, + "loss": 18.7506, + "step": 9078 + }, + { + "epoch": 0.37843357925888876, + "grad_norm": 190.0, + "learning_rate": 7.140225464225381e-05, + "loss": 10.7503, + "step": 9079 + }, + { + "epoch": 0.378475261556417, + "grad_norm": 161.0, + "learning_rate": 7.139615407007926e-05, + "loss": 9.9378, + "step": 9080 + }, + { + "epoch": 0.37851694385394524, + "grad_norm": 342.0, + "learning_rate": 7.139005310795792e-05, + "loss": 13.1254, + "step": 9081 + }, + { + "epoch": 0.37855862615147345, + "grad_norm": 440.0, + "learning_rate": 7.138395175600096e-05, + "loss": 16.8812, + "step": 9082 + }, + { + "epoch": 0.3786003084490017, + "grad_norm": 235.0, + "learning_rate": 7.137785001431958e-05, + "loss": 12.3126, + "step": 9083 + }, + { + "epoch": 0.37864199074652993, + "grad_norm": 306.0, + "learning_rate": 7.137174788302499e-05, + "loss": 13.3129, + "step": 9084 + }, + { + "epoch": 0.3786836730440582, + "grad_norm": 588.0, + "learning_rate": 7.136564536222838e-05, + "loss": 19.5007, + "step": 9085 + }, + { + "epoch": 0.3787253553415864, + "grad_norm": 412.0, + "learning_rate": 7.1359542452041e-05, + "loss": 16.5006, + "step": 9086 + }, + { + "epoch": 0.3787670376391147, + "grad_norm": 364.0, + "learning_rate": 7.135343915257407e-05, + "loss": 14.4377, + "step": 9087 + }, + { + "epoch": 0.3788087199366429, + "grad_norm": 588.0, + "learning_rate": 7.13473354639388e-05, + "loss": 18.3757, + "step": 9088 + }, + { + "epoch": 0.37885040223417116, + "grad_norm": 132.0, + "learning_rate": 7.134123138624646e-05, + "loss": 11.063, + "step": 9089 + }, + { + "epoch": 0.37889208453169937, + "grad_norm": 1448.0, + "learning_rate": 7.133512691960827e-05, + "loss": 31.8756, + "step": 9090 + }, + { + "epoch": 0.37893376682922764, + "grad_norm": 105.5, + "learning_rate": 7.132902206413549e-05, + "loss": 9.5005, + "step": 9091 + }, + { + "epoch": 0.37897544912675585, + "grad_norm": 158.0, + "learning_rate": 7.132291681993942e-05, + "loss": 9.7506, + "step": 9092 + }, + { + "epoch": 0.3790171314242841, + "grad_norm": 560.0, + "learning_rate": 7.131681118713127e-05, + "loss": 17.5018, + "step": 9093 + }, + { + "epoch": 0.3790588137218123, + "grad_norm": 308.0, + "learning_rate": 7.131070516582236e-05, + "loss": 12.0628, + "step": 9094 + }, + { + "epoch": 0.3791004960193406, + "grad_norm": 360.0, + "learning_rate": 7.130459875612395e-05, + "loss": 13.7502, + "step": 9095 + }, + { + "epoch": 0.3791421783168688, + "grad_norm": 548.0, + "learning_rate": 7.129849195814734e-05, + "loss": 17.2508, + "step": 9096 + }, + { + "epoch": 0.3791838606143971, + "grad_norm": 616.0, + "learning_rate": 7.129238477200383e-05, + "loss": 19.0006, + "step": 9097 + }, + { + "epoch": 0.3792255429119253, + "grad_norm": 298.0, + "learning_rate": 7.12862771978047e-05, + "loss": 12.6259, + "step": 9098 + }, + { + "epoch": 0.37926722520945355, + "grad_norm": 217.0, + "learning_rate": 7.12801692356613e-05, + "loss": 11.3128, + "step": 9099 + }, + { + "epoch": 0.37930890750698176, + "grad_norm": 276.0, + "learning_rate": 7.127406088568492e-05, + "loss": 12.5627, + "step": 9100 + }, + { + "epoch": 0.37935058980451003, + "grad_norm": 612.0, + "learning_rate": 7.126795214798687e-05, + "loss": 18.3753, + "step": 9101 + }, + { + "epoch": 0.37939227210203824, + "grad_norm": 390.0, + "learning_rate": 7.126184302267851e-05, + "loss": 14.1891, + "step": 9102 + }, + { + "epoch": 0.3794339543995665, + "grad_norm": 296.0, + "learning_rate": 7.125573350987118e-05, + "loss": 11.8754, + "step": 9103 + }, + { + "epoch": 0.3794756366970947, + "grad_norm": 96.5, + "learning_rate": 7.124962360967624e-05, + "loss": 8.5004, + "step": 9104 + }, + { + "epoch": 0.379517318994623, + "grad_norm": 422.0, + "learning_rate": 7.1243513322205e-05, + "loss": 16.6252, + "step": 9105 + }, + { + "epoch": 0.3795590012921512, + "grad_norm": 452.0, + "learning_rate": 7.123740264756885e-05, + "loss": 13.7511, + "step": 9106 + }, + { + "epoch": 0.37960068358967947, + "grad_norm": 181.0, + "learning_rate": 7.123129158587915e-05, + "loss": 10.4377, + "step": 9107 + }, + { + "epoch": 0.3796423658872077, + "grad_norm": 106.0, + "learning_rate": 7.122518013724728e-05, + "loss": 9.8763, + "step": 9108 + }, + { + "epoch": 0.37968404818473594, + "grad_norm": 460.0, + "learning_rate": 7.121906830178462e-05, + "loss": 16.6254, + "step": 9109 + }, + { + "epoch": 0.37972573048226416, + "grad_norm": 75.0, + "learning_rate": 7.121295607960254e-05, + "loss": 6.9689, + "step": 9110 + }, + { + "epoch": 0.3797674127797924, + "grad_norm": 290.0, + "learning_rate": 7.120684347081248e-05, + "loss": 13.1883, + "step": 9111 + }, + { + "epoch": 0.37980909507732064, + "grad_norm": 266.0, + "learning_rate": 7.12007304755258e-05, + "loss": 12.6877, + "step": 9112 + }, + { + "epoch": 0.3798507773748489, + "grad_norm": 396.0, + "learning_rate": 7.119461709385392e-05, + "loss": 15.5626, + "step": 9113 + }, + { + "epoch": 0.3798924596723771, + "grad_norm": 226.0, + "learning_rate": 7.118850332590827e-05, + "loss": 10.8127, + "step": 9114 + }, + { + "epoch": 0.3799341419699054, + "grad_norm": 410.0, + "learning_rate": 7.118238917180025e-05, + "loss": 15.5627, + "step": 9115 + }, + { + "epoch": 0.37997582426743365, + "grad_norm": 350.0, + "learning_rate": 7.117627463164132e-05, + "loss": 12.6256, + "step": 9116 + }, + { + "epoch": 0.38001750656496186, + "grad_norm": 676.0, + "learning_rate": 7.117015970554291e-05, + "loss": 19.3777, + "step": 9117 + }, + { + "epoch": 0.3800591888624901, + "grad_norm": 788.0, + "learning_rate": 7.116404439361645e-05, + "loss": 24.1253, + "step": 9118 + }, + { + "epoch": 0.38010087116001834, + "grad_norm": 229.0, + "learning_rate": 7.11579286959734e-05, + "loss": 13.5002, + "step": 9119 + }, + { + "epoch": 0.3801425534575466, + "grad_norm": 324.0, + "learning_rate": 7.115181261272523e-05, + "loss": 13.0627, + "step": 9120 + }, + { + "epoch": 0.3801842357550748, + "grad_norm": 482.0, + "learning_rate": 7.11456961439834e-05, + "loss": 17.6255, + "step": 9121 + }, + { + "epoch": 0.3802259180526031, + "grad_norm": 250.0, + "learning_rate": 7.113957928985938e-05, + "loss": 11.4377, + "step": 9122 + }, + { + "epoch": 0.3802676003501313, + "grad_norm": 254.0, + "learning_rate": 7.113346205046465e-05, + "loss": 11.1878, + "step": 9123 + }, + { + "epoch": 0.38030928264765956, + "grad_norm": 214.0, + "learning_rate": 7.11273444259107e-05, + "loss": 11.3128, + "step": 9124 + }, + { + "epoch": 0.3803509649451878, + "grad_norm": 251.0, + "learning_rate": 7.1121226416309e-05, + "loss": 5.2196, + "step": 9125 + }, + { + "epoch": 0.38039264724271604, + "grad_norm": 304.0, + "learning_rate": 7.11151080217711e-05, + "loss": 13.7504, + "step": 9126 + }, + { + "epoch": 0.38043432954024425, + "grad_norm": 556.0, + "learning_rate": 7.110898924240847e-05, + "loss": 17.8756, + "step": 9127 + }, + { + "epoch": 0.3804760118377725, + "grad_norm": 171.0, + "learning_rate": 7.110287007833262e-05, + "loss": 10.5627, + "step": 9128 + }, + { + "epoch": 0.38051769413530073, + "grad_norm": 478.0, + "learning_rate": 7.109675052965512e-05, + "loss": 16.7506, + "step": 9129 + }, + { + "epoch": 0.380559376432829, + "grad_norm": 316.0, + "learning_rate": 7.109063059648746e-05, + "loss": 12.1253, + "step": 9130 + }, + { + "epoch": 0.3806010587303572, + "grad_norm": 700.0, + "learning_rate": 7.108451027894118e-05, + "loss": 23.0004, + "step": 9131 + }, + { + "epoch": 0.3806427410278855, + "grad_norm": 227.0, + "learning_rate": 7.107838957712784e-05, + "loss": 13.1252, + "step": 9132 + }, + { + "epoch": 0.3806844233254137, + "grad_norm": 225.0, + "learning_rate": 7.107226849115897e-05, + "loss": 12.1251, + "step": 9133 + }, + { + "epoch": 0.38072610562294196, + "grad_norm": 107.5, + "learning_rate": 7.106614702114614e-05, + "loss": 9.8754, + "step": 9134 + }, + { + "epoch": 0.38076778792047017, + "grad_norm": 328.0, + "learning_rate": 7.106002516720091e-05, + "loss": 14.5004, + "step": 9135 + }, + { + "epoch": 0.38080947021799844, + "grad_norm": 229.0, + "learning_rate": 7.105390292943483e-05, + "loss": 9.0668, + "step": 9136 + }, + { + "epoch": 0.38085115251552665, + "grad_norm": 260.0, + "learning_rate": 7.104778030795954e-05, + "loss": 13.0016, + "step": 9137 + }, + { + "epoch": 0.3808928348130549, + "grad_norm": 286.0, + "learning_rate": 7.104165730288656e-05, + "loss": 13.1291, + "step": 9138 + }, + { + "epoch": 0.3809345171105831, + "grad_norm": 260.0, + "learning_rate": 7.103553391432752e-05, + "loss": 11.9377, + "step": 9139 + }, + { + "epoch": 0.3809761994081114, + "grad_norm": 392.0, + "learning_rate": 7.102941014239397e-05, + "loss": 14.5007, + "step": 9140 + }, + { + "epoch": 0.3810178817056396, + "grad_norm": 260.0, + "learning_rate": 7.102328598719759e-05, + "loss": 11.5003, + "step": 9141 + }, + { + "epoch": 0.38105956400316787, + "grad_norm": 144.0, + "learning_rate": 7.101716144884995e-05, + "loss": 9.0003, + "step": 9142 + }, + { + "epoch": 0.3811012463006961, + "grad_norm": 205.0, + "learning_rate": 7.101103652746268e-05, + "loss": 10.8128, + "step": 9143 + }, + { + "epoch": 0.38114292859822435, + "grad_norm": 948.0, + "learning_rate": 7.100491122314739e-05, + "loss": 25.3783, + "step": 9144 + }, + { + "epoch": 0.38118461089575256, + "grad_norm": 676.0, + "learning_rate": 7.099878553601574e-05, + "loss": 19.2501, + "step": 9145 + }, + { + "epoch": 0.38122629319328083, + "grad_norm": 292.0, + "learning_rate": 7.099265946617936e-05, + "loss": 11.7503, + "step": 9146 + }, + { + "epoch": 0.38126797549080904, + "grad_norm": 428.0, + "learning_rate": 7.09865330137499e-05, + "loss": 16.5003, + "step": 9147 + }, + { + "epoch": 0.3813096577883373, + "grad_norm": 288.0, + "learning_rate": 7.0980406178839e-05, + "loss": 11.5634, + "step": 9148 + }, + { + "epoch": 0.3813513400858655, + "grad_norm": 238.0, + "learning_rate": 7.097427896155835e-05, + "loss": 11.1255, + "step": 9149 + }, + { + "epoch": 0.3813930223833938, + "grad_norm": 92.5, + "learning_rate": 7.09681513620196e-05, + "loss": 8.5003, + "step": 9150 + }, + { + "epoch": 0.381434704680922, + "grad_norm": 60.0, + "learning_rate": 7.096202338033441e-05, + "loss": 7.9066, + "step": 9151 + }, + { + "epoch": 0.38147638697845027, + "grad_norm": 436.0, + "learning_rate": 7.095589501661452e-05, + "loss": 16.7515, + "step": 9152 + }, + { + "epoch": 0.3815180692759785, + "grad_norm": 126.0, + "learning_rate": 7.094976627097155e-05, + "loss": 8.1881, + "step": 9153 + }, + { + "epoch": 0.38155975157350674, + "grad_norm": 796.0, + "learning_rate": 7.094363714351724e-05, + "loss": 19.8775, + "step": 9154 + }, + { + "epoch": 0.38160143387103496, + "grad_norm": 292.0, + "learning_rate": 7.093750763436328e-05, + "loss": 12.1889, + "step": 9155 + }, + { + "epoch": 0.3816431161685632, + "grad_norm": 676.0, + "learning_rate": 7.093137774362139e-05, + "loss": 18.0002, + "step": 9156 + }, + { + "epoch": 0.38168479846609144, + "grad_norm": 474.0, + "learning_rate": 7.09252474714033e-05, + "loss": 17.3755, + "step": 9157 + }, + { + "epoch": 0.3817264807636197, + "grad_norm": 1224.0, + "learning_rate": 7.091911681782069e-05, + "loss": 33.5005, + "step": 9158 + }, + { + "epoch": 0.3817681630611479, + "grad_norm": 202.0, + "learning_rate": 7.091298578298534e-05, + "loss": 10.9378, + "step": 9159 + }, + { + "epoch": 0.3818098453586762, + "grad_norm": 524.0, + "learning_rate": 7.090685436700896e-05, + "loss": 18.0039, + "step": 9160 + }, + { + "epoch": 0.3818515276562044, + "grad_norm": 237.0, + "learning_rate": 7.090072257000329e-05, + "loss": 11.7508, + "step": 9161 + }, + { + "epoch": 0.38189320995373266, + "grad_norm": 87.0, + "learning_rate": 7.089459039208012e-05, + "loss": 8.2504, + "step": 9162 + }, + { + "epoch": 0.38193489225126087, + "grad_norm": 356.0, + "learning_rate": 7.088845783335116e-05, + "loss": 14.3127, + "step": 9163 + }, + { + "epoch": 0.38197657454878914, + "grad_norm": 314.0, + "learning_rate": 7.088232489392822e-05, + "loss": 14.0631, + "step": 9164 + }, + { + "epoch": 0.38201825684631735, + "grad_norm": 235.0, + "learning_rate": 7.087619157392306e-05, + "loss": 10.5003, + "step": 9165 + }, + { + "epoch": 0.3820599391438456, + "grad_norm": 153.0, + "learning_rate": 7.087005787344743e-05, + "loss": 10.2503, + "step": 9166 + }, + { + "epoch": 0.38210162144137383, + "grad_norm": 1368.0, + "learning_rate": 7.086392379261315e-05, + "loss": 30.2543, + "step": 9167 + }, + { + "epoch": 0.3821433037389021, + "grad_norm": 772.0, + "learning_rate": 7.085778933153202e-05, + "loss": 23.3766, + "step": 9168 + }, + { + "epoch": 0.3821849860364303, + "grad_norm": 428.0, + "learning_rate": 7.085165449031583e-05, + "loss": 12.6265, + "step": 9169 + }, + { + "epoch": 0.3822266683339586, + "grad_norm": 486.0, + "learning_rate": 7.084551926907636e-05, + "loss": 16.8771, + "step": 9170 + }, + { + "epoch": 0.3822683506314868, + "grad_norm": 952.0, + "learning_rate": 7.083938366792548e-05, + "loss": 25.7544, + "step": 9171 + }, + { + "epoch": 0.38231003292901505, + "grad_norm": 800.0, + "learning_rate": 7.083324768697497e-05, + "loss": 23.7503, + "step": 9172 + }, + { + "epoch": 0.38235171522654327, + "grad_norm": 160.0, + "learning_rate": 7.082711132633668e-05, + "loss": 11.626, + "step": 9173 + }, + { + "epoch": 0.38239339752407153, + "grad_norm": 350.0, + "learning_rate": 7.082097458612242e-05, + "loss": 14.5012, + "step": 9174 + }, + { + "epoch": 0.38243507982159974, + "grad_norm": 436.0, + "learning_rate": 7.081483746644406e-05, + "loss": 15.7504, + "step": 9175 + }, + { + "epoch": 0.382476762119128, + "grad_norm": 346.0, + "learning_rate": 7.080869996741343e-05, + "loss": 14.7502, + "step": 9176 + }, + { + "epoch": 0.3825184444166562, + "grad_norm": 358.0, + "learning_rate": 7.08025620891424e-05, + "loss": 13.6254, + "step": 9177 + }, + { + "epoch": 0.3825601267141845, + "grad_norm": 520.0, + "learning_rate": 7.079642383174283e-05, + "loss": 17.6253, + "step": 9178 + }, + { + "epoch": 0.3826018090117127, + "grad_norm": 245.0, + "learning_rate": 7.07902851953266e-05, + "loss": 12.188, + "step": 9179 + }, + { + "epoch": 0.38264349130924097, + "grad_norm": 1288.0, + "learning_rate": 7.078414618000558e-05, + "loss": 32.5004, + "step": 9180 + }, + { + "epoch": 0.3826851736067692, + "grad_norm": 143.0, + "learning_rate": 7.077800678589162e-05, + "loss": 10.8757, + "step": 9181 + }, + { + "epoch": 0.38272685590429745, + "grad_norm": 131.0, + "learning_rate": 7.077186701309667e-05, + "loss": 8.5626, + "step": 9182 + }, + { + "epoch": 0.38276853820182566, + "grad_norm": 286.0, + "learning_rate": 7.076572686173259e-05, + "loss": 12.5628, + "step": 9183 + }, + { + "epoch": 0.3828102204993539, + "grad_norm": 206.0, + "learning_rate": 7.07595863319113e-05, + "loss": 10.7508, + "step": 9184 + }, + { + "epoch": 0.38285190279688214, + "grad_norm": 568.0, + "learning_rate": 7.07534454237447e-05, + "loss": 18.1255, + "step": 9185 + }, + { + "epoch": 0.3828935850944104, + "grad_norm": 380.0, + "learning_rate": 7.074730413734472e-05, + "loss": 14.5628, + "step": 9186 + }, + { + "epoch": 0.3829352673919386, + "grad_norm": 490.0, + "learning_rate": 7.074116247282329e-05, + "loss": 15.3142, + "step": 9187 + }, + { + "epoch": 0.3829769496894669, + "grad_norm": 556.0, + "learning_rate": 7.073502043029232e-05, + "loss": 18.3754, + "step": 9188 + }, + { + "epoch": 0.38301863198699515, + "grad_norm": 438.0, + "learning_rate": 7.072887800986375e-05, + "loss": 18.1255, + "step": 9189 + }, + { + "epoch": 0.38306031428452336, + "grad_norm": 114.0, + "learning_rate": 7.072273521164955e-05, + "loss": 8.0003, + "step": 9190 + }, + { + "epoch": 0.38310199658205163, + "grad_norm": 1368.0, + "learning_rate": 7.071659203576166e-05, + "loss": 26.6298, + "step": 9191 + }, + { + "epoch": 0.38314367887957984, + "grad_norm": 386.0, + "learning_rate": 7.071044848231204e-05, + "loss": 15.1878, + "step": 9192 + }, + { + "epoch": 0.3831853611771081, + "grad_norm": 127.5, + "learning_rate": 7.070430455141266e-05, + "loss": 6.2819, + "step": 9193 + }, + { + "epoch": 0.3832270434746363, + "grad_norm": 318.0, + "learning_rate": 7.06981602431755e-05, + "loss": 13.8753, + "step": 9194 + }, + { + "epoch": 0.3832687257721646, + "grad_norm": 704.0, + "learning_rate": 7.069201555771251e-05, + "loss": 19.8757, + "step": 9195 + }, + { + "epoch": 0.3833104080696928, + "grad_norm": 552.0, + "learning_rate": 7.068587049513574e-05, + "loss": 18.5009, + "step": 9196 + }, + { + "epoch": 0.38335209036722107, + "grad_norm": 680.0, + "learning_rate": 7.067972505555712e-05, + "loss": 17.7544, + "step": 9197 + }, + { + "epoch": 0.3833937726647493, + "grad_norm": 484.0, + "learning_rate": 7.067357923908867e-05, + "loss": 17.5005, + "step": 9198 + }, + { + "epoch": 0.38343545496227754, + "grad_norm": 233.0, + "learning_rate": 7.06674330458424e-05, + "loss": 3.9534, + "step": 9199 + }, + { + "epoch": 0.38347713725980576, + "grad_norm": 236.0, + "learning_rate": 7.066128647593033e-05, + "loss": 11.4377, + "step": 9200 + }, + { + "epoch": 0.383518819557334, + "grad_norm": 354.0, + "learning_rate": 7.065513952946449e-05, + "loss": 15.0627, + "step": 9201 + }, + { + "epoch": 0.38356050185486223, + "grad_norm": 312.0, + "learning_rate": 7.064899220655688e-05, + "loss": 11.1253, + "step": 9202 + }, + { + "epoch": 0.3836021841523905, + "grad_norm": 608.0, + "learning_rate": 7.064284450731956e-05, + "loss": 19.5002, + "step": 9203 + }, + { + "epoch": 0.3836438664499187, + "grad_norm": 964.0, + "learning_rate": 7.063669643186458e-05, + "loss": 23.8752, + "step": 9204 + }, + { + "epoch": 0.383685548747447, + "grad_norm": 768.0, + "learning_rate": 7.063054798030396e-05, + "loss": 22.8762, + "step": 9205 + }, + { + "epoch": 0.3837272310449752, + "grad_norm": 652.0, + "learning_rate": 7.062439915274979e-05, + "loss": 17.1252, + "step": 9206 + }, + { + "epoch": 0.38376891334250346, + "grad_norm": 322.0, + "learning_rate": 7.06182499493141e-05, + "loss": 13.7502, + "step": 9207 + }, + { + "epoch": 0.38381059564003167, + "grad_norm": 964.0, + "learning_rate": 7.061210037010897e-05, + "loss": 24.5003, + "step": 9208 + }, + { + "epoch": 0.38385227793755994, + "grad_norm": 502.0, + "learning_rate": 7.06059504152465e-05, + "loss": 15.0039, + "step": 9209 + }, + { + "epoch": 0.38389396023508815, + "grad_norm": 584.0, + "learning_rate": 7.059980008483875e-05, + "loss": 18.3755, + "step": 9210 + }, + { + "epoch": 0.3839356425326164, + "grad_norm": 231.0, + "learning_rate": 7.05936493789978e-05, + "loss": 13.1255, + "step": 9211 + }, + { + "epoch": 0.38397732483014463, + "grad_norm": 106.5, + "learning_rate": 7.058749829783578e-05, + "loss": 9.8777, + "step": 9212 + }, + { + "epoch": 0.3840190071276729, + "grad_norm": 91.5, + "learning_rate": 7.058134684146476e-05, + "loss": 9.4377, + "step": 9213 + }, + { + "epoch": 0.3840606894252011, + "grad_norm": 360.0, + "learning_rate": 7.057519500999687e-05, + "loss": 16.2506, + "step": 9214 + }, + { + "epoch": 0.3841023717227294, + "grad_norm": 219.0, + "learning_rate": 7.056904280354423e-05, + "loss": 10.1877, + "step": 9215 + }, + { + "epoch": 0.3841440540202576, + "grad_norm": 572.0, + "learning_rate": 7.056289022221896e-05, + "loss": 18.1254, + "step": 9216 + }, + { + "epoch": 0.38418573631778585, + "grad_norm": 233.0, + "learning_rate": 7.055673726613319e-05, + "loss": 11.4377, + "step": 9217 + }, + { + "epoch": 0.38422741861531406, + "grad_norm": 510.0, + "learning_rate": 7.055058393539905e-05, + "loss": 17.5003, + "step": 9218 + }, + { + "epoch": 0.38426910091284233, + "grad_norm": 338.0, + "learning_rate": 7.05444302301287e-05, + "loss": 13.5627, + "step": 9219 + }, + { + "epoch": 0.38431078321037054, + "grad_norm": 390.0, + "learning_rate": 7.053827615043427e-05, + "loss": 13.6879, + "step": 9220 + }, + { + "epoch": 0.3843524655078988, + "grad_norm": 768.0, + "learning_rate": 7.053212169642795e-05, + "loss": 22.3763, + "step": 9221 + }, + { + "epoch": 0.384394147805427, + "grad_norm": 400.0, + "learning_rate": 7.05259668682219e-05, + "loss": 14.6259, + "step": 9222 + }, + { + "epoch": 0.3844358301029553, + "grad_norm": 262.0, + "learning_rate": 7.051981166592827e-05, + "loss": 13.3128, + "step": 9223 + }, + { + "epoch": 0.3844775124004835, + "grad_norm": 668.0, + "learning_rate": 7.051365608965925e-05, + "loss": 20.7502, + "step": 9224 + }, + { + "epoch": 0.38451919469801177, + "grad_norm": 82.5, + "learning_rate": 7.050750013952702e-05, + "loss": 7.6564, + "step": 9225 + }, + { + "epoch": 0.38456087699554, + "grad_norm": 322.0, + "learning_rate": 7.05013438156438e-05, + "loss": 14.0627, + "step": 9226 + }, + { + "epoch": 0.38460255929306825, + "grad_norm": 406.0, + "learning_rate": 7.049518711812175e-05, + "loss": 15.2511, + "step": 9227 + }, + { + "epoch": 0.38464424159059646, + "grad_norm": 400.0, + "learning_rate": 7.04890300470731e-05, + "loss": 16.1256, + "step": 9228 + }, + { + "epoch": 0.3846859238881247, + "grad_norm": 243.0, + "learning_rate": 7.048287260261005e-05, + "loss": 12.5009, + "step": 9229 + }, + { + "epoch": 0.38472760618565294, + "grad_norm": 2240.0, + "learning_rate": 7.047671478484485e-05, + "loss": 43.2511, + "step": 9230 + }, + { + "epoch": 0.3847692884831812, + "grad_norm": 156.0, + "learning_rate": 7.047055659388968e-05, + "loss": 10.0632, + "step": 9231 + }, + { + "epoch": 0.3848109707807094, + "grad_norm": 532.0, + "learning_rate": 7.046439802985682e-05, + "loss": 18.5002, + "step": 9232 + }, + { + "epoch": 0.3848526530782377, + "grad_norm": 225.0, + "learning_rate": 7.045823909285848e-05, + "loss": 11.0008, + "step": 9233 + }, + { + "epoch": 0.3848943353757659, + "grad_norm": 194.0, + "learning_rate": 7.045207978300692e-05, + "loss": 10.2502, + "step": 9234 + }, + { + "epoch": 0.38493601767329416, + "grad_norm": 328.0, + "learning_rate": 7.044592010041439e-05, + "loss": 14.2508, + "step": 9235 + }, + { + "epoch": 0.3849776999708224, + "grad_norm": 88.5, + "learning_rate": 7.043976004519314e-05, + "loss": 8.4385, + "step": 9236 + }, + { + "epoch": 0.38501938226835064, + "grad_norm": 326.0, + "learning_rate": 7.043359961745545e-05, + "loss": 14.3127, + "step": 9237 + }, + { + "epoch": 0.38506106456587885, + "grad_norm": 360.0, + "learning_rate": 7.042743881731362e-05, + "loss": 14.3754, + "step": 9238 + }, + { + "epoch": 0.3851027468634071, + "grad_norm": 772.0, + "learning_rate": 7.042127764487987e-05, + "loss": 23.5001, + "step": 9239 + }, + { + "epoch": 0.38514442916093533, + "grad_norm": 672.0, + "learning_rate": 7.041511610026655e-05, + "loss": 20.2541, + "step": 9240 + }, + { + "epoch": 0.3851861114584636, + "grad_norm": 272.0, + "learning_rate": 7.040895418358591e-05, + "loss": 11.5636, + "step": 9241 + }, + { + "epoch": 0.3852277937559918, + "grad_norm": 932.0, + "learning_rate": 7.040279189495026e-05, + "loss": 25.6252, + "step": 9242 + }, + { + "epoch": 0.3852694760535201, + "grad_norm": 253.0, + "learning_rate": 7.039662923447194e-05, + "loss": 11.5628, + "step": 9243 + }, + { + "epoch": 0.3853111583510483, + "grad_norm": 800.0, + "learning_rate": 7.039046620226322e-05, + "loss": 19.7548, + "step": 9244 + }, + { + "epoch": 0.38535284064857656, + "grad_norm": 322.0, + "learning_rate": 7.038430279843645e-05, + "loss": 13.813, + "step": 9245 + }, + { + "epoch": 0.38539452294610477, + "grad_norm": 278.0, + "learning_rate": 7.037813902310397e-05, + "loss": 13.0005, + "step": 9246 + }, + { + "epoch": 0.38543620524363303, + "grad_norm": 446.0, + "learning_rate": 7.037197487637808e-05, + "loss": 17.2504, + "step": 9247 + }, + { + "epoch": 0.38547788754116125, + "grad_norm": 193.0, + "learning_rate": 7.036581035837113e-05, + "loss": 10.5627, + "step": 9248 + }, + { + "epoch": 0.3855195698386895, + "grad_norm": 560.0, + "learning_rate": 7.03596454691955e-05, + "loss": 17.7503, + "step": 9249 + }, + { + "epoch": 0.3855612521362177, + "grad_norm": 704.0, + "learning_rate": 7.03534802089635e-05, + "loss": 22.3757, + "step": 9250 + }, + { + "epoch": 0.385602934433746, + "grad_norm": 652.0, + "learning_rate": 7.034731457778753e-05, + "loss": 15.7502, + "step": 9251 + }, + { + "epoch": 0.3856446167312742, + "grad_norm": 184.0, + "learning_rate": 7.034114857577996e-05, + "loss": 5.9695, + "step": 9252 + }, + { + "epoch": 0.38568629902880247, + "grad_norm": 226.0, + "learning_rate": 7.033498220305314e-05, + "loss": 11.8128, + "step": 9253 + }, + { + "epoch": 0.3857279813263307, + "grad_norm": 126.0, + "learning_rate": 7.032881545971945e-05, + "loss": 9.4382, + "step": 9254 + }, + { + "epoch": 0.38576966362385895, + "grad_norm": 548.0, + "learning_rate": 7.03226483458913e-05, + "loss": 17.1253, + "step": 9255 + }, + { + "epoch": 0.38581134592138716, + "grad_norm": 76.0, + "learning_rate": 7.031648086168109e-05, + "loss": 8.4377, + "step": 9256 + }, + { + "epoch": 0.38585302821891543, + "grad_norm": 294.0, + "learning_rate": 7.031031300720121e-05, + "loss": 12.9378, + "step": 9257 + }, + { + "epoch": 0.38589471051644364, + "grad_norm": 564.0, + "learning_rate": 7.030414478256408e-05, + "loss": 17.7502, + "step": 9258 + }, + { + "epoch": 0.3859363928139719, + "grad_norm": 560.0, + "learning_rate": 7.02979761878821e-05, + "loss": 19.0002, + "step": 9259 + }, + { + "epoch": 0.3859780751115001, + "grad_norm": 408.0, + "learning_rate": 7.029180722326769e-05, + "loss": 11.8752, + "step": 9260 + }, + { + "epoch": 0.3860197574090284, + "grad_norm": 928.0, + "learning_rate": 7.028563788883332e-05, + "loss": 24.6257, + "step": 9261 + }, + { + "epoch": 0.38606143970655665, + "grad_norm": 580.0, + "learning_rate": 7.027946818469137e-05, + "loss": 18.5002, + "step": 9262 + }, + { + "epoch": 0.38610312200408486, + "grad_norm": 300.0, + "learning_rate": 7.027329811095432e-05, + "loss": 14.1253, + "step": 9263 + }, + { + "epoch": 0.38614480430161313, + "grad_norm": 412.0, + "learning_rate": 7.026712766773462e-05, + "loss": 13.8129, + "step": 9264 + }, + { + "epoch": 0.38618648659914134, + "grad_norm": 340.0, + "learning_rate": 7.026095685514471e-05, + "loss": 13.7502, + "step": 9265 + }, + { + "epoch": 0.3862281688966696, + "grad_norm": 138.0, + "learning_rate": 7.025478567329706e-05, + "loss": 9.4378, + "step": 9266 + }, + { + "epoch": 0.3862698511941978, + "grad_norm": 470.0, + "learning_rate": 7.024861412230415e-05, + "loss": 15.5022, + "step": 9267 + }, + { + "epoch": 0.3863115334917261, + "grad_norm": 332.0, + "learning_rate": 7.024244220227845e-05, + "loss": 10.5629, + "step": 9268 + }, + { + "epoch": 0.3863532157892543, + "grad_norm": 376.0, + "learning_rate": 7.023626991333246e-05, + "loss": 14.6271, + "step": 9269 + }, + { + "epoch": 0.38639489808678257, + "grad_norm": 820.0, + "learning_rate": 7.023009725557863e-05, + "loss": 21.8777, + "step": 9270 + }, + { + "epoch": 0.3864365803843108, + "grad_norm": 780.0, + "learning_rate": 7.022392422912949e-05, + "loss": 18.3777, + "step": 9271 + }, + { + "epoch": 0.38647826268183905, + "grad_norm": 330.0, + "learning_rate": 7.021775083409754e-05, + "loss": 14.7501, + "step": 9272 + }, + { + "epoch": 0.38651994497936726, + "grad_norm": 364.0, + "learning_rate": 7.02115770705953e-05, + "loss": 14.5627, + "step": 9273 + }, + { + "epoch": 0.3865616272768955, + "grad_norm": 422.0, + "learning_rate": 7.020540293873524e-05, + "loss": 15.5007, + "step": 9274 + }, + { + "epoch": 0.38660330957442374, + "grad_norm": 348.0, + "learning_rate": 7.019922843862993e-05, + "loss": 13.8134, + "step": 9275 + }, + { + "epoch": 0.386644991871952, + "grad_norm": 120.0, + "learning_rate": 7.01930535703919e-05, + "loss": 8.3753, + "step": 9276 + }, + { + "epoch": 0.3866866741694802, + "grad_norm": 380.0, + "learning_rate": 7.018687833413366e-05, + "loss": 13.563, + "step": 9277 + }, + { + "epoch": 0.3867283564670085, + "grad_norm": 272.0, + "learning_rate": 7.01807027299678e-05, + "loss": 12.5646, + "step": 9278 + }, + { + "epoch": 0.3867700387645367, + "grad_norm": 470.0, + "learning_rate": 7.01745267580068e-05, + "loss": 16.6252, + "step": 9279 + }, + { + "epoch": 0.38681172106206496, + "grad_norm": 324.0, + "learning_rate": 7.016835041836328e-05, + "loss": 13.3765, + "step": 9280 + }, + { + "epoch": 0.3868534033595932, + "grad_norm": 338.0, + "learning_rate": 7.016217371114978e-05, + "loss": 14.7504, + "step": 9281 + }, + { + "epoch": 0.38689508565712144, + "grad_norm": 201.0, + "learning_rate": 7.015599663647888e-05, + "loss": 11.9377, + "step": 9282 + }, + { + "epoch": 0.38693676795464965, + "grad_norm": 232.0, + "learning_rate": 7.014981919446315e-05, + "loss": 12.5015, + "step": 9283 + }, + { + "epoch": 0.3869784502521779, + "grad_norm": 222.0, + "learning_rate": 7.014364138521517e-05, + "loss": 10.1879, + "step": 9284 + }, + { + "epoch": 0.38702013254970613, + "grad_norm": 87.5, + "learning_rate": 7.013746320884755e-05, + "loss": 7.3127, + "step": 9285 + }, + { + "epoch": 0.3870618148472344, + "grad_norm": 86.5, + "learning_rate": 7.013128466547287e-05, + "loss": 8.1877, + "step": 9286 + }, + { + "epoch": 0.3871034971447626, + "grad_norm": 270.0, + "learning_rate": 7.012510575520373e-05, + "loss": 12.8753, + "step": 9287 + }, + { + "epoch": 0.3871451794422909, + "grad_norm": 1224.0, + "learning_rate": 7.011892647815276e-05, + "loss": 26.2506, + "step": 9288 + }, + { + "epoch": 0.3871868617398191, + "grad_norm": 608.0, + "learning_rate": 7.011274683443258e-05, + "loss": 19.0002, + "step": 9289 + }, + { + "epoch": 0.38722854403734736, + "grad_norm": 237.0, + "learning_rate": 7.010656682415579e-05, + "loss": 9.6894, + "step": 9290 + }, + { + "epoch": 0.38727022633487557, + "grad_norm": 424.0, + "learning_rate": 7.010038644743504e-05, + "loss": 14.4377, + "step": 9291 + }, + { + "epoch": 0.38731190863240383, + "grad_norm": 334.0, + "learning_rate": 7.009420570438294e-05, + "loss": 15.0002, + "step": 9292 + }, + { + "epoch": 0.38735359092993205, + "grad_norm": 78.5, + "learning_rate": 7.008802459511217e-05, + "loss": 5.0638, + "step": 9293 + }, + { + "epoch": 0.3873952732274603, + "grad_norm": 134.0, + "learning_rate": 7.008184311973539e-05, + "loss": 9.6252, + "step": 9294 + }, + { + "epoch": 0.3874369555249885, + "grad_norm": 776.0, + "learning_rate": 7.007566127836522e-05, + "loss": 20.1276, + "step": 9295 + }, + { + "epoch": 0.3874786378225168, + "grad_norm": 262.0, + "learning_rate": 7.006947907111434e-05, + "loss": 11.5002, + "step": 9296 + }, + { + "epoch": 0.387520320120045, + "grad_norm": 368.0, + "learning_rate": 7.006329649809543e-05, + "loss": 13.9377, + "step": 9297 + }, + { + "epoch": 0.38756200241757327, + "grad_norm": 260.0, + "learning_rate": 7.005711355942115e-05, + "loss": 12.5004, + "step": 9298 + }, + { + "epoch": 0.3876036847151015, + "grad_norm": 708.0, + "learning_rate": 7.00509302552042e-05, + "loss": 23.7501, + "step": 9299 + }, + { + "epoch": 0.38764536701262975, + "grad_norm": 326.0, + "learning_rate": 7.004474658555726e-05, + "loss": 13.0003, + "step": 9300 + }, + { + "epoch": 0.38768704931015796, + "grad_norm": 266.0, + "learning_rate": 7.003856255059305e-05, + "loss": 11.0631, + "step": 9301 + }, + { + "epoch": 0.38772873160768623, + "grad_norm": 266.0, + "learning_rate": 7.003237815042425e-05, + "loss": 13.938, + "step": 9302 + }, + { + "epoch": 0.38777041390521444, + "grad_norm": 316.0, + "learning_rate": 7.002619338516358e-05, + "loss": 13.0627, + "step": 9303 + }, + { + "epoch": 0.3878120962027427, + "grad_norm": 480.0, + "learning_rate": 7.002000825492375e-05, + "loss": 16.8776, + "step": 9304 + }, + { + "epoch": 0.3878537785002709, + "grad_norm": 336.0, + "learning_rate": 7.001382275981749e-05, + "loss": 12.3148, + "step": 9305 + }, + { + "epoch": 0.3878954607977992, + "grad_norm": 1048.0, + "learning_rate": 7.000763689995755e-05, + "loss": 31.3753, + "step": 9306 + }, + { + "epoch": 0.3879371430953274, + "grad_norm": 412.0, + "learning_rate": 7.000145067545664e-05, + "loss": 13.5632, + "step": 9307 + }, + { + "epoch": 0.38797882539285566, + "grad_norm": 486.0, + "learning_rate": 6.999526408642751e-05, + "loss": 17.1256, + "step": 9308 + }, + { + "epoch": 0.3880205076903839, + "grad_norm": 224.0, + "learning_rate": 6.998907713298293e-05, + "loss": 10.1896, + "step": 9309 + }, + { + "epoch": 0.38806218998791214, + "grad_norm": 424.0, + "learning_rate": 6.998288981523564e-05, + "loss": 15.3754, + "step": 9310 + }, + { + "epoch": 0.38810387228544035, + "grad_norm": 500.0, + "learning_rate": 6.997670213329841e-05, + "loss": 16.2525, + "step": 9311 + }, + { + "epoch": 0.3881455545829686, + "grad_norm": 92.5, + "learning_rate": 6.997051408728401e-05, + "loss": 9.5007, + "step": 9312 + }, + { + "epoch": 0.38818723688049683, + "grad_norm": 350.0, + "learning_rate": 6.996432567730522e-05, + "loss": 14.4378, + "step": 9313 + }, + { + "epoch": 0.3882289191780251, + "grad_norm": 127.0, + "learning_rate": 6.995813690347483e-05, + "loss": 7.0945, + "step": 9314 + }, + { + "epoch": 0.3882706014755533, + "grad_norm": 394.0, + "learning_rate": 6.995194776590561e-05, + "loss": 15.2502, + "step": 9315 + }, + { + "epoch": 0.3883122837730816, + "grad_norm": 704.0, + "learning_rate": 6.994575826471037e-05, + "loss": 19.1294, + "step": 9316 + }, + { + "epoch": 0.3883539660706098, + "grad_norm": 184.0, + "learning_rate": 6.99395684000019e-05, + "loss": 10.3126, + "step": 9317 + }, + { + "epoch": 0.38839564836813806, + "grad_norm": 246.0, + "learning_rate": 6.993337817189306e-05, + "loss": 12.1256, + "step": 9318 + }, + { + "epoch": 0.38843733066566627, + "grad_norm": 235.0, + "learning_rate": 6.992718758049662e-05, + "loss": 12.0627, + "step": 9319 + }, + { + "epoch": 0.38847901296319454, + "grad_norm": 1040.0, + "learning_rate": 6.992099662592542e-05, + "loss": 29.5003, + "step": 9320 + }, + { + "epoch": 0.38852069526072275, + "grad_norm": 212.0, + "learning_rate": 6.991480530829228e-05, + "loss": 8.0022, + "step": 9321 + }, + { + "epoch": 0.388562377558251, + "grad_norm": 235.0, + "learning_rate": 6.990861362771005e-05, + "loss": 12.3136, + "step": 9322 + }, + { + "epoch": 0.3886040598557792, + "grad_norm": 628.0, + "learning_rate": 6.990242158429156e-05, + "loss": 19.1253, + "step": 9323 + }, + { + "epoch": 0.3886457421533075, + "grad_norm": 324.0, + "learning_rate": 6.989622917814968e-05, + "loss": 10.5003, + "step": 9324 + }, + { + "epoch": 0.3886874244508357, + "grad_norm": 316.0, + "learning_rate": 6.989003640939726e-05, + "loss": 13.7503, + "step": 9325 + }, + { + "epoch": 0.388729106748364, + "grad_norm": 176.0, + "learning_rate": 6.988384327814716e-05, + "loss": 10.6877, + "step": 9326 + }, + { + "epoch": 0.3887707890458922, + "grad_norm": 360.0, + "learning_rate": 6.987764978451226e-05, + "loss": 13.8131, + "step": 9327 + }, + { + "epoch": 0.38881247134342045, + "grad_norm": 362.0, + "learning_rate": 6.98714559286054e-05, + "loss": 16.0005, + "step": 9328 + }, + { + "epoch": 0.38885415364094866, + "grad_norm": 612.0, + "learning_rate": 6.986526171053951e-05, + "loss": 19.7505, + "step": 9329 + }, + { + "epoch": 0.38889583593847693, + "grad_norm": 540.0, + "learning_rate": 6.985906713042745e-05, + "loss": 17.8758, + "step": 9330 + }, + { + "epoch": 0.38893751823600514, + "grad_norm": 696.0, + "learning_rate": 6.985287218838214e-05, + "loss": 18.3752, + "step": 9331 + }, + { + "epoch": 0.3889792005335334, + "grad_norm": 95.5, + "learning_rate": 6.984667688451648e-05, + "loss": 10.3127, + "step": 9332 + }, + { + "epoch": 0.3890208828310616, + "grad_norm": 310.0, + "learning_rate": 6.984048121894335e-05, + "loss": 12.1254, + "step": 9333 + }, + { + "epoch": 0.3890625651285899, + "grad_norm": 736.0, + "learning_rate": 6.983428519177571e-05, + "loss": 21.7503, + "step": 9334 + }, + { + "epoch": 0.38910424742611815, + "grad_norm": 584.0, + "learning_rate": 6.982808880312644e-05, + "loss": 19.6252, + "step": 9335 + }, + { + "epoch": 0.38914592972364637, + "grad_norm": 202.0, + "learning_rate": 6.982189205310851e-05, + "loss": 12.313, + "step": 9336 + }, + { + "epoch": 0.38918761202117463, + "grad_norm": 268.0, + "learning_rate": 6.981569494183483e-05, + "loss": 11.5033, + "step": 9337 + }, + { + "epoch": 0.38922929431870285, + "grad_norm": 402.0, + "learning_rate": 6.980949746941836e-05, + "loss": 14.5012, + "step": 9338 + }, + { + "epoch": 0.3892709766162311, + "grad_norm": 268.0, + "learning_rate": 6.980329963597202e-05, + "loss": 12.1253, + "step": 9339 + }, + { + "epoch": 0.3893126589137593, + "grad_norm": 456.0, + "learning_rate": 6.97971014416088e-05, + "loss": 16.6254, + "step": 9340 + }, + { + "epoch": 0.3893543412112876, + "grad_norm": 233.0, + "learning_rate": 6.979090288644164e-05, + "loss": 11.6877, + "step": 9341 + }, + { + "epoch": 0.3893960235088158, + "grad_norm": 478.0, + "learning_rate": 6.978470397058352e-05, + "loss": 17.6261, + "step": 9342 + }, + { + "epoch": 0.38943770580634407, + "grad_norm": 620.0, + "learning_rate": 6.977850469414742e-05, + "loss": 17.2504, + "step": 9343 + }, + { + "epoch": 0.3894793881038723, + "grad_norm": 300.0, + "learning_rate": 6.977230505724632e-05, + "loss": 13.3751, + "step": 9344 + }, + { + "epoch": 0.38952107040140055, + "grad_norm": 192.0, + "learning_rate": 6.97661050599932e-05, + "loss": 11.313, + "step": 9345 + }, + { + "epoch": 0.38956275269892876, + "grad_norm": 568.0, + "learning_rate": 6.975990470250106e-05, + "loss": 19.377, + "step": 9346 + }, + { + "epoch": 0.389604434996457, + "grad_norm": 290.0, + "learning_rate": 6.97537039848829e-05, + "loss": 13.6877, + "step": 9347 + }, + { + "epoch": 0.38964611729398524, + "grad_norm": 612.0, + "learning_rate": 6.974750290725174e-05, + "loss": 17.3786, + "step": 9348 + }, + { + "epoch": 0.3896877995915135, + "grad_norm": 188.0, + "learning_rate": 6.974130146972057e-05, + "loss": 10.8127, + "step": 9349 + }, + { + "epoch": 0.3897294818890417, + "grad_norm": 136.0, + "learning_rate": 6.973509967240243e-05, + "loss": 10.2524, + "step": 9350 + }, + { + "epoch": 0.38977116418657, + "grad_norm": 1072.0, + "learning_rate": 6.972889751541036e-05, + "loss": 22.0008, + "step": 9351 + }, + { + "epoch": 0.3898128464840982, + "grad_norm": 458.0, + "learning_rate": 6.972269499885738e-05, + "loss": 14.3752, + "step": 9352 + }, + { + "epoch": 0.38985452878162646, + "grad_norm": 260.0, + "learning_rate": 6.971649212285654e-05, + "loss": 13.2526, + "step": 9353 + }, + { + "epoch": 0.3898962110791547, + "grad_norm": 49.25, + "learning_rate": 6.971028888752087e-05, + "loss": 7.5628, + "step": 9354 + }, + { + "epoch": 0.38993789337668294, + "grad_norm": 125.0, + "learning_rate": 6.970408529296342e-05, + "loss": 8.3127, + "step": 9355 + }, + { + "epoch": 0.38997957567421115, + "grad_norm": 1384.0, + "learning_rate": 6.969788133929729e-05, + "loss": 29.505, + "step": 9356 + }, + { + "epoch": 0.3900212579717394, + "grad_norm": 314.0, + "learning_rate": 6.96916770266355e-05, + "loss": 13.6253, + "step": 9357 + }, + { + "epoch": 0.39006294026926763, + "grad_norm": 112.5, + "learning_rate": 6.968547235509118e-05, + "loss": 8.5002, + "step": 9358 + }, + { + "epoch": 0.3901046225667959, + "grad_norm": 168.0, + "learning_rate": 6.967926732477735e-05, + "loss": 8.4381, + "step": 9359 + }, + { + "epoch": 0.3901463048643241, + "grad_norm": 145.0, + "learning_rate": 6.967306193580715e-05, + "loss": 10.3128, + "step": 9360 + }, + { + "epoch": 0.3901879871618524, + "grad_norm": 179.0, + "learning_rate": 6.966685618829362e-05, + "loss": 11.3127, + "step": 9361 + }, + { + "epoch": 0.3902296694593806, + "grad_norm": 724.0, + "learning_rate": 6.966065008234992e-05, + "loss": 22.2503, + "step": 9362 + }, + { + "epoch": 0.39027135175690886, + "grad_norm": 199.0, + "learning_rate": 6.965444361808912e-05, + "loss": 11.1897, + "step": 9363 + }, + { + "epoch": 0.39031303405443707, + "grad_norm": 900.0, + "learning_rate": 6.964823679562434e-05, + "loss": 23.5002, + "step": 9364 + }, + { + "epoch": 0.39035471635196534, + "grad_norm": 350.0, + "learning_rate": 6.964202961506869e-05, + "loss": 14.8753, + "step": 9365 + }, + { + "epoch": 0.39039639864949355, + "grad_norm": 217.0, + "learning_rate": 6.963582207653532e-05, + "loss": 12.1263, + "step": 9366 + }, + { + "epoch": 0.3904380809470218, + "grad_norm": 282.0, + "learning_rate": 6.962961418013732e-05, + "loss": 11.3755, + "step": 9367 + }, + { + "epoch": 0.39047976324455, + "grad_norm": 338.0, + "learning_rate": 6.962340592598789e-05, + "loss": 14.6251, + "step": 9368 + }, + { + "epoch": 0.3905214455420783, + "grad_norm": 446.0, + "learning_rate": 6.961719731420013e-05, + "loss": 15.3127, + "step": 9369 + }, + { + "epoch": 0.3905631278396065, + "grad_norm": 177.0, + "learning_rate": 6.961098834488722e-05, + "loss": 10.8126, + "step": 9370 + }, + { + "epoch": 0.3906048101371348, + "grad_norm": 354.0, + "learning_rate": 6.96047790181623e-05, + "loss": 13.6878, + "step": 9371 + }, + { + "epoch": 0.390646492434663, + "grad_norm": 368.0, + "learning_rate": 6.959856933413854e-05, + "loss": 13.1912, + "step": 9372 + }, + { + "epoch": 0.39068817473219125, + "grad_norm": 171.0, + "learning_rate": 6.959235929292912e-05, + "loss": 11.3127, + "step": 9373 + }, + { + "epoch": 0.39072985702971946, + "grad_norm": 368.0, + "learning_rate": 6.95861488946472e-05, + "loss": 13.7504, + "step": 9374 + }, + { + "epoch": 0.39077153932724773, + "grad_norm": 162.0, + "learning_rate": 6.9579938139406e-05, + "loss": 10.2504, + "step": 9375 + }, + { + "epoch": 0.39081322162477594, + "grad_norm": 204.0, + "learning_rate": 6.957372702731867e-05, + "loss": 9.8753, + "step": 9376 + }, + { + "epoch": 0.3908549039223042, + "grad_norm": 548.0, + "learning_rate": 6.956751555849843e-05, + "loss": 17.5011, + "step": 9377 + }, + { + "epoch": 0.3908965862198324, + "grad_norm": 156.0, + "learning_rate": 6.956130373305849e-05, + "loss": 10.4377, + "step": 9378 + }, + { + "epoch": 0.3909382685173607, + "grad_norm": 216.0, + "learning_rate": 6.955509155111204e-05, + "loss": 12.3128, + "step": 9379 + }, + { + "epoch": 0.3909799508148889, + "grad_norm": 82.5, + "learning_rate": 6.954887901277231e-05, + "loss": 10.4387, + "step": 9380 + }, + { + "epoch": 0.39102163311241717, + "grad_norm": 229.0, + "learning_rate": 6.954266611815255e-05, + "loss": 13.4395, + "step": 9381 + }, + { + "epoch": 0.3910633154099454, + "grad_norm": 380.0, + "learning_rate": 6.953645286736594e-05, + "loss": 13.5638, + "step": 9382 + }, + { + "epoch": 0.39110499770747365, + "grad_norm": 390.0, + "learning_rate": 6.953023926052576e-05, + "loss": 14.3751, + "step": 9383 + }, + { + "epoch": 0.39114668000500186, + "grad_norm": 302.0, + "learning_rate": 6.952402529774522e-05, + "loss": 13.5629, + "step": 9384 + }, + { + "epoch": 0.3911883623025301, + "grad_norm": 278.0, + "learning_rate": 6.951781097913758e-05, + "loss": 12.5646, + "step": 9385 + }, + { + "epoch": 0.39123004460005834, + "grad_norm": 187.0, + "learning_rate": 6.951159630481612e-05, + "loss": 11.1252, + "step": 9386 + }, + { + "epoch": 0.3912717268975866, + "grad_norm": 556.0, + "learning_rate": 6.950538127489408e-05, + "loss": 18.2502, + "step": 9387 + }, + { + "epoch": 0.3913134091951148, + "grad_norm": 252.0, + "learning_rate": 6.949916588948473e-05, + "loss": 12.6892, + "step": 9388 + }, + { + "epoch": 0.3913550914926431, + "grad_norm": 166.0, + "learning_rate": 6.949295014870137e-05, + "loss": 8.5006, + "step": 9389 + }, + { + "epoch": 0.3913967737901713, + "grad_norm": 524.0, + "learning_rate": 6.948673405265725e-05, + "loss": 17.3756, + "step": 9390 + }, + { + "epoch": 0.39143845608769956, + "grad_norm": 280.0, + "learning_rate": 6.948051760146565e-05, + "loss": 11.8127, + "step": 9391 + }, + { + "epoch": 0.39148013838522777, + "grad_norm": 134.0, + "learning_rate": 6.94743007952399e-05, + "loss": 9.3129, + "step": 9392 + }, + { + "epoch": 0.39152182068275604, + "grad_norm": 944.0, + "learning_rate": 6.94680836340933e-05, + "loss": 25.5003, + "step": 9393 + }, + { + "epoch": 0.39156350298028425, + "grad_norm": 224.0, + "learning_rate": 6.946186611813914e-05, + "loss": 10.1877, + "step": 9394 + }, + { + "epoch": 0.3916051852778125, + "grad_norm": 520.0, + "learning_rate": 6.945564824749075e-05, + "loss": 17.2508, + "step": 9395 + }, + { + "epoch": 0.39164686757534073, + "grad_norm": 776.0, + "learning_rate": 6.944943002226144e-05, + "loss": 21.5003, + "step": 9396 + }, + { + "epoch": 0.391688549872869, + "grad_norm": 82.5, + "learning_rate": 6.944321144256454e-05, + "loss": 9.5628, + "step": 9397 + }, + { + "epoch": 0.3917302321703972, + "grad_norm": 87.0, + "learning_rate": 6.943699250851338e-05, + "loss": 7.6256, + "step": 9398 + }, + { + "epoch": 0.3917719144679255, + "grad_norm": 360.0, + "learning_rate": 6.943077322022132e-05, + "loss": 12.2501, + "step": 9399 + }, + { + "epoch": 0.3918135967654537, + "grad_norm": 856.0, + "learning_rate": 6.942455357780169e-05, + "loss": 27.3752, + "step": 9400 + }, + { + "epoch": 0.39185527906298195, + "grad_norm": 422.0, + "learning_rate": 6.941833358136784e-05, + "loss": 14.063, + "step": 9401 + }, + { + "epoch": 0.39189696136051017, + "grad_norm": 231.0, + "learning_rate": 6.941211323103314e-05, + "loss": 13.0629, + "step": 9402 + }, + { + "epoch": 0.39193864365803843, + "grad_norm": 544.0, + "learning_rate": 6.940589252691096e-05, + "loss": 13.5642, + "step": 9403 + }, + { + "epoch": 0.39198032595556664, + "grad_norm": 251.0, + "learning_rate": 6.939967146911466e-05, + "loss": 12.3127, + "step": 9404 + }, + { + "epoch": 0.3920220082530949, + "grad_norm": 692.0, + "learning_rate": 6.93934500577576e-05, + "loss": 18.5003, + "step": 9405 + }, + { + "epoch": 0.3920636905506231, + "grad_norm": 230.0, + "learning_rate": 6.938722829295322e-05, + "loss": 13.1253, + "step": 9406 + }, + { + "epoch": 0.3921053728481514, + "grad_norm": 216.0, + "learning_rate": 6.938100617481488e-05, + "loss": 12.5002, + "step": 9407 + }, + { + "epoch": 0.39214705514567966, + "grad_norm": 520.0, + "learning_rate": 6.937478370345598e-05, + "loss": 18.1255, + "step": 9408 + }, + { + "epoch": 0.39218873744320787, + "grad_norm": 350.0, + "learning_rate": 6.936856087898993e-05, + "loss": 14.4379, + "step": 9409 + }, + { + "epoch": 0.39223041974073614, + "grad_norm": 188.0, + "learning_rate": 6.936233770153013e-05, + "loss": 11.8753, + "step": 9410 + }, + { + "epoch": 0.39227210203826435, + "grad_norm": 592.0, + "learning_rate": 6.935611417119002e-05, + "loss": 18.0015, + "step": 9411 + }, + { + "epoch": 0.3923137843357926, + "grad_norm": 328.0, + "learning_rate": 6.934989028808299e-05, + "loss": 14.5629, + "step": 9412 + }, + { + "epoch": 0.3923554666333208, + "grad_norm": 584.0, + "learning_rate": 6.934366605232251e-05, + "loss": 16.6269, + "step": 9413 + }, + { + "epoch": 0.3923971489308491, + "grad_norm": 516.0, + "learning_rate": 6.933744146402199e-05, + "loss": 17.6257, + "step": 9414 + }, + { + "epoch": 0.3924388312283773, + "grad_norm": 247.0, + "learning_rate": 6.933121652329489e-05, + "loss": 12.3131, + "step": 9415 + }, + { + "epoch": 0.39248051352590557, + "grad_norm": 402.0, + "learning_rate": 6.932499123025466e-05, + "loss": 14.2516, + "step": 9416 + }, + { + "epoch": 0.3925221958234338, + "grad_norm": 298.0, + "learning_rate": 6.931876558501474e-05, + "loss": 13.2506, + "step": 9417 + }, + { + "epoch": 0.39256387812096205, + "grad_norm": 394.0, + "learning_rate": 6.931253958768858e-05, + "loss": 14.0651, + "step": 9418 + }, + { + "epoch": 0.39260556041849026, + "grad_norm": 173.0, + "learning_rate": 6.93063132383897e-05, + "loss": 11.3131, + "step": 9419 + }, + { + "epoch": 0.39264724271601853, + "grad_norm": 956.0, + "learning_rate": 6.930008653723154e-05, + "loss": 24.0002, + "step": 9420 + }, + { + "epoch": 0.39268892501354674, + "grad_norm": 520.0, + "learning_rate": 6.929385948432758e-05, + "loss": 17.7509, + "step": 9421 + }, + { + "epoch": 0.392730607311075, + "grad_norm": 354.0, + "learning_rate": 6.928763207979133e-05, + "loss": 13.2501, + "step": 9422 + }, + { + "epoch": 0.3927722896086032, + "grad_norm": 360.0, + "learning_rate": 6.928140432373628e-05, + "loss": 14.5628, + "step": 9423 + }, + { + "epoch": 0.3928139719061315, + "grad_norm": 410.0, + "learning_rate": 6.927517621627591e-05, + "loss": 13.3131, + "step": 9424 + }, + { + "epoch": 0.3928556542036597, + "grad_norm": 384.0, + "learning_rate": 6.926894775752375e-05, + "loss": 14.8127, + "step": 9425 + }, + { + "epoch": 0.39289733650118797, + "grad_norm": 346.0, + "learning_rate": 6.92627189475933e-05, + "loss": 13.9377, + "step": 9426 + }, + { + "epoch": 0.3929390187987162, + "grad_norm": 980.0, + "learning_rate": 6.92564897865981e-05, + "loss": 19.1295, + "step": 9427 + }, + { + "epoch": 0.39298070109624444, + "grad_norm": 227.0, + "learning_rate": 6.925026027465165e-05, + "loss": 11.0627, + "step": 9428 + }, + { + "epoch": 0.39302238339377266, + "grad_norm": 460.0, + "learning_rate": 6.92440304118675e-05, + "loss": 15.1877, + "step": 9429 + }, + { + "epoch": 0.3930640656913009, + "grad_norm": 43.0, + "learning_rate": 6.923780019835918e-05, + "loss": 6.2502, + "step": 9430 + }, + { + "epoch": 0.39310574798882914, + "grad_norm": 420.0, + "learning_rate": 6.923156963424025e-05, + "loss": 15.1878, + "step": 9431 + }, + { + "epoch": 0.3931474302863574, + "grad_norm": 185.0, + "learning_rate": 6.922533871962426e-05, + "loss": 9.7503, + "step": 9432 + }, + { + "epoch": 0.3931891125838856, + "grad_norm": 342.0, + "learning_rate": 6.921910745462476e-05, + "loss": 14.1252, + "step": 9433 + }, + { + "epoch": 0.3932307948814139, + "grad_norm": 434.0, + "learning_rate": 6.921287583935535e-05, + "loss": 16.7501, + "step": 9434 + }, + { + "epoch": 0.3932724771789421, + "grad_norm": 240.0, + "learning_rate": 6.920664387392953e-05, + "loss": 11.438, + "step": 9435 + }, + { + "epoch": 0.39331415947647036, + "grad_norm": 124.5, + "learning_rate": 6.920041155846094e-05, + "loss": 9.4389, + "step": 9436 + }, + { + "epoch": 0.39335584177399857, + "grad_norm": 134.0, + "learning_rate": 6.919417889306314e-05, + "loss": 9.1878, + "step": 9437 + }, + { + "epoch": 0.39339752407152684, + "grad_norm": 144.0, + "learning_rate": 6.918794587784973e-05, + "loss": 9.6256, + "step": 9438 + }, + { + "epoch": 0.39343920636905505, + "grad_norm": 466.0, + "learning_rate": 6.91817125129343e-05, + "loss": 15.5003, + "step": 9439 + }, + { + "epoch": 0.3934808886665833, + "grad_norm": 296.0, + "learning_rate": 6.917547879843047e-05, + "loss": 12.7502, + "step": 9440 + }, + { + "epoch": 0.39352257096411153, + "grad_norm": 628.0, + "learning_rate": 6.916924473445182e-05, + "loss": 18.379, + "step": 9441 + }, + { + "epoch": 0.3935642532616398, + "grad_norm": 278.0, + "learning_rate": 6.916301032111198e-05, + "loss": 12.1883, + "step": 9442 + }, + { + "epoch": 0.393605935559168, + "grad_norm": 282.0, + "learning_rate": 6.915677555852459e-05, + "loss": 13.5006, + "step": 9443 + }, + { + "epoch": 0.3936476178566963, + "grad_norm": 57.5, + "learning_rate": 6.915054044680326e-05, + "loss": 6.7818, + "step": 9444 + }, + { + "epoch": 0.3936893001542245, + "grad_norm": 294.0, + "learning_rate": 6.914430498606162e-05, + "loss": 13.1251, + "step": 9445 + }, + { + "epoch": 0.39373098245175275, + "grad_norm": 440.0, + "learning_rate": 6.913806917641334e-05, + "loss": 15.1877, + "step": 9446 + }, + { + "epoch": 0.39377266474928097, + "grad_norm": 92.0, + "learning_rate": 6.913183301797204e-05, + "loss": 6.1564, + "step": 9447 + }, + { + "epoch": 0.39381434704680923, + "grad_norm": 173.0, + "learning_rate": 6.912559651085139e-05, + "loss": 10.8126, + "step": 9448 + }, + { + "epoch": 0.39385602934433744, + "grad_norm": 290.0, + "learning_rate": 6.911935965516504e-05, + "loss": 14.3128, + "step": 9449 + }, + { + "epoch": 0.3938977116418657, + "grad_norm": 440.0, + "learning_rate": 6.911312245102665e-05, + "loss": 15.1256, + "step": 9450 + }, + { + "epoch": 0.3939393939393939, + "grad_norm": 744.0, + "learning_rate": 6.910688489854993e-05, + "loss": 20.6252, + "step": 9451 + }, + { + "epoch": 0.3939810762369222, + "grad_norm": 388.0, + "learning_rate": 6.910064699784853e-05, + "loss": 14.5002, + "step": 9452 + }, + { + "epoch": 0.3940227585344504, + "grad_norm": 236.0, + "learning_rate": 6.909440874903615e-05, + "loss": 12.1252, + "step": 9453 + }, + { + "epoch": 0.39406444083197867, + "grad_norm": 140.0, + "learning_rate": 6.908817015222646e-05, + "loss": 10.6877, + "step": 9454 + }, + { + "epoch": 0.3941061231295069, + "grad_norm": 400.0, + "learning_rate": 6.908193120753317e-05, + "loss": 15.6263, + "step": 9455 + }, + { + "epoch": 0.39414780542703515, + "grad_norm": 436.0, + "learning_rate": 6.907569191507001e-05, + "loss": 16.5003, + "step": 9456 + }, + { + "epoch": 0.39418948772456336, + "grad_norm": 306.0, + "learning_rate": 6.906945227495066e-05, + "loss": 14.1877, + "step": 9457 + }, + { + "epoch": 0.3942311700220916, + "grad_norm": 236.0, + "learning_rate": 6.906321228728885e-05, + "loss": 11.9377, + "step": 9458 + }, + { + "epoch": 0.39427285231961984, + "grad_norm": 302.0, + "learning_rate": 6.905697195219832e-05, + "loss": 12.9377, + "step": 9459 + }, + { + "epoch": 0.3943145346171481, + "grad_norm": 116.0, + "learning_rate": 6.905073126979275e-05, + "loss": 8.6887, + "step": 9460 + }, + { + "epoch": 0.3943562169146763, + "grad_norm": 170.0, + "learning_rate": 6.904449024018593e-05, + "loss": 10.3131, + "step": 9461 + }, + { + "epoch": 0.3943978992122046, + "grad_norm": 312.0, + "learning_rate": 6.90382488634916e-05, + "loss": 13.7505, + "step": 9462 + }, + { + "epoch": 0.3944395815097328, + "grad_norm": 191.0, + "learning_rate": 6.903200713982347e-05, + "loss": 10.8756, + "step": 9463 + }, + { + "epoch": 0.39448126380726106, + "grad_norm": 284.0, + "learning_rate": 6.902576506929534e-05, + "loss": 12.1877, + "step": 9464 + }, + { + "epoch": 0.3945229461047893, + "grad_norm": 320.0, + "learning_rate": 6.901952265202094e-05, + "loss": 13.4377, + "step": 9465 + }, + { + "epoch": 0.39456462840231754, + "grad_norm": 624.0, + "learning_rate": 6.901327988811405e-05, + "loss": 20.7503, + "step": 9466 + }, + { + "epoch": 0.39460631069984575, + "grad_norm": 446.0, + "learning_rate": 6.900703677768846e-05, + "loss": 15.6877, + "step": 9467 + }, + { + "epoch": 0.394647992997374, + "grad_norm": 145.0, + "learning_rate": 6.900079332085791e-05, + "loss": 8.8132, + "step": 9468 + }, + { + "epoch": 0.39468967529490223, + "grad_norm": 452.0, + "learning_rate": 6.899454951773624e-05, + "loss": 16.6254, + "step": 9469 + }, + { + "epoch": 0.3947313575924305, + "grad_norm": 274.0, + "learning_rate": 6.898830536843721e-05, + "loss": 12.1877, + "step": 9470 + }, + { + "epoch": 0.3947730398899587, + "grad_norm": 484.0, + "learning_rate": 6.898206087307465e-05, + "loss": 17.2502, + "step": 9471 + }, + { + "epoch": 0.394814722187487, + "grad_norm": 280.0, + "learning_rate": 6.897581603176231e-05, + "loss": 13.3129, + "step": 9472 + }, + { + "epoch": 0.3948564044850152, + "grad_norm": 410.0, + "learning_rate": 6.896957084461406e-05, + "loss": 14.2502, + "step": 9473 + }, + { + "epoch": 0.39489808678254346, + "grad_norm": 632.0, + "learning_rate": 6.896332531174369e-05, + "loss": 19.6252, + "step": 9474 + }, + { + "epoch": 0.39493976908007167, + "grad_norm": 508.0, + "learning_rate": 6.895707943326504e-05, + "loss": 16.3752, + "step": 9475 + }, + { + "epoch": 0.39498145137759993, + "grad_norm": 1504.0, + "learning_rate": 6.895083320929192e-05, + "loss": 31.0039, + "step": 9476 + }, + { + "epoch": 0.39502313367512815, + "grad_norm": 57.75, + "learning_rate": 6.89445866399382e-05, + "loss": 6.0315, + "step": 9477 + }, + { + "epoch": 0.3950648159726564, + "grad_norm": 330.0, + "learning_rate": 6.89383397253177e-05, + "loss": 14.0017, + "step": 9478 + }, + { + "epoch": 0.3951064982701846, + "grad_norm": 195.0, + "learning_rate": 6.893209246554428e-05, + "loss": 12.1879, + "step": 9479 + }, + { + "epoch": 0.3951481805677129, + "grad_norm": 249.0, + "learning_rate": 6.89258448607318e-05, + "loss": 12.3137, + "step": 9480 + }, + { + "epoch": 0.39518986286524116, + "grad_norm": 476.0, + "learning_rate": 6.891959691099407e-05, + "loss": 17.1252, + "step": 9481 + }, + { + "epoch": 0.39523154516276937, + "grad_norm": 448.0, + "learning_rate": 6.891334861644506e-05, + "loss": 16.3752, + "step": 9482 + }, + { + "epoch": 0.39527322746029764, + "grad_norm": 258.0, + "learning_rate": 6.890709997719858e-05, + "loss": 7.157, + "step": 9483 + }, + { + "epoch": 0.39531490975782585, + "grad_norm": 152.0, + "learning_rate": 6.890085099336851e-05, + "loss": 10.0628, + "step": 9484 + }, + { + "epoch": 0.3953565920553541, + "grad_norm": 414.0, + "learning_rate": 6.889460166506877e-05, + "loss": 17.0002, + "step": 9485 + }, + { + "epoch": 0.39539827435288233, + "grad_norm": 232.0, + "learning_rate": 6.888835199241324e-05, + "loss": 7.283, + "step": 9486 + }, + { + "epoch": 0.3954399566504106, + "grad_norm": 296.0, + "learning_rate": 6.88821019755158e-05, + "loss": 12.688, + "step": 9487 + }, + { + "epoch": 0.3954816389479388, + "grad_norm": 414.0, + "learning_rate": 6.88758516144904e-05, + "loss": 17.1262, + "step": 9488 + }, + { + "epoch": 0.3955233212454671, + "grad_norm": 470.0, + "learning_rate": 6.886960090945091e-05, + "loss": 15.6255, + "step": 9489 + }, + { + "epoch": 0.3955650035429953, + "grad_norm": 318.0, + "learning_rate": 6.886334986051126e-05, + "loss": 12.0005, + "step": 9490 + }, + { + "epoch": 0.39560668584052355, + "grad_norm": 600.0, + "learning_rate": 6.885709846778541e-05, + "loss": 20.5002, + "step": 9491 + }, + { + "epoch": 0.39564836813805176, + "grad_norm": 214.0, + "learning_rate": 6.885084673138724e-05, + "loss": 11.2502, + "step": 9492 + }, + { + "epoch": 0.39569005043558003, + "grad_norm": 1264.0, + "learning_rate": 6.884459465143071e-05, + "loss": 38.0012, + "step": 9493 + }, + { + "epoch": 0.39573173273310824, + "grad_norm": 170.0, + "learning_rate": 6.88383422280298e-05, + "loss": 10.3754, + "step": 9494 + }, + { + "epoch": 0.3957734150306365, + "grad_norm": 380.0, + "learning_rate": 6.883208946129842e-05, + "loss": 13.1878, + "step": 9495 + }, + { + "epoch": 0.3958150973281647, + "grad_norm": 684.0, + "learning_rate": 6.882583635135054e-05, + "loss": 21.1255, + "step": 9496 + }, + { + "epoch": 0.395856779625693, + "grad_norm": 256.0, + "learning_rate": 6.881958289830011e-05, + "loss": 13.6269, + "step": 9497 + }, + { + "epoch": 0.3958984619232212, + "grad_norm": 416.0, + "learning_rate": 6.881332910226113e-05, + "loss": 15.1261, + "step": 9498 + }, + { + "epoch": 0.39594014422074947, + "grad_norm": 376.0, + "learning_rate": 6.880707496334753e-05, + "loss": 15.0633, + "step": 9499 + }, + { + "epoch": 0.3959818265182777, + "grad_norm": 444.0, + "learning_rate": 6.880082048167333e-05, + "loss": 16.3752, + "step": 9500 + }, + { + "epoch": 0.39602350881580595, + "grad_norm": 432.0, + "learning_rate": 6.879456565735252e-05, + "loss": 15.6879, + "step": 9501 + }, + { + "epoch": 0.39606519111333416, + "grad_norm": 240.0, + "learning_rate": 6.878831049049908e-05, + "loss": 11.3128, + "step": 9502 + }, + { + "epoch": 0.3961068734108624, + "grad_norm": 360.0, + "learning_rate": 6.8782054981227e-05, + "loss": 14.0627, + "step": 9503 + }, + { + "epoch": 0.39614855570839064, + "grad_norm": 474.0, + "learning_rate": 6.877579912965032e-05, + "loss": 17.0002, + "step": 9504 + }, + { + "epoch": 0.3961902380059189, + "grad_norm": 302.0, + "learning_rate": 6.876954293588301e-05, + "loss": 13.0633, + "step": 9505 + }, + { + "epoch": 0.3962319203034471, + "grad_norm": 256.0, + "learning_rate": 6.876328640003912e-05, + "loss": 12.8127, + "step": 9506 + }, + { + "epoch": 0.3962736026009754, + "grad_norm": 466.0, + "learning_rate": 6.875702952223267e-05, + "loss": 11.8753, + "step": 9507 + }, + { + "epoch": 0.3963152848985036, + "grad_norm": 150.0, + "learning_rate": 6.87507723025777e-05, + "loss": 10.5008, + "step": 9508 + }, + { + "epoch": 0.39635696719603186, + "grad_norm": 656.0, + "learning_rate": 6.874451474118824e-05, + "loss": 17.2513, + "step": 9509 + }, + { + "epoch": 0.3963986494935601, + "grad_norm": 122.0, + "learning_rate": 6.873825683817833e-05, + "loss": 8.7505, + "step": 9510 + }, + { + "epoch": 0.39644033179108834, + "grad_norm": 310.0, + "learning_rate": 6.873199859366204e-05, + "loss": 14.313, + "step": 9511 + }, + { + "epoch": 0.39648201408861655, + "grad_norm": 472.0, + "learning_rate": 6.87257400077534e-05, + "loss": 17.2504, + "step": 9512 + }, + { + "epoch": 0.3965236963861448, + "grad_norm": 376.0, + "learning_rate": 6.871948108056649e-05, + "loss": 12.1253, + "step": 9513 + }, + { + "epoch": 0.39656537868367303, + "grad_norm": 162.0, + "learning_rate": 6.871322181221537e-05, + "loss": 9.313, + "step": 9514 + }, + { + "epoch": 0.3966070609812013, + "grad_norm": 354.0, + "learning_rate": 6.870696220281412e-05, + "loss": 15.0004, + "step": 9515 + }, + { + "epoch": 0.3966487432787295, + "grad_norm": 223.0, + "learning_rate": 6.870070225247682e-05, + "loss": 12.0628, + "step": 9516 + }, + { + "epoch": 0.3966904255762578, + "grad_norm": 258.0, + "learning_rate": 6.869444196131758e-05, + "loss": 12.5004, + "step": 9517 + }, + { + "epoch": 0.396732107873786, + "grad_norm": 720.0, + "learning_rate": 6.868818132945045e-05, + "loss": 22.3756, + "step": 9518 + }, + { + "epoch": 0.39677379017131426, + "grad_norm": 272.0, + "learning_rate": 6.868192035698957e-05, + "loss": 9.8143, + "step": 9519 + }, + { + "epoch": 0.39681547246884247, + "grad_norm": 1192.0, + "learning_rate": 6.867565904404903e-05, + "loss": 29.0001, + "step": 9520 + }, + { + "epoch": 0.39685715476637073, + "grad_norm": 296.0, + "learning_rate": 6.866939739074297e-05, + "loss": 13.1253, + "step": 9521 + }, + { + "epoch": 0.39689883706389895, + "grad_norm": 486.0, + "learning_rate": 6.866313539718545e-05, + "loss": 17.3753, + "step": 9522 + }, + { + "epoch": 0.3969405193614272, + "grad_norm": 1480.0, + "learning_rate": 6.865687306349064e-05, + "loss": 34.0002, + "step": 9523 + }, + { + "epoch": 0.3969822016589554, + "grad_norm": 1152.0, + "learning_rate": 6.865061038977268e-05, + "loss": 25.8796, + "step": 9524 + }, + { + "epoch": 0.3970238839564837, + "grad_norm": 406.0, + "learning_rate": 6.864434737614567e-05, + "loss": 16.1252, + "step": 9525 + }, + { + "epoch": 0.3970655662540119, + "grad_norm": 1024.0, + "learning_rate": 6.863808402272379e-05, + "loss": 24.0027, + "step": 9526 + }, + { + "epoch": 0.39710724855154017, + "grad_norm": 608.0, + "learning_rate": 6.863182032962117e-05, + "loss": 16.8758, + "step": 9527 + }, + { + "epoch": 0.3971489308490684, + "grad_norm": 416.0, + "learning_rate": 6.862555629695196e-05, + "loss": 14.9385, + "step": 9528 + }, + { + "epoch": 0.39719061314659665, + "grad_norm": 74.0, + "learning_rate": 6.861929192483034e-05, + "loss": 8.0628, + "step": 9529 + }, + { + "epoch": 0.39723229544412486, + "grad_norm": 684.0, + "learning_rate": 6.861302721337046e-05, + "loss": 19.2547, + "step": 9530 + }, + { + "epoch": 0.39727397774165313, + "grad_norm": 276.0, + "learning_rate": 6.86067621626865e-05, + "loss": 12.6251, + "step": 9531 + }, + { + "epoch": 0.39731566003918134, + "grad_norm": 211.0, + "learning_rate": 6.860049677289269e-05, + "loss": 12.4387, + "step": 9532 + }, + { + "epoch": 0.3973573423367096, + "grad_norm": 736.0, + "learning_rate": 6.859423104410313e-05, + "loss": 22.0004, + "step": 9533 + }, + { + "epoch": 0.3973990246342378, + "grad_norm": 484.0, + "learning_rate": 6.858796497643209e-05, + "loss": 14.6277, + "step": 9534 + }, + { + "epoch": 0.3974407069317661, + "grad_norm": 245.0, + "learning_rate": 6.858169856999372e-05, + "loss": 10.9378, + "step": 9535 + }, + { + "epoch": 0.3974823892292943, + "grad_norm": 172.0, + "learning_rate": 6.857543182490225e-05, + "loss": 10.0011, + "step": 9536 + }, + { + "epoch": 0.39752407152682256, + "grad_norm": 216.0, + "learning_rate": 6.85691647412719e-05, + "loss": 11.7503, + "step": 9537 + }, + { + "epoch": 0.3975657538243508, + "grad_norm": 125.0, + "learning_rate": 6.856289731921685e-05, + "loss": 8.8753, + "step": 9538 + }, + { + "epoch": 0.39760743612187904, + "grad_norm": 876.0, + "learning_rate": 6.855662955885137e-05, + "loss": 23.3756, + "step": 9539 + }, + { + "epoch": 0.39764911841940725, + "grad_norm": 105.0, + "learning_rate": 6.855036146028966e-05, + "loss": 9.1883, + "step": 9540 + }, + { + "epoch": 0.3976908007169355, + "grad_norm": 1200.0, + "learning_rate": 6.854409302364597e-05, + "loss": 25.3792, + "step": 9541 + }, + { + "epoch": 0.39773248301446373, + "grad_norm": 266.0, + "learning_rate": 6.853782424903453e-05, + "loss": 12.0627, + "step": 9542 + }, + { + "epoch": 0.397774165311992, + "grad_norm": 344.0, + "learning_rate": 6.853155513656959e-05, + "loss": 12.6253, + "step": 9543 + }, + { + "epoch": 0.3978158476095202, + "grad_norm": 576.0, + "learning_rate": 6.852528568636543e-05, + "loss": 19.8758, + "step": 9544 + }, + { + "epoch": 0.3978575299070485, + "grad_norm": 162.0, + "learning_rate": 6.851901589853627e-05, + "loss": 10.188, + "step": 9545 + }, + { + "epoch": 0.3978992122045767, + "grad_norm": 324.0, + "learning_rate": 6.851274577319642e-05, + "loss": 13.3131, + "step": 9546 + }, + { + "epoch": 0.39794089450210496, + "grad_norm": 384.0, + "learning_rate": 6.850647531046014e-05, + "loss": 14.0628, + "step": 9547 + }, + { + "epoch": 0.39798257679963317, + "grad_norm": 480.0, + "learning_rate": 6.85002045104417e-05, + "loss": 16.5003, + "step": 9548 + }, + { + "epoch": 0.39802425909716144, + "grad_norm": 462.0, + "learning_rate": 6.849393337325538e-05, + "loss": 15.8754, + "step": 9549 + }, + { + "epoch": 0.39806594139468965, + "grad_norm": 880.0, + "learning_rate": 6.848766189901549e-05, + "loss": 19.1304, + "step": 9550 + }, + { + "epoch": 0.3981076236922179, + "grad_norm": 76.0, + "learning_rate": 6.848139008783632e-05, + "loss": 9.1885, + "step": 9551 + }, + { + "epoch": 0.3981493059897461, + "grad_norm": 416.0, + "learning_rate": 6.847511793983219e-05, + "loss": 16.1253, + "step": 9552 + }, + { + "epoch": 0.3981909882872744, + "grad_norm": 316.0, + "learning_rate": 6.846884545511738e-05, + "loss": 12.2502, + "step": 9553 + }, + { + "epoch": 0.39823267058480266, + "grad_norm": 177.0, + "learning_rate": 6.846257263380623e-05, + "loss": 12.001, + "step": 9554 + }, + { + "epoch": 0.3982743528823309, + "grad_norm": 736.0, + "learning_rate": 6.845629947601304e-05, + "loss": 20.7524, + "step": 9555 + }, + { + "epoch": 0.39831603517985914, + "grad_norm": 230.0, + "learning_rate": 6.845002598185215e-05, + "loss": 10.3131, + "step": 9556 + }, + { + "epoch": 0.39835771747738735, + "grad_norm": 93.5, + "learning_rate": 6.844375215143792e-05, + "loss": 7.8441, + "step": 9557 + }, + { + "epoch": 0.3983993997749156, + "grad_norm": 76.0, + "learning_rate": 6.843747798488466e-05, + "loss": 7.5004, + "step": 9558 + }, + { + "epoch": 0.39844108207244383, + "grad_norm": 136.0, + "learning_rate": 6.843120348230672e-05, + "loss": 9.5628, + "step": 9559 + }, + { + "epoch": 0.3984827643699721, + "grad_norm": 300.0, + "learning_rate": 6.842492864381848e-05, + "loss": 13.5017, + "step": 9560 + }, + { + "epoch": 0.3985244466675003, + "grad_norm": 152.0, + "learning_rate": 6.841865346953427e-05, + "loss": 9.1263, + "step": 9561 + }, + { + "epoch": 0.3985661289650286, + "grad_norm": 170.0, + "learning_rate": 6.841237795956845e-05, + "loss": 10.6878, + "step": 9562 + }, + { + "epoch": 0.3986078112625568, + "grad_norm": 434.0, + "learning_rate": 6.840610211403542e-05, + "loss": 16.3754, + "step": 9563 + }, + { + "epoch": 0.39864949356008506, + "grad_norm": 350.0, + "learning_rate": 6.839982593304954e-05, + "loss": 15.2503, + "step": 9564 + }, + { + "epoch": 0.39869117585761327, + "grad_norm": 115.5, + "learning_rate": 6.839354941672522e-05, + "loss": 9.5003, + "step": 9565 + }, + { + "epoch": 0.39873285815514153, + "grad_norm": 392.0, + "learning_rate": 6.83872725651768e-05, + "loss": 14.8127, + "step": 9566 + }, + { + "epoch": 0.39877454045266975, + "grad_norm": 94.5, + "learning_rate": 6.838099537851874e-05, + "loss": 7.5939, + "step": 9567 + }, + { + "epoch": 0.398816222750198, + "grad_norm": 272.0, + "learning_rate": 6.837471785686537e-05, + "loss": 14.0012, + "step": 9568 + }, + { + "epoch": 0.3988579050477262, + "grad_norm": 141.0, + "learning_rate": 6.836844000033115e-05, + "loss": 8.1885, + "step": 9569 + }, + { + "epoch": 0.3988995873452545, + "grad_norm": 245.0, + "learning_rate": 6.836216180903048e-05, + "loss": 12.3127, + "step": 9570 + }, + { + "epoch": 0.3989412696427827, + "grad_norm": 472.0, + "learning_rate": 6.835588328307779e-05, + "loss": 17.0001, + "step": 9571 + }, + { + "epoch": 0.39898295194031097, + "grad_norm": 160.0, + "learning_rate": 6.834960442258749e-05, + "loss": 7.5941, + "step": 9572 + }, + { + "epoch": 0.3990246342378392, + "grad_norm": 450.0, + "learning_rate": 6.834332522767402e-05, + "loss": 15.6253, + "step": 9573 + }, + { + "epoch": 0.39906631653536745, + "grad_norm": 272.0, + "learning_rate": 6.833704569845182e-05, + "loss": 12.7503, + "step": 9574 + }, + { + "epoch": 0.39910799883289566, + "grad_norm": 548.0, + "learning_rate": 6.833076583503533e-05, + "loss": 16.6252, + "step": 9575 + }, + { + "epoch": 0.39914968113042393, + "grad_norm": 290.0, + "learning_rate": 6.832448563753902e-05, + "loss": 13.3752, + "step": 9576 + }, + { + "epoch": 0.39919136342795214, + "grad_norm": 296.0, + "learning_rate": 6.831820510607731e-05, + "loss": 13.0627, + "step": 9577 + }, + { + "epoch": 0.3992330457254804, + "grad_norm": 66.0, + "learning_rate": 6.831192424076471e-05, + "loss": 8.5628, + "step": 9578 + }, + { + "epoch": 0.3992747280230086, + "grad_norm": 548.0, + "learning_rate": 6.830564304171565e-05, + "loss": 17.8755, + "step": 9579 + }, + { + "epoch": 0.3993164103205369, + "grad_norm": 146.0, + "learning_rate": 6.829936150904463e-05, + "loss": 9.7503, + "step": 9580 + }, + { + "epoch": 0.3993580926180651, + "grad_norm": 145.0, + "learning_rate": 6.829307964286609e-05, + "loss": 7.9692, + "step": 9581 + }, + { + "epoch": 0.39939977491559336, + "grad_norm": 194.0, + "learning_rate": 6.828679744329459e-05, + "loss": 9.3141, + "step": 9582 + }, + { + "epoch": 0.3994414572131216, + "grad_norm": 324.0, + "learning_rate": 6.828051491044455e-05, + "loss": 14.5629, + "step": 9583 + }, + { + "epoch": 0.39948313951064984, + "grad_norm": 768.0, + "learning_rate": 6.827423204443054e-05, + "loss": 22.5002, + "step": 9584 + }, + { + "epoch": 0.39952482180817805, + "grad_norm": 1160.0, + "learning_rate": 6.826794884536701e-05, + "loss": 22.7553, + "step": 9585 + }, + { + "epoch": 0.3995665041057063, + "grad_norm": 436.0, + "learning_rate": 6.82616653133685e-05, + "loss": 16.1258, + "step": 9586 + }, + { + "epoch": 0.39960818640323453, + "grad_norm": 132.0, + "learning_rate": 6.825538144854951e-05, + "loss": 9.6877, + "step": 9587 + }, + { + "epoch": 0.3996498687007628, + "grad_norm": 592.0, + "learning_rate": 6.824909725102457e-05, + "loss": 18.7502, + "step": 9588 + }, + { + "epoch": 0.399691550998291, + "grad_norm": 296.0, + "learning_rate": 6.82428127209082e-05, + "loss": 14.5004, + "step": 9589 + }, + { + "epoch": 0.3997332332958193, + "grad_norm": 68.0, + "learning_rate": 6.823652785831498e-05, + "loss": 7.4689, + "step": 9590 + }, + { + "epoch": 0.3997749155933475, + "grad_norm": 912.0, + "learning_rate": 6.82302426633594e-05, + "loss": 21.7576, + "step": 9591 + }, + { + "epoch": 0.39981659789087576, + "grad_norm": 438.0, + "learning_rate": 6.822395713615603e-05, + "loss": 15.3145, + "step": 9592 + }, + { + "epoch": 0.39985828018840397, + "grad_norm": 462.0, + "learning_rate": 6.821767127681942e-05, + "loss": 16.6258, + "step": 9593 + }, + { + "epoch": 0.39989996248593224, + "grad_norm": 232.0, + "learning_rate": 6.821138508546414e-05, + "loss": 11.6878, + "step": 9594 + }, + { + "epoch": 0.39994164478346045, + "grad_norm": 254.0, + "learning_rate": 6.820509856220476e-05, + "loss": 11.4379, + "step": 9595 + }, + { + "epoch": 0.3999833270809887, + "grad_norm": 100.0, + "learning_rate": 6.819881170715584e-05, + "loss": 9.2503, + "step": 9596 + }, + { + "epoch": 0.4000250093785169, + "grad_norm": 250.0, + "learning_rate": 6.819252452043194e-05, + "loss": 12.2501, + "step": 9597 + }, + { + "epoch": 0.4000666916760452, + "grad_norm": 217.0, + "learning_rate": 6.818623700214768e-05, + "loss": 11.6876, + "step": 9598 + }, + { + "epoch": 0.4001083739735734, + "grad_norm": 1048.0, + "learning_rate": 6.817994915241764e-05, + "loss": 26.5002, + "step": 9599 + }, + { + "epoch": 0.4001500562711017, + "grad_norm": 748.0, + "learning_rate": 6.817366097135641e-05, + "loss": 18.0003, + "step": 9600 + }, + { + "epoch": 0.4001917385686299, + "grad_norm": 382.0, + "learning_rate": 6.81673724590786e-05, + "loss": 15.3756, + "step": 9601 + }, + { + "epoch": 0.40023342086615815, + "grad_norm": 63.25, + "learning_rate": 6.816108361569881e-05, + "loss": 7.2504, + "step": 9602 + }, + { + "epoch": 0.40027510316368636, + "grad_norm": 528.0, + "learning_rate": 6.815479444133166e-05, + "loss": 18.3757, + "step": 9603 + }, + { + "epoch": 0.40031678546121463, + "grad_norm": 390.0, + "learning_rate": 6.814850493609176e-05, + "loss": 15.9379, + "step": 9604 + }, + { + "epoch": 0.40035846775874284, + "grad_norm": 384.0, + "learning_rate": 6.814221510009376e-05, + "loss": 15.1882, + "step": 9605 + }, + { + "epoch": 0.4004001500562711, + "grad_norm": 416.0, + "learning_rate": 6.813592493345227e-05, + "loss": 15.1255, + "step": 9606 + }, + { + "epoch": 0.4004418323537993, + "grad_norm": 572.0, + "learning_rate": 6.812963443628194e-05, + "loss": 18.7501, + "step": 9607 + }, + { + "epoch": 0.4004835146513276, + "grad_norm": 320.0, + "learning_rate": 6.812334360869742e-05, + "loss": 14.2502, + "step": 9608 + }, + { + "epoch": 0.4005251969488558, + "grad_norm": 133.0, + "learning_rate": 6.811705245081333e-05, + "loss": 9.5003, + "step": 9609 + }, + { + "epoch": 0.40056687924638407, + "grad_norm": 130.0, + "learning_rate": 6.811076096274438e-05, + "loss": 11.2509, + "step": 9610 + }, + { + "epoch": 0.4006085615439123, + "grad_norm": 712.0, + "learning_rate": 6.810446914460519e-05, + "loss": 18.3766, + "step": 9611 + }, + { + "epoch": 0.40065024384144055, + "grad_norm": 203.0, + "learning_rate": 6.809817699651045e-05, + "loss": 10.7506, + "step": 9612 + }, + { + "epoch": 0.40069192613896876, + "grad_norm": 268.0, + "learning_rate": 6.809188451857482e-05, + "loss": 12.1254, + "step": 9613 + }, + { + "epoch": 0.400733608436497, + "grad_norm": 600.0, + "learning_rate": 6.808559171091298e-05, + "loss": 18.2513, + "step": 9614 + }, + { + "epoch": 0.40077529073402524, + "grad_norm": 95.5, + "learning_rate": 6.807929857363964e-05, + "loss": 9.2501, + "step": 9615 + }, + { + "epoch": 0.4008169730315535, + "grad_norm": 600.0, + "learning_rate": 6.807300510686949e-05, + "loss": 19.8754, + "step": 9616 + }, + { + "epoch": 0.4008586553290817, + "grad_norm": 508.0, + "learning_rate": 6.80667113107172e-05, + "loss": 17.6253, + "step": 9617 + }, + { + "epoch": 0.40090033762661, + "grad_norm": 320.0, + "learning_rate": 6.806041718529749e-05, + "loss": 12.313, + "step": 9618 + }, + { + "epoch": 0.4009420199241382, + "grad_norm": 382.0, + "learning_rate": 6.805412273072506e-05, + "loss": 16.1251, + "step": 9619 + }, + { + "epoch": 0.40098370222166646, + "grad_norm": 386.0, + "learning_rate": 6.804782794711467e-05, + "loss": 16.3755, + "step": 9620 + }, + { + "epoch": 0.40102538451919467, + "grad_norm": 516.0, + "learning_rate": 6.804153283458102e-05, + "loss": 18.7502, + "step": 9621 + }, + { + "epoch": 0.40106706681672294, + "grad_norm": 294.0, + "learning_rate": 6.803523739323881e-05, + "loss": 15.1258, + "step": 9622 + }, + { + "epoch": 0.40110874911425115, + "grad_norm": 960.0, + "learning_rate": 6.80289416232028e-05, + "loss": 24.8761, + "step": 9623 + }, + { + "epoch": 0.4011504314117794, + "grad_norm": 580.0, + "learning_rate": 6.802264552458773e-05, + "loss": 17.7502, + "step": 9624 + }, + { + "epoch": 0.40119211370930763, + "grad_norm": 38.5, + "learning_rate": 6.801634909750834e-05, + "loss": 6.188, + "step": 9625 + }, + { + "epoch": 0.4012337960068359, + "grad_norm": 472.0, + "learning_rate": 6.80100523420794e-05, + "loss": 17.3753, + "step": 9626 + }, + { + "epoch": 0.40127547830436416, + "grad_norm": 342.0, + "learning_rate": 6.800375525841566e-05, + "loss": 13.3755, + "step": 9627 + }, + { + "epoch": 0.4013171606018924, + "grad_norm": 440.0, + "learning_rate": 6.799745784663187e-05, + "loss": 16.7502, + "step": 9628 + }, + { + "epoch": 0.40135884289942064, + "grad_norm": 378.0, + "learning_rate": 6.799116010684282e-05, + "loss": 17.5012, + "step": 9629 + }, + { + "epoch": 0.40140052519694885, + "grad_norm": 380.0, + "learning_rate": 6.798486203916328e-05, + "loss": 15.6877, + "step": 9630 + }, + { + "epoch": 0.4014422074944771, + "grad_norm": 324.0, + "learning_rate": 6.797856364370802e-05, + "loss": 15.6253, + "step": 9631 + }, + { + "epoch": 0.40148388979200533, + "grad_norm": 596.0, + "learning_rate": 6.797226492059186e-05, + "loss": 18.3751, + "step": 9632 + }, + { + "epoch": 0.4015255720895336, + "grad_norm": 1064.0, + "learning_rate": 6.796596586992956e-05, + "loss": 23.5056, + "step": 9633 + }, + { + "epoch": 0.4015672543870618, + "grad_norm": 848.0, + "learning_rate": 6.795966649183596e-05, + "loss": 23.8779, + "step": 9634 + }, + { + "epoch": 0.4016089366845901, + "grad_norm": 474.0, + "learning_rate": 6.795336678642582e-05, + "loss": 14.938, + "step": 9635 + }, + { + "epoch": 0.4016506189821183, + "grad_norm": 133.0, + "learning_rate": 6.794706675381398e-05, + "loss": 10.1255, + "step": 9636 + }, + { + "epoch": 0.40169230127964656, + "grad_norm": 95.5, + "learning_rate": 6.794076639411526e-05, + "loss": 7.7191, + "step": 9637 + }, + { + "epoch": 0.40173398357717477, + "grad_norm": 532.0, + "learning_rate": 6.793446570744448e-05, + "loss": 16.2505, + "step": 9638 + }, + { + "epoch": 0.40177566587470304, + "grad_norm": 183.0, + "learning_rate": 6.792816469391647e-05, + "loss": 10.8127, + "step": 9639 + }, + { + "epoch": 0.40181734817223125, + "grad_norm": 212.0, + "learning_rate": 6.792186335364608e-05, + "loss": 11.8131, + "step": 9640 + }, + { + "epoch": 0.4018590304697595, + "grad_norm": 166.0, + "learning_rate": 6.791556168674813e-05, + "loss": 10.0002, + "step": 9641 + }, + { + "epoch": 0.4019007127672877, + "grad_norm": 148.0, + "learning_rate": 6.790925969333748e-05, + "loss": 8.9386, + "step": 9642 + }, + { + "epoch": 0.401942395064816, + "grad_norm": 70.0, + "learning_rate": 6.790295737352898e-05, + "loss": 5.0627, + "step": 9643 + }, + { + "epoch": 0.4019840773623442, + "grad_norm": 187.0, + "learning_rate": 6.789665472743747e-05, + "loss": 11.0005, + "step": 9644 + }, + { + "epoch": 0.4020257596598725, + "grad_norm": 438.0, + "learning_rate": 6.789035175517786e-05, + "loss": 15.8133, + "step": 9645 + }, + { + "epoch": 0.4020674419574007, + "grad_norm": 418.0, + "learning_rate": 6.7884048456865e-05, + "loss": 15.0047, + "step": 9646 + }, + { + "epoch": 0.40210912425492895, + "grad_norm": 422.0, + "learning_rate": 6.787774483261377e-05, + "loss": 15.9381, + "step": 9647 + }, + { + "epoch": 0.40215080655245716, + "grad_norm": 107.0, + "learning_rate": 6.787144088253906e-05, + "loss": 8.5634, + "step": 9648 + }, + { + "epoch": 0.40219248884998543, + "grad_norm": 89.0, + "learning_rate": 6.786513660675573e-05, + "loss": 8.6252, + "step": 9649 + }, + { + "epoch": 0.40223417114751364, + "grad_norm": 442.0, + "learning_rate": 6.785883200537872e-05, + "loss": 14.8796, + "step": 9650 + }, + { + "epoch": 0.4022758534450419, + "grad_norm": 243.0, + "learning_rate": 6.785252707852292e-05, + "loss": 11.5001, + "step": 9651 + }, + { + "epoch": 0.4023175357425701, + "grad_norm": 225.0, + "learning_rate": 6.784622182630322e-05, + "loss": 13.2503, + "step": 9652 + }, + { + "epoch": 0.4023592180400984, + "grad_norm": 57.75, + "learning_rate": 6.783991624883453e-05, + "loss": 7.2818, + "step": 9653 + }, + { + "epoch": 0.4024009003376266, + "grad_norm": 51.25, + "learning_rate": 6.78336103462318e-05, + "loss": 8.0004, + "step": 9654 + }, + { + "epoch": 0.40244258263515487, + "grad_norm": 243.0, + "learning_rate": 6.782730411860993e-05, + "loss": 11.1252, + "step": 9655 + }, + { + "epoch": 0.4024842649326831, + "grad_norm": 540.0, + "learning_rate": 6.782099756608387e-05, + "loss": 17.2502, + "step": 9656 + }, + { + "epoch": 0.40252594723021135, + "grad_norm": 434.0, + "learning_rate": 6.78146906887685e-05, + "loss": 16.626, + "step": 9657 + }, + { + "epoch": 0.40256762952773956, + "grad_norm": 170.0, + "learning_rate": 6.780838348677887e-05, + "loss": 10.5629, + "step": 9658 + }, + { + "epoch": 0.4026093118252678, + "grad_norm": 202.0, + "learning_rate": 6.780207596022985e-05, + "loss": 11.5006, + "step": 9659 + }, + { + "epoch": 0.40265099412279604, + "grad_norm": 340.0, + "learning_rate": 6.779576810923641e-05, + "loss": 14.5629, + "step": 9660 + }, + { + "epoch": 0.4026926764203243, + "grad_norm": 145.0, + "learning_rate": 6.778945993391353e-05, + "loss": 7.9065, + "step": 9661 + }, + { + "epoch": 0.4027343587178525, + "grad_norm": 404.0, + "learning_rate": 6.778315143437615e-05, + "loss": 15.0006, + "step": 9662 + }, + { + "epoch": 0.4027760410153808, + "grad_norm": 53.5, + "learning_rate": 6.777684261073925e-05, + "loss": 7.1262, + "step": 9663 + }, + { + "epoch": 0.402817723312909, + "grad_norm": 158.0, + "learning_rate": 6.777053346311783e-05, + "loss": 10.7503, + "step": 9664 + }, + { + "epoch": 0.40285940561043726, + "grad_norm": 928.0, + "learning_rate": 6.776422399162685e-05, + "loss": 21.1301, + "step": 9665 + }, + { + "epoch": 0.40290108790796547, + "grad_norm": 458.0, + "learning_rate": 6.775791419638132e-05, + "loss": 16.3752, + "step": 9666 + }, + { + "epoch": 0.40294277020549374, + "grad_norm": 172.0, + "learning_rate": 6.775160407749623e-05, + "loss": 10.1253, + "step": 9667 + }, + { + "epoch": 0.40298445250302195, + "grad_norm": 300.0, + "learning_rate": 6.774529363508656e-05, + "loss": 13.6262, + "step": 9668 + }, + { + "epoch": 0.4030261348005502, + "grad_norm": 178.0, + "learning_rate": 6.773898286926736e-05, + "loss": 10.8127, + "step": 9669 + }, + { + "epoch": 0.40306781709807843, + "grad_norm": 48.25, + "learning_rate": 6.773267178015361e-05, + "loss": 7.4694, + "step": 9670 + }, + { + "epoch": 0.4031094993956067, + "grad_norm": 296.0, + "learning_rate": 6.772636036786034e-05, + "loss": 13.3767, + "step": 9671 + }, + { + "epoch": 0.4031511816931349, + "grad_norm": 636.0, + "learning_rate": 6.772004863250259e-05, + "loss": 16.8751, + "step": 9672 + }, + { + "epoch": 0.4031928639906632, + "grad_norm": 228.0, + "learning_rate": 6.771373657419538e-05, + "loss": 10.3135, + "step": 9673 + }, + { + "epoch": 0.4032345462881914, + "grad_norm": 386.0, + "learning_rate": 6.770742419305374e-05, + "loss": 12.8142, + "step": 9674 + }, + { + "epoch": 0.40327622858571965, + "grad_norm": 284.0, + "learning_rate": 6.770111148919274e-05, + "loss": 11.6255, + "step": 9675 + }, + { + "epoch": 0.40331791088324787, + "grad_norm": 412.0, + "learning_rate": 6.769479846272739e-05, + "loss": 15.8126, + "step": 9676 + }, + { + "epoch": 0.40335959318077613, + "grad_norm": 716.0, + "learning_rate": 6.768848511377277e-05, + "loss": 22.5008, + "step": 9677 + }, + { + "epoch": 0.40340127547830434, + "grad_norm": 528.0, + "learning_rate": 6.768217144244395e-05, + "loss": 16.1254, + "step": 9678 + }, + { + "epoch": 0.4034429577758326, + "grad_norm": 272.0, + "learning_rate": 6.767585744885598e-05, + "loss": 13.0005, + "step": 9679 + }, + { + "epoch": 0.4034846400733608, + "grad_norm": 210.0, + "learning_rate": 6.766954313312393e-05, + "loss": 7.6565, + "step": 9680 + }, + { + "epoch": 0.4035263223708891, + "grad_norm": 104.0, + "learning_rate": 6.766322849536291e-05, + "loss": 9.8133, + "step": 9681 + }, + { + "epoch": 0.4035680046684173, + "grad_norm": 676.0, + "learning_rate": 6.765691353568795e-05, + "loss": 21.376, + "step": 9682 + }, + { + "epoch": 0.40360968696594557, + "grad_norm": 282.0, + "learning_rate": 6.76505982542142e-05, + "loss": 12.2502, + "step": 9683 + }, + { + "epoch": 0.4036513692634738, + "grad_norm": 210.0, + "learning_rate": 6.764428265105673e-05, + "loss": 9.3755, + "step": 9684 + }, + { + "epoch": 0.40369305156100205, + "grad_norm": 632.0, + "learning_rate": 6.763796672633062e-05, + "loss": 18.3772, + "step": 9685 + }, + { + "epoch": 0.40373473385853026, + "grad_norm": 222.0, + "learning_rate": 6.763165048015102e-05, + "loss": 11.6889, + "step": 9686 + }, + { + "epoch": 0.4037764161560585, + "grad_norm": 572.0, + "learning_rate": 6.762533391263302e-05, + "loss": 18.3753, + "step": 9687 + }, + { + "epoch": 0.40381809845358674, + "grad_norm": 480.0, + "learning_rate": 6.761901702389175e-05, + "loss": 16.7523, + "step": 9688 + }, + { + "epoch": 0.403859780751115, + "grad_norm": 330.0, + "learning_rate": 6.761269981404233e-05, + "loss": 14.1256, + "step": 9689 + }, + { + "epoch": 0.4039014630486432, + "grad_norm": 428.0, + "learning_rate": 6.760638228319989e-05, + "loss": 16.6254, + "step": 9690 + }, + { + "epoch": 0.4039431453461715, + "grad_norm": 434.0, + "learning_rate": 6.760006443147956e-05, + "loss": 15.1903, + "step": 9691 + }, + { + "epoch": 0.4039848276436997, + "grad_norm": 250.0, + "learning_rate": 6.759374625899651e-05, + "loss": 13.1889, + "step": 9692 + }, + { + "epoch": 0.40402650994122796, + "grad_norm": 186.0, + "learning_rate": 6.758742776586586e-05, + "loss": 10.6253, + "step": 9693 + }, + { + "epoch": 0.4040681922387562, + "grad_norm": 103.0, + "learning_rate": 6.758110895220277e-05, + "loss": 9.5627, + "step": 9694 + }, + { + "epoch": 0.40410987453628444, + "grad_norm": 173.0, + "learning_rate": 6.757478981812242e-05, + "loss": 11.0633, + "step": 9695 + }, + { + "epoch": 0.40415155683381265, + "grad_norm": 644.0, + "learning_rate": 6.756847036373997e-05, + "loss": 19.5007, + "step": 9696 + }, + { + "epoch": 0.4041932391313409, + "grad_norm": 256.0, + "learning_rate": 6.756215058917058e-05, + "loss": 12.1876, + "step": 9697 + }, + { + "epoch": 0.40423492142886913, + "grad_norm": 187.0, + "learning_rate": 6.755583049452944e-05, + "loss": 11.8126, + "step": 9698 + }, + { + "epoch": 0.4042766037263974, + "grad_norm": 249.0, + "learning_rate": 6.754951007993173e-05, + "loss": 11.6877, + "step": 9699 + }, + { + "epoch": 0.40431828602392567, + "grad_norm": 456.0, + "learning_rate": 6.754318934549264e-05, + "loss": 16.0006, + "step": 9700 + }, + { + "epoch": 0.4043599683214539, + "grad_norm": 320.0, + "learning_rate": 6.753686829132737e-05, + "loss": 14.2511, + "step": 9701 + }, + { + "epoch": 0.40440165061898214, + "grad_norm": 772.0, + "learning_rate": 6.753054691755112e-05, + "loss": 23.3752, + "step": 9702 + }, + { + "epoch": 0.40444333291651036, + "grad_norm": 372.0, + "learning_rate": 6.75242252242791e-05, + "loss": 15.7509, + "step": 9703 + }, + { + "epoch": 0.4044850152140386, + "grad_norm": 380.0, + "learning_rate": 6.751790321162651e-05, + "loss": 13.5627, + "step": 9704 + }, + { + "epoch": 0.40452669751156684, + "grad_norm": 416.0, + "learning_rate": 6.751158087970858e-05, + "loss": 14.8751, + "step": 9705 + }, + { + "epoch": 0.4045683798090951, + "grad_norm": 140.0, + "learning_rate": 6.750525822864055e-05, + "loss": 10.1257, + "step": 9706 + }, + { + "epoch": 0.4046100621066233, + "grad_norm": 284.0, + "learning_rate": 6.74989352585376e-05, + "loss": 13.0004, + "step": 9707 + }, + { + "epoch": 0.4046517444041516, + "grad_norm": 632.0, + "learning_rate": 6.749261196951502e-05, + "loss": 19.6252, + "step": 9708 + }, + { + "epoch": 0.4046934267016798, + "grad_norm": 356.0, + "learning_rate": 6.748628836168804e-05, + "loss": 14.5004, + "step": 9709 + }, + { + "epoch": 0.40473510899920806, + "grad_norm": 142.0, + "learning_rate": 6.747996443517191e-05, + "loss": 9.6253, + "step": 9710 + }, + { + "epoch": 0.40477679129673627, + "grad_norm": 112.0, + "learning_rate": 6.747364019008185e-05, + "loss": 8.7502, + "step": 9711 + }, + { + "epoch": 0.40481847359426454, + "grad_norm": 736.0, + "learning_rate": 6.746731562653317e-05, + "loss": 24.2506, + "step": 9712 + }, + { + "epoch": 0.40486015589179275, + "grad_norm": 68.0, + "learning_rate": 6.74609907446411e-05, + "loss": 7.3755, + "step": 9713 + }, + { + "epoch": 0.404901838189321, + "grad_norm": 288.0, + "learning_rate": 6.745466554452094e-05, + "loss": 9.1254, + "step": 9714 + }, + { + "epoch": 0.40494352048684923, + "grad_norm": 584.0, + "learning_rate": 6.744834002628792e-05, + "loss": 15.7502, + "step": 9715 + }, + { + "epoch": 0.4049852027843775, + "grad_norm": 290.0, + "learning_rate": 6.744201419005738e-05, + "loss": 11.4378, + "step": 9716 + }, + { + "epoch": 0.4050268850819057, + "grad_norm": 290.0, + "learning_rate": 6.743568803594458e-05, + "loss": 12.8753, + "step": 9717 + }, + { + "epoch": 0.405068567379434, + "grad_norm": 217.0, + "learning_rate": 6.74293615640648e-05, + "loss": 12.1252, + "step": 9718 + }, + { + "epoch": 0.4051102496769622, + "grad_norm": 162.0, + "learning_rate": 6.742303477453337e-05, + "loss": 7.8132, + "step": 9719 + }, + { + "epoch": 0.40515193197449045, + "grad_norm": 390.0, + "learning_rate": 6.741670766746558e-05, + "loss": 15.1878, + "step": 9720 + }, + { + "epoch": 0.40519361427201867, + "grad_norm": 900.0, + "learning_rate": 6.741038024297676e-05, + "loss": 30.3775, + "step": 9721 + }, + { + "epoch": 0.40523529656954693, + "grad_norm": 584.0, + "learning_rate": 6.74040525011822e-05, + "loss": 16.8757, + "step": 9722 + }, + { + "epoch": 0.40527697886707514, + "grad_norm": 119.0, + "learning_rate": 6.739772444219725e-05, + "loss": 9.8127, + "step": 9723 + }, + { + "epoch": 0.4053186611646034, + "grad_norm": 752.0, + "learning_rate": 6.739139606613722e-05, + "loss": 20.8793, + "step": 9724 + }, + { + "epoch": 0.4053603434621316, + "grad_norm": 604.0, + "learning_rate": 6.738506737311747e-05, + "loss": 19.1256, + "step": 9725 + }, + { + "epoch": 0.4054020257596599, + "grad_norm": 272.0, + "learning_rate": 6.73787383632533e-05, + "loss": 13.2501, + "step": 9726 + }, + { + "epoch": 0.4054437080571881, + "grad_norm": 320.0, + "learning_rate": 6.737240903666008e-05, + "loss": 14.2502, + "step": 9727 + }, + { + "epoch": 0.40548539035471637, + "grad_norm": 163.0, + "learning_rate": 6.736607939345319e-05, + "loss": 9.6251, + "step": 9728 + }, + { + "epoch": 0.4055270726522446, + "grad_norm": 600.0, + "learning_rate": 6.735974943374793e-05, + "loss": 19.2513, + "step": 9729 + }, + { + "epoch": 0.40556875494977285, + "grad_norm": 167.0, + "learning_rate": 6.735341915765972e-05, + "loss": 9.3755, + "step": 9730 + }, + { + "epoch": 0.40561043724730106, + "grad_norm": 484.0, + "learning_rate": 6.73470885653039e-05, + "loss": 16.1268, + "step": 9731 + }, + { + "epoch": 0.4056521195448293, + "grad_norm": 332.0, + "learning_rate": 6.734075765679583e-05, + "loss": 14.0012, + "step": 9732 + }, + { + "epoch": 0.40569380184235754, + "grad_norm": 334.0, + "learning_rate": 6.733442643225094e-05, + "loss": 11.1885, + "step": 9733 + }, + { + "epoch": 0.4057354841398858, + "grad_norm": 237.0, + "learning_rate": 6.732809489178456e-05, + "loss": 11.5627, + "step": 9734 + }, + { + "epoch": 0.405777166437414, + "grad_norm": 332.0, + "learning_rate": 6.732176303551214e-05, + "loss": 14.1886, + "step": 9735 + }, + { + "epoch": 0.4058188487349423, + "grad_norm": 528.0, + "learning_rate": 6.731543086354904e-05, + "loss": 16.6253, + "step": 9736 + }, + { + "epoch": 0.4058605310324705, + "grad_norm": 60.75, + "learning_rate": 6.730909837601067e-05, + "loss": 7.5941, + "step": 9737 + }, + { + "epoch": 0.40590221332999876, + "grad_norm": 404.0, + "learning_rate": 6.730276557301246e-05, + "loss": 16.1252, + "step": 9738 + }, + { + "epoch": 0.405943895627527, + "grad_norm": 784.0, + "learning_rate": 6.72964324546698e-05, + "loss": 23.6254, + "step": 9739 + }, + { + "epoch": 0.40598557792505524, + "grad_norm": 334.0, + "learning_rate": 6.729009902109813e-05, + "loss": 15.3131, + "step": 9740 + }, + { + "epoch": 0.40602726022258345, + "grad_norm": 113.0, + "learning_rate": 6.728376527241286e-05, + "loss": 9.4378, + "step": 9741 + }, + { + "epoch": 0.4060689425201117, + "grad_norm": 482.0, + "learning_rate": 6.727743120872945e-05, + "loss": 17.7502, + "step": 9742 + }, + { + "epoch": 0.40611062481763993, + "grad_norm": 944.0, + "learning_rate": 6.72710968301633e-05, + "loss": 23.6254, + "step": 9743 + }, + { + "epoch": 0.4061523071151682, + "grad_norm": 316.0, + "learning_rate": 6.72647621368299e-05, + "loss": 13.9379, + "step": 9744 + }, + { + "epoch": 0.4061939894126964, + "grad_norm": 800.0, + "learning_rate": 6.725842712884466e-05, + "loss": 21.7506, + "step": 9745 + }, + { + "epoch": 0.4062356717102247, + "grad_norm": 186.0, + "learning_rate": 6.725209180632305e-05, + "loss": 10.2504, + "step": 9746 + }, + { + "epoch": 0.4062773540077529, + "grad_norm": 94.0, + "learning_rate": 6.724575616938055e-05, + "loss": 8.2505, + "step": 9747 + }, + { + "epoch": 0.40631903630528116, + "grad_norm": 219.0, + "learning_rate": 6.723942021813261e-05, + "loss": 12.5628, + "step": 9748 + }, + { + "epoch": 0.40636071860280937, + "grad_norm": 564.0, + "learning_rate": 6.72330839526947e-05, + "loss": 19.1252, + "step": 9749 + }, + { + "epoch": 0.40640240090033763, + "grad_norm": 242.0, + "learning_rate": 6.722674737318231e-05, + "loss": 12.1254, + "step": 9750 + }, + { + "epoch": 0.40644408319786585, + "grad_norm": 206.0, + "learning_rate": 6.722041047971093e-05, + "loss": 9.9377, + "step": 9751 + }, + { + "epoch": 0.4064857654953941, + "grad_norm": 266.0, + "learning_rate": 6.721407327239603e-05, + "loss": 11.5627, + "step": 9752 + }, + { + "epoch": 0.4065274477929223, + "grad_norm": 248.0, + "learning_rate": 6.720773575135312e-05, + "loss": 12.4387, + "step": 9753 + }, + { + "epoch": 0.4065691300904506, + "grad_norm": 157.0, + "learning_rate": 6.720139791669769e-05, + "loss": 10.1254, + "step": 9754 + }, + { + "epoch": 0.4066108123879788, + "grad_norm": 218.0, + "learning_rate": 6.719505976854527e-05, + "loss": 11.938, + "step": 9755 + }, + { + "epoch": 0.40665249468550707, + "grad_norm": 82.0, + "learning_rate": 6.718872130701135e-05, + "loss": 7.844, + "step": 9756 + }, + { + "epoch": 0.4066941769830353, + "grad_norm": 668.0, + "learning_rate": 6.718238253221145e-05, + "loss": 19.7545, + "step": 9757 + }, + { + "epoch": 0.40673585928056355, + "grad_norm": 159.0, + "learning_rate": 6.717604344426111e-05, + "loss": 9.7505, + "step": 9758 + }, + { + "epoch": 0.40677754157809176, + "grad_norm": 402.0, + "learning_rate": 6.716970404327585e-05, + "loss": 14.9377, + "step": 9759 + }, + { + "epoch": 0.40681922387562003, + "grad_norm": 324.0, + "learning_rate": 6.716336432937123e-05, + "loss": 12.3127, + "step": 9760 + }, + { + "epoch": 0.40686090617314824, + "grad_norm": 382.0, + "learning_rate": 6.715702430266275e-05, + "loss": 14.3761, + "step": 9761 + }, + { + "epoch": 0.4069025884706765, + "grad_norm": 588.0, + "learning_rate": 6.715068396326598e-05, + "loss": 19.3754, + "step": 9762 + }, + { + "epoch": 0.4069442707682047, + "grad_norm": 1144.0, + "learning_rate": 6.714434331129648e-05, + "loss": 28.7502, + "step": 9763 + }, + { + "epoch": 0.406985953065733, + "grad_norm": 396.0, + "learning_rate": 6.71380023468698e-05, + "loss": 15.1253, + "step": 9764 + }, + { + "epoch": 0.4070276353632612, + "grad_norm": 1192.0, + "learning_rate": 6.713166107010151e-05, + "loss": 29.1252, + "step": 9765 + }, + { + "epoch": 0.40706931766078946, + "grad_norm": 292.0, + "learning_rate": 6.712531948110716e-05, + "loss": 11.1254, + "step": 9766 + }, + { + "epoch": 0.4071109999583177, + "grad_norm": 422.0, + "learning_rate": 6.711897758000236e-05, + "loss": 15.4377, + "step": 9767 + }, + { + "epoch": 0.40715268225584594, + "grad_norm": 117.5, + "learning_rate": 6.711263536690266e-05, + "loss": 10.8127, + "step": 9768 + }, + { + "epoch": 0.40719436455337416, + "grad_norm": 270.0, + "learning_rate": 6.710629284192367e-05, + "loss": 13.0629, + "step": 9769 + }, + { + "epoch": 0.4072360468509024, + "grad_norm": 528.0, + "learning_rate": 6.709995000518097e-05, + "loss": 17.3764, + "step": 9770 + }, + { + "epoch": 0.40727772914843063, + "grad_norm": 177.0, + "learning_rate": 6.709360685679016e-05, + "loss": 10.1878, + "step": 9771 + }, + { + "epoch": 0.4073194114459589, + "grad_norm": 324.0, + "learning_rate": 6.708726339686686e-05, + "loss": 14.3127, + "step": 9772 + }, + { + "epoch": 0.40736109374348717, + "grad_norm": 251.0, + "learning_rate": 6.708091962552666e-05, + "loss": 12.3139, + "step": 9773 + }, + { + "epoch": 0.4074027760410154, + "grad_norm": 560.0, + "learning_rate": 6.707457554288519e-05, + "loss": 20.0003, + "step": 9774 + }, + { + "epoch": 0.40744445833854365, + "grad_norm": 302.0, + "learning_rate": 6.706823114905805e-05, + "loss": 13.7502, + "step": 9775 + }, + { + "epoch": 0.40748614063607186, + "grad_norm": 220.0, + "learning_rate": 6.706188644416089e-05, + "loss": 10.6877, + "step": 9776 + }, + { + "epoch": 0.4075278229336001, + "grad_norm": 348.0, + "learning_rate": 6.705554142830935e-05, + "loss": 14.1269, + "step": 9777 + }, + { + "epoch": 0.40756950523112834, + "grad_norm": 616.0, + "learning_rate": 6.704919610161903e-05, + "loss": 17.1263, + "step": 9778 + }, + { + "epoch": 0.4076111875286566, + "grad_norm": 396.0, + "learning_rate": 6.704285046420562e-05, + "loss": 15.5011, + "step": 9779 + }, + { + "epoch": 0.4076528698261848, + "grad_norm": 360.0, + "learning_rate": 6.703650451618473e-05, + "loss": 12.8752, + "step": 9780 + }, + { + "epoch": 0.4076945521237131, + "grad_norm": 253.0, + "learning_rate": 6.703015825767204e-05, + "loss": 11.5002, + "step": 9781 + }, + { + "epoch": 0.4077362344212413, + "grad_norm": 282.0, + "learning_rate": 6.70238116887832e-05, + "loss": 13.6254, + "step": 9782 + }, + { + "epoch": 0.40777791671876956, + "grad_norm": 130.0, + "learning_rate": 6.701746480963387e-05, + "loss": 9.6879, + "step": 9783 + }, + { + "epoch": 0.4078195990162978, + "grad_norm": 312.0, + "learning_rate": 6.701111762033975e-05, + "loss": 13.3755, + "step": 9784 + }, + { + "epoch": 0.40786128131382604, + "grad_norm": 516.0, + "learning_rate": 6.700477012101649e-05, + "loss": 18.8755, + "step": 9785 + }, + { + "epoch": 0.40790296361135425, + "grad_norm": 205.0, + "learning_rate": 6.69984223117798e-05, + "loss": 11.5005, + "step": 9786 + }, + { + "epoch": 0.4079446459088825, + "grad_norm": 256.0, + "learning_rate": 6.699207419274534e-05, + "loss": 12.4377, + "step": 9787 + }, + { + "epoch": 0.40798632820641073, + "grad_norm": 246.0, + "learning_rate": 6.698572576402883e-05, + "loss": 12.438, + "step": 9788 + }, + { + "epoch": 0.408028010503939, + "grad_norm": 384.0, + "learning_rate": 6.697937702574596e-05, + "loss": 13.3777, + "step": 9789 + }, + { + "epoch": 0.4080696928014672, + "grad_norm": 454.0, + "learning_rate": 6.697302797801244e-05, + "loss": 16.6251, + "step": 9790 + }, + { + "epoch": 0.4081113750989955, + "grad_norm": 728.0, + "learning_rate": 6.696667862094397e-05, + "loss": 21.0004, + "step": 9791 + }, + { + "epoch": 0.4081530573965237, + "grad_norm": 204.0, + "learning_rate": 6.696032895465628e-05, + "loss": 11.8758, + "step": 9792 + }, + { + "epoch": 0.40819473969405196, + "grad_norm": 330.0, + "learning_rate": 6.695397897926508e-05, + "loss": 13.7502, + "step": 9793 + }, + { + "epoch": 0.40823642199158017, + "grad_norm": 928.0, + "learning_rate": 6.694762869488612e-05, + "loss": 26.1256, + "step": 9794 + }, + { + "epoch": 0.40827810428910843, + "grad_norm": 225.0, + "learning_rate": 6.69412781016351e-05, + "loss": 12.0003, + "step": 9795 + }, + { + "epoch": 0.40831978658663665, + "grad_norm": 316.0, + "learning_rate": 6.693492719962779e-05, + "loss": 13.1878, + "step": 9796 + }, + { + "epoch": 0.4083614688841649, + "grad_norm": 68.5, + "learning_rate": 6.692857598897995e-05, + "loss": 6.7817, + "step": 9797 + }, + { + "epoch": 0.4084031511816931, + "grad_norm": 362.0, + "learning_rate": 6.692222446980729e-05, + "loss": 15.0627, + "step": 9798 + }, + { + "epoch": 0.4084448334792214, + "grad_norm": 63.25, + "learning_rate": 6.69158726422256e-05, + "loss": 8.3128, + "step": 9799 + }, + { + "epoch": 0.4084865157767496, + "grad_norm": 736.0, + "learning_rate": 6.690952050635062e-05, + "loss": 19.7503, + "step": 9800 + }, + { + "epoch": 0.40852819807427787, + "grad_norm": 314.0, + "learning_rate": 6.690316806229812e-05, + "loss": 14.0011, + "step": 9801 + }, + { + "epoch": 0.4085698803718061, + "grad_norm": 215.0, + "learning_rate": 6.68968153101839e-05, + "loss": 11.251, + "step": 9802 + }, + { + "epoch": 0.40861156266933435, + "grad_norm": 178.0, + "learning_rate": 6.689046225012372e-05, + "loss": 11.0628, + "step": 9803 + }, + { + "epoch": 0.40865324496686256, + "grad_norm": 229.0, + "learning_rate": 6.688410888223335e-05, + "loss": 11.0007, + "step": 9804 + }, + { + "epoch": 0.40869492726439083, + "grad_norm": 198.0, + "learning_rate": 6.687775520662861e-05, + "loss": 11.6877, + "step": 9805 + }, + { + "epoch": 0.40873660956191904, + "grad_norm": 466.0, + "learning_rate": 6.687140122342528e-05, + "loss": 13.3771, + "step": 9806 + }, + { + "epoch": 0.4087782918594473, + "grad_norm": 332.0, + "learning_rate": 6.686504693273917e-05, + "loss": 13.8127, + "step": 9807 + }, + { + "epoch": 0.4088199741569755, + "grad_norm": 193.0, + "learning_rate": 6.685869233468607e-05, + "loss": 10.8752, + "step": 9808 + }, + { + "epoch": 0.4088616564545038, + "grad_norm": 95.0, + "learning_rate": 6.68523374293818e-05, + "loss": 9.1879, + "step": 9809 + }, + { + "epoch": 0.408903338752032, + "grad_norm": 580.0, + "learning_rate": 6.684598221694221e-05, + "loss": 18.3753, + "step": 9810 + }, + { + "epoch": 0.40894502104956026, + "grad_norm": 544.0, + "learning_rate": 6.683962669748308e-05, + "loss": 18.3756, + "step": 9811 + }, + { + "epoch": 0.4089867033470885, + "grad_norm": 238.0, + "learning_rate": 6.683327087112027e-05, + "loss": 9.7502, + "step": 9812 + }, + { + "epoch": 0.40902838564461674, + "grad_norm": 208.0, + "learning_rate": 6.682691473796959e-05, + "loss": 10.2513, + "step": 9813 + }, + { + "epoch": 0.40907006794214495, + "grad_norm": 462.0, + "learning_rate": 6.68205582981469e-05, + "loss": 16.0008, + "step": 9814 + }, + { + "epoch": 0.4091117502396732, + "grad_norm": 112.5, + "learning_rate": 6.681420155176805e-05, + "loss": 10.5004, + "step": 9815 + }, + { + "epoch": 0.40915343253720143, + "grad_norm": 282.0, + "learning_rate": 6.680784449894888e-05, + "loss": 12.0628, + "step": 9816 + }, + { + "epoch": 0.4091951148347297, + "grad_norm": 564.0, + "learning_rate": 6.680148713980525e-05, + "loss": 19.1254, + "step": 9817 + }, + { + "epoch": 0.4092367971322579, + "grad_norm": 664.0, + "learning_rate": 6.679512947445304e-05, + "loss": 20.8768, + "step": 9818 + }, + { + "epoch": 0.4092784794297862, + "grad_norm": 564.0, + "learning_rate": 6.678877150300808e-05, + "loss": 19.1258, + "step": 9819 + }, + { + "epoch": 0.4093201617273144, + "grad_norm": 136.0, + "learning_rate": 6.678241322558629e-05, + "loss": 10.7502, + "step": 9820 + }, + { + "epoch": 0.40936184402484266, + "grad_norm": 85.0, + "learning_rate": 6.67760546423035e-05, + "loss": 7.1881, + "step": 9821 + }, + { + "epoch": 0.40940352632237087, + "grad_norm": 1232.0, + "learning_rate": 6.676969575327565e-05, + "loss": 27.1296, + "step": 9822 + }, + { + "epoch": 0.40944520861989914, + "grad_norm": 600.0, + "learning_rate": 6.676333655861859e-05, + "loss": 17.2507, + "step": 9823 + }, + { + "epoch": 0.40948689091742735, + "grad_norm": 344.0, + "learning_rate": 6.675697705844825e-05, + "loss": 13.3129, + "step": 9824 + }, + { + "epoch": 0.4095285732149556, + "grad_norm": 884.0, + "learning_rate": 6.67506172528805e-05, + "loss": 25.1252, + "step": 9825 + }, + { + "epoch": 0.4095702555124838, + "grad_norm": 426.0, + "learning_rate": 6.674425714203128e-05, + "loss": 15.6891, + "step": 9826 + }, + { + "epoch": 0.4096119378100121, + "grad_norm": 336.0, + "learning_rate": 6.673789672601649e-05, + "loss": 13.9377, + "step": 9827 + }, + { + "epoch": 0.4096536201075403, + "grad_norm": 344.0, + "learning_rate": 6.673153600495203e-05, + "loss": 15.3128, + "step": 9828 + }, + { + "epoch": 0.4096953024050686, + "grad_norm": 660.0, + "learning_rate": 6.672517497895385e-05, + "loss": 20.3753, + "step": 9829 + }, + { + "epoch": 0.4097369847025968, + "grad_norm": 426.0, + "learning_rate": 6.671881364813787e-05, + "loss": 15.0003, + "step": 9830 + }, + { + "epoch": 0.40977866700012505, + "grad_norm": 692.0, + "learning_rate": 6.671245201262002e-05, + "loss": 20.6252, + "step": 9831 + }, + { + "epoch": 0.40982034929765326, + "grad_norm": 540.0, + "learning_rate": 6.670609007251625e-05, + "loss": 16.8759, + "step": 9832 + }, + { + "epoch": 0.40986203159518153, + "grad_norm": 204.0, + "learning_rate": 6.66997278279425e-05, + "loss": 10.2511, + "step": 9833 + }, + { + "epoch": 0.40990371389270974, + "grad_norm": 255.0, + "learning_rate": 6.669336527901474e-05, + "loss": 11.8128, + "step": 9834 + }, + { + "epoch": 0.409945396190238, + "grad_norm": 234.0, + "learning_rate": 6.66870024258489e-05, + "loss": 12.1877, + "step": 9835 + }, + { + "epoch": 0.4099870784877662, + "grad_norm": 242.0, + "learning_rate": 6.668063926856098e-05, + "loss": 12.0002, + "step": 9836 + }, + { + "epoch": 0.4100287607852945, + "grad_norm": 368.0, + "learning_rate": 6.667427580726692e-05, + "loss": 14.6878, + "step": 9837 + }, + { + "epoch": 0.4100704430828227, + "grad_norm": 282.0, + "learning_rate": 6.666791204208272e-05, + "loss": 12.8756, + "step": 9838 + }, + { + "epoch": 0.41011212538035097, + "grad_norm": 560.0, + "learning_rate": 6.666154797312432e-05, + "loss": 16.1252, + "step": 9839 + }, + { + "epoch": 0.4101538076778792, + "grad_norm": 378.0, + "learning_rate": 6.665518360050773e-05, + "loss": 14.8757, + "step": 9840 + }, + { + "epoch": 0.41019548997540745, + "grad_norm": 161.0, + "learning_rate": 6.664881892434895e-05, + "loss": 10.2503, + "step": 9841 + }, + { + "epoch": 0.41023717227293566, + "grad_norm": 904.0, + "learning_rate": 6.664245394476397e-05, + "loss": 22.7539, + "step": 9842 + }, + { + "epoch": 0.4102788545704639, + "grad_norm": 181.0, + "learning_rate": 6.663608866186878e-05, + "loss": 10.1888, + "step": 9843 + }, + { + "epoch": 0.41032053686799214, + "grad_norm": 532.0, + "learning_rate": 6.66297230757794e-05, + "loss": 17.7538, + "step": 9844 + }, + { + "epoch": 0.4103622191655204, + "grad_norm": 262.0, + "learning_rate": 6.662335718661185e-05, + "loss": 13.1257, + "step": 9845 + }, + { + "epoch": 0.41040390146304867, + "grad_norm": 1608.0, + "learning_rate": 6.661699099448211e-05, + "loss": 44.5013, + "step": 9846 + }, + { + "epoch": 0.4104455837605769, + "grad_norm": 468.0, + "learning_rate": 6.661062449950625e-05, + "loss": 15.7542, + "step": 9847 + }, + { + "epoch": 0.41048726605810515, + "grad_norm": 294.0, + "learning_rate": 6.66042577018003e-05, + "loss": 12.3755, + "step": 9848 + }, + { + "epoch": 0.41052894835563336, + "grad_norm": 420.0, + "learning_rate": 6.659789060148027e-05, + "loss": 14.877, + "step": 9849 + }, + { + "epoch": 0.41057063065316163, + "grad_norm": 262.0, + "learning_rate": 6.659152319866221e-05, + "loss": 12.5011, + "step": 9850 + }, + { + "epoch": 0.41061231295068984, + "grad_norm": 544.0, + "learning_rate": 6.658515549346215e-05, + "loss": 18.5005, + "step": 9851 + }, + { + "epoch": 0.4106539952482181, + "grad_norm": 260.0, + "learning_rate": 6.657878748599618e-05, + "loss": 11.2511, + "step": 9852 + }, + { + "epoch": 0.4106956775457463, + "grad_norm": 74.0, + "learning_rate": 6.657241917638035e-05, + "loss": 8.9379, + "step": 9853 + }, + { + "epoch": 0.4107373598432746, + "grad_norm": 936.0, + "learning_rate": 6.656605056473069e-05, + "loss": 20.6296, + "step": 9854 + }, + { + "epoch": 0.4107790421408028, + "grad_norm": 272.0, + "learning_rate": 6.655968165116327e-05, + "loss": 13.4383, + "step": 9855 + }, + { + "epoch": 0.41082072443833106, + "grad_norm": 620.0, + "learning_rate": 6.655331243579421e-05, + "loss": 19.0001, + "step": 9856 + }, + { + "epoch": 0.4108624067358593, + "grad_norm": 612.0, + "learning_rate": 6.654694291873956e-05, + "loss": 18.7512, + "step": 9857 + }, + { + "epoch": 0.41090408903338754, + "grad_norm": 292.0, + "learning_rate": 6.654057310011537e-05, + "loss": 12.3149, + "step": 9858 + }, + { + "epoch": 0.41094577133091575, + "grad_norm": 1088.0, + "learning_rate": 6.65342029800378e-05, + "loss": 30.6252, + "step": 9859 + }, + { + "epoch": 0.410987453628444, + "grad_norm": 2128.0, + "learning_rate": 6.652783255862292e-05, + "loss": 52.5002, + "step": 9860 + }, + { + "epoch": 0.41102913592597223, + "grad_norm": 498.0, + "learning_rate": 6.65214618359868e-05, + "loss": 17.5018, + "step": 9861 + }, + { + "epoch": 0.4110708182235005, + "grad_norm": 210.0, + "learning_rate": 6.651509081224558e-05, + "loss": 10.0002, + "step": 9862 + }, + { + "epoch": 0.4111125005210287, + "grad_norm": 131.0, + "learning_rate": 6.650871948751536e-05, + "loss": 6.0628, + "step": 9863 + }, + { + "epoch": 0.411154182818557, + "grad_norm": 334.0, + "learning_rate": 6.650234786191228e-05, + "loss": 13.6883, + "step": 9864 + }, + { + "epoch": 0.4111958651160852, + "grad_norm": 121.0, + "learning_rate": 6.649597593555243e-05, + "loss": 8.1884, + "step": 9865 + }, + { + "epoch": 0.41123754741361346, + "grad_norm": 362.0, + "learning_rate": 6.648960370855196e-05, + "loss": 14.5034, + "step": 9866 + }, + { + "epoch": 0.41127922971114167, + "grad_norm": 360.0, + "learning_rate": 6.648323118102699e-05, + "loss": 14.0005, + "step": 9867 + }, + { + "epoch": 0.41132091200866994, + "grad_norm": 596.0, + "learning_rate": 6.647685835309369e-05, + "loss": 19.1254, + "step": 9868 + }, + { + "epoch": 0.41136259430619815, + "grad_norm": 78.5, + "learning_rate": 6.647048522486816e-05, + "loss": 9.6252, + "step": 9869 + }, + { + "epoch": 0.4114042766037264, + "grad_norm": 304.0, + "learning_rate": 6.64641117964666e-05, + "loss": 13.6263, + "step": 9870 + }, + { + "epoch": 0.4114459589012546, + "grad_norm": 248.0, + "learning_rate": 6.64577380680051e-05, + "loss": 11.0629, + "step": 9871 + }, + { + "epoch": 0.4114876411987829, + "grad_norm": 241.0, + "learning_rate": 6.645136403959989e-05, + "loss": 10.8752, + "step": 9872 + }, + { + "epoch": 0.4115293234963111, + "grad_norm": 216.0, + "learning_rate": 6.644498971136712e-05, + "loss": 11.9378, + "step": 9873 + }, + { + "epoch": 0.4115710057938394, + "grad_norm": 180.0, + "learning_rate": 6.643861508342293e-05, + "loss": 10.7503, + "step": 9874 + }, + { + "epoch": 0.4116126880913676, + "grad_norm": 200.0, + "learning_rate": 6.643224015588353e-05, + "loss": 11.6252, + "step": 9875 + }, + { + "epoch": 0.41165437038889585, + "grad_norm": 556.0, + "learning_rate": 6.64258649288651e-05, + "loss": 18.0028, + "step": 9876 + }, + { + "epoch": 0.41169605268642406, + "grad_norm": 103.5, + "learning_rate": 6.641948940248382e-05, + "loss": 8.6254, + "step": 9877 + }, + { + "epoch": 0.41173773498395233, + "grad_norm": 69.5, + "learning_rate": 6.641311357685588e-05, + "loss": 8.7505, + "step": 9878 + }, + { + "epoch": 0.41177941728148054, + "grad_norm": 404.0, + "learning_rate": 6.64067374520975e-05, + "loss": 13.4388, + "step": 9879 + }, + { + "epoch": 0.4118210995790088, + "grad_norm": 476.0, + "learning_rate": 6.640036102832486e-05, + "loss": 16.6252, + "step": 9880 + }, + { + "epoch": 0.411862781876537, + "grad_norm": 266.0, + "learning_rate": 6.63939843056542e-05, + "loss": 11.8762, + "step": 9881 + }, + { + "epoch": 0.4119044641740653, + "grad_norm": 434.0, + "learning_rate": 6.638760728420171e-05, + "loss": 15.251, + "step": 9882 + }, + { + "epoch": 0.4119461464715935, + "grad_norm": 1168.0, + "learning_rate": 6.638122996408362e-05, + "loss": 26.2537, + "step": 9883 + }, + { + "epoch": 0.41198782876912177, + "grad_norm": 150.0, + "learning_rate": 6.637485234541616e-05, + "loss": 10.938, + "step": 9884 + }, + { + "epoch": 0.41202951106665, + "grad_norm": 338.0, + "learning_rate": 6.636847442831557e-05, + "loss": 15.7503, + "step": 9885 + }, + { + "epoch": 0.41207119336417825, + "grad_norm": 180.0, + "learning_rate": 6.636209621289808e-05, + "loss": 10.7502, + "step": 9886 + }, + { + "epoch": 0.41211287566170646, + "grad_norm": 440.0, + "learning_rate": 6.635571769927993e-05, + "loss": 14.1254, + "step": 9887 + }, + { + "epoch": 0.4121545579592347, + "grad_norm": 358.0, + "learning_rate": 6.634933888757737e-05, + "loss": 14.1269, + "step": 9888 + }, + { + "epoch": 0.41219624025676294, + "grad_norm": 482.0, + "learning_rate": 6.634295977790668e-05, + "loss": 15.8131, + "step": 9889 + }, + { + "epoch": 0.4122379225542912, + "grad_norm": 352.0, + "learning_rate": 6.633658037038407e-05, + "loss": 13.8756, + "step": 9890 + }, + { + "epoch": 0.4122796048518194, + "grad_norm": 288.0, + "learning_rate": 6.633020066512584e-05, + "loss": 12.6877, + "step": 9891 + }, + { + "epoch": 0.4123212871493477, + "grad_norm": 2048.0, + "learning_rate": 6.632382066224826e-05, + "loss": 41.7546, + "step": 9892 + }, + { + "epoch": 0.4123629694468759, + "grad_norm": 900.0, + "learning_rate": 6.63174403618676e-05, + "loss": 22.8781, + "step": 9893 + }, + { + "epoch": 0.41240465174440416, + "grad_norm": 1144.0, + "learning_rate": 6.631105976410013e-05, + "loss": 29.3757, + "step": 9894 + }, + { + "epoch": 0.41244633404193237, + "grad_norm": 1992.0, + "learning_rate": 6.630467886906216e-05, + "loss": 39.0004, + "step": 9895 + }, + { + "epoch": 0.41248801633946064, + "grad_norm": 106.5, + "learning_rate": 6.629829767686995e-05, + "loss": 7.9068, + "step": 9896 + }, + { + "epoch": 0.41252969863698885, + "grad_norm": 85.5, + "learning_rate": 6.629191618763984e-05, + "loss": 8.3758, + "step": 9897 + }, + { + "epoch": 0.4125713809345171, + "grad_norm": 512.0, + "learning_rate": 6.628553440148809e-05, + "loss": 15.6256, + "step": 9898 + }, + { + "epoch": 0.41261306323204533, + "grad_norm": 736.0, + "learning_rate": 6.627915231853105e-05, + "loss": 20.1289, + "step": 9899 + }, + { + "epoch": 0.4126547455295736, + "grad_norm": 274.0, + "learning_rate": 6.6272769938885e-05, + "loss": 11.6253, + "step": 9900 + }, + { + "epoch": 0.4126964278271018, + "grad_norm": 80.0, + "learning_rate": 6.626638726266629e-05, + "loss": 8.3129, + "step": 9901 + }, + { + "epoch": 0.4127381101246301, + "grad_norm": 296.0, + "learning_rate": 6.626000428999122e-05, + "loss": 12.2503, + "step": 9902 + }, + { + "epoch": 0.4127797924221583, + "grad_norm": 225.0, + "learning_rate": 6.625362102097612e-05, + "loss": 11.1877, + "step": 9903 + }, + { + "epoch": 0.41282147471968655, + "grad_norm": 84.0, + "learning_rate": 6.624723745573734e-05, + "loss": 7.5322, + "step": 9904 + }, + { + "epoch": 0.41286315701721477, + "grad_norm": 310.0, + "learning_rate": 6.624085359439122e-05, + "loss": 13.8757, + "step": 9905 + }, + { + "epoch": 0.41290483931474303, + "grad_norm": 91.0, + "learning_rate": 6.623446943705409e-05, + "loss": 9.0009, + "step": 9906 + }, + { + "epoch": 0.41294652161227124, + "grad_norm": 224.0, + "learning_rate": 6.622808498384231e-05, + "loss": 10.7502, + "step": 9907 + }, + { + "epoch": 0.4129882039097995, + "grad_norm": 346.0, + "learning_rate": 6.622170023487226e-05, + "loss": 14.5002, + "step": 9908 + }, + { + "epoch": 0.4130298862073277, + "grad_norm": 262.0, + "learning_rate": 6.621531519026027e-05, + "loss": 12.6264, + "step": 9909 + }, + { + "epoch": 0.413071568504856, + "grad_norm": 348.0, + "learning_rate": 6.620892985012272e-05, + "loss": 13.8752, + "step": 9910 + }, + { + "epoch": 0.4131132508023842, + "grad_norm": 332.0, + "learning_rate": 6.6202544214576e-05, + "loss": 14.6894, + "step": 9911 + }, + { + "epoch": 0.41315493309991247, + "grad_norm": 696.0, + "learning_rate": 6.619615828373646e-05, + "loss": 20.2527, + "step": 9912 + }, + { + "epoch": 0.4131966153974407, + "grad_norm": 109.5, + "learning_rate": 6.61897720577205e-05, + "loss": 10.6254, + "step": 9913 + }, + { + "epoch": 0.41323829769496895, + "grad_norm": 768.0, + "learning_rate": 6.618338553664452e-05, + "loss": 22.6254, + "step": 9914 + }, + { + "epoch": 0.41327997999249716, + "grad_norm": 140.0, + "learning_rate": 6.617699872062489e-05, + "loss": 12.2502, + "step": 9915 + }, + { + "epoch": 0.4133216622900254, + "grad_norm": 183.0, + "learning_rate": 6.617061160977801e-05, + "loss": 10.6252, + "step": 9916 + }, + { + "epoch": 0.41336334458755364, + "grad_norm": 372.0, + "learning_rate": 6.616422420422032e-05, + "loss": 15.5629, + "step": 9917 + }, + { + "epoch": 0.4134050268850819, + "grad_norm": 796.0, + "learning_rate": 6.61578365040682e-05, + "loss": 21.6253, + "step": 9918 + }, + { + "epoch": 0.4134467091826102, + "grad_norm": 320.0, + "learning_rate": 6.615144850943807e-05, + "loss": 14.0005, + "step": 9919 + }, + { + "epoch": 0.4134883914801384, + "grad_norm": 166.0, + "learning_rate": 6.614506022044636e-05, + "loss": 9.8753, + "step": 9920 + }, + { + "epoch": 0.41353007377766665, + "grad_norm": 716.0, + "learning_rate": 6.613867163720948e-05, + "loss": 22.3753, + "step": 9921 + }, + { + "epoch": 0.41357175607519486, + "grad_norm": 378.0, + "learning_rate": 6.61322827598439e-05, + "loss": 14.3131, + "step": 9922 + }, + { + "epoch": 0.41361343837272313, + "grad_norm": 540.0, + "learning_rate": 6.612589358846602e-05, + "loss": 17.0004, + "step": 9923 + }, + { + "epoch": 0.41365512067025134, + "grad_norm": 592.0, + "learning_rate": 6.611950412319231e-05, + "loss": 20.1252, + "step": 9924 + }, + { + "epoch": 0.4136968029677796, + "grad_norm": 672.0, + "learning_rate": 6.611311436413921e-05, + "loss": 20.8756, + "step": 9925 + }, + { + "epoch": 0.4137384852653078, + "grad_norm": 416.0, + "learning_rate": 6.610672431142316e-05, + "loss": 15.1879, + "step": 9926 + }, + { + "epoch": 0.4137801675628361, + "grad_norm": 564.0, + "learning_rate": 6.610033396516063e-05, + "loss": 18.5012, + "step": 9927 + }, + { + "epoch": 0.4138218498603643, + "grad_norm": 478.0, + "learning_rate": 6.609394332546808e-05, + "loss": 16.5003, + "step": 9928 + }, + { + "epoch": 0.41386353215789257, + "grad_norm": 486.0, + "learning_rate": 6.608755239246198e-05, + "loss": 16.3752, + "step": 9929 + }, + { + "epoch": 0.4139052144554208, + "grad_norm": 264.0, + "learning_rate": 6.608116116625883e-05, + "loss": 11.1878, + "step": 9930 + }, + { + "epoch": 0.41394689675294905, + "grad_norm": 260.0, + "learning_rate": 6.607476964697508e-05, + "loss": 12.5006, + "step": 9931 + }, + { + "epoch": 0.41398857905047726, + "grad_norm": 412.0, + "learning_rate": 6.606837783472723e-05, + "loss": 15.2505, + "step": 9932 + }, + { + "epoch": 0.4140302613480055, + "grad_norm": 656.0, + "learning_rate": 6.606198572963175e-05, + "loss": 18.0002, + "step": 9933 + }, + { + "epoch": 0.41407194364553374, + "grad_norm": 1776.0, + "learning_rate": 6.605559333180516e-05, + "loss": 36.5007, + "step": 9934 + }, + { + "epoch": 0.414113625943062, + "grad_norm": 258.0, + "learning_rate": 6.604920064136396e-05, + "loss": 11.0632, + "step": 9935 + }, + { + "epoch": 0.4141553082405902, + "grad_norm": 380.0, + "learning_rate": 6.604280765842467e-05, + "loss": 16.6253, + "step": 9936 + }, + { + "epoch": 0.4141969905381185, + "grad_norm": 101.0, + "learning_rate": 6.603641438310376e-05, + "loss": 9.5627, + "step": 9937 + }, + { + "epoch": 0.4142386728356467, + "grad_norm": 366.0, + "learning_rate": 6.60300208155178e-05, + "loss": 13.9376, + "step": 9938 + }, + { + "epoch": 0.41428035513317496, + "grad_norm": 250.0, + "learning_rate": 6.602362695578327e-05, + "loss": 13.6879, + "step": 9939 + }, + { + "epoch": 0.41432203743070317, + "grad_norm": 346.0, + "learning_rate": 6.601723280401672e-05, + "loss": 13.8136, + "step": 9940 + }, + { + "epoch": 0.41436371972823144, + "grad_norm": 652.0, + "learning_rate": 6.601083836033469e-05, + "loss": 21.2534, + "step": 9941 + }, + { + "epoch": 0.41440540202575965, + "grad_norm": 302.0, + "learning_rate": 6.600444362485369e-05, + "loss": 12.5628, + "step": 9942 + }, + { + "epoch": 0.4144470843232879, + "grad_norm": 159.0, + "learning_rate": 6.59980485976903e-05, + "loss": 9.188, + "step": 9943 + }, + { + "epoch": 0.41448876662081613, + "grad_norm": 1104.0, + "learning_rate": 6.599165327896105e-05, + "loss": 25.1291, + "step": 9944 + }, + { + "epoch": 0.4145304489183444, + "grad_norm": 324.0, + "learning_rate": 6.59852576687825e-05, + "loss": 15.0001, + "step": 9945 + }, + { + "epoch": 0.4145721312158726, + "grad_norm": 146.0, + "learning_rate": 6.597886176727119e-05, + "loss": 10.9381, + "step": 9946 + }, + { + "epoch": 0.4146138135134009, + "grad_norm": 1720.0, + "learning_rate": 6.597246557454373e-05, + "loss": 38.5005, + "step": 9947 + }, + { + "epoch": 0.4146554958109291, + "grad_norm": 189.0, + "learning_rate": 6.596606909071667e-05, + "loss": 11.938, + "step": 9948 + }, + { + "epoch": 0.41469717810845735, + "grad_norm": 346.0, + "learning_rate": 6.595967231590656e-05, + "loss": 14.8755, + "step": 9949 + }, + { + "epoch": 0.41473886040598557, + "grad_norm": 139.0, + "learning_rate": 6.595327525023004e-05, + "loss": 10.7508, + "step": 9950 + }, + { + "epoch": 0.41478054270351383, + "grad_norm": 157.0, + "learning_rate": 6.594687789380363e-05, + "loss": 10.6878, + "step": 9951 + }, + { + "epoch": 0.41482222500104204, + "grad_norm": 330.0, + "learning_rate": 6.594048024674398e-05, + "loss": 13.5627, + "step": 9952 + }, + { + "epoch": 0.4148639072985703, + "grad_norm": 164.0, + "learning_rate": 6.593408230916764e-05, + "loss": 10.3752, + "step": 9953 + }, + { + "epoch": 0.4149055895960985, + "grad_norm": 332.0, + "learning_rate": 6.592768408119124e-05, + "loss": 14.1264, + "step": 9954 + }, + { + "epoch": 0.4149472718936268, + "grad_norm": 588.0, + "learning_rate": 6.59212855629314e-05, + "loss": 19.6252, + "step": 9955 + }, + { + "epoch": 0.414988954191155, + "grad_norm": 181.0, + "learning_rate": 6.59148867545047e-05, + "loss": 10.688, + "step": 9956 + }, + { + "epoch": 0.41503063648868327, + "grad_norm": 536.0, + "learning_rate": 6.590848765602779e-05, + "loss": 15.8752, + "step": 9957 + }, + { + "epoch": 0.4150723187862115, + "grad_norm": 316.0, + "learning_rate": 6.590208826761726e-05, + "loss": 13.1899, + "step": 9958 + }, + { + "epoch": 0.41511400108373975, + "grad_norm": 114.0, + "learning_rate": 6.589568858938976e-05, + "loss": 9.628, + "step": 9959 + }, + { + "epoch": 0.41515568338126796, + "grad_norm": 127.0, + "learning_rate": 6.588928862146194e-05, + "loss": 9.6878, + "step": 9960 + }, + { + "epoch": 0.4151973656787962, + "grad_norm": 229.0, + "learning_rate": 6.588288836395043e-05, + "loss": 9.4376, + "step": 9961 + }, + { + "epoch": 0.41523904797632444, + "grad_norm": 342.0, + "learning_rate": 6.587648781697185e-05, + "loss": 13.0006, + "step": 9962 + }, + { + "epoch": 0.4152807302738527, + "grad_norm": 536.0, + "learning_rate": 6.587008698064288e-05, + "loss": 16.6266, + "step": 9963 + }, + { + "epoch": 0.4153224125713809, + "grad_norm": 105.5, + "learning_rate": 6.586368585508017e-05, + "loss": 9.688, + "step": 9964 + }, + { + "epoch": 0.4153640948689092, + "grad_norm": 284.0, + "learning_rate": 6.585728444040038e-05, + "loss": 13.0005, + "step": 9965 + }, + { + "epoch": 0.4154057771664374, + "grad_norm": 306.0, + "learning_rate": 6.585088273672016e-05, + "loss": 13.5627, + "step": 9966 + }, + { + "epoch": 0.41544745946396566, + "grad_norm": 205.0, + "learning_rate": 6.584448074415621e-05, + "loss": 9.8754, + "step": 9967 + }, + { + "epoch": 0.4154891417614939, + "grad_norm": 466.0, + "learning_rate": 6.583807846282519e-05, + "loss": 16.8755, + "step": 9968 + }, + { + "epoch": 0.41553082405902214, + "grad_norm": 300.0, + "learning_rate": 6.583167589284377e-05, + "loss": 13.5636, + "step": 9969 + }, + { + "epoch": 0.41557250635655035, + "grad_norm": 410.0, + "learning_rate": 6.582527303432865e-05, + "loss": 15.1253, + "step": 9970 + }, + { + "epoch": 0.4156141886540786, + "grad_norm": 740.0, + "learning_rate": 6.581886988739654e-05, + "loss": 22.8752, + "step": 9971 + }, + { + "epoch": 0.41565587095160683, + "grad_norm": 191.0, + "learning_rate": 6.58124664521641e-05, + "loss": 10.9378, + "step": 9972 + }, + { + "epoch": 0.4156975532491351, + "grad_norm": 216.0, + "learning_rate": 6.580606272874807e-05, + "loss": 11.5004, + "step": 9973 + }, + { + "epoch": 0.4157392355466633, + "grad_norm": 262.0, + "learning_rate": 6.579965871726514e-05, + "loss": 11.6898, + "step": 9974 + }, + { + "epoch": 0.4157809178441916, + "grad_norm": 434.0, + "learning_rate": 6.579325441783204e-05, + "loss": 16.1257, + "step": 9975 + }, + { + "epoch": 0.4158226001417198, + "grad_norm": 736.0, + "learning_rate": 6.578684983056547e-05, + "loss": 19.6284, + "step": 9976 + }, + { + "epoch": 0.41586428243924806, + "grad_norm": 272.0, + "learning_rate": 6.578044495558216e-05, + "loss": 11.8129, + "step": 9977 + }, + { + "epoch": 0.41590596473677627, + "grad_norm": 181.0, + "learning_rate": 6.577403979299884e-05, + "loss": 10.5007, + "step": 9978 + }, + { + "epoch": 0.41594764703430454, + "grad_norm": 824.0, + "learning_rate": 6.576763434293224e-05, + "loss": 20.5005, + "step": 9979 + }, + { + "epoch": 0.41598932933183275, + "grad_norm": 217.0, + "learning_rate": 6.576122860549911e-05, + "loss": 12.0016, + "step": 9980 + }, + { + "epoch": 0.416031011629361, + "grad_norm": 448.0, + "learning_rate": 6.575482258081617e-05, + "loss": 16.2502, + "step": 9981 + }, + { + "epoch": 0.4160726939268892, + "grad_norm": 440.0, + "learning_rate": 6.574841626900021e-05, + "loss": 15.5002, + "step": 9982 + }, + { + "epoch": 0.4161143762244175, + "grad_norm": 197.0, + "learning_rate": 6.574200967016797e-05, + "loss": 10.1258, + "step": 9983 + }, + { + "epoch": 0.4161560585219457, + "grad_norm": 1520.0, + "learning_rate": 6.57356027844362e-05, + "loss": 32.2544, + "step": 9984 + }, + { + "epoch": 0.41619774081947397, + "grad_norm": 416.0, + "learning_rate": 6.572919561192166e-05, + "loss": 15.8127, + "step": 9985 + }, + { + "epoch": 0.4162394231170022, + "grad_norm": 161.0, + "learning_rate": 6.572278815274114e-05, + "loss": 10.1876, + "step": 9986 + }, + { + "epoch": 0.41628110541453045, + "grad_norm": 133.0, + "learning_rate": 6.571638040701141e-05, + "loss": 9.938, + "step": 9987 + }, + { + "epoch": 0.41632278771205866, + "grad_norm": 318.0, + "learning_rate": 6.570997237484926e-05, + "loss": 14.1252, + "step": 9988 + }, + { + "epoch": 0.41636447000958693, + "grad_norm": 808.0, + "learning_rate": 6.570356405637147e-05, + "loss": 21.6253, + "step": 9989 + }, + { + "epoch": 0.41640615230711514, + "grad_norm": 488.0, + "learning_rate": 6.569715545169483e-05, + "loss": 17.7502, + "step": 9990 + }, + { + "epoch": 0.4164478346046434, + "grad_norm": 648.0, + "learning_rate": 6.569074656093612e-05, + "loss": 19.5002, + "step": 9991 + }, + { + "epoch": 0.4164895169021717, + "grad_norm": 512.0, + "learning_rate": 6.568433738421218e-05, + "loss": 16.8754, + "step": 9992 + }, + { + "epoch": 0.4165311991996999, + "grad_norm": 1024.0, + "learning_rate": 6.567792792163981e-05, + "loss": 25.8754, + "step": 9993 + }, + { + "epoch": 0.41657288149722815, + "grad_norm": 170.0, + "learning_rate": 6.567151817333578e-05, + "loss": 10.0003, + "step": 9994 + }, + { + "epoch": 0.41661456379475637, + "grad_norm": 340.0, + "learning_rate": 6.566510813941695e-05, + "loss": 13.6878, + "step": 9995 + }, + { + "epoch": 0.41665624609228463, + "grad_norm": 324.0, + "learning_rate": 6.565869782000015e-05, + "loss": 13.6253, + "step": 9996 + }, + { + "epoch": 0.41669792838981284, + "grad_norm": 187.0, + "learning_rate": 6.565228721520217e-05, + "loss": 11.3127, + "step": 9997 + }, + { + "epoch": 0.4167396106873411, + "grad_norm": 243.0, + "learning_rate": 6.564587632513988e-05, + "loss": 12.8134, + "step": 9998 + }, + { + "epoch": 0.4167812929848693, + "grad_norm": 624.0, + "learning_rate": 6.56394651499301e-05, + "loss": 19.5003, + "step": 9999 + }, + { + "epoch": 0.4168229752823976, + "grad_norm": 232.0, + "learning_rate": 6.563305368968968e-05, + "loss": 12.3128, + "step": 10000 + }, + { + "epoch": 0.4168646575799258, + "grad_norm": 676.0, + "learning_rate": 6.562664194453548e-05, + "loss": 18.6295, + "step": 10001 + }, + { + "epoch": 0.41690633987745407, + "grad_norm": 416.0, + "learning_rate": 6.562022991458433e-05, + "loss": 15.3129, + "step": 10002 + }, + { + "epoch": 0.4169480221749823, + "grad_norm": 644.0, + "learning_rate": 6.561381759995311e-05, + "loss": 20.3752, + "step": 10003 + }, + { + "epoch": 0.41698970447251055, + "grad_norm": 428.0, + "learning_rate": 6.560740500075868e-05, + "loss": 15.2507, + "step": 10004 + }, + { + "epoch": 0.41703138677003876, + "grad_norm": 404.0, + "learning_rate": 6.560099211711789e-05, + "loss": 14.254, + "step": 10005 + }, + { + "epoch": 0.417073069067567, + "grad_norm": 230.0, + "learning_rate": 6.559457894914765e-05, + "loss": 12.3129, + "step": 10006 + }, + { + "epoch": 0.41711475136509524, + "grad_norm": 96.5, + "learning_rate": 6.558816549696483e-05, + "loss": 4.8129, + "step": 10007 + }, + { + "epoch": 0.4171564336626235, + "grad_norm": 157.0, + "learning_rate": 6.55817517606863e-05, + "loss": 11.6253, + "step": 10008 + }, + { + "epoch": 0.4171981159601517, + "grad_norm": 108.5, + "learning_rate": 6.557533774042895e-05, + "loss": 9.1254, + "step": 10009 + }, + { + "epoch": 0.41723979825768, + "grad_norm": 342.0, + "learning_rate": 6.55689234363097e-05, + "loss": 14.3133, + "step": 10010 + }, + { + "epoch": 0.4172814805552082, + "grad_norm": 544.0, + "learning_rate": 6.556250884844544e-05, + "loss": 18.1255, + "step": 10011 + }, + { + "epoch": 0.41732316285273646, + "grad_norm": 600.0, + "learning_rate": 6.555609397695307e-05, + "loss": 19.2502, + "step": 10012 + }, + { + "epoch": 0.4173648451502647, + "grad_norm": 490.0, + "learning_rate": 6.554967882194952e-05, + "loss": 17.3753, + "step": 10013 + }, + { + "epoch": 0.41740652744779294, + "grad_norm": 592.0, + "learning_rate": 6.554326338355168e-05, + "loss": 19.5009, + "step": 10014 + }, + { + "epoch": 0.41744820974532115, + "grad_norm": 652.0, + "learning_rate": 6.553684766187649e-05, + "loss": 18.8753, + "step": 10015 + }, + { + "epoch": 0.4174898920428494, + "grad_norm": 1176.0, + "learning_rate": 6.553043165704086e-05, + "loss": 25.6293, + "step": 10016 + }, + { + "epoch": 0.41753157434037763, + "grad_norm": 74.0, + "learning_rate": 6.552401536916175e-05, + "loss": 8.3753, + "step": 10017 + }, + { + "epoch": 0.4175732566379059, + "grad_norm": 198.0, + "learning_rate": 6.551759879835608e-05, + "loss": 9.7503, + "step": 10018 + }, + { + "epoch": 0.4176149389354341, + "grad_norm": 176.0, + "learning_rate": 6.551118194474077e-05, + "loss": 10.6877, + "step": 10019 + }, + { + "epoch": 0.4176566212329624, + "grad_norm": 258.0, + "learning_rate": 6.550476480843281e-05, + "loss": 13.3753, + "step": 10020 + }, + { + "epoch": 0.4176983035304906, + "grad_norm": 69.5, + "learning_rate": 6.549834738954915e-05, + "loss": 7.7815, + "step": 10021 + }, + { + "epoch": 0.41773998582801886, + "grad_norm": 223.0, + "learning_rate": 6.549192968820671e-05, + "loss": 12.5629, + "step": 10022 + }, + { + "epoch": 0.41778166812554707, + "grad_norm": 222.0, + "learning_rate": 6.548551170452248e-05, + "loss": 9.938, + "step": 10023 + }, + { + "epoch": 0.41782335042307533, + "grad_norm": 176.0, + "learning_rate": 6.547909343861344e-05, + "loss": 8.6888, + "step": 10024 + }, + { + "epoch": 0.41786503272060355, + "grad_norm": 520.0, + "learning_rate": 6.547267489059655e-05, + "loss": 16.5005, + "step": 10025 + }, + { + "epoch": 0.4179067150181318, + "grad_norm": 676.0, + "learning_rate": 6.546625606058876e-05, + "loss": 17.2504, + "step": 10026 + }, + { + "epoch": 0.41794839731566, + "grad_norm": 384.0, + "learning_rate": 6.54598369487071e-05, + "loss": 14.9379, + "step": 10027 + }, + { + "epoch": 0.4179900796131883, + "grad_norm": 540.0, + "learning_rate": 6.545341755506854e-05, + "loss": 18.8752, + "step": 10028 + }, + { + "epoch": 0.4180317619107165, + "grad_norm": 588.0, + "learning_rate": 6.544699787979007e-05, + "loss": 18.2501, + "step": 10029 + }, + { + "epoch": 0.41807344420824477, + "grad_norm": 434.0, + "learning_rate": 6.544057792298868e-05, + "loss": 15.563, + "step": 10030 + }, + { + "epoch": 0.418115126505773, + "grad_norm": 270.0, + "learning_rate": 6.54341576847814e-05, + "loss": 11.8752, + "step": 10031 + }, + { + "epoch": 0.41815680880330125, + "grad_norm": 210.0, + "learning_rate": 6.542773716528522e-05, + "loss": 8.7504, + "step": 10032 + }, + { + "epoch": 0.41819849110082946, + "grad_norm": 348.0, + "learning_rate": 6.542131636461717e-05, + "loss": 14.1877, + "step": 10033 + }, + { + "epoch": 0.41824017339835773, + "grad_norm": 175.0, + "learning_rate": 6.541489528289425e-05, + "loss": 10.3753, + "step": 10034 + }, + { + "epoch": 0.41828185569588594, + "grad_norm": 151.0, + "learning_rate": 6.540847392023348e-05, + "loss": 10.2503, + "step": 10035 + }, + { + "epoch": 0.4183235379934142, + "grad_norm": 200.0, + "learning_rate": 6.540205227675193e-05, + "loss": 12.0636, + "step": 10036 + }, + { + "epoch": 0.4183652202909424, + "grad_norm": 170.0, + "learning_rate": 6.53956303525666e-05, + "loss": 10.1879, + "step": 10037 + }, + { + "epoch": 0.4184069025884707, + "grad_norm": 211.0, + "learning_rate": 6.538920814779454e-05, + "loss": 12.3756, + "step": 10038 + }, + { + "epoch": 0.4184485848859989, + "grad_norm": 284.0, + "learning_rate": 6.53827856625528e-05, + "loss": 9.4381, + "step": 10039 + }, + { + "epoch": 0.41849026718352716, + "grad_norm": 276.0, + "learning_rate": 6.537636289695843e-05, + "loss": 12.7505, + "step": 10040 + }, + { + "epoch": 0.4185319494810554, + "grad_norm": 344.0, + "learning_rate": 6.536993985112849e-05, + "loss": 13.8762, + "step": 10041 + }, + { + "epoch": 0.41857363177858364, + "grad_norm": 486.0, + "learning_rate": 6.536351652518e-05, + "loss": 14.0027, + "step": 10042 + }, + { + "epoch": 0.41861531407611186, + "grad_norm": 316.0, + "learning_rate": 6.535709291923008e-05, + "loss": 12.7503, + "step": 10043 + }, + { + "epoch": 0.4186569963736401, + "grad_norm": 442.0, + "learning_rate": 6.535066903339577e-05, + "loss": 16.2501, + "step": 10044 + }, + { + "epoch": 0.41869867867116833, + "grad_norm": 188.0, + "learning_rate": 6.534424486779416e-05, + "loss": 11.8752, + "step": 10045 + }, + { + "epoch": 0.4187403609686966, + "grad_norm": 336.0, + "learning_rate": 6.533782042254232e-05, + "loss": 14.4383, + "step": 10046 + }, + { + "epoch": 0.4187820432662248, + "grad_norm": 234.0, + "learning_rate": 6.533139569775734e-05, + "loss": 12.7503, + "step": 10047 + }, + { + "epoch": 0.4188237255637531, + "grad_norm": 808.0, + "learning_rate": 6.532497069355632e-05, + "loss": 20.2544, + "step": 10048 + }, + { + "epoch": 0.4188654078612813, + "grad_norm": 688.0, + "learning_rate": 6.531854541005634e-05, + "loss": 19.3779, + "step": 10049 + }, + { + "epoch": 0.41890709015880956, + "grad_norm": 292.0, + "learning_rate": 6.531211984737452e-05, + "loss": 14.5634, + "step": 10050 + }, + { + "epoch": 0.41894877245633777, + "grad_norm": 106.0, + "learning_rate": 6.530569400562795e-05, + "loss": 8.9377, + "step": 10051 + }, + { + "epoch": 0.41899045475386604, + "grad_norm": 636.0, + "learning_rate": 6.529926788493374e-05, + "loss": 19.8752, + "step": 10052 + }, + { + "epoch": 0.41903213705139425, + "grad_norm": 298.0, + "learning_rate": 6.529284148540903e-05, + "loss": 13.7502, + "step": 10053 + }, + { + "epoch": 0.4190738193489225, + "grad_norm": 438.0, + "learning_rate": 6.528641480717092e-05, + "loss": 16.0025, + "step": 10054 + }, + { + "epoch": 0.4191155016464507, + "grad_norm": 764.0, + "learning_rate": 6.527998785033655e-05, + "loss": 21.8751, + "step": 10055 + }, + { + "epoch": 0.419157183943979, + "grad_norm": 374.0, + "learning_rate": 6.527356061502303e-05, + "loss": 14.7517, + "step": 10056 + }, + { + "epoch": 0.4191988662415072, + "grad_norm": 736.0, + "learning_rate": 6.526713310134753e-05, + "loss": 19.2529, + "step": 10057 + }, + { + "epoch": 0.4192405485390355, + "grad_norm": 152.0, + "learning_rate": 6.526070530942716e-05, + "loss": 9.8761, + "step": 10058 + }, + { + "epoch": 0.4192822308365637, + "grad_norm": 235.0, + "learning_rate": 6.525427723937909e-05, + "loss": 13.0627, + "step": 10059 + }, + { + "epoch": 0.41932391313409195, + "grad_norm": 552.0, + "learning_rate": 6.524784889132044e-05, + "loss": 19.3754, + "step": 10060 + }, + { + "epoch": 0.41936559543162016, + "grad_norm": 160.0, + "learning_rate": 6.524142026536841e-05, + "loss": 8.9378, + "step": 10061 + }, + { + "epoch": 0.41940727772914843, + "grad_norm": 860.0, + "learning_rate": 6.523499136164015e-05, + "loss": 24.8754, + "step": 10062 + }, + { + "epoch": 0.41944896002667664, + "grad_norm": 632.0, + "learning_rate": 6.522856218025282e-05, + "loss": 18.0051, + "step": 10063 + }, + { + "epoch": 0.4194906423242049, + "grad_norm": 328.0, + "learning_rate": 6.522213272132358e-05, + "loss": 11.8753, + "step": 10064 + }, + { + "epoch": 0.4195323246217332, + "grad_norm": 290.0, + "learning_rate": 6.521570298496961e-05, + "loss": 10.8127, + "step": 10065 + }, + { + "epoch": 0.4195740069192614, + "grad_norm": 144.0, + "learning_rate": 6.520927297130812e-05, + "loss": 10.4389, + "step": 10066 + }, + { + "epoch": 0.41961568921678966, + "grad_norm": 93.0, + "learning_rate": 6.520284268045629e-05, + "loss": 8.2507, + "step": 10067 + }, + { + "epoch": 0.41965737151431787, + "grad_norm": 276.0, + "learning_rate": 6.519641211253129e-05, + "loss": 12.6878, + "step": 10068 + }, + { + "epoch": 0.41969905381184613, + "grad_norm": 604.0, + "learning_rate": 6.518998126765032e-05, + "loss": 16.5001, + "step": 10069 + }, + { + "epoch": 0.41974073610937435, + "grad_norm": 1600.0, + "learning_rate": 6.51835501459306e-05, + "loss": 32.254, + "step": 10070 + }, + { + "epoch": 0.4197824184069026, + "grad_norm": 420.0, + "learning_rate": 6.517711874748934e-05, + "loss": 15.256, + "step": 10071 + }, + { + "epoch": 0.4198241007044308, + "grad_norm": 844.0, + "learning_rate": 6.517068707244373e-05, + "loss": 24.8755, + "step": 10072 + }, + { + "epoch": 0.4198657830019591, + "grad_norm": 544.0, + "learning_rate": 6.5164255120911e-05, + "loss": 17.626, + "step": 10073 + }, + { + "epoch": 0.4199074652994873, + "grad_norm": 178.0, + "learning_rate": 6.515782289300839e-05, + "loss": 10.1255, + "step": 10074 + }, + { + "epoch": 0.41994914759701557, + "grad_norm": 314.0, + "learning_rate": 6.51513903888531e-05, + "loss": 13.5631, + "step": 10075 + }, + { + "epoch": 0.4199908298945438, + "grad_norm": 243.0, + "learning_rate": 6.514495760856239e-05, + "loss": 11.3757, + "step": 10076 + }, + { + "epoch": 0.42003251219207205, + "grad_norm": 215.0, + "learning_rate": 6.513852455225347e-05, + "loss": 11.7511, + "step": 10077 + }, + { + "epoch": 0.42007419448960026, + "grad_norm": 868.0, + "learning_rate": 6.513209122004359e-05, + "loss": 24.2502, + "step": 10078 + }, + { + "epoch": 0.42011587678712853, + "grad_norm": 1464.0, + "learning_rate": 6.512565761205e-05, + "loss": 33.7503, + "step": 10079 + }, + { + "epoch": 0.42015755908465674, + "grad_norm": 156.0, + "learning_rate": 6.511922372838999e-05, + "loss": 10.5628, + "step": 10080 + }, + { + "epoch": 0.420199241382185, + "grad_norm": 366.0, + "learning_rate": 6.511278956918077e-05, + "loss": 15.4377, + "step": 10081 + }, + { + "epoch": 0.4202409236797132, + "grad_norm": 237.0, + "learning_rate": 6.51063551345396e-05, + "loss": 12.4381, + "step": 10082 + }, + { + "epoch": 0.4202826059772415, + "grad_norm": 202.0, + "learning_rate": 6.509992042458378e-05, + "loss": 10.8128, + "step": 10083 + }, + { + "epoch": 0.4203242882747697, + "grad_norm": 258.0, + "learning_rate": 6.509348543943056e-05, + "loss": 13.6255, + "step": 10084 + }, + { + "epoch": 0.42036597057229796, + "grad_norm": 304.0, + "learning_rate": 6.508705017919725e-05, + "loss": 14.3129, + "step": 10085 + }, + { + "epoch": 0.4204076528698262, + "grad_norm": 235.0, + "learning_rate": 6.50806146440011e-05, + "loss": 9.9415, + "step": 10086 + }, + { + "epoch": 0.42044933516735444, + "grad_norm": 210.0, + "learning_rate": 6.50741788339594e-05, + "loss": 11.5009, + "step": 10087 + }, + { + "epoch": 0.42049101746488265, + "grad_norm": 1680.0, + "learning_rate": 6.506774274918947e-05, + "loss": 34.0048, + "step": 10088 + }, + { + "epoch": 0.4205326997624109, + "grad_norm": 183.0, + "learning_rate": 6.506130638980858e-05, + "loss": 10.4377, + "step": 10089 + }, + { + "epoch": 0.42057438205993913, + "grad_norm": 119.0, + "learning_rate": 6.505486975593404e-05, + "loss": 9.1253, + "step": 10090 + }, + { + "epoch": 0.4206160643574674, + "grad_norm": 600.0, + "learning_rate": 6.504843284768317e-05, + "loss": 16.8809, + "step": 10091 + }, + { + "epoch": 0.4206577466549956, + "grad_norm": 181.0, + "learning_rate": 6.504199566517328e-05, + "loss": 11.001, + "step": 10092 + }, + { + "epoch": 0.4206994289525239, + "grad_norm": 362.0, + "learning_rate": 6.503555820852167e-05, + "loss": 15.0004, + "step": 10093 + }, + { + "epoch": 0.4207411112500521, + "grad_norm": 632.0, + "learning_rate": 6.502912047784568e-05, + "loss": 20.0001, + "step": 10094 + }, + { + "epoch": 0.42078279354758036, + "grad_norm": 390.0, + "learning_rate": 6.502268247326264e-05, + "loss": 15.8131, + "step": 10095 + }, + { + "epoch": 0.42082447584510857, + "grad_norm": 52.75, + "learning_rate": 6.501624419488988e-05, + "loss": 8.063, + "step": 10096 + }, + { + "epoch": 0.42086615814263684, + "grad_norm": 350.0, + "learning_rate": 6.500980564284473e-05, + "loss": 12.7531, + "step": 10097 + }, + { + "epoch": 0.42090784044016505, + "grad_norm": 139.0, + "learning_rate": 6.500336681724455e-05, + "loss": 9.2503, + "step": 10098 + }, + { + "epoch": 0.4209495227376933, + "grad_norm": 201.0, + "learning_rate": 6.499692771820667e-05, + "loss": 9.6881, + "step": 10099 + }, + { + "epoch": 0.4209912050352215, + "grad_norm": 312.0, + "learning_rate": 6.499048834584845e-05, + "loss": 12.2502, + "step": 10100 + }, + { + "epoch": 0.4210328873327498, + "grad_norm": 494.0, + "learning_rate": 6.498404870028725e-05, + "loss": 17.7505, + "step": 10101 + }, + { + "epoch": 0.421074569630278, + "grad_norm": 166.0, + "learning_rate": 6.497760878164043e-05, + "loss": 9.5627, + "step": 10102 + }, + { + "epoch": 0.4211162519278063, + "grad_norm": 524.0, + "learning_rate": 6.497116859002536e-05, + "loss": 17.8751, + "step": 10103 + }, + { + "epoch": 0.4211579342253345, + "grad_norm": 960.0, + "learning_rate": 6.496472812555942e-05, + "loss": 25.6253, + "step": 10104 + }, + { + "epoch": 0.42119961652286275, + "grad_norm": 136.0, + "learning_rate": 6.495828738835999e-05, + "loss": 10.1262, + "step": 10105 + }, + { + "epoch": 0.42124129882039096, + "grad_norm": 352.0, + "learning_rate": 6.495184637854443e-05, + "loss": 15.1254, + "step": 10106 + }, + { + "epoch": 0.42128298111791923, + "grad_norm": 141.0, + "learning_rate": 6.494540509623016e-05, + "loss": 10.3753, + "step": 10107 + }, + { + "epoch": 0.42132466341544744, + "grad_norm": 111.0, + "learning_rate": 6.493896354153453e-05, + "loss": 9.3757, + "step": 10108 + }, + { + "epoch": 0.4213663457129757, + "grad_norm": 125.0, + "learning_rate": 6.493252171457498e-05, + "loss": 10.188, + "step": 10109 + }, + { + "epoch": 0.4214080280105039, + "grad_norm": 324.0, + "learning_rate": 6.492607961546889e-05, + "loss": 12.4379, + "step": 10110 + }, + { + "epoch": 0.4214497103080322, + "grad_norm": 320.0, + "learning_rate": 6.491963724433367e-05, + "loss": 14.4377, + "step": 10111 + }, + { + "epoch": 0.4214913926055604, + "grad_norm": 426.0, + "learning_rate": 6.491319460128672e-05, + "loss": 16.1252, + "step": 10112 + }, + { + "epoch": 0.42153307490308867, + "grad_norm": 217.0, + "learning_rate": 6.490675168644552e-05, + "loss": 12.0002, + "step": 10113 + }, + { + "epoch": 0.4215747572006169, + "grad_norm": 106.5, + "learning_rate": 6.490030849992742e-05, + "loss": 9.3127, + "step": 10114 + }, + { + "epoch": 0.42161643949814515, + "grad_norm": 304.0, + "learning_rate": 6.489386504184988e-05, + "loss": 13.1252, + "step": 10115 + }, + { + "epoch": 0.42165812179567336, + "grad_norm": 486.0, + "learning_rate": 6.488742131233032e-05, + "loss": 18.1255, + "step": 10116 + }, + { + "epoch": 0.4216998040932016, + "grad_norm": 205.0, + "learning_rate": 6.488097731148619e-05, + "loss": 10.7504, + "step": 10117 + }, + { + "epoch": 0.42174148639072984, + "grad_norm": 171.0, + "learning_rate": 6.487453303943494e-05, + "loss": 9.6879, + "step": 10118 + }, + { + "epoch": 0.4217831686882581, + "grad_norm": 237.0, + "learning_rate": 6.486808849629398e-05, + "loss": 11.5627, + "step": 10119 + }, + { + "epoch": 0.4218248509857863, + "grad_norm": 434.0, + "learning_rate": 6.48616436821808e-05, + "loss": 14.4378, + "step": 10120 + }, + { + "epoch": 0.4218665332833146, + "grad_norm": 410.0, + "learning_rate": 6.485519859721285e-05, + "loss": 15.2501, + "step": 10121 + }, + { + "epoch": 0.4219082155808428, + "grad_norm": 1012.0, + "learning_rate": 6.484875324150759e-05, + "loss": 29.8757, + "step": 10122 + }, + { + "epoch": 0.42194989787837106, + "grad_norm": 400.0, + "learning_rate": 6.484230761518246e-05, + "loss": 15.0004, + "step": 10123 + }, + { + "epoch": 0.4219915801758993, + "grad_norm": 245.0, + "learning_rate": 6.483586171835497e-05, + "loss": 13.1917, + "step": 10124 + }, + { + "epoch": 0.42203326247342754, + "grad_norm": 147.0, + "learning_rate": 6.48294155511426e-05, + "loss": 5.8754, + "step": 10125 + }, + { + "epoch": 0.42207494477095575, + "grad_norm": 340.0, + "learning_rate": 6.48229691136628e-05, + "loss": 13.438, + "step": 10126 + }, + { + "epoch": 0.422116627068484, + "grad_norm": 89.0, + "learning_rate": 6.481652240603306e-05, + "loss": 8.9378, + "step": 10127 + }, + { + "epoch": 0.42215830936601223, + "grad_norm": 1152.0, + "learning_rate": 6.48100754283709e-05, + "loss": 32.0044, + "step": 10128 + }, + { + "epoch": 0.4221999916635405, + "grad_norm": 580.0, + "learning_rate": 6.48036281807938e-05, + "loss": 19.0005, + "step": 10129 + }, + { + "epoch": 0.4222416739610687, + "grad_norm": 1056.0, + "learning_rate": 6.479718066341925e-05, + "loss": 30.2502, + "step": 10130 + }, + { + "epoch": 0.422283356258597, + "grad_norm": 1004.0, + "learning_rate": 6.479073287636479e-05, + "loss": 23.0048, + "step": 10131 + }, + { + "epoch": 0.4223250385561252, + "grad_norm": 101.5, + "learning_rate": 6.47842848197479e-05, + "loss": 9.5004, + "step": 10132 + }, + { + "epoch": 0.42236672085365345, + "grad_norm": 214.0, + "learning_rate": 6.477783649368609e-05, + "loss": 11.2501, + "step": 10133 + }, + { + "epoch": 0.42240840315118167, + "grad_norm": 64.5, + "learning_rate": 6.477138789829692e-05, + "loss": 8.1881, + "step": 10134 + }, + { + "epoch": 0.42245008544870993, + "grad_norm": 464.0, + "learning_rate": 6.476493903369788e-05, + "loss": 17.2515, + "step": 10135 + }, + { + "epoch": 0.42249176774623814, + "grad_norm": 121.5, + "learning_rate": 6.475848990000653e-05, + "loss": 9.4386, + "step": 10136 + }, + { + "epoch": 0.4225334500437664, + "grad_norm": 494.0, + "learning_rate": 6.475204049734038e-05, + "loss": 17.1252, + "step": 10137 + }, + { + "epoch": 0.4225751323412947, + "grad_norm": 568.0, + "learning_rate": 6.474559082581699e-05, + "loss": 21.1254, + "step": 10138 + }, + { + "epoch": 0.4226168146388229, + "grad_norm": 604.0, + "learning_rate": 6.473914088555388e-05, + "loss": 19.8754, + "step": 10139 + }, + { + "epoch": 0.42265849693635116, + "grad_norm": 390.0, + "learning_rate": 6.473269067666865e-05, + "loss": 14.8137, + "step": 10140 + }, + { + "epoch": 0.42270017923387937, + "grad_norm": 640.0, + "learning_rate": 6.472624019927879e-05, + "loss": 19.2505, + "step": 10141 + }, + { + "epoch": 0.42274186153140764, + "grad_norm": 67.0, + "learning_rate": 6.471978945350192e-05, + "loss": 8.3756, + "step": 10142 + }, + { + "epoch": 0.42278354382893585, + "grad_norm": 720.0, + "learning_rate": 6.471333843945558e-05, + "loss": 22.1285, + "step": 10143 + }, + { + "epoch": 0.4228252261264641, + "grad_norm": 362.0, + "learning_rate": 6.470688715725734e-05, + "loss": 12.6273, + "step": 10144 + }, + { + "epoch": 0.4228669084239923, + "grad_norm": 708.0, + "learning_rate": 6.470043560702476e-05, + "loss": 23.3753, + "step": 10145 + }, + { + "epoch": 0.4229085907215206, + "grad_norm": 338.0, + "learning_rate": 6.469398378887546e-05, + "loss": 13.8127, + "step": 10146 + }, + { + "epoch": 0.4229502730190488, + "grad_norm": 680.0, + "learning_rate": 6.468753170292698e-05, + "loss": 18.5003, + "step": 10147 + }, + { + "epoch": 0.4229919553165771, + "grad_norm": 342.0, + "learning_rate": 6.468107934929692e-05, + "loss": 14.8129, + "step": 10148 + }, + { + "epoch": 0.4230336376141053, + "grad_norm": 494.0, + "learning_rate": 6.467462672810291e-05, + "loss": 17.1253, + "step": 10149 + }, + { + "epoch": 0.42307531991163355, + "grad_norm": 253.0, + "learning_rate": 6.466817383946252e-05, + "loss": 11.7506, + "step": 10150 + }, + { + "epoch": 0.42311700220916176, + "grad_norm": 136.0, + "learning_rate": 6.466172068349336e-05, + "loss": 7.907, + "step": 10151 + }, + { + "epoch": 0.42315868450669003, + "grad_norm": 308.0, + "learning_rate": 6.465526726031304e-05, + "loss": 11.6278, + "step": 10152 + }, + { + "epoch": 0.42320036680421824, + "grad_norm": 120.0, + "learning_rate": 6.464881357003917e-05, + "loss": 9.1881, + "step": 10153 + }, + { + "epoch": 0.4232420491017465, + "grad_norm": 262.0, + "learning_rate": 6.464235961278937e-05, + "loss": 12.813, + "step": 10154 + }, + { + "epoch": 0.4232837313992747, + "grad_norm": 448.0, + "learning_rate": 6.463590538868127e-05, + "loss": 16.7503, + "step": 10155 + }, + { + "epoch": 0.423325413696803, + "grad_norm": 99.5, + "learning_rate": 6.462945089783249e-05, + "loss": 8.5002, + "step": 10156 + }, + { + "epoch": 0.4233670959943312, + "grad_norm": 207.0, + "learning_rate": 6.462299614036067e-05, + "loss": 12.3128, + "step": 10157 + }, + { + "epoch": 0.42340877829185947, + "grad_norm": 548.0, + "learning_rate": 6.461654111638346e-05, + "loss": 19.3768, + "step": 10158 + }, + { + "epoch": 0.4234504605893877, + "grad_norm": 564.0, + "learning_rate": 6.461008582601849e-05, + "loss": 20.5002, + "step": 10159 + }, + { + "epoch": 0.42349214288691595, + "grad_norm": 696.0, + "learning_rate": 6.46036302693834e-05, + "loss": 18.7537, + "step": 10160 + }, + { + "epoch": 0.42353382518444416, + "grad_norm": 233.0, + "learning_rate": 6.459717444659585e-05, + "loss": 12.3752, + "step": 10161 + }, + { + "epoch": 0.4235755074819724, + "grad_norm": 1200.0, + "learning_rate": 6.45907183577735e-05, + "loss": 28.3754, + "step": 10162 + }, + { + "epoch": 0.42361718977950064, + "grad_norm": 608.0, + "learning_rate": 6.4584262003034e-05, + "loss": 16.0018, + "step": 10163 + }, + { + "epoch": 0.4236588720770289, + "grad_norm": 372.0, + "learning_rate": 6.457780538249504e-05, + "loss": 14.5629, + "step": 10164 + }, + { + "epoch": 0.4237005543745571, + "grad_norm": 360.0, + "learning_rate": 6.45713484962743e-05, + "loss": 12.3129, + "step": 10165 + }, + { + "epoch": 0.4237422366720854, + "grad_norm": 426.0, + "learning_rate": 6.456489134448943e-05, + "loss": 16.5004, + "step": 10166 + }, + { + "epoch": 0.4237839189696136, + "grad_norm": 213.0, + "learning_rate": 6.455843392725813e-05, + "loss": 11.8126, + "step": 10167 + }, + { + "epoch": 0.42382560126714186, + "grad_norm": 107.5, + "learning_rate": 6.455197624469805e-05, + "loss": 10.5631, + "step": 10168 + }, + { + "epoch": 0.42386728356467007, + "grad_norm": 223.0, + "learning_rate": 6.454551829692694e-05, + "loss": 11.1877, + "step": 10169 + }, + { + "epoch": 0.42390896586219834, + "grad_norm": 121.5, + "learning_rate": 6.453906008406245e-05, + "loss": 8.0628, + "step": 10170 + }, + { + "epoch": 0.42395064815972655, + "grad_norm": 159.0, + "learning_rate": 6.453260160622232e-05, + "loss": 9.4382, + "step": 10171 + }, + { + "epoch": 0.4239923304572548, + "grad_norm": 130.0, + "learning_rate": 6.452614286352422e-05, + "loss": 9.5005, + "step": 10172 + }, + { + "epoch": 0.42403401275478303, + "grad_norm": 442.0, + "learning_rate": 6.451968385608586e-05, + "loss": 15.8751, + "step": 10173 + }, + { + "epoch": 0.4240756950523113, + "grad_norm": 516.0, + "learning_rate": 6.4513224584025e-05, + "loss": 18.0004, + "step": 10174 + }, + { + "epoch": 0.4241173773498395, + "grad_norm": 312.0, + "learning_rate": 6.450676504745933e-05, + "loss": 12.3158, + "step": 10175 + }, + { + "epoch": 0.4241590596473678, + "grad_norm": 176.0, + "learning_rate": 6.450030524650657e-05, + "loss": 12.1882, + "step": 10176 + }, + { + "epoch": 0.424200741944896, + "grad_norm": 243.0, + "learning_rate": 6.449384518128448e-05, + "loss": 12.5627, + "step": 10177 + }, + { + "epoch": 0.42424242424242425, + "grad_norm": 1184.0, + "learning_rate": 6.448738485191075e-05, + "loss": 26.3843, + "step": 10178 + }, + { + "epoch": 0.42428410653995247, + "grad_norm": 1760.0, + "learning_rate": 6.448092425850317e-05, + "loss": 35.256, + "step": 10179 + }, + { + "epoch": 0.42432578883748073, + "grad_norm": 664.0, + "learning_rate": 6.447446340117943e-05, + "loss": 20.6251, + "step": 10180 + }, + { + "epoch": 0.42436747113500894, + "grad_norm": 136.0, + "learning_rate": 6.446800228005732e-05, + "loss": 11.0641, + "step": 10181 + }, + { + "epoch": 0.4244091534325372, + "grad_norm": 462.0, + "learning_rate": 6.446154089525459e-05, + "loss": 15.7502, + "step": 10182 + }, + { + "epoch": 0.4244508357300654, + "grad_norm": 394.0, + "learning_rate": 6.445507924688899e-05, + "loss": 15.9377, + "step": 10183 + }, + { + "epoch": 0.4244925180275937, + "grad_norm": 1264.0, + "learning_rate": 6.44486173350783e-05, + "loss": 28.8767, + "step": 10184 + }, + { + "epoch": 0.4245342003251219, + "grad_norm": 368.0, + "learning_rate": 6.444215515994027e-05, + "loss": 13.8753, + "step": 10185 + }, + { + "epoch": 0.42457588262265017, + "grad_norm": 120.0, + "learning_rate": 6.443569272159266e-05, + "loss": 10.4387, + "step": 10186 + }, + { + "epoch": 0.4246175649201784, + "grad_norm": 656.0, + "learning_rate": 6.442923002015329e-05, + "loss": 18.2548, + "step": 10187 + }, + { + "epoch": 0.42465924721770665, + "grad_norm": 454.0, + "learning_rate": 6.44227670557399e-05, + "loss": 15.8753, + "step": 10188 + }, + { + "epoch": 0.42470092951523486, + "grad_norm": 292.0, + "learning_rate": 6.441630382847033e-05, + "loss": 13.3129, + "step": 10189 + }, + { + "epoch": 0.4247426118127631, + "grad_norm": 88.0, + "learning_rate": 6.440984033846232e-05, + "loss": 6.3442, + "step": 10190 + }, + { + "epoch": 0.42478429411029134, + "grad_norm": 294.0, + "learning_rate": 6.440337658583371e-05, + "loss": 13.6263, + "step": 10191 + }, + { + "epoch": 0.4248259764078196, + "grad_norm": 656.0, + "learning_rate": 6.439691257070227e-05, + "loss": 18.7508, + "step": 10192 + }, + { + "epoch": 0.4248676587053478, + "grad_norm": 932.0, + "learning_rate": 6.439044829318583e-05, + "loss": 25.7502, + "step": 10193 + }, + { + "epoch": 0.4249093410028761, + "grad_norm": 234.0, + "learning_rate": 6.438398375340218e-05, + "loss": 11.5627, + "step": 10194 + }, + { + "epoch": 0.4249510233004043, + "grad_norm": 256.0, + "learning_rate": 6.437751895146916e-05, + "loss": 11.0007, + "step": 10195 + }, + { + "epoch": 0.42499270559793256, + "grad_norm": 356.0, + "learning_rate": 6.437105388750458e-05, + "loss": 13.3751, + "step": 10196 + }, + { + "epoch": 0.4250343878954608, + "grad_norm": 167.0, + "learning_rate": 6.436458856162626e-05, + "loss": 9.6265, + "step": 10197 + }, + { + "epoch": 0.42507607019298904, + "grad_norm": 312.0, + "learning_rate": 6.435812297395204e-05, + "loss": 12.8129, + "step": 10198 + }, + { + "epoch": 0.42511775249051725, + "grad_norm": 128.0, + "learning_rate": 6.435165712459974e-05, + "loss": 8.8755, + "step": 10199 + }, + { + "epoch": 0.4251594347880455, + "grad_norm": 149.0, + "learning_rate": 6.434519101368723e-05, + "loss": 9.8758, + "step": 10200 + }, + { + "epoch": 0.42520111708557373, + "grad_norm": 404.0, + "learning_rate": 6.433872464133235e-05, + "loss": 15.3754, + "step": 10201 + }, + { + "epoch": 0.425242799383102, + "grad_norm": 888.0, + "learning_rate": 6.433225800765293e-05, + "loss": 21.376, + "step": 10202 + }, + { + "epoch": 0.4252844816806302, + "grad_norm": 334.0, + "learning_rate": 6.432579111276684e-05, + "loss": 13.7504, + "step": 10203 + }, + { + "epoch": 0.4253261639781585, + "grad_norm": 175.0, + "learning_rate": 6.431932395679193e-05, + "loss": 11.3752, + "step": 10204 + }, + { + "epoch": 0.4253678462756867, + "grad_norm": 108.0, + "learning_rate": 6.431285653984607e-05, + "loss": 8.0002, + "step": 10205 + }, + { + "epoch": 0.42540952857321496, + "grad_norm": 137.0, + "learning_rate": 6.430638886204713e-05, + "loss": 10.6253, + "step": 10206 + }, + { + "epoch": 0.42545121087074317, + "grad_norm": 632.0, + "learning_rate": 6.429992092351299e-05, + "loss": 18.8755, + "step": 10207 + }, + { + "epoch": 0.42549289316827144, + "grad_norm": 720.0, + "learning_rate": 6.429345272436151e-05, + "loss": 20.5002, + "step": 10208 + }, + { + "epoch": 0.4255345754657997, + "grad_norm": 640.0, + "learning_rate": 6.428698426471059e-05, + "loss": 17.7523, + "step": 10209 + }, + { + "epoch": 0.4255762577633279, + "grad_norm": 612.0, + "learning_rate": 6.428051554467812e-05, + "loss": 20.2503, + "step": 10210 + }, + { + "epoch": 0.4256179400608562, + "grad_norm": 536.0, + "learning_rate": 6.427404656438196e-05, + "loss": 18.0003, + "step": 10211 + }, + { + "epoch": 0.4256596223583844, + "grad_norm": 134.0, + "learning_rate": 6.426757732394006e-05, + "loss": 9.6921, + "step": 10212 + }, + { + "epoch": 0.42570130465591266, + "grad_norm": 376.0, + "learning_rate": 6.42611078234703e-05, + "loss": 15.2502, + "step": 10213 + }, + { + "epoch": 0.42574298695344087, + "grad_norm": 308.0, + "learning_rate": 6.425463806309058e-05, + "loss": 12.3751, + "step": 10214 + }, + { + "epoch": 0.42578466925096914, + "grad_norm": 153.0, + "learning_rate": 6.42481680429188e-05, + "loss": 7.3762, + "step": 10215 + }, + { + "epoch": 0.42582635154849735, + "grad_norm": 290.0, + "learning_rate": 6.42416977630729e-05, + "loss": 12.2527, + "step": 10216 + }, + { + "epoch": 0.4258680338460256, + "grad_norm": 302.0, + "learning_rate": 6.423522722367081e-05, + "loss": 13.814, + "step": 10217 + }, + { + "epoch": 0.42590971614355383, + "grad_norm": 184.0, + "learning_rate": 6.422875642483043e-05, + "loss": 12.1877, + "step": 10218 + }, + { + "epoch": 0.4259513984410821, + "grad_norm": 378.0, + "learning_rate": 6.42222853666697e-05, + "loss": 14.4378, + "step": 10219 + }, + { + "epoch": 0.4259930807386103, + "grad_norm": 197.0, + "learning_rate": 6.421581404930654e-05, + "loss": 9.8776, + "step": 10220 + }, + { + "epoch": 0.4260347630361386, + "grad_norm": 320.0, + "learning_rate": 6.420934247285893e-05, + "loss": 14.6877, + "step": 10221 + }, + { + "epoch": 0.4260764453336668, + "grad_norm": 262.0, + "learning_rate": 6.42028706374448e-05, + "loss": 12.3139, + "step": 10222 + }, + { + "epoch": 0.42611812763119505, + "grad_norm": 430.0, + "learning_rate": 6.419639854318206e-05, + "loss": 14.3777, + "step": 10223 + }, + { + "epoch": 0.42615980992872327, + "grad_norm": 272.0, + "learning_rate": 6.41899261901887e-05, + "loss": 12.6877, + "step": 10224 + }, + { + "epoch": 0.42620149222625153, + "grad_norm": 904.0, + "learning_rate": 6.41834535785827e-05, + "loss": 26.0003, + "step": 10225 + }, + { + "epoch": 0.42624317452377974, + "grad_norm": 700.0, + "learning_rate": 6.4176980708482e-05, + "loss": 17.7553, + "step": 10226 + }, + { + "epoch": 0.426284856821308, + "grad_norm": 752.0, + "learning_rate": 6.417050758000455e-05, + "loss": 17.754, + "step": 10227 + }, + { + "epoch": 0.4263265391188362, + "grad_norm": 227.0, + "learning_rate": 6.416403419326834e-05, + "loss": 11.3752, + "step": 10228 + }, + { + "epoch": 0.4263682214163645, + "grad_norm": 344.0, + "learning_rate": 6.415756054839136e-05, + "loss": 14.9376, + "step": 10229 + }, + { + "epoch": 0.4264099037138927, + "grad_norm": 992.0, + "learning_rate": 6.415108664549158e-05, + "loss": 26.877, + "step": 10230 + }, + { + "epoch": 0.42645158601142097, + "grad_norm": 189.0, + "learning_rate": 6.414461248468698e-05, + "loss": 9.9378, + "step": 10231 + }, + { + "epoch": 0.4264932683089492, + "grad_norm": 628.0, + "learning_rate": 6.413813806609557e-05, + "loss": 19.7502, + "step": 10232 + }, + { + "epoch": 0.42653495060647745, + "grad_norm": 366.0, + "learning_rate": 6.413166338983535e-05, + "loss": 14.8128, + "step": 10233 + }, + { + "epoch": 0.42657663290400566, + "grad_norm": 350.0, + "learning_rate": 6.41251884560243e-05, + "loss": 14.0005, + "step": 10234 + }, + { + "epoch": 0.4266183152015339, + "grad_norm": 370.0, + "learning_rate": 6.411871326478042e-05, + "loss": 10.3133, + "step": 10235 + }, + { + "epoch": 0.42665999749906214, + "grad_norm": 302.0, + "learning_rate": 6.411223781622175e-05, + "loss": 13.2507, + "step": 10236 + }, + { + "epoch": 0.4267016797965904, + "grad_norm": 316.0, + "learning_rate": 6.410576211046631e-05, + "loss": 13.5628, + "step": 10237 + }, + { + "epoch": 0.4267433620941186, + "grad_norm": 516.0, + "learning_rate": 6.409928614763208e-05, + "loss": 16.0002, + "step": 10238 + }, + { + "epoch": 0.4267850443916469, + "grad_norm": 708.0, + "learning_rate": 6.409280992783711e-05, + "loss": 21.8752, + "step": 10239 + }, + { + "epoch": 0.4268267266891751, + "grad_norm": 616.0, + "learning_rate": 6.408633345119944e-05, + "loss": 19.0014, + "step": 10240 + }, + { + "epoch": 0.42686840898670336, + "grad_norm": 97.5, + "learning_rate": 6.407985671783709e-05, + "loss": 8.4378, + "step": 10241 + }, + { + "epoch": 0.4269100912842316, + "grad_norm": 310.0, + "learning_rate": 6.407337972786811e-05, + "loss": 11.8755, + "step": 10242 + }, + { + "epoch": 0.42695177358175984, + "grad_norm": 464.0, + "learning_rate": 6.406690248141052e-05, + "loss": 14.8768, + "step": 10243 + }, + { + "epoch": 0.42699345587928805, + "grad_norm": 410.0, + "learning_rate": 6.406042497858239e-05, + "loss": 16.0007, + "step": 10244 + }, + { + "epoch": 0.4270351381768163, + "grad_norm": 81.0, + "learning_rate": 6.405394721950176e-05, + "loss": 9.4381, + "step": 10245 + }, + { + "epoch": 0.42707682047434453, + "grad_norm": 232.0, + "learning_rate": 6.40474692042867e-05, + "loss": 11.1878, + "step": 10246 + }, + { + "epoch": 0.4271185027718728, + "grad_norm": 350.0, + "learning_rate": 6.404099093305527e-05, + "loss": 14.0002, + "step": 10247 + }, + { + "epoch": 0.427160185069401, + "grad_norm": 320.0, + "learning_rate": 6.403451240592553e-05, + "loss": 14.2503, + "step": 10248 + }, + { + "epoch": 0.4272018673669293, + "grad_norm": 466.0, + "learning_rate": 6.402803362301555e-05, + "loss": 17.3752, + "step": 10249 + }, + { + "epoch": 0.4272435496644575, + "grad_norm": 1080.0, + "learning_rate": 6.402155458444341e-05, + "loss": 29.3752, + "step": 10250 + }, + { + "epoch": 0.42728523196198576, + "grad_norm": 1408.0, + "learning_rate": 6.40150752903272e-05, + "loss": 34.7509, + "step": 10251 + }, + { + "epoch": 0.42732691425951397, + "grad_norm": 732.0, + "learning_rate": 6.4008595740785e-05, + "loss": 22.6274, + "step": 10252 + }, + { + "epoch": 0.42736859655704224, + "grad_norm": 278.0, + "learning_rate": 6.40021159359349e-05, + "loss": 13.7505, + "step": 10253 + }, + { + "epoch": 0.42741027885457045, + "grad_norm": 316.0, + "learning_rate": 6.399563587589499e-05, + "loss": 14.1884, + "step": 10254 + }, + { + "epoch": 0.4274519611520987, + "grad_norm": 426.0, + "learning_rate": 6.398915556078337e-05, + "loss": 15.1252, + "step": 10255 + }, + { + "epoch": 0.4274936434496269, + "grad_norm": 105.5, + "learning_rate": 6.398267499071816e-05, + "loss": 9.3756, + "step": 10256 + }, + { + "epoch": 0.4275353257471552, + "grad_norm": 512.0, + "learning_rate": 6.397619416581746e-05, + "loss": 17.2503, + "step": 10257 + }, + { + "epoch": 0.4275770080446834, + "grad_norm": 169.0, + "learning_rate": 6.396971308619937e-05, + "loss": 10.3754, + "step": 10258 + }, + { + "epoch": 0.42761869034221167, + "grad_norm": 185.0, + "learning_rate": 6.396323175198202e-05, + "loss": 11.8769, + "step": 10259 + }, + { + "epoch": 0.4276603726397399, + "grad_norm": 1728.0, + "learning_rate": 6.395675016328352e-05, + "loss": 34.0051, + "step": 10260 + }, + { + "epoch": 0.42770205493726815, + "grad_norm": 374.0, + "learning_rate": 6.395026832022202e-05, + "loss": 14.2504, + "step": 10261 + }, + { + "epoch": 0.42774373723479636, + "grad_norm": 172.0, + "learning_rate": 6.394378622291565e-05, + "loss": 10.0007, + "step": 10262 + }, + { + "epoch": 0.42778541953232463, + "grad_norm": 264.0, + "learning_rate": 6.393730387148252e-05, + "loss": 12.8751, + "step": 10263 + }, + { + "epoch": 0.42782710182985284, + "grad_norm": 98.5, + "learning_rate": 6.39308212660408e-05, + "loss": 7.8753, + "step": 10264 + }, + { + "epoch": 0.4278687841273811, + "grad_norm": 1280.0, + "learning_rate": 6.392433840670864e-05, + "loss": 26.1294, + "step": 10265 + }, + { + "epoch": 0.4279104664249093, + "grad_norm": 434.0, + "learning_rate": 6.391785529360416e-05, + "loss": 15.2503, + "step": 10266 + }, + { + "epoch": 0.4279521487224376, + "grad_norm": 306.0, + "learning_rate": 6.391137192684553e-05, + "loss": 13.0629, + "step": 10267 + }, + { + "epoch": 0.4279938310199658, + "grad_norm": 504.0, + "learning_rate": 6.390488830655092e-05, + "loss": 19.626, + "step": 10268 + }, + { + "epoch": 0.42803551331749407, + "grad_norm": 804.0, + "learning_rate": 6.389840443283847e-05, + "loss": 23.5, + "step": 10269 + }, + { + "epoch": 0.4280771956150223, + "grad_norm": 500.0, + "learning_rate": 6.38919203058264e-05, + "loss": 17.2508, + "step": 10270 + }, + { + "epoch": 0.42811887791255054, + "grad_norm": 502.0, + "learning_rate": 6.388543592563282e-05, + "loss": 16.7501, + "step": 10271 + }, + { + "epoch": 0.42816056021007876, + "grad_norm": 229.0, + "learning_rate": 6.387895129237594e-05, + "loss": 11.8753, + "step": 10272 + }, + { + "epoch": 0.428202242507607, + "grad_norm": 114.5, + "learning_rate": 6.387246640617395e-05, + "loss": 9.7515, + "step": 10273 + }, + { + "epoch": 0.42824392480513523, + "grad_norm": 264.0, + "learning_rate": 6.386598126714501e-05, + "loss": 12.8129, + "step": 10274 + }, + { + "epoch": 0.4282856071026635, + "grad_norm": 1368.0, + "learning_rate": 6.385949587540735e-05, + "loss": 34.7503, + "step": 10275 + }, + { + "epoch": 0.4283272894001917, + "grad_norm": 158.0, + "learning_rate": 6.385301023107914e-05, + "loss": 9.7503, + "step": 10276 + }, + { + "epoch": 0.42836897169772, + "grad_norm": 776.0, + "learning_rate": 6.384652433427859e-05, + "loss": 21.7528, + "step": 10277 + }, + { + "epoch": 0.4284106539952482, + "grad_norm": 358.0, + "learning_rate": 6.384003818512391e-05, + "loss": 15.0006, + "step": 10278 + }, + { + "epoch": 0.42845233629277646, + "grad_norm": 374.0, + "learning_rate": 6.38335517837333e-05, + "loss": 16.3752, + "step": 10279 + }, + { + "epoch": 0.42849401859030467, + "grad_norm": 266.0, + "learning_rate": 6.382706513022497e-05, + "loss": 13.7504, + "step": 10280 + }, + { + "epoch": 0.42853570088783294, + "grad_norm": 450.0, + "learning_rate": 6.382057822471717e-05, + "loss": 15.6256, + "step": 10281 + }, + { + "epoch": 0.4285773831853612, + "grad_norm": 468.0, + "learning_rate": 6.38140910673281e-05, + "loss": 15.1276, + "step": 10282 + }, + { + "epoch": 0.4286190654828894, + "grad_norm": 241.0, + "learning_rate": 6.380760365817598e-05, + "loss": 11.8128, + "step": 10283 + }, + { + "epoch": 0.4286607477804177, + "grad_norm": 292.0, + "learning_rate": 6.380111599737908e-05, + "loss": 9.6891, + "step": 10284 + }, + { + "epoch": 0.4287024300779459, + "grad_norm": 1056.0, + "learning_rate": 6.37946280850556e-05, + "loss": 23.8802, + "step": 10285 + }, + { + "epoch": 0.42874411237547416, + "grad_norm": 356.0, + "learning_rate": 6.37881399213238e-05, + "loss": 14.6877, + "step": 10286 + }, + { + "epoch": 0.4287857946730024, + "grad_norm": 520.0, + "learning_rate": 6.378165150630192e-05, + "loss": 17.8756, + "step": 10287 + }, + { + "epoch": 0.42882747697053064, + "grad_norm": 524.0, + "learning_rate": 6.377516284010822e-05, + "loss": 17.2504, + "step": 10288 + }, + { + "epoch": 0.42886915926805885, + "grad_norm": 249.0, + "learning_rate": 6.376867392286096e-05, + "loss": 12.0011, + "step": 10289 + }, + { + "epoch": 0.4289108415655871, + "grad_norm": 288.0, + "learning_rate": 6.376218475467841e-05, + "loss": 13.7508, + "step": 10290 + }, + { + "epoch": 0.42895252386311533, + "grad_norm": 384.0, + "learning_rate": 6.37556953356788e-05, + "loss": 14.9417, + "step": 10291 + }, + { + "epoch": 0.4289942061606436, + "grad_norm": 744.0, + "learning_rate": 6.374920566598044e-05, + "loss": 20.8752, + "step": 10292 + }, + { + "epoch": 0.4290358884581718, + "grad_norm": 356.0, + "learning_rate": 6.374271574570156e-05, + "loss": 13.1878, + "step": 10293 + }, + { + "epoch": 0.4290775707557001, + "grad_norm": 98.0, + "learning_rate": 6.373622557496049e-05, + "loss": 9.8129, + "step": 10294 + }, + { + "epoch": 0.4291192530532283, + "grad_norm": 270.0, + "learning_rate": 6.372973515387548e-05, + "loss": 12.6254, + "step": 10295 + }, + { + "epoch": 0.42916093535075656, + "grad_norm": 282.0, + "learning_rate": 6.372324448256482e-05, + "loss": 12.8127, + "step": 10296 + }, + { + "epoch": 0.42920261764828477, + "grad_norm": 552.0, + "learning_rate": 6.371675356114683e-05, + "loss": 19.0004, + "step": 10297 + }, + { + "epoch": 0.42924429994581303, + "grad_norm": 218.0, + "learning_rate": 6.371026238973978e-05, + "loss": 12.0002, + "step": 10298 + }, + { + "epoch": 0.42928598224334125, + "grad_norm": 390.0, + "learning_rate": 6.370377096846196e-05, + "loss": 15.6882, + "step": 10299 + }, + { + "epoch": 0.4293276645408695, + "grad_norm": 438.0, + "learning_rate": 6.369727929743172e-05, + "loss": 15.5628, + "step": 10300 + }, + { + "epoch": 0.4293693468383977, + "grad_norm": 262.0, + "learning_rate": 6.369078737676735e-05, + "loss": 12.3127, + "step": 10301 + }, + { + "epoch": 0.429411029135926, + "grad_norm": 360.0, + "learning_rate": 6.368429520658716e-05, + "loss": 13.8127, + "step": 10302 + }, + { + "epoch": 0.4294527114334542, + "grad_norm": 520.0, + "learning_rate": 6.367780278700948e-05, + "loss": 17.1258, + "step": 10303 + }, + { + "epoch": 0.42949439373098247, + "grad_norm": 231.0, + "learning_rate": 6.367131011815261e-05, + "loss": 12.3132, + "step": 10304 + }, + { + "epoch": 0.4295360760285107, + "grad_norm": 476.0, + "learning_rate": 6.366481720013492e-05, + "loss": 16.7504, + "step": 10305 + }, + { + "epoch": 0.42957775832603895, + "grad_norm": 438.0, + "learning_rate": 6.365832403307472e-05, + "loss": 15.8133, + "step": 10306 + }, + { + "epoch": 0.42961944062356716, + "grad_norm": 684.0, + "learning_rate": 6.365183061709034e-05, + "loss": 21.8753, + "step": 10307 + }, + { + "epoch": 0.42966112292109543, + "grad_norm": 174.0, + "learning_rate": 6.364533695230015e-05, + "loss": 10.0634, + "step": 10308 + }, + { + "epoch": 0.42970280521862364, + "grad_norm": 392.0, + "learning_rate": 6.363884303882248e-05, + "loss": 14.5627, + "step": 10309 + }, + { + "epoch": 0.4297444875161519, + "grad_norm": 202.0, + "learning_rate": 6.363234887677568e-05, + "loss": 12.0656, + "step": 10310 + }, + { + "epoch": 0.4297861698136801, + "grad_norm": 644.0, + "learning_rate": 6.362585446627812e-05, + "loss": 21.7503, + "step": 10311 + }, + { + "epoch": 0.4298278521112084, + "grad_norm": 152.0, + "learning_rate": 6.361935980744813e-05, + "loss": 10.8127, + "step": 10312 + }, + { + "epoch": 0.4298695344087366, + "grad_norm": 438.0, + "learning_rate": 6.361286490040412e-05, + "loss": 16.2502, + "step": 10313 + }, + { + "epoch": 0.42991121670626486, + "grad_norm": 249.0, + "learning_rate": 6.360636974526444e-05, + "loss": 13.7504, + "step": 10314 + }, + { + "epoch": 0.4299528990037931, + "grad_norm": 476.0, + "learning_rate": 6.359987434214744e-05, + "loss": 16.2501, + "step": 10315 + }, + { + "epoch": 0.42999458130132134, + "grad_norm": 372.0, + "learning_rate": 6.359337869117156e-05, + "loss": 13.1264, + "step": 10316 + }, + { + "epoch": 0.43003626359884956, + "grad_norm": 220.0, + "learning_rate": 6.358688279245513e-05, + "loss": 12.7506, + "step": 10317 + }, + { + "epoch": 0.4300779458963778, + "grad_norm": 406.0, + "learning_rate": 6.358038664611654e-05, + "loss": 15.4392, + "step": 10318 + }, + { + "epoch": 0.43011962819390603, + "grad_norm": 448.0, + "learning_rate": 6.357389025227421e-05, + "loss": 16.7504, + "step": 10319 + }, + { + "epoch": 0.4301613104914343, + "grad_norm": 368.0, + "learning_rate": 6.356739361104653e-05, + "loss": 13.1877, + "step": 10320 + }, + { + "epoch": 0.4302029927889625, + "grad_norm": 258.0, + "learning_rate": 6.35608967225519e-05, + "loss": 12.8129, + "step": 10321 + }, + { + "epoch": 0.4302446750864908, + "grad_norm": 181.0, + "learning_rate": 6.355439958690871e-05, + "loss": 11.2501, + "step": 10322 + }, + { + "epoch": 0.430286357384019, + "grad_norm": 242.0, + "learning_rate": 6.354790220423539e-05, + "loss": 11.1253, + "step": 10323 + }, + { + "epoch": 0.43032803968154726, + "grad_norm": 380.0, + "learning_rate": 6.354140457465035e-05, + "loss": 15.2503, + "step": 10324 + }, + { + "epoch": 0.43036972197907547, + "grad_norm": 446.0, + "learning_rate": 6.3534906698272e-05, + "loss": 14.441, + "step": 10325 + }, + { + "epoch": 0.43041140427660374, + "grad_norm": 326.0, + "learning_rate": 6.352840857521878e-05, + "loss": 14.1877, + "step": 10326 + }, + { + "epoch": 0.43045308657413195, + "grad_norm": 796.0, + "learning_rate": 6.352191020560912e-05, + "loss": 21.0003, + "step": 10327 + }, + { + "epoch": 0.4304947688716602, + "grad_norm": 628.0, + "learning_rate": 6.351541158956144e-05, + "loss": 19.2503, + "step": 10328 + }, + { + "epoch": 0.4305364511691884, + "grad_norm": 215.0, + "learning_rate": 6.350891272719417e-05, + "loss": 11.6252, + "step": 10329 + }, + { + "epoch": 0.4305781334667167, + "grad_norm": 446.0, + "learning_rate": 6.350241361862579e-05, + "loss": 17.2502, + "step": 10330 + }, + { + "epoch": 0.4306198157642449, + "grad_norm": 516.0, + "learning_rate": 6.349591426397472e-05, + "loss": 18.2502, + "step": 10331 + }, + { + "epoch": 0.4306614980617732, + "grad_norm": 166.0, + "learning_rate": 6.348941466335941e-05, + "loss": 9.7502, + "step": 10332 + }, + { + "epoch": 0.4307031803593014, + "grad_norm": 346.0, + "learning_rate": 6.348291481689831e-05, + "loss": 14.5627, + "step": 10333 + }, + { + "epoch": 0.43074486265682965, + "grad_norm": 56.25, + "learning_rate": 6.347641472470991e-05, + "loss": 7.0943, + "step": 10334 + }, + { + "epoch": 0.43078654495435786, + "grad_norm": 212.0, + "learning_rate": 6.346991438691265e-05, + "loss": 11.5003, + "step": 10335 + }, + { + "epoch": 0.43082822725188613, + "grad_norm": 88.5, + "learning_rate": 6.346341380362499e-05, + "loss": 9.6885, + "step": 10336 + }, + { + "epoch": 0.43086990954941434, + "grad_norm": 410.0, + "learning_rate": 6.345691297496543e-05, + "loss": 15.626, + "step": 10337 + }, + { + "epoch": 0.4309115918469426, + "grad_norm": 183.0, + "learning_rate": 6.345041190105243e-05, + "loss": 10.7502, + "step": 10338 + }, + { + "epoch": 0.4309532741444708, + "grad_norm": 200.0, + "learning_rate": 6.344391058200449e-05, + "loss": 10.5627, + "step": 10339 + }, + { + "epoch": 0.4309949564419991, + "grad_norm": 116.0, + "learning_rate": 6.343740901794008e-05, + "loss": 9.7505, + "step": 10340 + }, + { + "epoch": 0.4310366387395273, + "grad_norm": 426.0, + "learning_rate": 6.34309072089777e-05, + "loss": 15.2503, + "step": 10341 + }, + { + "epoch": 0.43107832103705557, + "grad_norm": 780.0, + "learning_rate": 6.342440515523584e-05, + "loss": 22.3755, + "step": 10342 + }, + { + "epoch": 0.4311200033345838, + "grad_norm": 564.0, + "learning_rate": 6.3417902856833e-05, + "loss": 18.876, + "step": 10343 + }, + { + "epoch": 0.43116168563211205, + "grad_norm": 692.0, + "learning_rate": 6.34114003138877e-05, + "loss": 20.6256, + "step": 10344 + }, + { + "epoch": 0.43120336792964026, + "grad_norm": 326.0, + "learning_rate": 6.340489752651843e-05, + "loss": 13.1254, + "step": 10345 + }, + { + "epoch": 0.4312450502271685, + "grad_norm": 324.0, + "learning_rate": 6.339839449484371e-05, + "loss": 12.7515, + "step": 10346 + }, + { + "epoch": 0.43128673252469674, + "grad_norm": 374.0, + "learning_rate": 6.339189121898208e-05, + "loss": 15.6255, + "step": 10347 + }, + { + "epoch": 0.431328414822225, + "grad_norm": 852.0, + "learning_rate": 6.338538769905202e-05, + "loss": 22.5002, + "step": 10348 + }, + { + "epoch": 0.4313700971197532, + "grad_norm": 215.0, + "learning_rate": 6.33788839351721e-05, + "loss": 9.8758, + "step": 10349 + }, + { + "epoch": 0.4314117794172815, + "grad_norm": 438.0, + "learning_rate": 6.337237992746082e-05, + "loss": 13.3164, + "step": 10350 + }, + { + "epoch": 0.4314534617148097, + "grad_norm": 448.0, + "learning_rate": 6.336587567603673e-05, + "loss": 16.6255, + "step": 10351 + }, + { + "epoch": 0.43149514401233796, + "grad_norm": 744.0, + "learning_rate": 6.335937118101836e-05, + "loss": 20.2508, + "step": 10352 + }, + { + "epoch": 0.4315368263098662, + "grad_norm": 260.0, + "learning_rate": 6.33528664425243e-05, + "loss": 13.2503, + "step": 10353 + }, + { + "epoch": 0.43157850860739444, + "grad_norm": 568.0, + "learning_rate": 6.334636146067304e-05, + "loss": 18.0002, + "step": 10354 + }, + { + "epoch": 0.4316201909049227, + "grad_norm": 502.0, + "learning_rate": 6.333985623558315e-05, + "loss": 14.7539, + "step": 10355 + }, + { + "epoch": 0.4316618732024509, + "grad_norm": 304.0, + "learning_rate": 6.33333507673732e-05, + "loss": 13.1877, + "step": 10356 + }, + { + "epoch": 0.4317035554999792, + "grad_norm": 544.0, + "learning_rate": 6.332684505616175e-05, + "loss": 19.3753, + "step": 10357 + }, + { + "epoch": 0.4317452377975074, + "grad_norm": 374.0, + "learning_rate": 6.332033910206737e-05, + "loss": 14.5629, + "step": 10358 + }, + { + "epoch": 0.43178692009503566, + "grad_norm": 436.0, + "learning_rate": 6.331383290520862e-05, + "loss": 16.7503, + "step": 10359 + }, + { + "epoch": 0.4318286023925639, + "grad_norm": 604.0, + "learning_rate": 6.33073264657041e-05, + "loss": 20.7502, + "step": 10360 + }, + { + "epoch": 0.43187028469009214, + "grad_norm": 219.0, + "learning_rate": 6.330081978367238e-05, + "loss": 12.1264, + "step": 10361 + }, + { + "epoch": 0.43191196698762035, + "grad_norm": 820.0, + "learning_rate": 6.329431285923199e-05, + "loss": 24.1254, + "step": 10362 + }, + { + "epoch": 0.4319536492851486, + "grad_norm": 98.5, + "learning_rate": 6.328780569250161e-05, + "loss": 8.2502, + "step": 10363 + }, + { + "epoch": 0.43199533158267683, + "grad_norm": 968.0, + "learning_rate": 6.328129828359977e-05, + "loss": 24.6259, + "step": 10364 + }, + { + "epoch": 0.4320370138802051, + "grad_norm": 266.0, + "learning_rate": 6.32747906326451e-05, + "loss": 13.5003, + "step": 10365 + }, + { + "epoch": 0.4320786961777333, + "grad_norm": 680.0, + "learning_rate": 6.32682827397562e-05, + "loss": 16.8801, + "step": 10366 + }, + { + "epoch": 0.4321203784752616, + "grad_norm": 72.5, + "learning_rate": 6.326177460505167e-05, + "loss": 8.1254, + "step": 10367 + }, + { + "epoch": 0.4321620607727898, + "grad_norm": 318.0, + "learning_rate": 6.325526622865012e-05, + "loss": 13.3752, + "step": 10368 + }, + { + "epoch": 0.43220374307031806, + "grad_norm": 506.0, + "learning_rate": 6.324875761067015e-05, + "loss": 17.0003, + "step": 10369 + }, + { + "epoch": 0.43224542536784627, + "grad_norm": 173.0, + "learning_rate": 6.32422487512304e-05, + "loss": 10.4378, + "step": 10370 + }, + { + "epoch": 0.43228710766537454, + "grad_norm": 368.0, + "learning_rate": 6.32357396504495e-05, + "loss": 10.7503, + "step": 10371 + }, + { + "epoch": 0.43232878996290275, + "grad_norm": 460.0, + "learning_rate": 6.322923030844608e-05, + "loss": 16.8752, + "step": 10372 + }, + { + "epoch": 0.432370472260431, + "grad_norm": 304.0, + "learning_rate": 6.322272072533874e-05, + "loss": 11.7502, + "step": 10373 + }, + { + "epoch": 0.4324121545579592, + "grad_norm": 370.0, + "learning_rate": 6.321621090124616e-05, + "loss": 13.5628, + "step": 10374 + }, + { + "epoch": 0.4324538368554875, + "grad_norm": 652.0, + "learning_rate": 6.320970083628695e-05, + "loss": 19.7501, + "step": 10375 + }, + { + "epoch": 0.4324955191530157, + "grad_norm": 772.0, + "learning_rate": 6.320319053057976e-05, + "loss": 21.6251, + "step": 10376 + }, + { + "epoch": 0.432537201450544, + "grad_norm": 340.0, + "learning_rate": 6.319667998424327e-05, + "loss": 15.1264, + "step": 10377 + }, + { + "epoch": 0.4325788837480722, + "grad_norm": 107.5, + "learning_rate": 6.319016919739611e-05, + "loss": 7.1564, + "step": 10378 + }, + { + "epoch": 0.43262056604560045, + "grad_norm": 414.0, + "learning_rate": 6.318365817015695e-05, + "loss": 15.3135, + "step": 10379 + }, + { + "epoch": 0.43266224834312866, + "grad_norm": 468.0, + "learning_rate": 6.317714690264445e-05, + "loss": 15.7534, + "step": 10380 + }, + { + "epoch": 0.43270393064065693, + "grad_norm": 382.0, + "learning_rate": 6.317063539497727e-05, + "loss": 14.1877, + "step": 10381 + }, + { + "epoch": 0.43274561293818514, + "grad_norm": 139.0, + "learning_rate": 6.316412364727408e-05, + "loss": 8.7507, + "step": 10382 + }, + { + "epoch": 0.4327872952357134, + "grad_norm": 336.0, + "learning_rate": 6.31576116596536e-05, + "loss": 13.2502, + "step": 10383 + }, + { + "epoch": 0.4328289775332416, + "grad_norm": 62.75, + "learning_rate": 6.315109943223445e-05, + "loss": 7.469, + "step": 10384 + }, + { + "epoch": 0.4328706598307699, + "grad_norm": 328.0, + "learning_rate": 6.314458696513535e-05, + "loss": 13.2508, + "step": 10385 + }, + { + "epoch": 0.4329123421282981, + "grad_norm": 64.5, + "learning_rate": 6.3138074258475e-05, + "loss": 8.7504, + "step": 10386 + }, + { + "epoch": 0.43295402442582637, + "grad_norm": 430.0, + "learning_rate": 6.313156131237206e-05, + "loss": 15.8753, + "step": 10387 + }, + { + "epoch": 0.4329957067233546, + "grad_norm": 524.0, + "learning_rate": 6.312504812694526e-05, + "loss": 17.1273, + "step": 10388 + }, + { + "epoch": 0.43303738902088285, + "grad_norm": 139.0, + "learning_rate": 6.31185347023133e-05, + "loss": 9.1884, + "step": 10389 + }, + { + "epoch": 0.43307907131841106, + "grad_norm": 210.0, + "learning_rate": 6.311202103859487e-05, + "loss": 11.8757, + "step": 10390 + }, + { + "epoch": 0.4331207536159393, + "grad_norm": 264.0, + "learning_rate": 6.31055071359087e-05, + "loss": 12.5004, + "step": 10391 + }, + { + "epoch": 0.43316243591346754, + "grad_norm": 92.5, + "learning_rate": 6.309899299437349e-05, + "loss": 9.9392, + "step": 10392 + }, + { + "epoch": 0.4332041182109958, + "grad_norm": 328.0, + "learning_rate": 6.309247861410798e-05, + "loss": 13.0629, + "step": 10393 + }, + { + "epoch": 0.433245800508524, + "grad_norm": 636.0, + "learning_rate": 6.308596399523089e-05, + "loss": 18.3756, + "step": 10394 + }, + { + "epoch": 0.4332874828060523, + "grad_norm": 122.0, + "learning_rate": 6.307944913786093e-05, + "loss": 9.0631, + "step": 10395 + }, + { + "epoch": 0.4333291651035805, + "grad_norm": 217.0, + "learning_rate": 6.307293404211687e-05, + "loss": 11.3127, + "step": 10396 + }, + { + "epoch": 0.43337084740110876, + "grad_norm": 366.0, + "learning_rate": 6.306641870811741e-05, + "loss": 13.5627, + "step": 10397 + }, + { + "epoch": 0.433412529698637, + "grad_norm": 416.0, + "learning_rate": 6.30599031359813e-05, + "loss": 15.9385, + "step": 10398 + }, + { + "epoch": 0.43345421199616524, + "grad_norm": 784.0, + "learning_rate": 6.30533873258273e-05, + "loss": 23.8752, + "step": 10399 + }, + { + "epoch": 0.43349589429369345, + "grad_norm": 284.0, + "learning_rate": 6.304687127777415e-05, + "loss": 15.2504, + "step": 10400 + }, + { + "epoch": 0.4335375765912217, + "grad_norm": 556.0, + "learning_rate": 6.304035499194063e-05, + "loss": 19.6254, + "step": 10401 + }, + { + "epoch": 0.43357925888874993, + "grad_norm": 520.0, + "learning_rate": 6.303383846844548e-05, + "loss": 17.3755, + "step": 10402 + }, + { + "epoch": 0.4336209411862782, + "grad_norm": 704.0, + "learning_rate": 6.302732170740748e-05, + "loss": 20.8752, + "step": 10403 + }, + { + "epoch": 0.4336626234838064, + "grad_norm": 218.0, + "learning_rate": 6.302080470894536e-05, + "loss": 10.1253, + "step": 10404 + }, + { + "epoch": 0.4337043057813347, + "grad_norm": 204.0, + "learning_rate": 6.301428747317793e-05, + "loss": 10.5005, + "step": 10405 + }, + { + "epoch": 0.4337459880788629, + "grad_norm": 177.0, + "learning_rate": 6.300777000022396e-05, + "loss": 11.1253, + "step": 10406 + }, + { + "epoch": 0.43378767037639115, + "grad_norm": 488.0, + "learning_rate": 6.300125229020221e-05, + "loss": 16.6252, + "step": 10407 + }, + { + "epoch": 0.43382935267391937, + "grad_norm": 328.0, + "learning_rate": 6.299473434323151e-05, + "loss": 15.0012, + "step": 10408 + }, + { + "epoch": 0.43387103497144763, + "grad_norm": 145.0, + "learning_rate": 6.29882161594306e-05, + "loss": 8.8757, + "step": 10409 + }, + { + "epoch": 0.43391271726897584, + "grad_norm": 118.5, + "learning_rate": 6.29816977389183e-05, + "loss": 9.5003, + "step": 10410 + }, + { + "epoch": 0.4339543995665041, + "grad_norm": 100.0, + "learning_rate": 6.297517908181342e-05, + "loss": 8.7504, + "step": 10411 + }, + { + "epoch": 0.4339960818640323, + "grad_norm": 454.0, + "learning_rate": 6.296866018823473e-05, + "loss": 15.6252, + "step": 10412 + }, + { + "epoch": 0.4340377641615606, + "grad_norm": 386.0, + "learning_rate": 6.296214105830108e-05, + "loss": 13.1255, + "step": 10413 + }, + { + "epoch": 0.4340794464590888, + "grad_norm": 221.0, + "learning_rate": 6.295562169213124e-05, + "loss": 12.5003, + "step": 10414 + }, + { + "epoch": 0.43412112875661707, + "grad_norm": 298.0, + "learning_rate": 6.294910208984405e-05, + "loss": 12.5627, + "step": 10415 + }, + { + "epoch": 0.4341628110541453, + "grad_norm": 204.0, + "learning_rate": 6.294258225155832e-05, + "loss": 10.4381, + "step": 10416 + }, + { + "epoch": 0.43420449335167355, + "grad_norm": 442.0, + "learning_rate": 6.293606217739288e-05, + "loss": 15.2503, + "step": 10417 + }, + { + "epoch": 0.43424617564920176, + "grad_norm": 53.0, + "learning_rate": 6.292954186746657e-05, + "loss": 6.9065, + "step": 10418 + }, + { + "epoch": 0.43428785794673, + "grad_norm": 436.0, + "learning_rate": 6.29230213218982e-05, + "loss": 16.0003, + "step": 10419 + }, + { + "epoch": 0.43432954024425824, + "grad_norm": 258.0, + "learning_rate": 6.291650054080663e-05, + "loss": 12.3127, + "step": 10420 + }, + { + "epoch": 0.4343712225417865, + "grad_norm": 358.0, + "learning_rate": 6.290997952431069e-05, + "loss": 14.6256, + "step": 10421 + }, + { + "epoch": 0.4344129048393147, + "grad_norm": 185.0, + "learning_rate": 6.290345827252922e-05, + "loss": 10.938, + "step": 10422 + }, + { + "epoch": 0.434454587136843, + "grad_norm": 207.0, + "learning_rate": 6.289693678558109e-05, + "loss": 10.6258, + "step": 10423 + }, + { + "epoch": 0.4344962694343712, + "grad_norm": 274.0, + "learning_rate": 6.289041506358513e-05, + "loss": 12.5002, + "step": 10424 + }, + { + "epoch": 0.43453795173189946, + "grad_norm": 474.0, + "learning_rate": 6.28838931066602e-05, + "loss": 17.5003, + "step": 10425 + }, + { + "epoch": 0.4345796340294277, + "grad_norm": 151.0, + "learning_rate": 6.287737091492519e-05, + "loss": 9.7502, + "step": 10426 + }, + { + "epoch": 0.43462131632695594, + "grad_norm": 40.75, + "learning_rate": 6.287084848849894e-05, + "loss": 7.2503, + "step": 10427 + }, + { + "epoch": 0.4346629986244842, + "grad_norm": 161.0, + "learning_rate": 6.286432582750034e-05, + "loss": 8.3141, + "step": 10428 + }, + { + "epoch": 0.4347046809220124, + "grad_norm": 138.0, + "learning_rate": 6.285780293204827e-05, + "loss": 10.1258, + "step": 10429 + }, + { + "epoch": 0.4347463632195407, + "grad_norm": 310.0, + "learning_rate": 6.28512798022616e-05, + "loss": 12.6878, + "step": 10430 + }, + { + "epoch": 0.4347880455170689, + "grad_norm": 199.0, + "learning_rate": 6.28447564382592e-05, + "loss": 11.2502, + "step": 10431 + }, + { + "epoch": 0.43482972781459717, + "grad_norm": 196.0, + "learning_rate": 6.283823284015999e-05, + "loss": 10.9377, + "step": 10432 + }, + { + "epoch": 0.4348714101121254, + "grad_norm": 112.0, + "learning_rate": 6.283170900808284e-05, + "loss": 5.7504, + "step": 10433 + }, + { + "epoch": 0.43491309240965365, + "grad_norm": 442.0, + "learning_rate": 6.282518494214665e-05, + "loss": 15.6879, + "step": 10434 + }, + { + "epoch": 0.43495477470718186, + "grad_norm": 163.0, + "learning_rate": 6.281866064247033e-05, + "loss": 10.3754, + "step": 10435 + }, + { + "epoch": 0.4349964570047101, + "grad_norm": 2192.0, + "learning_rate": 6.281213610917278e-05, + "loss": 41.7507, + "step": 10436 + }, + { + "epoch": 0.43503813930223834, + "grad_norm": 53.25, + "learning_rate": 6.280561134237292e-05, + "loss": 7.5946, + "step": 10437 + }, + { + "epoch": 0.4350798215997666, + "grad_norm": 270.0, + "learning_rate": 6.279908634218963e-05, + "loss": 11.8129, + "step": 10438 + }, + { + "epoch": 0.4351215038972948, + "grad_norm": 364.0, + "learning_rate": 6.279256110874187e-05, + "loss": 14.6254, + "step": 10439 + }, + { + "epoch": 0.4351631861948231, + "grad_norm": 272.0, + "learning_rate": 6.278603564214855e-05, + "loss": 12.8752, + "step": 10440 + }, + { + "epoch": 0.4352048684923513, + "grad_norm": 276.0, + "learning_rate": 6.277950994252861e-05, + "loss": 13.1885, + "step": 10441 + }, + { + "epoch": 0.43524655078987956, + "grad_norm": 249.0, + "learning_rate": 6.277298401000095e-05, + "loss": 12.6255, + "step": 10442 + }, + { + "epoch": 0.43528823308740777, + "grad_norm": 116.5, + "learning_rate": 6.276645784468453e-05, + "loss": 10.5632, + "step": 10443 + }, + { + "epoch": 0.43532991538493604, + "grad_norm": 492.0, + "learning_rate": 6.275993144669828e-05, + "loss": 17.0007, + "step": 10444 + }, + { + "epoch": 0.43537159768246425, + "grad_norm": 402.0, + "learning_rate": 6.275340481616114e-05, + "loss": 16.6281, + "step": 10445 + }, + { + "epoch": 0.4354132799799925, + "grad_norm": 154.0, + "learning_rate": 6.274687795319208e-05, + "loss": 7.5009, + "step": 10446 + }, + { + "epoch": 0.43545496227752073, + "grad_norm": 336.0, + "learning_rate": 6.274035085791003e-05, + "loss": 13.1278, + "step": 10447 + }, + { + "epoch": 0.435496644575049, + "grad_norm": 382.0, + "learning_rate": 6.273382353043396e-05, + "loss": 16.0024, + "step": 10448 + }, + { + "epoch": 0.4355383268725772, + "grad_norm": 298.0, + "learning_rate": 6.272729597088281e-05, + "loss": 12.2502, + "step": 10449 + }, + { + "epoch": 0.4355800091701055, + "grad_norm": 288.0, + "learning_rate": 6.272076817937556e-05, + "loss": 13.0628, + "step": 10450 + }, + { + "epoch": 0.4356216914676337, + "grad_norm": 524.0, + "learning_rate": 6.27142401560312e-05, + "loss": 18.6254, + "step": 10451 + }, + { + "epoch": 0.43566337376516195, + "grad_norm": 414.0, + "learning_rate": 6.270771190096867e-05, + "loss": 15.5626, + "step": 10452 + }, + { + "epoch": 0.43570505606269017, + "grad_norm": 596.0, + "learning_rate": 6.270118341430697e-05, + "loss": 18.8754, + "step": 10453 + }, + { + "epoch": 0.43574673836021843, + "grad_norm": 454.0, + "learning_rate": 6.269465469616507e-05, + "loss": 15.7516, + "step": 10454 + }, + { + "epoch": 0.43578842065774664, + "grad_norm": 221.0, + "learning_rate": 6.268812574666196e-05, + "loss": 12.1878, + "step": 10455 + }, + { + "epoch": 0.4358301029552749, + "grad_norm": 227.0, + "learning_rate": 6.268159656591664e-05, + "loss": 13.0635, + "step": 10456 + }, + { + "epoch": 0.4358717852528031, + "grad_norm": 1128.0, + "learning_rate": 6.267506715404809e-05, + "loss": 26.7508, + "step": 10457 + }, + { + "epoch": 0.4359134675503314, + "grad_norm": 188.0, + "learning_rate": 6.266853751117533e-05, + "loss": 10.0002, + "step": 10458 + }, + { + "epoch": 0.4359551498478596, + "grad_norm": 274.0, + "learning_rate": 6.266200763741733e-05, + "loss": 14.5003, + "step": 10459 + }, + { + "epoch": 0.43599683214538787, + "grad_norm": 183.0, + "learning_rate": 6.265547753289313e-05, + "loss": 10.563, + "step": 10460 + }, + { + "epoch": 0.4360385144429161, + "grad_norm": 426.0, + "learning_rate": 6.264894719772172e-05, + "loss": 15.8127, + "step": 10461 + }, + { + "epoch": 0.43608019674044435, + "grad_norm": 143.0, + "learning_rate": 6.264241663202212e-05, + "loss": 9.6262, + "step": 10462 + }, + { + "epoch": 0.43612187903797256, + "grad_norm": 260.0, + "learning_rate": 6.263588583591337e-05, + "loss": 12.5627, + "step": 10463 + }, + { + "epoch": 0.4361635613355008, + "grad_norm": 412.0, + "learning_rate": 6.262935480951446e-05, + "loss": 15.5002, + "step": 10464 + }, + { + "epoch": 0.43620524363302904, + "grad_norm": 62.75, + "learning_rate": 6.262282355294445e-05, + "loss": 9.0628, + "step": 10465 + }, + { + "epoch": 0.4362469259305573, + "grad_norm": 176.0, + "learning_rate": 6.261629206632235e-05, + "loss": 10.6252, + "step": 10466 + }, + { + "epoch": 0.4362886082280855, + "grad_norm": 181.0, + "learning_rate": 6.260976034976723e-05, + "loss": 10.7506, + "step": 10467 + }, + { + "epoch": 0.4363302905256138, + "grad_norm": 158.0, + "learning_rate": 6.260322840339809e-05, + "loss": 10.3752, + "step": 10468 + }, + { + "epoch": 0.436371972823142, + "grad_norm": 418.0, + "learning_rate": 6.259669622733401e-05, + "loss": 15.1255, + "step": 10469 + }, + { + "epoch": 0.43641365512067026, + "grad_norm": 237.0, + "learning_rate": 6.2590163821694e-05, + "loss": 11.5008, + "step": 10470 + }, + { + "epoch": 0.4364553374181985, + "grad_norm": 199.0, + "learning_rate": 6.258363118659716e-05, + "loss": 10.4377, + "step": 10471 + }, + { + "epoch": 0.43649701971572674, + "grad_norm": 294.0, + "learning_rate": 6.25770983221625e-05, + "loss": 11.3129, + "step": 10472 + }, + { + "epoch": 0.43653870201325495, + "grad_norm": 234.0, + "learning_rate": 6.257056522850913e-05, + "loss": 12.5007, + "step": 10473 + }, + { + "epoch": 0.4365803843107832, + "grad_norm": 148.0, + "learning_rate": 6.256403190575607e-05, + "loss": 10.2504, + "step": 10474 + }, + { + "epoch": 0.43662206660831143, + "grad_norm": 494.0, + "learning_rate": 6.255749835402243e-05, + "loss": 18.2503, + "step": 10475 + }, + { + "epoch": 0.4366637489058397, + "grad_norm": 1056.0, + "learning_rate": 6.255096457342725e-05, + "loss": 24.6301, + "step": 10476 + }, + { + "epoch": 0.4367054312033679, + "grad_norm": 286.0, + "learning_rate": 6.254443056408963e-05, + "loss": 14.2507, + "step": 10477 + }, + { + "epoch": 0.4367471135008962, + "grad_norm": 964.0, + "learning_rate": 6.253789632612868e-05, + "loss": 24.8752, + "step": 10478 + }, + { + "epoch": 0.4367887957984244, + "grad_norm": 676.0, + "learning_rate": 6.253136185966342e-05, + "loss": 19.5002, + "step": 10479 + }, + { + "epoch": 0.43683047809595266, + "grad_norm": 214.0, + "learning_rate": 6.252482716481299e-05, + "loss": 10.8129, + "step": 10480 + }, + { + "epoch": 0.43687216039348087, + "grad_norm": 448.0, + "learning_rate": 6.251829224169649e-05, + "loss": 15.6252, + "step": 10481 + }, + { + "epoch": 0.43691384269100914, + "grad_norm": 290.0, + "learning_rate": 6.251175709043297e-05, + "loss": 13.7504, + "step": 10482 + }, + { + "epoch": 0.43695552498853735, + "grad_norm": 180.0, + "learning_rate": 6.250522171114159e-05, + "loss": 11.1253, + "step": 10483 + }, + { + "epoch": 0.4369972072860656, + "grad_norm": 314.0, + "learning_rate": 6.249868610394144e-05, + "loss": 13.3127, + "step": 10484 + }, + { + "epoch": 0.4370388895835938, + "grad_norm": 404.0, + "learning_rate": 6.249215026895162e-05, + "loss": 17.0004, + "step": 10485 + }, + { + "epoch": 0.4370805718811221, + "grad_norm": 388.0, + "learning_rate": 6.248561420629124e-05, + "loss": 15.0627, + "step": 10486 + }, + { + "epoch": 0.4371222541786503, + "grad_norm": 185.0, + "learning_rate": 6.247907791607943e-05, + "loss": 11.0004, + "step": 10487 + }, + { + "epoch": 0.43716393647617857, + "grad_norm": 100.0, + "learning_rate": 6.247254139843533e-05, + "loss": 6.6253, + "step": 10488 + }, + { + "epoch": 0.4372056187737068, + "grad_norm": 234.0, + "learning_rate": 6.246600465347805e-05, + "loss": 11.3128, + "step": 10489 + }, + { + "epoch": 0.43724730107123505, + "grad_norm": 83.0, + "learning_rate": 6.245946768132674e-05, + "loss": 10.0002, + "step": 10490 + }, + { + "epoch": 0.43728898336876326, + "grad_norm": 384.0, + "learning_rate": 6.24529304821005e-05, + "loss": 13.8128, + "step": 10491 + }, + { + "epoch": 0.43733066566629153, + "grad_norm": 212.0, + "learning_rate": 6.244639305591852e-05, + "loss": 11.3757, + "step": 10492 + }, + { + "epoch": 0.43737234796381974, + "grad_norm": 172.0, + "learning_rate": 6.243985540289991e-05, + "loss": 10.0628, + "step": 10493 + }, + { + "epoch": 0.437414030261348, + "grad_norm": 256.0, + "learning_rate": 6.243331752316384e-05, + "loss": 11.8131, + "step": 10494 + }, + { + "epoch": 0.4374557125588762, + "grad_norm": 812.0, + "learning_rate": 6.242677941682945e-05, + "loss": 26.5003, + "step": 10495 + }, + { + "epoch": 0.4374973948564045, + "grad_norm": 388.0, + "learning_rate": 6.242024108401591e-05, + "loss": 15.1252, + "step": 10496 + }, + { + "epoch": 0.4375390771539327, + "grad_norm": 96.5, + "learning_rate": 6.241370252484235e-05, + "loss": 8.0005, + "step": 10497 + }, + { + "epoch": 0.43758075945146097, + "grad_norm": 236.0, + "learning_rate": 6.240716373942798e-05, + "loss": 11.8755, + "step": 10498 + }, + { + "epoch": 0.4376224417489892, + "grad_norm": 374.0, + "learning_rate": 6.240062472789195e-05, + "loss": 15.5628, + "step": 10499 + }, + { + "epoch": 0.43766412404651744, + "grad_norm": 274.0, + "learning_rate": 6.239408549035343e-05, + "loss": 11.6879, + "step": 10500 + }, + { + "epoch": 0.4377058063440457, + "grad_norm": 246.0, + "learning_rate": 6.23875460269316e-05, + "loss": 10.0631, + "step": 10501 + }, + { + "epoch": 0.4377474886415739, + "grad_norm": 414.0, + "learning_rate": 6.238100633774563e-05, + "loss": 14.6253, + "step": 10502 + }, + { + "epoch": 0.4377891709391022, + "grad_norm": 83.0, + "learning_rate": 6.237446642291473e-05, + "loss": 9.0006, + "step": 10503 + }, + { + "epoch": 0.4378308532366304, + "grad_norm": 356.0, + "learning_rate": 6.23679262825581e-05, + "loss": 14.5002, + "step": 10504 + }, + { + "epoch": 0.43787253553415867, + "grad_norm": 306.0, + "learning_rate": 6.236138591679491e-05, + "loss": 12.0638, + "step": 10505 + }, + { + "epoch": 0.4379142178316869, + "grad_norm": 251.0, + "learning_rate": 6.235484532574433e-05, + "loss": 12.2502, + "step": 10506 + }, + { + "epoch": 0.43795590012921515, + "grad_norm": 308.0, + "learning_rate": 6.234830450952563e-05, + "loss": 14.9384, + "step": 10507 + }, + { + "epoch": 0.43799758242674336, + "grad_norm": 215.0, + "learning_rate": 6.234176346825798e-05, + "loss": 10.2502, + "step": 10508 + }, + { + "epoch": 0.4380392647242716, + "grad_norm": 386.0, + "learning_rate": 6.233522220206059e-05, + "loss": 14.8752, + "step": 10509 + }, + { + "epoch": 0.43808094702179984, + "grad_norm": 368.0, + "learning_rate": 6.232868071105269e-05, + "loss": 14.4377, + "step": 10510 + }, + { + "epoch": 0.4381226293193281, + "grad_norm": 184.0, + "learning_rate": 6.232213899535348e-05, + "loss": 8.5004, + "step": 10511 + }, + { + "epoch": 0.4381643116168563, + "grad_norm": 163.0, + "learning_rate": 6.231559705508219e-05, + "loss": 9.5628, + "step": 10512 + }, + { + "epoch": 0.4382059939143846, + "grad_norm": 486.0, + "learning_rate": 6.230905489035805e-05, + "loss": 17.2504, + "step": 10513 + }, + { + "epoch": 0.4382476762119128, + "grad_norm": 430.0, + "learning_rate": 6.23025125013003e-05, + "loss": 16.6259, + "step": 10514 + }, + { + "epoch": 0.43828935850944106, + "grad_norm": 454.0, + "learning_rate": 6.229596988802817e-05, + "loss": 17.6257, + "step": 10515 + }, + { + "epoch": 0.4383310408069693, + "grad_norm": 140.0, + "learning_rate": 6.228942705066088e-05, + "loss": 10.0008, + "step": 10516 + }, + { + "epoch": 0.43837272310449754, + "grad_norm": 266.0, + "learning_rate": 6.22828839893177e-05, + "loss": 11.3133, + "step": 10517 + }, + { + "epoch": 0.43841440540202575, + "grad_norm": 224.0, + "learning_rate": 6.227634070411787e-05, + "loss": 11.7503, + "step": 10518 + }, + { + "epoch": 0.438456087699554, + "grad_norm": 400.0, + "learning_rate": 6.226979719518065e-05, + "loss": 15.6253, + "step": 10519 + }, + { + "epoch": 0.43849776999708223, + "grad_norm": 239.0, + "learning_rate": 6.226325346262529e-05, + "loss": 12.0627, + "step": 10520 + }, + { + "epoch": 0.4385394522946105, + "grad_norm": 412.0, + "learning_rate": 6.225670950657102e-05, + "loss": 14.6254, + "step": 10521 + }, + { + "epoch": 0.4385811345921387, + "grad_norm": 1016.0, + "learning_rate": 6.225016532713716e-05, + "loss": 27.8759, + "step": 10522 + }, + { + "epoch": 0.438622816889667, + "grad_norm": 221.0, + "learning_rate": 6.224362092444293e-05, + "loss": 11.2508, + "step": 10523 + }, + { + "epoch": 0.4386644991871952, + "grad_norm": 228.0, + "learning_rate": 6.223707629860763e-05, + "loss": 11.2503, + "step": 10524 + }, + { + "epoch": 0.43870618148472346, + "grad_norm": 249.0, + "learning_rate": 6.223053144975053e-05, + "loss": 12.2504, + "step": 10525 + }, + { + "epoch": 0.43874786378225167, + "grad_norm": 217.0, + "learning_rate": 6.22239863779909e-05, + "loss": 9.4378, + "step": 10526 + }, + { + "epoch": 0.43878954607977994, + "grad_norm": 63.25, + "learning_rate": 6.221744108344802e-05, + "loss": 6.2504, + "step": 10527 + }, + { + "epoch": 0.43883122837730815, + "grad_norm": 282.0, + "learning_rate": 6.221089556624122e-05, + "loss": 15.1252, + "step": 10528 + }, + { + "epoch": 0.4388729106748364, + "grad_norm": 496.0, + "learning_rate": 6.220434982648975e-05, + "loss": 17.5003, + "step": 10529 + }, + { + "epoch": 0.4389145929723646, + "grad_norm": 444.0, + "learning_rate": 6.219780386431293e-05, + "loss": 15.9381, + "step": 10530 + }, + { + "epoch": 0.4389562752698929, + "grad_norm": 210.0, + "learning_rate": 6.219125767983004e-05, + "loss": 12.3129, + "step": 10531 + }, + { + "epoch": 0.4389979575674211, + "grad_norm": 402.0, + "learning_rate": 6.218471127316042e-05, + "loss": 15.6252, + "step": 10532 + }, + { + "epoch": 0.43903963986494937, + "grad_norm": 664.0, + "learning_rate": 6.217816464442333e-05, + "loss": 20.1254, + "step": 10533 + }, + { + "epoch": 0.4390813221624776, + "grad_norm": 155.0, + "learning_rate": 6.217161779373812e-05, + "loss": 10.5003, + "step": 10534 + }, + { + "epoch": 0.43912300446000585, + "grad_norm": 352.0, + "learning_rate": 6.21650707212241e-05, + "loss": 13.6877, + "step": 10535 + }, + { + "epoch": 0.43916468675753406, + "grad_norm": 146.0, + "learning_rate": 6.215852342700056e-05, + "loss": 9.2537, + "step": 10536 + }, + { + "epoch": 0.43920636905506233, + "grad_norm": 336.0, + "learning_rate": 6.215197591118689e-05, + "loss": 15.438, + "step": 10537 + }, + { + "epoch": 0.43924805135259054, + "grad_norm": 500.0, + "learning_rate": 6.214542817390235e-05, + "loss": 16.6261, + "step": 10538 + }, + { + "epoch": 0.4392897336501188, + "grad_norm": 183.0, + "learning_rate": 6.213888021526631e-05, + "loss": 11.2502, + "step": 10539 + }, + { + "epoch": 0.439331415947647, + "grad_norm": 676.0, + "learning_rate": 6.21323320353981e-05, + "loss": 20.6252, + "step": 10540 + }, + { + "epoch": 0.4393730982451753, + "grad_norm": 716.0, + "learning_rate": 6.212578363441707e-05, + "loss": 18.2564, + "step": 10541 + }, + { + "epoch": 0.4394147805427035, + "grad_norm": 348.0, + "learning_rate": 6.211923501244255e-05, + "loss": 13.5641, + "step": 10542 + }, + { + "epoch": 0.43945646284023177, + "grad_norm": 1536.0, + "learning_rate": 6.21126861695939e-05, + "loss": 30.6284, + "step": 10543 + }, + { + "epoch": 0.43949814513776, + "grad_norm": 210.0, + "learning_rate": 6.210613710599047e-05, + "loss": 9.5001, + "step": 10544 + }, + { + "epoch": 0.43953982743528824, + "grad_norm": 1448.0, + "learning_rate": 6.209958782175162e-05, + "loss": 28.2503, + "step": 10545 + }, + { + "epoch": 0.43958150973281646, + "grad_norm": 976.0, + "learning_rate": 6.20930383169967e-05, + "loss": 30.0001, + "step": 10546 + }, + { + "epoch": 0.4396231920303447, + "grad_norm": 644.0, + "learning_rate": 6.208648859184508e-05, + "loss": 20.7502, + "step": 10547 + }, + { + "epoch": 0.43966487432787293, + "grad_norm": 904.0, + "learning_rate": 6.207993864641614e-05, + "loss": 27.1267, + "step": 10548 + }, + { + "epoch": 0.4397065566254012, + "grad_norm": 348.0, + "learning_rate": 6.207338848082924e-05, + "loss": 15.1877, + "step": 10549 + }, + { + "epoch": 0.4397482389229294, + "grad_norm": 396.0, + "learning_rate": 6.206683809520378e-05, + "loss": 16.8759, + "step": 10550 + }, + { + "epoch": 0.4397899212204577, + "grad_norm": 87.0, + "learning_rate": 6.206028748965908e-05, + "loss": 8.813, + "step": 10551 + }, + { + "epoch": 0.4398316035179859, + "grad_norm": 600.0, + "learning_rate": 6.205373666431462e-05, + "loss": 17.2512, + "step": 10552 + }, + { + "epoch": 0.43987328581551416, + "grad_norm": 446.0, + "learning_rate": 6.204718561928971e-05, + "loss": 16.0003, + "step": 10553 + }, + { + "epoch": 0.43991496811304237, + "grad_norm": 366.0, + "learning_rate": 6.204063435470378e-05, + "loss": 14.7503, + "step": 10554 + }, + { + "epoch": 0.43995665041057064, + "grad_norm": 192.0, + "learning_rate": 6.203408287067623e-05, + "loss": 7.6882, + "step": 10555 + }, + { + "epoch": 0.43999833270809885, + "grad_norm": 105.0, + "learning_rate": 6.202753116732645e-05, + "loss": 10.0638, + "step": 10556 + }, + { + "epoch": 0.4400400150056271, + "grad_norm": 176.0, + "learning_rate": 6.202097924477383e-05, + "loss": 11.4377, + "step": 10557 + }, + { + "epoch": 0.44008169730315533, + "grad_norm": 398.0, + "learning_rate": 6.201442710313782e-05, + "loss": 15.1879, + "step": 10558 + }, + { + "epoch": 0.4401233796006836, + "grad_norm": 85.5, + "learning_rate": 6.20078747425378e-05, + "loss": 9.4379, + "step": 10559 + }, + { + "epoch": 0.4401650618982118, + "grad_norm": 230.0, + "learning_rate": 6.200132216309319e-05, + "loss": 12.2504, + "step": 10560 + }, + { + "epoch": 0.4402067441957401, + "grad_norm": 500.0, + "learning_rate": 6.199476936492342e-05, + "loss": 16.8755, + "step": 10561 + }, + { + "epoch": 0.4402484264932683, + "grad_norm": 183.0, + "learning_rate": 6.198821634814791e-05, + "loss": 10.3135, + "step": 10562 + }, + { + "epoch": 0.44029010879079655, + "grad_norm": 660.0, + "learning_rate": 6.19816631128861e-05, + "loss": 20.7502, + "step": 10563 + }, + { + "epoch": 0.44033179108832476, + "grad_norm": 740.0, + "learning_rate": 6.197510965925741e-05, + "loss": 21.8752, + "step": 10564 + }, + { + "epoch": 0.44037347338585303, + "grad_norm": 110.5, + "learning_rate": 6.196855598738128e-05, + "loss": 9.4377, + "step": 10565 + }, + { + "epoch": 0.44041515568338124, + "grad_norm": 428.0, + "learning_rate": 6.196200209737716e-05, + "loss": 15.2513, + "step": 10566 + }, + { + "epoch": 0.4404568379809095, + "grad_norm": 1072.0, + "learning_rate": 6.195544798936449e-05, + "loss": 25.8783, + "step": 10567 + }, + { + "epoch": 0.4404985202784377, + "grad_norm": 320.0, + "learning_rate": 6.194889366346273e-05, + "loss": 14.8127, + "step": 10568 + }, + { + "epoch": 0.440540202575966, + "grad_norm": 944.0, + "learning_rate": 6.19423391197913e-05, + "loss": 25.7505, + "step": 10569 + }, + { + "epoch": 0.4405818848734942, + "grad_norm": 258.0, + "learning_rate": 6.193578435846969e-05, + "loss": 11.9377, + "step": 10570 + }, + { + "epoch": 0.44062356717102247, + "grad_norm": 500.0, + "learning_rate": 6.192922937961736e-05, + "loss": 17.5002, + "step": 10571 + }, + { + "epoch": 0.4406652494685507, + "grad_norm": 580.0, + "learning_rate": 6.192267418335375e-05, + "loss": 15.815, + "step": 10572 + }, + { + "epoch": 0.44070693176607895, + "grad_norm": 460.0, + "learning_rate": 6.191611876979837e-05, + "loss": 15.5001, + "step": 10573 + }, + { + "epoch": 0.4407486140636072, + "grad_norm": 318.0, + "learning_rate": 6.190956313907065e-05, + "loss": 14.8778, + "step": 10574 + }, + { + "epoch": 0.4407902963611354, + "grad_norm": 292.0, + "learning_rate": 6.190300729129008e-05, + "loss": 13.1251, + "step": 10575 + }, + { + "epoch": 0.4408319786586637, + "grad_norm": 426.0, + "learning_rate": 6.189645122657616e-05, + "loss": 16.501, + "step": 10576 + }, + { + "epoch": 0.4408736609561919, + "grad_norm": 1576.0, + "learning_rate": 6.188989494504836e-05, + "loss": 32.255, + "step": 10577 + }, + { + "epoch": 0.44091534325372017, + "grad_norm": 252.0, + "learning_rate": 6.188333844682615e-05, + "loss": 11.6877, + "step": 10578 + }, + { + "epoch": 0.4409570255512484, + "grad_norm": 132.0, + "learning_rate": 6.187678173202905e-05, + "loss": 9.0006, + "step": 10579 + }, + { + "epoch": 0.44099870784877665, + "grad_norm": 406.0, + "learning_rate": 6.187022480077655e-05, + "loss": 13.9378, + "step": 10580 + }, + { + "epoch": 0.44104039014630486, + "grad_norm": 322.0, + "learning_rate": 6.186366765318813e-05, + "loss": 13.1877, + "step": 10581 + }, + { + "epoch": 0.44108207244383313, + "grad_norm": 175.0, + "learning_rate": 6.185711028938334e-05, + "loss": 10.1255, + "step": 10582 + }, + { + "epoch": 0.44112375474136134, + "grad_norm": 326.0, + "learning_rate": 6.185055270948167e-05, + "loss": 14.5005, + "step": 10583 + }, + { + "epoch": 0.4411654370388896, + "grad_norm": 205.0, + "learning_rate": 6.18439949136026e-05, + "loss": 11.0021, + "step": 10584 + }, + { + "epoch": 0.4412071193364178, + "grad_norm": 255.0, + "learning_rate": 6.18374369018657e-05, + "loss": 13.0636, + "step": 10585 + }, + { + "epoch": 0.4412488016339461, + "grad_norm": 254.0, + "learning_rate": 6.183087867439043e-05, + "loss": 12.4378, + "step": 10586 + }, + { + "epoch": 0.4412904839314743, + "grad_norm": 183.0, + "learning_rate": 6.182432023129636e-05, + "loss": 9.8137, + "step": 10587 + }, + { + "epoch": 0.44133216622900256, + "grad_norm": 724.0, + "learning_rate": 6.181776157270302e-05, + "loss": 20.127, + "step": 10588 + }, + { + "epoch": 0.4413738485265308, + "grad_norm": 596.0, + "learning_rate": 6.18112026987299e-05, + "loss": 19.1276, + "step": 10589 + }, + { + "epoch": 0.44141553082405904, + "grad_norm": 752.0, + "learning_rate": 6.180464360949658e-05, + "loss": 21.5003, + "step": 10590 + }, + { + "epoch": 0.44145721312158726, + "grad_norm": 97.5, + "learning_rate": 6.179808430512256e-05, + "loss": 7.8754, + "step": 10591 + }, + { + "epoch": 0.4414988954191155, + "grad_norm": 592.0, + "learning_rate": 6.179152478572743e-05, + "loss": 16.2548, + "step": 10592 + }, + { + "epoch": 0.44154057771664373, + "grad_norm": 1896.0, + "learning_rate": 6.17849650514307e-05, + "loss": 37.2503, + "step": 10593 + }, + { + "epoch": 0.441582260014172, + "grad_norm": 276.0, + "learning_rate": 6.177840510235195e-05, + "loss": 12.9378, + "step": 10594 + }, + { + "epoch": 0.4416239423117002, + "grad_norm": 254.0, + "learning_rate": 6.17718449386107e-05, + "loss": 13.5627, + "step": 10595 + }, + { + "epoch": 0.4416656246092285, + "grad_norm": 366.0, + "learning_rate": 6.176528456032656e-05, + "loss": 14.6878, + "step": 10596 + }, + { + "epoch": 0.4417073069067567, + "grad_norm": 350.0, + "learning_rate": 6.175872396761904e-05, + "loss": 15.4379, + "step": 10597 + }, + { + "epoch": 0.44174898920428496, + "grad_norm": 472.0, + "learning_rate": 6.175216316060773e-05, + "loss": 16.2504, + "step": 10598 + }, + { + "epoch": 0.44179067150181317, + "grad_norm": 126.5, + "learning_rate": 6.174560213941222e-05, + "loss": 9.0007, + "step": 10599 + }, + { + "epoch": 0.44183235379934144, + "grad_norm": 676.0, + "learning_rate": 6.173904090415205e-05, + "loss": 19.5011, + "step": 10600 + }, + { + "epoch": 0.44187403609686965, + "grad_norm": 142.0, + "learning_rate": 6.173247945494684e-05, + "loss": 11.6888, + "step": 10601 + }, + { + "epoch": 0.4419157183943979, + "grad_norm": 1544.0, + "learning_rate": 6.172591779191614e-05, + "loss": 35.2537, + "step": 10602 + }, + { + "epoch": 0.44195740069192613, + "grad_norm": 362.0, + "learning_rate": 6.171935591517954e-05, + "loss": 14.7503, + "step": 10603 + }, + { + "epoch": 0.4419990829894544, + "grad_norm": 424.0, + "learning_rate": 6.171279382485665e-05, + "loss": 15.7503, + "step": 10604 + }, + { + "epoch": 0.4420407652869826, + "grad_norm": 604.0, + "learning_rate": 6.170623152106704e-05, + "loss": 16.8754, + "step": 10605 + }, + { + "epoch": 0.4420824475845109, + "grad_norm": 432.0, + "learning_rate": 6.169966900393033e-05, + "loss": 16.2502, + "step": 10606 + }, + { + "epoch": 0.4421241298820391, + "grad_norm": 235.0, + "learning_rate": 6.169310627356611e-05, + "loss": 10.7504, + "step": 10607 + }, + { + "epoch": 0.44216581217956735, + "grad_norm": 126.0, + "learning_rate": 6.168654333009399e-05, + "loss": 9.6256, + "step": 10608 + }, + { + "epoch": 0.44220749447709556, + "grad_norm": 192.0, + "learning_rate": 6.167998017363359e-05, + "loss": 11.4381, + "step": 10609 + }, + { + "epoch": 0.44224917677462383, + "grad_norm": 636.0, + "learning_rate": 6.167341680430451e-05, + "loss": 20.3781, + "step": 10610 + }, + { + "epoch": 0.44229085907215204, + "grad_norm": 225.0, + "learning_rate": 6.166685322222637e-05, + "loss": 11.8127, + "step": 10611 + }, + { + "epoch": 0.4423325413696803, + "grad_norm": 382.0, + "learning_rate": 6.166028942751879e-05, + "loss": 14.5003, + "step": 10612 + }, + { + "epoch": 0.4423742236672085, + "grad_norm": 402.0, + "learning_rate": 6.165372542030141e-05, + "loss": 14.6252, + "step": 10613 + }, + { + "epoch": 0.4424159059647368, + "grad_norm": 190.0, + "learning_rate": 6.164716120069384e-05, + "loss": 10.8758, + "step": 10614 + }, + { + "epoch": 0.442457588262265, + "grad_norm": 508.0, + "learning_rate": 6.164059676881573e-05, + "loss": 17.0028, + "step": 10615 + }, + { + "epoch": 0.44249927055979327, + "grad_norm": 147.0, + "learning_rate": 6.163403212478672e-05, + "loss": 10.3129, + "step": 10616 + }, + { + "epoch": 0.4425409528573215, + "grad_norm": 454.0, + "learning_rate": 6.16274672687264e-05, + "loss": 14.8127, + "step": 10617 + }, + { + "epoch": 0.44258263515484975, + "grad_norm": 336.0, + "learning_rate": 6.162090220075449e-05, + "loss": 14.2503, + "step": 10618 + }, + { + "epoch": 0.44262431745237796, + "grad_norm": 148.0, + "learning_rate": 6.16143369209906e-05, + "loss": 10.7507, + "step": 10619 + }, + { + "epoch": 0.4426659997499062, + "grad_norm": 426.0, + "learning_rate": 6.16077714295544e-05, + "loss": 17.6253, + "step": 10620 + }, + { + "epoch": 0.44270768204743444, + "grad_norm": 227.0, + "learning_rate": 6.160120572656553e-05, + "loss": 12.5002, + "step": 10621 + }, + { + "epoch": 0.4427493643449627, + "grad_norm": 592.0, + "learning_rate": 6.159463981214363e-05, + "loss": 17.6254, + "step": 10622 + }, + { + "epoch": 0.4427910466424909, + "grad_norm": 300.0, + "learning_rate": 6.158807368640842e-05, + "loss": 10.1271, + "step": 10623 + }, + { + "epoch": 0.4428327289400192, + "grad_norm": 154.0, + "learning_rate": 6.158150734947955e-05, + "loss": 10.6253, + "step": 10624 + }, + { + "epoch": 0.4428744112375474, + "grad_norm": 596.0, + "learning_rate": 6.157494080147664e-05, + "loss": 16.5042, + "step": 10625 + }, + { + "epoch": 0.44291609353507566, + "grad_norm": 892.0, + "learning_rate": 6.156837404251944e-05, + "loss": 22.7546, + "step": 10626 + }, + { + "epoch": 0.4429577758326039, + "grad_norm": 424.0, + "learning_rate": 6.156180707272758e-05, + "loss": 16.6258, + "step": 10627 + }, + { + "epoch": 0.44299945813013214, + "grad_norm": 230.0, + "learning_rate": 6.155523989222076e-05, + "loss": 11.1254, + "step": 10628 + }, + { + "epoch": 0.44304114042766035, + "grad_norm": 414.0, + "learning_rate": 6.154867250111866e-05, + "loss": 15.9378, + "step": 10629 + }, + { + "epoch": 0.4430828227251886, + "grad_norm": 322.0, + "learning_rate": 6.154210489954099e-05, + "loss": 12.9381, + "step": 10630 + }, + { + "epoch": 0.44312450502271683, + "grad_norm": 424.0, + "learning_rate": 6.153553708760743e-05, + "loss": 15.8126, + "step": 10631 + }, + { + "epoch": 0.4431661873202451, + "grad_norm": 199.0, + "learning_rate": 6.152896906543769e-05, + "loss": 11.1253, + "step": 10632 + }, + { + "epoch": 0.4432078696177733, + "grad_norm": 216.0, + "learning_rate": 6.152240083315146e-05, + "loss": 12.6253, + "step": 10633 + }, + { + "epoch": 0.4432495519153016, + "grad_norm": 360.0, + "learning_rate": 6.151583239086844e-05, + "loss": 12.3754, + "step": 10634 + }, + { + "epoch": 0.4432912342128298, + "grad_norm": 386.0, + "learning_rate": 6.150926373870837e-05, + "loss": 13.6877, + "step": 10635 + }, + { + "epoch": 0.44333291651035805, + "grad_norm": 94.5, + "learning_rate": 6.150269487679095e-05, + "loss": 9.0638, + "step": 10636 + }, + { + "epoch": 0.44337459880788627, + "grad_norm": 382.0, + "learning_rate": 6.149612580523588e-05, + "loss": 14.9379, + "step": 10637 + }, + { + "epoch": 0.44341628110541453, + "grad_norm": 96.5, + "learning_rate": 6.14895565241629e-05, + "loss": 9.2507, + "step": 10638 + }, + { + "epoch": 0.44345796340294275, + "grad_norm": 796.0, + "learning_rate": 6.148298703369174e-05, + "loss": 23.0002, + "step": 10639 + }, + { + "epoch": 0.443499645700471, + "grad_norm": 149.0, + "learning_rate": 6.147641733394212e-05, + "loss": 9.9377, + "step": 10640 + }, + { + "epoch": 0.4435413279979992, + "grad_norm": 450.0, + "learning_rate": 6.146984742503377e-05, + "loss": 17.0022, + "step": 10641 + }, + { + "epoch": 0.4435830102955275, + "grad_norm": 346.0, + "learning_rate": 6.146327730708642e-05, + "loss": 13.7502, + "step": 10642 + }, + { + "epoch": 0.4436246925930557, + "grad_norm": 108.0, + "learning_rate": 6.145670698021984e-05, + "loss": 8.6876, + "step": 10643 + }, + { + "epoch": 0.44366637489058397, + "grad_norm": 262.0, + "learning_rate": 6.145013644455375e-05, + "loss": 13.1252, + "step": 10644 + }, + { + "epoch": 0.4437080571881122, + "grad_norm": 506.0, + "learning_rate": 6.144356570020791e-05, + "loss": 16.1253, + "step": 10645 + }, + { + "epoch": 0.44374973948564045, + "grad_norm": 231.0, + "learning_rate": 6.143699474730208e-05, + "loss": 11.5628, + "step": 10646 + }, + { + "epoch": 0.4437914217831687, + "grad_norm": 180.0, + "learning_rate": 6.1430423585956e-05, + "loss": 10.6251, + "step": 10647 + }, + { + "epoch": 0.4438331040806969, + "grad_norm": 225.0, + "learning_rate": 6.142385221628944e-05, + "loss": 12.8753, + "step": 10648 + }, + { + "epoch": 0.4438747863782252, + "grad_norm": 208.0, + "learning_rate": 6.141728063842215e-05, + "loss": 12.3755, + "step": 10649 + }, + { + "epoch": 0.4439164686757534, + "grad_norm": 352.0, + "learning_rate": 6.141070885247391e-05, + "loss": 14.1252, + "step": 10650 + }, + { + "epoch": 0.4439581509732817, + "grad_norm": 624.0, + "learning_rate": 6.140413685856449e-05, + "loss": 19.5002, + "step": 10651 + }, + { + "epoch": 0.4439998332708099, + "grad_norm": 288.0, + "learning_rate": 6.139756465681365e-05, + "loss": 13.1878, + "step": 10652 + }, + { + "epoch": 0.44404151556833815, + "grad_norm": 468.0, + "learning_rate": 6.13909922473412e-05, + "loss": 16.3755, + "step": 10653 + }, + { + "epoch": 0.44408319786586636, + "grad_norm": 1312.0, + "learning_rate": 6.138441963026689e-05, + "loss": 31.3765, + "step": 10654 + }, + { + "epoch": 0.44412488016339463, + "grad_norm": 338.0, + "learning_rate": 6.137784680571053e-05, + "loss": 13.4379, + "step": 10655 + }, + { + "epoch": 0.44416656246092284, + "grad_norm": 376.0, + "learning_rate": 6.13712737737919e-05, + "loss": 13.5629, + "step": 10656 + }, + { + "epoch": 0.4442082447584511, + "grad_norm": 1640.0, + "learning_rate": 6.13647005346308e-05, + "loss": 34.501, + "step": 10657 + }, + { + "epoch": 0.4442499270559793, + "grad_norm": 1040.0, + "learning_rate": 6.135812708834701e-05, + "loss": 25.8766, + "step": 10658 + }, + { + "epoch": 0.4442916093535076, + "grad_norm": 1768.0, + "learning_rate": 6.135155343506036e-05, + "loss": 28.0054, + "step": 10659 + }, + { + "epoch": 0.4443332916510358, + "grad_norm": 45.75, + "learning_rate": 6.134497957489065e-05, + "loss": 7.0629, + "step": 10660 + }, + { + "epoch": 0.44437497394856407, + "grad_norm": 338.0, + "learning_rate": 6.133840550795766e-05, + "loss": 13.6266, + "step": 10661 + }, + { + "epoch": 0.4444166562460923, + "grad_norm": 1012.0, + "learning_rate": 6.133183123438123e-05, + "loss": 23.2524, + "step": 10662 + }, + { + "epoch": 0.44445833854362055, + "grad_norm": 120.5, + "learning_rate": 6.132525675428118e-05, + "loss": 9.7504, + "step": 10663 + }, + { + "epoch": 0.44450002084114876, + "grad_norm": 516.0, + "learning_rate": 6.131868206777731e-05, + "loss": 17.1252, + "step": 10664 + }, + { + "epoch": 0.444541703138677, + "grad_norm": 772.0, + "learning_rate": 6.131210717498945e-05, + "loss": 20.626, + "step": 10665 + }, + { + "epoch": 0.44458338543620524, + "grad_norm": 276.0, + "learning_rate": 6.130553207603744e-05, + "loss": 12.9384, + "step": 10666 + }, + { + "epoch": 0.4446250677337335, + "grad_norm": 192.0, + "learning_rate": 6.129895677104109e-05, + "loss": 11.6252, + "step": 10667 + }, + { + "epoch": 0.4446667500312617, + "grad_norm": 170.0, + "learning_rate": 6.129238126012027e-05, + "loss": 10.1252, + "step": 10668 + }, + { + "epoch": 0.44470843232879, + "grad_norm": 157.0, + "learning_rate": 6.128580554339479e-05, + "loss": 10.6256, + "step": 10669 + }, + { + "epoch": 0.4447501146263182, + "grad_norm": 182.0, + "learning_rate": 6.12792296209845e-05, + "loss": 10.2504, + "step": 10670 + }, + { + "epoch": 0.44479179692384646, + "grad_norm": 436.0, + "learning_rate": 6.127265349300926e-05, + "loss": 16.5014, + "step": 10671 + }, + { + "epoch": 0.4448334792213747, + "grad_norm": 170.0, + "learning_rate": 6.126607715958889e-05, + "loss": 11.8753, + "step": 10672 + }, + { + "epoch": 0.44487516151890294, + "grad_norm": 502.0, + "learning_rate": 6.125950062084327e-05, + "loss": 16.5002, + "step": 10673 + }, + { + "epoch": 0.44491684381643115, + "grad_norm": 328.0, + "learning_rate": 6.125292387689228e-05, + "loss": 13.4376, + "step": 10674 + }, + { + "epoch": 0.4449585261139594, + "grad_norm": 246.0, + "learning_rate": 6.124634692785572e-05, + "loss": 9.0007, + "step": 10675 + }, + { + "epoch": 0.44500020841148763, + "grad_norm": 270.0, + "learning_rate": 6.12397697738535e-05, + "loss": 13.0002, + "step": 10676 + }, + { + "epoch": 0.4450418907090159, + "grad_norm": 127.0, + "learning_rate": 6.123319241500548e-05, + "loss": 9.4378, + "step": 10677 + }, + { + "epoch": 0.4450835730065441, + "grad_norm": 292.0, + "learning_rate": 6.122661485143153e-05, + "loss": 14.0003, + "step": 10678 + }, + { + "epoch": 0.4451252553040724, + "grad_norm": 342.0, + "learning_rate": 6.122003708325152e-05, + "loss": 11.6879, + "step": 10679 + }, + { + "epoch": 0.4451669376016006, + "grad_norm": 274.0, + "learning_rate": 6.121345911058534e-05, + "loss": 13.3132, + "step": 10680 + }, + { + "epoch": 0.44520861989912885, + "grad_norm": 96.5, + "learning_rate": 6.120688093355288e-05, + "loss": 7.5627, + "step": 10681 + }, + { + "epoch": 0.44525030219665707, + "grad_norm": 151.0, + "learning_rate": 6.120030255227402e-05, + "loss": 10.8754, + "step": 10682 + }, + { + "epoch": 0.44529198449418533, + "grad_norm": 576.0, + "learning_rate": 6.119372396686864e-05, + "loss": 18.6253, + "step": 10683 + }, + { + "epoch": 0.44533366679171354, + "grad_norm": 370.0, + "learning_rate": 6.118714517745667e-05, + "loss": 14.8752, + "step": 10684 + }, + { + "epoch": 0.4453753490892418, + "grad_norm": 278.0, + "learning_rate": 6.118056618415795e-05, + "loss": 12.5004, + "step": 10685 + }, + { + "epoch": 0.44541703138677, + "grad_norm": 684.0, + "learning_rate": 6.117398698709244e-05, + "loss": 20.0017, + "step": 10686 + }, + { + "epoch": 0.4454587136842983, + "grad_norm": 876.0, + "learning_rate": 6.116740758638003e-05, + "loss": 26.6256, + "step": 10687 + }, + { + "epoch": 0.4455003959818265, + "grad_norm": 239.0, + "learning_rate": 6.116082798214062e-05, + "loss": 12.8753, + "step": 10688 + }, + { + "epoch": 0.44554207827935477, + "grad_norm": 520.0, + "learning_rate": 6.115424817449413e-05, + "loss": 16.8754, + "step": 10689 + }, + { + "epoch": 0.445583760576883, + "grad_norm": 386.0, + "learning_rate": 6.11476681635605e-05, + "loss": 15.2505, + "step": 10690 + }, + { + "epoch": 0.44562544287441125, + "grad_norm": 196.0, + "learning_rate": 6.114108794945958e-05, + "loss": 10.938, + "step": 10691 + }, + { + "epoch": 0.44566712517193946, + "grad_norm": 121.5, + "learning_rate": 6.113450753231137e-05, + "loss": 8.438, + "step": 10692 + }, + { + "epoch": 0.4457088074694677, + "grad_norm": 410.0, + "learning_rate": 6.112792691223577e-05, + "loss": 14.1905, + "step": 10693 + }, + { + "epoch": 0.44575048976699594, + "grad_norm": 280.0, + "learning_rate": 6.112134608935272e-05, + "loss": 12.2502, + "step": 10694 + }, + { + "epoch": 0.4457921720645242, + "grad_norm": 326.0, + "learning_rate": 6.111476506378214e-05, + "loss": 12.5008, + "step": 10695 + }, + { + "epoch": 0.4458338543620524, + "grad_norm": 318.0, + "learning_rate": 6.110818383564399e-05, + "loss": 13.4397, + "step": 10696 + }, + { + "epoch": 0.4458755366595807, + "grad_norm": 568.0, + "learning_rate": 6.110160240505819e-05, + "loss": 18.7503, + "step": 10697 + }, + { + "epoch": 0.4459172189571089, + "grad_norm": 202.0, + "learning_rate": 6.10950207721447e-05, + "loss": 12.3766, + "step": 10698 + }, + { + "epoch": 0.44595890125463716, + "grad_norm": 120.5, + "learning_rate": 6.10884389370235e-05, + "loss": 10.6879, + "step": 10699 + }, + { + "epoch": 0.4460005835521654, + "grad_norm": 147.0, + "learning_rate": 6.108185689981449e-05, + "loss": 9.0628, + "step": 10700 + }, + { + "epoch": 0.44604226584969364, + "grad_norm": 141.0, + "learning_rate": 6.107527466063767e-05, + "loss": 10.0004, + "step": 10701 + }, + { + "epoch": 0.44608394814722185, + "grad_norm": 199.0, + "learning_rate": 6.106869221961296e-05, + "loss": 8.5002, + "step": 10702 + }, + { + "epoch": 0.4461256304447501, + "grad_norm": 270.0, + "learning_rate": 6.106210957686037e-05, + "loss": 13.1877, + "step": 10703 + }, + { + "epoch": 0.44616731274227833, + "grad_norm": 270.0, + "learning_rate": 6.105552673249985e-05, + "loss": 12.6877, + "step": 10704 + }, + { + "epoch": 0.4462089950398066, + "grad_norm": 256.0, + "learning_rate": 6.104894368665137e-05, + "loss": 10.5001, + "step": 10705 + }, + { + "epoch": 0.4462506773373348, + "grad_norm": 292.0, + "learning_rate": 6.104236043943491e-05, + "loss": 12.5629, + "step": 10706 + }, + { + "epoch": 0.4462923596348631, + "grad_norm": 148.0, + "learning_rate": 6.103577699097045e-05, + "loss": 7.9064, + "step": 10707 + }, + { + "epoch": 0.4463340419323913, + "grad_norm": 364.0, + "learning_rate": 6.102919334137798e-05, + "loss": 14.7503, + "step": 10708 + }, + { + "epoch": 0.44637572422991956, + "grad_norm": 900.0, + "learning_rate": 6.102260949077748e-05, + "loss": 25.502, + "step": 10709 + }, + { + "epoch": 0.44641740652744777, + "grad_norm": 212.0, + "learning_rate": 6.101602543928895e-05, + "loss": 11.8127, + "step": 10710 + }, + { + "epoch": 0.44645908882497604, + "grad_norm": 708.0, + "learning_rate": 6.100944118703237e-05, + "loss": 21.0003, + "step": 10711 + }, + { + "epoch": 0.44650077112250425, + "grad_norm": 298.0, + "learning_rate": 6.1002856734127756e-05, + "loss": 14.0006, + "step": 10712 + }, + { + "epoch": 0.4465424534200325, + "grad_norm": 340.0, + "learning_rate": 6.0996272080695095e-05, + "loss": 14.2502, + "step": 10713 + }, + { + "epoch": 0.4465841357175607, + "grad_norm": 348.0, + "learning_rate": 6.09896872268544e-05, + "loss": 15.2503, + "step": 10714 + }, + { + "epoch": 0.446625818015089, + "grad_norm": 784.0, + "learning_rate": 6.098310217272568e-05, + "loss": 21.1257, + "step": 10715 + }, + { + "epoch": 0.4466675003126172, + "grad_norm": 792.0, + "learning_rate": 6.097651691842894e-05, + "loss": 23.6251, + "step": 10716 + }, + { + "epoch": 0.44670918261014547, + "grad_norm": 474.0, + "learning_rate": 6.096993146408421e-05, + "loss": 15.5631, + "step": 10717 + }, + { + "epoch": 0.4467508649076737, + "grad_norm": 219.0, + "learning_rate": 6.09633458098115e-05, + "loss": 10.2503, + "step": 10718 + }, + { + "epoch": 0.44679254720520195, + "grad_norm": 165.0, + "learning_rate": 6.095675995573085e-05, + "loss": 5.9379, + "step": 10719 + }, + { + "epoch": 0.4468342295027302, + "grad_norm": 430.0, + "learning_rate": 6.095017390196227e-05, + "loss": 16.7502, + "step": 10720 + }, + { + "epoch": 0.44687591180025843, + "grad_norm": 314.0, + "learning_rate": 6.094358764862581e-05, + "loss": 13.2501, + "step": 10721 + }, + { + "epoch": 0.4469175940977867, + "grad_norm": 350.0, + "learning_rate": 6.0937001195841484e-05, + "loss": 13.9376, + "step": 10722 + }, + { + "epoch": 0.4469592763953149, + "grad_norm": 290.0, + "learning_rate": 6.093041454372934e-05, + "loss": 13.0627, + "step": 10723 + }, + { + "epoch": 0.4470009586928432, + "grad_norm": 382.0, + "learning_rate": 6.092382769240943e-05, + "loss": 15.063, + "step": 10724 + }, + { + "epoch": 0.4470426409903714, + "grad_norm": 316.0, + "learning_rate": 6.0917240642001774e-05, + "loss": 13.3752, + "step": 10725 + }, + { + "epoch": 0.44708432328789965, + "grad_norm": 624.0, + "learning_rate": 6.091065339262645e-05, + "loss": 17.7502, + "step": 10726 + }, + { + "epoch": 0.44712600558542787, + "grad_norm": 1720.0, + "learning_rate": 6.09040659444035e-05, + "loss": 34.5049, + "step": 10727 + }, + { + "epoch": 0.44716768788295613, + "grad_norm": 374.0, + "learning_rate": 6.0897478297452984e-05, + "loss": 13.8128, + "step": 10728 + }, + { + "epoch": 0.44720937018048434, + "grad_norm": 506.0, + "learning_rate": 6.0890890451894946e-05, + "loss": 17.1256, + "step": 10729 + }, + { + "epoch": 0.4472510524780126, + "grad_norm": 482.0, + "learning_rate": 6.0884302407849455e-05, + "loss": 16.126, + "step": 10730 + }, + { + "epoch": 0.4472927347755408, + "grad_norm": 584.0, + "learning_rate": 6.087771416543661e-05, + "loss": 20.0026, + "step": 10731 + }, + { + "epoch": 0.4473344170730691, + "grad_norm": 100.5, + "learning_rate": 6.087112572477644e-05, + "loss": 8.9381, + "step": 10732 + }, + { + "epoch": 0.4473760993705973, + "grad_norm": 354.0, + "learning_rate": 6.086453708598905e-05, + "loss": 14.2505, + "step": 10733 + }, + { + "epoch": 0.44741778166812557, + "grad_norm": 438.0, + "learning_rate": 6.085794824919451e-05, + "loss": 15.6877, + "step": 10734 + }, + { + "epoch": 0.4474594639656538, + "grad_norm": 422.0, + "learning_rate": 6.085135921451288e-05, + "loss": 15.4377, + "step": 10735 + }, + { + "epoch": 0.44750114626318205, + "grad_norm": 276.0, + "learning_rate": 6.084476998206429e-05, + "loss": 11.6879, + "step": 10736 + }, + { + "epoch": 0.44754282856071026, + "grad_norm": 298.0, + "learning_rate": 6.083818055196879e-05, + "loss": 12.2507, + "step": 10737 + }, + { + "epoch": 0.4475845108582385, + "grad_norm": 209.0, + "learning_rate": 6.083159092434649e-05, + "loss": 11.5014, + "step": 10738 + }, + { + "epoch": 0.44762619315576674, + "grad_norm": 720.0, + "learning_rate": 6.0825001099317483e-05, + "loss": 19.5004, + "step": 10739 + }, + { + "epoch": 0.447667875453295, + "grad_norm": 388.0, + "learning_rate": 6.081841107700187e-05, + "loss": 14.6883, + "step": 10740 + }, + { + "epoch": 0.4477095577508232, + "grad_norm": 255.0, + "learning_rate": 6.081182085751975e-05, + "loss": 12.1253, + "step": 10741 + }, + { + "epoch": 0.4477512400483515, + "grad_norm": 660.0, + "learning_rate": 6.0805230440991245e-05, + "loss": 21.6252, + "step": 10742 + }, + { + "epoch": 0.4477929223458797, + "grad_norm": 406.0, + "learning_rate": 6.079863982753644e-05, + "loss": 15.8754, + "step": 10743 + }, + { + "epoch": 0.44783460464340796, + "grad_norm": 318.0, + "learning_rate": 6.079204901727548e-05, + "loss": 13.1879, + "step": 10744 + }, + { + "epoch": 0.4478762869409362, + "grad_norm": 350.0, + "learning_rate": 6.0785458010328463e-05, + "loss": 13.9377, + "step": 10745 + }, + { + "epoch": 0.44791796923846444, + "grad_norm": 342.0, + "learning_rate": 6.077886680681553e-05, + "loss": 13.5649, + "step": 10746 + }, + { + "epoch": 0.44795965153599265, + "grad_norm": 316.0, + "learning_rate": 6.077227540685677e-05, + "loss": 13.8128, + "step": 10747 + }, + { + "epoch": 0.4480013338335209, + "grad_norm": 398.0, + "learning_rate": 6.0765683810572346e-05, + "loss": 15.8754, + "step": 10748 + }, + { + "epoch": 0.44804301613104913, + "grad_norm": 1376.0, + "learning_rate": 6.075909201808239e-05, + "loss": 26.0052, + "step": 10749 + }, + { + "epoch": 0.4480846984285774, + "grad_norm": 376.0, + "learning_rate": 6.075250002950701e-05, + "loss": 15.7518, + "step": 10750 + }, + { + "epoch": 0.4481263807261056, + "grad_norm": 194.0, + "learning_rate": 6.0745907844966366e-05, + "loss": 10.1252, + "step": 10751 + }, + { + "epoch": 0.4481680630236339, + "grad_norm": 884.0, + "learning_rate": 6.07393154645806e-05, + "loss": 22.3752, + "step": 10752 + }, + { + "epoch": 0.4482097453211621, + "grad_norm": 368.0, + "learning_rate": 6.073272288846986e-05, + "loss": 14.0629, + "step": 10753 + }, + { + "epoch": 0.44825142761869036, + "grad_norm": 124.0, + "learning_rate": 6.0726130116754286e-05, + "loss": 9.9378, + "step": 10754 + }, + { + "epoch": 0.44829310991621857, + "grad_norm": 146.0, + "learning_rate": 6.071953714955404e-05, + "loss": 9.9377, + "step": 10755 + }, + { + "epoch": 0.44833479221374684, + "grad_norm": 346.0, + "learning_rate": 6.071294398698928e-05, + "loss": 15.438, + "step": 10756 + }, + { + "epoch": 0.44837647451127505, + "grad_norm": 286.0, + "learning_rate": 6.070635062918016e-05, + "loss": 12.5628, + "step": 10757 + }, + { + "epoch": 0.4484181568088033, + "grad_norm": 856.0, + "learning_rate": 6.069975707624686e-05, + "loss": 22.88, + "step": 10758 + }, + { + "epoch": 0.4484598391063315, + "grad_norm": 276.0, + "learning_rate": 6.069316332830952e-05, + "loss": 10.8752, + "step": 10759 + }, + { + "epoch": 0.4485015214038598, + "grad_norm": 688.0, + "learning_rate": 6.0686569385488345e-05, + "loss": 21.5006, + "step": 10760 + }, + { + "epoch": 0.448543203701388, + "grad_norm": 680.0, + "learning_rate": 6.0679975247903484e-05, + "loss": 20.2505, + "step": 10761 + }, + { + "epoch": 0.44858488599891627, + "grad_norm": 556.0, + "learning_rate": 6.0673380915675135e-05, + "loss": 17.5003, + "step": 10762 + }, + { + "epoch": 0.4486265682964445, + "grad_norm": 179.0, + "learning_rate": 6.066678638892347e-05, + "loss": 9.9378, + "step": 10763 + }, + { + "epoch": 0.44866825059397275, + "grad_norm": 460.0, + "learning_rate": 6.066019166776867e-05, + "loss": 13.6888, + "step": 10764 + }, + { + "epoch": 0.44870993289150096, + "grad_norm": 398.0, + "learning_rate": 6.065359675233093e-05, + "loss": 15.0631, + "step": 10765 + }, + { + "epoch": 0.44875161518902923, + "grad_norm": 378.0, + "learning_rate": 6.064700164273045e-05, + "loss": 14.1254, + "step": 10766 + }, + { + "epoch": 0.44879329748655744, + "grad_norm": 396.0, + "learning_rate": 6.064040633908742e-05, + "loss": 14.5002, + "step": 10767 + }, + { + "epoch": 0.4488349797840857, + "grad_norm": 207.0, + "learning_rate": 6.063381084152203e-05, + "loss": 11.6252, + "step": 10768 + }, + { + "epoch": 0.4488766620816139, + "grad_norm": 141.0, + "learning_rate": 6.06272151501545e-05, + "loss": 9.8754, + "step": 10769 + }, + { + "epoch": 0.4489183443791422, + "grad_norm": 262.0, + "learning_rate": 6.062061926510503e-05, + "loss": 12.6262, + "step": 10770 + }, + { + "epoch": 0.4489600266766704, + "grad_norm": 880.0, + "learning_rate": 6.061402318649383e-05, + "loss": 24.2503, + "step": 10771 + }, + { + "epoch": 0.44900170897419867, + "grad_norm": 111.5, + "learning_rate": 6.0607426914441126e-05, + "loss": 9.5636, + "step": 10772 + }, + { + "epoch": 0.4490433912717269, + "grad_norm": 107.5, + "learning_rate": 6.0600830449067114e-05, + "loss": 9.4379, + "step": 10773 + }, + { + "epoch": 0.44908507356925514, + "grad_norm": 318.0, + "learning_rate": 6.059423379049203e-05, + "loss": 14.8755, + "step": 10774 + }, + { + "epoch": 0.44912675586678336, + "grad_norm": 336.0, + "learning_rate": 6.058763693883609e-05, + "loss": 12.6876, + "step": 10775 + }, + { + "epoch": 0.4491684381643116, + "grad_norm": 166.0, + "learning_rate": 6.058103989421953e-05, + "loss": 9.8126, + "step": 10776 + }, + { + "epoch": 0.44921012046183983, + "grad_norm": 66.5, + "learning_rate": 6.0574442656762576e-05, + "loss": 9.3756, + "step": 10777 + }, + { + "epoch": 0.4492518027593681, + "grad_norm": 404.0, + "learning_rate": 6.056784522658547e-05, + "loss": 15.0008, + "step": 10778 + }, + { + "epoch": 0.4492934850568963, + "grad_norm": 376.0, + "learning_rate": 6.056124760380845e-05, + "loss": 15.2509, + "step": 10779 + }, + { + "epoch": 0.4493351673544246, + "grad_norm": 290.0, + "learning_rate": 6.0554649788551745e-05, + "loss": 13.5019, + "step": 10780 + }, + { + "epoch": 0.4493768496519528, + "grad_norm": 516.0, + "learning_rate": 6.054805178093561e-05, + "loss": 18.0002, + "step": 10781 + }, + { + "epoch": 0.44941853194948106, + "grad_norm": 434.0, + "learning_rate": 6.05414535810803e-05, + "loss": 13.6258, + "step": 10782 + }, + { + "epoch": 0.44946021424700927, + "grad_norm": 332.0, + "learning_rate": 6.053485518910607e-05, + "loss": 14.3127, + "step": 10783 + }, + { + "epoch": 0.44950189654453754, + "grad_norm": 195.0, + "learning_rate": 6.052825660513316e-05, + "loss": 10.2504, + "step": 10784 + }, + { + "epoch": 0.44954357884206575, + "grad_norm": 896.0, + "learning_rate": 6.052165782928184e-05, + "loss": 25.8753, + "step": 10785 + }, + { + "epoch": 0.449585261139594, + "grad_norm": 440.0, + "learning_rate": 6.051505886167237e-05, + "loss": 16.7503, + "step": 10786 + }, + { + "epoch": 0.44962694343712223, + "grad_norm": 246.0, + "learning_rate": 6.050845970242502e-05, + "loss": 10.9378, + "step": 10787 + }, + { + "epoch": 0.4496686257346505, + "grad_norm": 498.0, + "learning_rate": 6.050186035166007e-05, + "loss": 16.6253, + "step": 10788 + }, + { + "epoch": 0.4497103080321787, + "grad_norm": 234.0, + "learning_rate": 6.049526080949777e-05, + "loss": 11.0638, + "step": 10789 + }, + { + "epoch": 0.449751990329707, + "grad_norm": 147.0, + "learning_rate": 6.048866107605842e-05, + "loss": 9.8757, + "step": 10790 + }, + { + "epoch": 0.4497936726272352, + "grad_norm": 210.0, + "learning_rate": 6.048206115146228e-05, + "loss": 10.5636, + "step": 10791 + }, + { + "epoch": 0.44983535492476345, + "grad_norm": 436.0, + "learning_rate": 6.0475461035829637e-05, + "loss": 15.0005, + "step": 10792 + }, + { + "epoch": 0.4498770372222917, + "grad_norm": 540.0, + "learning_rate": 6.0468860729280796e-05, + "loss": 16.8756, + "step": 10793 + }, + { + "epoch": 0.44991871951981993, + "grad_norm": 1624.0, + "learning_rate": 6.046226023193604e-05, + "loss": 37.2503, + "step": 10794 + }, + { + "epoch": 0.4499604018173482, + "grad_norm": 81.5, + "learning_rate": 6.045565954391567e-05, + "loss": 9.1884, + "step": 10795 + }, + { + "epoch": 0.4500020841148764, + "grad_norm": 219.0, + "learning_rate": 6.0449058665339964e-05, + "loss": 11.0628, + "step": 10796 + }, + { + "epoch": 0.4500437664124047, + "grad_norm": 85.5, + "learning_rate": 6.044245759632925e-05, + "loss": 9.8753, + "step": 10797 + }, + { + "epoch": 0.4500854487099329, + "grad_norm": 192.0, + "learning_rate": 6.0435856337003816e-05, + "loss": 10.9378, + "step": 10798 + }, + { + "epoch": 0.45012713100746116, + "grad_norm": 338.0, + "learning_rate": 6.042925488748396e-05, + "loss": 13.8131, + "step": 10799 + }, + { + "epoch": 0.45016881330498937, + "grad_norm": 252.0, + "learning_rate": 6.0422653247890024e-05, + "loss": 11.7502, + "step": 10800 + }, + { + "epoch": 0.45021049560251764, + "grad_norm": 310.0, + "learning_rate": 6.04160514183423e-05, + "loss": 13.3752, + "step": 10801 + }, + { + "epoch": 0.45025217790004585, + "grad_norm": 350.0, + "learning_rate": 6.0409449398961116e-05, + "loss": 13.2502, + "step": 10802 + }, + { + "epoch": 0.4502938601975741, + "grad_norm": 147.0, + "learning_rate": 6.04028471898668e-05, + "loss": 8.8127, + "step": 10803 + }, + { + "epoch": 0.4503355424951023, + "grad_norm": 165.0, + "learning_rate": 6.039624479117966e-05, + "loss": 9.5626, + "step": 10804 + }, + { + "epoch": 0.4503772247926306, + "grad_norm": 255.0, + "learning_rate": 6.038964220302004e-05, + "loss": 12.5637, + "step": 10805 + }, + { + "epoch": 0.4504189070901588, + "grad_norm": 448.0, + "learning_rate": 6.0383039425508256e-05, + "loss": 14.0004, + "step": 10806 + }, + { + "epoch": 0.45046058938768707, + "grad_norm": 242.0, + "learning_rate": 6.037643645876467e-05, + "loss": 12.0631, + "step": 10807 + }, + { + "epoch": 0.4505022716852153, + "grad_norm": 1208.0, + "learning_rate": 6.03698333029096e-05, + "loss": 29.1253, + "step": 10808 + }, + { + "epoch": 0.45054395398274355, + "grad_norm": 170.0, + "learning_rate": 6.0363229958063406e-05, + "loss": 11.1252, + "step": 10809 + }, + { + "epoch": 0.45058563628027176, + "grad_norm": 144.0, + "learning_rate": 6.035662642434643e-05, + "loss": 10.4379, + "step": 10810 + }, + { + "epoch": 0.45062731857780003, + "grad_norm": 796.0, + "learning_rate": 6.035002270187901e-05, + "loss": 20.0002, + "step": 10811 + }, + { + "epoch": 0.45066900087532824, + "grad_norm": 117.5, + "learning_rate": 6.0343418790781515e-05, + "loss": 9.8751, + "step": 10812 + }, + { + "epoch": 0.4507106831728565, + "grad_norm": 354.0, + "learning_rate": 6.0336814691174284e-05, + "loss": 14.0002, + "step": 10813 + }, + { + "epoch": 0.4507523654703847, + "grad_norm": 488.0, + "learning_rate": 6.033021040317769e-05, + "loss": 16.2511, + "step": 10814 + }, + { + "epoch": 0.450794047767913, + "grad_norm": 296.0, + "learning_rate": 6.03236059269121e-05, + "loss": 13.1877, + "step": 10815 + }, + { + "epoch": 0.4508357300654412, + "grad_norm": 520.0, + "learning_rate": 6.031700126249788e-05, + "loss": 15.6253, + "step": 10816 + }, + { + "epoch": 0.45087741236296947, + "grad_norm": 460.0, + "learning_rate": 6.031039641005538e-05, + "loss": 16.6253, + "step": 10817 + }, + { + "epoch": 0.4509190946604977, + "grad_norm": 250.0, + "learning_rate": 6.0303791369704984e-05, + "loss": 11.6877, + "step": 10818 + }, + { + "epoch": 0.45096077695802594, + "grad_norm": 180.0, + "learning_rate": 6.0297186141567094e-05, + "loss": 10.0012, + "step": 10819 + }, + { + "epoch": 0.45100245925555416, + "grad_norm": 572.0, + "learning_rate": 6.029058072576207e-05, + "loss": 18.7503, + "step": 10820 + }, + { + "epoch": 0.4510441415530824, + "grad_norm": 276.0, + "learning_rate": 6.0283975122410294e-05, + "loss": 12.5627, + "step": 10821 + }, + { + "epoch": 0.45108582385061063, + "grad_norm": 306.0, + "learning_rate": 6.0277369331632164e-05, + "loss": 14.0005, + "step": 10822 + }, + { + "epoch": 0.4511275061481389, + "grad_norm": 95.5, + "learning_rate": 6.027076335354807e-05, + "loss": 9.1254, + "step": 10823 + }, + { + "epoch": 0.4511691884456671, + "grad_norm": 900.0, + "learning_rate": 6.026415718827839e-05, + "loss": 23.5004, + "step": 10824 + }, + { + "epoch": 0.4512108707431954, + "grad_norm": 135.0, + "learning_rate": 6.0257550835943545e-05, + "loss": 10.7508, + "step": 10825 + }, + { + "epoch": 0.4512525530407236, + "grad_norm": 171.0, + "learning_rate": 6.0250944296663915e-05, + "loss": 10.6253, + "step": 10826 + }, + { + "epoch": 0.45129423533825186, + "grad_norm": 136.0, + "learning_rate": 6.024433757055992e-05, + "loss": 9.6252, + "step": 10827 + }, + { + "epoch": 0.45133591763578007, + "grad_norm": 376.0, + "learning_rate": 6.0237730657751966e-05, + "loss": 12.8129, + "step": 10828 + }, + { + "epoch": 0.45137759993330834, + "grad_norm": 284.0, + "learning_rate": 6.0231123558360456e-05, + "loss": 12.9377, + "step": 10829 + }, + { + "epoch": 0.45141928223083655, + "grad_norm": 816.0, + "learning_rate": 6.0224516272505816e-05, + "loss": 24.3752, + "step": 10830 + }, + { + "epoch": 0.4514609645283648, + "grad_norm": 268.0, + "learning_rate": 6.0217908800308455e-05, + "loss": 11.3159, + "step": 10831 + }, + { + "epoch": 0.45150264682589303, + "grad_norm": 428.0, + "learning_rate": 6.02113011418888e-05, + "loss": 15.6891, + "step": 10832 + }, + { + "epoch": 0.4515443291234213, + "grad_norm": 108.0, + "learning_rate": 6.020469329736728e-05, + "loss": 8.5005, + "step": 10833 + }, + { + "epoch": 0.4515860114209495, + "grad_norm": 448.0, + "learning_rate": 6.019808526686431e-05, + "loss": 16.2503, + "step": 10834 + }, + { + "epoch": 0.4516276937184778, + "grad_norm": 241.0, + "learning_rate": 6.019147705050033e-05, + "loss": 11.5003, + "step": 10835 + }, + { + "epoch": 0.451669376016006, + "grad_norm": 708.0, + "learning_rate": 6.0184868648395786e-05, + "loss": 22.6254, + "step": 10836 + }, + { + "epoch": 0.45171105831353425, + "grad_norm": 318.0, + "learning_rate": 6.017826006067111e-05, + "loss": 13.3151, + "step": 10837 + }, + { + "epoch": 0.45175274061106246, + "grad_norm": 564.0, + "learning_rate": 6.017165128744673e-05, + "loss": 19.1253, + "step": 10838 + }, + { + "epoch": 0.45179442290859073, + "grad_norm": 211.0, + "learning_rate": 6.01650423288431e-05, + "loss": 12.5628, + "step": 10839 + }, + { + "epoch": 0.45183610520611894, + "grad_norm": 348.0, + "learning_rate": 6.0158433184980686e-05, + "loss": 14.4377, + "step": 10840 + }, + { + "epoch": 0.4518777875036472, + "grad_norm": 262.0, + "learning_rate": 6.015182385597992e-05, + "loss": 13.3752, + "step": 10841 + }, + { + "epoch": 0.4519194698011754, + "grad_norm": 1264.0, + "learning_rate": 6.0145214341961254e-05, + "loss": 27.3803, + "step": 10842 + }, + { + "epoch": 0.4519611520987037, + "grad_norm": 1112.0, + "learning_rate": 6.013860464304515e-05, + "loss": 26.5007, + "step": 10843 + }, + { + "epoch": 0.4520028343962319, + "grad_norm": 278.0, + "learning_rate": 6.01319947593521e-05, + "loss": 11.8753, + "step": 10844 + }, + { + "epoch": 0.45204451669376017, + "grad_norm": 300.0, + "learning_rate": 6.012538469100253e-05, + "loss": 12.6876, + "step": 10845 + }, + { + "epoch": 0.4520861989912884, + "grad_norm": 474.0, + "learning_rate": 6.011877443811693e-05, + "loss": 17.7504, + "step": 10846 + }, + { + "epoch": 0.45212788128881665, + "grad_norm": 580.0, + "learning_rate": 6.0112164000815766e-05, + "loss": 18.2505, + "step": 10847 + }, + { + "epoch": 0.45216956358634486, + "grad_norm": 197.0, + "learning_rate": 6.010555337921952e-05, + "loss": 11.0003, + "step": 10848 + }, + { + "epoch": 0.4522112458838731, + "grad_norm": 230.0, + "learning_rate": 6.009894257344866e-05, + "loss": 12.6253, + "step": 10849 + }, + { + "epoch": 0.45225292818140134, + "grad_norm": 328.0, + "learning_rate": 6.009233158362367e-05, + "loss": 13.8754, + "step": 10850 + }, + { + "epoch": 0.4522946104789296, + "grad_norm": 290.0, + "learning_rate": 6.0085720409865055e-05, + "loss": 12.6881, + "step": 10851 + }, + { + "epoch": 0.4523362927764578, + "grad_norm": 96.0, + "learning_rate": 6.007910905229328e-05, + "loss": 7.6255, + "step": 10852 + }, + { + "epoch": 0.4523779750739861, + "grad_norm": 792.0, + "learning_rate": 6.007249751102886e-05, + "loss": 23.7518, + "step": 10853 + }, + { + "epoch": 0.4524196573715143, + "grad_norm": 752.0, + "learning_rate": 6.006588578619227e-05, + "loss": 20.0002, + "step": 10854 + }, + { + "epoch": 0.45246133966904256, + "grad_norm": 239.0, + "learning_rate": 6.005927387790402e-05, + "loss": 12.4385, + "step": 10855 + }, + { + "epoch": 0.4525030219665708, + "grad_norm": 219.0, + "learning_rate": 6.005266178628459e-05, + "loss": 9.313, + "step": 10856 + }, + { + "epoch": 0.45254470426409904, + "grad_norm": 372.0, + "learning_rate": 6.004604951145454e-05, + "loss": 15.5003, + "step": 10857 + }, + { + "epoch": 0.45258638656162725, + "grad_norm": 616.0, + "learning_rate": 6.003943705353433e-05, + "loss": 18.0002, + "step": 10858 + }, + { + "epoch": 0.4526280688591555, + "grad_norm": 440.0, + "learning_rate": 6.0032824412644485e-05, + "loss": 15.4377, + "step": 10859 + }, + { + "epoch": 0.45266975115668373, + "grad_norm": 174.0, + "learning_rate": 6.002621158890553e-05, + "loss": 10.5632, + "step": 10860 + }, + { + "epoch": 0.452711433454212, + "grad_norm": 1328.0, + "learning_rate": 6.001959858243797e-05, + "loss": 33.2505, + "step": 10861 + }, + { + "epoch": 0.4527531157517402, + "grad_norm": 552.0, + "learning_rate": 6.001298539336235e-05, + "loss": 16.6291, + "step": 10862 + }, + { + "epoch": 0.4527947980492685, + "grad_norm": 474.0, + "learning_rate": 6.0006372021799184e-05, + "loss": 17.2506, + "step": 10863 + }, + { + "epoch": 0.4528364803467967, + "grad_norm": 226.0, + "learning_rate": 5.9999758467868995e-05, + "loss": 12.3755, + "step": 10864 + }, + { + "epoch": 0.45287816264432496, + "grad_norm": 181.0, + "learning_rate": 5.999314473169232e-05, + "loss": 10.9377, + "step": 10865 + }, + { + "epoch": 0.4529198449418532, + "grad_norm": 524.0, + "learning_rate": 5.998653081338969e-05, + "loss": 17.1257, + "step": 10866 + }, + { + "epoch": 0.45296152723938143, + "grad_norm": 446.0, + "learning_rate": 5.9979916713081655e-05, + "loss": 15.4377, + "step": 10867 + }, + { + "epoch": 0.4530032095369097, + "grad_norm": 186.0, + "learning_rate": 5.9973302430888746e-05, + "loss": 11.2505, + "step": 10868 + }, + { + "epoch": 0.4530448918344379, + "grad_norm": 146.0, + "learning_rate": 5.996668796693151e-05, + "loss": 10.4378, + "step": 10869 + }, + { + "epoch": 0.4530865741319662, + "grad_norm": 1256.0, + "learning_rate": 5.9960073321330515e-05, + "loss": 30.1253, + "step": 10870 + }, + { + "epoch": 0.4531282564294944, + "grad_norm": 492.0, + "learning_rate": 5.9953458494206285e-05, + "loss": 17.3752, + "step": 10871 + }, + { + "epoch": 0.45316993872702266, + "grad_norm": 278.0, + "learning_rate": 5.9946843485679406e-05, + "loss": 13.9379, + "step": 10872 + }, + { + "epoch": 0.45321162102455087, + "grad_norm": 262.0, + "learning_rate": 5.994022829587041e-05, + "loss": 13.3139, + "step": 10873 + }, + { + "epoch": 0.45325330332207914, + "grad_norm": 364.0, + "learning_rate": 5.993361292489987e-05, + "loss": 13.3128, + "step": 10874 + }, + { + "epoch": 0.45329498561960735, + "grad_norm": 784.0, + "learning_rate": 5.992699737288836e-05, + "loss": 25.0002, + "step": 10875 + }, + { + "epoch": 0.4533366679171356, + "grad_norm": 270.0, + "learning_rate": 5.992038163995645e-05, + "loss": 12.0626, + "step": 10876 + }, + { + "epoch": 0.45337835021466383, + "grad_norm": 266.0, + "learning_rate": 5.991376572622469e-05, + "loss": 12.0627, + "step": 10877 + }, + { + "epoch": 0.4534200325121921, + "grad_norm": 53.75, + "learning_rate": 5.9907149631813675e-05, + "loss": 7.719, + "step": 10878 + }, + { + "epoch": 0.4534617148097203, + "grad_norm": 196.0, + "learning_rate": 5.990053335684398e-05, + "loss": 12.6884, + "step": 10879 + }, + { + "epoch": 0.4535033971072486, + "grad_norm": 596.0, + "learning_rate": 5.9893916901436176e-05, + "loss": 18.2522, + "step": 10880 + }, + { + "epoch": 0.4535450794047768, + "grad_norm": 312.0, + "learning_rate": 5.988730026571085e-05, + "loss": 13.6254, + "step": 10881 + }, + { + "epoch": 0.45358676170230505, + "grad_norm": 212.0, + "learning_rate": 5.988068344978862e-05, + "loss": 11.0003, + "step": 10882 + }, + { + "epoch": 0.45362844399983326, + "grad_norm": 298.0, + "learning_rate": 5.9874066453790045e-05, + "loss": 13.7503, + "step": 10883 + }, + { + "epoch": 0.45367012629736153, + "grad_norm": 201.0, + "learning_rate": 5.986744927783574e-05, + "loss": 11.3127, + "step": 10884 + }, + { + "epoch": 0.45371180859488974, + "grad_norm": 544.0, + "learning_rate": 5.98608319220463e-05, + "loss": 18.5002, + "step": 10885 + }, + { + "epoch": 0.453753490892418, + "grad_norm": 340.0, + "learning_rate": 5.985421438654232e-05, + "loss": 14.189, + "step": 10886 + }, + { + "epoch": 0.4537951731899462, + "grad_norm": 832.0, + "learning_rate": 5.9847596671444395e-05, + "loss": 25.7503, + "step": 10887 + }, + { + "epoch": 0.4538368554874745, + "grad_norm": 374.0, + "learning_rate": 5.984097877687316e-05, + "loss": 14.3762, + "step": 10888 + }, + { + "epoch": 0.4538785377850027, + "grad_norm": 195.0, + "learning_rate": 5.983436070294921e-05, + "loss": 10.3127, + "step": 10889 + }, + { + "epoch": 0.45392022008253097, + "grad_norm": 1224.0, + "learning_rate": 5.982774244979317e-05, + "loss": 23.5051, + "step": 10890 + }, + { + "epoch": 0.4539619023800592, + "grad_norm": 660.0, + "learning_rate": 5.982112401752564e-05, + "loss": 21.1271, + "step": 10891 + }, + { + "epoch": 0.45400358467758745, + "grad_norm": 220.0, + "learning_rate": 5.981450540626725e-05, + "loss": 12.1253, + "step": 10892 + }, + { + "epoch": 0.45404526697511566, + "grad_norm": 206.0, + "learning_rate": 5.980788661613864e-05, + "loss": 12.1254, + "step": 10893 + }, + { + "epoch": 0.4540869492726439, + "grad_norm": 856.0, + "learning_rate": 5.9801267647260405e-05, + "loss": 22.6254, + "step": 10894 + }, + { + "epoch": 0.45412863157017214, + "grad_norm": 284.0, + "learning_rate": 5.9794648499753216e-05, + "loss": 12.8129, + "step": 10895 + }, + { + "epoch": 0.4541703138677004, + "grad_norm": 243.0, + "learning_rate": 5.978802917373769e-05, + "loss": 12.1879, + "step": 10896 + }, + { + "epoch": 0.4542119961652286, + "grad_norm": 144.0, + "learning_rate": 5.9781409669334455e-05, + "loss": 7.8755, + "step": 10897 + }, + { + "epoch": 0.4542536784627569, + "grad_norm": 404.0, + "learning_rate": 5.977478998666417e-05, + "loss": 15.4418, + "step": 10898 + }, + { + "epoch": 0.4542953607602851, + "grad_norm": 227.0, + "learning_rate": 5.976817012584746e-05, + "loss": 6.2823, + "step": 10899 + }, + { + "epoch": 0.45433704305781336, + "grad_norm": 306.0, + "learning_rate": 5.976155008700498e-05, + "loss": 14.3752, + "step": 10900 + }, + { + "epoch": 0.4543787253553416, + "grad_norm": 406.0, + "learning_rate": 5.975492987025739e-05, + "loss": 15.3133, + "step": 10901 + }, + { + "epoch": 0.45442040765286984, + "grad_norm": 486.0, + "learning_rate": 5.974830947572534e-05, + "loss": 16.8753, + "step": 10902 + }, + { + "epoch": 0.45446208995039805, + "grad_norm": 412.0, + "learning_rate": 5.974168890352948e-05, + "loss": 14.5632, + "step": 10903 + }, + { + "epoch": 0.4545037722479263, + "grad_norm": 120.0, + "learning_rate": 5.9735068153790476e-05, + "loss": 9.6256, + "step": 10904 + }, + { + "epoch": 0.45454545454545453, + "grad_norm": 92.0, + "learning_rate": 5.972844722662899e-05, + "loss": 9.8759, + "step": 10905 + }, + { + "epoch": 0.4545871368429828, + "grad_norm": 142.0, + "learning_rate": 5.972182612216568e-05, + "loss": 10.1259, + "step": 10906 + }, + { + "epoch": 0.454628819140511, + "grad_norm": 182.0, + "learning_rate": 5.9715204840521234e-05, + "loss": 11.3131, + "step": 10907 + }, + { + "epoch": 0.4546705014380393, + "grad_norm": 196.0, + "learning_rate": 5.9708583381816316e-05, + "loss": 11.7501, + "step": 10908 + }, + { + "epoch": 0.4547121837355675, + "grad_norm": 356.0, + "learning_rate": 5.970196174617162e-05, + "loss": 14.2502, + "step": 10909 + }, + { + "epoch": 0.45475386603309575, + "grad_norm": 126.0, + "learning_rate": 5.969533993370779e-05, + "loss": 9.438, + "step": 10910 + }, + { + "epoch": 0.45479554833062397, + "grad_norm": 104.0, + "learning_rate": 5.968871794454554e-05, + "loss": 8.3126, + "step": 10911 + }, + { + "epoch": 0.45483723062815223, + "grad_norm": 256.0, + "learning_rate": 5.9682095778805536e-05, + "loss": 11.8752, + "step": 10912 + }, + { + "epoch": 0.45487891292568045, + "grad_norm": 292.0, + "learning_rate": 5.967547343660849e-05, + "loss": 8.8755, + "step": 10913 + }, + { + "epoch": 0.4549205952232087, + "grad_norm": 240.0, + "learning_rate": 5.966885091807507e-05, + "loss": 13.1256, + "step": 10914 + }, + { + "epoch": 0.4549622775207369, + "grad_norm": 190.0, + "learning_rate": 5.9662228223325986e-05, + "loss": 11.5628, + "step": 10915 + }, + { + "epoch": 0.4550039598182652, + "grad_norm": 292.0, + "learning_rate": 5.965560535248194e-05, + "loss": 13.5647, + "step": 10916 + }, + { + "epoch": 0.4550456421157934, + "grad_norm": 272.0, + "learning_rate": 5.9648982305663625e-05, + "loss": 13.688, + "step": 10917 + }, + { + "epoch": 0.45508732441332167, + "grad_norm": 292.0, + "learning_rate": 5.964235908299175e-05, + "loss": 13.4377, + "step": 10918 + }, + { + "epoch": 0.4551290067108499, + "grad_norm": 312.0, + "learning_rate": 5.963573568458702e-05, + "loss": 14.1877, + "step": 10919 + }, + { + "epoch": 0.45517068900837815, + "grad_norm": 320.0, + "learning_rate": 5.9629112110570164e-05, + "loss": 14.1879, + "step": 10920 + }, + { + "epoch": 0.45521237130590636, + "grad_norm": 201.0, + "learning_rate": 5.962248836106187e-05, + "loss": 11.2502, + "step": 10921 + }, + { + "epoch": 0.4552540536034346, + "grad_norm": 334.0, + "learning_rate": 5.961586443618288e-05, + "loss": 14.2502, + "step": 10922 + }, + { + "epoch": 0.45529573590096284, + "grad_norm": 306.0, + "learning_rate": 5.9609240336053906e-05, + "loss": 12.813, + "step": 10923 + }, + { + "epoch": 0.4553374181984911, + "grad_norm": 356.0, + "learning_rate": 5.960261606079568e-05, + "loss": 13.8131, + "step": 10924 + }, + { + "epoch": 0.4553791004960193, + "grad_norm": 620.0, + "learning_rate": 5.9595991610528926e-05, + "loss": 19.3784, + "step": 10925 + }, + { + "epoch": 0.4554207827935476, + "grad_norm": 588.0, + "learning_rate": 5.958936698537436e-05, + "loss": 17.3755, + "step": 10926 + }, + { + "epoch": 0.4554624650910758, + "grad_norm": 420.0, + "learning_rate": 5.958274218545273e-05, + "loss": 16.0002, + "step": 10927 + }, + { + "epoch": 0.45550414738860406, + "grad_norm": 328.0, + "learning_rate": 5.9576117210884783e-05, + "loss": 10.563, + "step": 10928 + }, + { + "epoch": 0.4555458296861323, + "grad_norm": 138.0, + "learning_rate": 5.9569492061791254e-05, + "loss": 9.3772, + "step": 10929 + }, + { + "epoch": 0.45558751198366054, + "grad_norm": 600.0, + "learning_rate": 5.956286673829287e-05, + "loss": 20.626, + "step": 10930 + }, + { + "epoch": 0.45562919428118875, + "grad_norm": 208.0, + "learning_rate": 5.95562412405104e-05, + "loss": 11.1252, + "step": 10931 + }, + { + "epoch": 0.455670876578717, + "grad_norm": 748.0, + "learning_rate": 5.954961556856457e-05, + "loss": 20.1271, + "step": 10932 + }, + { + "epoch": 0.45571255887624523, + "grad_norm": 298.0, + "learning_rate": 5.954298972257616e-05, + "loss": 12.5637, + "step": 10933 + }, + { + "epoch": 0.4557542411737735, + "grad_norm": 544.0, + "learning_rate": 5.953636370266591e-05, + "loss": 20.1255, + "step": 10934 + }, + { + "epoch": 0.4557959234713017, + "grad_norm": 400.0, + "learning_rate": 5.952973750895459e-05, + "loss": 15.064, + "step": 10935 + }, + { + "epoch": 0.45583760576883, + "grad_norm": 204.0, + "learning_rate": 5.952311114156296e-05, + "loss": 8.065, + "step": 10936 + }, + { + "epoch": 0.4558792880663582, + "grad_norm": 382.0, + "learning_rate": 5.951648460061178e-05, + "loss": 15.0001, + "step": 10937 + }, + { + "epoch": 0.45592097036388646, + "grad_norm": 312.0, + "learning_rate": 5.950985788622182e-05, + "loss": 14.1254, + "step": 10938 + }, + { + "epoch": 0.4559626526614147, + "grad_norm": 136.0, + "learning_rate": 5.950323099851386e-05, + "loss": 9.9399, + "step": 10939 + }, + { + "epoch": 0.45600433495894294, + "grad_norm": 372.0, + "learning_rate": 5.949660393760868e-05, + "loss": 13.1261, + "step": 10940 + }, + { + "epoch": 0.4560460172564712, + "grad_norm": 436.0, + "learning_rate": 5.948997670362704e-05, + "loss": 16.0025, + "step": 10941 + }, + { + "epoch": 0.4560876995539994, + "grad_norm": 89.5, + "learning_rate": 5.948334929668973e-05, + "loss": 7.2815, + "step": 10942 + }, + { + "epoch": 0.4561293818515277, + "grad_norm": 191.0, + "learning_rate": 5.9476721716917536e-05, + "loss": 11.813, + "step": 10943 + }, + { + "epoch": 0.4561710641490559, + "grad_norm": 652.0, + "learning_rate": 5.947009396443124e-05, + "loss": 21.5005, + "step": 10944 + }, + { + "epoch": 0.45621274644658416, + "grad_norm": 1020.0, + "learning_rate": 5.946346603935166e-05, + "loss": 22.7541, + "step": 10945 + }, + { + "epoch": 0.4562544287441124, + "grad_norm": 868.0, + "learning_rate": 5.945683794179956e-05, + "loss": 19.254, + "step": 10946 + }, + { + "epoch": 0.45629611104164064, + "grad_norm": 207.0, + "learning_rate": 5.945020967189575e-05, + "loss": 11.2506, + "step": 10947 + }, + { + "epoch": 0.45633779333916885, + "grad_norm": 109.0, + "learning_rate": 5.944358122976104e-05, + "loss": 9.1254, + "step": 10948 + }, + { + "epoch": 0.4563794756366971, + "grad_norm": 181.0, + "learning_rate": 5.943695261551622e-05, + "loss": 10.813, + "step": 10949 + }, + { + "epoch": 0.45642115793422533, + "grad_norm": 478.0, + "learning_rate": 5.94303238292821e-05, + "loss": 17.1254, + "step": 10950 + }, + { + "epoch": 0.4564628402317536, + "grad_norm": 122.0, + "learning_rate": 5.942369487117948e-05, + "loss": 9.313, + "step": 10951 + }, + { + "epoch": 0.4565045225292818, + "grad_norm": 253.0, + "learning_rate": 5.9417065741329193e-05, + "loss": 11.0627, + "step": 10952 + }, + { + "epoch": 0.4565462048268101, + "grad_norm": 302.0, + "learning_rate": 5.941043643985205e-05, + "loss": 12.1877, + "step": 10953 + }, + { + "epoch": 0.4565878871243383, + "grad_norm": 616.0, + "learning_rate": 5.940380696686887e-05, + "loss": 20.0003, + "step": 10954 + }, + { + "epoch": 0.45662956942186655, + "grad_norm": 524.0, + "learning_rate": 5.939717732250046e-05, + "loss": 18.3751, + "step": 10955 + }, + { + "epoch": 0.45667125171939477, + "grad_norm": 109.0, + "learning_rate": 5.9390547506867675e-05, + "loss": 8.5628, + "step": 10956 + }, + { + "epoch": 0.45671293401692303, + "grad_norm": 964.0, + "learning_rate": 5.938391752009131e-05, + "loss": 22.0045, + "step": 10957 + }, + { + "epoch": 0.45675461631445124, + "grad_norm": 296.0, + "learning_rate": 5.937728736229222e-05, + "loss": 13.0011, + "step": 10958 + }, + { + "epoch": 0.4567962986119795, + "grad_norm": 352.0, + "learning_rate": 5.937065703359124e-05, + "loss": 14.6887, + "step": 10959 + }, + { + "epoch": 0.4568379809095077, + "grad_norm": 572.0, + "learning_rate": 5.936402653410921e-05, + "loss": 18.2548, + "step": 10960 + }, + { + "epoch": 0.456879663207036, + "grad_norm": 502.0, + "learning_rate": 5.935739586396696e-05, + "loss": 16.1256, + "step": 10961 + }, + { + "epoch": 0.4569213455045642, + "grad_norm": 536.0, + "learning_rate": 5.9350765023285334e-05, + "loss": 19.2505, + "step": 10962 + }, + { + "epoch": 0.45696302780209247, + "grad_norm": 478.0, + "learning_rate": 5.934413401218519e-05, + "loss": 17.6253, + "step": 10963 + }, + { + "epoch": 0.4570047100996207, + "grad_norm": 294.0, + "learning_rate": 5.933750283078738e-05, + "loss": 12.5628, + "step": 10964 + }, + { + "epoch": 0.45704639239714895, + "grad_norm": 276.0, + "learning_rate": 5.9330871479212744e-05, + "loss": 8.814, + "step": 10965 + }, + { + "epoch": 0.45708807469467716, + "grad_norm": 227.0, + "learning_rate": 5.932423995758215e-05, + "loss": 11.5008, + "step": 10966 + }, + { + "epoch": 0.4571297569922054, + "grad_norm": 428.0, + "learning_rate": 5.9317608266016455e-05, + "loss": 16.1251, + "step": 10967 + }, + { + "epoch": 0.45717143928973364, + "grad_norm": 664.0, + "learning_rate": 5.931097640463652e-05, + "loss": 19.2502, + "step": 10968 + }, + { + "epoch": 0.4572131215872619, + "grad_norm": 454.0, + "learning_rate": 5.93043443735632e-05, + "loss": 14.1876, + "step": 10969 + }, + { + "epoch": 0.4572548038847901, + "grad_norm": 260.0, + "learning_rate": 5.92977121729174e-05, + "loss": 11.3128, + "step": 10970 + }, + { + "epoch": 0.4572964861823184, + "grad_norm": 386.0, + "learning_rate": 5.929107980281996e-05, + "loss": 13.8128, + "step": 10971 + }, + { + "epoch": 0.4573381684798466, + "grad_norm": 195.0, + "learning_rate": 5.928444726339177e-05, + "loss": 11.5028, + "step": 10972 + }, + { + "epoch": 0.45737985077737486, + "grad_norm": 190.0, + "learning_rate": 5.927781455475371e-05, + "loss": 11.8755, + "step": 10973 + }, + { + "epoch": 0.4574215330749031, + "grad_norm": 229.0, + "learning_rate": 5.927118167702664e-05, + "loss": 11.5018, + "step": 10974 + }, + { + "epoch": 0.45746321537243134, + "grad_norm": 772.0, + "learning_rate": 5.926454863033149e-05, + "loss": 21.2505, + "step": 10975 + }, + { + "epoch": 0.45750489766995955, + "grad_norm": 366.0, + "learning_rate": 5.925791541478909e-05, + "loss": 14.6878, + "step": 10976 + }, + { + "epoch": 0.4575465799674878, + "grad_norm": 280.0, + "learning_rate": 5.925128203052037e-05, + "loss": 12.2504, + "step": 10977 + }, + { + "epoch": 0.45758826226501603, + "grad_norm": 360.0, + "learning_rate": 5.924464847764621e-05, + "loss": 12.6256, + "step": 10978 + }, + { + "epoch": 0.4576299445625443, + "grad_norm": 492.0, + "learning_rate": 5.923801475628752e-05, + "loss": 16.3754, + "step": 10979 + }, + { + "epoch": 0.4576716268600725, + "grad_norm": 684.0, + "learning_rate": 5.923138086656518e-05, + "loss": 19.753, + "step": 10980 + }, + { + "epoch": 0.4577133091576008, + "grad_norm": 253.0, + "learning_rate": 5.922474680860011e-05, + "loss": 10.9376, + "step": 10981 + }, + { + "epoch": 0.457754991455129, + "grad_norm": 524.0, + "learning_rate": 5.92181125825132e-05, + "loss": 16.7502, + "step": 10982 + }, + { + "epoch": 0.45779667375265726, + "grad_norm": 438.0, + "learning_rate": 5.921147818842537e-05, + "loss": 15.0002, + "step": 10983 + }, + { + "epoch": 0.45783835605018547, + "grad_norm": 298.0, + "learning_rate": 5.920484362645755e-05, + "loss": 14.1255, + "step": 10984 + }, + { + "epoch": 0.45788003834771374, + "grad_norm": 454.0, + "learning_rate": 5.9198208896730634e-05, + "loss": 13.4401, + "step": 10985 + }, + { + "epoch": 0.45792172064524195, + "grad_norm": 346.0, + "learning_rate": 5.919157399936554e-05, + "loss": 11.8764, + "step": 10986 + }, + { + "epoch": 0.4579634029427702, + "grad_norm": 284.0, + "learning_rate": 5.918493893448319e-05, + "loss": 13.8127, + "step": 10987 + }, + { + "epoch": 0.4580050852402984, + "grad_norm": 494.0, + "learning_rate": 5.917830370220452e-05, + "loss": 17.3752, + "step": 10988 + }, + { + "epoch": 0.4580467675378267, + "grad_norm": 490.0, + "learning_rate": 5.917166830265044e-05, + "loss": 17.2503, + "step": 10989 + }, + { + "epoch": 0.4580884498353549, + "grad_norm": 748.0, + "learning_rate": 5.9165032735941894e-05, + "loss": 20.6263, + "step": 10990 + }, + { + "epoch": 0.45813013213288317, + "grad_norm": 330.0, + "learning_rate": 5.915839700219982e-05, + "loss": 14.0004, + "step": 10991 + }, + { + "epoch": 0.4581718144304114, + "grad_norm": 494.0, + "learning_rate": 5.915176110154515e-05, + "loss": 17.7501, + "step": 10992 + }, + { + "epoch": 0.45821349672793965, + "grad_norm": 162.0, + "learning_rate": 5.9145125034098815e-05, + "loss": 9.8128, + "step": 10993 + }, + { + "epoch": 0.45825517902546786, + "grad_norm": 237.0, + "learning_rate": 5.913848879998176e-05, + "loss": 12.7504, + "step": 10994 + }, + { + "epoch": 0.45829686132299613, + "grad_norm": 332.0, + "learning_rate": 5.913185239931494e-05, + "loss": 14.4377, + "step": 10995 + }, + { + "epoch": 0.45833854362052434, + "grad_norm": 458.0, + "learning_rate": 5.912521583221929e-05, + "loss": 17.5006, + "step": 10996 + }, + { + "epoch": 0.4583802259180526, + "grad_norm": 784.0, + "learning_rate": 5.911857909881579e-05, + "loss": 23.2503, + "step": 10997 + }, + { + "epoch": 0.4584219082155808, + "grad_norm": 292.0, + "learning_rate": 5.9111942199225355e-05, + "loss": 12.6881, + "step": 10998 + }, + { + "epoch": 0.4584635905131091, + "grad_norm": 396.0, + "learning_rate": 5.9105305133568976e-05, + "loss": 14.8754, + "step": 10999 + }, + { + "epoch": 0.4585052728106373, + "grad_norm": 474.0, + "learning_rate": 5.909866790196761e-05, + "loss": 15.9385, + "step": 11000 + }, + { + "epoch": 0.45854695510816557, + "grad_norm": 276.0, + "learning_rate": 5.909203050454221e-05, + "loss": 10.6253, + "step": 11001 + }, + { + "epoch": 0.4585886374056938, + "grad_norm": 556.0, + "learning_rate": 5.908539294141374e-05, + "loss": 18.8755, + "step": 11002 + }, + { + "epoch": 0.45863031970322204, + "grad_norm": 302.0, + "learning_rate": 5.9078755212703185e-05, + "loss": 13.8754, + "step": 11003 + }, + { + "epoch": 0.45867200200075026, + "grad_norm": 378.0, + "learning_rate": 5.90721173185315e-05, + "loss": 13.1877, + "step": 11004 + }, + { + "epoch": 0.4587136842982785, + "grad_norm": 226.0, + "learning_rate": 5.906547925901968e-05, + "loss": 12.876, + "step": 11005 + }, + { + "epoch": 0.45875536659580674, + "grad_norm": 174.0, + "learning_rate": 5.905884103428869e-05, + "loss": 9.0627, + "step": 11006 + }, + { + "epoch": 0.458797048893335, + "grad_norm": 684.0, + "learning_rate": 5.905220264445952e-05, + "loss": 19.2505, + "step": 11007 + }, + { + "epoch": 0.4588387311908632, + "grad_norm": 260.0, + "learning_rate": 5.904556408965315e-05, + "loss": 12.1254, + "step": 11008 + }, + { + "epoch": 0.4588804134883915, + "grad_norm": 286.0, + "learning_rate": 5.903892536999058e-05, + "loss": 11.0016, + "step": 11009 + }, + { + "epoch": 0.4589220957859197, + "grad_norm": 221.0, + "learning_rate": 5.903228648559279e-05, + "loss": 11.5629, + "step": 11010 + }, + { + "epoch": 0.45896377808344796, + "grad_norm": 804.0, + "learning_rate": 5.902564743658078e-05, + "loss": 20.3753, + "step": 11011 + }, + { + "epoch": 0.4590054603809762, + "grad_norm": 172.0, + "learning_rate": 5.901900822307553e-05, + "loss": 10.2503, + "step": 11012 + }, + { + "epoch": 0.45904714267850444, + "grad_norm": 516.0, + "learning_rate": 5.9012368845198074e-05, + "loss": 16.8753, + "step": 11013 + }, + { + "epoch": 0.4590888249760327, + "grad_norm": 584.0, + "learning_rate": 5.900572930306938e-05, + "loss": 17.8757, + "step": 11014 + }, + { + "epoch": 0.4591305072735609, + "grad_norm": 194.0, + "learning_rate": 5.8999089596810476e-05, + "loss": 10.563, + "step": 11015 + }, + { + "epoch": 0.4591721895710892, + "grad_norm": 260.0, + "learning_rate": 5.899244972654236e-05, + "loss": 12.314, + "step": 11016 + }, + { + "epoch": 0.4592138718686174, + "grad_norm": 740.0, + "learning_rate": 5.898580969238606e-05, + "loss": 21.0003, + "step": 11017 + }, + { + "epoch": 0.45925555416614566, + "grad_norm": 241.0, + "learning_rate": 5.8979169494462586e-05, + "loss": 13.0003, + "step": 11018 + }, + { + "epoch": 0.4592972364636739, + "grad_norm": 194.0, + "learning_rate": 5.897252913289294e-05, + "loss": 11.0006, + "step": 11019 + }, + { + "epoch": 0.45933891876120214, + "grad_norm": 110.5, + "learning_rate": 5.896588860779814e-05, + "loss": 8.8753, + "step": 11020 + }, + { + "epoch": 0.45938060105873035, + "grad_norm": 792.0, + "learning_rate": 5.895924791929924e-05, + "loss": 21.6251, + "step": 11021 + }, + { + "epoch": 0.4594222833562586, + "grad_norm": 760.0, + "learning_rate": 5.895260706751725e-05, + "loss": 23.0003, + "step": 11022 + }, + { + "epoch": 0.45946396565378683, + "grad_norm": 432.0, + "learning_rate": 5.8945966052573195e-05, + "loss": 16.7503, + "step": 11023 + }, + { + "epoch": 0.4595056479513151, + "grad_norm": 98.5, + "learning_rate": 5.8939324874588134e-05, + "loss": 9.3133, + "step": 11024 + }, + { + "epoch": 0.4595473302488433, + "grad_norm": 320.0, + "learning_rate": 5.893268353368306e-05, + "loss": 11.9377, + "step": 11025 + }, + { + "epoch": 0.4595890125463716, + "grad_norm": 290.0, + "learning_rate": 5.892604202997906e-05, + "loss": 13.126, + "step": 11026 + }, + { + "epoch": 0.4596306948438998, + "grad_norm": 684.0, + "learning_rate": 5.891940036359713e-05, + "loss": 22.0009, + "step": 11027 + }, + { + "epoch": 0.45967237714142806, + "grad_norm": 126.5, + "learning_rate": 5.891275853465834e-05, + "loss": 7.9065, + "step": 11028 + }, + { + "epoch": 0.45971405943895627, + "grad_norm": 676.0, + "learning_rate": 5.890611654328375e-05, + "loss": 21.5033, + "step": 11029 + }, + { + "epoch": 0.45975574173648454, + "grad_norm": 268.0, + "learning_rate": 5.889947438959438e-05, + "loss": 13.8759, + "step": 11030 + }, + { + "epoch": 0.45979742403401275, + "grad_norm": 580.0, + "learning_rate": 5.88928320737113e-05, + "loss": 18.0002, + "step": 11031 + }, + { + "epoch": 0.459839106331541, + "grad_norm": 528.0, + "learning_rate": 5.888618959575556e-05, + "loss": 17.2513, + "step": 11032 + }, + { + "epoch": 0.4598807886290692, + "grad_norm": 258.0, + "learning_rate": 5.8879546955848245e-05, + "loss": 11.8127, + "step": 11033 + }, + { + "epoch": 0.4599224709265975, + "grad_norm": 282.0, + "learning_rate": 5.887290415411039e-05, + "loss": 13.8127, + "step": 11034 + }, + { + "epoch": 0.4599641532241257, + "grad_norm": 328.0, + "learning_rate": 5.886626119066307e-05, + "loss": 13.8756, + "step": 11035 + }, + { + "epoch": 0.46000583552165397, + "grad_norm": 544.0, + "learning_rate": 5.8859618065627344e-05, + "loss": 16.8753, + "step": 11036 + }, + { + "epoch": 0.4600475178191822, + "grad_norm": 282.0, + "learning_rate": 5.8852974779124306e-05, + "loss": 13.6255, + "step": 11037 + }, + { + "epoch": 0.46008920011671045, + "grad_norm": 304.0, + "learning_rate": 5.8846331331275e-05, + "loss": 12.8753, + "step": 11038 + }, + { + "epoch": 0.46013088241423866, + "grad_norm": 40.0, + "learning_rate": 5.8839687722200534e-05, + "loss": 5.4069, + "step": 11039 + }, + { + "epoch": 0.46017256471176693, + "grad_norm": 812.0, + "learning_rate": 5.883304395202197e-05, + "loss": 24.1262, + "step": 11040 + }, + { + "epoch": 0.46021424700929514, + "grad_norm": 290.0, + "learning_rate": 5.882640002086039e-05, + "loss": 13.3126, + "step": 11041 + }, + { + "epoch": 0.4602559293068234, + "grad_norm": 260.0, + "learning_rate": 5.881975592883691e-05, + "loss": 13.0004, + "step": 11042 + }, + { + "epoch": 0.4602976116043516, + "grad_norm": 664.0, + "learning_rate": 5.8813111676072565e-05, + "loss": 22.126, + "step": 11043 + }, + { + "epoch": 0.4603392939018799, + "grad_norm": 1120.0, + "learning_rate": 5.8806467262688495e-05, + "loss": 25.2552, + "step": 11044 + }, + { + "epoch": 0.4603809761994081, + "grad_norm": 354.0, + "learning_rate": 5.879982268880576e-05, + "loss": 12.8152, + "step": 11045 + }, + { + "epoch": 0.46042265849693637, + "grad_norm": 120.0, + "learning_rate": 5.8793177954545486e-05, + "loss": 9.4378, + "step": 11046 + }, + { + "epoch": 0.4604643407944646, + "grad_norm": 556.0, + "learning_rate": 5.878653306002877e-05, + "loss": 17.3757, + "step": 11047 + }, + { + "epoch": 0.46050602309199284, + "grad_norm": 229.0, + "learning_rate": 5.8779888005376704e-05, + "loss": 11.7503, + "step": 11048 + }, + { + "epoch": 0.46054770538952106, + "grad_norm": 1120.0, + "learning_rate": 5.877324279071039e-05, + "loss": 25.8793, + "step": 11049 + }, + { + "epoch": 0.4605893876870493, + "grad_norm": 604.0, + "learning_rate": 5.876659741615096e-05, + "loss": 18.3756, + "step": 11050 + }, + { + "epoch": 0.46063106998457753, + "grad_norm": 332.0, + "learning_rate": 5.875995188181952e-05, + "loss": 13.563, + "step": 11051 + }, + { + "epoch": 0.4606727522821058, + "grad_norm": 408.0, + "learning_rate": 5.875330618783717e-05, + "loss": 14.2509, + "step": 11052 + }, + { + "epoch": 0.460714434579634, + "grad_norm": 1488.0, + "learning_rate": 5.874666033432503e-05, + "loss": 33.0041, + "step": 11053 + }, + { + "epoch": 0.4607561168771623, + "grad_norm": 202.0, + "learning_rate": 5.8740014321404234e-05, + "loss": 12.3127, + "step": 11054 + }, + { + "epoch": 0.4607977991746905, + "grad_norm": 334.0, + "learning_rate": 5.873336814919591e-05, + "loss": 12.8129, + "step": 11055 + }, + { + "epoch": 0.46083948147221876, + "grad_norm": 334.0, + "learning_rate": 5.872672181782117e-05, + "loss": 14.3128, + "step": 11056 + }, + { + "epoch": 0.46088116376974697, + "grad_norm": 1488.0, + "learning_rate": 5.8720075327401137e-05, + "loss": 29.0047, + "step": 11057 + }, + { + "epoch": 0.46092284606727524, + "grad_norm": 492.0, + "learning_rate": 5.871342867805698e-05, + "loss": 17.2507, + "step": 11058 + }, + { + "epoch": 0.46096452836480345, + "grad_norm": 298.0, + "learning_rate": 5.87067818699098e-05, + "loss": 12.813, + "step": 11059 + }, + { + "epoch": 0.4610062106623317, + "grad_norm": 292.0, + "learning_rate": 5.870013490308075e-05, + "loss": 12.4386, + "step": 11060 + }, + { + "epoch": 0.46104789295985993, + "grad_norm": 250.0, + "learning_rate": 5.869348777769097e-05, + "loss": 11.8771, + "step": 11061 + }, + { + "epoch": 0.4610895752573882, + "grad_norm": 576.0, + "learning_rate": 5.8686840493861596e-05, + "loss": 17.3752, + "step": 11062 + }, + { + "epoch": 0.4611312575549164, + "grad_norm": 356.0, + "learning_rate": 5.8680193051713796e-05, + "loss": 14.0627, + "step": 11063 + }, + { + "epoch": 0.4611729398524447, + "grad_norm": 177.0, + "learning_rate": 5.8673545451368695e-05, + "loss": 11.8142, + "step": 11064 + }, + { + "epoch": 0.4612146221499729, + "grad_norm": 314.0, + "learning_rate": 5.866689769294747e-05, + "loss": 13.8755, + "step": 11065 + }, + { + "epoch": 0.46125630444750115, + "grad_norm": 210.0, + "learning_rate": 5.866024977657125e-05, + "loss": 11.1257, + "step": 11066 + }, + { + "epoch": 0.46129798674502936, + "grad_norm": 568.0, + "learning_rate": 5.8653601702361224e-05, + "loss": 18.7502, + "step": 11067 + }, + { + "epoch": 0.46133966904255763, + "grad_norm": 572.0, + "learning_rate": 5.864695347043853e-05, + "loss": 18.0006, + "step": 11068 + }, + { + "epoch": 0.46138135134008584, + "grad_norm": 294.0, + "learning_rate": 5.864030508092434e-05, + "loss": 12.4383, + "step": 11069 + }, + { + "epoch": 0.4614230336376141, + "grad_norm": 166.0, + "learning_rate": 5.8633656533939816e-05, + "loss": 10.5629, + "step": 11070 + }, + { + "epoch": 0.4614647159351423, + "grad_norm": 306.0, + "learning_rate": 5.862700782960615e-05, + "loss": 13.6908, + "step": 11071 + }, + { + "epoch": 0.4615063982326706, + "grad_norm": 520.0, + "learning_rate": 5.8620358968044496e-05, + "loss": 16.376, + "step": 11072 + }, + { + "epoch": 0.4615480805301988, + "grad_norm": 844.0, + "learning_rate": 5.861370994937604e-05, + "loss": 20.3798, + "step": 11073 + }, + { + "epoch": 0.46158976282772707, + "grad_norm": 221.0, + "learning_rate": 5.860706077372194e-05, + "loss": 11.1253, + "step": 11074 + }, + { + "epoch": 0.4616314451252553, + "grad_norm": 262.0, + "learning_rate": 5.860041144120341e-05, + "loss": 12.4379, + "step": 11075 + }, + { + "epoch": 0.46167312742278355, + "grad_norm": 296.0, + "learning_rate": 5.859376195194162e-05, + "loss": 13.5629, + "step": 11076 + }, + { + "epoch": 0.46171480972031176, + "grad_norm": 157.0, + "learning_rate": 5.858711230605774e-05, + "loss": 8.7506, + "step": 11077 + }, + { + "epoch": 0.46175649201784, + "grad_norm": 354.0, + "learning_rate": 5.8580462503672986e-05, + "loss": 14.688, + "step": 11078 + }, + { + "epoch": 0.46179817431536824, + "grad_norm": 584.0, + "learning_rate": 5.857381254490854e-05, + "loss": 19.3752, + "step": 11079 + }, + { + "epoch": 0.4618398566128965, + "grad_norm": 648.0, + "learning_rate": 5.856716242988559e-05, + "loss": 19.1252, + "step": 11080 + }, + { + "epoch": 0.4618815389104247, + "grad_norm": 348.0, + "learning_rate": 5.856051215872536e-05, + "loss": 14.4378, + "step": 11081 + }, + { + "epoch": 0.461923221207953, + "grad_norm": 470.0, + "learning_rate": 5.855386173154902e-05, + "loss": 17.0003, + "step": 11082 + }, + { + "epoch": 0.4619649035054812, + "grad_norm": 498.0, + "learning_rate": 5.8547211148477785e-05, + "loss": 17.0001, + "step": 11083 + }, + { + "epoch": 0.46200658580300946, + "grad_norm": 512.0, + "learning_rate": 5.854056040963288e-05, + "loss": 18.1252, + "step": 11084 + }, + { + "epoch": 0.46204826810053773, + "grad_norm": 784.0, + "learning_rate": 5.853390951513551e-05, + "loss": 22.2502, + "step": 11085 + }, + { + "epoch": 0.46208995039806594, + "grad_norm": 362.0, + "learning_rate": 5.8527258465106885e-05, + "loss": 13.8752, + "step": 11086 + }, + { + "epoch": 0.4621316326955942, + "grad_norm": 320.0, + "learning_rate": 5.8520607259668205e-05, + "loss": 12.6879, + "step": 11087 + }, + { + "epoch": 0.4621733149931224, + "grad_norm": 332.0, + "learning_rate": 5.85139558989407e-05, + "loss": 14.5012, + "step": 11088 + }, + { + "epoch": 0.4622149972906507, + "grad_norm": 382.0, + "learning_rate": 5.8507304383045604e-05, + "loss": 14.3757, + "step": 11089 + }, + { + "epoch": 0.4622566795881789, + "grad_norm": 716.0, + "learning_rate": 5.850065271210412e-05, + "loss": 19.7501, + "step": 11090 + }, + { + "epoch": 0.46229836188570717, + "grad_norm": 1584.0, + "learning_rate": 5.849400088623749e-05, + "loss": 33.2553, + "step": 11091 + }, + { + "epoch": 0.4623400441832354, + "grad_norm": 146.0, + "learning_rate": 5.848734890556694e-05, + "loss": 9.2519, + "step": 11092 + }, + { + "epoch": 0.46238172648076364, + "grad_norm": 48.75, + "learning_rate": 5.8480696770213706e-05, + "loss": 7.7827, + "step": 11093 + }, + { + "epoch": 0.46242340877829186, + "grad_norm": 164.0, + "learning_rate": 5.847404448029902e-05, + "loss": 10.0004, + "step": 11094 + }, + { + "epoch": 0.4624650910758201, + "grad_norm": 498.0, + "learning_rate": 5.84673920359441e-05, + "loss": 17.3753, + "step": 11095 + }, + { + "epoch": 0.46250677337334833, + "grad_norm": 488.0, + "learning_rate": 5.846073943727023e-05, + "loss": 15.1888, + "step": 11096 + }, + { + "epoch": 0.4625484556708766, + "grad_norm": 1144.0, + "learning_rate": 5.8454086684398625e-05, + "loss": 30.8753, + "step": 11097 + }, + { + "epoch": 0.4625901379684048, + "grad_norm": 988.0, + "learning_rate": 5.844743377745054e-05, + "loss": 23.6298, + "step": 11098 + }, + { + "epoch": 0.4626318202659331, + "grad_norm": 426.0, + "learning_rate": 5.844078071654724e-05, + "loss": 15.4377, + "step": 11099 + }, + { + "epoch": 0.4626735025634613, + "grad_norm": 536.0, + "learning_rate": 5.843412750180994e-05, + "loss": 16.5006, + "step": 11100 + }, + { + "epoch": 0.46271518486098956, + "grad_norm": 95.5, + "learning_rate": 5.842747413335994e-05, + "loss": 9.3755, + "step": 11101 + }, + { + "epoch": 0.46275686715851777, + "grad_norm": 616.0, + "learning_rate": 5.842082061131846e-05, + "loss": 16.7505, + "step": 11102 + }, + { + "epoch": 0.46279854945604604, + "grad_norm": 520.0, + "learning_rate": 5.841416693580678e-05, + "loss": 17.6253, + "step": 11103 + }, + { + "epoch": 0.46284023175357425, + "grad_norm": 968.0, + "learning_rate": 5.8407513106946165e-05, + "loss": 23.7506, + "step": 11104 + }, + { + "epoch": 0.4628819140511025, + "grad_norm": 137.0, + "learning_rate": 5.8400859124857874e-05, + "loss": 9.9386, + "step": 11105 + }, + { + "epoch": 0.46292359634863073, + "grad_norm": 157.0, + "learning_rate": 5.839420498966318e-05, + "loss": 11.2517, + "step": 11106 + }, + { + "epoch": 0.462965278646159, + "grad_norm": 664.0, + "learning_rate": 5.838755070148335e-05, + "loss": 19.3755, + "step": 11107 + }, + { + "epoch": 0.4630069609436872, + "grad_norm": 768.0, + "learning_rate": 5.838089626043966e-05, + "loss": 20.8752, + "step": 11108 + }, + { + "epoch": 0.4630486432412155, + "grad_norm": 256.0, + "learning_rate": 5.83742416666534e-05, + "loss": 11.8132, + "step": 11109 + }, + { + "epoch": 0.4630903255387437, + "grad_norm": 229.0, + "learning_rate": 5.836758692024584e-05, + "loss": 10.7504, + "step": 11110 + }, + { + "epoch": 0.46313200783627195, + "grad_norm": 418.0, + "learning_rate": 5.8360932021338264e-05, + "loss": 15.3759, + "step": 11111 + }, + { + "epoch": 0.46317369013380016, + "grad_norm": 536.0, + "learning_rate": 5.8354276970051966e-05, + "loss": 17.3753, + "step": 11112 + }, + { + "epoch": 0.46321537243132843, + "grad_norm": 146.0, + "learning_rate": 5.834762176650823e-05, + "loss": 10.3752, + "step": 11113 + }, + { + "epoch": 0.46325705472885664, + "grad_norm": 71.5, + "learning_rate": 5.834096641082834e-05, + "loss": 8.563, + "step": 11114 + }, + { + "epoch": 0.4632987370263849, + "grad_norm": 476.0, + "learning_rate": 5.83343109031336e-05, + "loss": 15.3131, + "step": 11115 + }, + { + "epoch": 0.4633404193239131, + "grad_norm": 884.0, + "learning_rate": 5.832765524354531e-05, + "loss": 21.7545, + "step": 11116 + }, + { + "epoch": 0.4633821016214414, + "grad_norm": 404.0, + "learning_rate": 5.8320999432184755e-05, + "loss": 16.0004, + "step": 11117 + }, + { + "epoch": 0.4634237839189696, + "grad_norm": 348.0, + "learning_rate": 5.8314343469173246e-05, + "loss": 13.5628, + "step": 11118 + }, + { + "epoch": 0.46346546621649787, + "grad_norm": 215.0, + "learning_rate": 5.830768735463209e-05, + "loss": 12.1882, + "step": 11119 + }, + { + "epoch": 0.4635071485140261, + "grad_norm": 600.0, + "learning_rate": 5.830103108868259e-05, + "loss": 18.3759, + "step": 11120 + }, + { + "epoch": 0.46354883081155435, + "grad_norm": 334.0, + "learning_rate": 5.8294374671446064e-05, + "loss": 14.5627, + "step": 11121 + }, + { + "epoch": 0.46359051310908256, + "grad_norm": 138.0, + "learning_rate": 5.828771810304383e-05, + "loss": 10.0012, + "step": 11122 + }, + { + "epoch": 0.4636321954066108, + "grad_norm": 154.0, + "learning_rate": 5.828106138359719e-05, + "loss": 9.9381, + "step": 11123 + }, + { + "epoch": 0.46367387770413904, + "grad_norm": 112.0, + "learning_rate": 5.827440451322748e-05, + "loss": 9.8754, + "step": 11124 + }, + { + "epoch": 0.4637155600016673, + "grad_norm": 462.0, + "learning_rate": 5.8267747492056015e-05, + "loss": 17.5004, + "step": 11125 + }, + { + "epoch": 0.4637572422991955, + "grad_norm": 358.0, + "learning_rate": 5.8261090320204105e-05, + "loss": 14.0004, + "step": 11126 + }, + { + "epoch": 0.4637989245967238, + "grad_norm": 548.0, + "learning_rate": 5.82544329977931e-05, + "loss": 16.3758, + "step": 11127 + }, + { + "epoch": 0.463840606894252, + "grad_norm": 92.5, + "learning_rate": 5.824777552494431e-05, + "loss": 8.4383, + "step": 11128 + }, + { + "epoch": 0.46388228919178026, + "grad_norm": 688.0, + "learning_rate": 5.82411179017791e-05, + "loss": 17.8795, + "step": 11129 + }, + { + "epoch": 0.4639239714893085, + "grad_norm": 330.0, + "learning_rate": 5.8234460128418764e-05, + "loss": 12.6252, + "step": 11130 + }, + { + "epoch": 0.46396565378683674, + "grad_norm": 154.0, + "learning_rate": 5.8227802204984674e-05, + "loss": 10.6252, + "step": 11131 + }, + { + "epoch": 0.46400733608436495, + "grad_norm": 274.0, + "learning_rate": 5.822114413159815e-05, + "loss": 12.2504, + "step": 11132 + }, + { + "epoch": 0.4640490183818932, + "grad_norm": 127.5, + "learning_rate": 5.8214485908380544e-05, + "loss": 9.3127, + "step": 11133 + }, + { + "epoch": 0.46409070067942143, + "grad_norm": 240.0, + "learning_rate": 5.8207827535453195e-05, + "loss": 10.8135, + "step": 11134 + }, + { + "epoch": 0.4641323829769497, + "grad_norm": 58.0, + "learning_rate": 5.820116901293748e-05, + "loss": 6.6877, + "step": 11135 + }, + { + "epoch": 0.4641740652744779, + "grad_norm": 253.0, + "learning_rate": 5.819451034095472e-05, + "loss": 11.3132, + "step": 11136 + }, + { + "epoch": 0.4642157475720062, + "grad_norm": 195.0, + "learning_rate": 5.818785151962629e-05, + "loss": 10.8752, + "step": 11137 + }, + { + "epoch": 0.4642574298695344, + "grad_norm": 101.5, + "learning_rate": 5.818119254907354e-05, + "loss": 8.2502, + "step": 11138 + }, + { + "epoch": 0.46429911216706266, + "grad_norm": 480.0, + "learning_rate": 5.817453342941782e-05, + "loss": 15.6279, + "step": 11139 + }, + { + "epoch": 0.46434079446459087, + "grad_norm": 372.0, + "learning_rate": 5.816787416078051e-05, + "loss": 14.8753, + "step": 11140 + }, + { + "epoch": 0.46438247676211913, + "grad_norm": 1168.0, + "learning_rate": 5.8161214743282964e-05, + "loss": 22.2543, + "step": 11141 + }, + { + "epoch": 0.46442415905964735, + "grad_norm": 540.0, + "learning_rate": 5.815455517704655e-05, + "loss": 16.3753, + "step": 11142 + }, + { + "epoch": 0.4644658413571756, + "grad_norm": 174.0, + "learning_rate": 5.814789546219266e-05, + "loss": 9.5002, + "step": 11143 + }, + { + "epoch": 0.4645075236547038, + "grad_norm": 326.0, + "learning_rate": 5.814123559884264e-05, + "loss": 8.3754, + "step": 11144 + }, + { + "epoch": 0.4645492059522321, + "grad_norm": 552.0, + "learning_rate": 5.813457558711788e-05, + "loss": 18.5005, + "step": 11145 + }, + { + "epoch": 0.4645908882497603, + "grad_norm": 254.0, + "learning_rate": 5.8127915427139746e-05, + "loss": 13.0001, + "step": 11146 + }, + { + "epoch": 0.46463257054728857, + "grad_norm": 215.0, + "learning_rate": 5.812125511902965e-05, + "loss": 9.9388, + "step": 11147 + }, + { + "epoch": 0.4646742528448168, + "grad_norm": 340.0, + "learning_rate": 5.811459466290895e-05, + "loss": 14.8753, + "step": 11148 + }, + { + "epoch": 0.46471593514234505, + "grad_norm": 284.0, + "learning_rate": 5.810793405889905e-05, + "loss": 12.0008, + "step": 11149 + }, + { + "epoch": 0.46475761743987326, + "grad_norm": 135.0, + "learning_rate": 5.810127330712132e-05, + "loss": 9.5005, + "step": 11150 + }, + { + "epoch": 0.46479929973740153, + "grad_norm": 284.0, + "learning_rate": 5.809461240769718e-05, + "loss": 13.3128, + "step": 11151 + }, + { + "epoch": 0.46484098203492974, + "grad_norm": 153.0, + "learning_rate": 5.8087951360747994e-05, + "loss": 10.7505, + "step": 11152 + }, + { + "epoch": 0.464882664332458, + "grad_norm": 520.0, + "learning_rate": 5.8081290166395186e-05, + "loss": 17.1251, + "step": 11153 + }, + { + "epoch": 0.4649243466299862, + "grad_norm": 348.0, + "learning_rate": 5.8074628824760146e-05, + "loss": 13.3127, + "step": 11154 + }, + { + "epoch": 0.4649660289275145, + "grad_norm": 448.0, + "learning_rate": 5.806796733596428e-05, + "loss": 17.0008, + "step": 11155 + }, + { + "epoch": 0.4650077112250427, + "grad_norm": 458.0, + "learning_rate": 5.806130570012899e-05, + "loss": 16.0002, + "step": 11156 + }, + { + "epoch": 0.46504939352257096, + "grad_norm": 648.0, + "learning_rate": 5.8054643917375695e-05, + "loss": 20.2504, + "step": 11157 + }, + { + "epoch": 0.46509107582009923, + "grad_norm": 560.0, + "learning_rate": 5.8047981987825787e-05, + "loss": 19.0002, + "step": 11158 + }, + { + "epoch": 0.46513275811762744, + "grad_norm": 288.0, + "learning_rate": 5.804131991160069e-05, + "loss": 12.1252, + "step": 11159 + }, + { + "epoch": 0.4651744404151557, + "grad_norm": 440.0, + "learning_rate": 5.8034657688821834e-05, + "loss": 14.0005, + "step": 11160 + }, + { + "epoch": 0.4652161227126839, + "grad_norm": 564.0, + "learning_rate": 5.802799531961063e-05, + "loss": 18.8753, + "step": 11161 + }, + { + "epoch": 0.4652578050102122, + "grad_norm": 95.0, + "learning_rate": 5.80213328040885e-05, + "loss": 8.2505, + "step": 11162 + }, + { + "epoch": 0.4652994873077404, + "grad_norm": 472.0, + "learning_rate": 5.801467014237686e-05, + "loss": 15.3127, + "step": 11163 + }, + { + "epoch": 0.46534116960526867, + "grad_norm": 316.0, + "learning_rate": 5.800800733459715e-05, + "loss": 14.1283, + "step": 11164 + }, + { + "epoch": 0.4653828519027969, + "grad_norm": 680.0, + "learning_rate": 5.8001344380870794e-05, + "loss": 17.1255, + "step": 11165 + }, + { + "epoch": 0.46542453420032515, + "grad_norm": 173.0, + "learning_rate": 5.799468128131923e-05, + "loss": 10.7501, + "step": 11166 + }, + { + "epoch": 0.46546621649785336, + "grad_norm": 398.0, + "learning_rate": 5.798801803606388e-05, + "loss": 14.9381, + "step": 11167 + }, + { + "epoch": 0.4655078987953816, + "grad_norm": 1496.0, + "learning_rate": 5.7981354645226203e-05, + "loss": 37.2504, + "step": 11168 + }, + { + "epoch": 0.46554958109290984, + "grad_norm": 324.0, + "learning_rate": 5.797469110892764e-05, + "loss": 14.3754, + "step": 11169 + }, + { + "epoch": 0.4655912633904381, + "grad_norm": 420.0, + "learning_rate": 5.79680274272896e-05, + "loss": 16.2503, + "step": 11170 + }, + { + "epoch": 0.4656329456879663, + "grad_norm": 294.0, + "learning_rate": 5.796136360043355e-05, + "loss": 14.2503, + "step": 11171 + }, + { + "epoch": 0.4656746279854946, + "grad_norm": 322.0, + "learning_rate": 5.795469962848096e-05, + "loss": 13.0002, + "step": 11172 + }, + { + "epoch": 0.4657163102830228, + "grad_norm": 356.0, + "learning_rate": 5.7948035511553254e-05, + "loss": 14.5628, + "step": 11173 + }, + { + "epoch": 0.46575799258055106, + "grad_norm": 222.0, + "learning_rate": 5.794137124977189e-05, + "loss": 13.3151, + "step": 11174 + }, + { + "epoch": 0.4657996748780793, + "grad_norm": 238.0, + "learning_rate": 5.793470684325835e-05, + "loss": 12.0003, + "step": 11175 + }, + { + "epoch": 0.46584135717560754, + "grad_norm": 127.0, + "learning_rate": 5.7928042292134054e-05, + "loss": 11.688, + "step": 11176 + }, + { + "epoch": 0.46588303947313575, + "grad_norm": 708.0, + "learning_rate": 5.79213775965205e-05, + "loss": 25.0033, + "step": 11177 + }, + { + "epoch": 0.465924721770664, + "grad_norm": 203.0, + "learning_rate": 5.791471275653913e-05, + "loss": 11.8126, + "step": 11178 + }, + { + "epoch": 0.46596640406819223, + "grad_norm": 468.0, + "learning_rate": 5.7908047772311404e-05, + "loss": 15.6876, + "step": 11179 + }, + { + "epoch": 0.4660080863657205, + "grad_norm": 408.0, + "learning_rate": 5.7901382643958816e-05, + "loss": 15.3127, + "step": 11180 + }, + { + "epoch": 0.4660497686632487, + "grad_norm": 476.0, + "learning_rate": 5.789471737160283e-05, + "loss": 15.2502, + "step": 11181 + }, + { + "epoch": 0.466091450960777, + "grad_norm": 188.0, + "learning_rate": 5.788805195536492e-05, + "loss": 11.5633, + "step": 11182 + }, + { + "epoch": 0.4661331332583052, + "grad_norm": 356.0, + "learning_rate": 5.7881386395366546e-05, + "loss": 14.4378, + "step": 11183 + }, + { + "epoch": 0.46617481555583345, + "grad_norm": 540.0, + "learning_rate": 5.787472069172921e-05, + "loss": 18.3763, + "step": 11184 + }, + { + "epoch": 0.46621649785336167, + "grad_norm": 720.0, + "learning_rate": 5.786805484457441e-05, + "loss": 20.1251, + "step": 11185 + }, + { + "epoch": 0.46625818015088993, + "grad_norm": 836.0, + "learning_rate": 5.786138885402359e-05, + "loss": 24.6251, + "step": 11186 + }, + { + "epoch": 0.46629986244841815, + "grad_norm": 388.0, + "learning_rate": 5.7854722720198275e-05, + "loss": 15.0626, + "step": 11187 + }, + { + "epoch": 0.4663415447459464, + "grad_norm": 248.0, + "learning_rate": 5.784805644321994e-05, + "loss": 12.0627, + "step": 11188 + }, + { + "epoch": 0.4663832270434746, + "grad_norm": 636.0, + "learning_rate": 5.7841390023210076e-05, + "loss": 19.0018, + "step": 11189 + }, + { + "epoch": 0.4664249093410029, + "grad_norm": 688.0, + "learning_rate": 5.7834723460290185e-05, + "loss": 19.3776, + "step": 11190 + }, + { + "epoch": 0.4664665916385311, + "grad_norm": 278.0, + "learning_rate": 5.782805675458176e-05, + "loss": 12.5003, + "step": 11191 + }, + { + "epoch": 0.46650827393605937, + "grad_norm": 536.0, + "learning_rate": 5.7821389906206315e-05, + "loss": 16.0005, + "step": 11192 + }, + { + "epoch": 0.4665499562335876, + "grad_norm": 191.0, + "learning_rate": 5.781472291528534e-05, + "loss": 5.5942, + "step": 11193 + }, + { + "epoch": 0.46659163853111585, + "grad_norm": 784.0, + "learning_rate": 5.780805578194034e-05, + "loss": 18.8793, + "step": 11194 + }, + { + "epoch": 0.46663332082864406, + "grad_norm": 245.0, + "learning_rate": 5.780138850629283e-05, + "loss": 11.7503, + "step": 11195 + }, + { + "epoch": 0.4666750031261723, + "grad_norm": 736.0, + "learning_rate": 5.779472108846432e-05, + "loss": 19.1253, + "step": 11196 + }, + { + "epoch": 0.46671668542370054, + "grad_norm": 498.0, + "learning_rate": 5.778805352857632e-05, + "loss": 16.6252, + "step": 11197 + }, + { + "epoch": 0.4667583677212288, + "grad_norm": 229.0, + "learning_rate": 5.778138582675038e-05, + "loss": 9.5003, + "step": 11198 + }, + { + "epoch": 0.466800050018757, + "grad_norm": 146.0, + "learning_rate": 5.777471798310797e-05, + "loss": 10.6882, + "step": 11199 + }, + { + "epoch": 0.4668417323162853, + "grad_norm": 69.5, + "learning_rate": 5.7768049997770647e-05, + "loss": 7.0939, + "step": 11200 + }, + { + "epoch": 0.4668834146138135, + "grad_norm": 432.0, + "learning_rate": 5.776138187085992e-05, + "loss": 17.1267, + "step": 11201 + }, + { + "epoch": 0.46692509691134176, + "grad_norm": 230.0, + "learning_rate": 5.7754713602497314e-05, + "loss": 11.5627, + "step": 11202 + }, + { + "epoch": 0.46696677920887, + "grad_norm": 127.5, + "learning_rate": 5.774804519280437e-05, + "loss": 8.0637, + "step": 11203 + }, + { + "epoch": 0.46700846150639824, + "grad_norm": 94.5, + "learning_rate": 5.774137664190261e-05, + "loss": 8.0002, + "step": 11204 + }, + { + "epoch": 0.46705014380392645, + "grad_norm": 326.0, + "learning_rate": 5.773470794991358e-05, + "loss": 13.938, + "step": 11205 + }, + { + "epoch": 0.4670918261014547, + "grad_norm": 270.0, + "learning_rate": 5.772803911695881e-05, + "loss": 12.0006, + "step": 11206 + }, + { + "epoch": 0.46713350839898293, + "grad_norm": 430.0, + "learning_rate": 5.7721370143159834e-05, + "loss": 15.3756, + "step": 11207 + }, + { + "epoch": 0.4671751906965112, + "grad_norm": 255.0, + "learning_rate": 5.7714701028638205e-05, + "loss": 12.3758, + "step": 11208 + }, + { + "epoch": 0.4672168729940394, + "grad_norm": 56.5, + "learning_rate": 5.7708031773515456e-05, + "loss": 8.2503, + "step": 11209 + }, + { + "epoch": 0.4672585552915677, + "grad_norm": 191.0, + "learning_rate": 5.770136237791315e-05, + "loss": 11.3129, + "step": 11210 + }, + { + "epoch": 0.4673002375890959, + "grad_norm": 462.0, + "learning_rate": 5.7694692841952837e-05, + "loss": 15.8755, + "step": 11211 + }, + { + "epoch": 0.46734191988662416, + "grad_norm": 1864.0, + "learning_rate": 5.768802316575606e-05, + "loss": 34.5044, + "step": 11212 + }, + { + "epoch": 0.46738360218415237, + "grad_norm": 152.0, + "learning_rate": 5.7681353349444375e-05, + "loss": 9.6878, + "step": 11213 + }, + { + "epoch": 0.46742528448168064, + "grad_norm": 288.0, + "learning_rate": 5.767468339313935e-05, + "loss": 12.1898, + "step": 11214 + }, + { + "epoch": 0.46746696677920885, + "grad_norm": 103.0, + "learning_rate": 5.766801329696254e-05, + "loss": 10.5017, + "step": 11215 + }, + { + "epoch": 0.4675086490767371, + "grad_norm": 1032.0, + "learning_rate": 5.76613430610355e-05, + "loss": 25.2501, + "step": 11216 + }, + { + "epoch": 0.4675503313742653, + "grad_norm": 484.0, + "learning_rate": 5.7654672685479816e-05, + "loss": 17.6262, + "step": 11217 + }, + { + "epoch": 0.4675920136717936, + "grad_norm": 414.0, + "learning_rate": 5.7648002170417025e-05, + "loss": 15.7505, + "step": 11218 + }, + { + "epoch": 0.4676336959693218, + "grad_norm": 133.0, + "learning_rate": 5.7641331515968735e-05, + "loss": 8.3754, + "step": 11219 + }, + { + "epoch": 0.4676753782668501, + "grad_norm": 179.0, + "learning_rate": 5.7634660722256486e-05, + "loss": 12.1276, + "step": 11220 + }, + { + "epoch": 0.4677170605643783, + "grad_norm": 149.0, + "learning_rate": 5.762798978940185e-05, + "loss": 12.0014, + "step": 11221 + }, + { + "epoch": 0.46775874286190655, + "grad_norm": 151.0, + "learning_rate": 5.7621318717526454e-05, + "loss": 9.6876, + "step": 11222 + }, + { + "epoch": 0.46780042515943476, + "grad_norm": 288.0, + "learning_rate": 5.761464750675183e-05, + "loss": 13.0003, + "step": 11223 + }, + { + "epoch": 0.46784210745696303, + "grad_norm": 163.0, + "learning_rate": 5.760797615719959e-05, + "loss": 11.1254, + "step": 11224 + }, + { + "epoch": 0.46788378975449124, + "grad_norm": 166.0, + "learning_rate": 5.7601304668991295e-05, + "loss": 9.5628, + "step": 11225 + }, + { + "epoch": 0.4679254720520195, + "grad_norm": 336.0, + "learning_rate": 5.759463304224857e-05, + "loss": 13.7511, + "step": 11226 + }, + { + "epoch": 0.4679671543495477, + "grad_norm": 262.0, + "learning_rate": 5.758796127709296e-05, + "loss": 13.438, + "step": 11227 + }, + { + "epoch": 0.468008836647076, + "grad_norm": 290.0, + "learning_rate": 5.7581289373646095e-05, + "loss": 12.5639, + "step": 11228 + }, + { + "epoch": 0.4680505189446042, + "grad_norm": 51.5, + "learning_rate": 5.757461733202956e-05, + "loss": 7.5003, + "step": 11229 + }, + { + "epoch": 0.46809220124213247, + "grad_norm": 274.0, + "learning_rate": 5.756794515236494e-05, + "loss": 12.9388, + "step": 11230 + }, + { + "epoch": 0.46813388353966073, + "grad_norm": 253.0, + "learning_rate": 5.7561272834773864e-05, + "loss": 13.4392, + "step": 11231 + }, + { + "epoch": 0.46817556583718895, + "grad_norm": 988.0, + "learning_rate": 5.755460037937791e-05, + "loss": 24.6301, + "step": 11232 + }, + { + "epoch": 0.4682172481347172, + "grad_norm": 510.0, + "learning_rate": 5.754792778629869e-05, + "loss": 18.7502, + "step": 11233 + }, + { + "epoch": 0.4682589304322454, + "grad_norm": 56.5, + "learning_rate": 5.754125505565782e-05, + "loss": 6.7503, + "step": 11234 + }, + { + "epoch": 0.4683006127297737, + "grad_norm": 274.0, + "learning_rate": 5.7534582187576904e-05, + "loss": 11.5636, + "step": 11235 + }, + { + "epoch": 0.4683422950273019, + "grad_norm": 292.0, + "learning_rate": 5.752790918217756e-05, + "loss": 13.313, + "step": 11236 + }, + { + "epoch": 0.46838397732483017, + "grad_norm": 362.0, + "learning_rate": 5.7521236039581415e-05, + "loss": 14.1252, + "step": 11237 + }, + { + "epoch": 0.4684256596223584, + "grad_norm": 418.0, + "learning_rate": 5.751456275991006e-05, + "loss": 16.1252, + "step": 11238 + }, + { + "epoch": 0.46846734191988665, + "grad_norm": 406.0, + "learning_rate": 5.7507889343285135e-05, + "loss": 14.5003, + "step": 11239 + }, + { + "epoch": 0.46850902421741486, + "grad_norm": 280.0, + "learning_rate": 5.7501215789828264e-05, + "loss": 12.4379, + "step": 11240 + }, + { + "epoch": 0.4685507065149431, + "grad_norm": 121.5, + "learning_rate": 5.7494542099661075e-05, + "loss": 9.1878, + "step": 11241 + }, + { + "epoch": 0.46859238881247134, + "grad_norm": 207.0, + "learning_rate": 5.7487868272905174e-05, + "loss": 12.1883, + "step": 11242 + }, + { + "epoch": 0.4686340711099996, + "grad_norm": 844.0, + "learning_rate": 5.748119430968223e-05, + "loss": 24.2573, + "step": 11243 + }, + { + "epoch": 0.4686757534075278, + "grad_norm": 792.0, + "learning_rate": 5.747452021011385e-05, + "loss": 21.3762, + "step": 11244 + }, + { + "epoch": 0.4687174357050561, + "grad_norm": 470.0, + "learning_rate": 5.7467845974321666e-05, + "loss": 15.8126, + "step": 11245 + }, + { + "epoch": 0.4687591180025843, + "grad_norm": 388.0, + "learning_rate": 5.746117160242732e-05, + "loss": 15.2505, + "step": 11246 + }, + { + "epoch": 0.46880080030011256, + "grad_norm": 215.0, + "learning_rate": 5.745449709455246e-05, + "loss": 10.9377, + "step": 11247 + }, + { + "epoch": 0.4688424825976408, + "grad_norm": 406.0, + "learning_rate": 5.744782245081875e-05, + "loss": 15.6878, + "step": 11248 + }, + { + "epoch": 0.46888416489516904, + "grad_norm": 1128.0, + "learning_rate": 5.744114767134781e-05, + "loss": 25.3752, + "step": 11249 + }, + { + "epoch": 0.46892584719269725, + "grad_norm": 255.0, + "learning_rate": 5.743447275626128e-05, + "loss": 13.2503, + "step": 11250 + }, + { + "epoch": 0.4689675294902255, + "grad_norm": 360.0, + "learning_rate": 5.742779770568083e-05, + "loss": 14.8752, + "step": 11251 + }, + { + "epoch": 0.46900921178775373, + "grad_norm": 560.0, + "learning_rate": 5.742112251972811e-05, + "loss": 16.0024, + "step": 11252 + }, + { + "epoch": 0.469050894085282, + "grad_norm": 310.0, + "learning_rate": 5.741444719852477e-05, + "loss": 13.688, + "step": 11253 + }, + { + "epoch": 0.4690925763828102, + "grad_norm": 152.0, + "learning_rate": 5.740777174219247e-05, + "loss": 10.7501, + "step": 11254 + }, + { + "epoch": 0.4691342586803385, + "grad_norm": 240.0, + "learning_rate": 5.740109615085287e-05, + "loss": 11.6253, + "step": 11255 + }, + { + "epoch": 0.4691759409778667, + "grad_norm": 222.0, + "learning_rate": 5.739442042462765e-05, + "loss": 11.6253, + "step": 11256 + }, + { + "epoch": 0.46921762327539496, + "grad_norm": 748.0, + "learning_rate": 5.7387744563638444e-05, + "loss": 20.5002, + "step": 11257 + }, + { + "epoch": 0.46925930557292317, + "grad_norm": 354.0, + "learning_rate": 5.738106856800694e-05, + "loss": 15.688, + "step": 11258 + }, + { + "epoch": 0.46930098787045144, + "grad_norm": 356.0, + "learning_rate": 5.7374392437854806e-05, + "loss": 14.3127, + "step": 11259 + }, + { + "epoch": 0.46934267016797965, + "grad_norm": 344.0, + "learning_rate": 5.736771617330372e-05, + "loss": 14.8127, + "step": 11260 + }, + { + "epoch": 0.4693843524655079, + "grad_norm": 1012.0, + "learning_rate": 5.7361039774475355e-05, + "loss": 21.3799, + "step": 11261 + }, + { + "epoch": 0.4694260347630361, + "grad_norm": 414.0, + "learning_rate": 5.735436324149139e-05, + "loss": 16.2508, + "step": 11262 + }, + { + "epoch": 0.4694677170605644, + "grad_norm": 104.5, + "learning_rate": 5.7347686574473494e-05, + "loss": 9.0626, + "step": 11263 + }, + { + "epoch": 0.4695093993580926, + "grad_norm": 756.0, + "learning_rate": 5.734100977354336e-05, + "loss": 21.1282, + "step": 11264 + }, + { + "epoch": 0.46955108165562087, + "grad_norm": 218.0, + "learning_rate": 5.733433283882268e-05, + "loss": 11.8755, + "step": 11265 + }, + { + "epoch": 0.4695927639531491, + "grad_norm": 408.0, + "learning_rate": 5.732765577043312e-05, + "loss": 16.6254, + "step": 11266 + }, + { + "epoch": 0.46963444625067735, + "grad_norm": 524.0, + "learning_rate": 5.732097856849638e-05, + "loss": 15.0637, + "step": 11267 + }, + { + "epoch": 0.46967612854820556, + "grad_norm": 233.0, + "learning_rate": 5.7314301233134174e-05, + "loss": 10.813, + "step": 11268 + }, + { + "epoch": 0.46971781084573383, + "grad_norm": 312.0, + "learning_rate": 5.730762376446816e-05, + "loss": 13.1257, + "step": 11269 + }, + { + "epoch": 0.46975949314326204, + "grad_norm": 340.0, + "learning_rate": 5.730094616262007e-05, + "loss": 14.5627, + "step": 11270 + }, + { + "epoch": 0.4698011754407903, + "grad_norm": 544.0, + "learning_rate": 5.729426842771158e-05, + "loss": 21.0003, + "step": 11271 + }, + { + "epoch": 0.4698428577383185, + "grad_norm": 145.0, + "learning_rate": 5.728759055986439e-05, + "loss": 11.1878, + "step": 11272 + }, + { + "epoch": 0.4698845400358468, + "grad_norm": 112.0, + "learning_rate": 5.728091255920023e-05, + "loss": 7.3126, + "step": 11273 + }, + { + "epoch": 0.469926222333375, + "grad_norm": 764.0, + "learning_rate": 5.727423442584079e-05, + "loss": 19.7554, + "step": 11274 + }, + { + "epoch": 0.46996790463090327, + "grad_norm": 764.0, + "learning_rate": 5.726755615990778e-05, + "loss": 23.6252, + "step": 11275 + }, + { + "epoch": 0.4700095869284315, + "grad_norm": 139.0, + "learning_rate": 5.7260877761522914e-05, + "loss": 9.7502, + "step": 11276 + }, + { + "epoch": 0.47005126922595974, + "grad_norm": 192.0, + "learning_rate": 5.72541992308079e-05, + "loss": 7.5317, + "step": 11277 + }, + { + "epoch": 0.47009295152348796, + "grad_norm": 688.0, + "learning_rate": 5.724752056788447e-05, + "loss": 18.0012, + "step": 11278 + }, + { + "epoch": 0.4701346338210162, + "grad_norm": 266.0, + "learning_rate": 5.724084177287434e-05, + "loss": 12.2507, + "step": 11279 + }, + { + "epoch": 0.47017631611854444, + "grad_norm": 376.0, + "learning_rate": 5.723416284589922e-05, + "loss": 15.3127, + "step": 11280 + }, + { + "epoch": 0.4702179984160727, + "grad_norm": 656.0, + "learning_rate": 5.722748378708084e-05, + "loss": 19.6257, + "step": 11281 + }, + { + "epoch": 0.4702596807136009, + "grad_norm": 251.0, + "learning_rate": 5.722080459654092e-05, + "loss": 13.0003, + "step": 11282 + }, + { + "epoch": 0.4703013630111292, + "grad_norm": 316.0, + "learning_rate": 5.7214125274401195e-05, + "loss": 14.3777, + "step": 11283 + }, + { + "epoch": 0.4703430453086574, + "grad_norm": 209.0, + "learning_rate": 5.720744582078339e-05, + "loss": 11.1885, + "step": 11284 + }, + { + "epoch": 0.47038472760618566, + "grad_norm": 160.0, + "learning_rate": 5.720076623580925e-05, + "loss": 9.9377, + "step": 11285 + }, + { + "epoch": 0.47042640990371387, + "grad_norm": 360.0, + "learning_rate": 5.719408651960052e-05, + "loss": 14.7502, + "step": 11286 + }, + { + "epoch": 0.47046809220124214, + "grad_norm": 796.0, + "learning_rate": 5.718740667227892e-05, + "loss": 21.2508, + "step": 11287 + }, + { + "epoch": 0.47050977449877035, + "grad_norm": 302.0, + "learning_rate": 5.718072669396619e-05, + "loss": 13.1877, + "step": 11288 + }, + { + "epoch": 0.4705514567962986, + "grad_norm": 203.0, + "learning_rate": 5.717404658478408e-05, + "loss": 11.6252, + "step": 11289 + }, + { + "epoch": 0.47059313909382683, + "grad_norm": 512.0, + "learning_rate": 5.716736634485433e-05, + "loss": 17.6252, + "step": 11290 + }, + { + "epoch": 0.4706348213913551, + "grad_norm": 117.5, + "learning_rate": 5.71606859742987e-05, + "loss": 9.0001, + "step": 11291 + }, + { + "epoch": 0.4706765036888833, + "grad_norm": 1416.0, + "learning_rate": 5.7154005473238936e-05, + "loss": 31.3755, + "step": 11292 + }, + { + "epoch": 0.4707181859864116, + "grad_norm": 408.0, + "learning_rate": 5.714732484179678e-05, + "loss": 16.7504, + "step": 11293 + }, + { + "epoch": 0.4707598682839398, + "grad_norm": 177.0, + "learning_rate": 5.7140644080094e-05, + "loss": 8.3757, + "step": 11294 + }, + { + "epoch": 0.47080155058146805, + "grad_norm": 784.0, + "learning_rate": 5.713396318825234e-05, + "loss": 21.6257, + "step": 11295 + }, + { + "epoch": 0.47084323287899627, + "grad_norm": 264.0, + "learning_rate": 5.712728216639357e-05, + "loss": 12.8129, + "step": 11296 + }, + { + "epoch": 0.47088491517652453, + "grad_norm": 476.0, + "learning_rate": 5.712060101463943e-05, + "loss": 18.1254, + "step": 11297 + }, + { + "epoch": 0.47092659747405274, + "grad_norm": 71.5, + "learning_rate": 5.711391973311173e-05, + "loss": 8.6879, + "step": 11298 + }, + { + "epoch": 0.470968279771581, + "grad_norm": 119.5, + "learning_rate": 5.71072383219322e-05, + "loss": 8.0627, + "step": 11299 + }, + { + "epoch": 0.4710099620691092, + "grad_norm": 225.0, + "learning_rate": 5.7100556781222634e-05, + "loss": 12.0003, + "step": 11300 + }, + { + "epoch": 0.4710516443666375, + "grad_norm": 71.5, + "learning_rate": 5.709387511110478e-05, + "loss": 8.9379, + "step": 11301 + }, + { + "epoch": 0.4710933266641657, + "grad_norm": 478.0, + "learning_rate": 5.7087193311700425e-05, + "loss": 16.6251, + "step": 11302 + }, + { + "epoch": 0.47113500896169397, + "grad_norm": 214.0, + "learning_rate": 5.708051138313134e-05, + "loss": 12.1264, + "step": 11303 + }, + { + "epoch": 0.47117669125922224, + "grad_norm": 408.0, + "learning_rate": 5.707382932551931e-05, + "loss": 16.7502, + "step": 11304 + }, + { + "epoch": 0.47121837355675045, + "grad_norm": 248.0, + "learning_rate": 5.706714713898611e-05, + "loss": 10.0629, + "step": 11305 + }, + { + "epoch": 0.4712600558542787, + "grad_norm": 310.0, + "learning_rate": 5.7060464823653525e-05, + "loss": 13.3127, + "step": 11306 + }, + { + "epoch": 0.4713017381518069, + "grad_norm": 502.0, + "learning_rate": 5.705378237964335e-05, + "loss": 18.1255, + "step": 11307 + }, + { + "epoch": 0.4713434204493352, + "grad_norm": 454.0, + "learning_rate": 5.704709980707736e-05, + "loss": 16.8753, + "step": 11308 + }, + { + "epoch": 0.4713851027468634, + "grad_norm": 336.0, + "learning_rate": 5.7040417106077334e-05, + "loss": 12.7502, + "step": 11309 + }, + { + "epoch": 0.47142678504439167, + "grad_norm": 300.0, + "learning_rate": 5.7033734276765104e-05, + "loss": 12.4402, + "step": 11310 + }, + { + "epoch": 0.4714684673419199, + "grad_norm": 196.0, + "learning_rate": 5.702705131926244e-05, + "loss": 12.3754, + "step": 11311 + }, + { + "epoch": 0.47151014963944815, + "grad_norm": 85.5, + "learning_rate": 5.702036823369114e-05, + "loss": 6.5002, + "step": 11312 + }, + { + "epoch": 0.47155183193697636, + "grad_norm": 268.0, + "learning_rate": 5.701368502017299e-05, + "loss": 12.3128, + "step": 11313 + }, + { + "epoch": 0.47159351423450463, + "grad_norm": 628.0, + "learning_rate": 5.700700167882983e-05, + "loss": 18.6281, + "step": 11314 + }, + { + "epoch": 0.47163519653203284, + "grad_norm": 195.0, + "learning_rate": 5.700031820978343e-05, + "loss": 9.6257, + "step": 11315 + }, + { + "epoch": 0.4716768788295611, + "grad_norm": 328.0, + "learning_rate": 5.699363461315561e-05, + "loss": 15.0626, + "step": 11316 + }, + { + "epoch": 0.4717185611270893, + "grad_norm": 320.0, + "learning_rate": 5.698695088906818e-05, + "loss": 12.5009, + "step": 11317 + }, + { + "epoch": 0.4717602434246176, + "grad_norm": 304.0, + "learning_rate": 5.6980267037642954e-05, + "loss": 13.0628, + "step": 11318 + }, + { + "epoch": 0.4718019257221458, + "grad_norm": 197.0, + "learning_rate": 5.697358305900173e-05, + "loss": 11.0007, + "step": 11319 + }, + { + "epoch": 0.47184360801967407, + "grad_norm": 188.0, + "learning_rate": 5.6966898953266355e-05, + "loss": 10.7503, + "step": 11320 + }, + { + "epoch": 0.4718852903172023, + "grad_norm": 199.0, + "learning_rate": 5.696021472055861e-05, + "loss": 12.3757, + "step": 11321 + }, + { + "epoch": 0.47192697261473054, + "grad_norm": 366.0, + "learning_rate": 5.695353036100034e-05, + "loss": 14.5005, + "step": 11322 + }, + { + "epoch": 0.47196865491225876, + "grad_norm": 548.0, + "learning_rate": 5.694684587471336e-05, + "loss": 17.5002, + "step": 11323 + }, + { + "epoch": 0.472010337209787, + "grad_norm": 227.0, + "learning_rate": 5.6940161261819504e-05, + "loss": 11.5003, + "step": 11324 + }, + { + "epoch": 0.47205201950731523, + "grad_norm": 240.0, + "learning_rate": 5.6933476522440585e-05, + "loss": 12.7502, + "step": 11325 + }, + { + "epoch": 0.4720937018048435, + "grad_norm": 580.0, + "learning_rate": 5.6926791656698444e-05, + "loss": 18.2504, + "step": 11326 + }, + { + "epoch": 0.4721353841023717, + "grad_norm": 322.0, + "learning_rate": 5.692010666471491e-05, + "loss": 14.6251, + "step": 11327 + }, + { + "epoch": 0.4721770663999, + "grad_norm": 88.0, + "learning_rate": 5.691342154661182e-05, + "loss": 8.2504, + "step": 11328 + }, + { + "epoch": 0.4722187486974282, + "grad_norm": 85.5, + "learning_rate": 5.690673630251101e-05, + "loss": 8.5002, + "step": 11329 + }, + { + "epoch": 0.47226043099495646, + "grad_norm": 148.0, + "learning_rate": 5.690005093253431e-05, + "loss": 10.1877, + "step": 11330 + }, + { + "epoch": 0.47230211329248467, + "grad_norm": 224.0, + "learning_rate": 5.689336543680358e-05, + "loss": 10.8131, + "step": 11331 + }, + { + "epoch": 0.47234379559001294, + "grad_norm": 576.0, + "learning_rate": 5.6886679815440646e-05, + "loss": 18.0002, + "step": 11332 + }, + { + "epoch": 0.47238547788754115, + "grad_norm": 245.0, + "learning_rate": 5.6879994068567366e-05, + "loss": 13.3752, + "step": 11333 + }, + { + "epoch": 0.4724271601850694, + "grad_norm": 268.0, + "learning_rate": 5.6873308196305576e-05, + "loss": 11.0002, + "step": 11334 + }, + { + "epoch": 0.47246884248259763, + "grad_norm": 116.0, + "learning_rate": 5.6866622198777134e-05, + "loss": 9.1256, + "step": 11335 + }, + { + "epoch": 0.4725105247801259, + "grad_norm": 420.0, + "learning_rate": 5.685993607610389e-05, + "loss": 15.0001, + "step": 11336 + }, + { + "epoch": 0.4725522070776541, + "grad_norm": 246.0, + "learning_rate": 5.685324982840771e-05, + "loss": 11.8128, + "step": 11337 + }, + { + "epoch": 0.4725938893751824, + "grad_norm": 328.0, + "learning_rate": 5.684656345581044e-05, + "loss": 13.5006, + "step": 11338 + }, + { + "epoch": 0.4726355716727106, + "grad_norm": 46.75, + "learning_rate": 5.683987695843395e-05, + "loss": 7.6878, + "step": 11339 + }, + { + "epoch": 0.47267725397023885, + "grad_norm": 532.0, + "learning_rate": 5.6833190336400086e-05, + "loss": 16.8767, + "step": 11340 + }, + { + "epoch": 0.47271893626776706, + "grad_norm": 408.0, + "learning_rate": 5.6826503589830726e-05, + "loss": 15.5641, + "step": 11341 + }, + { + "epoch": 0.47276061856529533, + "grad_norm": 143.0, + "learning_rate": 5.681981671884773e-05, + "loss": 9.3756, + "step": 11342 + }, + { + "epoch": 0.47280230086282354, + "grad_norm": 548.0, + "learning_rate": 5.6813129723572975e-05, + "loss": 17.8753, + "step": 11343 + }, + { + "epoch": 0.4728439831603518, + "grad_norm": 812.0, + "learning_rate": 5.680644260412832e-05, + "loss": 22.8753, + "step": 11344 + }, + { + "epoch": 0.47288566545788, + "grad_norm": 378.0, + "learning_rate": 5.679975536063564e-05, + "loss": 12.5641, + "step": 11345 + }, + { + "epoch": 0.4729273477554083, + "grad_norm": 62.25, + "learning_rate": 5.6793067993216834e-05, + "loss": 8.3131, + "step": 11346 + }, + { + "epoch": 0.4729690300529365, + "grad_norm": 628.0, + "learning_rate": 5.678638050199373e-05, + "loss": 19.3755, + "step": 11347 + }, + { + "epoch": 0.47301071235046477, + "grad_norm": 680.0, + "learning_rate": 5.6779692887088254e-05, + "loss": 19.3782, + "step": 11348 + }, + { + "epoch": 0.473052394647993, + "grad_norm": 314.0, + "learning_rate": 5.6773005148622285e-05, + "loss": 13.8753, + "step": 11349 + }, + { + "epoch": 0.47309407694552125, + "grad_norm": 450.0, + "learning_rate": 5.6766317286717683e-05, + "loss": 16.8758, + "step": 11350 + }, + { + "epoch": 0.47313575924304946, + "grad_norm": 406.0, + "learning_rate": 5.6759629301496353e-05, + "loss": 14.8757, + "step": 11351 + }, + { + "epoch": 0.4731774415405777, + "grad_norm": 230.0, + "learning_rate": 5.675294119308018e-05, + "loss": 11.6879, + "step": 11352 + }, + { + "epoch": 0.47321912383810594, + "grad_norm": 1056.0, + "learning_rate": 5.674625296159105e-05, + "loss": 29.8751, + "step": 11353 + }, + { + "epoch": 0.4732608061356342, + "grad_norm": 139.0, + "learning_rate": 5.673956460715086e-05, + "loss": 9.6254, + "step": 11354 + }, + { + "epoch": 0.4733024884331624, + "grad_norm": 235.0, + "learning_rate": 5.6732876129881506e-05, + "loss": 11.3753, + "step": 11355 + }, + { + "epoch": 0.4733441707306907, + "grad_norm": 298.0, + "learning_rate": 5.672618752990489e-05, + "loss": 12.9379, + "step": 11356 + }, + { + "epoch": 0.4733858530282189, + "grad_norm": 203.0, + "learning_rate": 5.671949880734292e-05, + "loss": 9.9377, + "step": 11357 + }, + { + "epoch": 0.47342753532574716, + "grad_norm": 338.0, + "learning_rate": 5.6712809962317474e-05, + "loss": 13.9377, + "step": 11358 + }, + { + "epoch": 0.4734692176232754, + "grad_norm": 262.0, + "learning_rate": 5.670612099495047e-05, + "loss": 11.1881, + "step": 11359 + }, + { + "epoch": 0.47351089992080364, + "grad_norm": 434.0, + "learning_rate": 5.669943190536381e-05, + "loss": 13.6252, + "step": 11360 + }, + { + "epoch": 0.47355258221833185, + "grad_norm": 149.0, + "learning_rate": 5.669274269367942e-05, + "loss": 10.504, + "step": 11361 + }, + { + "epoch": 0.4735942645158601, + "grad_norm": 69.0, + "learning_rate": 5.6686053360019195e-05, + "loss": 7.6881, + "step": 11362 + }, + { + "epoch": 0.47363594681338833, + "grad_norm": 246.0, + "learning_rate": 5.667936390450506e-05, + "loss": 11.8128, + "step": 11363 + }, + { + "epoch": 0.4736776291109166, + "grad_norm": 584.0, + "learning_rate": 5.6672674327258924e-05, + "loss": 18.0012, + "step": 11364 + }, + { + "epoch": 0.4737193114084448, + "grad_norm": 139.0, + "learning_rate": 5.6665984628402704e-05, + "loss": 9.3129, + "step": 11365 + }, + { + "epoch": 0.4737609937059731, + "grad_norm": 394.0, + "learning_rate": 5.665929480805833e-05, + "loss": 13.8127, + "step": 11366 + }, + { + "epoch": 0.4738026760035013, + "grad_norm": 183.0, + "learning_rate": 5.66526048663477e-05, + "loss": 11.2506, + "step": 11367 + }, + { + "epoch": 0.47384435830102956, + "grad_norm": 189.0, + "learning_rate": 5.664591480339278e-05, + "loss": 11.7503, + "step": 11368 + }, + { + "epoch": 0.47388604059855777, + "grad_norm": 396.0, + "learning_rate": 5.663922461931545e-05, + "loss": 15.0009, + "step": 11369 + }, + { + "epoch": 0.47392772289608603, + "grad_norm": 422.0, + "learning_rate": 5.663253431423767e-05, + "loss": 15.6885, + "step": 11370 + }, + { + "epoch": 0.47396940519361425, + "grad_norm": 151.0, + "learning_rate": 5.662584388828136e-05, + "loss": 9.8127, + "step": 11371 + }, + { + "epoch": 0.4740110874911425, + "grad_norm": 632.0, + "learning_rate": 5.6619153341568455e-05, + "loss": 20.0001, + "step": 11372 + }, + { + "epoch": 0.4740527697886707, + "grad_norm": 138.0, + "learning_rate": 5.661246267422089e-05, + "loss": 5.595, + "step": 11373 + }, + { + "epoch": 0.474094452086199, + "grad_norm": 235.0, + "learning_rate": 5.6605771886360626e-05, + "loss": 10.4378, + "step": 11374 + }, + { + "epoch": 0.4741361343837272, + "grad_norm": 368.0, + "learning_rate": 5.6599080978109565e-05, + "loss": 14.1251, + "step": 11375 + }, + { + "epoch": 0.47417781668125547, + "grad_norm": 242.0, + "learning_rate": 5.659238994958968e-05, + "loss": 11.9379, + "step": 11376 + }, + { + "epoch": 0.47421949897878374, + "grad_norm": 41.75, + "learning_rate": 5.658569880092289e-05, + "loss": 6.4691, + "step": 11377 + }, + { + "epoch": 0.47426118127631195, + "grad_norm": 219.0, + "learning_rate": 5.657900753223117e-05, + "loss": 11.1252, + "step": 11378 + }, + { + "epoch": 0.4743028635738402, + "grad_norm": 94.5, + "learning_rate": 5.6572316143636436e-05, + "loss": 9.438, + "step": 11379 + }, + { + "epoch": 0.47434454587136843, + "grad_norm": 322.0, + "learning_rate": 5.656562463526066e-05, + "loss": 15.3756, + "step": 11380 + }, + { + "epoch": 0.4743862281688967, + "grad_norm": 274.0, + "learning_rate": 5.65589330072258e-05, + "loss": 12.9391, + "step": 11381 + }, + { + "epoch": 0.4744279104664249, + "grad_norm": 187.0, + "learning_rate": 5.6552241259653806e-05, + "loss": 11.0631, + "step": 11382 + }, + { + "epoch": 0.4744695927639532, + "grad_norm": 54.75, + "learning_rate": 5.654554939266663e-05, + "loss": 7.9073, + "step": 11383 + }, + { + "epoch": 0.4745112750614814, + "grad_norm": 130.0, + "learning_rate": 5.6538857406386226e-05, + "loss": 9.6881, + "step": 11384 + }, + { + "epoch": 0.47455295735900965, + "grad_norm": 207.0, + "learning_rate": 5.653216530093457e-05, + "loss": 9.1253, + "step": 11385 + }, + { + "epoch": 0.47459463965653786, + "grad_norm": 430.0, + "learning_rate": 5.652547307643362e-05, + "loss": 15.7503, + "step": 11386 + }, + { + "epoch": 0.47463632195406613, + "grad_norm": 141.0, + "learning_rate": 5.651878073300535e-05, + "loss": 5.6256, + "step": 11387 + }, + { + "epoch": 0.47467800425159434, + "grad_norm": 504.0, + "learning_rate": 5.6512088270771725e-05, + "loss": 17.7502, + "step": 11388 + }, + { + "epoch": 0.4747196865491226, + "grad_norm": 382.0, + "learning_rate": 5.650539568985471e-05, + "loss": 16.6252, + "step": 11389 + }, + { + "epoch": 0.4747613688466508, + "grad_norm": 704.0, + "learning_rate": 5.649870299037627e-05, + "loss": 20.5024, + "step": 11390 + }, + { + "epoch": 0.4748030511441791, + "grad_norm": 276.0, + "learning_rate": 5.649201017245841e-05, + "loss": 13.5635, + "step": 11391 + }, + { + "epoch": 0.4748447334417073, + "grad_norm": 217.0, + "learning_rate": 5.648531723622308e-05, + "loss": 11.6877, + "step": 11392 + }, + { + "epoch": 0.47488641573923557, + "grad_norm": 116.0, + "learning_rate": 5.647862418179226e-05, + "loss": 6.9382, + "step": 11393 + }, + { + "epoch": 0.4749280980367638, + "grad_norm": 362.0, + "learning_rate": 5.647193100928796e-05, + "loss": 14.5629, + "step": 11394 + }, + { + "epoch": 0.47496978033429205, + "grad_norm": 484.0, + "learning_rate": 5.646523771883212e-05, + "loss": 16.7503, + "step": 11395 + }, + { + "epoch": 0.47501146263182026, + "grad_norm": 378.0, + "learning_rate": 5.6458544310546756e-05, + "loss": 6.5003, + "step": 11396 + }, + { + "epoch": 0.4750531449293485, + "grad_norm": 240.0, + "learning_rate": 5.645185078455386e-05, + "loss": 13.5003, + "step": 11397 + }, + { + "epoch": 0.47509482722687674, + "grad_norm": 588.0, + "learning_rate": 5.644515714097539e-05, + "loss": 19.3759, + "step": 11398 + }, + { + "epoch": 0.475136509524405, + "grad_norm": 334.0, + "learning_rate": 5.643846337993337e-05, + "loss": 14.3753, + "step": 11399 + }, + { + "epoch": 0.4751781918219332, + "grad_norm": 268.0, + "learning_rate": 5.643176950154978e-05, + "loss": 11.5627, + "step": 11400 + }, + { + "epoch": 0.4752198741194615, + "grad_norm": 208.0, + "learning_rate": 5.6425075505946624e-05, + "loss": 11.7501, + "step": 11401 + }, + { + "epoch": 0.4752615564169897, + "grad_norm": 350.0, + "learning_rate": 5.64183813932459e-05, + "loss": 13.2503, + "step": 11402 + }, + { + "epoch": 0.47530323871451796, + "grad_norm": 572.0, + "learning_rate": 5.6411687163569596e-05, + "loss": 17.0003, + "step": 11403 + }, + { + "epoch": 0.4753449210120462, + "grad_norm": 404.0, + "learning_rate": 5.640499281703974e-05, + "loss": 15.7502, + "step": 11404 + }, + { + "epoch": 0.47538660330957444, + "grad_norm": 1040.0, + "learning_rate": 5.63982983537783e-05, + "loss": 26.7502, + "step": 11405 + }, + { + "epoch": 0.47542828560710265, + "grad_norm": 764.0, + "learning_rate": 5.639160377390732e-05, + "loss": 22.0002, + "step": 11406 + }, + { + "epoch": 0.4754699679046309, + "grad_norm": 278.0, + "learning_rate": 5.638490907754879e-05, + "loss": 12.5626, + "step": 11407 + }, + { + "epoch": 0.47551165020215913, + "grad_norm": 604.0, + "learning_rate": 5.637821426482472e-05, + "loss": 19.6253, + "step": 11408 + }, + { + "epoch": 0.4755533324996874, + "grad_norm": 852.0, + "learning_rate": 5.6371519335857135e-05, + "loss": 24.8753, + "step": 11409 + }, + { + "epoch": 0.4755950147972156, + "grad_norm": 956.0, + "learning_rate": 5.636482429076804e-05, + "loss": 23.3839, + "step": 11410 + }, + { + "epoch": 0.4756366970947439, + "grad_norm": 636.0, + "learning_rate": 5.635812912967946e-05, + "loss": 20.2502, + "step": 11411 + }, + { + "epoch": 0.4756783793922721, + "grad_norm": 188.0, + "learning_rate": 5.635143385271341e-05, + "loss": 10.938, + "step": 11412 + }, + { + "epoch": 0.47572006168980036, + "grad_norm": 330.0, + "learning_rate": 5.634473845999191e-05, + "loss": 13.8133, + "step": 11413 + }, + { + "epoch": 0.47576174398732857, + "grad_norm": 308.0, + "learning_rate": 5.633804295163699e-05, + "loss": 13.3135, + "step": 11414 + }, + { + "epoch": 0.47580342628485683, + "grad_norm": 218.0, + "learning_rate": 5.633134732777069e-05, + "loss": 11.3752, + "step": 11415 + }, + { + "epoch": 0.47584510858238505, + "grad_norm": 151.0, + "learning_rate": 5.632465158851501e-05, + "loss": 9.1253, + "step": 11416 + }, + { + "epoch": 0.4758867908799133, + "grad_norm": 664.0, + "learning_rate": 5.6317955733992e-05, + "loss": 19.7502, + "step": 11417 + }, + { + "epoch": 0.4759284731774415, + "grad_norm": 820.0, + "learning_rate": 5.6311259764323675e-05, + "loss": 22.0002, + "step": 11418 + }, + { + "epoch": 0.4759701554749698, + "grad_norm": 251.0, + "learning_rate": 5.630456367963209e-05, + "loss": 9.0006, + "step": 11419 + }, + { + "epoch": 0.476011837772498, + "grad_norm": 292.0, + "learning_rate": 5.6297867480039265e-05, + "loss": 12.6259, + "step": 11420 + }, + { + "epoch": 0.47605352007002627, + "grad_norm": 272.0, + "learning_rate": 5.629117116566726e-05, + "loss": 10.7511, + "step": 11421 + }, + { + "epoch": 0.4760952023675545, + "grad_norm": 1216.0, + "learning_rate": 5.6284474736638095e-05, + "loss": 29.0004, + "step": 11422 + }, + { + "epoch": 0.47613688466508275, + "grad_norm": 804.0, + "learning_rate": 5.6277778193073806e-05, + "loss": 25.8751, + "step": 11423 + }, + { + "epoch": 0.47617856696261096, + "grad_norm": 206.0, + "learning_rate": 5.627108153509646e-05, + "loss": 11.8127, + "step": 11424 + }, + { + "epoch": 0.47622024926013923, + "grad_norm": 340.0, + "learning_rate": 5.626438476282809e-05, + "loss": 14.0627, + "step": 11425 + }, + { + "epoch": 0.47626193155766744, + "grad_norm": 172.0, + "learning_rate": 5.625768787639076e-05, + "loss": 10.8753, + "step": 11426 + }, + { + "epoch": 0.4763036138551957, + "grad_norm": 804.0, + "learning_rate": 5.625099087590653e-05, + "loss": 22.8752, + "step": 11427 + }, + { + "epoch": 0.4763452961527239, + "grad_norm": 540.0, + "learning_rate": 5.624429376149741e-05, + "loss": 16.1302, + "step": 11428 + }, + { + "epoch": 0.4763869784502522, + "grad_norm": 852.0, + "learning_rate": 5.62375965332855e-05, + "loss": 23.2502, + "step": 11429 + }, + { + "epoch": 0.4764286607477804, + "grad_norm": 716.0, + "learning_rate": 5.623089919139283e-05, + "loss": 23.0007, + "step": 11430 + }, + { + "epoch": 0.47647034304530866, + "grad_norm": 255.0, + "learning_rate": 5.622420173594147e-05, + "loss": 12.5627, + "step": 11431 + }, + { + "epoch": 0.4765120253428369, + "grad_norm": 616.0, + "learning_rate": 5.6217504167053484e-05, + "loss": 19.6273, + "step": 11432 + }, + { + "epoch": 0.47655370764036514, + "grad_norm": 210.0, + "learning_rate": 5.621080648485093e-05, + "loss": 4.6256, + "step": 11433 + }, + { + "epoch": 0.47659538993789335, + "grad_norm": 109.0, + "learning_rate": 5.620410868945588e-05, + "loss": 8.4379, + "step": 11434 + }, + { + "epoch": 0.4766370722354216, + "grad_norm": 123.0, + "learning_rate": 5.619741078099038e-05, + "loss": 8.8127, + "step": 11435 + }, + { + "epoch": 0.47667875453294983, + "grad_norm": 800.0, + "learning_rate": 5.6190712759576535e-05, + "loss": 18.1293, + "step": 11436 + }, + { + "epoch": 0.4767204368304781, + "grad_norm": 114.0, + "learning_rate": 5.61840146253364e-05, + "loss": 7.9689, + "step": 11437 + }, + { + "epoch": 0.4767621191280063, + "grad_norm": 498.0, + "learning_rate": 5.617731637839205e-05, + "loss": 16.8753, + "step": 11438 + }, + { + "epoch": 0.4768038014255346, + "grad_norm": 91.0, + "learning_rate": 5.617061801886556e-05, + "loss": 8.2504, + "step": 11439 + }, + { + "epoch": 0.4768454837230628, + "grad_norm": 474.0, + "learning_rate": 5.616391954687901e-05, + "loss": 16.8751, + "step": 11440 + }, + { + "epoch": 0.47688716602059106, + "grad_norm": 450.0, + "learning_rate": 5.615722096255448e-05, + "loss": 16.3756, + "step": 11441 + }, + { + "epoch": 0.47692884831811927, + "grad_norm": 372.0, + "learning_rate": 5.6150522266014035e-05, + "loss": 14.0626, + "step": 11442 + }, + { + "epoch": 0.47697053061564754, + "grad_norm": 82.5, + "learning_rate": 5.614382345737979e-05, + "loss": 8.9377, + "step": 11443 + }, + { + "epoch": 0.47701221291317575, + "grad_norm": 97.0, + "learning_rate": 5.613712453677382e-05, + "loss": 7.5941, + "step": 11444 + }, + { + "epoch": 0.477053895210704, + "grad_norm": 402.0, + "learning_rate": 5.613042550431821e-05, + "loss": 15.1255, + "step": 11445 + }, + { + "epoch": 0.4770955775082322, + "grad_norm": 228.0, + "learning_rate": 5.6123726360135055e-05, + "loss": 12.2501, + "step": 11446 + }, + { + "epoch": 0.4771372598057605, + "grad_norm": 306.0, + "learning_rate": 5.611702710434643e-05, + "loss": 11.9381, + "step": 11447 + }, + { + "epoch": 0.4771789421032887, + "grad_norm": 328.0, + "learning_rate": 5.611032773707444e-05, + "loss": 13.5627, + "step": 11448 + }, + { + "epoch": 0.477220624400817, + "grad_norm": 306.0, + "learning_rate": 5.6103628258441197e-05, + "loss": 13.2503, + "step": 11449 + }, + { + "epoch": 0.47726230669834524, + "grad_norm": 502.0, + "learning_rate": 5.609692866856878e-05, + "loss": 14.7514, + "step": 11450 + }, + { + "epoch": 0.47730398899587345, + "grad_norm": 506.0, + "learning_rate": 5.6090228967579305e-05, + "loss": 16.3767, + "step": 11451 + }, + { + "epoch": 0.4773456712934017, + "grad_norm": 292.0, + "learning_rate": 5.608352915559486e-05, + "loss": 14.1878, + "step": 11452 + }, + { + "epoch": 0.47738735359092993, + "grad_norm": 370.0, + "learning_rate": 5.607682923273756e-05, + "loss": 14.4377, + "step": 11453 + }, + { + "epoch": 0.4774290358884582, + "grad_norm": 222.0, + "learning_rate": 5.607012919912951e-05, + "loss": 13.2507, + "step": 11454 + }, + { + "epoch": 0.4774707181859864, + "grad_norm": 392.0, + "learning_rate": 5.606342905489281e-05, + "loss": 14.6252, + "step": 11455 + }, + { + "epoch": 0.4775124004835147, + "grad_norm": 144.0, + "learning_rate": 5.6056728800149584e-05, + "loss": 9.2503, + "step": 11456 + }, + { + "epoch": 0.4775540827810429, + "grad_norm": 133.0, + "learning_rate": 5.605002843502193e-05, + "loss": 8.2508, + "step": 11457 + }, + { + "epoch": 0.47759576507857116, + "grad_norm": 540.0, + "learning_rate": 5.604332795963198e-05, + "loss": 17.0005, + "step": 11458 + }, + { + "epoch": 0.47763744737609937, + "grad_norm": 720.0, + "learning_rate": 5.6036627374101824e-05, + "loss": 20.7503, + "step": 11459 + }, + { + "epoch": 0.47767912967362763, + "grad_norm": 173.0, + "learning_rate": 5.60299266785536e-05, + "loss": 11.5635, + "step": 11460 + }, + { + "epoch": 0.47772081197115585, + "grad_norm": 148.0, + "learning_rate": 5.6023225873109444e-05, + "loss": 10.1879, + "step": 11461 + }, + { + "epoch": 0.4777624942686841, + "grad_norm": 203.0, + "learning_rate": 5.601652495789145e-05, + "loss": 10.9378, + "step": 11462 + }, + { + "epoch": 0.4778041765662123, + "grad_norm": 876.0, + "learning_rate": 5.6009823933021763e-05, + "loss": 26.5003, + "step": 11463 + }, + { + "epoch": 0.4778458588637406, + "grad_norm": 426.0, + "learning_rate": 5.60031227986225e-05, + "loss": 14.9378, + "step": 11464 + }, + { + "epoch": 0.4778875411612688, + "grad_norm": 414.0, + "learning_rate": 5.599642155481578e-05, + "loss": 14.6878, + "step": 11465 + }, + { + "epoch": 0.47792922345879707, + "grad_norm": 104.5, + "learning_rate": 5.598972020172376e-05, + "loss": 9.5641, + "step": 11466 + }, + { + "epoch": 0.4779709057563253, + "grad_norm": 1184.0, + "learning_rate": 5.598301873946855e-05, + "loss": 24.6303, + "step": 11467 + }, + { + "epoch": 0.47801258805385355, + "grad_norm": 154.0, + "learning_rate": 5.59763171681723e-05, + "loss": 10.0632, + "step": 11468 + }, + { + "epoch": 0.47805427035138176, + "grad_norm": 676.0, + "learning_rate": 5.596961548795713e-05, + "loss": 19.8762, + "step": 11469 + }, + { + "epoch": 0.47809595264891, + "grad_norm": 412.0, + "learning_rate": 5.596291369894518e-05, + "loss": 15.1253, + "step": 11470 + }, + { + "epoch": 0.47813763494643824, + "grad_norm": 420.0, + "learning_rate": 5.595621180125862e-05, + "loss": 17.5008, + "step": 11471 + }, + { + "epoch": 0.4781793172439665, + "grad_norm": 422.0, + "learning_rate": 5.594950979501956e-05, + "loss": 16.2502, + "step": 11472 + }, + { + "epoch": 0.4782209995414947, + "grad_norm": 364.0, + "learning_rate": 5.594280768035014e-05, + "loss": 13.0009, + "step": 11473 + }, + { + "epoch": 0.478262681839023, + "grad_norm": 468.0, + "learning_rate": 5.5936105457372545e-05, + "loss": 14.938, + "step": 11474 + }, + { + "epoch": 0.4783043641365512, + "grad_norm": 600.0, + "learning_rate": 5.5929403126208893e-05, + "loss": 18.7502, + "step": 11475 + }, + { + "epoch": 0.47834604643407946, + "grad_norm": 348.0, + "learning_rate": 5.592270068698134e-05, + "loss": 15.3766, + "step": 11476 + }, + { + "epoch": 0.4783877287316077, + "grad_norm": 624.0, + "learning_rate": 5.591599813981205e-05, + "loss": 16.6297, + "step": 11477 + }, + { + "epoch": 0.47842941102913594, + "grad_norm": 466.0, + "learning_rate": 5.590929548482316e-05, + "loss": 16.5008, + "step": 11478 + }, + { + "epoch": 0.47847109332666415, + "grad_norm": 282.0, + "learning_rate": 5.5902592722136835e-05, + "loss": 13.7504, + "step": 11479 + }, + { + "epoch": 0.4785127756241924, + "grad_norm": 456.0, + "learning_rate": 5.589588985187525e-05, + "loss": 16.6254, + "step": 11480 + }, + { + "epoch": 0.47855445792172063, + "grad_norm": 1232.0, + "learning_rate": 5.5889186874160535e-05, + "loss": 31.6259, + "step": 11481 + }, + { + "epoch": 0.4785961402192489, + "grad_norm": 314.0, + "learning_rate": 5.588248378911487e-05, + "loss": 14.063, + "step": 11482 + }, + { + "epoch": 0.4786378225167771, + "grad_norm": 260.0, + "learning_rate": 5.587578059686041e-05, + "loss": 12.3759, + "step": 11483 + }, + { + "epoch": 0.4786795048143054, + "grad_norm": 520.0, + "learning_rate": 5.5869077297519334e-05, + "loss": 15.6889, + "step": 11484 + }, + { + "epoch": 0.4787211871118336, + "grad_norm": 207.0, + "learning_rate": 5.58623738912138e-05, + "loss": 11.3128, + "step": 11485 + }, + { + "epoch": 0.47876286940936186, + "grad_norm": 292.0, + "learning_rate": 5.585567037806597e-05, + "loss": 13.6257, + "step": 11486 + }, + { + "epoch": 0.47880455170689007, + "grad_norm": 494.0, + "learning_rate": 5.584896675819804e-05, + "loss": 15.9382, + "step": 11487 + }, + { + "epoch": 0.47884623400441834, + "grad_norm": 189.0, + "learning_rate": 5.584226303173217e-05, + "loss": 11.7502, + "step": 11488 + }, + { + "epoch": 0.47888791630194655, + "grad_norm": 250.0, + "learning_rate": 5.583555919879054e-05, + "loss": 12.8128, + "step": 11489 + }, + { + "epoch": 0.4789295985994748, + "grad_norm": 280.0, + "learning_rate": 5.582885525949533e-05, + "loss": 11.6877, + "step": 11490 + }, + { + "epoch": 0.478971280897003, + "grad_norm": 320.0, + "learning_rate": 5.5822151213968696e-05, + "loss": 13.4379, + "step": 11491 + }, + { + "epoch": 0.4790129631945313, + "grad_norm": 402.0, + "learning_rate": 5.581544706233286e-05, + "loss": 14.5002, + "step": 11492 + }, + { + "epoch": 0.4790546454920595, + "grad_norm": 189.0, + "learning_rate": 5.580874280470998e-05, + "loss": 11.2502, + "step": 11493 + }, + { + "epoch": 0.4790963277895878, + "grad_norm": 580.0, + "learning_rate": 5.580203844122225e-05, + "loss": 18.5006, + "step": 11494 + }, + { + "epoch": 0.479138010087116, + "grad_norm": 139.0, + "learning_rate": 5.579533397199185e-05, + "loss": 10.938, + "step": 11495 + }, + { + "epoch": 0.47917969238464425, + "grad_norm": 280.0, + "learning_rate": 5.578862939714097e-05, + "loss": 11.3128, + "step": 11496 + }, + { + "epoch": 0.47922137468217246, + "grad_norm": 232.0, + "learning_rate": 5.57819247167918e-05, + "loss": 10.4379, + "step": 11497 + }, + { + "epoch": 0.47926305697970073, + "grad_norm": 194.0, + "learning_rate": 5.5775219931066537e-05, + "loss": 10.0007, + "step": 11498 + }, + { + "epoch": 0.47930473927722894, + "grad_norm": 226.0, + "learning_rate": 5.576851504008739e-05, + "loss": 10.4377, + "step": 11499 + }, + { + "epoch": 0.4793464215747572, + "grad_norm": 308.0, + "learning_rate": 5.576181004397655e-05, + "loss": 12.6877, + "step": 11500 + }, + { + "epoch": 0.4793881038722854, + "grad_norm": 372.0, + "learning_rate": 5.5755104942856194e-05, + "loss": 14.8128, + "step": 11501 + }, + { + "epoch": 0.4794297861698137, + "grad_norm": 364.0, + "learning_rate": 5.574839973684856e-05, + "loss": 13.9377, + "step": 11502 + }, + { + "epoch": 0.4794714684673419, + "grad_norm": 154.0, + "learning_rate": 5.5741694426075806e-05, + "loss": 8.5018, + "step": 11503 + }, + { + "epoch": 0.47951315076487017, + "grad_norm": 442.0, + "learning_rate": 5.573498901066018e-05, + "loss": 13.313, + "step": 11504 + }, + { + "epoch": 0.4795548330623984, + "grad_norm": 300.0, + "learning_rate": 5.572828349072386e-05, + "loss": 12.4378, + "step": 11505 + }, + { + "epoch": 0.47959651535992665, + "grad_norm": 268.0, + "learning_rate": 5.5721577866389075e-05, + "loss": 13.2503, + "step": 11506 + }, + { + "epoch": 0.47963819765745486, + "grad_norm": 154.0, + "learning_rate": 5.571487213777802e-05, + "loss": 10.9379, + "step": 11507 + }, + { + "epoch": 0.4796798799549831, + "grad_norm": 346.0, + "learning_rate": 5.570816630501291e-05, + "loss": 14.3753, + "step": 11508 + }, + { + "epoch": 0.47972156225251134, + "grad_norm": 112.5, + "learning_rate": 5.570146036821596e-05, + "loss": 8.5004, + "step": 11509 + }, + { + "epoch": 0.4797632445500396, + "grad_norm": 424.0, + "learning_rate": 5.5694754327509404e-05, + "loss": 16.7503, + "step": 11510 + }, + { + "epoch": 0.4798049268475678, + "grad_norm": 580.0, + "learning_rate": 5.568804818301542e-05, + "loss": 17.6263, + "step": 11511 + }, + { + "epoch": 0.4798466091450961, + "grad_norm": 282.0, + "learning_rate": 5.568134193485627e-05, + "loss": 13.8768, + "step": 11512 + }, + { + "epoch": 0.4798882914426243, + "grad_norm": 438.0, + "learning_rate": 5.567463558315416e-05, + "loss": 14.2505, + "step": 11513 + }, + { + "epoch": 0.47992997374015256, + "grad_norm": 760.0, + "learning_rate": 5.56679291280313e-05, + "loss": 21.6291, + "step": 11514 + }, + { + "epoch": 0.47997165603768077, + "grad_norm": 177.0, + "learning_rate": 5.566122256960994e-05, + "loss": 10.6878, + "step": 11515 + }, + { + "epoch": 0.48001333833520904, + "grad_norm": 704.0, + "learning_rate": 5.5654515908012294e-05, + "loss": 18.1304, + "step": 11516 + }, + { + "epoch": 0.48005502063273725, + "grad_norm": 512.0, + "learning_rate": 5.5647809143360595e-05, + "loss": 18.7503, + "step": 11517 + }, + { + "epoch": 0.4800967029302655, + "grad_norm": 366.0, + "learning_rate": 5.5641102275777065e-05, + "loss": 15.4376, + "step": 11518 + }, + { + "epoch": 0.48013838522779373, + "grad_norm": 230.0, + "learning_rate": 5.5634395305383957e-05, + "loss": 12.0009, + "step": 11519 + }, + { + "epoch": 0.480180067525322, + "grad_norm": 251.0, + "learning_rate": 5.5627688232303485e-05, + "loss": 12.3753, + "step": 11520 + }, + { + "epoch": 0.4802217498228502, + "grad_norm": 386.0, + "learning_rate": 5.562098105665791e-05, + "loss": 12.627, + "step": 11521 + }, + { + "epoch": 0.4802634321203785, + "grad_norm": 221.0, + "learning_rate": 5.561427377856945e-05, + "loss": 12.1878, + "step": 11522 + }, + { + "epoch": 0.48030511441790674, + "grad_norm": 1072.0, + "learning_rate": 5.5607566398160325e-05, + "loss": 24.5052, + "step": 11523 + }, + { + "epoch": 0.48034679671543495, + "grad_norm": 103.0, + "learning_rate": 5.5600858915552834e-05, + "loss": 6.1255, + "step": 11524 + }, + { + "epoch": 0.4803884790129632, + "grad_norm": 552.0, + "learning_rate": 5.5594151330869185e-05, + "loss": 18.1252, + "step": 11525 + }, + { + "epoch": 0.48043016131049143, + "grad_norm": 484.0, + "learning_rate": 5.558744364423163e-05, + "loss": 16.2514, + "step": 11526 + }, + { + "epoch": 0.4804718436080197, + "grad_norm": 370.0, + "learning_rate": 5.5580735855762425e-05, + "loss": 13.8753, + "step": 11527 + }, + { + "epoch": 0.4805135259055479, + "grad_norm": 576.0, + "learning_rate": 5.557402796558381e-05, + "loss": 16.0012, + "step": 11528 + }, + { + "epoch": 0.4805552082030762, + "grad_norm": 268.0, + "learning_rate": 5.5567319973818036e-05, + "loss": 13.2504, + "step": 11529 + }, + { + "epoch": 0.4805968905006044, + "grad_norm": 552.0, + "learning_rate": 5.5560611880587366e-05, + "loss": 17.1253, + "step": 11530 + }, + { + "epoch": 0.48063857279813266, + "grad_norm": 1032.0, + "learning_rate": 5.555390368601404e-05, + "loss": 27.0008, + "step": 11531 + }, + { + "epoch": 0.48068025509566087, + "grad_norm": 744.0, + "learning_rate": 5.554719539022034e-05, + "loss": 21.3752, + "step": 11532 + }, + { + "epoch": 0.48072193739318914, + "grad_norm": 213.0, + "learning_rate": 5.554048699332851e-05, + "loss": 6.9075, + "step": 11533 + }, + { + "epoch": 0.48076361969071735, + "grad_norm": 86.5, + "learning_rate": 5.553377849546081e-05, + "loss": 5.5942, + "step": 11534 + }, + { + "epoch": 0.4808053019882456, + "grad_norm": 612.0, + "learning_rate": 5.5527069896739505e-05, + "loss": 17.1281, + "step": 11535 + }, + { + "epoch": 0.4808469842857738, + "grad_norm": 129.0, + "learning_rate": 5.552036119728685e-05, + "loss": 10.563, + "step": 11536 + }, + { + "epoch": 0.4808886665833021, + "grad_norm": 564.0, + "learning_rate": 5.551365239722513e-05, + "loss": 18.3799, + "step": 11537 + }, + { + "epoch": 0.4809303488808303, + "grad_norm": 564.0, + "learning_rate": 5.550694349667661e-05, + "loss": 15.2502, + "step": 11538 + }, + { + "epoch": 0.48097203117835857, + "grad_norm": 482.0, + "learning_rate": 5.550023449576356e-05, + "loss": 16.5028, + "step": 11539 + }, + { + "epoch": 0.4810137134758868, + "grad_norm": 368.0, + "learning_rate": 5.549352539460824e-05, + "loss": 15.626, + "step": 11540 + }, + { + "epoch": 0.48105539577341505, + "grad_norm": 67.0, + "learning_rate": 5.5486816193332935e-05, + "loss": 6.7195, + "step": 11541 + }, + { + "epoch": 0.48109707807094326, + "grad_norm": 576.0, + "learning_rate": 5.5480106892059925e-05, + "loss": 19.3773, + "step": 11542 + }, + { + "epoch": 0.48113876036847153, + "grad_norm": 238.0, + "learning_rate": 5.547339749091147e-05, + "loss": 12.6271, + "step": 11543 + }, + { + "epoch": 0.48118044266599974, + "grad_norm": 684.0, + "learning_rate": 5.546668799000986e-05, + "loss": 20.3769, + "step": 11544 + }, + { + "epoch": 0.481222124963528, + "grad_norm": 258.0, + "learning_rate": 5.5459978389477385e-05, + "loss": 12.9382, + "step": 11545 + }, + { + "epoch": 0.4812638072610562, + "grad_norm": 992.0, + "learning_rate": 5.5453268689436313e-05, + "loss": 24.0039, + "step": 11546 + }, + { + "epoch": 0.4813054895585845, + "grad_norm": 1152.0, + "learning_rate": 5.544655889000892e-05, + "loss": 27.1285, + "step": 11547 + }, + { + "epoch": 0.4813471718561127, + "grad_norm": 604.0, + "learning_rate": 5.543984899131753e-05, + "loss": 18.5004, + "step": 11548 + }, + { + "epoch": 0.48138885415364097, + "grad_norm": 486.0, + "learning_rate": 5.543313899348439e-05, + "loss": 17.6253, + "step": 11549 + }, + { + "epoch": 0.4814305364511692, + "grad_norm": 364.0, + "learning_rate": 5.5426428896631834e-05, + "loss": 12.7501, + "step": 11550 + }, + { + "epoch": 0.48147221874869744, + "grad_norm": 740.0, + "learning_rate": 5.5419718700882105e-05, + "loss": 23.3751, + "step": 11551 + }, + { + "epoch": 0.48151390104622566, + "grad_norm": 266.0, + "learning_rate": 5.541300840635754e-05, + "loss": 11.5004, + "step": 11552 + }, + { + "epoch": 0.4815555833437539, + "grad_norm": 2080.0, + "learning_rate": 5.5406298013180415e-05, + "loss": 39.7508, + "step": 11553 + }, + { + "epoch": 0.48159726564128214, + "grad_norm": 462.0, + "learning_rate": 5.539958752147302e-05, + "loss": 17.126, + "step": 11554 + }, + { + "epoch": 0.4816389479388104, + "grad_norm": 57.75, + "learning_rate": 5.539287693135766e-05, + "loss": 8.5628, + "step": 11555 + }, + { + "epoch": 0.4816806302363386, + "grad_norm": 334.0, + "learning_rate": 5.538616624295665e-05, + "loss": 14.5006, + "step": 11556 + }, + { + "epoch": 0.4817223125338669, + "grad_norm": 302.0, + "learning_rate": 5.537945545639228e-05, + "loss": 14.2504, + "step": 11557 + }, + { + "epoch": 0.4817639948313951, + "grad_norm": 896.0, + "learning_rate": 5.537274457178685e-05, + "loss": 22.505, + "step": 11558 + }, + { + "epoch": 0.48180567712892336, + "grad_norm": 264.0, + "learning_rate": 5.536603358926269e-05, + "loss": 12.2503, + "step": 11559 + }, + { + "epoch": 0.48184735942645157, + "grad_norm": 184.0, + "learning_rate": 5.535932250894207e-05, + "loss": 11.2502, + "step": 11560 + }, + { + "epoch": 0.48188904172397984, + "grad_norm": 266.0, + "learning_rate": 5.5352611330947325e-05, + "loss": 11.938, + "step": 11561 + }, + { + "epoch": 0.48193072402150805, + "grad_norm": 396.0, + "learning_rate": 5.534590005540077e-05, + "loss": 15.3127, + "step": 11562 + }, + { + "epoch": 0.4819724063190363, + "grad_norm": 432.0, + "learning_rate": 5.533918868242471e-05, + "loss": 14.7517, + "step": 11563 + }, + { + "epoch": 0.48201408861656453, + "grad_norm": 382.0, + "learning_rate": 5.5332477212141465e-05, + "loss": 14.0005, + "step": 11564 + }, + { + "epoch": 0.4820557709140928, + "grad_norm": 206.0, + "learning_rate": 5.532576564467334e-05, + "loss": 12.6879, + "step": 11565 + }, + { + "epoch": 0.482097453211621, + "grad_norm": 78.5, + "learning_rate": 5.531905398014268e-05, + "loss": 8.0629, + "step": 11566 + }, + { + "epoch": 0.4821391355091493, + "grad_norm": 414.0, + "learning_rate": 5.531234221867178e-05, + "loss": 14.6893, + "step": 11567 + }, + { + "epoch": 0.4821808178066775, + "grad_norm": 356.0, + "learning_rate": 5.530563036038298e-05, + "loss": 15.1884, + "step": 11568 + }, + { + "epoch": 0.48222250010420575, + "grad_norm": 536.0, + "learning_rate": 5.5298918405398584e-05, + "loss": 19.8753, + "step": 11569 + }, + { + "epoch": 0.48226418240173397, + "grad_norm": 564.0, + "learning_rate": 5.529220635384093e-05, + "loss": 17.5003, + "step": 11570 + }, + { + "epoch": 0.48230586469926223, + "grad_norm": 332.0, + "learning_rate": 5.528549420583234e-05, + "loss": 12.6258, + "step": 11571 + }, + { + "epoch": 0.48234754699679044, + "grad_norm": 294.0, + "learning_rate": 5.5278781961495164e-05, + "loss": 14.438, + "step": 11572 + }, + { + "epoch": 0.4823892292943187, + "grad_norm": 616.0, + "learning_rate": 5.527206962095172e-05, + "loss": 19.0002, + "step": 11573 + }, + { + "epoch": 0.4824309115918469, + "grad_norm": 724.0, + "learning_rate": 5.526535718432432e-05, + "loss": 21.3751, + "step": 11574 + }, + { + "epoch": 0.4824725938893752, + "grad_norm": 294.0, + "learning_rate": 5.5258644651735325e-05, + "loss": 12.2515, + "step": 11575 + }, + { + "epoch": 0.4825142761869034, + "grad_norm": 788.0, + "learning_rate": 5.525193202330706e-05, + "loss": 22.5002, + "step": 11576 + }, + { + "epoch": 0.48255595848443167, + "grad_norm": 253.0, + "learning_rate": 5.524521929916189e-05, + "loss": 13.0003, + "step": 11577 + }, + { + "epoch": 0.4825976407819599, + "grad_norm": 476.0, + "learning_rate": 5.523850647942211e-05, + "loss": 16.2504, + "step": 11578 + }, + { + "epoch": 0.48263932307948815, + "grad_norm": 94.5, + "learning_rate": 5.52317935642101e-05, + "loss": 8.3128, + "step": 11579 + }, + { + "epoch": 0.48268100537701636, + "grad_norm": 436.0, + "learning_rate": 5.522508055364818e-05, + "loss": 15.4377, + "step": 11580 + }, + { + "epoch": 0.4827226876745446, + "grad_norm": 304.0, + "learning_rate": 5.52183674478587e-05, + "loss": 13.2502, + "step": 11581 + }, + { + "epoch": 0.48276436997207284, + "grad_norm": 268.0, + "learning_rate": 5.5211654246964016e-05, + "loss": 13.7504, + "step": 11582 + }, + { + "epoch": 0.4828060522696011, + "grad_norm": 120.5, + "learning_rate": 5.520494095108647e-05, + "loss": 11.1257, + "step": 11583 + }, + { + "epoch": 0.4828477345671293, + "grad_norm": 414.0, + "learning_rate": 5.51982275603484e-05, + "loss": 15.7544, + "step": 11584 + }, + { + "epoch": 0.4828894168646576, + "grad_norm": 404.0, + "learning_rate": 5.51915140748722e-05, + "loss": 12.314, + "step": 11585 + }, + { + "epoch": 0.4829310991621858, + "grad_norm": 876.0, + "learning_rate": 5.518480049478016e-05, + "loss": 22.8765, + "step": 11586 + }, + { + "epoch": 0.48297278145971406, + "grad_norm": 96.0, + "learning_rate": 5.517808682019468e-05, + "loss": 7.9691, + "step": 11587 + }, + { + "epoch": 0.4830144637572423, + "grad_norm": 444.0, + "learning_rate": 5.517137305123813e-05, + "loss": 16.6251, + "step": 11588 + }, + { + "epoch": 0.48305614605477054, + "grad_norm": 320.0, + "learning_rate": 5.516465918803283e-05, + "loss": 13.5628, + "step": 11589 + }, + { + "epoch": 0.48309782835229875, + "grad_norm": 732.0, + "learning_rate": 5.515794523070116e-05, + "loss": 20.2502, + "step": 11590 + }, + { + "epoch": 0.483139510649827, + "grad_norm": 120.0, + "learning_rate": 5.515123117936548e-05, + "loss": 10.1257, + "step": 11591 + }, + { + "epoch": 0.48318119294735523, + "grad_norm": 182.0, + "learning_rate": 5.514451703414816e-05, + "loss": 11.0637, + "step": 11592 + }, + { + "epoch": 0.4832228752448835, + "grad_norm": 494.0, + "learning_rate": 5.513780279517156e-05, + "loss": 17.3754, + "step": 11593 + }, + { + "epoch": 0.4832645575424117, + "grad_norm": 540.0, + "learning_rate": 5.5131088462558044e-05, + "loss": 18.7508, + "step": 11594 + }, + { + "epoch": 0.48330623983994, + "grad_norm": 1728.0, + "learning_rate": 5.5124374036429985e-05, + "loss": 34.5004, + "step": 11595 + }, + { + "epoch": 0.48334792213746824, + "grad_norm": 326.0, + "learning_rate": 5.511765951690976e-05, + "loss": 13.129, + "step": 11596 + }, + { + "epoch": 0.48338960443499646, + "grad_norm": 133.0, + "learning_rate": 5.5110944904119724e-05, + "loss": 9.9377, + "step": 11597 + }, + { + "epoch": 0.4834312867325247, + "grad_norm": 119.5, + "learning_rate": 5.510423019818227e-05, + "loss": 8.6876, + "step": 11598 + }, + { + "epoch": 0.48347296903005293, + "grad_norm": 202.0, + "learning_rate": 5.5097515399219754e-05, + "loss": 11.7504, + "step": 11599 + }, + { + "epoch": 0.4835146513275812, + "grad_norm": 95.0, + "learning_rate": 5.5090800507354586e-05, + "loss": 7.7191, + "step": 11600 + }, + { + "epoch": 0.4835563336251094, + "grad_norm": 198.0, + "learning_rate": 5.508408552270913e-05, + "loss": 9.9377, + "step": 11601 + }, + { + "epoch": 0.4835980159226377, + "grad_norm": 181.0, + "learning_rate": 5.507737044540575e-05, + "loss": 10.6252, + "step": 11602 + }, + { + "epoch": 0.4836396982201659, + "grad_norm": 115.0, + "learning_rate": 5.507065527556685e-05, + "loss": 9.8127, + "step": 11603 + }, + { + "epoch": 0.48368138051769416, + "grad_norm": 584.0, + "learning_rate": 5.5063940013314805e-05, + "loss": 19.6252, + "step": 11604 + }, + { + "epoch": 0.48372306281522237, + "grad_norm": 210.0, + "learning_rate": 5.505722465877201e-05, + "loss": 11.2503, + "step": 11605 + }, + { + "epoch": 0.48376474511275064, + "grad_norm": 179.0, + "learning_rate": 5.505050921206084e-05, + "loss": 10.5003, + "step": 11606 + }, + { + "epoch": 0.48380642741027885, + "grad_norm": 520.0, + "learning_rate": 5.504379367330369e-05, + "loss": 18.3757, + "step": 11607 + }, + { + "epoch": 0.4838481097078071, + "grad_norm": 256.0, + "learning_rate": 5.503707804262296e-05, + "loss": 11.8751, + "step": 11608 + }, + { + "epoch": 0.48388979200533533, + "grad_norm": 324.0, + "learning_rate": 5.5030362320141026e-05, + "loss": 13.2502, + "step": 11609 + }, + { + "epoch": 0.4839314743028636, + "grad_norm": 410.0, + "learning_rate": 5.502364650598031e-05, + "loss": 16.3754, + "step": 11610 + }, + { + "epoch": 0.4839731566003918, + "grad_norm": 78.0, + "learning_rate": 5.501693060026317e-05, + "loss": 9.3765, + "step": 11611 + }, + { + "epoch": 0.4840148388979201, + "grad_norm": 172.0, + "learning_rate": 5.501021460311202e-05, + "loss": 10.9378, + "step": 11612 + }, + { + "epoch": 0.4840565211954483, + "grad_norm": 580.0, + "learning_rate": 5.5003498514649274e-05, + "loss": 18.2503, + "step": 11613 + }, + { + "epoch": 0.48409820349297655, + "grad_norm": 61.5, + "learning_rate": 5.4996782334997335e-05, + "loss": 6.844, + "step": 11614 + }, + { + "epoch": 0.48413988579050476, + "grad_norm": 416.0, + "learning_rate": 5.499006606427858e-05, + "loss": 14.0638, + "step": 11615 + }, + { + "epoch": 0.48418156808803303, + "grad_norm": 252.0, + "learning_rate": 5.4983349702615436e-05, + "loss": 12.44, + "step": 11616 + }, + { + "epoch": 0.48422325038556124, + "grad_norm": 528.0, + "learning_rate": 5.4976633250130295e-05, + "loss": 17.2503, + "step": 11617 + }, + { + "epoch": 0.4842649326830895, + "grad_norm": 460.0, + "learning_rate": 5.496991670694558e-05, + "loss": 16.0005, + "step": 11618 + }, + { + "epoch": 0.4843066149806177, + "grad_norm": 148.0, + "learning_rate": 5.496320007318368e-05, + "loss": 9.7502, + "step": 11619 + }, + { + "epoch": 0.484348297278146, + "grad_norm": 172.0, + "learning_rate": 5.495648334896704e-05, + "loss": 10.438, + "step": 11620 + }, + { + "epoch": 0.4843899795756742, + "grad_norm": 904.0, + "learning_rate": 5.4949766534418024e-05, + "loss": 25.2503, + "step": 11621 + }, + { + "epoch": 0.48443166187320247, + "grad_norm": 484.0, + "learning_rate": 5.494304962965909e-05, + "loss": 15.4383, + "step": 11622 + }, + { + "epoch": 0.4844733441707307, + "grad_norm": 292.0, + "learning_rate": 5.4936332634812636e-05, + "loss": 12.3751, + "step": 11623 + }, + { + "epoch": 0.48451502646825895, + "grad_norm": 458.0, + "learning_rate": 5.492961555000107e-05, + "loss": 16.7504, + "step": 11624 + }, + { + "epoch": 0.48455670876578716, + "grad_norm": 260.0, + "learning_rate": 5.492289837534682e-05, + "loss": 11.7506, + "step": 11625 + }, + { + "epoch": 0.4845983910633154, + "grad_norm": 498.0, + "learning_rate": 5.491618111097233e-05, + "loss": 16.7506, + "step": 11626 + }, + { + "epoch": 0.48464007336084364, + "grad_norm": 188.0, + "learning_rate": 5.490946375699999e-05, + "loss": 10.0628, + "step": 11627 + }, + { + "epoch": 0.4846817556583719, + "grad_norm": 528.0, + "learning_rate": 5.490274631355224e-05, + "loss": 18.8752, + "step": 11628 + }, + { + "epoch": 0.4847234379559001, + "grad_norm": 454.0, + "learning_rate": 5.489602878075151e-05, + "loss": 15.2502, + "step": 11629 + }, + { + "epoch": 0.4847651202534284, + "grad_norm": 588.0, + "learning_rate": 5.488931115872021e-05, + "loss": 16.1255, + "step": 11630 + }, + { + "epoch": 0.4848068025509566, + "grad_norm": 392.0, + "learning_rate": 5.488259344758079e-05, + "loss": 15.5012, + "step": 11631 + }, + { + "epoch": 0.48484848484848486, + "grad_norm": 123.5, + "learning_rate": 5.487587564745567e-05, + "loss": 10.2502, + "step": 11632 + }, + { + "epoch": 0.4848901671460131, + "grad_norm": 340.0, + "learning_rate": 5.486915775846728e-05, + "loss": 12.938, + "step": 11633 + }, + { + "epoch": 0.48493184944354134, + "grad_norm": 640.0, + "learning_rate": 5.486243978073805e-05, + "loss": 20.2502, + "step": 11634 + }, + { + "epoch": 0.48497353174106955, + "grad_norm": 188.0, + "learning_rate": 5.485572171439044e-05, + "loss": 9.3752, + "step": 11635 + }, + { + "epoch": 0.4850152140385978, + "grad_norm": 352.0, + "learning_rate": 5.4849003559546866e-05, + "loss": 14.6879, + "step": 11636 + }, + { + "epoch": 0.48505689633612603, + "grad_norm": 144.0, + "learning_rate": 5.484228531632975e-05, + "loss": 10.8752, + "step": 11637 + }, + { + "epoch": 0.4850985786336543, + "grad_norm": 208.0, + "learning_rate": 5.4835566984861573e-05, + "loss": 11.3754, + "step": 11638 + }, + { + "epoch": 0.4851402609311825, + "grad_norm": 282.0, + "learning_rate": 5.482884856526476e-05, + "loss": 11.3752, + "step": 11639 + }, + { + "epoch": 0.4851819432287108, + "grad_norm": 139.0, + "learning_rate": 5.482213005766175e-05, + "loss": 9.6877, + "step": 11640 + }, + { + "epoch": 0.485223625526239, + "grad_norm": 364.0, + "learning_rate": 5.481541146217499e-05, + "loss": 15.0003, + "step": 11641 + }, + { + "epoch": 0.48526530782376726, + "grad_norm": 246.0, + "learning_rate": 5.480869277892693e-05, + "loss": 12.2501, + "step": 11642 + }, + { + "epoch": 0.48530699012129547, + "grad_norm": 53.5, + "learning_rate": 5.480197400804001e-05, + "loss": 7.3752, + "step": 11643 + }, + { + "epoch": 0.48534867241882373, + "grad_norm": 131.0, + "learning_rate": 5.47952551496367e-05, + "loss": 9.9382, + "step": 11644 + }, + { + "epoch": 0.48539035471635195, + "grad_norm": 482.0, + "learning_rate": 5.478853620383944e-05, + "loss": 16.7503, + "step": 11645 + }, + { + "epoch": 0.4854320370138802, + "grad_norm": 1080.0, + "learning_rate": 5.4781817170770676e-05, + "loss": 26.0004, + "step": 11646 + }, + { + "epoch": 0.4854737193114084, + "grad_norm": 63.25, + "learning_rate": 5.477509805055286e-05, + "loss": 8.4378, + "step": 11647 + }, + { + "epoch": 0.4855154016089367, + "grad_norm": 434.0, + "learning_rate": 5.476837884330848e-05, + "loss": 14.9379, + "step": 11648 + }, + { + "epoch": 0.4855570839064649, + "grad_norm": 107.0, + "learning_rate": 5.476165954915995e-05, + "loss": 8.6252, + "step": 11649 + }, + { + "epoch": 0.48559876620399317, + "grad_norm": 213.0, + "learning_rate": 5.4754940168229765e-05, + "loss": 11.8129, + "step": 11650 + }, + { + "epoch": 0.4856404485015214, + "grad_norm": 704.0, + "learning_rate": 5.474822070064037e-05, + "loss": 21.3759, + "step": 11651 + }, + { + "epoch": 0.48568213079904965, + "grad_norm": 1440.0, + "learning_rate": 5.474150114651423e-05, + "loss": 27.3799, + "step": 11652 + }, + { + "epoch": 0.48572381309657786, + "grad_norm": 142.0, + "learning_rate": 5.473478150597382e-05, + "loss": 9.8128, + "step": 11653 + }, + { + "epoch": 0.48576549539410613, + "grad_norm": 1312.0, + "learning_rate": 5.4728061779141585e-05, + "loss": 26.7533, + "step": 11654 + }, + { + "epoch": 0.48580717769163434, + "grad_norm": 632.0, + "learning_rate": 5.472134196614e-05, + "loss": 20.6253, + "step": 11655 + }, + { + "epoch": 0.4858488599891626, + "grad_norm": 180.0, + "learning_rate": 5.471462206709156e-05, + "loss": 9.5006, + "step": 11656 + }, + { + "epoch": 0.4858905422866908, + "grad_norm": 700.0, + "learning_rate": 5.47079020821187e-05, + "loss": 20.1252, + "step": 11657 + }, + { + "epoch": 0.4859322245842191, + "grad_norm": 418.0, + "learning_rate": 5.470118201134391e-05, + "loss": 14.1881, + "step": 11658 + }, + { + "epoch": 0.4859739068817473, + "grad_norm": 464.0, + "learning_rate": 5.4694461854889655e-05, + "loss": 15.2526, + "step": 11659 + }, + { + "epoch": 0.48601558917927556, + "grad_norm": 540.0, + "learning_rate": 5.468774161287843e-05, + "loss": 18.8756, + "step": 11660 + }, + { + "epoch": 0.4860572714768038, + "grad_norm": 736.0, + "learning_rate": 5.4681021285432686e-05, + "loss": 22.2504, + "step": 11661 + }, + { + "epoch": 0.48609895377433204, + "grad_norm": 588.0, + "learning_rate": 5.46743008726749e-05, + "loss": 19.5007, + "step": 11662 + }, + { + "epoch": 0.48614063607186025, + "grad_norm": 51.0, + "learning_rate": 5.4667580374727576e-05, + "loss": 6.7815, + "step": 11663 + }, + { + "epoch": 0.4861823183693885, + "grad_norm": 632.0, + "learning_rate": 5.46608597917132e-05, + "loss": 21.3754, + "step": 11664 + }, + { + "epoch": 0.48622400066691673, + "grad_norm": 268.0, + "learning_rate": 5.465413912375423e-05, + "loss": 12.8752, + "step": 11665 + }, + { + "epoch": 0.486265682964445, + "grad_norm": 264.0, + "learning_rate": 5.464741837097316e-05, + "loss": 7.4069, + "step": 11666 + }, + { + "epoch": 0.4863073652619732, + "grad_norm": 752.0, + "learning_rate": 5.464069753349248e-05, + "loss": 18.8752, + "step": 11667 + }, + { + "epoch": 0.4863490475595015, + "grad_norm": 82.0, + "learning_rate": 5.463397661143468e-05, + "loss": 8.8129, + "step": 11668 + }, + { + "epoch": 0.48639072985702975, + "grad_norm": 1176.0, + "learning_rate": 5.462725560492224e-05, + "loss": 26.3781, + "step": 11669 + }, + { + "epoch": 0.48643241215455796, + "grad_norm": 624.0, + "learning_rate": 5.462053451407766e-05, + "loss": 20.3751, + "step": 11670 + }, + { + "epoch": 0.4864740944520862, + "grad_norm": 556.0, + "learning_rate": 5.4613813339023424e-05, + "loss": 18.3754, + "step": 11671 + }, + { + "epoch": 0.48651577674961444, + "grad_norm": 420.0, + "learning_rate": 5.460709207988203e-05, + "loss": 15.0002, + "step": 11672 + }, + { + "epoch": 0.4865574590471427, + "grad_norm": 468.0, + "learning_rate": 5.4600370736775974e-05, + "loss": 15.4378, + "step": 11673 + }, + { + "epoch": 0.4865991413446709, + "grad_norm": 672.0, + "learning_rate": 5.459364930982775e-05, + "loss": 20.3753, + "step": 11674 + }, + { + "epoch": 0.4866408236421992, + "grad_norm": 472.0, + "learning_rate": 5.4586927799159856e-05, + "loss": 17.5001, + "step": 11675 + }, + { + "epoch": 0.4866825059397274, + "grad_norm": 1504.0, + "learning_rate": 5.45802062048948e-05, + "loss": 36.0002, + "step": 11676 + }, + { + "epoch": 0.48672418823725566, + "grad_norm": 904.0, + "learning_rate": 5.4573484527155086e-05, + "loss": 25.6255, + "step": 11677 + }, + { + "epoch": 0.4867658705347839, + "grad_norm": 82.5, + "learning_rate": 5.456676276606321e-05, + "loss": 8.5002, + "step": 11678 + }, + { + "epoch": 0.48680755283231214, + "grad_norm": 408.0, + "learning_rate": 5.4560040921741676e-05, + "loss": 15.813, + "step": 11679 + }, + { + "epoch": 0.48684923512984035, + "grad_norm": 620.0, + "learning_rate": 5.4553318994312984e-05, + "loss": 18.8776, + "step": 11680 + }, + { + "epoch": 0.4868909174273686, + "grad_norm": 340.0, + "learning_rate": 5.4546596983899654e-05, + "loss": 14.2503, + "step": 11681 + }, + { + "epoch": 0.48693259972489683, + "grad_norm": 912.0, + "learning_rate": 5.45398748906242e-05, + "loss": 24.6266, + "step": 11682 + }, + { + "epoch": 0.4869742820224251, + "grad_norm": 116.0, + "learning_rate": 5.45331527146091e-05, + "loss": 8.1878, + "step": 11683 + }, + { + "epoch": 0.4870159643199533, + "grad_norm": 358.0, + "learning_rate": 5.4526430455976906e-05, + "loss": 14.5005, + "step": 11684 + }, + { + "epoch": 0.4870576466174816, + "grad_norm": 358.0, + "learning_rate": 5.451970811485012e-05, + "loss": 15.3751, + "step": 11685 + }, + { + "epoch": 0.4870993289150098, + "grad_norm": 245.0, + "learning_rate": 5.4512985691351236e-05, + "loss": 12.5627, + "step": 11686 + }, + { + "epoch": 0.48714101121253806, + "grad_norm": 126.5, + "learning_rate": 5.450626318560279e-05, + "loss": 10.2503, + "step": 11687 + }, + { + "epoch": 0.48718269351006627, + "grad_norm": 70.0, + "learning_rate": 5.4499540597727303e-05, + "loss": 7.876, + "step": 11688 + }, + { + "epoch": 0.48722437580759453, + "grad_norm": 304.0, + "learning_rate": 5.44928179278473e-05, + "loss": 13.9381, + "step": 11689 + }, + { + "epoch": 0.48726605810512275, + "grad_norm": 235.0, + "learning_rate": 5.4486095176085274e-05, + "loss": 11.8126, + "step": 11690 + }, + { + "epoch": 0.487307740402651, + "grad_norm": 170.0, + "learning_rate": 5.4479372342563775e-05, + "loss": 10.5004, + "step": 11691 + }, + { + "epoch": 0.4873494227001792, + "grad_norm": 426.0, + "learning_rate": 5.447264942740531e-05, + "loss": 16.2505, + "step": 11692 + }, + { + "epoch": 0.4873911049977075, + "grad_norm": 286.0, + "learning_rate": 5.4465926430732416e-05, + "loss": 13.1892, + "step": 11693 + }, + { + "epoch": 0.4874327872952357, + "grad_norm": 560.0, + "learning_rate": 5.445920335266762e-05, + "loss": 15.8752, + "step": 11694 + }, + { + "epoch": 0.48747446959276397, + "grad_norm": 552.0, + "learning_rate": 5.445248019333345e-05, + "loss": 16.6254, + "step": 11695 + }, + { + "epoch": 0.4875161518902922, + "grad_norm": 492.0, + "learning_rate": 5.444575695285242e-05, + "loss": 16.1253, + "step": 11696 + }, + { + "epoch": 0.48755783418782045, + "grad_norm": 64.0, + "learning_rate": 5.443903363134708e-05, + "loss": 6.0003, + "step": 11697 + }, + { + "epoch": 0.48759951648534866, + "grad_norm": 226.0, + "learning_rate": 5.4432310228939966e-05, + "loss": 12.0004, + "step": 11698 + }, + { + "epoch": 0.48764119878287693, + "grad_norm": 231.0, + "learning_rate": 5.4425586745753595e-05, + "loss": 11.7506, + "step": 11699 + }, + { + "epoch": 0.48768288108040514, + "grad_norm": 452.0, + "learning_rate": 5.4418863181910504e-05, + "loss": 15.3753, + "step": 11700 + }, + { + "epoch": 0.4877245633779334, + "grad_norm": 284.0, + "learning_rate": 5.4412139537533255e-05, + "loss": 13.1879, + "step": 11701 + }, + { + "epoch": 0.4877662456754616, + "grad_norm": 218.0, + "learning_rate": 5.440541581274436e-05, + "loss": 9.313, + "step": 11702 + }, + { + "epoch": 0.4878079279729899, + "grad_norm": 135.0, + "learning_rate": 5.439869200766638e-05, + "loss": 9.8752, + "step": 11703 + }, + { + "epoch": 0.4878496102705181, + "grad_norm": 916.0, + "learning_rate": 5.439196812242186e-05, + "loss": 22.0005, + "step": 11704 + }, + { + "epoch": 0.48789129256804636, + "grad_norm": 434.0, + "learning_rate": 5.438524415713331e-05, + "loss": 16.0002, + "step": 11705 + }, + { + "epoch": 0.4879329748655746, + "grad_norm": 262.0, + "learning_rate": 5.4378520111923304e-05, + "loss": 13.5643, + "step": 11706 + }, + { + "epoch": 0.48797465716310284, + "grad_norm": 235.0, + "learning_rate": 5.437179598691439e-05, + "loss": 11.3754, + "step": 11707 + }, + { + "epoch": 0.48801633946063105, + "grad_norm": 126.5, + "learning_rate": 5.436507178222909e-05, + "loss": 10.0631, + "step": 11708 + }, + { + "epoch": 0.4880580217581593, + "grad_norm": 173.0, + "learning_rate": 5.435834749798997e-05, + "loss": 10.6877, + "step": 11709 + }, + { + "epoch": 0.48809970405568753, + "grad_norm": 334.0, + "learning_rate": 5.4351623134319584e-05, + "loss": 13.6877, + "step": 11710 + }, + { + "epoch": 0.4881413863532158, + "grad_norm": 436.0, + "learning_rate": 5.434489869134048e-05, + "loss": 16.5003, + "step": 11711 + }, + { + "epoch": 0.488183068650744, + "grad_norm": 298.0, + "learning_rate": 5.4338174169175204e-05, + "loss": 13.5629, + "step": 11712 + }, + { + "epoch": 0.4882247509482723, + "grad_norm": 280.0, + "learning_rate": 5.433144956794634e-05, + "loss": 12.2505, + "step": 11713 + }, + { + "epoch": 0.4882664332458005, + "grad_norm": 270.0, + "learning_rate": 5.43247248877764e-05, + "loss": 11.6252, + "step": 11714 + }, + { + "epoch": 0.48830811554332876, + "grad_norm": 284.0, + "learning_rate": 5.431800012878798e-05, + "loss": 12.0641, + "step": 11715 + }, + { + "epoch": 0.48834979784085697, + "grad_norm": 92.5, + "learning_rate": 5.4311275291103616e-05, + "loss": 8.1877, + "step": 11716 + }, + { + "epoch": 0.48839148013838524, + "grad_norm": 424.0, + "learning_rate": 5.4304550374845884e-05, + "loss": 14.0654, + "step": 11717 + }, + { + "epoch": 0.48843316243591345, + "grad_norm": 300.0, + "learning_rate": 5.429782538013734e-05, + "loss": 13.0628, + "step": 11718 + }, + { + "epoch": 0.4884748447334417, + "grad_norm": 188.0, + "learning_rate": 5.429110030710054e-05, + "loss": 11.1879, + "step": 11719 + }, + { + "epoch": 0.4885165270309699, + "grad_norm": 366.0, + "learning_rate": 5.428437515585806e-05, + "loss": 14.9378, + "step": 11720 + }, + { + "epoch": 0.4885582093284982, + "grad_norm": 736.0, + "learning_rate": 5.427764992653246e-05, + "loss": 21.5036, + "step": 11721 + }, + { + "epoch": 0.4885998916260264, + "grad_norm": 235.0, + "learning_rate": 5.427092461924631e-05, + "loss": 8.0636, + "step": 11722 + }, + { + "epoch": 0.4886415739235547, + "grad_norm": 446.0, + "learning_rate": 5.426419923412218e-05, + "loss": 14.4379, + "step": 11723 + }, + { + "epoch": 0.4886832562210829, + "grad_norm": 1272.0, + "learning_rate": 5.4257473771282655e-05, + "loss": 32.7501, + "step": 11724 + }, + { + "epoch": 0.48872493851861115, + "grad_norm": 98.5, + "learning_rate": 5.4250748230850255e-05, + "loss": 9.5629, + "step": 11725 + }, + { + "epoch": 0.48876662081613936, + "grad_norm": 129.0, + "learning_rate": 5.424402261294762e-05, + "loss": 6.2818, + "step": 11726 + }, + { + "epoch": 0.48880830311366763, + "grad_norm": 800.0, + "learning_rate": 5.4237296917697286e-05, + "loss": 23.3753, + "step": 11727 + }, + { + "epoch": 0.48884998541119584, + "grad_norm": 376.0, + "learning_rate": 5.423057114522185e-05, + "loss": 15.0627, + "step": 11728 + }, + { + "epoch": 0.4888916677087241, + "grad_norm": 171.0, + "learning_rate": 5.4223845295643884e-05, + "loss": 11.7508, + "step": 11729 + }, + { + "epoch": 0.4889333500062523, + "grad_norm": 245.0, + "learning_rate": 5.4217119369085945e-05, + "loss": 13.0015, + "step": 11730 + }, + { + "epoch": 0.4889750323037806, + "grad_norm": 374.0, + "learning_rate": 5.421039336567064e-05, + "loss": 14.8753, + "step": 11731 + }, + { + "epoch": 0.4890167146013088, + "grad_norm": 147.0, + "learning_rate": 5.420366728552054e-05, + "loss": 9.5004, + "step": 11732 + }, + { + "epoch": 0.48905839689883707, + "grad_norm": 109.0, + "learning_rate": 5.419694112875824e-05, + "loss": 9.5626, + "step": 11733 + }, + { + "epoch": 0.4891000791963653, + "grad_norm": 416.0, + "learning_rate": 5.4190214895506305e-05, + "loss": 14.4402, + "step": 11734 + }, + { + "epoch": 0.48914176149389355, + "grad_norm": 556.0, + "learning_rate": 5.418348858588733e-05, + "loss": 17.3752, + "step": 11735 + }, + { + "epoch": 0.48918344379142176, + "grad_norm": 156.0, + "learning_rate": 5.417676220002391e-05, + "loss": 10.5009, + "step": 11736 + }, + { + "epoch": 0.48922512608895, + "grad_norm": 500.0, + "learning_rate": 5.4170035738038625e-05, + "loss": 13.689, + "step": 11737 + }, + { + "epoch": 0.48926680838647824, + "grad_norm": 346.0, + "learning_rate": 5.416330920005406e-05, + "loss": 13.6895, + "step": 11738 + }, + { + "epoch": 0.4893084906840065, + "grad_norm": 194.0, + "learning_rate": 5.415658258619283e-05, + "loss": 11.7502, + "step": 11739 + }, + { + "epoch": 0.4893501729815347, + "grad_norm": 230.0, + "learning_rate": 5.414985589657751e-05, + "loss": 11.1258, + "step": 11740 + }, + { + "epoch": 0.489391855279063, + "grad_norm": 197.0, + "learning_rate": 5.4143129131330696e-05, + "loss": 12.2507, + "step": 11741 + }, + { + "epoch": 0.48943353757659125, + "grad_norm": 247.0, + "learning_rate": 5.413640229057498e-05, + "loss": 11.6883, + "step": 11742 + }, + { + "epoch": 0.48947521987411946, + "grad_norm": 103.5, + "learning_rate": 5.412967537443298e-05, + "loss": 7.6566, + "step": 11743 + }, + { + "epoch": 0.4895169021716477, + "grad_norm": 272.0, + "learning_rate": 5.412294838302726e-05, + "loss": 13.6879, + "step": 11744 + }, + { + "epoch": 0.48955858446917594, + "grad_norm": 572.0, + "learning_rate": 5.411622131648045e-05, + "loss": 18.5004, + "step": 11745 + }, + { + "epoch": 0.4896002667667042, + "grad_norm": 123.5, + "learning_rate": 5.410949417491514e-05, + "loss": 11.1256, + "step": 11746 + }, + { + "epoch": 0.4896419490642324, + "grad_norm": 50.5, + "learning_rate": 5.4102766958453945e-05, + "loss": 7.7191, + "step": 11747 + }, + { + "epoch": 0.4896836313617607, + "grad_norm": 332.0, + "learning_rate": 5.4096039667219445e-05, + "loss": 12.0002, + "step": 11748 + }, + { + "epoch": 0.4897253136592889, + "grad_norm": 170.0, + "learning_rate": 5.408931230133426e-05, + "loss": 10.0627, + "step": 11749 + }, + { + "epoch": 0.48976699595681716, + "grad_norm": 72.0, + "learning_rate": 5.4082584860920993e-05, + "loss": 8.4383, + "step": 11750 + }, + { + "epoch": 0.4898086782543454, + "grad_norm": 800.0, + "learning_rate": 5.4075857346102254e-05, + "loss": 25.5002, + "step": 11751 + }, + { + "epoch": 0.48985036055187364, + "grad_norm": 132.0, + "learning_rate": 5.4069129757000656e-05, + "loss": 9.8753, + "step": 11752 + }, + { + "epoch": 0.48989204284940185, + "grad_norm": 380.0, + "learning_rate": 5.40624020937388e-05, + "loss": 13.9379, + "step": 11753 + }, + { + "epoch": 0.4899337251469301, + "grad_norm": 306.0, + "learning_rate": 5.4055674356439325e-05, + "loss": 12.5627, + "step": 11754 + }, + { + "epoch": 0.48997540744445833, + "grad_norm": 692.0, + "learning_rate": 5.404894654522481e-05, + "loss": 17.3758, + "step": 11755 + }, + { + "epoch": 0.4900170897419866, + "grad_norm": 136.0, + "learning_rate": 5.404221866021789e-05, + "loss": 10.3129, + "step": 11756 + }, + { + "epoch": 0.4900587720395148, + "grad_norm": 192.0, + "learning_rate": 5.403549070154118e-05, + "loss": 11.2511, + "step": 11757 + }, + { + "epoch": 0.4901004543370431, + "grad_norm": 98.5, + "learning_rate": 5.402876266931729e-05, + "loss": 9.3755, + "step": 11758 + }, + { + "epoch": 0.4901421366345713, + "grad_norm": 756.0, + "learning_rate": 5.4022034563668834e-05, + "loss": 21.8758, + "step": 11759 + }, + { + "epoch": 0.49018381893209956, + "grad_norm": 58.75, + "learning_rate": 5.401530638471844e-05, + "loss": 7.0316, + "step": 11760 + }, + { + "epoch": 0.49022550122962777, + "grad_norm": 358.0, + "learning_rate": 5.400857813258875e-05, + "loss": 14.9378, + "step": 11761 + }, + { + "epoch": 0.49026718352715604, + "grad_norm": 302.0, + "learning_rate": 5.400184980740235e-05, + "loss": 13.1256, + "step": 11762 + }, + { + "epoch": 0.49030886582468425, + "grad_norm": 201.0, + "learning_rate": 5.399512140928188e-05, + "loss": 11.1877, + "step": 11763 + }, + { + "epoch": 0.4903505481222125, + "grad_norm": 464.0, + "learning_rate": 5.3988392938349975e-05, + "loss": 15.6252, + "step": 11764 + }, + { + "epoch": 0.4903922304197407, + "grad_norm": 796.0, + "learning_rate": 5.398166439472926e-05, + "loss": 26.2504, + "step": 11765 + }, + { + "epoch": 0.490433912717269, + "grad_norm": 169.0, + "learning_rate": 5.397493577854236e-05, + "loss": 10.5004, + "step": 11766 + }, + { + "epoch": 0.4904755950147972, + "grad_norm": 254.0, + "learning_rate": 5.396820708991189e-05, + "loss": 10.313, + "step": 11767 + }, + { + "epoch": 0.4905172773123255, + "grad_norm": 89.5, + "learning_rate": 5.39614783289605e-05, + "loss": 7.5014, + "step": 11768 + }, + { + "epoch": 0.4905589596098537, + "grad_norm": 210.0, + "learning_rate": 5.395474949581082e-05, + "loss": 11.9377, + "step": 11769 + }, + { + "epoch": 0.49060064190738195, + "grad_norm": 168.0, + "learning_rate": 5.394802059058547e-05, + "loss": 8.626, + "step": 11770 + }, + { + "epoch": 0.49064232420491016, + "grad_norm": 628.0, + "learning_rate": 5.39412916134071e-05, + "loss": 18.8753, + "step": 11771 + }, + { + "epoch": 0.49068400650243843, + "grad_norm": 336.0, + "learning_rate": 5.393456256439834e-05, + "loss": 14.3753, + "step": 11772 + }, + { + "epoch": 0.49072568879996664, + "grad_norm": 540.0, + "learning_rate": 5.392783344368183e-05, + "loss": 18.0012, + "step": 11773 + }, + { + "epoch": 0.4907673710974949, + "grad_norm": 478.0, + "learning_rate": 5.392110425138021e-05, + "loss": 16.5003, + "step": 11774 + }, + { + "epoch": 0.4908090533950231, + "grad_norm": 236.0, + "learning_rate": 5.391437498761609e-05, + "loss": 12.3129, + "step": 11775 + }, + { + "epoch": 0.4908507356925514, + "grad_norm": 568.0, + "learning_rate": 5.3907645652512165e-05, + "loss": 17.7509, + "step": 11776 + }, + { + "epoch": 0.4908924179900796, + "grad_norm": 245.0, + "learning_rate": 5.390091624619105e-05, + "loss": 12.6878, + "step": 11777 + }, + { + "epoch": 0.49093410028760787, + "grad_norm": 472.0, + "learning_rate": 5.389418676877538e-05, + "loss": 17.2508, + "step": 11778 + }, + { + "epoch": 0.4909757825851361, + "grad_norm": 446.0, + "learning_rate": 5.388745722038781e-05, + "loss": 16.3762, + "step": 11779 + }, + { + "epoch": 0.49101746488266435, + "grad_norm": 71.5, + "learning_rate": 5.388072760115099e-05, + "loss": 7.344, + "step": 11780 + }, + { + "epoch": 0.49105914718019256, + "grad_norm": 424.0, + "learning_rate": 5.387399791118758e-05, + "loss": 15.5629, + "step": 11781 + }, + { + "epoch": 0.4911008294777208, + "grad_norm": 374.0, + "learning_rate": 5.3867268150620196e-05, + "loss": 13.9384, + "step": 11782 + }, + { + "epoch": 0.49114251177524904, + "grad_norm": 326.0, + "learning_rate": 5.386053831957152e-05, + "loss": 14.0004, + "step": 11783 + }, + { + "epoch": 0.4911841940727773, + "grad_norm": 358.0, + "learning_rate": 5.385380841816418e-05, + "loss": 13.8126, + "step": 11784 + }, + { + "epoch": 0.4912258763703055, + "grad_norm": 232.0, + "learning_rate": 5.384707844652084e-05, + "loss": 12.1878, + "step": 11785 + }, + { + "epoch": 0.4912675586678338, + "grad_norm": 223.0, + "learning_rate": 5.3840348404764165e-05, + "loss": 5.7818, + "step": 11786 + }, + { + "epoch": 0.491309240965362, + "grad_norm": 296.0, + "learning_rate": 5.3833618293016786e-05, + "loss": 10.6879, + "step": 11787 + }, + { + "epoch": 0.49135092326289026, + "grad_norm": 352.0, + "learning_rate": 5.3826888111401365e-05, + "loss": 13.6252, + "step": 11788 + }, + { + "epoch": 0.49139260556041847, + "grad_norm": 384.0, + "learning_rate": 5.382015786004059e-05, + "loss": 14.9384, + "step": 11789 + }, + { + "epoch": 0.49143428785794674, + "grad_norm": 540.0, + "learning_rate": 5.381342753905708e-05, + "loss": 17.2502, + "step": 11790 + }, + { + "epoch": 0.49147597015547495, + "grad_norm": 1528.0, + "learning_rate": 5.380669714857353e-05, + "loss": 31.6253, + "step": 11791 + }, + { + "epoch": 0.4915176524530032, + "grad_norm": 282.0, + "learning_rate": 5.379996668871259e-05, + "loss": 12.6883, + "step": 11792 + }, + { + "epoch": 0.49155933475053143, + "grad_norm": 60.75, + "learning_rate": 5.379323615959691e-05, + "loss": 7.8754, + "step": 11793 + }, + { + "epoch": 0.4916010170480597, + "grad_norm": 948.0, + "learning_rate": 5.378650556134916e-05, + "loss": 24.7561, + "step": 11794 + }, + { + "epoch": 0.4916426993455879, + "grad_norm": 324.0, + "learning_rate": 5.3779774894092016e-05, + "loss": 11.0004, + "step": 11795 + }, + { + "epoch": 0.4916843816431162, + "grad_norm": 296.0, + "learning_rate": 5.377304415794814e-05, + "loss": 13.563, + "step": 11796 + }, + { + "epoch": 0.4917260639406444, + "grad_norm": 696.0, + "learning_rate": 5.37663133530402e-05, + "loss": 20.8753, + "step": 11797 + }, + { + "epoch": 0.49176774623817265, + "grad_norm": 960.0, + "learning_rate": 5.375958247949087e-05, + "loss": 24.6253, + "step": 11798 + }, + { + "epoch": 0.49180942853570087, + "grad_norm": 668.0, + "learning_rate": 5.37528515374228e-05, + "loss": 21.0001, + "step": 11799 + }, + { + "epoch": 0.49185111083322913, + "grad_norm": 506.0, + "learning_rate": 5.374612052695869e-05, + "loss": 16.6252, + "step": 11800 + }, + { + "epoch": 0.49189279313075734, + "grad_norm": 320.0, + "learning_rate": 5.373938944822119e-05, + "loss": 14.2508, + "step": 11801 + }, + { + "epoch": 0.4919344754282856, + "grad_norm": 177.0, + "learning_rate": 5.373265830133298e-05, + "loss": 10.0009, + "step": 11802 + }, + { + "epoch": 0.4919761577258138, + "grad_norm": 352.0, + "learning_rate": 5.372592708641676e-05, + "loss": 15.1252, + "step": 11803 + }, + { + "epoch": 0.4920178400233421, + "grad_norm": 127.5, + "learning_rate": 5.371919580359518e-05, + "loss": 10.5628, + "step": 11804 + }, + { + "epoch": 0.4920595223208703, + "grad_norm": 376.0, + "learning_rate": 5.371246445299093e-05, + "loss": 15.6877, + "step": 11805 + }, + { + "epoch": 0.49210120461839857, + "grad_norm": 352.0, + "learning_rate": 5.370573303472668e-05, + "loss": 13.6252, + "step": 11806 + }, + { + "epoch": 0.4921428869159268, + "grad_norm": 2160.0, + "learning_rate": 5.369900154892512e-05, + "loss": 45.251, + "step": 11807 + }, + { + "epoch": 0.49218456921345505, + "grad_norm": 438.0, + "learning_rate": 5.369226999570893e-05, + "loss": 15.8752, + "step": 11808 + }, + { + "epoch": 0.49222625151098326, + "grad_norm": 268.0, + "learning_rate": 5.3685538375200796e-05, + "loss": 10.8134, + "step": 11809 + }, + { + "epoch": 0.4922679338085115, + "grad_norm": 312.0, + "learning_rate": 5.3678806687523384e-05, + "loss": 15.3755, + "step": 11810 + }, + { + "epoch": 0.49230961610603974, + "grad_norm": 652.0, + "learning_rate": 5.367207493279941e-05, + "loss": 21.2505, + "step": 11811 + }, + { + "epoch": 0.492351298403568, + "grad_norm": 516.0, + "learning_rate": 5.366534311115153e-05, + "loss": 17.001, + "step": 11812 + }, + { + "epoch": 0.4923929807010962, + "grad_norm": 1848.0, + "learning_rate": 5.365861122270245e-05, + "loss": 42.7503, + "step": 11813 + }, + { + "epoch": 0.4924346629986245, + "grad_norm": 96.5, + "learning_rate": 5.365187926757486e-05, + "loss": 8.5005, + "step": 11814 + }, + { + "epoch": 0.49247634529615275, + "grad_norm": 190.0, + "learning_rate": 5.3645147245891436e-05, + "loss": 10.9377, + "step": 11815 + }, + { + "epoch": 0.49251802759368096, + "grad_norm": 204.0, + "learning_rate": 5.363841515777489e-05, + "loss": 10.2507, + "step": 11816 + }, + { + "epoch": 0.49255970989120923, + "grad_norm": 53.5, + "learning_rate": 5.36316830033479e-05, + "loss": 7.3757, + "step": 11817 + }, + { + "epoch": 0.49260139218873744, + "grad_norm": 386.0, + "learning_rate": 5.362495078273318e-05, + "loss": 13.0003, + "step": 11818 + }, + { + "epoch": 0.4926430744862657, + "grad_norm": 474.0, + "learning_rate": 5.3618218496053384e-05, + "loss": 15.563, + "step": 11819 + }, + { + "epoch": 0.4926847567837939, + "grad_norm": 206.0, + "learning_rate": 5.3611486143431255e-05, + "loss": 12.4378, + "step": 11820 + }, + { + "epoch": 0.4927264390813222, + "grad_norm": 210.0, + "learning_rate": 5.360475372498946e-05, + "loss": 11.1254, + "step": 11821 + }, + { + "epoch": 0.4927681213788504, + "grad_norm": 159.0, + "learning_rate": 5.359802124085072e-05, + "loss": 10.3127, + "step": 11822 + }, + { + "epoch": 0.49280980367637867, + "grad_norm": 616.0, + "learning_rate": 5.359128869113771e-05, + "loss": 19.2502, + "step": 11823 + }, + { + "epoch": 0.4928514859739069, + "grad_norm": 1112.0, + "learning_rate": 5.3584556075973145e-05, + "loss": 27.7503, + "step": 11824 + }, + { + "epoch": 0.49289316827143514, + "grad_norm": 368.0, + "learning_rate": 5.357782339547974e-05, + "loss": 15.3754, + "step": 11825 + }, + { + "epoch": 0.49293485056896336, + "grad_norm": 73.0, + "learning_rate": 5.357109064978016e-05, + "loss": 5.9066, + "step": 11826 + }, + { + "epoch": 0.4929765328664916, + "grad_norm": 450.0, + "learning_rate": 5.356435783899716e-05, + "loss": 17.7503, + "step": 11827 + }, + { + "epoch": 0.49301821516401984, + "grad_norm": 330.0, + "learning_rate": 5.355762496325342e-05, + "loss": 14.3752, + "step": 11828 + }, + { + "epoch": 0.4930598974615481, + "grad_norm": 480.0, + "learning_rate": 5.355089202267165e-05, + "loss": 17.2504, + "step": 11829 + }, + { + "epoch": 0.4931015797590763, + "grad_norm": 332.0, + "learning_rate": 5.354415901737455e-05, + "loss": 13.0001, + "step": 11830 + }, + { + "epoch": 0.4931432620566046, + "grad_norm": 58.25, + "learning_rate": 5.353742594748484e-05, + "loss": 8.9378, + "step": 11831 + }, + { + "epoch": 0.4931849443541328, + "grad_norm": 75.5, + "learning_rate": 5.3530692813125226e-05, + "loss": 8.2506, + "step": 11832 + }, + { + "epoch": 0.49322662665166106, + "grad_norm": 260.0, + "learning_rate": 5.352395961441843e-05, + "loss": 12.6877, + "step": 11833 + }, + { + "epoch": 0.49326830894918927, + "grad_norm": 516.0, + "learning_rate": 5.351722635148715e-05, + "loss": 17.7502, + "step": 11834 + }, + { + "epoch": 0.49330999124671754, + "grad_norm": 73.5, + "learning_rate": 5.351049302445411e-05, + "loss": 8.1883, + "step": 11835 + }, + { + "epoch": 0.49335167354424575, + "grad_norm": 404.0, + "learning_rate": 5.350375963344203e-05, + "loss": 15.0629, + "step": 11836 + }, + { + "epoch": 0.493393355841774, + "grad_norm": 113.0, + "learning_rate": 5.3497026178573604e-05, + "loss": 9.3753, + "step": 11837 + }, + { + "epoch": 0.49343503813930223, + "grad_norm": 386.0, + "learning_rate": 5.3490292659971565e-05, + "loss": 14.2534, + "step": 11838 + }, + { + "epoch": 0.4934767204368305, + "grad_norm": 676.0, + "learning_rate": 5.348355907775864e-05, + "loss": 19.0003, + "step": 11839 + }, + { + "epoch": 0.4935184027343587, + "grad_norm": 143.0, + "learning_rate": 5.3476825432057545e-05, + "loss": 9.8133, + "step": 11840 + }, + { + "epoch": 0.493560085031887, + "grad_norm": 238.0, + "learning_rate": 5.3470091722991e-05, + "loss": 11.1877, + "step": 11841 + }, + { + "epoch": 0.4936017673294152, + "grad_norm": 101.5, + "learning_rate": 5.3463357950681716e-05, + "loss": 9.3754, + "step": 11842 + }, + { + "epoch": 0.49364344962694345, + "grad_norm": 143.0, + "learning_rate": 5.345662411525243e-05, + "loss": 9.6253, + "step": 11843 + }, + { + "epoch": 0.49368513192447167, + "grad_norm": 106.5, + "learning_rate": 5.344989021682587e-05, + "loss": 9.2505, + "step": 11844 + }, + { + "epoch": 0.49372681422199993, + "grad_norm": 772.0, + "learning_rate": 5.344315625552474e-05, + "loss": 23.3755, + "step": 11845 + }, + { + "epoch": 0.49376849651952814, + "grad_norm": 330.0, + "learning_rate": 5.343642223147179e-05, + "loss": 13.8752, + "step": 11846 + }, + { + "epoch": 0.4938101788170564, + "grad_norm": 58.25, + "learning_rate": 5.342968814478975e-05, + "loss": 6.9689, + "step": 11847 + }, + { + "epoch": 0.4938518611145846, + "grad_norm": 664.0, + "learning_rate": 5.342295399560132e-05, + "loss": 19.3753, + "step": 11848 + }, + { + "epoch": 0.4938935434121129, + "grad_norm": 227.0, + "learning_rate": 5.3416219784029265e-05, + "loss": 11.4381, + "step": 11849 + }, + { + "epoch": 0.4939352257096411, + "grad_norm": 246.0, + "learning_rate": 5.3409485510196286e-05, + "loss": 12.563, + "step": 11850 + }, + { + "epoch": 0.49397690800716937, + "grad_norm": 181.0, + "learning_rate": 5.340275117422513e-05, + "loss": 11.0011, + "step": 11851 + }, + { + "epoch": 0.4940185903046976, + "grad_norm": 243.0, + "learning_rate": 5.339601677623854e-05, + "loss": 12.0626, + "step": 11852 + }, + { + "epoch": 0.49406027260222585, + "grad_norm": 2384.0, + "learning_rate": 5.338928231635925e-05, + "loss": 47.5002, + "step": 11853 + }, + { + "epoch": 0.49410195489975406, + "grad_norm": 294.0, + "learning_rate": 5.338254779470998e-05, + "loss": 12.6877, + "step": 11854 + }, + { + "epoch": 0.4941436371972823, + "grad_norm": 280.0, + "learning_rate": 5.337581321141348e-05, + "loss": 13.0632, + "step": 11855 + }, + { + "epoch": 0.49418531949481054, + "grad_norm": 548.0, + "learning_rate": 5.336907856659248e-05, + "loss": 18.0007, + "step": 11856 + }, + { + "epoch": 0.4942270017923388, + "grad_norm": 138.0, + "learning_rate": 5.336234386036973e-05, + "loss": 10.0001, + "step": 11857 + }, + { + "epoch": 0.494268684089867, + "grad_norm": 188.0, + "learning_rate": 5.3355609092867966e-05, + "loss": 11.4378, + "step": 11858 + }, + { + "epoch": 0.4943103663873953, + "grad_norm": 79.5, + "learning_rate": 5.334887426420993e-05, + "loss": 6.9065, + "step": 11859 + }, + { + "epoch": 0.4943520486849235, + "grad_norm": 410.0, + "learning_rate": 5.3342139374518354e-05, + "loss": 16.5003, + "step": 11860 + }, + { + "epoch": 0.49439373098245176, + "grad_norm": 188.0, + "learning_rate": 5.333540442391599e-05, + "loss": 10.3753, + "step": 11861 + }, + { + "epoch": 0.49443541327998, + "grad_norm": 448.0, + "learning_rate": 5.332866941252559e-05, + "loss": 16.8753, + "step": 11862 + }, + { + "epoch": 0.49447709557750824, + "grad_norm": 268.0, + "learning_rate": 5.332193434046988e-05, + "loss": 12.6879, + "step": 11863 + }, + { + "epoch": 0.49451877787503645, + "grad_norm": 928.0, + "learning_rate": 5.3315199207871634e-05, + "loss": 27.2504, + "step": 11864 + }, + { + "epoch": 0.4945604601725647, + "grad_norm": 454.0, + "learning_rate": 5.33084640148536e-05, + "loss": 13.8128, + "step": 11865 + }, + { + "epoch": 0.49460214247009293, + "grad_norm": 152.0, + "learning_rate": 5.3301728761538505e-05, + "loss": 10.8763, + "step": 11866 + }, + { + "epoch": 0.4946438247676212, + "grad_norm": 364.0, + "learning_rate": 5.32949934480491e-05, + "loss": 14.2501, + "step": 11867 + }, + { + "epoch": 0.4946855070651494, + "grad_norm": 532.0, + "learning_rate": 5.328825807450817e-05, + "loss": 19.0002, + "step": 11868 + }, + { + "epoch": 0.4947271893626777, + "grad_norm": 298.0, + "learning_rate": 5.3281522641038426e-05, + "loss": 13.4381, + "step": 11869 + }, + { + "epoch": 0.4947688716602059, + "grad_norm": 200.0, + "learning_rate": 5.327478714776265e-05, + "loss": 8.814, + "step": 11870 + }, + { + "epoch": 0.49481055395773416, + "grad_norm": 151.0, + "learning_rate": 5.326805159480358e-05, + "loss": 8.6879, + "step": 11871 + }, + { + "epoch": 0.49485223625526237, + "grad_norm": 348.0, + "learning_rate": 5.326131598228399e-05, + "loss": 15.2503, + "step": 11872 + }, + { + "epoch": 0.49489391855279063, + "grad_norm": 628.0, + "learning_rate": 5.3254580310326616e-05, + "loss": 21.3752, + "step": 11873 + }, + { + "epoch": 0.49493560085031885, + "grad_norm": 228.0, + "learning_rate": 5.324784457905423e-05, + "loss": 11.4384, + "step": 11874 + }, + { + "epoch": 0.4949772831478471, + "grad_norm": 422.0, + "learning_rate": 5.324110878858959e-05, + "loss": 16.1251, + "step": 11875 + }, + { + "epoch": 0.4950189654453753, + "grad_norm": 308.0, + "learning_rate": 5.323437293905544e-05, + "loss": 14.0004, + "step": 11876 + }, + { + "epoch": 0.4950606477429036, + "grad_norm": 223.0, + "learning_rate": 5.3227637030574575e-05, + "loss": 11.0002, + "step": 11877 + }, + { + "epoch": 0.4951023300404318, + "grad_norm": 640.0, + "learning_rate": 5.3220901063269736e-05, + "loss": 21.5001, + "step": 11878 + }, + { + "epoch": 0.49514401233796007, + "grad_norm": 213.0, + "learning_rate": 5.3214165037263684e-05, + "loss": 11.2504, + "step": 11879 + }, + { + "epoch": 0.4951856946354883, + "grad_norm": 340.0, + "learning_rate": 5.32074289526792e-05, + "loss": 13.5003, + "step": 11880 + }, + { + "epoch": 0.49522737693301655, + "grad_norm": 374.0, + "learning_rate": 5.3200692809639016e-05, + "loss": 15.6877, + "step": 11881 + }, + { + "epoch": 0.49526905923054476, + "grad_norm": 274.0, + "learning_rate": 5.319395660826594e-05, + "loss": 12.1252, + "step": 11882 + }, + { + "epoch": 0.49531074152807303, + "grad_norm": 294.0, + "learning_rate": 5.318722034868272e-05, + "loss": 14.1877, + "step": 11883 + }, + { + "epoch": 0.49535242382560124, + "grad_norm": 241.0, + "learning_rate": 5.3180484031012126e-05, + "loss": 10.8129, + "step": 11884 + }, + { + "epoch": 0.4953941061231295, + "grad_norm": 318.0, + "learning_rate": 5.317374765537693e-05, + "loss": 13.5634, + "step": 11885 + }, + { + "epoch": 0.4954357884206577, + "grad_norm": 380.0, + "learning_rate": 5.316701122189989e-05, + "loss": 15.6877, + "step": 11886 + }, + { + "epoch": 0.495477470718186, + "grad_norm": 217.0, + "learning_rate": 5.3160274730703796e-05, + "loss": 11.0003, + "step": 11887 + }, + { + "epoch": 0.49551915301571425, + "grad_norm": 400.0, + "learning_rate": 5.3153538181911414e-05, + "loss": 15.0004, + "step": 11888 + }, + { + "epoch": 0.49556083531324246, + "grad_norm": 326.0, + "learning_rate": 5.314680157564551e-05, + "loss": 13.5633, + "step": 11889 + }, + { + "epoch": 0.49560251761077073, + "grad_norm": 119.0, + "learning_rate": 5.314006491202887e-05, + "loss": 9.5628, + "step": 11890 + }, + { + "epoch": 0.49564419990829894, + "grad_norm": 502.0, + "learning_rate": 5.3133328191184286e-05, + "loss": 17.5017, + "step": 11891 + }, + { + "epoch": 0.4956858822058272, + "grad_norm": 536.0, + "learning_rate": 5.3126591413234506e-05, + "loss": 16.5033, + "step": 11892 + }, + { + "epoch": 0.4957275645033554, + "grad_norm": 712.0, + "learning_rate": 5.311985457830232e-05, + "loss": 20.1254, + "step": 11893 + }, + { + "epoch": 0.4957692468008837, + "grad_norm": 161.0, + "learning_rate": 5.3113117686510505e-05, + "loss": 10.8132, + "step": 11894 + }, + { + "epoch": 0.4958109290984119, + "grad_norm": 360.0, + "learning_rate": 5.3106380737981855e-05, + "loss": 15.6878, + "step": 11895 + }, + { + "epoch": 0.49585261139594017, + "grad_norm": 1520.0, + "learning_rate": 5.309964373283913e-05, + "loss": 35.0002, + "step": 11896 + }, + { + "epoch": 0.4958942936934684, + "grad_norm": 166.0, + "learning_rate": 5.309290667120512e-05, + "loss": 11.8754, + "step": 11897 + }, + { + "epoch": 0.49593597599099665, + "grad_norm": 366.0, + "learning_rate": 5.308616955320263e-05, + "loss": 13.6252, + "step": 11898 + }, + { + "epoch": 0.49597765828852486, + "grad_norm": 568.0, + "learning_rate": 5.307943237895441e-05, + "loss": 16.7542, + "step": 11899 + }, + { + "epoch": 0.4960193405860531, + "grad_norm": 122.5, + "learning_rate": 5.3072695148583264e-05, + "loss": 10.0002, + "step": 11900 + }, + { + "epoch": 0.49606102288358134, + "grad_norm": 296.0, + "learning_rate": 5.306595786221196e-05, + "loss": 15.1877, + "step": 11901 + }, + { + "epoch": 0.4961027051811096, + "grad_norm": 548.0, + "learning_rate": 5.3059220519963314e-05, + "loss": 18.5002, + "step": 11902 + }, + { + "epoch": 0.4961443874786378, + "grad_norm": 245.0, + "learning_rate": 5.305248312196011e-05, + "loss": 13.1282, + "step": 11903 + }, + { + "epoch": 0.4961860697761661, + "grad_norm": 260.0, + "learning_rate": 5.304574566832513e-05, + "loss": 12.3753, + "step": 11904 + }, + { + "epoch": 0.4962277520736943, + "grad_norm": 282.0, + "learning_rate": 5.303900815918116e-05, + "loss": 13.6879, + "step": 11905 + }, + { + "epoch": 0.49626943437122256, + "grad_norm": 500.0, + "learning_rate": 5.303227059465099e-05, + "loss": 18.2506, + "step": 11906 + }, + { + "epoch": 0.4963111166687508, + "grad_norm": 524.0, + "learning_rate": 5.3025532974857426e-05, + "loss": 18.8751, + "step": 11907 + }, + { + "epoch": 0.49635279896627904, + "grad_norm": 302.0, + "learning_rate": 5.301879529992326e-05, + "loss": 13.3756, + "step": 11908 + }, + { + "epoch": 0.49639448126380725, + "grad_norm": 147.0, + "learning_rate": 5.3012057569971285e-05, + "loss": 10.0002, + "step": 11909 + }, + { + "epoch": 0.4964361635613355, + "grad_norm": 130.0, + "learning_rate": 5.300531978512428e-05, + "loss": 9.5627, + "step": 11910 + }, + { + "epoch": 0.49647784585886373, + "grad_norm": 460.0, + "learning_rate": 5.2998581945505067e-05, + "loss": 17.2501, + "step": 11911 + }, + { + "epoch": 0.496519528156392, + "grad_norm": 336.0, + "learning_rate": 5.299184405123643e-05, + "loss": 14.0004, + "step": 11912 + }, + { + "epoch": 0.4965612104539202, + "grad_norm": 207.0, + "learning_rate": 5.298510610244116e-05, + "loss": 11.7504, + "step": 11913 + }, + { + "epoch": 0.4966028927514485, + "grad_norm": 510.0, + "learning_rate": 5.297836809924206e-05, + "loss": 17.0002, + "step": 11914 + }, + { + "epoch": 0.4966445750489767, + "grad_norm": 212.0, + "learning_rate": 5.2971630041761945e-05, + "loss": 11.5627, + "step": 11915 + }, + { + "epoch": 0.49668625734650496, + "grad_norm": 212.0, + "learning_rate": 5.2964891930123614e-05, + "loss": 11.1878, + "step": 11916 + }, + { + "epoch": 0.49672793964403317, + "grad_norm": 210.0, + "learning_rate": 5.2958153764449866e-05, + "loss": 12.2502, + "step": 11917 + }, + { + "epoch": 0.49676962194156143, + "grad_norm": 88.5, + "learning_rate": 5.29514155448635e-05, + "loss": 8.7503, + "step": 11918 + }, + { + "epoch": 0.49681130423908965, + "grad_norm": 255.0, + "learning_rate": 5.294467727148732e-05, + "loss": 13.2501, + "step": 11919 + }, + { + "epoch": 0.4968529865366179, + "grad_norm": 354.0, + "learning_rate": 5.2937938944444146e-05, + "loss": 10.4401, + "step": 11920 + }, + { + "epoch": 0.4968946688341461, + "grad_norm": 181.0, + "learning_rate": 5.293120056385677e-05, + "loss": 8.9379, + "step": 11921 + }, + { + "epoch": 0.4969363511316744, + "grad_norm": 107.0, + "learning_rate": 5.2924462129847997e-05, + "loss": 9.7503, + "step": 11922 + }, + { + "epoch": 0.4969780334292026, + "grad_norm": 222.0, + "learning_rate": 5.291772364254064e-05, + "loss": 11.6254, + "step": 11923 + }, + { + "epoch": 0.49701971572673087, + "grad_norm": 504.0, + "learning_rate": 5.291098510205752e-05, + "loss": 17.5003, + "step": 11924 + }, + { + "epoch": 0.4970613980242591, + "grad_norm": 213.0, + "learning_rate": 5.290424650852144e-05, + "loss": 10.4379, + "step": 11925 + }, + { + "epoch": 0.49710308032178735, + "grad_norm": 282.0, + "learning_rate": 5.2897507862055184e-05, + "loss": 14.001, + "step": 11926 + }, + { + "epoch": 0.49714476261931556, + "grad_norm": 644.0, + "learning_rate": 5.289076916278162e-05, + "loss": 19.5018, + "step": 11927 + }, + { + "epoch": 0.49718644491684383, + "grad_norm": 396.0, + "learning_rate": 5.2884030410823515e-05, + "loss": 14.3755, + "step": 11928 + }, + { + "epoch": 0.49722812721437204, + "grad_norm": 652.0, + "learning_rate": 5.28772916063037e-05, + "loss": 18.376, + "step": 11929 + }, + { + "epoch": 0.4972698095119003, + "grad_norm": 270.0, + "learning_rate": 5.287055274934501e-05, + "loss": 12.6877, + "step": 11930 + }, + { + "epoch": 0.4973114918094285, + "grad_norm": 136.0, + "learning_rate": 5.286381384007022e-05, + "loss": 9.3127, + "step": 11931 + }, + { + "epoch": 0.4973531741069568, + "grad_norm": 520.0, + "learning_rate": 5.285707487860218e-05, + "loss": 16.5003, + "step": 11932 + }, + { + "epoch": 0.497394856404485, + "grad_norm": 72.0, + "learning_rate": 5.285033586506369e-05, + "loss": 6.4065, + "step": 11933 + }, + { + "epoch": 0.49743653870201326, + "grad_norm": 152.0, + "learning_rate": 5.284359679957758e-05, + "loss": 10.5626, + "step": 11934 + }, + { + "epoch": 0.4974782209995415, + "grad_norm": 318.0, + "learning_rate": 5.283685768226666e-05, + "loss": 10.3138, + "step": 11935 + }, + { + "epoch": 0.49751990329706974, + "grad_norm": 616.0, + "learning_rate": 5.283011851325377e-05, + "loss": 18.5002, + "step": 11936 + }, + { + "epoch": 0.49756158559459795, + "grad_norm": 324.0, + "learning_rate": 5.2823379292661703e-05, + "loss": 13.3129, + "step": 11937 + }, + { + "epoch": 0.4976032678921262, + "grad_norm": 656.0, + "learning_rate": 5.281664002061331e-05, + "loss": 19.7501, + "step": 11938 + }, + { + "epoch": 0.49764495018965443, + "grad_norm": 444.0, + "learning_rate": 5.280990069723139e-05, + "loss": 12.7521, + "step": 11939 + }, + { + "epoch": 0.4976866324871827, + "grad_norm": 532.0, + "learning_rate": 5.280316132263878e-05, + "loss": 16.7502, + "step": 11940 + }, + { + "epoch": 0.4977283147847109, + "grad_norm": 288.0, + "learning_rate": 5.2796421896958315e-05, + "loss": 12.8751, + "step": 11941 + }, + { + "epoch": 0.4977699970822392, + "grad_norm": 736.0, + "learning_rate": 5.278968242031282e-05, + "loss": 24.1252, + "step": 11942 + }, + { + "epoch": 0.4978116793797674, + "grad_norm": 237.0, + "learning_rate": 5.2782942892825094e-05, + "loss": 12.3128, + "step": 11943 + }, + { + "epoch": 0.49785336167729566, + "grad_norm": 888.0, + "learning_rate": 5.2776203314618e-05, + "loss": 24.2527, + "step": 11944 + }, + { + "epoch": 0.49789504397482387, + "grad_norm": 234.0, + "learning_rate": 5.2769463685814357e-05, + "loss": 11.5004, + "step": 11945 + }, + { + "epoch": 0.49793672627235214, + "grad_norm": 332.0, + "learning_rate": 5.276272400653699e-05, + "loss": 13.6253, + "step": 11946 + }, + { + "epoch": 0.49797840856988035, + "grad_norm": 1072.0, + "learning_rate": 5.275598427690873e-05, + "loss": 24.5046, + "step": 11947 + }, + { + "epoch": 0.4980200908674086, + "grad_norm": 316.0, + "learning_rate": 5.274924449705242e-05, + "loss": 13.3751, + "step": 11948 + }, + { + "epoch": 0.4980617731649368, + "grad_norm": 131.0, + "learning_rate": 5.274250466709088e-05, + "loss": 8.5006, + "step": 11949 + }, + { + "epoch": 0.4981034554624651, + "grad_norm": 896.0, + "learning_rate": 5.2735764787146944e-05, + "loss": 20.8807, + "step": 11950 + }, + { + "epoch": 0.4981451377599933, + "grad_norm": 316.0, + "learning_rate": 5.2729024857343454e-05, + "loss": 12.8755, + "step": 11951 + }, + { + "epoch": 0.4981868200575216, + "grad_norm": 338.0, + "learning_rate": 5.272228487780323e-05, + "loss": 13.7502, + "step": 11952 + }, + { + "epoch": 0.4982285023550498, + "grad_norm": 568.0, + "learning_rate": 5.271554484864915e-05, + "loss": 13.5667, + "step": 11953 + }, + { + "epoch": 0.49827018465257805, + "grad_norm": 334.0, + "learning_rate": 5.2708804770004005e-05, + "loss": 12.5003, + "step": 11954 + }, + { + "epoch": 0.49831186695010626, + "grad_norm": 616.0, + "learning_rate": 5.270206464199066e-05, + "loss": 19.0003, + "step": 11955 + }, + { + "epoch": 0.49835354924763453, + "grad_norm": 318.0, + "learning_rate": 5.269532446473194e-05, + "loss": 13.5002, + "step": 11956 + }, + { + "epoch": 0.49839523154516274, + "grad_norm": 580.0, + "learning_rate": 5.26885842383507e-05, + "loss": 19.1257, + "step": 11957 + }, + { + "epoch": 0.498436913842691, + "grad_norm": 149.0, + "learning_rate": 5.268184396296978e-05, + "loss": 9.9377, + "step": 11958 + }, + { + "epoch": 0.4984785961402192, + "grad_norm": 418.0, + "learning_rate": 5.2675103638712e-05, + "loss": 14.0627, + "step": 11959 + }, + { + "epoch": 0.4985202784377475, + "grad_norm": 198.0, + "learning_rate": 5.2668363265700227e-05, + "loss": 9.2503, + "step": 11960 + }, + { + "epoch": 0.49856196073527576, + "grad_norm": 175.0, + "learning_rate": 5.2661622844057305e-05, + "loss": 9.8129, + "step": 11961 + }, + { + "epoch": 0.49860364303280397, + "grad_norm": 306.0, + "learning_rate": 5.265488237390606e-05, + "loss": 14.3755, + "step": 11962 + }, + { + "epoch": 0.49864532533033223, + "grad_norm": 294.0, + "learning_rate": 5.264814185536935e-05, + "loss": 13.6879, + "step": 11963 + }, + { + "epoch": 0.49868700762786045, + "grad_norm": 406.0, + "learning_rate": 5.264140128857e-05, + "loss": 14.2505, + "step": 11964 + }, + { + "epoch": 0.4987286899253887, + "grad_norm": 314.0, + "learning_rate": 5.26346606736309e-05, + "loss": 13.0626, + "step": 11965 + }, + { + "epoch": 0.4987703722229169, + "grad_norm": 150.0, + "learning_rate": 5.262792001067487e-05, + "loss": 9.8753, + "step": 11966 + }, + { + "epoch": 0.4988120545204452, + "grad_norm": 243.0, + "learning_rate": 5.2621179299824774e-05, + "loss": 12.2505, + "step": 11967 + }, + { + "epoch": 0.4988537368179734, + "grad_norm": 179.0, + "learning_rate": 5.2614438541203434e-05, + "loss": 8.6886, + "step": 11968 + }, + { + "epoch": 0.49889541911550167, + "grad_norm": 264.0, + "learning_rate": 5.2607697734933733e-05, + "loss": 13.0005, + "step": 11969 + }, + { + "epoch": 0.4989371014130299, + "grad_norm": 700.0, + "learning_rate": 5.2600956881138505e-05, + "loss": 19.0011, + "step": 11970 + }, + { + "epoch": 0.49897878371055815, + "grad_norm": 308.0, + "learning_rate": 5.259421597994062e-05, + "loss": 13.3753, + "step": 11971 + }, + { + "epoch": 0.49902046600808636, + "grad_norm": 186.0, + "learning_rate": 5.2587475031462906e-05, + "loss": 10.9379, + "step": 11972 + }, + { + "epoch": 0.49906214830561463, + "grad_norm": 434.0, + "learning_rate": 5.258073403582823e-05, + "loss": 16.5018, + "step": 11973 + }, + { + "epoch": 0.49910383060314284, + "grad_norm": 193.0, + "learning_rate": 5.2573992993159446e-05, + "loss": 12.4389, + "step": 11974 + }, + { + "epoch": 0.4991455129006711, + "grad_norm": 326.0, + "learning_rate": 5.256725190357942e-05, + "loss": 14.2508, + "step": 11975 + }, + { + "epoch": 0.4991871951981993, + "grad_norm": 684.0, + "learning_rate": 5.256051076721099e-05, + "loss": 22.2504, + "step": 11976 + }, + { + "epoch": 0.4992288774957276, + "grad_norm": 184.0, + "learning_rate": 5.2553769584177014e-05, + "loss": 11.7502, + "step": 11977 + }, + { + "epoch": 0.4992705597932558, + "grad_norm": 139.0, + "learning_rate": 5.254702835460037e-05, + "loss": 9.6254, + "step": 11978 + }, + { + "epoch": 0.49931224209078406, + "grad_norm": 584.0, + "learning_rate": 5.254028707860391e-05, + "loss": 18.8777, + "step": 11979 + }, + { + "epoch": 0.4993539243883123, + "grad_norm": 462.0, + "learning_rate": 5.25335457563105e-05, + "loss": 17.2502, + "step": 11980 + }, + { + "epoch": 0.49939560668584054, + "grad_norm": 312.0, + "learning_rate": 5.252680438784299e-05, + "loss": 13.1882, + "step": 11981 + }, + { + "epoch": 0.49943728898336875, + "grad_norm": 354.0, + "learning_rate": 5.252006297332425e-05, + "loss": 13.8133, + "step": 11982 + }, + { + "epoch": 0.499478971280897, + "grad_norm": 510.0, + "learning_rate": 5.251332151287712e-05, + "loss": 19.0026, + "step": 11983 + }, + { + "epoch": 0.49952065357842523, + "grad_norm": 208.0, + "learning_rate": 5.25065800066245e-05, + "loss": 11.3756, + "step": 11984 + }, + { + "epoch": 0.4995623358759535, + "grad_norm": 296.0, + "learning_rate": 5.249983845468923e-05, + "loss": 12.2506, + "step": 11985 + }, + { + "epoch": 0.4996040181734817, + "grad_norm": 792.0, + "learning_rate": 5.249309685719419e-05, + "loss": 18.3794, + "step": 11986 + }, + { + "epoch": 0.49964570047101, + "grad_norm": 620.0, + "learning_rate": 5.2486355214262225e-05, + "loss": 18.6254, + "step": 11987 + }, + { + "epoch": 0.4996873827685382, + "grad_norm": 426.0, + "learning_rate": 5.247961352601622e-05, + "loss": 13.5634, + "step": 11988 + }, + { + "epoch": 0.49972906506606646, + "grad_norm": 157.0, + "learning_rate": 5.247287179257904e-05, + "loss": 11.1256, + "step": 11989 + }, + { + "epoch": 0.49977074736359467, + "grad_norm": 152.0, + "learning_rate": 5.246613001407356e-05, + "loss": 8.9381, + "step": 11990 + }, + { + "epoch": 0.49981242966112294, + "grad_norm": 438.0, + "learning_rate": 5.2459388190622625e-05, + "loss": 16.3752, + "step": 11991 + }, + { + "epoch": 0.49985411195865115, + "grad_norm": 264.0, + "learning_rate": 5.245264632234913e-05, + "loss": 12.3128, + "step": 11992 + }, + { + "epoch": 0.4998957942561794, + "grad_norm": 478.0, + "learning_rate": 5.2445904409375946e-05, + "loss": 15.627, + "step": 11993 + }, + { + "epoch": 0.4999374765537076, + "grad_norm": 412.0, + "learning_rate": 5.243916245182593e-05, + "loss": 15.3752, + "step": 11994 + }, + { + "epoch": 0.4999791588512359, + "grad_norm": 185.0, + "learning_rate": 5.243242044982196e-05, + "loss": 10.6253, + "step": 11995 + }, + { + "epoch": 0.5000208411487641, + "grad_norm": 460.0, + "learning_rate": 5.242567840348691e-05, + "loss": 16.3753, + "step": 11996 + }, + { + "epoch": 0.5000625234462923, + "grad_norm": 211.0, + "learning_rate": 5.241893631294367e-05, + "loss": 11.3128, + "step": 11997 + }, + { + "epoch": 0.5001042057438206, + "grad_norm": 648.0, + "learning_rate": 5.241219417831509e-05, + "loss": 16.6287, + "step": 11998 + }, + { + "epoch": 0.5001458880413489, + "grad_norm": 596.0, + "learning_rate": 5.240545199972405e-05, + "loss": 19.5012, + "step": 11999 + }, + { + "epoch": 0.5001875703388771, + "grad_norm": 182.0, + "learning_rate": 5.239870977729344e-05, + "loss": 9.3129, + "step": 12000 + }, + { + "epoch": 0.5002292526364053, + "grad_norm": 276.0, + "learning_rate": 5.2391967511146144e-05, + "loss": 13.3127, + "step": 12001 + }, + { + "epoch": 0.5002709349339336, + "grad_norm": 184.0, + "learning_rate": 5.2385225201405e-05, + "loss": 11.314, + "step": 12002 + }, + { + "epoch": 0.5003126172314618, + "grad_norm": 262.0, + "learning_rate": 5.237848284819293e-05, + "loss": 10.7508, + "step": 12003 + }, + { + "epoch": 0.50035429952899, + "grad_norm": 183.0, + "learning_rate": 5.23717404516328e-05, + "loss": 10.8759, + "step": 12004 + }, + { + "epoch": 0.5003959818265182, + "grad_norm": 330.0, + "learning_rate": 5.2364998011847496e-05, + "loss": 14.1256, + "step": 12005 + }, + { + "epoch": 0.5004376641240466, + "grad_norm": 134.0, + "learning_rate": 5.2358255528959885e-05, + "loss": 7.8441, + "step": 12006 + }, + { + "epoch": 0.5004793464215748, + "grad_norm": 226.0, + "learning_rate": 5.235151300309287e-05, + "loss": 11.8752, + "step": 12007 + }, + { + "epoch": 0.500521028719103, + "grad_norm": 402.0, + "learning_rate": 5.234477043436931e-05, + "loss": 15.7508, + "step": 12008 + }, + { + "epoch": 0.5005627110166312, + "grad_norm": 316.0, + "learning_rate": 5.233802782291209e-05, + "loss": 12.8126, + "step": 12009 + }, + { + "epoch": 0.5006043933141595, + "grad_norm": 300.0, + "learning_rate": 5.233128516884412e-05, + "loss": 12.7502, + "step": 12010 + }, + { + "epoch": 0.5006460756116877, + "grad_norm": 226.0, + "learning_rate": 5.232454247228828e-05, + "loss": 11.6251, + "step": 12011 + }, + { + "epoch": 0.5006877579092159, + "grad_norm": 119.5, + "learning_rate": 5.2317799733367434e-05, + "loss": 9.8754, + "step": 12012 + }, + { + "epoch": 0.5007294402067441, + "grad_norm": 402.0, + "learning_rate": 5.231105695220448e-05, + "loss": 15.813, + "step": 12013 + }, + { + "epoch": 0.5007711225042725, + "grad_norm": 528.0, + "learning_rate": 5.2304314128922316e-05, + "loss": 17.7502, + "step": 12014 + }, + { + "epoch": 0.5008128048018007, + "grad_norm": 604.0, + "learning_rate": 5.229757126364381e-05, + "loss": 19.5003, + "step": 12015 + }, + { + "epoch": 0.5008544870993289, + "grad_norm": 528.0, + "learning_rate": 5.229082835649186e-05, + "loss": 16.8751, + "step": 12016 + }, + { + "epoch": 0.5008961693968571, + "grad_norm": 346.0, + "learning_rate": 5.2284085407589376e-05, + "loss": 14.0002, + "step": 12017 + }, + { + "epoch": 0.5009378516943854, + "grad_norm": 408.0, + "learning_rate": 5.2277342417059226e-05, + "loss": 15.5003, + "step": 12018 + }, + { + "epoch": 0.5009795339919136, + "grad_norm": 908.0, + "learning_rate": 5.227059938502432e-05, + "loss": 25.6255, + "step": 12019 + }, + { + "epoch": 0.5010212162894419, + "grad_norm": 378.0, + "learning_rate": 5.226385631160753e-05, + "loss": 15.1252, + "step": 12020 + }, + { + "epoch": 0.5010628985869701, + "grad_norm": 992.0, + "learning_rate": 5.225711319693175e-05, + "loss": 23.5036, + "step": 12021 + }, + { + "epoch": 0.5011045808844984, + "grad_norm": 464.0, + "learning_rate": 5.225037004111989e-05, + "loss": 15.4378, + "step": 12022 + }, + { + "epoch": 0.5011462631820266, + "grad_norm": 1088.0, + "learning_rate": 5.2243626844294835e-05, + "loss": 24.634, + "step": 12023 + }, + { + "epoch": 0.5011879454795548, + "grad_norm": 644.0, + "learning_rate": 5.223688360657949e-05, + "loss": 19.5002, + "step": 12024 + }, + { + "epoch": 0.501229627777083, + "grad_norm": 444.0, + "learning_rate": 5.223014032809673e-05, + "loss": 16.5002, + "step": 12025 + }, + { + "epoch": 0.5012713100746113, + "grad_norm": 382.0, + "learning_rate": 5.2223397008969466e-05, + "loss": 14.2506, + "step": 12026 + }, + { + "epoch": 0.5013129923721396, + "grad_norm": 260.0, + "learning_rate": 5.221665364932059e-05, + "loss": 11.8126, + "step": 12027 + }, + { + "epoch": 0.5013546746696678, + "grad_norm": 338.0, + "learning_rate": 5.220991024927301e-05, + "loss": 12.8141, + "step": 12028 + }, + { + "epoch": 0.501396356967196, + "grad_norm": 278.0, + "learning_rate": 5.220316680894962e-05, + "loss": 12.6878, + "step": 12029 + }, + { + "epoch": 0.5014380392647243, + "grad_norm": 880.0, + "learning_rate": 5.219642332847332e-05, + "loss": 24.5003, + "step": 12030 + }, + { + "epoch": 0.5014797215622525, + "grad_norm": 276.0, + "learning_rate": 5.218967980796702e-05, + "loss": 12.8752, + "step": 12031 + }, + { + "epoch": 0.5015214038597807, + "grad_norm": 474.0, + "learning_rate": 5.2182936247553595e-05, + "loss": 17.0002, + "step": 12032 + }, + { + "epoch": 0.501563086157309, + "grad_norm": 230.0, + "learning_rate": 5.217619264735597e-05, + "loss": 11.8128, + "step": 12033 + }, + { + "epoch": 0.5016047684548373, + "grad_norm": 556.0, + "learning_rate": 5.216944900749704e-05, + "loss": 17.8753, + "step": 12034 + }, + { + "epoch": 0.5016464507523655, + "grad_norm": 540.0, + "learning_rate": 5.216270532809972e-05, + "loss": 18.3766, + "step": 12035 + }, + { + "epoch": 0.5016881330498937, + "grad_norm": 708.0, + "learning_rate": 5.2155961609286886e-05, + "loss": 21.2531, + "step": 12036 + }, + { + "epoch": 0.501729815347422, + "grad_norm": 596.0, + "learning_rate": 5.214921785118146e-05, + "loss": 17.5006, + "step": 12037 + }, + { + "epoch": 0.5017714976449502, + "grad_norm": 229.0, + "learning_rate": 5.2142474053906356e-05, + "loss": 11.7514, + "step": 12038 + }, + { + "epoch": 0.5018131799424784, + "grad_norm": 476.0, + "learning_rate": 5.213573021758448e-05, + "loss": 15.6879, + "step": 12039 + }, + { + "epoch": 0.5018548622400066, + "grad_norm": 430.0, + "learning_rate": 5.21289863423387e-05, + "loss": 13.6257, + "step": 12040 + }, + { + "epoch": 0.501896544537535, + "grad_norm": 169.0, + "learning_rate": 5.2122242428291986e-05, + "loss": 10.313, + "step": 12041 + }, + { + "epoch": 0.5019382268350632, + "grad_norm": 183.0, + "learning_rate": 5.2115498475567204e-05, + "loss": 10.7508, + "step": 12042 + }, + { + "epoch": 0.5019799091325914, + "grad_norm": 482.0, + "learning_rate": 5.210875448428728e-05, + "loss": 17.8752, + "step": 12043 + }, + { + "epoch": 0.5020215914301196, + "grad_norm": 332.0, + "learning_rate": 5.210201045457511e-05, + "loss": 13.5628, + "step": 12044 + }, + { + "epoch": 0.5020632737276479, + "grad_norm": 238.0, + "learning_rate": 5.209526638655362e-05, + "loss": 11.5634, + "step": 12045 + }, + { + "epoch": 0.5021049560251761, + "grad_norm": 328.0, + "learning_rate": 5.208852228034572e-05, + "loss": 14.1877, + "step": 12046 + }, + { + "epoch": 0.5021466383227043, + "grad_norm": 70.0, + "learning_rate": 5.20817781360743e-05, + "loss": 8.2504, + "step": 12047 + }, + { + "epoch": 0.5021883206202326, + "grad_norm": 320.0, + "learning_rate": 5.2075033953862303e-05, + "loss": 13.4377, + "step": 12048 + }, + { + "epoch": 0.5022300029177609, + "grad_norm": 1120.0, + "learning_rate": 5.206828973383262e-05, + "loss": 29.5008, + "step": 12049 + }, + { + "epoch": 0.5022716852152891, + "grad_norm": 444.0, + "learning_rate": 5.206154547610817e-05, + "loss": 14.0675, + "step": 12050 + }, + { + "epoch": 0.5023133675128173, + "grad_norm": 788.0, + "learning_rate": 5.2054801180811886e-05, + "loss": 27.5003, + "step": 12051 + }, + { + "epoch": 0.5023550498103455, + "grad_norm": 199.0, + "learning_rate": 5.204805684806664e-05, + "loss": 13.3128, + "step": 12052 + }, + { + "epoch": 0.5023967321078738, + "grad_norm": 482.0, + "learning_rate": 5.2041312477995395e-05, + "loss": 17.2502, + "step": 12053 + }, + { + "epoch": 0.502438414405402, + "grad_norm": 668.0, + "learning_rate": 5.2034568070721055e-05, + "loss": 18.5003, + "step": 12054 + }, + { + "epoch": 0.5024800967029303, + "grad_norm": 334.0, + "learning_rate": 5.2027823626366526e-05, + "loss": 14.8128, + "step": 12055 + }, + { + "epoch": 0.5025217790004585, + "grad_norm": 326.0, + "learning_rate": 5.202107914505473e-05, + "loss": 13.0002, + "step": 12056 + }, + { + "epoch": 0.5025634612979868, + "grad_norm": 96.5, + "learning_rate": 5.201433462690858e-05, + "loss": 8.1883, + "step": 12057 + }, + { + "epoch": 0.502605143595515, + "grad_norm": 512.0, + "learning_rate": 5.2007590072051014e-05, + "loss": 16.3783, + "step": 12058 + }, + { + "epoch": 0.5026468258930432, + "grad_norm": 310.0, + "learning_rate": 5.200084548060493e-05, + "loss": 13.4376, + "step": 12059 + }, + { + "epoch": 0.5026885081905714, + "grad_norm": 190.0, + "learning_rate": 5.199410085269327e-05, + "loss": 12.3754, + "step": 12060 + }, + { + "epoch": 0.5027301904880997, + "grad_norm": 234.0, + "learning_rate": 5.198735618843894e-05, + "loss": 11.6277, + "step": 12061 + }, + { + "epoch": 0.502771872785628, + "grad_norm": 440.0, + "learning_rate": 5.198061148796487e-05, + "loss": 15.3126, + "step": 12062 + }, + { + "epoch": 0.5028135550831562, + "grad_norm": 249.0, + "learning_rate": 5.197386675139398e-05, + "loss": 12.0002, + "step": 12063 + }, + { + "epoch": 0.5028552373806844, + "grad_norm": 190.0, + "learning_rate": 5.196712197884919e-05, + "loss": 10.2502, + "step": 12064 + }, + { + "epoch": 0.5028969196782127, + "grad_norm": 776.0, + "learning_rate": 5.196037717045341e-05, + "loss": 22.8756, + "step": 12065 + }, + { + "epoch": 0.5029386019757409, + "grad_norm": 494.0, + "learning_rate": 5.1953632326329605e-05, + "loss": 19.1253, + "step": 12066 + }, + { + "epoch": 0.5029802842732691, + "grad_norm": 243.0, + "learning_rate": 5.194688744660067e-05, + "loss": 12.0038, + "step": 12067 + }, + { + "epoch": 0.5030219665707973, + "grad_norm": 358.0, + "learning_rate": 5.1940142531389544e-05, + "loss": 14.6252, + "step": 12068 + }, + { + "epoch": 0.5030636488683257, + "grad_norm": 896.0, + "learning_rate": 5.1933397580819143e-05, + "loss": 25.5011, + "step": 12069 + }, + { + "epoch": 0.5031053311658539, + "grad_norm": 362.0, + "learning_rate": 5.19266525950124e-05, + "loss": 14.4379, + "step": 12070 + }, + { + "epoch": 0.5031470134633821, + "grad_norm": 704.0, + "learning_rate": 5.191990757409225e-05, + "loss": 20.8755, + "step": 12071 + }, + { + "epoch": 0.5031886957609103, + "grad_norm": 404.0, + "learning_rate": 5.19131625181816e-05, + "loss": 14.4376, + "step": 12072 + }, + { + "epoch": 0.5032303780584386, + "grad_norm": 396.0, + "learning_rate": 5.190641742740341e-05, + "loss": 13.1253, + "step": 12073 + }, + { + "epoch": 0.5032720603559668, + "grad_norm": 272.0, + "learning_rate": 5.189967230188059e-05, + "loss": 12.3753, + "step": 12074 + }, + { + "epoch": 0.503313742653495, + "grad_norm": 470.0, + "learning_rate": 5.1892927141736056e-05, + "loss": 16.8754, + "step": 12075 + }, + { + "epoch": 0.5033554249510233, + "grad_norm": 159.0, + "learning_rate": 5.188618194709277e-05, + "loss": 10.3752, + "step": 12076 + }, + { + "epoch": 0.5033971072485516, + "grad_norm": 474.0, + "learning_rate": 5.1879436718073647e-05, + "loss": 16.2502, + "step": 12077 + }, + { + "epoch": 0.5034387895460798, + "grad_norm": 350.0, + "learning_rate": 5.1872691454801626e-05, + "loss": 14.1887, + "step": 12078 + }, + { + "epoch": 0.503480471843608, + "grad_norm": 215.0, + "learning_rate": 5.186594615739963e-05, + "loss": 11.1877, + "step": 12079 + }, + { + "epoch": 0.5035221541411362, + "grad_norm": 400.0, + "learning_rate": 5.185920082599061e-05, + "loss": 14.1886, + "step": 12080 + }, + { + "epoch": 0.5035638364386645, + "grad_norm": 314.0, + "learning_rate": 5.185245546069748e-05, + "loss": 12.4377, + "step": 12081 + }, + { + "epoch": 0.5036055187361927, + "grad_norm": 456.0, + "learning_rate": 5.1845710061643193e-05, + "loss": 15.8761, + "step": 12082 + }, + { + "epoch": 0.503647201033721, + "grad_norm": 628.0, + "learning_rate": 5.183896462895067e-05, + "loss": 20.126, + "step": 12083 + }, + { + "epoch": 0.5036888833312492, + "grad_norm": 370.0, + "learning_rate": 5.1832219162742866e-05, + "loss": 15.0627, + "step": 12084 + }, + { + "epoch": 0.5037305656287775, + "grad_norm": 1344.0, + "learning_rate": 5.18254736631427e-05, + "loss": 30.8762, + "step": 12085 + }, + { + "epoch": 0.5037722479263057, + "grad_norm": 336.0, + "learning_rate": 5.181872813027311e-05, + "loss": 14.4377, + "step": 12086 + }, + { + "epoch": 0.5038139302238339, + "grad_norm": 720.0, + "learning_rate": 5.181198256425703e-05, + "loss": 21.3752, + "step": 12087 + }, + { + "epoch": 0.5038556125213621, + "grad_norm": 560.0, + "learning_rate": 5.1805236965217417e-05, + "loss": 18.7502, + "step": 12088 + }, + { + "epoch": 0.5038972948188905, + "grad_norm": 720.0, + "learning_rate": 5.17984913332772e-05, + "loss": 21.6251, + "step": 12089 + }, + { + "epoch": 0.5039389771164187, + "grad_norm": 374.0, + "learning_rate": 5.179174566855931e-05, + "loss": 13.502, + "step": 12090 + }, + { + "epoch": 0.5039806594139469, + "grad_norm": 282.0, + "learning_rate": 5.178499997118671e-05, + "loss": 12.0627, + "step": 12091 + }, + { + "epoch": 0.5040223417114751, + "grad_norm": 462.0, + "learning_rate": 5.177825424128232e-05, + "loss": 16.2503, + "step": 12092 + }, + { + "epoch": 0.5040640240090034, + "grad_norm": 836.0, + "learning_rate": 5.177150847896909e-05, + "loss": 20.5033, + "step": 12093 + }, + { + "epoch": 0.5041057063065316, + "grad_norm": 588.0, + "learning_rate": 5.176476268436996e-05, + "loss": 17.2502, + "step": 12094 + }, + { + "epoch": 0.5041473886040598, + "grad_norm": 572.0, + "learning_rate": 5.1758016857607874e-05, + "loss": 16.8762, + "step": 12095 + }, + { + "epoch": 0.504189070901588, + "grad_norm": 528.0, + "learning_rate": 5.1751270998805766e-05, + "loss": 18.5022, + "step": 12096 + }, + { + "epoch": 0.5042307531991164, + "grad_norm": 296.0, + "learning_rate": 5.17445251080866e-05, + "loss": 13.7502, + "step": 12097 + }, + { + "epoch": 0.5042724354966446, + "grad_norm": 432.0, + "learning_rate": 5.173777918557331e-05, + "loss": 15.9378, + "step": 12098 + }, + { + "epoch": 0.5043141177941728, + "grad_norm": 284.0, + "learning_rate": 5.1731033231388835e-05, + "loss": 12.5006, + "step": 12099 + }, + { + "epoch": 0.504355800091701, + "grad_norm": 310.0, + "learning_rate": 5.1724287245656136e-05, + "loss": 13.6257, + "step": 12100 + }, + { + "epoch": 0.5043974823892293, + "grad_norm": 490.0, + "learning_rate": 5.1717541228498135e-05, + "loss": 17.7504, + "step": 12101 + }, + { + "epoch": 0.5044391646867575, + "grad_norm": 382.0, + "learning_rate": 5.1710795180037794e-05, + "loss": 14.8752, + "step": 12102 + }, + { + "epoch": 0.5044808469842857, + "grad_norm": 336.0, + "learning_rate": 5.1704049100398055e-05, + "loss": 14.2505, + "step": 12103 + }, + { + "epoch": 0.504522529281814, + "grad_norm": 268.0, + "learning_rate": 5.169730298970188e-05, + "loss": 11.8127, + "step": 12104 + }, + { + "epoch": 0.5045642115793423, + "grad_norm": 1152.0, + "learning_rate": 5.1690556848072205e-05, + "loss": 24.1299, + "step": 12105 + }, + { + "epoch": 0.5046058938768705, + "grad_norm": 59.5, + "learning_rate": 5.168381067563197e-05, + "loss": 6.6879, + "step": 12106 + }, + { + "epoch": 0.5046475761743987, + "grad_norm": 462.0, + "learning_rate": 5.167706447250416e-05, + "loss": 16.7505, + "step": 12107 + }, + { + "epoch": 0.504689258471927, + "grad_norm": 304.0, + "learning_rate": 5.167031823881168e-05, + "loss": 14.1261, + "step": 12108 + }, + { + "epoch": 0.5047309407694552, + "grad_norm": 316.0, + "learning_rate": 5.166357197467752e-05, + "loss": 15.0036, + "step": 12109 + }, + { + "epoch": 0.5047726230669835, + "grad_norm": 107.5, + "learning_rate": 5.16568256802246e-05, + "loss": 8.3757, + "step": 12110 + }, + { + "epoch": 0.5048143053645117, + "grad_norm": 608.0, + "learning_rate": 5.1650079355575884e-05, + "loss": 18.7504, + "step": 12111 + }, + { + "epoch": 0.50485598766204, + "grad_norm": 203.0, + "learning_rate": 5.1643333000854335e-05, + "loss": 11.0001, + "step": 12112 + }, + { + "epoch": 0.5048976699595682, + "grad_norm": 173.0, + "learning_rate": 5.163658661618288e-05, + "loss": 11.3753, + "step": 12113 + }, + { + "epoch": 0.5049393522570964, + "grad_norm": 572.0, + "learning_rate": 5.162984020168451e-05, + "loss": 15.192, + "step": 12114 + }, + { + "epoch": 0.5049810345546246, + "grad_norm": 306.0, + "learning_rate": 5.1623093757482135e-05, + "loss": 13.5627, + "step": 12115 + }, + { + "epoch": 0.5050227168521529, + "grad_norm": 568.0, + "learning_rate": 5.161634728369874e-05, + "loss": 19.2505, + "step": 12116 + }, + { + "epoch": 0.5050643991496812, + "grad_norm": 412.0, + "learning_rate": 5.160960078045728e-05, + "loss": 14.6876, + "step": 12117 + }, + { + "epoch": 0.5051060814472094, + "grad_norm": 246.0, + "learning_rate": 5.1602854247880697e-05, + "loss": 12.0001, + "step": 12118 + }, + { + "epoch": 0.5051477637447376, + "grad_norm": 384.0, + "learning_rate": 5.1596107686091955e-05, + "loss": 14.1252, + "step": 12119 + }, + { + "epoch": 0.5051894460422659, + "grad_norm": 360.0, + "learning_rate": 5.158936109521401e-05, + "loss": 14.688, + "step": 12120 + }, + { + "epoch": 0.5052311283397941, + "grad_norm": 190.0, + "learning_rate": 5.158261447536982e-05, + "loss": 10.1254, + "step": 12121 + }, + { + "epoch": 0.5052728106373223, + "grad_norm": 362.0, + "learning_rate": 5.1575867826682335e-05, + "loss": 15.0002, + "step": 12122 + }, + { + "epoch": 0.5053144929348505, + "grad_norm": 664.0, + "learning_rate": 5.156912114927451e-05, + "loss": 21.5002, + "step": 12123 + }, + { + "epoch": 0.5053561752323789, + "grad_norm": 364.0, + "learning_rate": 5.156237444326934e-05, + "loss": 13.9378, + "step": 12124 + }, + { + "epoch": 0.5053978575299071, + "grad_norm": 282.0, + "learning_rate": 5.1555627708789735e-05, + "loss": 12.6252, + "step": 12125 + }, + { + "epoch": 0.5054395398274353, + "grad_norm": 172.0, + "learning_rate": 5.154888094595868e-05, + "loss": 11.9389, + "step": 12126 + }, + { + "epoch": 0.5054812221249635, + "grad_norm": 374.0, + "learning_rate": 5.154213415489913e-05, + "loss": 15.1889, + "step": 12127 + }, + { + "epoch": 0.5055229044224918, + "grad_norm": 724.0, + "learning_rate": 5.153538733573405e-05, + "loss": 20.6284, + "step": 12128 + }, + { + "epoch": 0.50556458672002, + "grad_norm": 502.0, + "learning_rate": 5.15286404885864e-05, + "loss": 17.2507, + "step": 12129 + }, + { + "epoch": 0.5056062690175482, + "grad_norm": 380.0, + "learning_rate": 5.1521893613579154e-05, + "loss": 12.1259, + "step": 12130 + }, + { + "epoch": 0.5056479513150764, + "grad_norm": 348.0, + "learning_rate": 5.151514671083525e-05, + "loss": 13.8128, + "step": 12131 + }, + { + "epoch": 0.5056896336126048, + "grad_norm": 278.0, + "learning_rate": 5.1508399780477666e-05, + "loss": 13.3134, + "step": 12132 + }, + { + "epoch": 0.505731315910133, + "grad_norm": 135.0, + "learning_rate": 5.1501652822629356e-05, + "loss": 8.3754, + "step": 12133 + }, + { + "epoch": 0.5057729982076612, + "grad_norm": 350.0, + "learning_rate": 5.14949058374133e-05, + "loss": 14.3753, + "step": 12134 + }, + { + "epoch": 0.5058146805051894, + "grad_norm": 316.0, + "learning_rate": 5.148815882495245e-05, + "loss": 13.7502, + "step": 12135 + }, + { + "epoch": 0.5058563628027177, + "grad_norm": 308.0, + "learning_rate": 5.148141178536976e-05, + "loss": 12.7502, + "step": 12136 + }, + { + "epoch": 0.5058980451002459, + "grad_norm": 532.0, + "learning_rate": 5.147466471878822e-05, + "loss": 19.0004, + "step": 12137 + }, + { + "epoch": 0.5059397273977742, + "grad_norm": 644.0, + "learning_rate": 5.146791762533078e-05, + "loss": 17.1269, + "step": 12138 + }, + { + "epoch": 0.5059814096953024, + "grad_norm": 243.0, + "learning_rate": 5.1461170505120426e-05, + "loss": 13.1879, + "step": 12139 + }, + { + "epoch": 0.5060230919928307, + "grad_norm": 260.0, + "learning_rate": 5.14544233582801e-05, + "loss": 13.3755, + "step": 12140 + }, + { + "epoch": 0.5060647742903589, + "grad_norm": 42.0, + "learning_rate": 5.144767618493277e-05, + "loss": 6.8442, + "step": 12141 + }, + { + "epoch": 0.5061064565878871, + "grad_norm": 336.0, + "learning_rate": 5.144092898520142e-05, + "loss": 14.0634, + "step": 12142 + }, + { + "epoch": 0.5061481388854153, + "grad_norm": 1256.0, + "learning_rate": 5.143418175920901e-05, + "loss": 25.8797, + "step": 12143 + }, + { + "epoch": 0.5061898211829436, + "grad_norm": 350.0, + "learning_rate": 5.142743450707851e-05, + "loss": 15.3775, + "step": 12144 + }, + { + "epoch": 0.5062315034804719, + "grad_norm": 266.0, + "learning_rate": 5.14206872289329e-05, + "loss": 12.7503, + "step": 12145 + }, + { + "epoch": 0.5062731857780001, + "grad_norm": 414.0, + "learning_rate": 5.141393992489513e-05, + "loss": 15.314, + "step": 12146 + }, + { + "epoch": 0.5063148680755283, + "grad_norm": 378.0, + "learning_rate": 5.140719259508817e-05, + "loss": 14.8129, + "step": 12147 + }, + { + "epoch": 0.5063565503730566, + "grad_norm": 506.0, + "learning_rate": 5.140044523963502e-05, + "loss": 17.0004, + "step": 12148 + }, + { + "epoch": 0.5063982326705848, + "grad_norm": 180.0, + "learning_rate": 5.139369785865862e-05, + "loss": 11.7503, + "step": 12149 + }, + { + "epoch": 0.506439914968113, + "grad_norm": 588.0, + "learning_rate": 5.1386950452281954e-05, + "loss": 17.7513, + "step": 12150 + }, + { + "epoch": 0.5064815972656412, + "grad_norm": 245.0, + "learning_rate": 5.138020302062799e-05, + "loss": 13.2507, + "step": 12151 + }, + { + "epoch": 0.5065232795631696, + "grad_norm": 616.0, + "learning_rate": 5.1373455563819704e-05, + "loss": 19.1253, + "step": 12152 + }, + { + "epoch": 0.5065649618606978, + "grad_norm": 286.0, + "learning_rate": 5.136670808198006e-05, + "loss": 11.8129, + "step": 12153 + }, + { + "epoch": 0.506606644158226, + "grad_norm": 310.0, + "learning_rate": 5.1359960575232055e-05, + "loss": 12.6876, + "step": 12154 + }, + { + "epoch": 0.5066483264557542, + "grad_norm": 115.0, + "learning_rate": 5.1353213043698644e-05, + "loss": 11.1254, + "step": 12155 + }, + { + "epoch": 0.5066900087532825, + "grad_norm": 201.0, + "learning_rate": 5.1346465487502804e-05, + "loss": 12.376, + "step": 12156 + }, + { + "epoch": 0.5067316910508107, + "grad_norm": 210.0, + "learning_rate": 5.133971790676751e-05, + "loss": 9.9378, + "step": 12157 + }, + { + "epoch": 0.5067733733483389, + "grad_norm": 233.0, + "learning_rate": 5.133297030161574e-05, + "loss": 12.3132, + "step": 12158 + }, + { + "epoch": 0.5068150556458672, + "grad_norm": 192.0, + "learning_rate": 5.132622267217047e-05, + "loss": 9.8752, + "step": 12159 + }, + { + "epoch": 0.5068567379433955, + "grad_norm": 302.0, + "learning_rate": 5.131947501855468e-05, + "loss": 13.5629, + "step": 12160 + }, + { + "epoch": 0.5068984202409237, + "grad_norm": 135.0, + "learning_rate": 5.131272734089133e-05, + "loss": 10.8134, + "step": 12161 + }, + { + "epoch": 0.5069401025384519, + "grad_norm": 282.0, + "learning_rate": 5.1305979639303405e-05, + "loss": 14.1899, + "step": 12162 + }, + { + "epoch": 0.5069817848359801, + "grad_norm": 426.0, + "learning_rate": 5.129923191391389e-05, + "loss": 15.5003, + "step": 12163 + }, + { + "epoch": 0.5070234671335084, + "grad_norm": 322.0, + "learning_rate": 5.1292484164845764e-05, + "loss": 11.2512, + "step": 12164 + }, + { + "epoch": 0.5070651494310366, + "grad_norm": 462.0, + "learning_rate": 5.1285736392221995e-05, + "loss": 17.0006, + "step": 12165 + }, + { + "epoch": 0.5071068317285649, + "grad_norm": 86.5, + "learning_rate": 5.1278988596165555e-05, + "loss": 9.8132, + "step": 12166 + }, + { + "epoch": 0.5071485140260931, + "grad_norm": 376.0, + "learning_rate": 5.127224077679944e-05, + "loss": 14.3143, + "step": 12167 + }, + { + "epoch": 0.5071901963236214, + "grad_norm": 816.0, + "learning_rate": 5.126549293424663e-05, + "loss": 21.5005, + "step": 12168 + }, + { + "epoch": 0.5072318786211496, + "grad_norm": 211.0, + "learning_rate": 5.12587450686301e-05, + "loss": 12.7503, + "step": 12169 + }, + { + "epoch": 0.5072735609186778, + "grad_norm": 156.0, + "learning_rate": 5.1251997180072816e-05, + "loss": 9.1886, + "step": 12170 + }, + { + "epoch": 0.507315243216206, + "grad_norm": 386.0, + "learning_rate": 5.124524926869779e-05, + "loss": 15.688, + "step": 12171 + }, + { + "epoch": 0.5073569255137343, + "grad_norm": 203.0, + "learning_rate": 5.123850133462797e-05, + "loss": 11.0004, + "step": 12172 + }, + { + "epoch": 0.5073986078112626, + "grad_norm": 149.0, + "learning_rate": 5.123175337798637e-05, + "loss": 7.8438, + "step": 12173 + }, + { + "epoch": 0.5074402901087908, + "grad_norm": 712.0, + "learning_rate": 5.1225005398895955e-05, + "loss": 20.1258, + "step": 12174 + }, + { + "epoch": 0.507481972406319, + "grad_norm": 89.0, + "learning_rate": 5.12182573974797e-05, + "loss": 8.6879, + "step": 12175 + }, + { + "epoch": 0.5075236547038473, + "grad_norm": 250.0, + "learning_rate": 5.121150937386059e-05, + "loss": 11.6252, + "step": 12176 + }, + { + "epoch": 0.5075653370013755, + "grad_norm": 508.0, + "learning_rate": 5.120476132816162e-05, + "loss": 15.8128, + "step": 12177 + }, + { + "epoch": 0.5076070192989037, + "grad_norm": 1440.0, + "learning_rate": 5.1198013260505765e-05, + "loss": 27.6303, + "step": 12178 + }, + { + "epoch": 0.507648701596432, + "grad_norm": 89.0, + "learning_rate": 5.1191265171016e-05, + "loss": 7.8442, + "step": 12179 + }, + { + "epoch": 0.5076903838939603, + "grad_norm": 282.0, + "learning_rate": 5.118451705981534e-05, + "loss": 11.6877, + "step": 12180 + }, + { + "epoch": 0.5077320661914885, + "grad_norm": 247.0, + "learning_rate": 5.117776892702675e-05, + "loss": 12.1878, + "step": 12181 + }, + { + "epoch": 0.5077737484890167, + "grad_norm": 340.0, + "learning_rate": 5.117102077277321e-05, + "loss": 15.0005, + "step": 12182 + }, + { + "epoch": 0.507815430786545, + "grad_norm": 237.0, + "learning_rate": 5.116427259717772e-05, + "loss": 11.8759, + "step": 12183 + }, + { + "epoch": 0.5078571130840732, + "grad_norm": 412.0, + "learning_rate": 5.115752440036325e-05, + "loss": 14.4378, + "step": 12184 + }, + { + "epoch": 0.5078987953816014, + "grad_norm": 752.0, + "learning_rate": 5.115077618245281e-05, + "loss": 20.1252, + "step": 12185 + }, + { + "epoch": 0.5079404776791296, + "grad_norm": 294.0, + "learning_rate": 5.1144027943569364e-05, + "loss": 13.3132, + "step": 12186 + }, + { + "epoch": 0.507982159976658, + "grad_norm": 494.0, + "learning_rate": 5.11372796838359e-05, + "loss": 15.6877, + "step": 12187 + }, + { + "epoch": 0.5080238422741862, + "grad_norm": 183.0, + "learning_rate": 5.113053140337541e-05, + "loss": 11.2503, + "step": 12188 + }, + { + "epoch": 0.5080655245717144, + "grad_norm": 219.0, + "learning_rate": 5.112378310231091e-05, + "loss": 13.3131, + "step": 12189 + }, + { + "epoch": 0.5081072068692426, + "grad_norm": 548.0, + "learning_rate": 5.1117034780765336e-05, + "loss": 18.6254, + "step": 12190 + }, + { + "epoch": 0.5081488891667709, + "grad_norm": 100.0, + "learning_rate": 5.111028643886171e-05, + "loss": 8.0012, + "step": 12191 + }, + { + "epoch": 0.5081905714642991, + "grad_norm": 392.0, + "learning_rate": 5.110353807672301e-05, + "loss": 13.5633, + "step": 12192 + }, + { + "epoch": 0.5082322537618273, + "grad_norm": 1012.0, + "learning_rate": 5.109678969447225e-05, + "loss": 26.0, + "step": 12193 + }, + { + "epoch": 0.5082739360593556, + "grad_norm": 308.0, + "learning_rate": 5.109004129223238e-05, + "loss": 13.7502, + "step": 12194 + }, + { + "epoch": 0.5083156183568839, + "grad_norm": 155.0, + "learning_rate": 5.108329287012643e-05, + "loss": 11.0629, + "step": 12195 + }, + { + "epoch": 0.5083573006544121, + "grad_norm": 712.0, + "learning_rate": 5.107654442827736e-05, + "loss": 20.0049, + "step": 12196 + }, + { + "epoch": 0.5083989829519403, + "grad_norm": 274.0, + "learning_rate": 5.106979596680817e-05, + "loss": 12.5634, + "step": 12197 + }, + { + "epoch": 0.5084406652494685, + "grad_norm": 243.0, + "learning_rate": 5.106304748584187e-05, + "loss": 12.1879, + "step": 12198 + }, + { + "epoch": 0.5084823475469968, + "grad_norm": 177.0, + "learning_rate": 5.105629898550142e-05, + "loss": 9.3128, + "step": 12199 + }, + { + "epoch": 0.508524029844525, + "grad_norm": 404.0, + "learning_rate": 5.1049550465909825e-05, + "loss": 14.5626, + "step": 12200 + }, + { + "epoch": 0.5085657121420533, + "grad_norm": 113.5, + "learning_rate": 5.104280192719009e-05, + "loss": 6.5955, + "step": 12201 + }, + { + "epoch": 0.5086073944395815, + "grad_norm": 170.0, + "learning_rate": 5.103605336946519e-05, + "loss": 5.1251, + "step": 12202 + }, + { + "epoch": 0.5086490767371098, + "grad_norm": 366.0, + "learning_rate": 5.102930479285812e-05, + "loss": 13.9378, + "step": 12203 + }, + { + "epoch": 0.508690759034638, + "grad_norm": 900.0, + "learning_rate": 5.102255619749188e-05, + "loss": 24.1254, + "step": 12204 + }, + { + "epoch": 0.5087324413321662, + "grad_norm": 290.0, + "learning_rate": 5.1015807583489474e-05, + "loss": 13.2511, + "step": 12205 + }, + { + "epoch": 0.5087741236296944, + "grad_norm": 348.0, + "learning_rate": 5.1009058950973876e-05, + "loss": 13.5002, + "step": 12206 + }, + { + "epoch": 0.5088158059272228, + "grad_norm": 656.0, + "learning_rate": 5.100231030006809e-05, + "loss": 20.3752, + "step": 12207 + }, + { + "epoch": 0.508857488224751, + "grad_norm": 354.0, + "learning_rate": 5.099556163089512e-05, + "loss": 13.0005, + "step": 12208 + }, + { + "epoch": 0.5088991705222792, + "grad_norm": 512.0, + "learning_rate": 5.098881294357795e-05, + "loss": 16.8752, + "step": 12209 + }, + { + "epoch": 0.5089408528198074, + "grad_norm": 352.0, + "learning_rate": 5.098206423823956e-05, + "loss": 14.6253, + "step": 12210 + }, + { + "epoch": 0.5089825351173357, + "grad_norm": 516.0, + "learning_rate": 5.097531551500297e-05, + "loss": 16.2502, + "step": 12211 + }, + { + "epoch": 0.5090242174148639, + "grad_norm": 700.0, + "learning_rate": 5.096856677399118e-05, + "loss": 19.7505, + "step": 12212 + }, + { + "epoch": 0.5090658997123921, + "grad_norm": 2400.0, + "learning_rate": 5.0961818015327156e-05, + "loss": 43.5073, + "step": 12213 + }, + { + "epoch": 0.5091075820099203, + "grad_norm": 109.0, + "learning_rate": 5.0955069239133926e-05, + "loss": 9.8753, + "step": 12214 + }, + { + "epoch": 0.5091492643074487, + "grad_norm": 346.0, + "learning_rate": 5.094832044553447e-05, + "loss": 13.6877, + "step": 12215 + }, + { + "epoch": 0.5091909466049769, + "grad_norm": 524.0, + "learning_rate": 5.0941571634651776e-05, + "loss": 17.8763, + "step": 12216 + }, + { + "epoch": 0.5092326289025051, + "grad_norm": 418.0, + "learning_rate": 5.0934822806608875e-05, + "loss": 16.5003, + "step": 12217 + }, + { + "epoch": 0.5092743112000333, + "grad_norm": 704.0, + "learning_rate": 5.092807396152873e-05, + "loss": 19.755, + "step": 12218 + }, + { + "epoch": 0.5093159934975616, + "grad_norm": 520.0, + "learning_rate": 5.0921325099534365e-05, + "loss": 18.5003, + "step": 12219 + }, + { + "epoch": 0.5093576757950898, + "grad_norm": 184.0, + "learning_rate": 5.091457622074877e-05, + "loss": 10.6877, + "step": 12220 + }, + { + "epoch": 0.509399358092618, + "grad_norm": 54.0, + "learning_rate": 5.090782732529494e-05, + "loss": 7.2816, + "step": 12221 + }, + { + "epoch": 0.5094410403901463, + "grad_norm": 416.0, + "learning_rate": 5.0901078413295875e-05, + "loss": 16.8753, + "step": 12222 + }, + { + "epoch": 0.5094827226876746, + "grad_norm": 628.0, + "learning_rate": 5.089432948487458e-05, + "loss": 18.5014, + "step": 12223 + }, + { + "epoch": 0.5095244049852028, + "grad_norm": 328.0, + "learning_rate": 5.0887580540154045e-05, + "loss": 13.5007, + "step": 12224 + }, + { + "epoch": 0.509566087282731, + "grad_norm": 564.0, + "learning_rate": 5.0880831579257285e-05, + "loss": 17.0012, + "step": 12225 + }, + { + "epoch": 0.5096077695802592, + "grad_norm": 167.0, + "learning_rate": 5.0874082602307286e-05, + "loss": 10.1878, + "step": 12226 + }, + { + "epoch": 0.5096494518777875, + "grad_norm": 178.0, + "learning_rate": 5.086733360942705e-05, + "loss": 8.6253, + "step": 12227 + }, + { + "epoch": 0.5096911341753158, + "grad_norm": 384.0, + "learning_rate": 5.086058460073958e-05, + "loss": 14.8753, + "step": 12228 + }, + { + "epoch": 0.509732816472844, + "grad_norm": 258.0, + "learning_rate": 5.085383557636788e-05, + "loss": 11.8128, + "step": 12229 + }, + { + "epoch": 0.5097744987703722, + "grad_norm": 304.0, + "learning_rate": 5.084708653643495e-05, + "loss": 13.5637, + "step": 12230 + }, + { + "epoch": 0.5098161810679005, + "grad_norm": 182.0, + "learning_rate": 5.084033748106381e-05, + "loss": 10.6879, + "step": 12231 + }, + { + "epoch": 0.5098578633654287, + "grad_norm": 324.0, + "learning_rate": 5.083358841037742e-05, + "loss": 14.6878, + "step": 12232 + }, + { + "epoch": 0.5098995456629569, + "grad_norm": 264.0, + "learning_rate": 5.082683932449882e-05, + "loss": 14.7506, + "step": 12233 + }, + { + "epoch": 0.5099412279604851, + "grad_norm": 486.0, + "learning_rate": 5.0820090223551e-05, + "loss": 17.7502, + "step": 12234 + }, + { + "epoch": 0.5099829102580135, + "grad_norm": 568.0, + "learning_rate": 5.081334110765696e-05, + "loss": 18.1255, + "step": 12235 + }, + { + "epoch": 0.5100245925555417, + "grad_norm": 318.0, + "learning_rate": 5.080659197693971e-05, + "loss": 12.9391, + "step": 12236 + }, + { + "epoch": 0.5100662748530699, + "grad_norm": 492.0, + "learning_rate": 5.0799842831522245e-05, + "loss": 16.876, + "step": 12237 + }, + { + "epoch": 0.5101079571505981, + "grad_norm": 422.0, + "learning_rate": 5.079309367152758e-05, + "loss": 15.3753, + "step": 12238 + }, + { + "epoch": 0.5101496394481264, + "grad_norm": 386.0, + "learning_rate": 5.078634449707871e-05, + "loss": 13.8131, + "step": 12239 + }, + { + "epoch": 0.5101913217456546, + "grad_norm": 340.0, + "learning_rate": 5.0779595308298645e-05, + "loss": 14.5628, + "step": 12240 + }, + { + "epoch": 0.5102330040431828, + "grad_norm": 162.0, + "learning_rate": 5.077284610531037e-05, + "loss": 10.2503, + "step": 12241 + }, + { + "epoch": 0.510274686340711, + "grad_norm": 808.0, + "learning_rate": 5.0766096888236917e-05, + "loss": 20.5008, + "step": 12242 + }, + { + "epoch": 0.5103163686382394, + "grad_norm": 140.0, + "learning_rate": 5.0759347657201285e-05, + "loss": 8.1256, + "step": 12243 + }, + { + "epoch": 0.5103580509357676, + "grad_norm": 1592.0, + "learning_rate": 5.075259841232647e-05, + "loss": 34.5033, + "step": 12244 + }, + { + "epoch": 0.5103997332332958, + "grad_norm": 286.0, + "learning_rate": 5.074584915373548e-05, + "loss": 12.8127, + "step": 12245 + }, + { + "epoch": 0.510441415530824, + "grad_norm": 864.0, + "learning_rate": 5.073909988155132e-05, + "loss": 23.6252, + "step": 12246 + }, + { + "epoch": 0.5104830978283523, + "grad_norm": 484.0, + "learning_rate": 5.0732350595897015e-05, + "loss": 16.7528, + "step": 12247 + }, + { + "epoch": 0.5105247801258805, + "grad_norm": 328.0, + "learning_rate": 5.072560129689554e-05, + "loss": 13.5004, + "step": 12248 + }, + { + "epoch": 0.5105664624234088, + "grad_norm": 129.0, + "learning_rate": 5.0718851984669925e-05, + "loss": 10.8752, + "step": 12249 + }, + { + "epoch": 0.510608144720937, + "grad_norm": 378.0, + "learning_rate": 5.071210265934316e-05, + "loss": 15.5002, + "step": 12250 + }, + { + "epoch": 0.5106498270184653, + "grad_norm": 362.0, + "learning_rate": 5.0705353321038276e-05, + "loss": 15.6253, + "step": 12251 + }, + { + "epoch": 0.5106915093159935, + "grad_norm": 209.0, + "learning_rate": 5.0698603969878255e-05, + "loss": 12.0628, + "step": 12252 + }, + { + "epoch": 0.5107331916135217, + "grad_norm": 202.0, + "learning_rate": 5.069185460598611e-05, + "loss": 10.6882, + "step": 12253 + }, + { + "epoch": 0.51077487391105, + "grad_norm": 82.5, + "learning_rate": 5.0685105229484855e-05, + "loss": 7.3758, + "step": 12254 + }, + { + "epoch": 0.5108165562085782, + "grad_norm": 560.0, + "learning_rate": 5.0678355840497495e-05, + "loss": 18.7503, + "step": 12255 + }, + { + "epoch": 0.5108582385061065, + "grad_norm": 1472.0, + "learning_rate": 5.0671606439147045e-05, + "loss": 34.7526, + "step": 12256 + }, + { + "epoch": 0.5108999208036347, + "grad_norm": 306.0, + "learning_rate": 5.0664857025556515e-05, + "loss": 9.063, + "step": 12257 + }, + { + "epoch": 0.510941603101163, + "grad_norm": 253.0, + "learning_rate": 5.0658107599848894e-05, + "loss": 10.5628, + "step": 12258 + }, + { + "epoch": 0.5109832853986912, + "grad_norm": 166.0, + "learning_rate": 5.065135816214721e-05, + "loss": 10.9377, + "step": 12259 + }, + { + "epoch": 0.5110249676962194, + "grad_norm": 458.0, + "learning_rate": 5.064460871257447e-05, + "loss": 17.251, + "step": 12260 + }, + { + "epoch": 0.5110666499937476, + "grad_norm": 288.0, + "learning_rate": 5.0637859251253675e-05, + "loss": 12.5626, + "step": 12261 + }, + { + "epoch": 0.511108332291276, + "grad_norm": 202.0, + "learning_rate": 5.0631109778307826e-05, + "loss": 11.5008, + "step": 12262 + }, + { + "epoch": 0.5111500145888042, + "grad_norm": 334.0, + "learning_rate": 5.062436029385996e-05, + "loss": 13.7504, + "step": 12263 + }, + { + "epoch": 0.5111916968863324, + "grad_norm": 118.5, + "learning_rate": 5.0617610798033076e-05, + "loss": 9.3757, + "step": 12264 + }, + { + "epoch": 0.5112333791838606, + "grad_norm": 131.0, + "learning_rate": 5.061086129095017e-05, + "loss": 9.0627, + "step": 12265 + }, + { + "epoch": 0.5112750614813889, + "grad_norm": 340.0, + "learning_rate": 5.0604111772734255e-05, + "loss": 13.2501, + "step": 12266 + }, + { + "epoch": 0.5113167437789171, + "grad_norm": 864.0, + "learning_rate": 5.059736224350836e-05, + "loss": 28.3754, + "step": 12267 + }, + { + "epoch": 0.5113584260764453, + "grad_norm": 476.0, + "learning_rate": 5.059061270339549e-05, + "loss": 17.1255, + "step": 12268 + }, + { + "epoch": 0.5114001083739735, + "grad_norm": 516.0, + "learning_rate": 5.058386315251864e-05, + "loss": 17.8752, + "step": 12269 + }, + { + "epoch": 0.5114417906715019, + "grad_norm": 330.0, + "learning_rate": 5.057711359100084e-05, + "loss": 14.3128, + "step": 12270 + }, + { + "epoch": 0.5114834729690301, + "grad_norm": 996.0, + "learning_rate": 5.05703640189651e-05, + "loss": 24.1297, + "step": 12271 + }, + { + "epoch": 0.5115251552665583, + "grad_norm": 528.0, + "learning_rate": 5.056361443653441e-05, + "loss": 16.5007, + "step": 12272 + }, + { + "epoch": 0.5115668375640865, + "grad_norm": 1320.0, + "learning_rate": 5.0556864843831805e-05, + "loss": 30.8759, + "step": 12273 + }, + { + "epoch": 0.5116085198616148, + "grad_norm": 668.0, + "learning_rate": 5.055011524098029e-05, + "loss": 18.7508, + "step": 12274 + }, + { + "epoch": 0.511650202159143, + "grad_norm": 372.0, + "learning_rate": 5.054336562810288e-05, + "loss": 14.8129, + "step": 12275 + }, + { + "epoch": 0.5116918844566712, + "grad_norm": 576.0, + "learning_rate": 5.053661600532257e-05, + "loss": 19.2502, + "step": 12276 + }, + { + "epoch": 0.5117335667541995, + "grad_norm": 492.0, + "learning_rate": 5.0529866372762394e-05, + "loss": 19.0002, + "step": 12277 + }, + { + "epoch": 0.5117752490517278, + "grad_norm": 780.0, + "learning_rate": 5.052311673054536e-05, + "loss": 20.0071, + "step": 12278 + }, + { + "epoch": 0.511816931349256, + "grad_norm": 608.0, + "learning_rate": 5.051636707879446e-05, + "loss": 19.3751, + "step": 12279 + }, + { + "epoch": 0.5118586136467842, + "grad_norm": 197.0, + "learning_rate": 5.050961741763274e-05, + "loss": 11.2502, + "step": 12280 + }, + { + "epoch": 0.5119002959443124, + "grad_norm": 414.0, + "learning_rate": 5.050286774718319e-05, + "loss": 15.5002, + "step": 12281 + }, + { + "epoch": 0.5119419782418407, + "grad_norm": 239.0, + "learning_rate": 5.049611806756883e-05, + "loss": 12.1253, + "step": 12282 + }, + { + "epoch": 0.511983660539369, + "grad_norm": 432.0, + "learning_rate": 5.0489368378912685e-05, + "loss": 15.5012, + "step": 12283 + }, + { + "epoch": 0.5120253428368972, + "grad_norm": 90.5, + "learning_rate": 5.0482618681337744e-05, + "loss": 6.7818, + "step": 12284 + }, + { + "epoch": 0.5120670251344254, + "grad_norm": 804.0, + "learning_rate": 5.047586897496704e-05, + "loss": 22.3761, + "step": 12285 + }, + { + "epoch": 0.5121087074319537, + "grad_norm": 354.0, + "learning_rate": 5.046911925992359e-05, + "loss": 14.0628, + "step": 12286 + }, + { + "epoch": 0.5121503897294819, + "grad_norm": 207.0, + "learning_rate": 5.046236953633039e-05, + "loss": 12.1878, + "step": 12287 + }, + { + "epoch": 0.5121920720270101, + "grad_norm": 322.0, + "learning_rate": 5.045561980431047e-05, + "loss": 13.5629, + "step": 12288 + }, + { + "epoch": 0.5122337543245383, + "grad_norm": 324.0, + "learning_rate": 5.044887006398684e-05, + "loss": 13.8129, + "step": 12289 + }, + { + "epoch": 0.5122754366220666, + "grad_norm": 592.0, + "learning_rate": 5.044212031548251e-05, + "loss": 17.6251, + "step": 12290 + }, + { + "epoch": 0.5123171189195949, + "grad_norm": 169.0, + "learning_rate": 5.043537055892049e-05, + "loss": 10.0003, + "step": 12291 + }, + { + "epoch": 0.5123588012171231, + "grad_norm": 506.0, + "learning_rate": 5.042862079442381e-05, + "loss": 17.6253, + "step": 12292 + }, + { + "epoch": 0.5124004835146513, + "grad_norm": 88.0, + "learning_rate": 5.0421871022115474e-05, + "loss": 8.1881, + "step": 12293 + }, + { + "epoch": 0.5124421658121796, + "grad_norm": 156.0, + "learning_rate": 5.0415121242118515e-05, + "loss": 10.0627, + "step": 12294 + }, + { + "epoch": 0.5124838481097078, + "grad_norm": 294.0, + "learning_rate": 5.040837145455591e-05, + "loss": 14.3754, + "step": 12295 + }, + { + "epoch": 0.512525530407236, + "grad_norm": 440.0, + "learning_rate": 5.040162165955072e-05, + "loss": 17.2501, + "step": 12296 + }, + { + "epoch": 0.5125672127047642, + "grad_norm": 155.0, + "learning_rate": 5.039487185722593e-05, + "loss": 7.2506, + "step": 12297 + }, + { + "epoch": 0.5126088950022926, + "grad_norm": 123.0, + "learning_rate": 5.038812204770458e-05, + "loss": 7.5942, + "step": 12298 + }, + { + "epoch": 0.5126505772998208, + "grad_norm": 524.0, + "learning_rate": 5.0381372231109655e-05, + "loss": 16.1253, + "step": 12299 + }, + { + "epoch": 0.512692259597349, + "grad_norm": 241.0, + "learning_rate": 5.03746224075642e-05, + "loss": 10.9378, + "step": 12300 + }, + { + "epoch": 0.5127339418948772, + "grad_norm": 258.0, + "learning_rate": 5.03678725771912e-05, + "loss": 12.0628, + "step": 12301 + }, + { + "epoch": 0.5127756241924055, + "grad_norm": 676.0, + "learning_rate": 5.036112274011371e-05, + "loss": 21.8756, + "step": 12302 + }, + { + "epoch": 0.5128173064899337, + "grad_norm": 1448.0, + "learning_rate": 5.0354372896454715e-05, + "loss": 29.6299, + "step": 12303 + }, + { + "epoch": 0.512858988787462, + "grad_norm": 320.0, + "learning_rate": 5.034762304633723e-05, + "loss": 14.313, + "step": 12304 + }, + { + "epoch": 0.5129006710849902, + "grad_norm": 796.0, + "learning_rate": 5.03408731898843e-05, + "loss": 22.0002, + "step": 12305 + }, + { + "epoch": 0.5129423533825185, + "grad_norm": 219.0, + "learning_rate": 5.033412332721892e-05, + "loss": 12.0639, + "step": 12306 + }, + { + "epoch": 0.5129840356800467, + "grad_norm": 153.0, + "learning_rate": 5.032737345846412e-05, + "loss": 10.1253, + "step": 12307 + }, + { + "epoch": 0.5130257179775749, + "grad_norm": 246.0, + "learning_rate": 5.032062358374291e-05, + "loss": 12.5627, + "step": 12308 + }, + { + "epoch": 0.5130674002751031, + "grad_norm": 430.0, + "learning_rate": 5.0313873703178305e-05, + "loss": 15.6884, + "step": 12309 + }, + { + "epoch": 0.5131090825726314, + "grad_norm": 494.0, + "learning_rate": 5.030712381689332e-05, + "loss": 17.8761, + "step": 12310 + }, + { + "epoch": 0.5131507648701596, + "grad_norm": 328.0, + "learning_rate": 5.030037392501098e-05, + "loss": 13.8754, + "step": 12311 + }, + { + "epoch": 0.5131924471676879, + "grad_norm": 1272.0, + "learning_rate": 5.02936240276543e-05, + "loss": 28.5057, + "step": 12312 + }, + { + "epoch": 0.5132341294652161, + "grad_norm": 916.0, + "learning_rate": 5.028687412494628e-05, + "loss": 21.6294, + "step": 12313 + }, + { + "epoch": 0.5132758117627444, + "grad_norm": 396.0, + "learning_rate": 5.028012421700997e-05, + "loss": 15.1262, + "step": 12314 + }, + { + "epoch": 0.5133174940602726, + "grad_norm": 588.0, + "learning_rate": 5.0273374303968365e-05, + "loss": 19.8752, + "step": 12315 + }, + { + "epoch": 0.5133591763578008, + "grad_norm": 676.0, + "learning_rate": 5.0266624385944494e-05, + "loss": 20.5025, + "step": 12316 + }, + { + "epoch": 0.513400858655329, + "grad_norm": 217.0, + "learning_rate": 5.0259874463061364e-05, + "loss": 11.6252, + "step": 12317 + }, + { + "epoch": 0.5134425409528574, + "grad_norm": 134.0, + "learning_rate": 5.0253124535442e-05, + "loss": 9.9383, + "step": 12318 + }, + { + "epoch": 0.5134842232503856, + "grad_norm": 410.0, + "learning_rate": 5.024637460320942e-05, + "loss": 16.1266, + "step": 12319 + }, + { + "epoch": 0.5135259055479138, + "grad_norm": 728.0, + "learning_rate": 5.023962466648664e-05, + "loss": 20.7503, + "step": 12320 + }, + { + "epoch": 0.513567587845442, + "grad_norm": 154.0, + "learning_rate": 5.0232874725396685e-05, + "loss": 10.3756, + "step": 12321 + }, + { + "epoch": 0.5136092701429703, + "grad_norm": 544.0, + "learning_rate": 5.022612478006257e-05, + "loss": 17.8752, + "step": 12322 + }, + { + "epoch": 0.5136509524404985, + "grad_norm": 224.0, + "learning_rate": 5.02193748306073e-05, + "loss": 7.7195, + "step": 12323 + }, + { + "epoch": 0.5136926347380267, + "grad_norm": 105.0, + "learning_rate": 5.021262487715391e-05, + "loss": 8.2502, + "step": 12324 + }, + { + "epoch": 0.513734317035555, + "grad_norm": 346.0, + "learning_rate": 5.0205874919825416e-05, + "loss": 14.7504, + "step": 12325 + }, + { + "epoch": 0.5137759993330833, + "grad_norm": 253.0, + "learning_rate": 5.019912495874483e-05, + "loss": 13.1251, + "step": 12326 + }, + { + "epoch": 0.5138176816306115, + "grad_norm": 318.0, + "learning_rate": 5.019237499403516e-05, + "loss": 13.1254, + "step": 12327 + }, + { + "epoch": 0.5138593639281397, + "grad_norm": 476.0, + "learning_rate": 5.018562502581946e-05, + "loss": 16.6253, + "step": 12328 + }, + { + "epoch": 0.513901046225668, + "grad_norm": 386.0, + "learning_rate": 5.0178875054220707e-05, + "loss": 14.6251, + "step": 12329 + }, + { + "epoch": 0.5139427285231962, + "grad_norm": 340.0, + "learning_rate": 5.0172125079361954e-05, + "loss": 14.1251, + "step": 12330 + }, + { + "epoch": 0.5139844108207244, + "grad_norm": 213.0, + "learning_rate": 5.0165375101366206e-05, + "loss": 11.0633, + "step": 12331 + }, + { + "epoch": 0.5140260931182526, + "grad_norm": 396.0, + "learning_rate": 5.015862512035647e-05, + "loss": 14.5629, + "step": 12332 + }, + { + "epoch": 0.514067775415781, + "grad_norm": 276.0, + "learning_rate": 5.0151875136455797e-05, + "loss": 13.6256, + "step": 12333 + }, + { + "epoch": 0.5141094577133092, + "grad_norm": 334.0, + "learning_rate": 5.0145125149787176e-05, + "loss": 12.688, + "step": 12334 + }, + { + "epoch": 0.5141511400108374, + "grad_norm": 296.0, + "learning_rate": 5.013837516047364e-05, + "loss": 13.688, + "step": 12335 + }, + { + "epoch": 0.5141928223083656, + "grad_norm": 412.0, + "learning_rate": 5.0131625168638196e-05, + "loss": 16.1253, + "step": 12336 + }, + { + "epoch": 0.5142345046058939, + "grad_norm": 402.0, + "learning_rate": 5.0124875174403884e-05, + "loss": 17.2506, + "step": 12337 + }, + { + "epoch": 0.5142761869034221, + "grad_norm": 344.0, + "learning_rate": 5.01181251778937e-05, + "loss": 15.063, + "step": 12338 + }, + { + "epoch": 0.5143178692009504, + "grad_norm": 1400.0, + "learning_rate": 5.011137517923068e-05, + "loss": 29.2539, + "step": 12339 + }, + { + "epoch": 0.5143595514984786, + "grad_norm": 620.0, + "learning_rate": 5.010462517853783e-05, + "loss": 18.7506, + "step": 12340 + }, + { + "epoch": 0.5144012337960069, + "grad_norm": 1528.0, + "learning_rate": 5.0097875175938195e-05, + "loss": 28.253, + "step": 12341 + }, + { + "epoch": 0.5144429160935351, + "grad_norm": 172.0, + "learning_rate": 5.009112517155474e-05, + "loss": 3.1877, + "step": 12342 + }, + { + "epoch": 0.5144845983910633, + "grad_norm": 608.0, + "learning_rate": 5.008437516551055e-05, + "loss": 19.0005, + "step": 12343 + }, + { + "epoch": 0.5145262806885915, + "grad_norm": 462.0, + "learning_rate": 5.007762515792861e-05, + "loss": 15.8757, + "step": 12344 + }, + { + "epoch": 0.5145679629861198, + "grad_norm": 338.0, + "learning_rate": 5.007087514893194e-05, + "loss": 14.1257, + "step": 12345 + }, + { + "epoch": 0.514609645283648, + "grad_norm": 420.0, + "learning_rate": 5.006412513864357e-05, + "loss": 14.3128, + "step": 12346 + }, + { + "epoch": 0.5146513275811763, + "grad_norm": 484.0, + "learning_rate": 5.0057375127186515e-05, + "loss": 15.8767, + "step": 12347 + }, + { + "epoch": 0.5146930098787045, + "grad_norm": 552.0, + "learning_rate": 5.005062511468379e-05, + "loss": 18.7504, + "step": 12348 + }, + { + "epoch": 0.5147346921762328, + "grad_norm": 204.0, + "learning_rate": 5.004387510125842e-05, + "loss": 10.2506, + "step": 12349 + }, + { + "epoch": 0.514776374473761, + "grad_norm": 508.0, + "learning_rate": 5.003712508703342e-05, + "loss": 16.8752, + "step": 12350 + }, + { + "epoch": 0.5148180567712892, + "grad_norm": 344.0, + "learning_rate": 5.003037507213181e-05, + "loss": 12.7502, + "step": 12351 + }, + { + "epoch": 0.5148597390688174, + "grad_norm": 498.0, + "learning_rate": 5.0023625056676616e-05, + "loss": 19.0003, + "step": 12352 + }, + { + "epoch": 0.5149014213663458, + "grad_norm": 338.0, + "learning_rate": 5.001687504079085e-05, + "loss": 14.0628, + "step": 12353 + }, + { + "epoch": 0.514943103663874, + "grad_norm": 366.0, + "learning_rate": 5.001012502459753e-05, + "loss": 12.8127, + "step": 12354 + }, + { + "epoch": 0.5149847859614022, + "grad_norm": 206.0, + "learning_rate": 5.000337500821968e-05, + "loss": 10.8754, + "step": 12355 + }, + { + "epoch": 0.5150264682589304, + "grad_norm": 880.0, + "learning_rate": 4.999662499178033e-05, + "loss": 24.8756, + "step": 12356 + }, + { + "epoch": 0.5150681505564587, + "grad_norm": 266.0, + "learning_rate": 4.9989874975402474e-05, + "loss": 14.6251, + "step": 12357 + }, + { + "epoch": 0.5151098328539869, + "grad_norm": 1896.0, + "learning_rate": 4.998312495920917e-05, + "loss": 38.5004, + "step": 12358 + }, + { + "epoch": 0.5151515151515151, + "grad_norm": 334.0, + "learning_rate": 4.9976374943323396e-05, + "loss": 13.0626, + "step": 12359 + }, + { + "epoch": 0.5151931974490433, + "grad_norm": 548.0, + "learning_rate": 4.996962492786821e-05, + "loss": 17.1254, + "step": 12360 + }, + { + "epoch": 0.5152348797465717, + "grad_norm": 302.0, + "learning_rate": 4.996287491296659e-05, + "loss": 13.8132, + "step": 12361 + }, + { + "epoch": 0.5152765620440999, + "grad_norm": 344.0, + "learning_rate": 4.9956124898741605e-05, + "loss": 14.063, + "step": 12362 + }, + { + "epoch": 0.5153182443416281, + "grad_norm": 103.5, + "learning_rate": 4.9949374885316216e-05, + "loss": 7.6877, + "step": 12363 + }, + { + "epoch": 0.5153599266391563, + "grad_norm": 276.0, + "learning_rate": 4.994262487281351e-05, + "loss": 12.8131, + "step": 12364 + }, + { + "epoch": 0.5154016089366846, + "grad_norm": 478.0, + "learning_rate": 4.9935874861356443e-05, + "loss": 16.7503, + "step": 12365 + }, + { + "epoch": 0.5154432912342128, + "grad_norm": 628.0, + "learning_rate": 4.992912485106808e-05, + "loss": 23.1265, + "step": 12366 + }, + { + "epoch": 0.515484973531741, + "grad_norm": 360.0, + "learning_rate": 4.99223748420714e-05, + "loss": 15.0628, + "step": 12367 + }, + { + "epoch": 0.5155266558292693, + "grad_norm": 223.0, + "learning_rate": 4.991562483448946e-05, + "loss": 13.1883, + "step": 12368 + }, + { + "epoch": 0.5155683381267976, + "grad_norm": 342.0, + "learning_rate": 4.9908874828445265e-05, + "loss": 12.5657, + "step": 12369 + }, + { + "epoch": 0.5156100204243258, + "grad_norm": 400.0, + "learning_rate": 4.990212482406184e-05, + "loss": 15.2508, + "step": 12370 + }, + { + "epoch": 0.515651702721854, + "grad_norm": 137.0, + "learning_rate": 4.9895374821462173e-05, + "loss": 10.1251, + "step": 12371 + }, + { + "epoch": 0.5156933850193822, + "grad_norm": 454.0, + "learning_rate": 4.988862482076934e-05, + "loss": 16.0002, + "step": 12372 + }, + { + "epoch": 0.5157350673169105, + "grad_norm": 280.0, + "learning_rate": 4.988187482210631e-05, + "loss": 11.9379, + "step": 12373 + }, + { + "epoch": 0.5157767496144388, + "grad_norm": 66.5, + "learning_rate": 4.987512482559614e-05, + "loss": 8.814, + "step": 12374 + }, + { + "epoch": 0.515818431911967, + "grad_norm": 85.0, + "learning_rate": 4.9868374831361815e-05, + "loss": 8.1877, + "step": 12375 + }, + { + "epoch": 0.5158601142094952, + "grad_norm": 134.0, + "learning_rate": 4.986162483952638e-05, + "loss": 10.5002, + "step": 12376 + }, + { + "epoch": 0.5159017965070235, + "grad_norm": 556.0, + "learning_rate": 4.9854874850212836e-05, + "loss": 16.7511, + "step": 12377 + }, + { + "epoch": 0.5159434788045517, + "grad_norm": 193.0, + "learning_rate": 4.984812486354423e-05, + "loss": 11.3753, + "step": 12378 + }, + { + "epoch": 0.5159851611020799, + "grad_norm": 270.0, + "learning_rate": 4.9841374879643535e-05, + "loss": 11.2503, + "step": 12379 + }, + { + "epoch": 0.5160268433996081, + "grad_norm": 486.0, + "learning_rate": 4.9834624898633806e-05, + "loss": 17.6256, + "step": 12380 + }, + { + "epoch": 0.5160685256971365, + "grad_norm": 302.0, + "learning_rate": 4.982787492063806e-05, + "loss": 14.8129, + "step": 12381 + }, + { + "epoch": 0.5161102079946647, + "grad_norm": 420.0, + "learning_rate": 4.9821124945779305e-05, + "loss": 15.3127, + "step": 12382 + }, + { + "epoch": 0.5161518902921929, + "grad_norm": 288.0, + "learning_rate": 4.981437497418055e-05, + "loss": 13.7503, + "step": 12383 + }, + { + "epoch": 0.5161935725897211, + "grad_norm": 644.0, + "learning_rate": 4.9807625005964856e-05, + "loss": 20.5008, + "step": 12384 + }, + { + "epoch": 0.5162352548872494, + "grad_norm": 249.0, + "learning_rate": 4.9800875041255185e-05, + "loss": 10.1895, + "step": 12385 + }, + { + "epoch": 0.5162769371847776, + "grad_norm": 117.5, + "learning_rate": 4.9794125080174616e-05, + "loss": 8.6256, + "step": 12386 + }, + { + "epoch": 0.5163186194823058, + "grad_norm": 434.0, + "learning_rate": 4.9787375122846105e-05, + "loss": 14.1904, + "step": 12387 + }, + { + "epoch": 0.516360301779834, + "grad_norm": 296.0, + "learning_rate": 4.978062516939272e-05, + "loss": 13.2503, + "step": 12388 + }, + { + "epoch": 0.5164019840773624, + "grad_norm": 648.0, + "learning_rate": 4.977387521993745e-05, + "loss": 20.2503, + "step": 12389 + }, + { + "epoch": 0.5164436663748906, + "grad_norm": 202.0, + "learning_rate": 4.976712527460334e-05, + "loss": 11.1877, + "step": 12390 + }, + { + "epoch": 0.5164853486724188, + "grad_norm": 78.5, + "learning_rate": 4.9760375333513365e-05, + "loss": 8.6879, + "step": 12391 + }, + { + "epoch": 0.516527030969947, + "grad_norm": 916.0, + "learning_rate": 4.9753625396790584e-05, + "loss": 21.2553, + "step": 12392 + }, + { + "epoch": 0.5165687132674753, + "grad_norm": 608.0, + "learning_rate": 4.974687546455801e-05, + "loss": 18.6259, + "step": 12393 + }, + { + "epoch": 0.5166103955650035, + "grad_norm": 158.0, + "learning_rate": 4.974012553693865e-05, + "loss": 9.9379, + "step": 12394 + }, + { + "epoch": 0.5166520778625318, + "grad_norm": 412.0, + "learning_rate": 4.973337561405551e-05, + "loss": 15.5007, + "step": 12395 + }, + { + "epoch": 0.51669376016006, + "grad_norm": 660.0, + "learning_rate": 4.972662569603165e-05, + "loss": 17.3802, + "step": 12396 + }, + { + "epoch": 0.5167354424575883, + "grad_norm": 430.0, + "learning_rate": 4.9719875782990036e-05, + "loss": 15.4381, + "step": 12397 + }, + { + "epoch": 0.5167771247551165, + "grad_norm": 784.0, + "learning_rate": 4.971312587505374e-05, + "loss": 20.6295, + "step": 12398 + }, + { + "epoch": 0.5168188070526447, + "grad_norm": 456.0, + "learning_rate": 4.970637597234572e-05, + "loss": 16.3751, + "step": 12399 + }, + { + "epoch": 0.516860489350173, + "grad_norm": 282.0, + "learning_rate": 4.9699626074989045e-05, + "loss": 12.8128, + "step": 12400 + }, + { + "epoch": 0.5169021716477012, + "grad_norm": 386.0, + "learning_rate": 4.96928761831067e-05, + "loss": 12.5044, + "step": 12401 + }, + { + "epoch": 0.5169438539452295, + "grad_norm": 224.0, + "learning_rate": 4.968612629682172e-05, + "loss": 12.0628, + "step": 12402 + }, + { + "epoch": 0.5169855362427577, + "grad_norm": 580.0, + "learning_rate": 4.967937641625711e-05, + "loss": 18.3755, + "step": 12403 + }, + { + "epoch": 0.517027218540286, + "grad_norm": 227.0, + "learning_rate": 4.96726265415359e-05, + "loss": 12.2502, + "step": 12404 + }, + { + "epoch": 0.5170689008378142, + "grad_norm": 130.0, + "learning_rate": 4.966587667278109e-05, + "loss": 7.5942, + "step": 12405 + }, + { + "epoch": 0.5171105831353424, + "grad_norm": 101.0, + "learning_rate": 4.965912681011571e-05, + "loss": 7.9377, + "step": 12406 + }, + { + "epoch": 0.5171522654328706, + "grad_norm": 412.0, + "learning_rate": 4.965237695366278e-05, + "loss": 15.876, + "step": 12407 + }, + { + "epoch": 0.517193947730399, + "grad_norm": 440.0, + "learning_rate": 4.964562710354531e-05, + "loss": 14.5628, + "step": 12408 + }, + { + "epoch": 0.5172356300279272, + "grad_norm": 310.0, + "learning_rate": 4.963887725988631e-05, + "loss": 12.0008, + "step": 12409 + }, + { + "epoch": 0.5172773123254554, + "grad_norm": 520.0, + "learning_rate": 4.963212742280882e-05, + "loss": 14.0046, + "step": 12410 + }, + { + "epoch": 0.5173189946229836, + "grad_norm": 536.0, + "learning_rate": 4.962537759243582e-05, + "loss": 17.6256, + "step": 12411 + }, + { + "epoch": 0.5173606769205119, + "grad_norm": 524.0, + "learning_rate": 4.961862776889036e-05, + "loss": 16.5003, + "step": 12412 + }, + { + "epoch": 0.5174023592180401, + "grad_norm": 420.0, + "learning_rate": 4.9611877952295435e-05, + "loss": 15.376, + "step": 12413 + }, + { + "epoch": 0.5174440415155683, + "grad_norm": 284.0, + "learning_rate": 4.9605128142774085e-05, + "loss": 13.1253, + "step": 12414 + }, + { + "epoch": 0.5174857238130965, + "grad_norm": 480.0, + "learning_rate": 4.959837834044928e-05, + "loss": 17.0002, + "step": 12415 + }, + { + "epoch": 0.5175274061106249, + "grad_norm": 151.0, + "learning_rate": 4.95916285454441e-05, + "loss": 11.1878, + "step": 12416 + }, + { + "epoch": 0.5175690884081531, + "grad_norm": 832.0, + "learning_rate": 4.95848787578815e-05, + "loss": 21.5008, + "step": 12417 + }, + { + "epoch": 0.5176107707056813, + "grad_norm": 588.0, + "learning_rate": 4.957812897788454e-05, + "loss": 18.2504, + "step": 12418 + }, + { + "epoch": 0.5176524530032095, + "grad_norm": 227.0, + "learning_rate": 4.9571379205576204e-05, + "loss": 10.4385, + "step": 12419 + }, + { + "epoch": 0.5176941353007378, + "grad_norm": 716.0, + "learning_rate": 4.956462944107952e-05, + "loss": 20.7506, + "step": 12420 + }, + { + "epoch": 0.517735817598266, + "grad_norm": 192.0, + "learning_rate": 4.95578796845175e-05, + "loss": 10.0627, + "step": 12421 + }, + { + "epoch": 0.5177774998957942, + "grad_norm": 130.0, + "learning_rate": 4.955112993601318e-05, + "loss": 9.7503, + "step": 12422 + }, + { + "epoch": 0.5178191821933225, + "grad_norm": 592.0, + "learning_rate": 4.954438019568954e-05, + "loss": 17.1252, + "step": 12423 + }, + { + "epoch": 0.5178608644908508, + "grad_norm": 320.0, + "learning_rate": 4.9537630463669624e-05, + "loss": 13.5004, + "step": 12424 + }, + { + "epoch": 0.517902546788379, + "grad_norm": 300.0, + "learning_rate": 4.953088074007642e-05, + "loss": 11.8752, + "step": 12425 + }, + { + "epoch": 0.5179442290859072, + "grad_norm": 115.5, + "learning_rate": 4.9524131025032975e-05, + "loss": 8.9385, + "step": 12426 + }, + { + "epoch": 0.5179859113834354, + "grad_norm": 1560.0, + "learning_rate": 4.951738131866227e-05, + "loss": 36.2502, + "step": 12427 + }, + { + "epoch": 0.5180275936809637, + "grad_norm": 238.0, + "learning_rate": 4.951063162108734e-05, + "loss": 9.8763, + "step": 12428 + }, + { + "epoch": 0.518069275978492, + "grad_norm": 458.0, + "learning_rate": 4.950388193243118e-05, + "loss": 17.2503, + "step": 12429 + }, + { + "epoch": 0.5181109582760202, + "grad_norm": 720.0, + "learning_rate": 4.949713225281682e-05, + "loss": 21.6254, + "step": 12430 + }, + { + "epoch": 0.5181526405735484, + "grad_norm": 512.0, + "learning_rate": 4.949038258236728e-05, + "loss": 17.6252, + "step": 12431 + }, + { + "epoch": 0.5181943228710767, + "grad_norm": 322.0, + "learning_rate": 4.948363292120555e-05, + "loss": 13.2501, + "step": 12432 + }, + { + "epoch": 0.5182360051686049, + "grad_norm": 532.0, + "learning_rate": 4.947688326945465e-05, + "loss": 17.7502, + "step": 12433 + }, + { + "epoch": 0.5182776874661331, + "grad_norm": 82.5, + "learning_rate": 4.9470133627237624e-05, + "loss": 6.6878, + "step": 12434 + }, + { + "epoch": 0.5183193697636613, + "grad_norm": 1288.0, + "learning_rate": 4.9463383994677436e-05, + "loss": 31.1255, + "step": 12435 + }, + { + "epoch": 0.5183610520611897, + "grad_norm": 155.0, + "learning_rate": 4.945663437189715e-05, + "loss": 9.1256, + "step": 12436 + }, + { + "epoch": 0.5184027343587179, + "grad_norm": 340.0, + "learning_rate": 4.9449884759019724e-05, + "loss": 12.8754, + "step": 12437 + }, + { + "epoch": 0.5184444166562461, + "grad_norm": 198.0, + "learning_rate": 4.944313515616821e-05, + "loss": 10.9378, + "step": 12438 + }, + { + "epoch": 0.5184860989537743, + "grad_norm": 414.0, + "learning_rate": 4.94363855634656e-05, + "loss": 15.063, + "step": 12439 + }, + { + "epoch": 0.5185277812513026, + "grad_norm": 251.0, + "learning_rate": 4.942963598103493e-05, + "loss": 12.6253, + "step": 12440 + }, + { + "epoch": 0.5185694635488308, + "grad_norm": 240.0, + "learning_rate": 4.942288640899917e-05, + "loss": 13.1885, + "step": 12441 + }, + { + "epoch": 0.518611145846359, + "grad_norm": 342.0, + "learning_rate": 4.9416136847481375e-05, + "loss": 12.4386, + "step": 12442 + }, + { + "epoch": 0.5186528281438872, + "grad_norm": 402.0, + "learning_rate": 4.9409387296604524e-05, + "loss": 15.5006, + "step": 12443 + }, + { + "epoch": 0.5186945104414156, + "grad_norm": 294.0, + "learning_rate": 4.9402637756491645e-05, + "loss": 13.938, + "step": 12444 + }, + { + "epoch": 0.5187361927389438, + "grad_norm": 142.0, + "learning_rate": 4.939588822726575e-05, + "loss": 10.2504, + "step": 12445 + }, + { + "epoch": 0.518777875036472, + "grad_norm": 184.0, + "learning_rate": 4.938913870904985e-05, + "loss": 10.5627, + "step": 12446 + }, + { + "epoch": 0.5188195573340002, + "grad_norm": 162.0, + "learning_rate": 4.9382389201966936e-05, + "loss": 12.0003, + "step": 12447 + }, + { + "epoch": 0.5188612396315285, + "grad_norm": 592.0, + "learning_rate": 4.937563970614006e-05, + "loss": 18.8755, + "step": 12448 + }, + { + "epoch": 0.5189029219290567, + "grad_norm": 255.0, + "learning_rate": 4.936889022169218e-05, + "loss": 13.0631, + "step": 12449 + }, + { + "epoch": 0.518944604226585, + "grad_norm": 498.0, + "learning_rate": 4.936214074874635e-05, + "loss": 16.6258, + "step": 12450 + }, + { + "epoch": 0.5189862865241132, + "grad_norm": 1104.0, + "learning_rate": 4.935539128742555e-05, + "loss": 26.0006, + "step": 12451 + }, + { + "epoch": 0.5190279688216415, + "grad_norm": 123.0, + "learning_rate": 4.9348641837852814e-05, + "loss": 11.6259, + "step": 12452 + }, + { + "epoch": 0.5190696511191697, + "grad_norm": 544.0, + "learning_rate": 4.934189240015111e-05, + "loss": 17.7518, + "step": 12453 + }, + { + "epoch": 0.5191113334166979, + "grad_norm": 193.0, + "learning_rate": 4.933514297444351e-05, + "loss": 9.8752, + "step": 12454 + }, + { + "epoch": 0.5191530157142261, + "grad_norm": 660.0, + "learning_rate": 4.932839356085297e-05, + "loss": 19.7505, + "step": 12455 + }, + { + "epoch": 0.5191946980117544, + "grad_norm": 320.0, + "learning_rate": 4.932164415950251e-05, + "loss": 12.8756, + "step": 12456 + }, + { + "epoch": 0.5192363803092827, + "grad_norm": 82.5, + "learning_rate": 4.931489477051516e-05, + "loss": 7.0319, + "step": 12457 + }, + { + "epoch": 0.5192780626068109, + "grad_norm": 442.0, + "learning_rate": 4.93081453940139e-05, + "loss": 17.1258, + "step": 12458 + }, + { + "epoch": 0.5193197449043391, + "grad_norm": 352.0, + "learning_rate": 4.9301396030121756e-05, + "loss": 13.1878, + "step": 12459 + }, + { + "epoch": 0.5193614272018674, + "grad_norm": 163.0, + "learning_rate": 4.929464667896175e-05, + "loss": 8.5627, + "step": 12460 + }, + { + "epoch": 0.5194031094993956, + "grad_norm": 660.0, + "learning_rate": 4.928789734065685e-05, + "loss": 18.5036, + "step": 12461 + }, + { + "epoch": 0.5194447917969238, + "grad_norm": 536.0, + "learning_rate": 4.92811480153301e-05, + "loss": 18.2502, + "step": 12462 + }, + { + "epoch": 0.519486474094452, + "grad_norm": 384.0, + "learning_rate": 4.927439870310447e-05, + "loss": 15.6253, + "step": 12463 + }, + { + "epoch": 0.5195281563919804, + "grad_norm": 580.0, + "learning_rate": 4.926764940410301e-05, + "loss": 20.6255, + "step": 12464 + }, + { + "epoch": 0.5195698386895086, + "grad_norm": 580.0, + "learning_rate": 4.926090011844868e-05, + "loss": 19.2501, + "step": 12465 + }, + { + "epoch": 0.5196115209870368, + "grad_norm": 768.0, + "learning_rate": 4.925415084626454e-05, + "loss": 20.7506, + "step": 12466 + }, + { + "epoch": 0.519653203284565, + "grad_norm": 1296.0, + "learning_rate": 4.924740158767354e-05, + "loss": 26.0039, + "step": 12467 + }, + { + "epoch": 0.5196948855820933, + "grad_norm": 454.0, + "learning_rate": 4.9240652342798726e-05, + "loss": 16.252, + "step": 12468 + }, + { + "epoch": 0.5197365678796215, + "grad_norm": 207.0, + "learning_rate": 4.923390311176309e-05, + "loss": 11.6255, + "step": 12469 + }, + { + "epoch": 0.5197782501771497, + "grad_norm": 756.0, + "learning_rate": 4.922715389468964e-05, + "loss": 19.3764, + "step": 12470 + }, + { + "epoch": 0.5198199324746781, + "grad_norm": 418.0, + "learning_rate": 4.922040469170137e-05, + "loss": 13.8753, + "step": 12471 + }, + { + "epoch": 0.5198616147722063, + "grad_norm": 736.0, + "learning_rate": 4.921365550292131e-05, + "loss": 21.1257, + "step": 12472 + }, + { + "epoch": 0.5199032970697345, + "grad_norm": 460.0, + "learning_rate": 4.920690632847243e-05, + "loss": 18.2503, + "step": 12473 + }, + { + "epoch": 0.5199449793672627, + "grad_norm": 268.0, + "learning_rate": 4.920015716847777e-05, + "loss": 13.7506, + "step": 12474 + }, + { + "epoch": 0.519986661664791, + "grad_norm": 344.0, + "learning_rate": 4.91934080230603e-05, + "loss": 14.8136, + "step": 12475 + }, + { + "epoch": 0.5200283439623192, + "grad_norm": 302.0, + "learning_rate": 4.918665889234306e-05, + "loss": 14.0004, + "step": 12476 + }, + { + "epoch": 0.5200700262598474, + "grad_norm": 119.0, + "learning_rate": 4.9179909776449005e-05, + "loss": 8.6255, + "step": 12477 + }, + { + "epoch": 0.5201117085573757, + "grad_norm": 282.0, + "learning_rate": 4.91731606755012e-05, + "loss": 14.1877, + "step": 12478 + }, + { + "epoch": 0.520153390854904, + "grad_norm": 472.0, + "learning_rate": 4.916641158962259e-05, + "loss": 17.0001, + "step": 12479 + }, + { + "epoch": 0.5201950731524322, + "grad_norm": 204.0, + "learning_rate": 4.9159662518936225e-05, + "loss": 8.6876, + "step": 12480 + }, + { + "epoch": 0.5202367554499604, + "grad_norm": 217.0, + "learning_rate": 4.9152913463565056e-05, + "loss": 11.3752, + "step": 12481 + }, + { + "epoch": 0.5202784377474886, + "grad_norm": 240.0, + "learning_rate": 4.914616442363213e-05, + "loss": 10.7504, + "step": 12482 + }, + { + "epoch": 0.5203201200450169, + "grad_norm": 245.0, + "learning_rate": 4.9139415399260425e-05, + "loss": 11.001, + "step": 12483 + }, + { + "epoch": 0.5203618023425451, + "grad_norm": 360.0, + "learning_rate": 4.9132666390572975e-05, + "loss": 14.5002, + "step": 12484 + }, + { + "epoch": 0.5204034846400734, + "grad_norm": 274.0, + "learning_rate": 4.9125917397692726e-05, + "loss": 13.1879, + "step": 12485 + }, + { + "epoch": 0.5204451669376016, + "grad_norm": 207.0, + "learning_rate": 4.911916842074274e-05, + "loss": 12.1252, + "step": 12486 + }, + { + "epoch": 0.5204868492351299, + "grad_norm": 496.0, + "learning_rate": 4.911241945984597e-05, + "loss": 17.5003, + "step": 12487 + }, + { + "epoch": 0.5205285315326581, + "grad_norm": 668.0, + "learning_rate": 4.910567051512544e-05, + "loss": 19.6253, + "step": 12488 + }, + { + "epoch": 0.5205702138301863, + "grad_norm": 456.0, + "learning_rate": 4.9098921586704136e-05, + "loss": 15.6256, + "step": 12489 + }, + { + "epoch": 0.5206118961277145, + "grad_norm": 488.0, + "learning_rate": 4.909217267470508e-05, + "loss": 17.0002, + "step": 12490 + }, + { + "epoch": 0.5206535784252428, + "grad_norm": 310.0, + "learning_rate": 4.9085423779251235e-05, + "loss": 14.0627, + "step": 12491 + }, + { + "epoch": 0.5206952607227711, + "grad_norm": 458.0, + "learning_rate": 4.907867490046565e-05, + "loss": 13.6279, + "step": 12492 + }, + { + "epoch": 0.5207369430202993, + "grad_norm": 370.0, + "learning_rate": 4.907192603847128e-05, + "loss": 13.1252, + "step": 12493 + }, + { + "epoch": 0.5207786253178275, + "grad_norm": 604.0, + "learning_rate": 4.9065177193391143e-05, + "loss": 16.8793, + "step": 12494 + }, + { + "epoch": 0.5208203076153558, + "grad_norm": 1144.0, + "learning_rate": 4.9058428365348235e-05, + "loss": 33.0004, + "step": 12495 + }, + { + "epoch": 0.520861989912884, + "grad_norm": 512.0, + "learning_rate": 4.905167955446555e-05, + "loss": 17.8753, + "step": 12496 + }, + { + "epoch": 0.5209036722104122, + "grad_norm": 364.0, + "learning_rate": 4.9044930760866085e-05, + "loss": 13.0003, + "step": 12497 + }, + { + "epoch": 0.5209453545079404, + "grad_norm": 280.0, + "learning_rate": 4.903818198467286e-05, + "loss": 12.7504, + "step": 12498 + }, + { + "epoch": 0.5209870368054688, + "grad_norm": 296.0, + "learning_rate": 4.903143322600884e-05, + "loss": 14.1877, + "step": 12499 + }, + { + "epoch": 0.521028719102997, + "grad_norm": 390.0, + "learning_rate": 4.902468448499705e-05, + "loss": 15.1252, + "step": 12500 + }, + { + "epoch": 0.5210704014005252, + "grad_norm": 490.0, + "learning_rate": 4.9017935761760446e-05, + "loss": 17.6255, + "step": 12501 + }, + { + "epoch": 0.5211120836980534, + "grad_norm": 376.0, + "learning_rate": 4.901118705642208e-05, + "loss": 15.688, + "step": 12502 + }, + { + "epoch": 0.5211537659955817, + "grad_norm": 1296.0, + "learning_rate": 4.900443836910489e-05, + "loss": 29.7545, + "step": 12503 + }, + { + "epoch": 0.5211954482931099, + "grad_norm": 164.0, + "learning_rate": 4.8997689699931924e-05, + "loss": 10.6256, + "step": 12504 + }, + { + "epoch": 0.5212371305906381, + "grad_norm": 528.0, + "learning_rate": 4.8990941049026136e-05, + "loss": 17.6252, + "step": 12505 + }, + { + "epoch": 0.5212788128881664, + "grad_norm": 127.5, + "learning_rate": 4.898419241651054e-05, + "loss": 9.1879, + "step": 12506 + }, + { + "epoch": 0.5213204951856947, + "grad_norm": 197.0, + "learning_rate": 4.8977443802508126e-05, + "loss": 12.8758, + "step": 12507 + }, + { + "epoch": 0.5213621774832229, + "grad_norm": 640.0, + "learning_rate": 4.8970695207141895e-05, + "loss": 21.0005, + "step": 12508 + }, + { + "epoch": 0.5214038597807511, + "grad_norm": 142.0, + "learning_rate": 4.896394663053482e-05, + "loss": 10.2502, + "step": 12509 + }, + { + "epoch": 0.5214455420782793, + "grad_norm": 592.0, + "learning_rate": 4.8957198072809936e-05, + "loss": 20.2502, + "step": 12510 + }, + { + "epoch": 0.5214872243758076, + "grad_norm": 354.0, + "learning_rate": 4.895044953409018e-05, + "loss": 14.1251, + "step": 12511 + }, + { + "epoch": 0.5215289066733358, + "grad_norm": 478.0, + "learning_rate": 4.894370101449861e-05, + "loss": 14.8752, + "step": 12512 + }, + { + "epoch": 0.5215705889708641, + "grad_norm": 362.0, + "learning_rate": 4.8936952514158145e-05, + "loss": 10.8165, + "step": 12513 + }, + { + "epoch": 0.5216122712683923, + "grad_norm": 196.0, + "learning_rate": 4.8930204033191845e-05, + "loss": 11.0003, + "step": 12514 + }, + { + "epoch": 0.5216539535659206, + "grad_norm": 360.0, + "learning_rate": 4.8923455571722645e-05, + "loss": 12.3127, + "step": 12515 + }, + { + "epoch": 0.5216956358634488, + "grad_norm": 223.0, + "learning_rate": 4.8916707129873595e-05, + "loss": 10.1878, + "step": 12516 + }, + { + "epoch": 0.521737318160977, + "grad_norm": 434.0, + "learning_rate": 4.890995870776762e-05, + "loss": 16.1252, + "step": 12517 + }, + { + "epoch": 0.5217790004585052, + "grad_norm": 334.0, + "learning_rate": 4.890321030552776e-05, + "loss": 13.5002, + "step": 12518 + }, + { + "epoch": 0.5218206827560335, + "grad_norm": 478.0, + "learning_rate": 4.8896461923276994e-05, + "loss": 17.1278, + "step": 12519 + }, + { + "epoch": 0.5218623650535618, + "grad_norm": 362.0, + "learning_rate": 4.8889713561138304e-05, + "loss": 12.9379, + "step": 12520 + }, + { + "epoch": 0.52190404735109, + "grad_norm": 576.0, + "learning_rate": 4.8882965219234675e-05, + "loss": 18.2503, + "step": 12521 + }, + { + "epoch": 0.5219457296486182, + "grad_norm": 262.0, + "learning_rate": 4.887621689768912e-05, + "loss": 12.5006, + "step": 12522 + }, + { + "epoch": 0.5219874119461465, + "grad_norm": 406.0, + "learning_rate": 4.886946859662459e-05, + "loss": 15.3755, + "step": 12523 + }, + { + "epoch": 0.5220290942436747, + "grad_norm": 346.0, + "learning_rate": 4.8862720316164125e-05, + "loss": 14.1886, + "step": 12524 + }, + { + "epoch": 0.5220707765412029, + "grad_norm": 656.0, + "learning_rate": 4.885597205643065e-05, + "loss": 19.6253, + "step": 12525 + }, + { + "epoch": 0.5221124588387311, + "grad_norm": 219.0, + "learning_rate": 4.884922381754721e-05, + "loss": 13.5013, + "step": 12526 + }, + { + "epoch": 0.5221541411362595, + "grad_norm": 1176.0, + "learning_rate": 4.884247559963676e-05, + "loss": 28.5021, + "step": 12527 + }, + { + "epoch": 0.5221958234337877, + "grad_norm": 344.0, + "learning_rate": 4.88357274028223e-05, + "loss": 15.1883, + "step": 12528 + }, + { + "epoch": 0.5222375057313159, + "grad_norm": 390.0, + "learning_rate": 4.88289792272268e-05, + "loss": 14.6879, + "step": 12529 + }, + { + "epoch": 0.5222791880288441, + "grad_norm": 165.0, + "learning_rate": 4.882223107297327e-05, + "loss": 11.2502, + "step": 12530 + }, + { + "epoch": 0.5223208703263724, + "grad_norm": 1012.0, + "learning_rate": 4.881548294018467e-05, + "loss": 22.129, + "step": 12531 + }, + { + "epoch": 0.5223625526239006, + "grad_norm": 127.5, + "learning_rate": 4.8808734828984e-05, + "loss": 8.0635, + "step": 12532 + }, + { + "epoch": 0.5224042349214288, + "grad_norm": 276.0, + "learning_rate": 4.8801986739494254e-05, + "loss": 13.3753, + "step": 12533 + }, + { + "epoch": 0.5224459172189571, + "grad_norm": 350.0, + "learning_rate": 4.87952386718384e-05, + "loss": 14.3126, + "step": 12534 + }, + { + "epoch": 0.5224875995164854, + "grad_norm": 394.0, + "learning_rate": 4.878849062613942e-05, + "loss": 15.5648, + "step": 12535 + }, + { + "epoch": 0.5225292818140136, + "grad_norm": 434.0, + "learning_rate": 4.878174260252033e-05, + "loss": 15.5003, + "step": 12536 + }, + { + "epoch": 0.5225709641115418, + "grad_norm": 252.0, + "learning_rate": 4.877499460110406e-05, + "loss": 13.6878, + "step": 12537 + }, + { + "epoch": 0.52261264640907, + "grad_norm": 392.0, + "learning_rate": 4.8768246622013655e-05, + "loss": 15.5002, + "step": 12538 + }, + { + "epoch": 0.5226543287065983, + "grad_norm": 282.0, + "learning_rate": 4.876149866537203e-05, + "loss": 14.3755, + "step": 12539 + }, + { + "epoch": 0.5226960110041265, + "grad_norm": 844.0, + "learning_rate": 4.875475073130223e-05, + "loss": 24.1266, + "step": 12540 + }, + { + "epoch": 0.5227376933016548, + "grad_norm": 316.0, + "learning_rate": 4.874800281992719e-05, + "loss": 11.7504, + "step": 12541 + }, + { + "epoch": 0.522779375599183, + "grad_norm": 310.0, + "learning_rate": 4.8741254931369925e-05, + "loss": 13.3127, + "step": 12542 + }, + { + "epoch": 0.5228210578967113, + "grad_norm": 215.0, + "learning_rate": 4.8734507065753384e-05, + "loss": 11.313, + "step": 12543 + }, + { + "epoch": 0.5228627401942395, + "grad_norm": 382.0, + "learning_rate": 4.872775922320057e-05, + "loss": 15.3755, + "step": 12544 + }, + { + "epoch": 0.5229044224917677, + "grad_norm": 466.0, + "learning_rate": 4.872101140383446e-05, + "loss": 16.7501, + "step": 12545 + }, + { + "epoch": 0.522946104789296, + "grad_norm": 432.0, + "learning_rate": 4.871426360777803e-05, + "loss": 16.5016, + "step": 12546 + }, + { + "epoch": 0.5229877870868243, + "grad_norm": 406.0, + "learning_rate": 4.870751583515425e-05, + "loss": 13.5004, + "step": 12547 + }, + { + "epoch": 0.5230294693843525, + "grad_norm": 498.0, + "learning_rate": 4.870076808608613e-05, + "loss": 18.3752, + "step": 12548 + }, + { + "epoch": 0.5230711516818807, + "grad_norm": 202.0, + "learning_rate": 4.86940203606966e-05, + "loss": 9.5629, + "step": 12549 + }, + { + "epoch": 0.523112833979409, + "grad_norm": 330.0, + "learning_rate": 4.8687272659108694e-05, + "loss": 14.2502, + "step": 12550 + }, + { + "epoch": 0.5231545162769372, + "grad_norm": 192.0, + "learning_rate": 4.8680524981445334e-05, + "loss": 10.938, + "step": 12551 + }, + { + "epoch": 0.5231961985744654, + "grad_norm": 482.0, + "learning_rate": 4.867377732782955e-05, + "loss": 17.3754, + "step": 12552 + }, + { + "epoch": 0.5232378808719936, + "grad_norm": 390.0, + "learning_rate": 4.866702969838426e-05, + "loss": 15.751, + "step": 12553 + }, + { + "epoch": 0.523279563169522, + "grad_norm": 89.5, + "learning_rate": 4.8660282093232506e-05, + "loss": 8.2512, + "step": 12554 + }, + { + "epoch": 0.5233212454670502, + "grad_norm": 298.0, + "learning_rate": 4.865353451249721e-05, + "loss": 12.2504, + "step": 12555 + }, + { + "epoch": 0.5233629277645784, + "grad_norm": 486.0, + "learning_rate": 4.864678695630137e-05, + "loss": 17.5005, + "step": 12556 + }, + { + "epoch": 0.5234046100621066, + "grad_norm": 448.0, + "learning_rate": 4.8640039424767957e-05, + "loss": 16.8753, + "step": 12557 + }, + { + "epoch": 0.5234462923596349, + "grad_norm": 241.0, + "learning_rate": 4.863329191801995e-05, + "loss": 8.8139, + "step": 12558 + }, + { + "epoch": 0.5234879746571631, + "grad_norm": 182.0, + "learning_rate": 4.862654443618031e-05, + "loss": 11.1877, + "step": 12559 + }, + { + "epoch": 0.5235296569546913, + "grad_norm": 828.0, + "learning_rate": 4.861979697937203e-05, + "loss": 19.1348, + "step": 12560 + }, + { + "epoch": 0.5235713392522195, + "grad_norm": 664.0, + "learning_rate": 4.861304954771806e-05, + "loss": 21.0003, + "step": 12561 + }, + { + "epoch": 0.5236130215497479, + "grad_norm": 442.0, + "learning_rate": 4.86063021413414e-05, + "loss": 15.8757, + "step": 12562 + }, + { + "epoch": 0.5236547038472761, + "grad_norm": 286.0, + "learning_rate": 4.8599554760365e-05, + "loss": 14.1879, + "step": 12563 + }, + { + "epoch": 0.5236963861448043, + "grad_norm": 194.0, + "learning_rate": 4.859280740491185e-05, + "loss": 12.1253, + "step": 12564 + }, + { + "epoch": 0.5237380684423325, + "grad_norm": 187.0, + "learning_rate": 4.858606007510489e-05, + "loss": 11.688, + "step": 12565 + }, + { + "epoch": 0.5237797507398608, + "grad_norm": 2304.0, + "learning_rate": 4.857931277106713e-05, + "loss": 42.252, + "step": 12566 + }, + { + "epoch": 0.523821433037389, + "grad_norm": 354.0, + "learning_rate": 4.85725654929215e-05, + "loss": 9.8754, + "step": 12567 + }, + { + "epoch": 0.5238631153349173, + "grad_norm": 66.5, + "learning_rate": 4.8565818240791014e-05, + "loss": 8.3752, + "step": 12568 + }, + { + "epoch": 0.5239047976324455, + "grad_norm": 244.0, + "learning_rate": 4.8559071014798595e-05, + "loss": 11.8131, + "step": 12569 + }, + { + "epoch": 0.5239464799299738, + "grad_norm": 114.0, + "learning_rate": 4.855232381506725e-05, + "loss": 9.938, + "step": 12570 + }, + { + "epoch": 0.523988162227502, + "grad_norm": 484.0, + "learning_rate": 4.854557664171991e-05, + "loss": 17.2503, + "step": 12571 + }, + { + "epoch": 0.5240298445250302, + "grad_norm": 145.0, + "learning_rate": 4.853882949487959e-05, + "loss": 10.2508, + "step": 12572 + }, + { + "epoch": 0.5240715268225584, + "grad_norm": 123.5, + "learning_rate": 4.853208237466922e-05, + "loss": 9.1253, + "step": 12573 + }, + { + "epoch": 0.5241132091200867, + "grad_norm": 544.0, + "learning_rate": 4.85253352812118e-05, + "loss": 18.1255, + "step": 12574 + }, + { + "epoch": 0.524154891417615, + "grad_norm": 208.0, + "learning_rate": 4.851858821463025e-05, + "loss": 10.7503, + "step": 12575 + }, + { + "epoch": 0.5241965737151432, + "grad_norm": 88.0, + "learning_rate": 4.8511841175047576e-05, + "loss": 8.6918, + "step": 12576 + }, + { + "epoch": 0.5242382560126714, + "grad_norm": 532.0, + "learning_rate": 4.8505094162586715e-05, + "loss": 18.3787, + "step": 12577 + }, + { + "epoch": 0.5242799383101997, + "grad_norm": 102.5, + "learning_rate": 4.849834717737066e-05, + "loss": 8.8755, + "step": 12578 + }, + { + "epoch": 0.5243216206077279, + "grad_norm": 89.0, + "learning_rate": 4.8491600219522346e-05, + "loss": 8.3753, + "step": 12579 + }, + { + "epoch": 0.5243633029052561, + "grad_norm": 412.0, + "learning_rate": 4.8484853289164775e-05, + "loss": 14.4379, + "step": 12580 + }, + { + "epoch": 0.5244049852027843, + "grad_norm": 222.0, + "learning_rate": 4.8478106386420865e-05, + "loss": 11.6878, + "step": 12581 + }, + { + "epoch": 0.5244466675003127, + "grad_norm": 772.0, + "learning_rate": 4.8471359511413604e-05, + "loss": 22.5002, + "step": 12582 + }, + { + "epoch": 0.5244883497978409, + "grad_norm": 182.0, + "learning_rate": 4.846461266426596e-05, + "loss": 11.9385, + "step": 12583 + }, + { + "epoch": 0.5245300320953691, + "grad_norm": 494.0, + "learning_rate": 4.8457865845100885e-05, + "loss": 17.7507, + "step": 12584 + }, + { + "epoch": 0.5245717143928973, + "grad_norm": 1688.0, + "learning_rate": 4.8451119054041325e-05, + "loss": 37.7503, + "step": 12585 + }, + { + "epoch": 0.5246133966904256, + "grad_norm": 362.0, + "learning_rate": 4.844437229121029e-05, + "loss": 15.5008, + "step": 12586 + }, + { + "epoch": 0.5246550789879538, + "grad_norm": 422.0, + "learning_rate": 4.8437625556730674e-05, + "loss": 13.5001, + "step": 12587 + }, + { + "epoch": 0.524696761285482, + "grad_norm": 588.0, + "learning_rate": 4.84308788507255e-05, + "loss": 20.2518, + "step": 12588 + }, + { + "epoch": 0.5247384435830103, + "grad_norm": 1184.0, + "learning_rate": 4.842413217331768e-05, + "loss": 28.8769, + "step": 12589 + }, + { + "epoch": 0.5247801258805386, + "grad_norm": 133.0, + "learning_rate": 4.841738552463021e-05, + "loss": 10.1251, + "step": 12590 + }, + { + "epoch": 0.5248218081780668, + "grad_norm": 364.0, + "learning_rate": 4.8410638904786e-05, + "loss": 15.1881, + "step": 12591 + }, + { + "epoch": 0.524863490475595, + "grad_norm": 804.0, + "learning_rate": 4.840389231390807e-05, + "loss": 22.6254, + "step": 12592 + }, + { + "epoch": 0.5249051727731232, + "grad_norm": 322.0, + "learning_rate": 4.8397145752119315e-05, + "loss": 13.6878, + "step": 12593 + }, + { + "epoch": 0.5249468550706515, + "grad_norm": 396.0, + "learning_rate": 4.8390399219542735e-05, + "loss": 15.2524, + "step": 12594 + }, + { + "epoch": 0.5249885373681797, + "grad_norm": 274.0, + "learning_rate": 4.838365271630127e-05, + "loss": 13.3127, + "step": 12595 + }, + { + "epoch": 0.525030219665708, + "grad_norm": 342.0, + "learning_rate": 4.837690624251788e-05, + "loss": 13.3127, + "step": 12596 + }, + { + "epoch": 0.5250719019632362, + "grad_norm": 220.0, + "learning_rate": 4.837015979831551e-05, + "loss": 7.9689, + "step": 12597 + }, + { + "epoch": 0.5251135842607645, + "grad_norm": 298.0, + "learning_rate": 4.836341338381714e-05, + "loss": 13.1255, + "step": 12598 + }, + { + "epoch": 0.5251552665582927, + "grad_norm": 312.0, + "learning_rate": 4.835666699914568e-05, + "loss": 14.1879, + "step": 12599 + }, + { + "epoch": 0.5251969488558209, + "grad_norm": 736.0, + "learning_rate": 4.834992064442414e-05, + "loss": 25.7504, + "step": 12600 + }, + { + "epoch": 0.5252386311533491, + "grad_norm": 452.0, + "learning_rate": 4.834317431977541e-05, + "loss": 16.001, + "step": 12601 + }, + { + "epoch": 0.5252803134508774, + "grad_norm": 203.0, + "learning_rate": 4.8336428025322514e-05, + "loss": 5.6252, + "step": 12602 + }, + { + "epoch": 0.5253219957484057, + "grad_norm": 113.0, + "learning_rate": 4.832968176118833e-05, + "loss": 6.2504, + "step": 12603 + }, + { + "epoch": 0.5253636780459339, + "grad_norm": 85.0, + "learning_rate": 4.832293552749587e-05, + "loss": 9.9391, + "step": 12604 + }, + { + "epoch": 0.5254053603434621, + "grad_norm": 344.0, + "learning_rate": 4.8316189324368035e-05, + "loss": 12.5651, + "step": 12605 + }, + { + "epoch": 0.5254470426409904, + "grad_norm": 264.0, + "learning_rate": 4.830944315192783e-05, + "loss": 12.9384, + "step": 12606 + }, + { + "epoch": 0.5254887249385186, + "grad_norm": 588.0, + "learning_rate": 4.830269701029814e-05, + "loss": 17.8755, + "step": 12607 + }, + { + "epoch": 0.5255304072360468, + "grad_norm": 272.0, + "learning_rate": 4.829595089960196e-05, + "loss": 12.7502, + "step": 12608 + }, + { + "epoch": 0.525572089533575, + "grad_norm": 414.0, + "learning_rate": 4.828920481996221e-05, + "loss": 15.6253, + "step": 12609 + }, + { + "epoch": 0.5256137718311034, + "grad_norm": 160.0, + "learning_rate": 4.828245877150189e-05, + "loss": 10.7503, + "step": 12610 + }, + { + "epoch": 0.5256554541286316, + "grad_norm": 516.0, + "learning_rate": 4.827571275434388e-05, + "loss": 17.5002, + "step": 12611 + }, + { + "epoch": 0.5256971364261598, + "grad_norm": 1848.0, + "learning_rate": 4.826896676861118e-05, + "loss": 34.7504, + "step": 12612 + }, + { + "epoch": 0.525738818723688, + "grad_norm": 308.0, + "learning_rate": 4.82622208144267e-05, + "loss": 13.6258, + "step": 12613 + }, + { + "epoch": 0.5257805010212163, + "grad_norm": 452.0, + "learning_rate": 4.825547489191342e-05, + "loss": 16.2504, + "step": 12614 + }, + { + "epoch": 0.5258221833187445, + "grad_norm": 114.5, + "learning_rate": 4.824872900119424e-05, + "loss": 9.7508, + "step": 12615 + }, + { + "epoch": 0.5258638656162727, + "grad_norm": 564.0, + "learning_rate": 4.824198314239215e-05, + "loss": 17.254, + "step": 12616 + }, + { + "epoch": 0.5259055479138011, + "grad_norm": 94.0, + "learning_rate": 4.823523731563005e-05, + "loss": 8.5629, + "step": 12617 + }, + { + "epoch": 0.5259472302113293, + "grad_norm": 384.0, + "learning_rate": 4.822849152103094e-05, + "loss": 14.7534, + "step": 12618 + }, + { + "epoch": 0.5259889125088575, + "grad_norm": 280.0, + "learning_rate": 4.8221745758717697e-05, + "loss": 13.3752, + "step": 12619 + }, + { + "epoch": 0.5260305948063857, + "grad_norm": 484.0, + "learning_rate": 4.821500002881331e-05, + "loss": 17.001, + "step": 12620 + }, + { + "epoch": 0.526072277103914, + "grad_norm": 386.0, + "learning_rate": 4.82082543314407e-05, + "loss": 14.5642, + "step": 12621 + }, + { + "epoch": 0.5261139594014422, + "grad_norm": 960.0, + "learning_rate": 4.820150866672282e-05, + "loss": 23.0004, + "step": 12622 + }, + { + "epoch": 0.5261556416989704, + "grad_norm": 644.0, + "learning_rate": 4.8194763034782595e-05, + "loss": 23.0018, + "step": 12623 + }, + { + "epoch": 0.5261973239964987, + "grad_norm": 162.0, + "learning_rate": 4.818801743574299e-05, + "loss": 10.0629, + "step": 12624 + }, + { + "epoch": 0.526239006294027, + "grad_norm": 173.0, + "learning_rate": 4.8181271869726904e-05, + "loss": 9.8134, + "step": 12625 + }, + { + "epoch": 0.5262806885915552, + "grad_norm": 684.0, + "learning_rate": 4.817452633685733e-05, + "loss": 21.6255, + "step": 12626 + }, + { + "epoch": 0.5263223708890834, + "grad_norm": 219.0, + "learning_rate": 4.8167780837257145e-05, + "loss": 11.3755, + "step": 12627 + }, + { + "epoch": 0.5263640531866116, + "grad_norm": 464.0, + "learning_rate": 4.8161035371049346e-05, + "loss": 15.8133, + "step": 12628 + }, + { + "epoch": 0.5264057354841399, + "grad_norm": 260.0, + "learning_rate": 4.815428993835682e-05, + "loss": 13.0005, + "step": 12629 + }, + { + "epoch": 0.5264474177816681, + "grad_norm": 356.0, + "learning_rate": 4.814754453930254e-05, + "loss": 14.3129, + "step": 12630 + }, + { + "epoch": 0.5264891000791964, + "grad_norm": 330.0, + "learning_rate": 4.81407991740094e-05, + "loss": 10.0653, + "step": 12631 + }, + { + "epoch": 0.5265307823767246, + "grad_norm": 300.0, + "learning_rate": 4.813405384260038e-05, + "loss": 14.5005, + "step": 12632 + }, + { + "epoch": 0.5265724646742529, + "grad_norm": 366.0, + "learning_rate": 4.812730854519839e-05, + "loss": 14.4381, + "step": 12633 + }, + { + "epoch": 0.5266141469717811, + "grad_norm": 442.0, + "learning_rate": 4.8120563281926365e-05, + "loss": 16.1253, + "step": 12634 + }, + { + "epoch": 0.5266558292693093, + "grad_norm": 1472.0, + "learning_rate": 4.811381805290724e-05, + "loss": 30.1292, + "step": 12635 + }, + { + "epoch": 0.5266975115668375, + "grad_norm": 221.0, + "learning_rate": 4.810707285826396e-05, + "loss": 9.1261, + "step": 12636 + }, + { + "epoch": 0.5267391938643659, + "grad_norm": 588.0, + "learning_rate": 4.810032769811943e-05, + "loss": 19.6254, + "step": 12637 + }, + { + "epoch": 0.5267808761618941, + "grad_norm": 322.0, + "learning_rate": 4.809358257259661e-05, + "loss": 14.1258, + "step": 12638 + }, + { + "epoch": 0.5268225584594223, + "grad_norm": 201.0, + "learning_rate": 4.8086837481818405e-05, + "loss": 11.1877, + "step": 12639 + }, + { + "epoch": 0.5268642407569505, + "grad_norm": 544.0, + "learning_rate": 4.8080092425907775e-05, + "loss": 17.8754, + "step": 12640 + }, + { + "epoch": 0.5269059230544788, + "grad_norm": 294.0, + "learning_rate": 4.807334740498761e-05, + "loss": 10.1252, + "step": 12641 + }, + { + "epoch": 0.526947605352007, + "grad_norm": 248.0, + "learning_rate": 4.806660241918088e-05, + "loss": 11.5628, + "step": 12642 + }, + { + "epoch": 0.5269892876495352, + "grad_norm": 408.0, + "learning_rate": 4.805985746861047e-05, + "loss": 15.1255, + "step": 12643 + }, + { + "epoch": 0.5270309699470634, + "grad_norm": 30.375, + "learning_rate": 4.8053112553399335e-05, + "loss": 6.0951, + "step": 12644 + }, + { + "epoch": 0.5270726522445918, + "grad_norm": 93.0, + "learning_rate": 4.804636767367041e-05, + "loss": 8.938, + "step": 12645 + }, + { + "epoch": 0.52711433454212, + "grad_norm": 199.0, + "learning_rate": 4.8039622829546596e-05, + "loss": 10.7503, + "step": 12646 + }, + { + "epoch": 0.5271560168396482, + "grad_norm": 180.0, + "learning_rate": 4.8032878021150825e-05, + "loss": 11.0626, + "step": 12647 + }, + { + "epoch": 0.5271976991371764, + "grad_norm": 258.0, + "learning_rate": 4.802613324860605e-05, + "loss": 13.0627, + "step": 12648 + }, + { + "epoch": 0.5272393814347047, + "grad_norm": 324.0, + "learning_rate": 4.801938851203514e-05, + "loss": 13.6887, + "step": 12649 + }, + { + "epoch": 0.5272810637322329, + "grad_norm": 354.0, + "learning_rate": 4.8012643811561084e-05, + "loss": 15.2503, + "step": 12650 + }, + { + "epoch": 0.5273227460297611, + "grad_norm": 266.0, + "learning_rate": 4.8005899147306746e-05, + "loss": 14.063, + "step": 12651 + }, + { + "epoch": 0.5273644283272894, + "grad_norm": 238.0, + "learning_rate": 4.799915451939509e-05, + "loss": 12.2506, + "step": 12652 + }, + { + "epoch": 0.5274061106248177, + "grad_norm": 464.0, + "learning_rate": 4.7992409927949e-05, + "loss": 16.5002, + "step": 12653 + }, + { + "epoch": 0.5274477929223459, + "grad_norm": 310.0, + "learning_rate": 4.798566537309144e-05, + "loss": 13.5001, + "step": 12654 + }, + { + "epoch": 0.5274894752198741, + "grad_norm": 604.0, + "learning_rate": 4.797892085494529e-05, + "loss": 19.7507, + "step": 12655 + }, + { + "epoch": 0.5275311575174023, + "grad_norm": 336.0, + "learning_rate": 4.79721763736335e-05, + "loss": 14.0004, + "step": 12656 + }, + { + "epoch": 0.5275728398149306, + "grad_norm": 392.0, + "learning_rate": 4.796543192927896e-05, + "loss": 15.3753, + "step": 12657 + }, + { + "epoch": 0.5276145221124589, + "grad_norm": 230.0, + "learning_rate": 4.7958687522004616e-05, + "loss": 12.251, + "step": 12658 + }, + { + "epoch": 0.5276562044099871, + "grad_norm": 800.0, + "learning_rate": 4.7951943151933365e-05, + "loss": 22.3753, + "step": 12659 + }, + { + "epoch": 0.5276978867075153, + "grad_norm": 560.0, + "learning_rate": 4.794519881918814e-05, + "loss": 17.6259, + "step": 12660 + }, + { + "epoch": 0.5277395690050436, + "grad_norm": 356.0, + "learning_rate": 4.793845452389183e-05, + "loss": 13.6879, + "step": 12661 + }, + { + "epoch": 0.5277812513025718, + "grad_norm": 130.0, + "learning_rate": 4.79317102661674e-05, + "loss": 6.6252, + "step": 12662 + }, + { + "epoch": 0.5278229336001, + "grad_norm": 620.0, + "learning_rate": 4.792496604613771e-05, + "loss": 17.7504, + "step": 12663 + }, + { + "epoch": 0.5278646158976282, + "grad_norm": 436.0, + "learning_rate": 4.7918221863925714e-05, + "loss": 15.8132, + "step": 12664 + }, + { + "epoch": 0.5279062981951566, + "grad_norm": 368.0, + "learning_rate": 4.79114777196543e-05, + "loss": 15.0007, + "step": 12665 + }, + { + "epoch": 0.5279479804926848, + "grad_norm": 340.0, + "learning_rate": 4.79047336134464e-05, + "loss": 14.5628, + "step": 12666 + }, + { + "epoch": 0.527989662790213, + "grad_norm": 160.0, + "learning_rate": 4.7897989545424895e-05, + "loss": 8.9379, + "step": 12667 + }, + { + "epoch": 0.5280313450877412, + "grad_norm": 308.0, + "learning_rate": 4.789124551571275e-05, + "loss": 13.3752, + "step": 12668 + }, + { + "epoch": 0.5280730273852695, + "grad_norm": 296.0, + "learning_rate": 4.788450152443281e-05, + "loss": 12.8753, + "step": 12669 + }, + { + "epoch": 0.5281147096827977, + "grad_norm": 352.0, + "learning_rate": 4.7877757571708025e-05, + "loss": 14.2507, + "step": 12670 + }, + { + "epoch": 0.5281563919803259, + "grad_norm": 382.0, + "learning_rate": 4.787101365766131e-05, + "loss": 14.1252, + "step": 12671 + }, + { + "epoch": 0.5281980742778541, + "grad_norm": 426.0, + "learning_rate": 4.786426978241555e-05, + "loss": 17.3754, + "step": 12672 + }, + { + "epoch": 0.5282397565753825, + "grad_norm": 278.0, + "learning_rate": 4.785752594609365e-05, + "loss": 13.3128, + "step": 12673 + }, + { + "epoch": 0.5282814388729107, + "grad_norm": 434.0, + "learning_rate": 4.7850782148818556e-05, + "loss": 16.2502, + "step": 12674 + }, + { + "epoch": 0.5283231211704389, + "grad_norm": 952.0, + "learning_rate": 4.784403839071313e-05, + "loss": 24.2511, + "step": 12675 + }, + { + "epoch": 0.5283648034679671, + "grad_norm": 124.5, + "learning_rate": 4.7837294671900314e-05, + "loss": 10.7503, + "step": 12676 + }, + { + "epoch": 0.5284064857654954, + "grad_norm": 240.0, + "learning_rate": 4.783055099250297e-05, + "loss": 12.6878, + "step": 12677 + }, + { + "epoch": 0.5284481680630236, + "grad_norm": 302.0, + "learning_rate": 4.782380735264405e-05, + "loss": 13.6251, + "step": 12678 + }, + { + "epoch": 0.5284898503605518, + "grad_norm": 148.0, + "learning_rate": 4.781706375244642e-05, + "loss": 9.9381, + "step": 12679 + }, + { + "epoch": 0.5285315326580801, + "grad_norm": 133.0, + "learning_rate": 4.781032019203301e-05, + "loss": 11.313, + "step": 12680 + }, + { + "epoch": 0.5285732149556084, + "grad_norm": 360.0, + "learning_rate": 4.780357667152669e-05, + "loss": 14.5628, + "step": 12681 + }, + { + "epoch": 0.5286148972531366, + "grad_norm": 124.0, + "learning_rate": 4.779683319105039e-05, + "loss": 8.9378, + "step": 12682 + }, + { + "epoch": 0.5286565795506648, + "grad_norm": 102.5, + "learning_rate": 4.7790089750727e-05, + "loss": 10.1257, + "step": 12683 + }, + { + "epoch": 0.528698261848193, + "grad_norm": 466.0, + "learning_rate": 4.778334635067942e-05, + "loss": 16.6253, + "step": 12684 + }, + { + "epoch": 0.5287399441457213, + "grad_norm": 564.0, + "learning_rate": 4.7776602991030545e-05, + "loss": 17.5031, + "step": 12685 + }, + { + "epoch": 0.5287816264432496, + "grad_norm": 544.0, + "learning_rate": 4.7769859671903294e-05, + "loss": 17.6253, + "step": 12686 + }, + { + "epoch": 0.5288233087407778, + "grad_norm": 308.0, + "learning_rate": 4.7763116393420526e-05, + "loss": 13.1877, + "step": 12687 + }, + { + "epoch": 0.528864991038306, + "grad_norm": 326.0, + "learning_rate": 4.775637315570519e-05, + "loss": 13.2503, + "step": 12688 + }, + { + "epoch": 0.5289066733358343, + "grad_norm": 402.0, + "learning_rate": 4.774962995888012e-05, + "loss": 14.6877, + "step": 12689 + }, + { + "epoch": 0.5289483556333625, + "grad_norm": 420.0, + "learning_rate": 4.774288680306827e-05, + "loss": 15.8127, + "step": 12690 + }, + { + "epoch": 0.5289900379308907, + "grad_norm": 446.0, + "learning_rate": 4.773614368839249e-05, + "loss": 15.5004, + "step": 12691 + }, + { + "epoch": 0.529031720228419, + "grad_norm": 396.0, + "learning_rate": 4.7729400614975706e-05, + "loss": 14.5019, + "step": 12692 + }, + { + "epoch": 0.5290734025259473, + "grad_norm": 368.0, + "learning_rate": 4.772265758294078e-05, + "loss": 15.1256, + "step": 12693 + }, + { + "epoch": 0.5291150848234755, + "grad_norm": 91.5, + "learning_rate": 4.771591459241064e-05, + "loss": 10.4379, + "step": 12694 + }, + { + "epoch": 0.5291567671210037, + "grad_norm": 219.0, + "learning_rate": 4.770917164350814e-05, + "loss": 11.6879, + "step": 12695 + }, + { + "epoch": 0.529198449418532, + "grad_norm": 408.0, + "learning_rate": 4.770242873635621e-05, + "loss": 15.0628, + "step": 12696 + }, + { + "epoch": 0.5292401317160602, + "grad_norm": 47.25, + "learning_rate": 4.76956858710777e-05, + "loss": 7.2814, + "step": 12697 + }, + { + "epoch": 0.5292818140135884, + "grad_norm": 370.0, + "learning_rate": 4.768894304779554e-05, + "loss": 14.688, + "step": 12698 + }, + { + "epoch": 0.5293234963111166, + "grad_norm": 316.0, + "learning_rate": 4.7682200266632584e-05, + "loss": 14.0629, + "step": 12699 + }, + { + "epoch": 0.529365178608645, + "grad_norm": 306.0, + "learning_rate": 4.767545752771175e-05, + "loss": 12.5632, + "step": 12700 + }, + { + "epoch": 0.5294068609061732, + "grad_norm": 366.0, + "learning_rate": 4.766871483115589e-05, + "loss": 15.2503, + "step": 12701 + }, + { + "epoch": 0.5294485432037014, + "grad_norm": 56.25, + "learning_rate": 4.766197217708793e-05, + "loss": 8.6251, + "step": 12702 + }, + { + "epoch": 0.5294902255012296, + "grad_norm": 352.0, + "learning_rate": 4.765522956563071e-05, + "loss": 14.9389, + "step": 12703 + }, + { + "epoch": 0.5295319077987579, + "grad_norm": 223.0, + "learning_rate": 4.764848699690716e-05, + "loss": 11.8128, + "step": 12704 + }, + { + "epoch": 0.5295735900962861, + "grad_norm": 456.0, + "learning_rate": 4.764174447104012e-05, + "loss": 16.2503, + "step": 12705 + }, + { + "epoch": 0.5296152723938143, + "grad_norm": 186.0, + "learning_rate": 4.763500198815253e-05, + "loss": 11.0628, + "step": 12706 + }, + { + "epoch": 0.5296569546913426, + "grad_norm": 203.0, + "learning_rate": 4.7628259548367206e-05, + "loss": 10.6252, + "step": 12707 + }, + { + "epoch": 0.5296986369888709, + "grad_norm": 144.0, + "learning_rate": 4.762151715180708e-05, + "loss": 10.0009, + "step": 12708 + }, + { + "epoch": 0.5297403192863991, + "grad_norm": 704.0, + "learning_rate": 4.761477479859501e-05, + "loss": 21.8757, + "step": 12709 + }, + { + "epoch": 0.5297820015839273, + "grad_norm": 348.0, + "learning_rate": 4.760803248885388e-05, + "loss": 14.3127, + "step": 12710 + }, + { + "epoch": 0.5298236838814555, + "grad_norm": 96.0, + "learning_rate": 4.7601290222706565e-05, + "loss": 9.9378, + "step": 12711 + }, + { + "epoch": 0.5298653661789838, + "grad_norm": 368.0, + "learning_rate": 4.759454800027597e-05, + "loss": 15.3756, + "step": 12712 + }, + { + "epoch": 0.529907048476512, + "grad_norm": 344.0, + "learning_rate": 4.758780582168492e-05, + "loss": 15.1255, + "step": 12713 + }, + { + "epoch": 0.5299487307740403, + "grad_norm": 928.0, + "learning_rate": 4.758106368705635e-05, + "loss": 27.626, + "step": 12714 + }, + { + "epoch": 0.5299904130715685, + "grad_norm": 278.0, + "learning_rate": 4.7574321596513094e-05, + "loss": 12.3128, + "step": 12715 + }, + { + "epoch": 0.5300320953690968, + "grad_norm": 300.0, + "learning_rate": 4.756757955017806e-05, + "loss": 12.6881, + "step": 12716 + }, + { + "epoch": 0.530073777666625, + "grad_norm": 348.0, + "learning_rate": 4.7560837548174084e-05, + "loss": 15.5005, + "step": 12717 + }, + { + "epoch": 0.5301154599641532, + "grad_norm": 648.0, + "learning_rate": 4.755409559062408e-05, + "loss": 18.8752, + "step": 12718 + }, + { + "epoch": 0.5301571422616814, + "grad_norm": 221.0, + "learning_rate": 4.754735367765088e-05, + "loss": 12.0005, + "step": 12719 + }, + { + "epoch": 0.5301988245592097, + "grad_norm": 344.0, + "learning_rate": 4.7540611809377386e-05, + "loss": 13.6877, + "step": 12720 + }, + { + "epoch": 0.530240506856738, + "grad_norm": 1672.0, + "learning_rate": 4.753386998592646e-05, + "loss": 39.5004, + "step": 12721 + }, + { + "epoch": 0.5302821891542662, + "grad_norm": 490.0, + "learning_rate": 4.752712820742097e-05, + "loss": 16.7501, + "step": 12722 + }, + { + "epoch": 0.5303238714517944, + "grad_norm": 536.0, + "learning_rate": 4.752038647398378e-05, + "loss": 16.7503, + "step": 12723 + }, + { + "epoch": 0.5303655537493227, + "grad_norm": 191.0, + "learning_rate": 4.751364478573779e-05, + "loss": 11.0002, + "step": 12724 + }, + { + "epoch": 0.5304072360468509, + "grad_norm": 596.0, + "learning_rate": 4.7506903142805824e-05, + "loss": 19.6252, + "step": 12725 + }, + { + "epoch": 0.5304489183443791, + "grad_norm": 74.0, + "learning_rate": 4.750016154531079e-05, + "loss": 8.2502, + "step": 12726 + }, + { + "epoch": 0.5304906006419073, + "grad_norm": 528.0, + "learning_rate": 4.749341999337551e-05, + "loss": 16.8753, + "step": 12727 + }, + { + "epoch": 0.5305322829394357, + "grad_norm": 288.0, + "learning_rate": 4.74866784871229e-05, + "loss": 12.1877, + "step": 12728 + }, + { + "epoch": 0.5305739652369639, + "grad_norm": 700.0, + "learning_rate": 4.747993702667577e-05, + "loss": 19.5005, + "step": 12729 + }, + { + "epoch": 0.5306156475344921, + "grad_norm": 374.0, + "learning_rate": 4.747319561215703e-05, + "loss": 13.0627, + "step": 12730 + }, + { + "epoch": 0.5306573298320203, + "grad_norm": 608.0, + "learning_rate": 4.746645424368951e-05, + "loss": 17.6253, + "step": 12731 + }, + { + "epoch": 0.5306990121295486, + "grad_norm": 1056.0, + "learning_rate": 4.745971292139609e-05, + "loss": 26.8764, + "step": 12732 + }, + { + "epoch": 0.5307406944270768, + "grad_norm": 512.0, + "learning_rate": 4.7452971645399636e-05, + "loss": 16.1253, + "step": 12733 + }, + { + "epoch": 0.530782376724605, + "grad_norm": 274.0, + "learning_rate": 4.7446230415823e-05, + "loss": 13.6877, + "step": 12734 + }, + { + "epoch": 0.5308240590221333, + "grad_norm": 302.0, + "learning_rate": 4.743948923278902e-05, + "loss": 13.1885, + "step": 12735 + }, + { + "epoch": 0.5308657413196616, + "grad_norm": 362.0, + "learning_rate": 4.743274809642061e-05, + "loss": 14.0011, + "step": 12736 + }, + { + "epoch": 0.5309074236171898, + "grad_norm": 350.0, + "learning_rate": 4.7426007006840566e-05, + "loss": 12.5005, + "step": 12737 + }, + { + "epoch": 0.530949105914718, + "grad_norm": 59.5, + "learning_rate": 4.7419265964171796e-05, + "loss": 8.3129, + "step": 12738 + }, + { + "epoch": 0.5309907882122462, + "grad_norm": 107.0, + "learning_rate": 4.7412524968537106e-05, + "loss": 10.4379, + "step": 12739 + }, + { + "epoch": 0.5310324705097745, + "grad_norm": 302.0, + "learning_rate": 4.740578402005941e-05, + "loss": 11.5628, + "step": 12740 + }, + { + "epoch": 0.5310741528073027, + "grad_norm": 146.0, + "learning_rate": 4.73990431188615e-05, + "loss": 10.2505, + "step": 12741 + }, + { + "epoch": 0.531115835104831, + "grad_norm": 292.0, + "learning_rate": 4.7392302265066285e-05, + "loss": 12.0003, + "step": 12742 + }, + { + "epoch": 0.5311575174023592, + "grad_norm": 560.0, + "learning_rate": 4.738556145879657e-05, + "loss": 18.3753, + "step": 12743 + }, + { + "epoch": 0.5311991996998875, + "grad_norm": 165.0, + "learning_rate": 4.737882070017525e-05, + "loss": 9.5003, + "step": 12744 + }, + { + "epoch": 0.5312408819974157, + "grad_norm": 142.0, + "learning_rate": 4.7372079989325134e-05, + "loss": 11.0002, + "step": 12745 + }, + { + "epoch": 0.5312825642949439, + "grad_norm": 276.0, + "learning_rate": 4.7365339326369105e-05, + "loss": 10.2502, + "step": 12746 + }, + { + "epoch": 0.5313242465924721, + "grad_norm": 584.0, + "learning_rate": 4.7358598711430005e-05, + "loss": 19.3753, + "step": 12747 + }, + { + "epoch": 0.5313659288900004, + "grad_norm": 952.0, + "learning_rate": 4.7351858144630674e-05, + "loss": 28.5003, + "step": 12748 + }, + { + "epoch": 0.5314076111875287, + "grad_norm": 188.0, + "learning_rate": 4.734511762609395e-05, + "loss": 10.7502, + "step": 12749 + }, + { + "epoch": 0.5314492934850569, + "grad_norm": 102.0, + "learning_rate": 4.733837715594272e-05, + "loss": 4.6876, + "step": 12750 + }, + { + "epoch": 0.5314909757825851, + "grad_norm": 536.0, + "learning_rate": 4.733163673429978e-05, + "loss": 18.3763, + "step": 12751 + }, + { + "epoch": 0.5315326580801134, + "grad_norm": 109.0, + "learning_rate": 4.732489636128802e-05, + "loss": 9.7508, + "step": 12752 + }, + { + "epoch": 0.5315743403776416, + "grad_norm": 219.0, + "learning_rate": 4.731815603703024e-05, + "loss": 11.6877, + "step": 12753 + }, + { + "epoch": 0.5316160226751698, + "grad_norm": 83.0, + "learning_rate": 4.731141576164932e-05, + "loss": 7.3757, + "step": 12754 + }, + { + "epoch": 0.531657704972698, + "grad_norm": 193.0, + "learning_rate": 4.7304675535268064e-05, + "loss": 12.0006, + "step": 12755 + }, + { + "epoch": 0.5316993872702264, + "grad_norm": 175.0, + "learning_rate": 4.729793535800937e-05, + "loss": 11.4378, + "step": 12756 + }, + { + "epoch": 0.5317410695677546, + "grad_norm": 146.0, + "learning_rate": 4.729119522999601e-05, + "loss": 10.1254, + "step": 12757 + }, + { + "epoch": 0.5317827518652828, + "grad_norm": 228.0, + "learning_rate": 4.728445515135087e-05, + "loss": 12.0628, + "step": 12758 + }, + { + "epoch": 0.531824434162811, + "grad_norm": 145.0, + "learning_rate": 4.727771512219677e-05, + "loss": 8.8127, + "step": 12759 + }, + { + "epoch": 0.5318661164603393, + "grad_norm": 330.0, + "learning_rate": 4.727097514265657e-05, + "loss": 12.8138, + "step": 12760 + }, + { + "epoch": 0.5319077987578675, + "grad_norm": 308.0, + "learning_rate": 4.726423521285307e-05, + "loss": 14.6265, + "step": 12761 + }, + { + "epoch": 0.5319494810553957, + "grad_norm": 219.0, + "learning_rate": 4.7257495332909155e-05, + "loss": 11.9378, + "step": 12762 + }, + { + "epoch": 0.5319911633529241, + "grad_norm": 496.0, + "learning_rate": 4.72507555029476e-05, + "loss": 17.8753, + "step": 12763 + }, + { + "epoch": 0.5320328456504523, + "grad_norm": 362.0, + "learning_rate": 4.724401572309129e-05, + "loss": 13.1251, + "step": 12764 + }, + { + "epoch": 0.5320745279479805, + "grad_norm": 109.0, + "learning_rate": 4.7237275993463023e-05, + "loss": 9.3145, + "step": 12765 + }, + { + "epoch": 0.5321162102455087, + "grad_norm": 247.0, + "learning_rate": 4.723053631418566e-05, + "loss": 12.7506, + "step": 12766 + }, + { + "epoch": 0.532157892543037, + "grad_norm": 488.0, + "learning_rate": 4.722379668538201e-05, + "loss": 15.5003, + "step": 12767 + }, + { + "epoch": 0.5321995748405652, + "grad_norm": 470.0, + "learning_rate": 4.7217057107174924e-05, + "loss": 16.2503, + "step": 12768 + }, + { + "epoch": 0.5322412571380934, + "grad_norm": 672.0, + "learning_rate": 4.72103175796872e-05, + "loss": 21.2502, + "step": 12769 + }, + { + "epoch": 0.5322829394356217, + "grad_norm": 604.0, + "learning_rate": 4.7203578103041697e-05, + "loss": 21.3754, + "step": 12770 + }, + { + "epoch": 0.53232462173315, + "grad_norm": 225.0, + "learning_rate": 4.7196838677361236e-05, + "loss": 10.6253, + "step": 12771 + }, + { + "epoch": 0.5323663040306782, + "grad_norm": 69.5, + "learning_rate": 4.719009930276863e-05, + "loss": 8.5003, + "step": 12772 + }, + { + "epoch": 0.5324079863282064, + "grad_norm": 255.0, + "learning_rate": 4.7183359979386705e-05, + "loss": 12.7502, + "step": 12773 + }, + { + "epoch": 0.5324496686257346, + "grad_norm": 272.0, + "learning_rate": 4.717662070733832e-05, + "loss": 11.003, + "step": 12774 + }, + { + "epoch": 0.5324913509232629, + "grad_norm": 1048.0, + "learning_rate": 4.716988148674625e-05, + "loss": 24.3789, + "step": 12775 + }, + { + "epoch": 0.5325330332207912, + "grad_norm": 324.0, + "learning_rate": 4.716314231773336e-05, + "loss": 13.9377, + "step": 12776 + }, + { + "epoch": 0.5325747155183194, + "grad_norm": 175.0, + "learning_rate": 4.715640320042243e-05, + "loss": 10.1879, + "step": 12777 + }, + { + "epoch": 0.5326163978158476, + "grad_norm": 156.0, + "learning_rate": 4.7149664134936335e-05, + "loss": 9.0003, + "step": 12778 + }, + { + "epoch": 0.5326580801133759, + "grad_norm": 352.0, + "learning_rate": 4.714292512139783e-05, + "loss": 14.1255, + "step": 12779 + }, + { + "epoch": 0.5326997624109041, + "grad_norm": 454.0, + "learning_rate": 4.71361861599298e-05, + "loss": 17.1253, + "step": 12780 + }, + { + "epoch": 0.5327414447084323, + "grad_norm": 324.0, + "learning_rate": 4.7129447250655004e-05, + "loss": 13.7504, + "step": 12781 + }, + { + "epoch": 0.5327831270059605, + "grad_norm": 124.5, + "learning_rate": 4.712270839369632e-05, + "loss": 6.5324, + "step": 12782 + }, + { + "epoch": 0.5328248093034889, + "grad_norm": 2128.0, + "learning_rate": 4.71159695891765e-05, + "loss": 42.0002, + "step": 12783 + }, + { + "epoch": 0.5328664916010171, + "grad_norm": 414.0, + "learning_rate": 4.71092308372184e-05, + "loss": 14.9378, + "step": 12784 + }, + { + "epoch": 0.5329081738985453, + "grad_norm": 310.0, + "learning_rate": 4.710249213794483e-05, + "loss": 12.4378, + "step": 12785 + }, + { + "epoch": 0.5329498561960735, + "grad_norm": 356.0, + "learning_rate": 4.709575349147859e-05, + "loss": 15.3129, + "step": 12786 + }, + { + "epoch": 0.5329915384936018, + "grad_norm": 116.5, + "learning_rate": 4.708901489794249e-05, + "loss": 8.2506, + "step": 12787 + }, + { + "epoch": 0.53303322079113, + "grad_norm": 628.0, + "learning_rate": 4.708227635745938e-05, + "loss": 20.7502, + "step": 12788 + }, + { + "epoch": 0.5330749030886582, + "grad_norm": 266.0, + "learning_rate": 4.7075537870152015e-05, + "loss": 13.3752, + "step": 12789 + }, + { + "epoch": 0.5331165853861864, + "grad_norm": 154.0, + "learning_rate": 4.706879943614326e-05, + "loss": 8.938, + "step": 12790 + }, + { + "epoch": 0.5331582676837148, + "grad_norm": 100.5, + "learning_rate": 4.7062061055555866e-05, + "loss": 9.2502, + "step": 12791 + }, + { + "epoch": 0.533199949981243, + "grad_norm": 484.0, + "learning_rate": 4.70553227285127e-05, + "loss": 17.1253, + "step": 12792 + }, + { + "epoch": 0.5332416322787712, + "grad_norm": 952.0, + "learning_rate": 4.704858445513651e-05, + "loss": 25.0015, + "step": 12793 + }, + { + "epoch": 0.5332833145762994, + "grad_norm": 204.0, + "learning_rate": 4.704184623555016e-05, + "loss": 12.188, + "step": 12794 + }, + { + "epoch": 0.5333249968738277, + "grad_norm": 176.0, + "learning_rate": 4.703510806987639e-05, + "loss": 11.5638, + "step": 12795 + }, + { + "epoch": 0.5333666791713559, + "grad_norm": 390.0, + "learning_rate": 4.702836995823806e-05, + "loss": 15.1877, + "step": 12796 + }, + { + "epoch": 0.5334083614688842, + "grad_norm": 548.0, + "learning_rate": 4.702163190075795e-05, + "loss": 16.1257, + "step": 12797 + }, + { + "epoch": 0.5334500437664124, + "grad_norm": 390.0, + "learning_rate": 4.7014893897558866e-05, + "loss": 15.1261, + "step": 12798 + }, + { + "epoch": 0.5334917260639407, + "grad_norm": 442.0, + "learning_rate": 4.700815594876359e-05, + "loss": 12.8755, + "step": 12799 + }, + { + "epoch": 0.5335334083614689, + "grad_norm": 108.0, + "learning_rate": 4.700141805449496e-05, + "loss": 8.1252, + "step": 12800 + }, + { + "epoch": 0.5335750906589971, + "grad_norm": 151.0, + "learning_rate": 4.699468021487573e-05, + "loss": 10.563, + "step": 12801 + }, + { + "epoch": 0.5336167729565253, + "grad_norm": 540.0, + "learning_rate": 4.698794243002874e-05, + "loss": 17.2502, + "step": 12802 + }, + { + "epoch": 0.5336584552540536, + "grad_norm": 154.0, + "learning_rate": 4.698120470007675e-05, + "loss": 9.4381, + "step": 12803 + }, + { + "epoch": 0.5337001375515819, + "grad_norm": 524.0, + "learning_rate": 4.6974467025142586e-05, + "loss": 16.8752, + "step": 12804 + }, + { + "epoch": 0.5337418198491101, + "grad_norm": 178.0, + "learning_rate": 4.696772940534901e-05, + "loss": 10.8767, + "step": 12805 + }, + { + "epoch": 0.5337835021466383, + "grad_norm": 552.0, + "learning_rate": 4.6960991840818865e-05, + "loss": 16.3753, + "step": 12806 + }, + { + "epoch": 0.5338251844441666, + "grad_norm": 111.5, + "learning_rate": 4.6954254331674886e-05, + "loss": 9.4377, + "step": 12807 + }, + { + "epoch": 0.5338668667416948, + "grad_norm": 756.0, + "learning_rate": 4.6947516878039904e-05, + "loss": 20.7502, + "step": 12808 + }, + { + "epoch": 0.533908549039223, + "grad_norm": 640.0, + "learning_rate": 4.69407794800367e-05, + "loss": 16.8777, + "step": 12809 + }, + { + "epoch": 0.5339502313367512, + "grad_norm": 328.0, + "learning_rate": 4.693404213778805e-05, + "loss": 14.0005, + "step": 12810 + }, + { + "epoch": 0.5339919136342796, + "grad_norm": 478.0, + "learning_rate": 4.6927304851416754e-05, + "loss": 17.3752, + "step": 12811 + }, + { + "epoch": 0.5340335959318078, + "grad_norm": 212.0, + "learning_rate": 4.6920567621045616e-05, + "loss": 12.0002, + "step": 12812 + }, + { + "epoch": 0.534075278229336, + "grad_norm": 168.0, + "learning_rate": 4.691383044679739e-05, + "loss": 10.8137, + "step": 12813 + }, + { + "epoch": 0.5341169605268642, + "grad_norm": 338.0, + "learning_rate": 4.69070933287949e-05, + "loss": 14.3752, + "step": 12814 + }, + { + "epoch": 0.5341586428243925, + "grad_norm": 163.0, + "learning_rate": 4.690035626716088e-05, + "loss": 9.626, + "step": 12815 + }, + { + "epoch": 0.5342003251219207, + "grad_norm": 107.5, + "learning_rate": 4.6893619262018177e-05, + "loss": 8.0005, + "step": 12816 + }, + { + "epoch": 0.5342420074194489, + "grad_norm": 604.0, + "learning_rate": 4.688688231348951e-05, + "loss": 18.2506, + "step": 12817 + }, + { + "epoch": 0.5342836897169772, + "grad_norm": 350.0, + "learning_rate": 4.68801454216977e-05, + "loss": 13.5003, + "step": 12818 + }, + { + "epoch": 0.5343253720145055, + "grad_norm": 362.0, + "learning_rate": 4.6873408586765506e-05, + "loss": 12.4399, + "step": 12819 + }, + { + "epoch": 0.5343670543120337, + "grad_norm": 832.0, + "learning_rate": 4.686667180881574e-05, + "loss": 22.5005, + "step": 12820 + }, + { + "epoch": 0.5344087366095619, + "grad_norm": 320.0, + "learning_rate": 4.685993508797114e-05, + "loss": 12.5638, + "step": 12821 + }, + { + "epoch": 0.5344504189070901, + "grad_norm": 298.0, + "learning_rate": 4.68531984243545e-05, + "loss": 13.0628, + "step": 12822 + }, + { + "epoch": 0.5344921012046184, + "grad_norm": 314.0, + "learning_rate": 4.684646181808859e-05, + "loss": 12.1275, + "step": 12823 + }, + { + "epoch": 0.5345337835021466, + "grad_norm": 290.0, + "learning_rate": 4.683972526929622e-05, + "loss": 13.0629, + "step": 12824 + }, + { + "epoch": 0.5345754657996749, + "grad_norm": 238.0, + "learning_rate": 4.6832988778100116e-05, + "loss": 11.6878, + "step": 12825 + }, + { + "epoch": 0.5346171480972031, + "grad_norm": 708.0, + "learning_rate": 4.68262523446231e-05, + "loss": 21.8754, + "step": 12826 + }, + { + "epoch": 0.5346588303947314, + "grad_norm": 780.0, + "learning_rate": 4.6819515968987886e-05, + "loss": 21.0004, + "step": 12827 + }, + { + "epoch": 0.5347005126922596, + "grad_norm": 47.5, + "learning_rate": 4.68127796513173e-05, + "loss": 6.7816, + "step": 12828 + }, + { + "epoch": 0.5347421949897878, + "grad_norm": 552.0, + "learning_rate": 4.6806043391734066e-05, + "loss": 17.0002, + "step": 12829 + }, + { + "epoch": 0.534783877287316, + "grad_norm": 390.0, + "learning_rate": 4.6799307190360995e-05, + "loss": 15.4386, + "step": 12830 + }, + { + "epoch": 0.5348255595848443, + "grad_norm": 420.0, + "learning_rate": 4.679257104732082e-05, + "loss": 15.8752, + "step": 12831 + }, + { + "epoch": 0.5348672418823726, + "grad_norm": 194.0, + "learning_rate": 4.6785834962736335e-05, + "loss": 11.0003, + "step": 12832 + }, + { + "epoch": 0.5349089241799008, + "grad_norm": 636.0, + "learning_rate": 4.6779098936730276e-05, + "loss": 22.1251, + "step": 12833 + }, + { + "epoch": 0.534950606477429, + "grad_norm": 388.0, + "learning_rate": 4.6772362969425436e-05, + "loss": 13.3164, + "step": 12834 + }, + { + "epoch": 0.5349922887749573, + "grad_norm": 296.0, + "learning_rate": 4.676562706094457e-05, + "loss": 13.6252, + "step": 12835 + }, + { + "epoch": 0.5350339710724855, + "grad_norm": 378.0, + "learning_rate": 4.675889121141043e-05, + "loss": 15.3128, + "step": 12836 + }, + { + "epoch": 0.5350756533700137, + "grad_norm": 406.0, + "learning_rate": 4.675215542094578e-05, + "loss": 14.876, + "step": 12837 + }, + { + "epoch": 0.535117335667542, + "grad_norm": 108.5, + "learning_rate": 4.674541968967341e-05, + "loss": 11.5005, + "step": 12838 + }, + { + "epoch": 0.5351590179650703, + "grad_norm": 272.0, + "learning_rate": 4.673868401771603e-05, + "loss": 11.126, + "step": 12839 + }, + { + "epoch": 0.5352007002625985, + "grad_norm": 149.0, + "learning_rate": 4.673194840519644e-05, + "loss": 9.6257, + "step": 12840 + }, + { + "epoch": 0.5352423825601267, + "grad_norm": 288.0, + "learning_rate": 4.6725212852237366e-05, + "loss": 12.1254, + "step": 12841 + }, + { + "epoch": 0.535284064857655, + "grad_norm": 227.0, + "learning_rate": 4.67184773589616e-05, + "loss": 12.1252, + "step": 12842 + }, + { + "epoch": 0.5353257471551832, + "grad_norm": 2008.0, + "learning_rate": 4.671174192549185e-05, + "loss": 43.0003, + "step": 12843 + }, + { + "epoch": 0.5353674294527114, + "grad_norm": 416.0, + "learning_rate": 4.670500655195091e-05, + "loss": 15.0643, + "step": 12844 + }, + { + "epoch": 0.5354091117502396, + "grad_norm": 512.0, + "learning_rate": 4.669827123846151e-05, + "loss": 16.7548, + "step": 12845 + }, + { + "epoch": 0.535450794047768, + "grad_norm": 344.0, + "learning_rate": 4.669153598514642e-05, + "loss": 15.001, + "step": 12846 + }, + { + "epoch": 0.5354924763452962, + "grad_norm": 420.0, + "learning_rate": 4.668480079212837e-05, + "loss": 16.8754, + "step": 12847 + }, + { + "epoch": 0.5355341586428244, + "grad_norm": 229.0, + "learning_rate": 4.667806565953013e-05, + "loss": 8.1894, + "step": 12848 + }, + { + "epoch": 0.5355758409403526, + "grad_norm": 320.0, + "learning_rate": 4.667133058747442e-05, + "loss": 12.1253, + "step": 12849 + }, + { + "epoch": 0.5356175232378809, + "grad_norm": 548.0, + "learning_rate": 4.666459557608403e-05, + "loss": 18.6251, + "step": 12850 + }, + { + "epoch": 0.5356592055354091, + "grad_norm": 112.5, + "learning_rate": 4.665786062548166e-05, + "loss": 5.7502, + "step": 12851 + }, + { + "epoch": 0.5357008878329373, + "grad_norm": 274.0, + "learning_rate": 4.6651125735790104e-05, + "loss": 12.938, + "step": 12852 + }, + { + "epoch": 0.5357425701304656, + "grad_norm": 668.0, + "learning_rate": 4.6644390907132045e-05, + "loss": 18.6273, + "step": 12853 + }, + { + "epoch": 0.5357842524279939, + "grad_norm": 310.0, + "learning_rate": 4.6637656139630293e-05, + "loss": 12.8754, + "step": 12854 + }, + { + "epoch": 0.5358259347255221, + "grad_norm": 274.0, + "learning_rate": 4.6630921433407527e-05, + "loss": 12.1252, + "step": 12855 + }, + { + "epoch": 0.5358676170230503, + "grad_norm": 215.0, + "learning_rate": 4.6624186788586544e-05, + "loss": 11.7501, + "step": 12856 + }, + { + "epoch": 0.5359092993205785, + "grad_norm": 474.0, + "learning_rate": 4.661745220529003e-05, + "loss": 17.0004, + "step": 12857 + }, + { + "epoch": 0.5359509816181068, + "grad_norm": 97.5, + "learning_rate": 4.661071768364076e-05, + "loss": 8.6252, + "step": 12858 + }, + { + "epoch": 0.535992663915635, + "grad_norm": 85.0, + "learning_rate": 4.660398322376146e-05, + "loss": 7.9076, + "step": 12859 + }, + { + "epoch": 0.5360343462131633, + "grad_norm": 330.0, + "learning_rate": 4.659724882577487e-05, + "loss": 13.8751, + "step": 12860 + }, + { + "epoch": 0.5360760285106915, + "grad_norm": 358.0, + "learning_rate": 4.659051448980372e-05, + "loss": 15.1253, + "step": 12861 + }, + { + "epoch": 0.5361177108082198, + "grad_norm": 130.0, + "learning_rate": 4.658378021597076e-05, + "loss": 10.0628, + "step": 12862 + }, + { + "epoch": 0.536159393105748, + "grad_norm": 628.0, + "learning_rate": 4.657704600439869e-05, + "loss": 20.7519, + "step": 12863 + }, + { + "epoch": 0.5362010754032762, + "grad_norm": 1360.0, + "learning_rate": 4.657031185521028e-05, + "loss": 25.5042, + "step": 12864 + }, + { + "epoch": 0.5362427577008044, + "grad_norm": 163.0, + "learning_rate": 4.656357776852822e-05, + "loss": 9.0627, + "step": 12865 + }, + { + "epoch": 0.5362844399983328, + "grad_norm": 175.0, + "learning_rate": 4.6556843744475274e-05, + "loss": 11.5629, + "step": 12866 + }, + { + "epoch": 0.536326122295861, + "grad_norm": 219.0, + "learning_rate": 4.655010978317414e-05, + "loss": 11.5631, + "step": 12867 + }, + { + "epoch": 0.5363678045933892, + "grad_norm": 438.0, + "learning_rate": 4.654337588474759e-05, + "loss": 16.001, + "step": 12868 + }, + { + "epoch": 0.5364094868909174, + "grad_norm": 960.0, + "learning_rate": 4.653664204931829e-05, + "loss": 21.1287, + "step": 12869 + }, + { + "epoch": 0.5364511691884457, + "grad_norm": 1528.0, + "learning_rate": 4.6529908277009025e-05, + "loss": 33.5006, + "step": 12870 + }, + { + "epoch": 0.5364928514859739, + "grad_norm": 75.5, + "learning_rate": 4.6523174567942467e-05, + "loss": 8.1879, + "step": 12871 + }, + { + "epoch": 0.5365345337835021, + "grad_norm": 664.0, + "learning_rate": 4.6516440922241365e-05, + "loss": 19.5006, + "step": 12872 + }, + { + "epoch": 0.5365762160810303, + "grad_norm": 628.0, + "learning_rate": 4.6509707340028446e-05, + "loss": 18.2503, + "step": 12873 + }, + { + "epoch": 0.5366178983785587, + "grad_norm": 239.0, + "learning_rate": 4.6502973821426414e-05, + "loss": 13.0002, + "step": 12874 + }, + { + "epoch": 0.5366595806760869, + "grad_norm": 556.0, + "learning_rate": 4.649624036655799e-05, + "loss": 16.2549, + "step": 12875 + }, + { + "epoch": 0.5367012629736151, + "grad_norm": 338.0, + "learning_rate": 4.648950697554591e-05, + "loss": 14.3129, + "step": 12876 + }, + { + "epoch": 0.5367429452711433, + "grad_norm": 536.0, + "learning_rate": 4.648277364851286e-05, + "loss": 18.2522, + "step": 12877 + }, + { + "epoch": 0.5367846275686716, + "grad_norm": 648.0, + "learning_rate": 4.647604038558159e-05, + "loss": 20.1274, + "step": 12878 + }, + { + "epoch": 0.5368263098661998, + "grad_norm": 684.0, + "learning_rate": 4.6469307186874786e-05, + "loss": 20.3752, + "step": 12879 + }, + { + "epoch": 0.536867992163728, + "grad_norm": 256.0, + "learning_rate": 4.646257405251518e-05, + "loss": 12.8128, + "step": 12880 + }, + { + "epoch": 0.5369096744612563, + "grad_norm": 532.0, + "learning_rate": 4.6455840982625466e-05, + "loss": 18.2502, + "step": 12881 + }, + { + "epoch": 0.5369513567587846, + "grad_norm": 145.0, + "learning_rate": 4.6449107977328374e-05, + "loss": 7.1877, + "step": 12882 + }, + { + "epoch": 0.5369930390563128, + "grad_norm": 185.0, + "learning_rate": 4.644237503674659e-05, + "loss": 10.6266, + "step": 12883 + }, + { + "epoch": 0.537034721353841, + "grad_norm": 251.0, + "learning_rate": 4.643564216100285e-05, + "loss": 11.9376, + "step": 12884 + }, + { + "epoch": 0.5370764036513692, + "grad_norm": 171.0, + "learning_rate": 4.642890935021984e-05, + "loss": 10.1878, + "step": 12885 + }, + { + "epoch": 0.5371180859488975, + "grad_norm": 496.0, + "learning_rate": 4.6422176604520284e-05, + "loss": 14.4384, + "step": 12886 + }, + { + "epoch": 0.5371597682464258, + "grad_norm": 356.0, + "learning_rate": 4.641544392402686e-05, + "loss": 14.6252, + "step": 12887 + }, + { + "epoch": 0.537201450543954, + "grad_norm": 612.0, + "learning_rate": 4.6408711308862316e-05, + "loss": 18.8757, + "step": 12888 + }, + { + "epoch": 0.5372431328414822, + "grad_norm": 238.0, + "learning_rate": 4.6401978759149295e-05, + "loss": 12.1876, + "step": 12889 + }, + { + "epoch": 0.5372848151390105, + "grad_norm": 292.0, + "learning_rate": 4.639524627501056e-05, + "loss": 13.8754, + "step": 12890 + }, + { + "epoch": 0.5373264974365387, + "grad_norm": 146.0, + "learning_rate": 4.638851385656876e-05, + "loss": 10.0631, + "step": 12891 + }, + { + "epoch": 0.5373681797340669, + "grad_norm": 136.0, + "learning_rate": 4.6381781503946635e-05, + "loss": 9.6877, + "step": 12892 + }, + { + "epoch": 0.5374098620315951, + "grad_norm": 125.5, + "learning_rate": 4.637504921726684e-05, + "loss": 6.4067, + "step": 12893 + }, + { + "epoch": 0.5374515443291235, + "grad_norm": 688.0, + "learning_rate": 4.636831699665212e-05, + "loss": 18.0013, + "step": 12894 + }, + { + "epoch": 0.5374932266266517, + "grad_norm": 340.0, + "learning_rate": 4.6361584842225124e-05, + "loss": 14.0003, + "step": 12895 + }, + { + "epoch": 0.5375349089241799, + "grad_norm": 322.0, + "learning_rate": 4.6354852754108575e-05, + "loss": 14.8752, + "step": 12896 + }, + { + "epoch": 0.5375765912217081, + "grad_norm": 272.0, + "learning_rate": 4.634812073242516e-05, + "loss": 12.3751, + "step": 12897 + }, + { + "epoch": 0.5376182735192364, + "grad_norm": 616.0, + "learning_rate": 4.634138877729757e-05, + "loss": 19.6251, + "step": 12898 + }, + { + "epoch": 0.5376599558167646, + "grad_norm": 162.0, + "learning_rate": 4.633465688884848e-05, + "loss": 10.3761, + "step": 12899 + }, + { + "epoch": 0.5377016381142928, + "grad_norm": 274.0, + "learning_rate": 4.6327925067200615e-05, + "loss": 11.0627, + "step": 12900 + }, + { + "epoch": 0.537743320411821, + "grad_norm": 366.0, + "learning_rate": 4.632119331247662e-05, + "loss": 15.6877, + "step": 12901 + }, + { + "epoch": 0.5377850027093494, + "grad_norm": 310.0, + "learning_rate": 4.6314461624799236e-05, + "loss": 14.1253, + "step": 12902 + }, + { + "epoch": 0.5378266850068776, + "grad_norm": 368.0, + "learning_rate": 4.6307730004291075e-05, + "loss": 13.9379, + "step": 12903 + }, + { + "epoch": 0.5378683673044058, + "grad_norm": 176.0, + "learning_rate": 4.63009984510749e-05, + "loss": 9.0647, + "step": 12904 + }, + { + "epoch": 0.537910049601934, + "grad_norm": 556.0, + "learning_rate": 4.629426696527333e-05, + "loss": 17.8758, + "step": 12905 + }, + { + "epoch": 0.5379517318994623, + "grad_norm": 152.0, + "learning_rate": 4.628753554700909e-05, + "loss": 10.1256, + "step": 12906 + }, + { + "epoch": 0.5379934141969905, + "grad_norm": 444.0, + "learning_rate": 4.628080419640483e-05, + "loss": 16.5005, + "step": 12907 + }, + { + "epoch": 0.5380350964945187, + "grad_norm": 968.0, + "learning_rate": 4.6274072913583263e-05, + "loss": 24.3752, + "step": 12908 + }, + { + "epoch": 0.5380767787920471, + "grad_norm": 326.0, + "learning_rate": 4.6267341698667024e-05, + "loss": 11.1253, + "step": 12909 + }, + { + "epoch": 0.5381184610895753, + "grad_norm": 294.0, + "learning_rate": 4.626061055177883e-05, + "loss": 13.7507, + "step": 12910 + }, + { + "epoch": 0.5381601433871035, + "grad_norm": 98.5, + "learning_rate": 4.625387947304132e-05, + "loss": 10.2511, + "step": 12911 + }, + { + "epoch": 0.5382018256846317, + "grad_norm": 352.0, + "learning_rate": 4.624714846257722e-05, + "loss": 15.0629, + "step": 12912 + }, + { + "epoch": 0.53824350798216, + "grad_norm": 160.0, + "learning_rate": 4.624041752050915e-05, + "loss": 9.0629, + "step": 12913 + }, + { + "epoch": 0.5382851902796882, + "grad_norm": 308.0, + "learning_rate": 4.623368664695982e-05, + "loss": 13.5628, + "step": 12914 + }, + { + "epoch": 0.5383268725772165, + "grad_norm": 450.0, + "learning_rate": 4.622695584205187e-05, + "loss": 16.3751, + "step": 12915 + }, + { + "epoch": 0.5383685548747447, + "grad_norm": 59.5, + "learning_rate": 4.6220225105908e-05, + "loss": 6.9377, + "step": 12916 + }, + { + "epoch": 0.538410237172273, + "grad_norm": 628.0, + "learning_rate": 4.621349443865085e-05, + "loss": 21.0005, + "step": 12917 + }, + { + "epoch": 0.5384519194698012, + "grad_norm": 129.0, + "learning_rate": 4.620676384040312e-05, + "loss": 10.0627, + "step": 12918 + }, + { + "epoch": 0.5384936017673294, + "grad_norm": 83.0, + "learning_rate": 4.620003331128743e-05, + "loss": 8.4379, + "step": 12919 + }, + { + "epoch": 0.5385352840648576, + "grad_norm": 96.5, + "learning_rate": 4.619330285142649e-05, + "loss": 8.3128, + "step": 12920 + }, + { + "epoch": 0.538576966362386, + "grad_norm": 516.0, + "learning_rate": 4.618657246094292e-05, + "loss": 17.7503, + "step": 12921 + }, + { + "epoch": 0.5386186486599142, + "grad_norm": 175.0, + "learning_rate": 4.617984213995943e-05, + "loss": 9.8752, + "step": 12922 + }, + { + "epoch": 0.5386603309574424, + "grad_norm": 488.0, + "learning_rate": 4.617311188859864e-05, + "loss": 15.7507, + "step": 12923 + }, + { + "epoch": 0.5387020132549706, + "grad_norm": 199.0, + "learning_rate": 4.616638170698324e-05, + "loss": 11.0005, + "step": 12924 + }, + { + "epoch": 0.5387436955524989, + "grad_norm": 258.0, + "learning_rate": 4.615965159523585e-05, + "loss": 7.313, + "step": 12925 + }, + { + "epoch": 0.5387853778500271, + "grad_norm": 308.0, + "learning_rate": 4.615292155347918e-05, + "loss": 13.6877, + "step": 12926 + }, + { + "epoch": 0.5388270601475553, + "grad_norm": 225.0, + "learning_rate": 4.6146191581835826e-05, + "loss": 10.0003, + "step": 12927 + }, + { + "epoch": 0.5388687424450835, + "grad_norm": 872.0, + "learning_rate": 4.6139461680428506e-05, + "loss": 22.0003, + "step": 12928 + }, + { + "epoch": 0.5389104247426119, + "grad_norm": 426.0, + "learning_rate": 4.613273184937981e-05, + "loss": 15.5002, + "step": 12929 + }, + { + "epoch": 0.5389521070401401, + "grad_norm": 248.0, + "learning_rate": 4.6126002088812445e-05, + "loss": 10.2503, + "step": 12930 + }, + { + "epoch": 0.5389937893376683, + "grad_norm": 213.0, + "learning_rate": 4.611927239884901e-05, + "loss": 11.1879, + "step": 12931 + }, + { + "epoch": 0.5390354716351965, + "grad_norm": 152.0, + "learning_rate": 4.6112542779612205e-05, + "loss": 9.1884, + "step": 12932 + }, + { + "epoch": 0.5390771539327248, + "grad_norm": 276.0, + "learning_rate": 4.610581323122463e-05, + "loss": 11.9379, + "step": 12933 + }, + { + "epoch": 0.539118836230253, + "grad_norm": 127.0, + "learning_rate": 4.609908375380897e-05, + "loss": 10.2501, + "step": 12934 + }, + { + "epoch": 0.5391605185277812, + "grad_norm": 636.0, + "learning_rate": 4.609235434748785e-05, + "loss": 19.3752, + "step": 12935 + }, + { + "epoch": 0.5392022008253095, + "grad_norm": 158.0, + "learning_rate": 4.608562501238392e-05, + "loss": 9.6878, + "step": 12936 + }, + { + "epoch": 0.5392438831228378, + "grad_norm": 628.0, + "learning_rate": 4.607889574861981e-05, + "loss": 20.1252, + "step": 12937 + }, + { + "epoch": 0.539285565420366, + "grad_norm": 1320.0, + "learning_rate": 4.60721665563182e-05, + "loss": 33.2508, + "step": 12938 + }, + { + "epoch": 0.5393272477178942, + "grad_norm": 148.0, + "learning_rate": 4.606543743560167e-05, + "loss": 9.9377, + "step": 12939 + }, + { + "epoch": 0.5393689300154224, + "grad_norm": 524.0, + "learning_rate": 4.605870838659293e-05, + "loss": 18.0012, + "step": 12940 + }, + { + "epoch": 0.5394106123129507, + "grad_norm": 172.0, + "learning_rate": 4.605197940941454e-05, + "loss": 10.563, + "step": 12941 + }, + { + "epoch": 0.5394522946104789, + "grad_norm": 185.0, + "learning_rate": 4.604525050418921e-05, + "loss": 11.5628, + "step": 12942 + }, + { + "epoch": 0.5394939769080072, + "grad_norm": 428.0, + "learning_rate": 4.603852167103951e-05, + "loss": 17.3752, + "step": 12943 + }, + { + "epoch": 0.5395356592055354, + "grad_norm": 198.0, + "learning_rate": 4.603179291008813e-05, + "loss": 13.188, + "step": 12944 + }, + { + "epoch": 0.5395773415030637, + "grad_norm": 408.0, + "learning_rate": 4.602506422145766e-05, + "loss": 15.2512, + "step": 12945 + }, + { + "epoch": 0.5396190238005919, + "grad_norm": 233.0, + "learning_rate": 4.6018335605270766e-05, + "loss": 11.9382, + "step": 12946 + }, + { + "epoch": 0.5396607060981201, + "grad_norm": 203.0, + "learning_rate": 4.601160706165003e-05, + "loss": 10.6254, + "step": 12947 + }, + { + "epoch": 0.5397023883956483, + "grad_norm": 239.0, + "learning_rate": 4.600487859071813e-05, + "loss": 10.8129, + "step": 12948 + }, + { + "epoch": 0.5397440706931766, + "grad_norm": 219.0, + "learning_rate": 4.599815019259765e-05, + "loss": 11.6878, + "step": 12949 + }, + { + "epoch": 0.5397857529907049, + "grad_norm": 314.0, + "learning_rate": 4.599142186741127e-05, + "loss": 14.9387, + "step": 12950 + }, + { + "epoch": 0.5398274352882331, + "grad_norm": 188.0, + "learning_rate": 4.598469361528156e-05, + "loss": 11.1252, + "step": 12951 + }, + { + "epoch": 0.5398691175857613, + "grad_norm": 406.0, + "learning_rate": 4.597796543633119e-05, + "loss": 13.5628, + "step": 12952 + }, + { + "epoch": 0.5399107998832896, + "grad_norm": 380.0, + "learning_rate": 4.5971237330682726e-05, + "loss": 15.5004, + "step": 12953 + }, + { + "epoch": 0.5399524821808178, + "grad_norm": 95.5, + "learning_rate": 4.5964509298458843e-05, + "loss": 8.876, + "step": 12954 + }, + { + "epoch": 0.539994164478346, + "grad_norm": 1272.0, + "learning_rate": 4.595778133978212e-05, + "loss": 27.13, + "step": 12955 + }, + { + "epoch": 0.5400358467758742, + "grad_norm": 326.0, + "learning_rate": 4.5951053454775214e-05, + "loss": 13.8757, + "step": 12956 + }, + { + "epoch": 0.5400775290734026, + "grad_norm": 340.0, + "learning_rate": 4.5944325643560687e-05, + "loss": 14.4382, + "step": 12957 + }, + { + "epoch": 0.5401192113709308, + "grad_norm": 776.0, + "learning_rate": 4.593759790626121e-05, + "loss": 21.6258, + "step": 12958 + }, + { + "epoch": 0.540160893668459, + "grad_norm": 1456.0, + "learning_rate": 4.5930870242999355e-05, + "loss": 31.508, + "step": 12959 + }, + { + "epoch": 0.5402025759659872, + "grad_norm": 340.0, + "learning_rate": 4.592414265389776e-05, + "loss": 14.5016, + "step": 12960 + }, + { + "epoch": 0.5402442582635155, + "grad_norm": 1176.0, + "learning_rate": 4.5917415139079025e-05, + "loss": 27.6281, + "step": 12961 + }, + { + "epoch": 0.5402859405610437, + "grad_norm": 800.0, + "learning_rate": 4.591068769866576e-05, + "loss": 22.0004, + "step": 12962 + }, + { + "epoch": 0.5403276228585719, + "grad_norm": 139.0, + "learning_rate": 4.5903960332780566e-05, + "loss": 9.6254, + "step": 12963 + }, + { + "epoch": 0.5403693051561002, + "grad_norm": 79.5, + "learning_rate": 4.589723304154609e-05, + "loss": 6.2507, + "step": 12964 + }, + { + "epoch": 0.5404109874536285, + "grad_norm": 149.0, + "learning_rate": 4.5890505825084864e-05, + "loss": 10.2502, + "step": 12965 + }, + { + "epoch": 0.5404526697511567, + "grad_norm": 680.0, + "learning_rate": 4.588377868351957e-05, + "loss": 15.4423, + "step": 12966 + }, + { + "epoch": 0.5404943520486849, + "grad_norm": 368.0, + "learning_rate": 4.587705161697275e-05, + "loss": 14.7504, + "step": 12967 + }, + { + "epoch": 0.5405360343462131, + "grad_norm": 258.0, + "learning_rate": 4.5870324625567055e-05, + "loss": 11.8127, + "step": 12968 + }, + { + "epoch": 0.5405777166437414, + "grad_norm": 140.0, + "learning_rate": 4.586359770942503e-05, + "loss": 10.6882, + "step": 12969 + }, + { + "epoch": 0.5406193989412696, + "grad_norm": 106.5, + "learning_rate": 4.5856870868669336e-05, + "loss": 6.7191, + "step": 12970 + }, + { + "epoch": 0.5406610812387979, + "grad_norm": 612.0, + "learning_rate": 4.585014410342251e-05, + "loss": 19.3754, + "step": 12971 + }, + { + "epoch": 0.5407027635363261, + "grad_norm": 314.0, + "learning_rate": 4.584341741380719e-05, + "loss": 11.8131, + "step": 12972 + }, + { + "epoch": 0.5407444458338544, + "grad_norm": 278.0, + "learning_rate": 4.583669079994595e-05, + "loss": 12.4382, + "step": 12973 + }, + { + "epoch": 0.5407861281313826, + "grad_norm": 504.0, + "learning_rate": 4.582996426196139e-05, + "loss": 17.3751, + "step": 12974 + }, + { + "epoch": 0.5408278104289108, + "grad_norm": 42.5, + "learning_rate": 4.58232377999761e-05, + "loss": 6.8128, + "step": 12975 + }, + { + "epoch": 0.540869492726439, + "grad_norm": 1256.0, + "learning_rate": 4.581651141411269e-05, + "loss": 31.6252, + "step": 12976 + }, + { + "epoch": 0.5409111750239673, + "grad_norm": 247.0, + "learning_rate": 4.5809785104493707e-05, + "loss": 12.1252, + "step": 12977 + }, + { + "epoch": 0.5409528573214956, + "grad_norm": 199.0, + "learning_rate": 4.5803058871241786e-05, + "loss": 11.0002, + "step": 12978 + }, + { + "epoch": 0.5409945396190238, + "grad_norm": 156.0, + "learning_rate": 4.579633271447947e-05, + "loss": 10.2504, + "step": 12979 + }, + { + "epoch": 0.5410362219165521, + "grad_norm": 528.0, + "learning_rate": 4.578960663432938e-05, + "loss": 18.8751, + "step": 12980 + }, + { + "epoch": 0.5410779042140803, + "grad_norm": 644.0, + "learning_rate": 4.5782880630914067e-05, + "loss": 20.2501, + "step": 12981 + }, + { + "epoch": 0.5411195865116085, + "grad_norm": 356.0, + "learning_rate": 4.577615470435615e-05, + "loss": 14.2503, + "step": 12982 + }, + { + "epoch": 0.5411612688091367, + "grad_norm": 64.0, + "learning_rate": 4.5769428854778155e-05, + "loss": 7.8757, + "step": 12983 + }, + { + "epoch": 0.541202951106665, + "grad_norm": 696.0, + "learning_rate": 4.576270308230272e-05, + "loss": 21.3765, + "step": 12984 + }, + { + "epoch": 0.5412446334041933, + "grad_norm": 190.0, + "learning_rate": 4.575597738705239e-05, + "loss": 11.0001, + "step": 12985 + }, + { + "epoch": 0.5412863157017215, + "grad_norm": 628.0, + "learning_rate": 4.574925176914975e-05, + "loss": 17.8752, + "step": 12986 + }, + { + "epoch": 0.5413279979992497, + "grad_norm": 232.0, + "learning_rate": 4.5742526228717363e-05, + "loss": 11.1876, + "step": 12987 + }, + { + "epoch": 0.541369680296778, + "grad_norm": 63.0, + "learning_rate": 4.573580076587784e-05, + "loss": 8.1252, + "step": 12988 + }, + { + "epoch": 0.5414113625943062, + "grad_norm": 744.0, + "learning_rate": 4.57290753807537e-05, + "loss": 22.6256, + "step": 12989 + }, + { + "epoch": 0.5414530448918344, + "grad_norm": 1640.0, + "learning_rate": 4.572235007346756e-05, + "loss": 29.1303, + "step": 12990 + }, + { + "epoch": 0.5414947271893626, + "grad_norm": 494.0, + "learning_rate": 4.5715624844141955e-05, + "loss": 16.5002, + "step": 12991 + }, + { + "epoch": 0.541536409486891, + "grad_norm": 288.0, + "learning_rate": 4.570889969289948e-05, + "loss": 13.5002, + "step": 12992 + }, + { + "epoch": 0.5415780917844192, + "grad_norm": 728.0, + "learning_rate": 4.5702174619862675e-05, + "loss": 22.5001, + "step": 12993 + }, + { + "epoch": 0.5416197740819474, + "grad_norm": 1544.0, + "learning_rate": 4.569544962515414e-05, + "loss": 34.7507, + "step": 12994 + }, + { + "epoch": 0.5416614563794756, + "grad_norm": 448.0, + "learning_rate": 4.568872470889639e-05, + "loss": 16.3752, + "step": 12995 + }, + { + "epoch": 0.5417031386770039, + "grad_norm": 334.0, + "learning_rate": 4.568199987121204e-05, + "loss": 14.1253, + "step": 12996 + }, + { + "epoch": 0.5417448209745321, + "grad_norm": 1008.0, + "learning_rate": 4.567527511222361e-05, + "loss": 21.7537, + "step": 12997 + }, + { + "epoch": 0.5417865032720603, + "grad_norm": 394.0, + "learning_rate": 4.566855043205368e-05, + "loss": 13.4392, + "step": 12998 + }, + { + "epoch": 0.5418281855695886, + "grad_norm": 171.0, + "learning_rate": 4.56618258308248e-05, + "loss": 9.1889, + "step": 12999 + }, + { + "epoch": 0.5418698678671169, + "grad_norm": 464.0, + "learning_rate": 4.5655101308659537e-05, + "loss": 17.0005, + "step": 13000 + }, + { + "epoch": 0.5419115501646451, + "grad_norm": 226.0, + "learning_rate": 4.564837686568042e-05, + "loss": 12.1879, + "step": 13001 + }, + { + "epoch": 0.5419532324621733, + "grad_norm": 201.0, + "learning_rate": 4.5641652502010044e-05, + "loss": 12.4377, + "step": 13002 + }, + { + "epoch": 0.5419949147597015, + "grad_norm": 146.0, + "learning_rate": 4.563492821777092e-05, + "loss": 10.6256, + "step": 13003 + }, + { + "epoch": 0.5420365970572298, + "grad_norm": 536.0, + "learning_rate": 4.562820401308564e-05, + "loss": 16.3754, + "step": 13004 + }, + { + "epoch": 0.542078279354758, + "grad_norm": 58.0, + "learning_rate": 4.56214798880767e-05, + "loss": 6.8752, + "step": 13005 + }, + { + "epoch": 0.5421199616522863, + "grad_norm": 852.0, + "learning_rate": 4.561475584286671e-05, + "loss": 23.5002, + "step": 13006 + }, + { + "epoch": 0.5421616439498145, + "grad_norm": 350.0, + "learning_rate": 4.5608031877578154e-05, + "loss": 12.5003, + "step": 13007 + }, + { + "epoch": 0.5422033262473428, + "grad_norm": 204.0, + "learning_rate": 4.560130799233363e-05, + "loss": 10.8129, + "step": 13008 + }, + { + "epoch": 0.542245008544871, + "grad_norm": 239.0, + "learning_rate": 4.559458418725564e-05, + "loss": 11.5627, + "step": 13009 + }, + { + "epoch": 0.5422866908423992, + "grad_norm": 206.0, + "learning_rate": 4.558786046246675e-05, + "loss": 12.5626, + "step": 13010 + }, + { + "epoch": 0.5423283731399274, + "grad_norm": 540.0, + "learning_rate": 4.55811368180895e-05, + "loss": 18.1252, + "step": 13011 + }, + { + "epoch": 0.5423700554374558, + "grad_norm": 468.0, + "learning_rate": 4.557441325424642e-05, + "loss": 14.6888, + "step": 13012 + }, + { + "epoch": 0.542411737734984, + "grad_norm": 740.0, + "learning_rate": 4.5567689771060046e-05, + "loss": 20.8752, + "step": 13013 + }, + { + "epoch": 0.5424534200325122, + "grad_norm": 438.0, + "learning_rate": 4.556096636865294e-05, + "loss": 16.3754, + "step": 13014 + }, + { + "epoch": 0.5424951023300404, + "grad_norm": 204.0, + "learning_rate": 4.5554243047147584e-05, + "loss": 10.6886, + "step": 13015 + }, + { + "epoch": 0.5425367846275687, + "grad_norm": 114.0, + "learning_rate": 4.554751980666658e-05, + "loss": 9.0009, + "step": 13016 + }, + { + "epoch": 0.5425784669250969, + "grad_norm": 68.0, + "learning_rate": 4.554079664733239e-05, + "loss": 7.3443, + "step": 13017 + }, + { + "epoch": 0.5426201492226251, + "grad_norm": 118.0, + "learning_rate": 4.55340735692676e-05, + "loss": 8.9385, + "step": 13018 + }, + { + "epoch": 0.5426618315201533, + "grad_norm": 708.0, + "learning_rate": 4.5527350572594696e-05, + "loss": 21.2504, + "step": 13019 + }, + { + "epoch": 0.5427035138176817, + "grad_norm": 162.0, + "learning_rate": 4.552062765743625e-05, + "loss": 10.6255, + "step": 13020 + }, + { + "epoch": 0.5427451961152099, + "grad_norm": 422.0, + "learning_rate": 4.551390482391474e-05, + "loss": 15.7504, + "step": 13021 + }, + { + "epoch": 0.5427868784127381, + "grad_norm": 95.0, + "learning_rate": 4.550718207215272e-05, + "loss": 9.1878, + "step": 13022 + }, + { + "epoch": 0.5428285607102663, + "grad_norm": 193.0, + "learning_rate": 4.550045940227271e-05, + "loss": 11.3127, + "step": 13023 + }, + { + "epoch": 0.5428702430077946, + "grad_norm": 120.5, + "learning_rate": 4.549373681439722e-05, + "loss": 9.9377, + "step": 13024 + }, + { + "epoch": 0.5429119253053228, + "grad_norm": 436.0, + "learning_rate": 4.548701430864877e-05, + "loss": 15.7508, + "step": 13025 + }, + { + "epoch": 0.542953607602851, + "grad_norm": 292.0, + "learning_rate": 4.5480291885149905e-05, + "loss": 14.188, + "step": 13026 + }, + { + "epoch": 0.5429952899003793, + "grad_norm": 108.0, + "learning_rate": 4.54735695440231e-05, + "loss": 9.1879, + "step": 13027 + }, + { + "epoch": 0.5430369721979076, + "grad_norm": 314.0, + "learning_rate": 4.546684728539091e-05, + "loss": 11.2508, + "step": 13028 + }, + { + "epoch": 0.5430786544954358, + "grad_norm": 768.0, + "learning_rate": 4.5460125109375817e-05, + "loss": 20.5048, + "step": 13029 + }, + { + "epoch": 0.543120336792964, + "grad_norm": 380.0, + "learning_rate": 4.5453403016100364e-05, + "loss": 13.0003, + "step": 13030 + }, + { + "epoch": 0.5431620190904922, + "grad_norm": 456.0, + "learning_rate": 4.544668100568703e-05, + "loss": 11.1892, + "step": 13031 + }, + { + "epoch": 0.5432037013880205, + "grad_norm": 124.5, + "learning_rate": 4.543995907825835e-05, + "loss": 8.4377, + "step": 13032 + }, + { + "epoch": 0.5432453836855488, + "grad_norm": 664.0, + "learning_rate": 4.5433237233936804e-05, + "loss": 21.6251, + "step": 13033 + }, + { + "epoch": 0.543287065983077, + "grad_norm": 356.0, + "learning_rate": 4.542651547284493e-05, + "loss": 13.6284, + "step": 13034 + }, + { + "epoch": 0.5433287482806052, + "grad_norm": 149.0, + "learning_rate": 4.541979379510521e-05, + "loss": 9.188, + "step": 13035 + }, + { + "epoch": 0.5433704305781335, + "grad_norm": 107.0, + "learning_rate": 4.5413072200840156e-05, + "loss": 9.0627, + "step": 13036 + }, + { + "epoch": 0.5434121128756617, + "grad_norm": 324.0, + "learning_rate": 4.5406350690172253e-05, + "loss": 12.3751, + "step": 13037 + }, + { + "epoch": 0.5434537951731899, + "grad_norm": 848.0, + "learning_rate": 4.5399629263224044e-05, + "loss": 23.0003, + "step": 13038 + }, + { + "epoch": 0.5434954774707181, + "grad_norm": 268.0, + "learning_rate": 4.539290792011798e-05, + "loss": 12.8754, + "step": 13039 + }, + { + "epoch": 0.5435371597682465, + "grad_norm": 346.0, + "learning_rate": 4.53861866609766e-05, + "loss": 15.1878, + "step": 13040 + }, + { + "epoch": 0.5435788420657747, + "grad_norm": 474.0, + "learning_rate": 4.537946548592236e-05, + "loss": 16.3753, + "step": 13041 + }, + { + "epoch": 0.5436205243633029, + "grad_norm": 988.0, + "learning_rate": 4.537274439507778e-05, + "loss": 26.1258, + "step": 13042 + }, + { + "epoch": 0.5436622066608311, + "grad_norm": 198.0, + "learning_rate": 4.5366023388565335e-05, + "loss": 11.4379, + "step": 13043 + }, + { + "epoch": 0.5437038889583594, + "grad_norm": 308.0, + "learning_rate": 4.535930246650754e-05, + "loss": 11.7502, + "step": 13044 + }, + { + "epoch": 0.5437455712558876, + "grad_norm": 165.0, + "learning_rate": 4.5352581629026844e-05, + "loss": 7.126, + "step": 13045 + }, + { + "epoch": 0.5437872535534158, + "grad_norm": 143.0, + "learning_rate": 4.534586087624579e-05, + "loss": 10.1877, + "step": 13046 + }, + { + "epoch": 0.543828935850944, + "grad_norm": 324.0, + "learning_rate": 4.5339140208286815e-05, + "loss": 13.3753, + "step": 13047 + }, + { + "epoch": 0.5438706181484724, + "grad_norm": 294.0, + "learning_rate": 4.533241962527243e-05, + "loss": 12.3753, + "step": 13048 + }, + { + "epoch": 0.5439123004460006, + "grad_norm": 442.0, + "learning_rate": 4.532569912732511e-05, + "loss": 15.6251, + "step": 13049 + }, + { + "epoch": 0.5439539827435288, + "grad_norm": 221.0, + "learning_rate": 4.531897871456734e-05, + "loss": 10.4377, + "step": 13050 + }, + { + "epoch": 0.543995665041057, + "grad_norm": 432.0, + "learning_rate": 4.5312258387121584e-05, + "loss": 15.9377, + "step": 13051 + }, + { + "epoch": 0.5440373473385853, + "grad_norm": 416.0, + "learning_rate": 4.530553814511036e-05, + "loss": 15.1878, + "step": 13052 + }, + { + "epoch": 0.5440790296361135, + "grad_norm": 115.5, + "learning_rate": 4.52988179886561e-05, + "loss": 6.9378, + "step": 13053 + }, + { + "epoch": 0.5441207119336418, + "grad_norm": 201.0, + "learning_rate": 4.529209791788132e-05, + "loss": 10.6878, + "step": 13054 + }, + { + "epoch": 0.5441623942311701, + "grad_norm": 544.0, + "learning_rate": 4.528537793290845e-05, + "loss": 17.502, + "step": 13055 + }, + { + "epoch": 0.5442040765286983, + "grad_norm": 58.0, + "learning_rate": 4.527865803386001e-05, + "loss": 7.7818, + "step": 13056 + }, + { + "epoch": 0.5442457588262265, + "grad_norm": 75.5, + "learning_rate": 4.527193822085842e-05, + "loss": 8.9382, + "step": 13057 + }, + { + "epoch": 0.5442874411237547, + "grad_norm": 1528.0, + "learning_rate": 4.5265218494026204e-05, + "loss": 34.0005, + "step": 13058 + }, + { + "epoch": 0.544329123421283, + "grad_norm": 342.0, + "learning_rate": 4.525849885348578e-05, + "loss": 13.1253, + "step": 13059 + }, + { + "epoch": 0.5443708057188112, + "grad_norm": 432.0, + "learning_rate": 4.525177929935964e-05, + "loss": 15.3127, + "step": 13060 + }, + { + "epoch": 0.5444124880163395, + "grad_norm": 286.0, + "learning_rate": 4.5245059831770246e-05, + "loss": 12.8751, + "step": 13061 + }, + { + "epoch": 0.5444541703138677, + "grad_norm": 221.0, + "learning_rate": 4.523834045084006e-05, + "loss": 11.9379, + "step": 13062 + }, + { + "epoch": 0.544495852611396, + "grad_norm": 358.0, + "learning_rate": 4.5231621156691534e-05, + "loss": 11.8139, + "step": 13063 + }, + { + "epoch": 0.5445375349089242, + "grad_norm": 468.0, + "learning_rate": 4.522490194944715e-05, + "loss": 17.3752, + "step": 13064 + }, + { + "epoch": 0.5445792172064524, + "grad_norm": 588.0, + "learning_rate": 4.5218182829229335e-05, + "loss": 18.8752, + "step": 13065 + }, + { + "epoch": 0.5446208995039806, + "grad_norm": 213.0, + "learning_rate": 4.521146379616059e-05, + "loss": 13.188, + "step": 13066 + }, + { + "epoch": 0.544662581801509, + "grad_norm": 268.0, + "learning_rate": 4.520474485036331e-05, + "loss": 12.8754, + "step": 13067 + }, + { + "epoch": 0.5447042640990372, + "grad_norm": 127.0, + "learning_rate": 4.5198025991960005e-05, + "loss": 8.0627, + "step": 13068 + }, + { + "epoch": 0.5447459463965654, + "grad_norm": 290.0, + "learning_rate": 4.519130722107308e-05, + "loss": 13.6879, + "step": 13069 + }, + { + "epoch": 0.5447876286940936, + "grad_norm": 232.0, + "learning_rate": 4.5184588537825035e-05, + "loss": 12.3752, + "step": 13070 + }, + { + "epoch": 0.5448293109916219, + "grad_norm": 197.0, + "learning_rate": 4.5177869942338264e-05, + "loss": 11.438, + "step": 13071 + }, + { + "epoch": 0.5448709932891501, + "grad_norm": 294.0, + "learning_rate": 4.517115143473527e-05, + "loss": 11.1251, + "step": 13072 + }, + { + "epoch": 0.5449126755866783, + "grad_norm": 372.0, + "learning_rate": 4.516443301513844e-05, + "loss": 16.0011, + "step": 13073 + }, + { + "epoch": 0.5449543578842065, + "grad_norm": 246.0, + "learning_rate": 4.515771468367026e-05, + "loss": 13.0004, + "step": 13074 + }, + { + "epoch": 0.5449960401817349, + "grad_norm": 420.0, + "learning_rate": 4.515099644045315e-05, + "loss": 17.6265, + "step": 13075 + }, + { + "epoch": 0.5450377224792631, + "grad_norm": 194.0, + "learning_rate": 4.514427828560959e-05, + "loss": 10.3753, + "step": 13076 + }, + { + "epoch": 0.5450794047767913, + "grad_norm": 1088.0, + "learning_rate": 4.5137560219261956e-05, + "loss": 25.2544, + "step": 13077 + }, + { + "epoch": 0.5451210870743195, + "grad_norm": 400.0, + "learning_rate": 4.5130842241532746e-05, + "loss": 16.5002, + "step": 13078 + }, + { + "epoch": 0.5451627693718478, + "grad_norm": 284.0, + "learning_rate": 4.5124124352544345e-05, + "loss": 12.1251, + "step": 13079 + }, + { + "epoch": 0.545204451669376, + "grad_norm": 195.0, + "learning_rate": 4.5117406552419234e-05, + "loss": 10.7516, + "step": 13080 + }, + { + "epoch": 0.5452461339669042, + "grad_norm": 292.0, + "learning_rate": 4.51106888412798e-05, + "loss": 12.1252, + "step": 13081 + }, + { + "epoch": 0.5452878162644325, + "grad_norm": 1040.0, + "learning_rate": 4.5103971219248516e-05, + "loss": 26.7506, + "step": 13082 + }, + { + "epoch": 0.5453294985619608, + "grad_norm": 330.0, + "learning_rate": 4.509725368644776e-05, + "loss": 14.3753, + "step": 13083 + }, + { + "epoch": 0.545371180859489, + "grad_norm": 506.0, + "learning_rate": 4.5090536243000034e-05, + "loss": 17.0001, + "step": 13084 + }, + { + "epoch": 0.5454128631570172, + "grad_norm": 203.0, + "learning_rate": 4.508381888902768e-05, + "loss": 9.5629, + "step": 13085 + }, + { + "epoch": 0.5454545454545454, + "grad_norm": 134.0, + "learning_rate": 4.507710162465319e-05, + "loss": 10.563, + "step": 13086 + }, + { + "epoch": 0.5454962277520737, + "grad_norm": 544.0, + "learning_rate": 4.507038444999895e-05, + "loss": 18.6256, + "step": 13087 + }, + { + "epoch": 0.545537910049602, + "grad_norm": 223.0, + "learning_rate": 4.506366736518739e-05, + "loss": 11.438, + "step": 13088 + }, + { + "epoch": 0.5455795923471302, + "grad_norm": 398.0, + "learning_rate": 4.505695037034092e-05, + "loss": 15.7508, + "step": 13089 + }, + { + "epoch": 0.5456212746446584, + "grad_norm": 89.5, + "learning_rate": 4.5050233465581995e-05, + "loss": 9.0003, + "step": 13090 + }, + { + "epoch": 0.5456629569421867, + "grad_norm": 426.0, + "learning_rate": 4.504351665103298e-05, + "loss": 15.0004, + "step": 13091 + }, + { + "epoch": 0.5457046392397149, + "grad_norm": 792.0, + "learning_rate": 4.5036799926816335e-05, + "loss": 21.2504, + "step": 13092 + }, + { + "epoch": 0.5457463215372431, + "grad_norm": 422.0, + "learning_rate": 4.5030083293054434e-05, + "loss": 15.0004, + "step": 13093 + }, + { + "epoch": 0.5457880038347713, + "grad_norm": 212.0, + "learning_rate": 4.5023366749869724e-05, + "loss": 11.6256, + "step": 13094 + }, + { + "epoch": 0.5458296861322997, + "grad_norm": 624.0, + "learning_rate": 4.5016650297384576e-05, + "loss": 19.3752, + "step": 13095 + }, + { + "epoch": 0.5458713684298279, + "grad_norm": 576.0, + "learning_rate": 4.500993393572144e-05, + "loss": 20.7501, + "step": 13096 + }, + { + "epoch": 0.5459130507273561, + "grad_norm": 320.0, + "learning_rate": 4.5003217665002676e-05, + "loss": 12.5005, + "step": 13097 + }, + { + "epoch": 0.5459547330248843, + "grad_norm": 224.0, + "learning_rate": 4.499650148535073e-05, + "loss": 11.6252, + "step": 13098 + }, + { + "epoch": 0.5459964153224126, + "grad_norm": 60.75, + "learning_rate": 4.498978539688799e-05, + "loss": 8.3133, + "step": 13099 + }, + { + "epoch": 0.5460380976199408, + "grad_norm": 326.0, + "learning_rate": 4.498306939973685e-05, + "loss": 12.5004, + "step": 13100 + }, + { + "epoch": 0.546079779917469, + "grad_norm": 504.0, + "learning_rate": 4.4976353494019705e-05, + "loss": 18.3752, + "step": 13101 + }, + { + "epoch": 0.5461214622149972, + "grad_norm": 368.0, + "learning_rate": 4.4969637679858986e-05, + "loss": 13.8132, + "step": 13102 + }, + { + "epoch": 0.5461631445125256, + "grad_norm": 133.0, + "learning_rate": 4.4962921957377054e-05, + "loss": 9.4384, + "step": 13103 + }, + { + "epoch": 0.5462048268100538, + "grad_norm": 490.0, + "learning_rate": 4.495620632669632e-05, + "loss": 16.1252, + "step": 13104 + }, + { + "epoch": 0.546246509107582, + "grad_norm": 138.0, + "learning_rate": 4.494949078793917e-05, + "loss": 10.1882, + "step": 13105 + }, + { + "epoch": 0.5462881914051102, + "grad_norm": 370.0, + "learning_rate": 4.494277534122801e-05, + "loss": 12.3751, + "step": 13106 + }, + { + "epoch": 0.5463298737026385, + "grad_norm": 1464.0, + "learning_rate": 4.49360599866852e-05, + "loss": 33.7505, + "step": 13107 + }, + { + "epoch": 0.5463715560001667, + "grad_norm": 792.0, + "learning_rate": 4.492934472443317e-05, + "loss": 22.2506, + "step": 13108 + }, + { + "epoch": 0.546413238297695, + "grad_norm": 276.0, + "learning_rate": 4.492262955459426e-05, + "loss": 12.7502, + "step": 13109 + }, + { + "epoch": 0.5464549205952232, + "grad_norm": 820.0, + "learning_rate": 4.491591447729089e-05, + "loss": 22.5007, + "step": 13110 + }, + { + "epoch": 0.5464966028927515, + "grad_norm": 856.0, + "learning_rate": 4.4909199492645425e-05, + "loss": 20.1293, + "step": 13111 + }, + { + "epoch": 0.5465382851902797, + "grad_norm": 408.0, + "learning_rate": 4.490248460078025e-05, + "loss": 16.3754, + "step": 13112 + }, + { + "epoch": 0.5465799674878079, + "grad_norm": 310.0, + "learning_rate": 4.489576980181774e-05, + "loss": 13.3127, + "step": 13113 + }, + { + "epoch": 0.5466216497853361, + "grad_norm": 540.0, + "learning_rate": 4.4889055095880295e-05, + "loss": 17.7505, + "step": 13114 + }, + { + "epoch": 0.5466633320828644, + "grad_norm": 64.5, + "learning_rate": 4.488234048309026e-05, + "loss": 8.6888, + "step": 13115 + }, + { + "epoch": 0.5467050143803927, + "grad_norm": 656.0, + "learning_rate": 4.487562596357004e-05, + "loss": 19.7542, + "step": 13116 + }, + { + "epoch": 0.5467466966779209, + "grad_norm": 572.0, + "learning_rate": 4.486891153744197e-05, + "loss": 20.1253, + "step": 13117 + }, + { + "epoch": 0.5467883789754491, + "grad_norm": 406.0, + "learning_rate": 4.486219720482847e-05, + "loss": 15.8126, + "step": 13118 + }, + { + "epoch": 0.5468300612729774, + "grad_norm": 304.0, + "learning_rate": 4.485548296585185e-05, + "loss": 12.2523, + "step": 13119 + }, + { + "epoch": 0.5468717435705056, + "grad_norm": 268.0, + "learning_rate": 4.484876882063454e-05, + "loss": 12.8128, + "step": 13120 + }, + { + "epoch": 0.5469134258680338, + "grad_norm": 282.0, + "learning_rate": 4.4842054769298846e-05, + "loss": 13.5628, + "step": 13121 + }, + { + "epoch": 0.546955108165562, + "grad_norm": 380.0, + "learning_rate": 4.483534081196719e-05, + "loss": 12.6876, + "step": 13122 + }, + { + "epoch": 0.5469967904630904, + "grad_norm": 253.0, + "learning_rate": 4.4828626948761886e-05, + "loss": 12.8129, + "step": 13123 + }, + { + "epoch": 0.5470384727606186, + "grad_norm": 192.0, + "learning_rate": 4.482191317980532e-05, + "loss": 10.8756, + "step": 13124 + }, + { + "epoch": 0.5470801550581468, + "grad_norm": 484.0, + "learning_rate": 4.481519950521985e-05, + "loss": 18.7511, + "step": 13125 + }, + { + "epoch": 0.5471218373556751, + "grad_norm": 328.0, + "learning_rate": 4.480848592512783e-05, + "loss": 14.5627, + "step": 13126 + }, + { + "epoch": 0.5471635196532033, + "grad_norm": 67.0, + "learning_rate": 4.48017724396516e-05, + "loss": 6.8127, + "step": 13127 + }, + { + "epoch": 0.5472052019507315, + "grad_norm": 235.0, + "learning_rate": 4.479505904891356e-05, + "loss": 11.8755, + "step": 13128 + }, + { + "epoch": 0.5472468842482597, + "grad_norm": 274.0, + "learning_rate": 4.4788345753035996e-05, + "loss": 13.2502, + "step": 13129 + }, + { + "epoch": 0.5472885665457881, + "grad_norm": 512.0, + "learning_rate": 4.4781632552141326e-05, + "loss": 16.3756, + "step": 13130 + }, + { + "epoch": 0.5473302488433163, + "grad_norm": 148.0, + "learning_rate": 4.477491944635184e-05, + "loss": 9.7501, + "step": 13131 + }, + { + "epoch": 0.5473719311408445, + "grad_norm": 316.0, + "learning_rate": 4.4768206435789926e-05, + "loss": 14.1878, + "step": 13132 + }, + { + "epoch": 0.5474136134383727, + "grad_norm": 142.0, + "learning_rate": 4.4761493520577893e-05, + "loss": 10.6879, + "step": 13133 + }, + { + "epoch": 0.547455295735901, + "grad_norm": 576.0, + "learning_rate": 4.4754780700838136e-05, + "loss": 20.5004, + "step": 13134 + }, + { + "epoch": 0.5474969780334292, + "grad_norm": 318.0, + "learning_rate": 4.4748067976692945e-05, + "loss": 14.5003, + "step": 13135 + }, + { + "epoch": 0.5475386603309574, + "grad_norm": 620.0, + "learning_rate": 4.4741355348264686e-05, + "loss": 19.0001, + "step": 13136 + }, + { + "epoch": 0.5475803426284857, + "grad_norm": 135.0, + "learning_rate": 4.47346428156757e-05, + "loss": 10.7504, + "step": 13137 + }, + { + "epoch": 0.547622024926014, + "grad_norm": 140.0, + "learning_rate": 4.4727930379048306e-05, + "loss": 10.3752, + "step": 13138 + }, + { + "epoch": 0.5476637072235422, + "grad_norm": 255.0, + "learning_rate": 4.472121803850484e-05, + "loss": 12.3753, + "step": 13139 + }, + { + "epoch": 0.5477053895210704, + "grad_norm": 612.0, + "learning_rate": 4.471450579416767e-05, + "loss": 19.6252, + "step": 13140 + }, + { + "epoch": 0.5477470718185986, + "grad_norm": 79.0, + "learning_rate": 4.470779364615908e-05, + "loss": 8.8129, + "step": 13141 + }, + { + "epoch": 0.5477887541161269, + "grad_norm": 133.0, + "learning_rate": 4.470108159460144e-05, + "loss": 9.8752, + "step": 13142 + }, + { + "epoch": 0.5478304364136551, + "grad_norm": 472.0, + "learning_rate": 4.469436963961704e-05, + "loss": 16.6253, + "step": 13143 + }, + { + "epoch": 0.5478721187111834, + "grad_norm": 126.5, + "learning_rate": 4.468765778132824e-05, + "loss": 9.1254, + "step": 13144 + }, + { + "epoch": 0.5479138010087116, + "grad_norm": 130.0, + "learning_rate": 4.4680946019857326e-05, + "loss": 8.8754, + "step": 13145 + }, + { + "epoch": 0.5479554833062399, + "grad_norm": 151.0, + "learning_rate": 4.467423435532667e-05, + "loss": 10.688, + "step": 13146 + }, + { + "epoch": 0.5479971656037681, + "grad_norm": 220.0, + "learning_rate": 4.466752278785855e-05, + "loss": 11.0629, + "step": 13147 + }, + { + "epoch": 0.5480388479012963, + "grad_norm": 824.0, + "learning_rate": 4.46608113175753e-05, + "loss": 23.8758, + "step": 13148 + }, + { + "epoch": 0.5480805301988245, + "grad_norm": 235.0, + "learning_rate": 4.4654099944599244e-05, + "loss": 13.9378, + "step": 13149 + }, + { + "epoch": 0.5481222124963528, + "grad_norm": 213.0, + "learning_rate": 4.4647388669052686e-05, + "loss": 10.5008, + "step": 13150 + }, + { + "epoch": 0.5481638947938811, + "grad_norm": 474.0, + "learning_rate": 4.464067749105794e-05, + "loss": 16.3755, + "step": 13151 + }, + { + "epoch": 0.5482055770914093, + "grad_norm": 142.0, + "learning_rate": 4.4633966410737335e-05, + "loss": 10.2504, + "step": 13152 + }, + { + "epoch": 0.5482472593889375, + "grad_norm": 290.0, + "learning_rate": 4.462725542821315e-05, + "loss": 13.2501, + "step": 13153 + }, + { + "epoch": 0.5482889416864658, + "grad_norm": 60.75, + "learning_rate": 4.462054454360774e-05, + "loss": 8.8754, + "step": 13154 + }, + { + "epoch": 0.548330623983994, + "grad_norm": 276.0, + "learning_rate": 4.461383375704336e-05, + "loss": 12.5627, + "step": 13155 + }, + { + "epoch": 0.5483723062815222, + "grad_norm": 306.0, + "learning_rate": 4.4607123068642356e-05, + "loss": 14.0004, + "step": 13156 + }, + { + "epoch": 0.5484139885790504, + "grad_norm": 852.0, + "learning_rate": 4.4600412478526995e-05, + "loss": 23.5032, + "step": 13157 + }, + { + "epoch": 0.5484556708765788, + "grad_norm": 366.0, + "learning_rate": 4.459370198681962e-05, + "loss": 13.9378, + "step": 13158 + }, + { + "epoch": 0.548497353174107, + "grad_norm": 436.0, + "learning_rate": 4.458699159364247e-05, + "loss": 16.2545, + "step": 13159 + }, + { + "epoch": 0.5485390354716352, + "grad_norm": 160.0, + "learning_rate": 4.458028129911791e-05, + "loss": 11.3771, + "step": 13160 + }, + { + "epoch": 0.5485807177691634, + "grad_norm": 488.0, + "learning_rate": 4.4573571103368184e-05, + "loss": 16.7502, + "step": 13161 + }, + { + "epoch": 0.5486224000666917, + "grad_norm": 500.0, + "learning_rate": 4.4566861006515616e-05, + "loss": 17.0006, + "step": 13162 + }, + { + "epoch": 0.5486640823642199, + "grad_norm": 1408.0, + "learning_rate": 4.4560151008682474e-05, + "loss": 28.5007, + "step": 13163 + }, + { + "epoch": 0.5487057646617481, + "grad_norm": 153.0, + "learning_rate": 4.455344110999109e-05, + "loss": 9.1878, + "step": 13164 + }, + { + "epoch": 0.5487474469592764, + "grad_norm": 392.0, + "learning_rate": 4.4546731310563705e-05, + "loss": 14.3752, + "step": 13165 + }, + { + "epoch": 0.5487891292568047, + "grad_norm": 272.0, + "learning_rate": 4.454002161052264e-05, + "loss": 13.2504, + "step": 13166 + }, + { + "epoch": 0.5488308115543329, + "grad_norm": 170.0, + "learning_rate": 4.453331200999015e-05, + "loss": 11.3756, + "step": 13167 + }, + { + "epoch": 0.5488724938518611, + "grad_norm": 856.0, + "learning_rate": 4.4526602509088553e-05, + "loss": 22.5026, + "step": 13168 + }, + { + "epoch": 0.5489141761493893, + "grad_norm": 318.0, + "learning_rate": 4.451989310794009e-05, + "loss": 14.0003, + "step": 13169 + }, + { + "epoch": 0.5489558584469176, + "grad_norm": 195.0, + "learning_rate": 4.4513183806667083e-05, + "loss": 11.0007, + "step": 13170 + }, + { + "epoch": 0.5489975407444458, + "grad_norm": 182.0, + "learning_rate": 4.450647460539177e-05, + "loss": 12.1255, + "step": 13171 + }, + { + "epoch": 0.5490392230419741, + "grad_norm": 108.5, + "learning_rate": 4.4499765504236465e-05, + "loss": 8.6883, + "step": 13172 + }, + { + "epoch": 0.5490809053395023, + "grad_norm": 262.0, + "learning_rate": 4.44930565033234e-05, + "loss": 11.2511, + "step": 13173 + }, + { + "epoch": 0.5491225876370306, + "grad_norm": 446.0, + "learning_rate": 4.448634760277487e-05, + "loss": 16.3758, + "step": 13174 + }, + { + "epoch": 0.5491642699345588, + "grad_norm": 462.0, + "learning_rate": 4.447963880271316e-05, + "loss": 16.1251, + "step": 13175 + }, + { + "epoch": 0.549205952232087, + "grad_norm": 1224.0, + "learning_rate": 4.447293010326052e-05, + "loss": 27.7547, + "step": 13176 + }, + { + "epoch": 0.5492476345296152, + "grad_norm": 376.0, + "learning_rate": 4.44662215045392e-05, + "loss": 15.3757, + "step": 13177 + }, + { + "epoch": 0.5492893168271435, + "grad_norm": 288.0, + "learning_rate": 4.445951300667151e-05, + "loss": 12.2505, + "step": 13178 + }, + { + "epoch": 0.5493309991246718, + "grad_norm": 124.0, + "learning_rate": 4.445280460977967e-05, + "loss": 8.3756, + "step": 13179 + }, + { + "epoch": 0.5493726814222, + "grad_norm": 310.0, + "learning_rate": 4.4446096313985976e-05, + "loss": 11.8133, + "step": 13180 + }, + { + "epoch": 0.5494143637197282, + "grad_norm": 298.0, + "learning_rate": 4.443938811941265e-05, + "loss": 13.1255, + "step": 13181 + }, + { + "epoch": 0.5494560460172565, + "grad_norm": 214.0, + "learning_rate": 4.443268002618199e-05, + "loss": 11.0629, + "step": 13182 + }, + { + "epoch": 0.5494977283147847, + "grad_norm": 1016.0, + "learning_rate": 4.442597203441621e-05, + "loss": 23.6299, + "step": 13183 + }, + { + "epoch": 0.5495394106123129, + "grad_norm": 576.0, + "learning_rate": 4.441926414423761e-05, + "loss": 18.2511, + "step": 13184 + }, + { + "epoch": 0.5495810929098411, + "grad_norm": 186.0, + "learning_rate": 4.441255635576838e-05, + "loss": 8.438, + "step": 13185 + }, + { + "epoch": 0.5496227752073695, + "grad_norm": 382.0, + "learning_rate": 4.4405848669130826e-05, + "loss": 14.8757, + "step": 13186 + }, + { + "epoch": 0.5496644575048977, + "grad_norm": 203.0, + "learning_rate": 4.4399141084447184e-05, + "loss": 6.0324, + "step": 13187 + }, + { + "epoch": 0.5497061398024259, + "grad_norm": 243.0, + "learning_rate": 4.4392433601839686e-05, + "loss": 12.2504, + "step": 13188 + }, + { + "epoch": 0.5497478220999541, + "grad_norm": 448.0, + "learning_rate": 4.438572622143057e-05, + "loss": 15.3127, + "step": 13189 + }, + { + "epoch": 0.5497895043974824, + "grad_norm": 264.0, + "learning_rate": 4.437901894334212e-05, + "loss": 11.8126, + "step": 13190 + }, + { + "epoch": 0.5498311866950106, + "grad_norm": 320.0, + "learning_rate": 4.437231176769652e-05, + "loss": 11.8756, + "step": 13191 + }, + { + "epoch": 0.5498728689925388, + "grad_norm": 452.0, + "learning_rate": 4.436560469461606e-05, + "loss": 15.8127, + "step": 13192 + }, + { + "epoch": 0.549914551290067, + "grad_norm": 294.0, + "learning_rate": 4.435889772422294e-05, + "loss": 11.5022, + "step": 13193 + }, + { + "epoch": 0.5499562335875954, + "grad_norm": 494.0, + "learning_rate": 4.4352190856639424e-05, + "loss": 15.8752, + "step": 13194 + }, + { + "epoch": 0.5499979158851236, + "grad_norm": 528.0, + "learning_rate": 4.434548409198772e-05, + "loss": 15.1252, + "step": 13195 + }, + { + "epoch": 0.5500395981826518, + "grad_norm": 155.0, + "learning_rate": 4.433877743039008e-05, + "loss": 8.8129, + "step": 13196 + }, + { + "epoch": 0.55008128048018, + "grad_norm": 272.0, + "learning_rate": 4.433207087196871e-05, + "loss": 12.6882, + "step": 13197 + }, + { + "epoch": 0.5501229627777083, + "grad_norm": 164.0, + "learning_rate": 4.4325364416845854e-05, + "loss": 10.5628, + "step": 13198 + }, + { + "epoch": 0.5501646450752365, + "grad_norm": 358.0, + "learning_rate": 4.4318658065143744e-05, + "loss": 14.0003, + "step": 13199 + }, + { + "epoch": 0.5502063273727648, + "grad_norm": 418.0, + "learning_rate": 4.431195181698459e-05, + "loss": 15.4376, + "step": 13200 + }, + { + "epoch": 0.5502480096702931, + "grad_norm": 432.0, + "learning_rate": 4.430524567249061e-05, + "loss": 16.2503, + "step": 13201 + }, + { + "epoch": 0.5502896919678213, + "grad_norm": 165.0, + "learning_rate": 4.429853963178405e-05, + "loss": 9.6257, + "step": 13202 + }, + { + "epoch": 0.5503313742653495, + "grad_norm": 45.5, + "learning_rate": 4.42918336949871e-05, + "loss": 7.2503, + "step": 13203 + }, + { + "epoch": 0.5503730565628777, + "grad_norm": 358.0, + "learning_rate": 4.4285127862222004e-05, + "loss": 13.4378, + "step": 13204 + }, + { + "epoch": 0.550414738860406, + "grad_norm": 520.0, + "learning_rate": 4.4278422133610936e-05, + "loss": 18.8754, + "step": 13205 + }, + { + "epoch": 0.5504564211579343, + "grad_norm": 458.0, + "learning_rate": 4.4271716509276156e-05, + "loss": 15.5631, + "step": 13206 + }, + { + "epoch": 0.5504981034554625, + "grad_norm": 484.0, + "learning_rate": 4.426501098933983e-05, + "loss": 17.1254, + "step": 13207 + }, + { + "epoch": 0.5505397857529907, + "grad_norm": 482.0, + "learning_rate": 4.4258305573924205e-05, + "loss": 15.4386, + "step": 13208 + }, + { + "epoch": 0.550581468050519, + "grad_norm": 235.0, + "learning_rate": 4.425160026315146e-05, + "loss": 12.5627, + "step": 13209 + }, + { + "epoch": 0.5506231503480472, + "grad_norm": 119.0, + "learning_rate": 4.4244895057143824e-05, + "loss": 9.1256, + "step": 13210 + }, + { + "epoch": 0.5506648326455754, + "grad_norm": 358.0, + "learning_rate": 4.423818995602347e-05, + "loss": 14.9376, + "step": 13211 + }, + { + "epoch": 0.5507065149431036, + "grad_norm": 180.0, + "learning_rate": 4.423148495991262e-05, + "loss": 8.9381, + "step": 13212 + }, + { + "epoch": 0.550748197240632, + "grad_norm": 496.0, + "learning_rate": 4.422478006893347e-05, + "loss": 17.5002, + "step": 13213 + }, + { + "epoch": 0.5507898795381602, + "grad_norm": 213.0, + "learning_rate": 4.421807528320822e-05, + "loss": 12.6254, + "step": 13214 + }, + { + "epoch": 0.5508315618356884, + "grad_norm": 348.0, + "learning_rate": 4.4211370602859044e-05, + "loss": 13.942, + "step": 13215 + }, + { + "epoch": 0.5508732441332166, + "grad_norm": 180.0, + "learning_rate": 4.420466602800818e-05, + "loss": 9.8753, + "step": 13216 + }, + { + "epoch": 0.5509149264307449, + "grad_norm": 262.0, + "learning_rate": 4.419796155877777e-05, + "loss": 12.1877, + "step": 13217 + }, + { + "epoch": 0.5509566087282731, + "grad_norm": 344.0, + "learning_rate": 4.419125719529004e-05, + "loss": 15.3756, + "step": 13218 + }, + { + "epoch": 0.5509982910258013, + "grad_norm": 168.0, + "learning_rate": 4.418455293766715e-05, + "loss": 11.7509, + "step": 13219 + }, + { + "epoch": 0.5510399733233295, + "grad_norm": 106.0, + "learning_rate": 4.4177848786031315e-05, + "loss": 7.938, + "step": 13220 + }, + { + "epoch": 0.5510816556208579, + "grad_norm": 408.0, + "learning_rate": 4.417114474050469e-05, + "loss": 14.938, + "step": 13221 + }, + { + "epoch": 0.5511233379183861, + "grad_norm": 580.0, + "learning_rate": 4.4164440801209484e-05, + "loss": 19.6253, + "step": 13222 + }, + { + "epoch": 0.5511650202159143, + "grad_norm": 192.0, + "learning_rate": 4.4157736968267835e-05, + "loss": 9.7502, + "step": 13223 + }, + { + "epoch": 0.5512067025134425, + "grad_norm": 117.0, + "learning_rate": 4.415103324180197e-05, + "loss": 10.7509, + "step": 13224 + }, + { + "epoch": 0.5512483848109708, + "grad_norm": 193.0, + "learning_rate": 4.414432962193404e-05, + "loss": 11.6258, + "step": 13225 + }, + { + "epoch": 0.551290067108499, + "grad_norm": 274.0, + "learning_rate": 4.4137626108786224e-05, + "loss": 12.6253, + "step": 13226 + }, + { + "epoch": 0.5513317494060272, + "grad_norm": 940.0, + "learning_rate": 4.4130922702480684e-05, + "loss": 20.1252, + "step": 13227 + }, + { + "epoch": 0.5513734317035555, + "grad_norm": 772.0, + "learning_rate": 4.412421940313961e-05, + "loss": 22.1289, + "step": 13228 + }, + { + "epoch": 0.5514151140010838, + "grad_norm": 196.0, + "learning_rate": 4.4117516210885145e-05, + "loss": 12.2516, + "step": 13229 + }, + { + "epoch": 0.551456796298612, + "grad_norm": 716.0, + "learning_rate": 4.411081312583949e-05, + "loss": 20.2502, + "step": 13230 + }, + { + "epoch": 0.5514984785961402, + "grad_norm": 234.0, + "learning_rate": 4.410411014812477e-05, + "loss": 12.8756, + "step": 13231 + }, + { + "epoch": 0.5515401608936684, + "grad_norm": 239.0, + "learning_rate": 4.4097407277863176e-05, + "loss": 12.5002, + "step": 13232 + }, + { + "epoch": 0.5515818431911967, + "grad_norm": 332.0, + "learning_rate": 4.409070451517685e-05, + "loss": 14.2502, + "step": 13233 + }, + { + "epoch": 0.551623525488725, + "grad_norm": 474.0, + "learning_rate": 4.4084001860187975e-05, + "loss": 14.6897, + "step": 13234 + }, + { + "epoch": 0.5516652077862532, + "grad_norm": 302.0, + "learning_rate": 4.4077299313018664e-05, + "loss": 12.628, + "step": 13235 + }, + { + "epoch": 0.5517068900837814, + "grad_norm": 296.0, + "learning_rate": 4.407059687379112e-05, + "loss": 13.9377, + "step": 13236 + }, + { + "epoch": 0.5517485723813097, + "grad_norm": 344.0, + "learning_rate": 4.4063894542627473e-05, + "loss": 14.6883, + "step": 13237 + }, + { + "epoch": 0.5517902546788379, + "grad_norm": 276.0, + "learning_rate": 4.4057192319649865e-05, + "loss": 13.3754, + "step": 13238 + }, + { + "epoch": 0.5518319369763661, + "grad_norm": 712.0, + "learning_rate": 4.4050490204980456e-05, + "loss": 21.8759, + "step": 13239 + }, + { + "epoch": 0.5518736192738943, + "grad_norm": 229.0, + "learning_rate": 4.40437881987414e-05, + "loss": 11.1881, + "step": 13240 + }, + { + "epoch": 0.5519153015714227, + "grad_norm": 260.0, + "learning_rate": 4.403708630105482e-05, + "loss": 11.8753, + "step": 13241 + }, + { + "epoch": 0.5519569838689509, + "grad_norm": 368.0, + "learning_rate": 4.4030384512042896e-05, + "loss": 13.5009, + "step": 13242 + }, + { + "epoch": 0.5519986661664791, + "grad_norm": 652.0, + "learning_rate": 4.402368283182772e-05, + "loss": 20.6252, + "step": 13243 + }, + { + "epoch": 0.5520403484640073, + "grad_norm": 227.0, + "learning_rate": 4.401698126053147e-05, + "loss": 12.5634, + "step": 13244 + }, + { + "epoch": 0.5520820307615356, + "grad_norm": 290.0, + "learning_rate": 4.401027979827625e-05, + "loss": 12.9386, + "step": 13245 + }, + { + "epoch": 0.5521237130590638, + "grad_norm": 528.0, + "learning_rate": 4.4003578445184234e-05, + "loss": 15.4394, + "step": 13246 + }, + { + "epoch": 0.552165395356592, + "grad_norm": 572.0, + "learning_rate": 4.399687720137751e-05, + "loss": 18.2502, + "step": 13247 + }, + { + "epoch": 0.5522070776541202, + "grad_norm": 1168.0, + "learning_rate": 4.399017606697826e-05, + "loss": 26.5029, + "step": 13248 + }, + { + "epoch": 0.5522487599516486, + "grad_norm": 648.0, + "learning_rate": 4.398347504210856e-05, + "loss": 20.5014, + "step": 13249 + }, + { + "epoch": 0.5522904422491768, + "grad_norm": 264.0, + "learning_rate": 4.397677412689057e-05, + "loss": 12.938, + "step": 13250 + }, + { + "epoch": 0.552332124546705, + "grad_norm": 75.0, + "learning_rate": 4.397007332144641e-05, + "loss": 6.282, + "step": 13251 + }, + { + "epoch": 0.5523738068442332, + "grad_norm": 236.0, + "learning_rate": 4.396337262589818e-05, + "loss": 11.5629, + "step": 13252 + }, + { + "epoch": 0.5524154891417615, + "grad_norm": 346.0, + "learning_rate": 4.395667204036804e-05, + "loss": 14.2502, + "step": 13253 + }, + { + "epoch": 0.5524571714392897, + "grad_norm": 100.5, + "learning_rate": 4.394997156497807e-05, + "loss": 7.344, + "step": 13254 + }, + { + "epoch": 0.552498853736818, + "grad_norm": 736.0, + "learning_rate": 4.3943271199850435e-05, + "loss": 25.002, + "step": 13255 + }, + { + "epoch": 0.5525405360343462, + "grad_norm": 186.0, + "learning_rate": 4.393657094510719e-05, + "loss": 9.9377, + "step": 13256 + }, + { + "epoch": 0.5525822183318745, + "grad_norm": 79.0, + "learning_rate": 4.3929870800870504e-05, + "loss": 8.4381, + "step": 13257 + }, + { + "epoch": 0.5526239006294027, + "grad_norm": 492.0, + "learning_rate": 4.392317076726244e-05, + "loss": 15.3167, + "step": 13258 + }, + { + "epoch": 0.5526655829269309, + "grad_norm": 197.0, + "learning_rate": 4.391647084440515e-05, + "loss": 11.126, + "step": 13259 + }, + { + "epoch": 0.5527072652244591, + "grad_norm": 159.0, + "learning_rate": 4.39097710324207e-05, + "loss": 11.2508, + "step": 13260 + }, + { + "epoch": 0.5527489475219874, + "grad_norm": 494.0, + "learning_rate": 4.390307133143123e-05, + "loss": 17.1253, + "step": 13261 + }, + { + "epoch": 0.5527906298195157, + "grad_norm": 756.0, + "learning_rate": 4.389637174155881e-05, + "loss": 20.8771, + "step": 13262 + }, + { + "epoch": 0.5528323121170439, + "grad_norm": 436.0, + "learning_rate": 4.388967226292557e-05, + "loss": 15.2501, + "step": 13263 + }, + { + "epoch": 0.5528739944145721, + "grad_norm": 282.0, + "learning_rate": 4.388297289565358e-05, + "loss": 13.2504, + "step": 13264 + }, + { + "epoch": 0.5529156767121004, + "grad_norm": 796.0, + "learning_rate": 4.387627363986496e-05, + "loss": 21.6252, + "step": 13265 + }, + { + "epoch": 0.5529573590096286, + "grad_norm": 294.0, + "learning_rate": 4.38695744956818e-05, + "loss": 13.2503, + "step": 13266 + }, + { + "epoch": 0.5529990413071568, + "grad_norm": 624.0, + "learning_rate": 4.3862875463226186e-05, + "loss": 19.1253, + "step": 13267 + }, + { + "epoch": 0.553040723604685, + "grad_norm": 203.0, + "learning_rate": 4.3856176542620205e-05, + "loss": 10.0669, + "step": 13268 + }, + { + "epoch": 0.5530824059022134, + "grad_norm": 190.0, + "learning_rate": 4.384947773398597e-05, + "loss": 11.9377, + "step": 13269 + }, + { + "epoch": 0.5531240881997416, + "grad_norm": 480.0, + "learning_rate": 4.3842779037445526e-05, + "loss": 13.0004, + "step": 13270 + }, + { + "epoch": 0.5531657704972698, + "grad_norm": 178.0, + "learning_rate": 4.3836080453121004e-05, + "loss": 10.6879, + "step": 13271 + }, + { + "epoch": 0.5532074527947981, + "grad_norm": 176.0, + "learning_rate": 4.382938198113444e-05, + "loss": 11.8129, + "step": 13272 + }, + { + "epoch": 0.5532491350923263, + "grad_norm": 214.0, + "learning_rate": 4.3822683621607966e-05, + "loss": 12.2506, + "step": 13273 + }, + { + "epoch": 0.5532908173898545, + "grad_norm": 1336.0, + "learning_rate": 4.38159853746636e-05, + "loss": 26.7545, + "step": 13274 + }, + { + "epoch": 0.5533324996873827, + "grad_norm": 3568.0, + "learning_rate": 4.380928724042348e-05, + "loss": 66.0141, + "step": 13275 + }, + { + "epoch": 0.5533741819849111, + "grad_norm": 276.0, + "learning_rate": 4.3802589219009614e-05, + "loss": 11.7508, + "step": 13276 + }, + { + "epoch": 0.5534158642824393, + "grad_norm": 189.0, + "learning_rate": 4.379589131054413e-05, + "loss": 10.0003, + "step": 13277 + }, + { + "epoch": 0.5534575465799675, + "grad_norm": 490.0, + "learning_rate": 4.378919351514908e-05, + "loss": 16.8752, + "step": 13278 + }, + { + "epoch": 0.5534992288774957, + "grad_norm": 150.0, + "learning_rate": 4.378249583294653e-05, + "loss": 10.188, + "step": 13279 + }, + { + "epoch": 0.553540911175024, + "grad_norm": 468.0, + "learning_rate": 4.377579826405853e-05, + "loss": 18.3752, + "step": 13280 + }, + { + "epoch": 0.5535825934725522, + "grad_norm": 280.0, + "learning_rate": 4.376910080860718e-05, + "loss": 13.2506, + "step": 13281 + }, + { + "epoch": 0.5536242757700804, + "grad_norm": 520.0, + "learning_rate": 4.3762403466714505e-05, + "loss": 17.2501, + "step": 13282 + }, + { + "epoch": 0.5536659580676087, + "grad_norm": 302.0, + "learning_rate": 4.37557062385026e-05, + "loss": 12.6878, + "step": 13283 + }, + { + "epoch": 0.553707640365137, + "grad_norm": 808.0, + "learning_rate": 4.374900912409347e-05, + "loss": 20.6273, + "step": 13284 + }, + { + "epoch": 0.5537493226626652, + "grad_norm": 612.0, + "learning_rate": 4.374231212360924e-05, + "loss": 18.3753, + "step": 13285 + }, + { + "epoch": 0.5537910049601934, + "grad_norm": 161.0, + "learning_rate": 4.37356152371719e-05, + "loss": 9.6298, + "step": 13286 + }, + { + "epoch": 0.5538326872577216, + "grad_norm": 436.0, + "learning_rate": 4.372891846490356e-05, + "loss": 15.4381, + "step": 13287 + }, + { + "epoch": 0.5538743695552499, + "grad_norm": 124.0, + "learning_rate": 4.37222218069262e-05, + "loss": 8.3129, + "step": 13288 + }, + { + "epoch": 0.5539160518527781, + "grad_norm": 164.0, + "learning_rate": 4.3715525263361924e-05, + "loss": 10.6253, + "step": 13289 + }, + { + "epoch": 0.5539577341503064, + "grad_norm": 482.0, + "learning_rate": 4.370882883433275e-05, + "loss": 16.8752, + "step": 13290 + }, + { + "epoch": 0.5539994164478346, + "grad_norm": 470.0, + "learning_rate": 4.370213251996074e-05, + "loss": 15.8148, + "step": 13291 + }, + { + "epoch": 0.5540410987453629, + "grad_norm": 350.0, + "learning_rate": 4.369543632036791e-05, + "loss": 15.1886, + "step": 13292 + }, + { + "epoch": 0.5540827810428911, + "grad_norm": 490.0, + "learning_rate": 4.3688740235676337e-05, + "loss": 16.6262, + "step": 13293 + }, + { + "epoch": 0.5541244633404193, + "grad_norm": 233.0, + "learning_rate": 4.368204426600801e-05, + "loss": 11.9377, + "step": 13294 + }, + { + "epoch": 0.5541661456379475, + "grad_norm": 1208.0, + "learning_rate": 4.3675348411485004e-05, + "loss": 29.3754, + "step": 13295 + }, + { + "epoch": 0.5542078279354758, + "grad_norm": 636.0, + "learning_rate": 4.3668652672229314e-05, + "loss": 18.5009, + "step": 13296 + }, + { + "epoch": 0.5542495102330041, + "grad_norm": 512.0, + "learning_rate": 4.366195704836301e-05, + "loss": 15.0025, + "step": 13297 + }, + { + "epoch": 0.5542911925305323, + "grad_norm": 536.0, + "learning_rate": 4.365526154000808e-05, + "loss": 17.0004, + "step": 13298 + }, + { + "epoch": 0.5543328748280605, + "grad_norm": 296.0, + "learning_rate": 4.364856614728661e-05, + "loss": 13.0003, + "step": 13299 + }, + { + "epoch": 0.5543745571255888, + "grad_norm": 135.0, + "learning_rate": 4.3641870870320544e-05, + "loss": 9.5005, + "step": 13300 + }, + { + "epoch": 0.554416239423117, + "grad_norm": 426.0, + "learning_rate": 4.363517570923198e-05, + "loss": 16.3761, + "step": 13301 + }, + { + "epoch": 0.5544579217206452, + "grad_norm": 218.0, + "learning_rate": 4.362848066414287e-05, + "loss": 12.4383, + "step": 13302 + }, + { + "epoch": 0.5544996040181734, + "grad_norm": 220.0, + "learning_rate": 4.362178573517528e-05, + "loss": 12.1262, + "step": 13303 + }, + { + "epoch": 0.5545412863157018, + "grad_norm": 720.0, + "learning_rate": 4.3615090922451224e-05, + "loss": 22.0002, + "step": 13304 + }, + { + "epoch": 0.55458296861323, + "grad_norm": 424.0, + "learning_rate": 4.360839622609269e-05, + "loss": 15.3752, + "step": 13305 + }, + { + "epoch": 0.5546246509107582, + "grad_norm": 316.0, + "learning_rate": 4.360170164622169e-05, + "loss": 12.8128, + "step": 13306 + }, + { + "epoch": 0.5546663332082864, + "grad_norm": 692.0, + "learning_rate": 4.359500718296028e-05, + "loss": 20.6288, + "step": 13307 + }, + { + "epoch": 0.5547080155058147, + "grad_norm": 282.0, + "learning_rate": 4.35883128364304e-05, + "loss": 11.6255, + "step": 13308 + }, + { + "epoch": 0.5547496978033429, + "grad_norm": 366.0, + "learning_rate": 4.3581618606754114e-05, + "loss": 12.6254, + "step": 13309 + }, + { + "epoch": 0.5547913801008711, + "grad_norm": 199.0, + "learning_rate": 4.3574924494053374e-05, + "loss": 11.6254, + "step": 13310 + }, + { + "epoch": 0.5548330623983994, + "grad_norm": 1688.0, + "learning_rate": 4.356823049845023e-05, + "loss": 34.5039, + "step": 13311 + }, + { + "epoch": 0.5548747446959277, + "grad_norm": 680.0, + "learning_rate": 4.356153662006663e-05, + "loss": 21.7504, + "step": 13312 + }, + { + "epoch": 0.5549164269934559, + "grad_norm": 346.0, + "learning_rate": 4.355484285902462e-05, + "loss": 13.063, + "step": 13313 + }, + { + "epoch": 0.5549581092909841, + "grad_norm": 142.0, + "learning_rate": 4.3548149215446155e-05, + "loss": 7.6567, + "step": 13314 + }, + { + "epoch": 0.5549997915885123, + "grad_norm": 316.0, + "learning_rate": 4.354145568945325e-05, + "loss": 12.8754, + "step": 13315 + }, + { + "epoch": 0.5550414738860406, + "grad_norm": 288.0, + "learning_rate": 4.353476228116788e-05, + "loss": 13.1252, + "step": 13316 + }, + { + "epoch": 0.5550831561835688, + "grad_norm": 270.0, + "learning_rate": 4.3528068990712056e-05, + "loss": 13.5629, + "step": 13317 + }, + { + "epoch": 0.5551248384810971, + "grad_norm": 360.0, + "learning_rate": 4.352137581820773e-05, + "loss": 14.627, + "step": 13318 + }, + { + "epoch": 0.5551665207786253, + "grad_norm": 165.0, + "learning_rate": 4.351468276377693e-05, + "loss": 10.3754, + "step": 13319 + }, + { + "epoch": 0.5552082030761536, + "grad_norm": 430.0, + "learning_rate": 4.350798982754159e-05, + "loss": 14.4377, + "step": 13320 + }, + { + "epoch": 0.5552498853736818, + "grad_norm": 454.0, + "learning_rate": 4.350129700962373e-05, + "loss": 15.5003, + "step": 13321 + }, + { + "epoch": 0.55529156767121, + "grad_norm": 576.0, + "learning_rate": 4.3494604310145294e-05, + "loss": 17.2528, + "step": 13322 + }, + { + "epoch": 0.5553332499687382, + "grad_norm": 231.0, + "learning_rate": 4.348791172922829e-05, + "loss": 11.7504, + "step": 13323 + }, + { + "epoch": 0.5553749322662666, + "grad_norm": 264.0, + "learning_rate": 4.348121926699465e-05, + "loss": 13.0007, + "step": 13324 + }, + { + "epoch": 0.5554166145637948, + "grad_norm": 157.0, + "learning_rate": 4.347452692356639e-05, + "loss": 10.3758, + "step": 13325 + }, + { + "epoch": 0.555458296861323, + "grad_norm": 90.0, + "learning_rate": 4.3467834699065436e-05, + "loss": 7.5006, + "step": 13326 + }, + { + "epoch": 0.5554999791588512, + "grad_norm": 66.0, + "learning_rate": 4.346114259361378e-05, + "loss": 7.2815, + "step": 13327 + }, + { + "epoch": 0.5555416614563795, + "grad_norm": 81.5, + "learning_rate": 4.345445060733338e-05, + "loss": 8.1254, + "step": 13328 + }, + { + "epoch": 0.5555833437539077, + "grad_norm": 182.0, + "learning_rate": 4.3447758740346206e-05, + "loss": 10.3752, + "step": 13329 + }, + { + "epoch": 0.5556250260514359, + "grad_norm": 322.0, + "learning_rate": 4.3441066992774195e-05, + "loss": 12.7503, + "step": 13330 + }, + { + "epoch": 0.5556667083489641, + "grad_norm": 266.0, + "learning_rate": 4.343437536473934e-05, + "loss": 13.1252, + "step": 13331 + }, + { + "epoch": 0.5557083906464925, + "grad_norm": 498.0, + "learning_rate": 4.342768385636357e-05, + "loss": 15.2503, + "step": 13332 + }, + { + "epoch": 0.5557500729440207, + "grad_norm": 992.0, + "learning_rate": 4.342099246776885e-05, + "loss": 28.0007, + "step": 13333 + }, + { + "epoch": 0.5557917552415489, + "grad_norm": 450.0, + "learning_rate": 4.3414301199077115e-05, + "loss": 14.1897, + "step": 13334 + }, + { + "epoch": 0.5558334375390771, + "grad_norm": 184.0, + "learning_rate": 4.340761005041034e-05, + "loss": 7.688, + "step": 13335 + }, + { + "epoch": 0.5558751198366054, + "grad_norm": 684.0, + "learning_rate": 4.340091902189043e-05, + "loss": 20.1257, + "step": 13336 + }, + { + "epoch": 0.5559168021341336, + "grad_norm": 223.0, + "learning_rate": 4.339422811363939e-05, + "loss": 11.1252, + "step": 13337 + }, + { + "epoch": 0.5559584844316618, + "grad_norm": 194.0, + "learning_rate": 4.33875373257791e-05, + "loss": 10.6881, + "step": 13338 + }, + { + "epoch": 0.5560001667291901, + "grad_norm": 416.0, + "learning_rate": 4.3380846658431564e-05, + "loss": 14.5005, + "step": 13339 + }, + { + "epoch": 0.5560418490267184, + "grad_norm": 360.0, + "learning_rate": 4.3374156111718654e-05, + "loss": 15.2507, + "step": 13340 + }, + { + "epoch": 0.5560835313242466, + "grad_norm": 344.0, + "learning_rate": 4.3367465685762345e-05, + "loss": 14.3752, + "step": 13341 + }, + { + "epoch": 0.5561252136217748, + "grad_norm": 298.0, + "learning_rate": 4.336077538068455e-05, + "loss": 13.3753, + "step": 13342 + }, + { + "epoch": 0.556166895919303, + "grad_norm": 238.0, + "learning_rate": 4.335408519660724e-05, + "loss": 11.8127, + "step": 13343 + }, + { + "epoch": 0.5562085782168313, + "grad_norm": 167.0, + "learning_rate": 4.3347395133652296e-05, + "loss": 11.9382, + "step": 13344 + }, + { + "epoch": 0.5562502605143596, + "grad_norm": 948.0, + "learning_rate": 4.334070519194169e-05, + "loss": 21.88, + "step": 13345 + }, + { + "epoch": 0.5562919428118878, + "grad_norm": 596.0, + "learning_rate": 4.3334015371597294e-05, + "loss": 18.2501, + "step": 13346 + }, + { + "epoch": 0.5563336251094161, + "grad_norm": 198.0, + "learning_rate": 4.332732567274109e-05, + "loss": 11.8753, + "step": 13347 + }, + { + "epoch": 0.5563753074069443, + "grad_norm": 436.0, + "learning_rate": 4.332063609549494e-05, + "loss": 13.8752, + "step": 13348 + }, + { + "epoch": 0.5564169897044725, + "grad_norm": 420.0, + "learning_rate": 4.331394663998081e-05, + "loss": 14.2532, + "step": 13349 + }, + { + "epoch": 0.5564586720020007, + "grad_norm": 544.0, + "learning_rate": 4.330725730632058e-05, + "loss": 17.2505, + "step": 13350 + }, + { + "epoch": 0.556500354299529, + "grad_norm": 482.0, + "learning_rate": 4.3300568094636195e-05, + "loss": 16.6254, + "step": 13351 + }, + { + "epoch": 0.5565420365970573, + "grad_norm": 272.0, + "learning_rate": 4.3293879005049534e-05, + "loss": 12.9381, + "step": 13352 + }, + { + "epoch": 0.5565837188945855, + "grad_norm": 115.5, + "learning_rate": 4.328719003768253e-05, + "loss": 9.6271, + "step": 13353 + }, + { + "epoch": 0.5566254011921137, + "grad_norm": 772.0, + "learning_rate": 4.32805011926571e-05, + "loss": 22.0006, + "step": 13354 + }, + { + "epoch": 0.556667083489642, + "grad_norm": 214.0, + "learning_rate": 4.327381247009511e-05, + "loss": 11.5628, + "step": 13355 + }, + { + "epoch": 0.5567087657871702, + "grad_norm": 180.0, + "learning_rate": 4.3267123870118485e-05, + "loss": 11.1879, + "step": 13356 + }, + { + "epoch": 0.5567504480846984, + "grad_norm": 532.0, + "learning_rate": 4.3260435392849143e-05, + "loss": 18.2502, + "step": 13357 + }, + { + "epoch": 0.5567921303822266, + "grad_norm": 552.0, + "learning_rate": 4.325374703840896e-05, + "loss": 18.1254, + "step": 13358 + }, + { + "epoch": 0.556833812679755, + "grad_norm": 380.0, + "learning_rate": 4.324705880691983e-05, + "loss": 14.4378, + "step": 13359 + }, + { + "epoch": 0.5568754949772832, + "grad_norm": 368.0, + "learning_rate": 4.3240370698503645e-05, + "loss": 14.8754, + "step": 13360 + }, + { + "epoch": 0.5569171772748114, + "grad_norm": 584.0, + "learning_rate": 4.323368271328233e-05, + "loss": 18.5038, + "step": 13361 + }, + { + "epoch": 0.5569588595723396, + "grad_norm": 488.0, + "learning_rate": 4.322699485137772e-05, + "loss": 15.9377, + "step": 13362 + }, + { + "epoch": 0.5570005418698679, + "grad_norm": 414.0, + "learning_rate": 4.322030711291175e-05, + "loss": 14.9388, + "step": 13363 + }, + { + "epoch": 0.5570422241673961, + "grad_norm": 502.0, + "learning_rate": 4.3213619498006266e-05, + "loss": 17.1261, + "step": 13364 + }, + { + "epoch": 0.5570839064649243, + "grad_norm": 284.0, + "learning_rate": 4.3206932006783184e-05, + "loss": 13.188, + "step": 13365 + }, + { + "epoch": 0.5571255887624526, + "grad_norm": 604.0, + "learning_rate": 4.320024463936436e-05, + "loss": 19.8766, + "step": 13366 + }, + { + "epoch": 0.5571672710599809, + "grad_norm": 430.0, + "learning_rate": 4.319355739587169e-05, + "loss": 15.0627, + "step": 13367 + }, + { + "epoch": 0.5572089533575091, + "grad_norm": 262.0, + "learning_rate": 4.318687027642703e-05, + "loss": 13.3127, + "step": 13368 + }, + { + "epoch": 0.5572506356550373, + "grad_norm": 580.0, + "learning_rate": 4.3180183281152276e-05, + "loss": 17.002, + "step": 13369 + }, + { + "epoch": 0.5572923179525655, + "grad_norm": 672.0, + "learning_rate": 4.317349641016927e-05, + "loss": 20.7503, + "step": 13370 + }, + { + "epoch": 0.5573340002500938, + "grad_norm": 418.0, + "learning_rate": 4.3166809663599925e-05, + "loss": 16.0003, + "step": 13371 + }, + { + "epoch": 0.557375682547622, + "grad_norm": 246.0, + "learning_rate": 4.316012304156605e-05, + "loss": 11.7505, + "step": 13372 + }, + { + "epoch": 0.5574173648451503, + "grad_norm": 174.0, + "learning_rate": 4.3153436544189576e-05, + "loss": 12.2503, + "step": 13373 + }, + { + "epoch": 0.5574590471426785, + "grad_norm": 410.0, + "learning_rate": 4.3146750171592295e-05, + "loss": 14.9378, + "step": 13374 + }, + { + "epoch": 0.5575007294402068, + "grad_norm": 250.0, + "learning_rate": 4.314006392389612e-05, + "loss": 11.8127, + "step": 13375 + }, + { + "epoch": 0.557542411737735, + "grad_norm": 1088.0, + "learning_rate": 4.313337780122287e-05, + "loss": 28.6257, + "step": 13376 + }, + { + "epoch": 0.5575840940352632, + "grad_norm": 396.0, + "learning_rate": 4.3126691803694436e-05, + "loss": 15.4384, + "step": 13377 + }, + { + "epoch": 0.5576257763327914, + "grad_norm": 248.0, + "learning_rate": 4.312000593143265e-05, + "loss": 11.6878, + "step": 13378 + }, + { + "epoch": 0.5576674586303197, + "grad_norm": 660.0, + "learning_rate": 4.3113320184559366e-05, + "loss": 20.7506, + "step": 13379 + }, + { + "epoch": 0.557709140927848, + "grad_norm": 376.0, + "learning_rate": 4.3106634563196426e-05, + "loss": 14.4376, + "step": 13380 + }, + { + "epoch": 0.5577508232253762, + "grad_norm": 438.0, + "learning_rate": 4.30999490674657e-05, + "loss": 16.2504, + "step": 13381 + }, + { + "epoch": 0.5577925055229044, + "grad_norm": 171.0, + "learning_rate": 4.3093263697489e-05, + "loss": 9.8126, + "step": 13382 + }, + { + "epoch": 0.5578341878204327, + "grad_norm": 218.0, + "learning_rate": 4.30865784533882e-05, + "loss": 11.5004, + "step": 13383 + }, + { + "epoch": 0.5578758701179609, + "grad_norm": 528.0, + "learning_rate": 4.3079893335285085e-05, + "loss": 17.8753, + "step": 13384 + }, + { + "epoch": 0.5579175524154891, + "grad_norm": 51.75, + "learning_rate": 4.307320834330156e-05, + "loss": 7.3442, + "step": 13385 + }, + { + "epoch": 0.5579592347130173, + "grad_norm": 796.0, + "learning_rate": 4.306652347755942e-05, + "loss": 21.3755, + "step": 13386 + }, + { + "epoch": 0.5580009170105457, + "grad_norm": 201.0, + "learning_rate": 4.3059838738180514e-05, + "loss": 10.9377, + "step": 13387 + }, + { + "epoch": 0.5580425993080739, + "grad_norm": 366.0, + "learning_rate": 4.3053154125286646e-05, + "loss": 14.7503, + "step": 13388 + }, + { + "epoch": 0.5580842816056021, + "grad_norm": 159.0, + "learning_rate": 4.3046469638999674e-05, + "loss": 11.1256, + "step": 13389 + }, + { + "epoch": 0.5581259639031303, + "grad_norm": 588.0, + "learning_rate": 4.3039785279441394e-05, + "loss": 19.7502, + "step": 13390 + }, + { + "epoch": 0.5581676462006586, + "grad_norm": 700.0, + "learning_rate": 4.3033101046733656e-05, + "loss": 21.8756, + "step": 13391 + }, + { + "epoch": 0.5582093284981868, + "grad_norm": 112.0, + "learning_rate": 4.302641694099827e-05, + "loss": 11.1257, + "step": 13392 + }, + { + "epoch": 0.558251010795715, + "grad_norm": 536.0, + "learning_rate": 4.301973296235706e-05, + "loss": 16.3757, + "step": 13393 + }, + { + "epoch": 0.5582926930932433, + "grad_norm": 280.0, + "learning_rate": 4.3013049110931816e-05, + "loss": 12.8128, + "step": 13394 + }, + { + "epoch": 0.5583343753907716, + "grad_norm": 173.0, + "learning_rate": 4.30063653868444e-05, + "loss": 8.0629, + "step": 13395 + }, + { + "epoch": 0.5583760576882998, + "grad_norm": 370.0, + "learning_rate": 4.2999681790216575e-05, + "loss": 15.1878, + "step": 13396 + }, + { + "epoch": 0.558417739985828, + "grad_norm": 116.5, + "learning_rate": 4.299299832117019e-05, + "loss": 10.4379, + "step": 13397 + }, + { + "epoch": 0.5584594222833562, + "grad_norm": 392.0, + "learning_rate": 4.2986314979827e-05, + "loss": 15.0631, + "step": 13398 + }, + { + "epoch": 0.5585011045808845, + "grad_norm": 268.0, + "learning_rate": 4.297963176630888e-05, + "loss": 13.4376, + "step": 13399 + }, + { + "epoch": 0.5585427868784127, + "grad_norm": 148.0, + "learning_rate": 4.2972948680737565e-05, + "loss": 10.0629, + "step": 13400 + }, + { + "epoch": 0.558584469175941, + "grad_norm": 149.0, + "learning_rate": 4.296626572323491e-05, + "loss": 10.3753, + "step": 13401 + }, + { + "epoch": 0.5586261514734692, + "grad_norm": 1168.0, + "learning_rate": 4.2959582893922664e-05, + "loss": 25.0045, + "step": 13402 + }, + { + "epoch": 0.5586678337709975, + "grad_norm": 588.0, + "learning_rate": 4.295290019292265e-05, + "loss": 18.5002, + "step": 13403 + }, + { + "epoch": 0.5587095160685257, + "grad_norm": 223.0, + "learning_rate": 4.294621762035666e-05, + "loss": 11.8126, + "step": 13404 + }, + { + "epoch": 0.5587511983660539, + "grad_norm": 296.0, + "learning_rate": 4.293953517634648e-05, + "loss": 12.4378, + "step": 13405 + }, + { + "epoch": 0.5587928806635821, + "grad_norm": 187.0, + "learning_rate": 4.293285286101388e-05, + "loss": 10.7503, + "step": 13406 + }, + { + "epoch": 0.5588345629611104, + "grad_norm": 486.0, + "learning_rate": 4.2926170674480695e-05, + "loss": 18.6255, + "step": 13407 + }, + { + "epoch": 0.5588762452586387, + "grad_norm": 460.0, + "learning_rate": 4.291948861686866e-05, + "loss": 17.0007, + "step": 13408 + }, + { + "epoch": 0.5589179275561669, + "grad_norm": 1472.0, + "learning_rate": 4.2912806688299586e-05, + "loss": 31.7524, + "step": 13409 + }, + { + "epoch": 0.5589596098536951, + "grad_norm": 173.0, + "learning_rate": 4.2906124888895224e-05, + "loss": 10.8127, + "step": 13410 + }, + { + "epoch": 0.5590012921512234, + "grad_norm": 245.0, + "learning_rate": 4.289944321877738e-05, + "loss": 12.6888, + "step": 13411 + }, + { + "epoch": 0.5590429744487516, + "grad_norm": 278.0, + "learning_rate": 4.2892761678067794e-05, + "loss": 10.1255, + "step": 13412 + }, + { + "epoch": 0.5590846567462798, + "grad_norm": 210.0, + "learning_rate": 4.2886080266888285e-05, + "loss": 10.5003, + "step": 13413 + }, + { + "epoch": 0.559126339043808, + "grad_norm": 1328.0, + "learning_rate": 4.2879398985360566e-05, + "loss": 32.0002, + "step": 13414 + }, + { + "epoch": 0.5591680213413364, + "grad_norm": 169.0, + "learning_rate": 4.2872717833606445e-05, + "loss": 8.7503, + "step": 13415 + }, + { + "epoch": 0.5592097036388646, + "grad_norm": 330.0, + "learning_rate": 4.286603681174768e-05, + "loss": 14.3754, + "step": 13416 + }, + { + "epoch": 0.5592513859363928, + "grad_norm": 118.0, + "learning_rate": 4.285935591990602e-05, + "loss": 9.8752, + "step": 13417 + }, + { + "epoch": 0.5592930682339211, + "grad_norm": 540.0, + "learning_rate": 4.285267515820322e-05, + "loss": 17.1253, + "step": 13418 + }, + { + "epoch": 0.5593347505314493, + "grad_norm": 324.0, + "learning_rate": 4.284599452676108e-05, + "loss": 12.0628, + "step": 13419 + }, + { + "epoch": 0.5593764328289775, + "grad_norm": 442.0, + "learning_rate": 4.28393140257013e-05, + "loss": 15.1883, + "step": 13420 + }, + { + "epoch": 0.5594181151265057, + "grad_norm": 708.0, + "learning_rate": 4.2832633655145674e-05, + "loss": 20.126, + "step": 13421 + }, + { + "epoch": 0.5594597974240341, + "grad_norm": 648.0, + "learning_rate": 4.282595341521592e-05, + "loss": 20.1259, + "step": 13422 + }, + { + "epoch": 0.5595014797215623, + "grad_norm": 498.0, + "learning_rate": 4.281927330603382e-05, + "loss": 17.2518, + "step": 13423 + }, + { + "epoch": 0.5595431620190905, + "grad_norm": 324.0, + "learning_rate": 4.281259332772108e-05, + "loss": 15.3128, + "step": 13424 + }, + { + "epoch": 0.5595848443166187, + "grad_norm": 474.0, + "learning_rate": 4.280591348039949e-05, + "loss": 15.2502, + "step": 13425 + }, + { + "epoch": 0.559626526614147, + "grad_norm": 490.0, + "learning_rate": 4.279923376419074e-05, + "loss": 17.6252, + "step": 13426 + }, + { + "epoch": 0.5596682089116752, + "grad_norm": 426.0, + "learning_rate": 4.2792554179216615e-05, + "loss": 16.6251, + "step": 13427 + }, + { + "epoch": 0.5597098912092034, + "grad_norm": 414.0, + "learning_rate": 4.278587472559881e-05, + "loss": 15.8753, + "step": 13428 + }, + { + "epoch": 0.5597515735067317, + "grad_norm": 278.0, + "learning_rate": 4.277919540345909e-05, + "loss": 13.126, + "step": 13429 + }, + { + "epoch": 0.55979325580426, + "grad_norm": 123.5, + "learning_rate": 4.277251621291918e-05, + "loss": 9.751, + "step": 13430 + }, + { + "epoch": 0.5598349381017882, + "grad_norm": 414.0, + "learning_rate": 4.2765837154100797e-05, + "loss": 15.6878, + "step": 13431 + }, + { + "epoch": 0.5598766203993164, + "grad_norm": 316.0, + "learning_rate": 4.2759158227125665e-05, + "loss": 14.3753, + "step": 13432 + }, + { + "epoch": 0.5599183026968446, + "grad_norm": 190.0, + "learning_rate": 4.2752479432115535e-05, + "loss": 11.2512, + "step": 13433 + }, + { + "epoch": 0.5599599849943729, + "grad_norm": 476.0, + "learning_rate": 4.274580076919209e-05, + "loss": 16.3753, + "step": 13434 + }, + { + "epoch": 0.5600016672919012, + "grad_norm": 312.0, + "learning_rate": 4.273912223847709e-05, + "loss": 12.3147, + "step": 13435 + }, + { + "epoch": 0.5600433495894294, + "grad_norm": 282.0, + "learning_rate": 4.273244384009222e-05, + "loss": 11.8127, + "step": 13436 + }, + { + "epoch": 0.5600850318869576, + "grad_norm": 472.0, + "learning_rate": 4.2725765574159224e-05, + "loss": 16.2503, + "step": 13437 + }, + { + "epoch": 0.5601267141844859, + "grad_norm": 278.0, + "learning_rate": 4.271908744079977e-05, + "loss": 12.7517, + "step": 13438 + }, + { + "epoch": 0.5601683964820141, + "grad_norm": 644.0, + "learning_rate": 4.271240944013561e-05, + "loss": 18.8753, + "step": 13439 + }, + { + "epoch": 0.5602100787795423, + "grad_norm": 480.0, + "learning_rate": 4.2705731572288424e-05, + "loss": 17.7502, + "step": 13440 + }, + { + "epoch": 0.5602517610770705, + "grad_norm": 408.0, + "learning_rate": 4.269905383737994e-05, + "loss": 10.2514, + "step": 13441 + }, + { + "epoch": 0.5602934433745989, + "grad_norm": 234.0, + "learning_rate": 4.269237623553184e-05, + "loss": 11.4379, + "step": 13442 + }, + { + "epoch": 0.5603351256721271, + "grad_norm": 119.5, + "learning_rate": 4.268569876686583e-05, + "loss": 7.1879, + "step": 13443 + }, + { + "epoch": 0.5603768079696553, + "grad_norm": 100.5, + "learning_rate": 4.267902143150361e-05, + "loss": 9.0627, + "step": 13444 + }, + { + "epoch": 0.5604184902671835, + "grad_norm": 148.0, + "learning_rate": 4.267234422956688e-05, + "loss": 10.9384, + "step": 13445 + }, + { + "epoch": 0.5604601725647118, + "grad_norm": 300.0, + "learning_rate": 4.266566716117732e-05, + "loss": 12.8126, + "step": 13446 + }, + { + "epoch": 0.56050185486224, + "grad_norm": 652.0, + "learning_rate": 4.265899022645665e-05, + "loss": 20.8754, + "step": 13447 + }, + { + "epoch": 0.5605435371597682, + "grad_norm": 428.0, + "learning_rate": 4.2652313425526504e-05, + "loss": 13.4382, + "step": 13448 + }, + { + "epoch": 0.5605852194572964, + "grad_norm": 580.0, + "learning_rate": 4.264563675850862e-05, + "loss": 20.2504, + "step": 13449 + }, + { + "epoch": 0.5606269017548248, + "grad_norm": 354.0, + "learning_rate": 4.2638960225524637e-05, + "loss": 14.3142, + "step": 13450 + }, + { + "epoch": 0.560668584052353, + "grad_norm": 300.0, + "learning_rate": 4.2632283826696284e-05, + "loss": 12.8752, + "step": 13451 + }, + { + "epoch": 0.5607102663498812, + "grad_norm": 692.0, + "learning_rate": 4.2625607562145186e-05, + "loss": 22.1252, + "step": 13452 + }, + { + "epoch": 0.5607519486474094, + "grad_norm": 160.0, + "learning_rate": 4.261893143199306e-05, + "loss": 9.3753, + "step": 13453 + }, + { + "epoch": 0.5607936309449377, + "grad_norm": 552.0, + "learning_rate": 4.261225543636157e-05, + "loss": 17.3772, + "step": 13454 + }, + { + "epoch": 0.5608353132424659, + "grad_norm": 154.0, + "learning_rate": 4.2605579575372364e-05, + "loss": 11.6254, + "step": 13455 + }, + { + "epoch": 0.5608769955399941, + "grad_norm": 177.0, + "learning_rate": 4.2598903849147126e-05, + "loss": 10.0004, + "step": 13456 + }, + { + "epoch": 0.5609186778375224, + "grad_norm": 384.0, + "learning_rate": 4.259222825780754e-05, + "loss": 13.8756, + "step": 13457 + }, + { + "epoch": 0.5609603601350507, + "grad_norm": 370.0, + "learning_rate": 4.2585552801475236e-05, + "loss": 12.5629, + "step": 13458 + }, + { + "epoch": 0.5610020424325789, + "grad_norm": 140.0, + "learning_rate": 4.2578877480271906e-05, + "loss": 6.5952, + "step": 13459 + }, + { + "epoch": 0.5610437247301071, + "grad_norm": 247.0, + "learning_rate": 4.257220229431917e-05, + "loss": 11.0002, + "step": 13460 + }, + { + "epoch": 0.5610854070276353, + "grad_norm": 352.0, + "learning_rate": 4.2565527243738736e-05, + "loss": 15.4401, + "step": 13461 + }, + { + "epoch": 0.5611270893251636, + "grad_norm": 149.0, + "learning_rate": 4.25588523286522e-05, + "loss": 10.5002, + "step": 13462 + }, + { + "epoch": 0.5611687716226919, + "grad_norm": 350.0, + "learning_rate": 4.2552177549181265e-05, + "loss": 15.0629, + "step": 13463 + }, + { + "epoch": 0.5612104539202201, + "grad_norm": 948.0, + "learning_rate": 4.254550290544753e-05, + "loss": 24.3751, + "step": 13464 + }, + { + "epoch": 0.5612521362177483, + "grad_norm": 167.0, + "learning_rate": 4.253882839757269e-05, + "loss": 10.5004, + "step": 13465 + }, + { + "epoch": 0.5612938185152766, + "grad_norm": 812.0, + "learning_rate": 4.253215402567835e-05, + "loss": 19.6317, + "step": 13466 + }, + { + "epoch": 0.5613355008128048, + "grad_norm": 234.0, + "learning_rate": 4.252547978988617e-05, + "loss": 12.8143, + "step": 13467 + }, + { + "epoch": 0.561377183110333, + "grad_norm": 388.0, + "learning_rate": 4.251880569031777e-05, + "loss": 14.8129, + "step": 13468 + }, + { + "epoch": 0.5614188654078612, + "grad_norm": 640.0, + "learning_rate": 4.251213172709483e-05, + "loss": 20.1258, + "step": 13469 + }, + { + "epoch": 0.5614605477053896, + "grad_norm": 225.0, + "learning_rate": 4.250545790033893e-05, + "loss": 11.0008, + "step": 13470 + }, + { + "epoch": 0.5615022300029178, + "grad_norm": 272.0, + "learning_rate": 4.249878421017174e-05, + "loss": 13.0003, + "step": 13471 + }, + { + "epoch": 0.561543912300446, + "grad_norm": 205.0, + "learning_rate": 4.2492110656714856e-05, + "loss": 6.5631, + "step": 13472 + }, + { + "epoch": 0.5615855945979742, + "grad_norm": 664.0, + "learning_rate": 4.248543724008995e-05, + "loss": 21.0003, + "step": 13473 + }, + { + "epoch": 0.5616272768955025, + "grad_norm": 169.0, + "learning_rate": 4.247876396041859e-05, + "loss": 8.6252, + "step": 13474 + }, + { + "epoch": 0.5616689591930307, + "grad_norm": 1012.0, + "learning_rate": 4.247209081782245e-05, + "loss": 23.2518, + "step": 13475 + }, + { + "epoch": 0.5617106414905589, + "grad_norm": 536.0, + "learning_rate": 4.2465417812423094e-05, + "loss": 17.7502, + "step": 13476 + }, + { + "epoch": 0.5617523237880871, + "grad_norm": 400.0, + "learning_rate": 4.2458744944342194e-05, + "loss": 16.3753, + "step": 13477 + }, + { + "epoch": 0.5617940060856155, + "grad_norm": 488.0, + "learning_rate": 4.245207221370131e-05, + "loss": 17.1252, + "step": 13478 + }, + { + "epoch": 0.5618356883831437, + "grad_norm": 124.0, + "learning_rate": 4.244539962062209e-05, + "loss": 7.2815, + "step": 13479 + }, + { + "epoch": 0.5618773706806719, + "grad_norm": 220.0, + "learning_rate": 4.243872716522614e-05, + "loss": 12.4397, + "step": 13480 + }, + { + "epoch": 0.5619190529782001, + "grad_norm": 632.0, + "learning_rate": 4.243205484763506e-05, + "loss": 19.7516, + "step": 13481 + }, + { + "epoch": 0.5619607352757284, + "grad_norm": 232.0, + "learning_rate": 4.242538266797044e-05, + "loss": 12.0629, + "step": 13482 + }, + { + "epoch": 0.5620024175732566, + "grad_norm": 78.0, + "learning_rate": 4.241871062635391e-05, + "loss": 8.1256, + "step": 13483 + }, + { + "epoch": 0.5620440998707849, + "grad_norm": 804.0, + "learning_rate": 4.2412038722907035e-05, + "loss": 23.1254, + "step": 13484 + }, + { + "epoch": 0.5620857821683131, + "grad_norm": 584.0, + "learning_rate": 4.240536695775145e-05, + "loss": 20.0006, + "step": 13485 + }, + { + "epoch": 0.5621274644658414, + "grad_norm": 88.5, + "learning_rate": 4.2398695331008696e-05, + "loss": 8.7514, + "step": 13486 + }, + { + "epoch": 0.5621691467633696, + "grad_norm": 154.0, + "learning_rate": 4.239202384280042e-05, + "loss": 11.6878, + "step": 13487 + }, + { + "epoch": 0.5622108290608978, + "grad_norm": 98.5, + "learning_rate": 4.238535249324817e-05, + "loss": 8.5005, + "step": 13488 + }, + { + "epoch": 0.562252511358426, + "grad_norm": 266.0, + "learning_rate": 4.2378681282473564e-05, + "loss": 12.3129, + "step": 13489 + }, + { + "epoch": 0.5622941936559543, + "grad_norm": 498.0, + "learning_rate": 4.237201021059815e-05, + "loss": 16.0008, + "step": 13490 + }, + { + "epoch": 0.5623358759534826, + "grad_norm": 75.5, + "learning_rate": 4.236533927774353e-05, + "loss": 8.6256, + "step": 13491 + }, + { + "epoch": 0.5623775582510108, + "grad_norm": 324.0, + "learning_rate": 4.235866848403128e-05, + "loss": 14.5002, + "step": 13492 + }, + { + "epoch": 0.5624192405485391, + "grad_norm": 1256.0, + "learning_rate": 4.235199782958298e-05, + "loss": 27.626, + "step": 13493 + }, + { + "epoch": 0.5624609228460673, + "grad_norm": 332.0, + "learning_rate": 4.234532731452019e-05, + "loss": 13.563, + "step": 13494 + }, + { + "epoch": 0.5625026051435955, + "grad_norm": 880.0, + "learning_rate": 4.2338656938964504e-05, + "loss": 21.6253, + "step": 13495 + }, + { + "epoch": 0.5625442874411237, + "grad_norm": 612.0, + "learning_rate": 4.233198670303746e-05, + "loss": 17.3752, + "step": 13496 + }, + { + "epoch": 0.562585969738652, + "grad_norm": 243.0, + "learning_rate": 4.2325316606860655e-05, + "loss": 12.1886, + "step": 13497 + }, + { + "epoch": 0.5626276520361803, + "grad_norm": 268.0, + "learning_rate": 4.2318646650555616e-05, + "loss": 13.1877, + "step": 13498 + }, + { + "epoch": 0.5626693343337085, + "grad_norm": 348.0, + "learning_rate": 4.231197683424395e-05, + "loss": 15.3754, + "step": 13499 + }, + { + "epoch": 0.5627110166312367, + "grad_norm": 225.0, + "learning_rate": 4.230530715804716e-05, + "loss": 10.5002, + "step": 13500 + }, + { + "epoch": 0.562752698928765, + "grad_norm": 478.0, + "learning_rate": 4.229863762208686e-05, + "loss": 15.9383, + "step": 13501 + }, + { + "epoch": 0.5627943812262932, + "grad_norm": 932.0, + "learning_rate": 4.229196822648455e-05, + "loss": 25.7509, + "step": 13502 + }, + { + "epoch": 0.5628360635238214, + "grad_norm": 166.0, + "learning_rate": 4.2285298971361806e-05, + "loss": 10.6256, + "step": 13503 + }, + { + "epoch": 0.5628777458213496, + "grad_norm": 412.0, + "learning_rate": 4.227862985684018e-05, + "loss": 16.0009, + "step": 13504 + }, + { + "epoch": 0.562919428118878, + "grad_norm": 282.0, + "learning_rate": 4.227196088304121e-05, + "loss": 12.4377, + "step": 13505 + }, + { + "epoch": 0.5629611104164062, + "grad_norm": 186.0, + "learning_rate": 4.226529205008642e-05, + "loss": 10.9377, + "step": 13506 + }, + { + "epoch": 0.5630027927139344, + "grad_norm": 262.0, + "learning_rate": 4.22586233580974e-05, + "loss": 12.5002, + "step": 13507 + }, + { + "epoch": 0.5630444750114626, + "grad_norm": 191.0, + "learning_rate": 4.2251954807195635e-05, + "loss": 10.6897, + "step": 13508 + }, + { + "epoch": 0.5630861573089909, + "grad_norm": 398.0, + "learning_rate": 4.22452863975027e-05, + "loss": 13.6261, + "step": 13509 + }, + { + "epoch": 0.5631278396065191, + "grad_norm": 454.0, + "learning_rate": 4.223861812914008e-05, + "loss": 15.4378, + "step": 13510 + }, + { + "epoch": 0.5631695219040473, + "grad_norm": 356.0, + "learning_rate": 4.2231950002229365e-05, + "loss": 12.876, + "step": 13511 + }, + { + "epoch": 0.5632112042015756, + "grad_norm": 752.0, + "learning_rate": 4.222528201689203e-05, + "loss": 19.7549, + "step": 13512 + }, + { + "epoch": 0.5632528864991039, + "grad_norm": 1020.0, + "learning_rate": 4.221861417324964e-05, + "loss": 23.7548, + "step": 13513 + }, + { + "epoch": 0.5632945687966321, + "grad_norm": 266.0, + "learning_rate": 4.221194647142367e-05, + "loss": 13.1254, + "step": 13514 + }, + { + "epoch": 0.5633362510941603, + "grad_norm": 1192.0, + "learning_rate": 4.220527891153569e-05, + "loss": 24.0056, + "step": 13515 + }, + { + "epoch": 0.5633779333916885, + "grad_norm": 414.0, + "learning_rate": 4.2198611493707176e-05, + "loss": 15.5635, + "step": 13516 + }, + { + "epoch": 0.5634196156892168, + "grad_norm": 300.0, + "learning_rate": 4.219194421805967e-05, + "loss": 13.6256, + "step": 13517 + }, + { + "epoch": 0.563461297986745, + "grad_norm": 1720.0, + "learning_rate": 4.218527708471467e-05, + "loss": 40.5001, + "step": 13518 + }, + { + "epoch": 0.5635029802842733, + "grad_norm": 193.0, + "learning_rate": 4.2178610093793703e-05, + "loss": 9.6266, + "step": 13519 + }, + { + "epoch": 0.5635446625818015, + "grad_norm": 584.0, + "learning_rate": 4.217194324541824e-05, + "loss": 18.0007, + "step": 13520 + }, + { + "epoch": 0.5635863448793298, + "grad_norm": 187.0, + "learning_rate": 4.2165276539709826e-05, + "loss": 7.5941, + "step": 13521 + }, + { + "epoch": 0.563628027176858, + "grad_norm": 90.0, + "learning_rate": 4.215860997678992e-05, + "loss": 9.5631, + "step": 13522 + }, + { + "epoch": 0.5636697094743862, + "grad_norm": 256.0, + "learning_rate": 4.215194355678007e-05, + "loss": 11.9376, + "step": 13523 + }, + { + "epoch": 0.5637113917719144, + "grad_norm": 584.0, + "learning_rate": 4.214527727980172e-05, + "loss": 19.5002, + "step": 13524 + }, + { + "epoch": 0.5637530740694428, + "grad_norm": 924.0, + "learning_rate": 4.213861114597641e-05, + "loss": 26.1253, + "step": 13525 + }, + { + "epoch": 0.563794756366971, + "grad_norm": 209.0, + "learning_rate": 4.21319451554256e-05, + "loss": 12.1252, + "step": 13526 + }, + { + "epoch": 0.5638364386644992, + "grad_norm": 394.0, + "learning_rate": 4.2125279308270794e-05, + "loss": 16.5006, + "step": 13527 + }, + { + "epoch": 0.5638781209620274, + "grad_norm": 358.0, + "learning_rate": 4.211861360463346e-05, + "loss": 13.1879, + "step": 13528 + }, + { + "epoch": 0.5639198032595557, + "grad_norm": 636.0, + "learning_rate": 4.2111948044635096e-05, + "loss": 20.2504, + "step": 13529 + }, + { + "epoch": 0.5639614855570839, + "grad_norm": 246.0, + "learning_rate": 4.210528262839718e-05, + "loss": 12.2504, + "step": 13530 + }, + { + "epoch": 0.5640031678546121, + "grad_norm": 472.0, + "learning_rate": 4.209861735604119e-05, + "loss": 17.5003, + "step": 13531 + }, + { + "epoch": 0.5640448501521403, + "grad_norm": 350.0, + "learning_rate": 4.2091952227688594e-05, + "loss": 14.8756, + "step": 13532 + }, + { + "epoch": 0.5640865324496687, + "grad_norm": 102.5, + "learning_rate": 4.208528724346089e-05, + "loss": 9.6252, + "step": 13533 + }, + { + "epoch": 0.5641282147471969, + "grad_norm": 154.0, + "learning_rate": 4.2078622403479503e-05, + "loss": 9.5009, + "step": 13534 + }, + { + "epoch": 0.5641698970447251, + "grad_norm": 484.0, + "learning_rate": 4.207195770786596e-05, + "loss": 16.1291, + "step": 13535 + }, + { + "epoch": 0.5642115793422533, + "grad_norm": 220.0, + "learning_rate": 4.206529315674166e-05, + "loss": 11.2502, + "step": 13536 + }, + { + "epoch": 0.5642532616397816, + "grad_norm": 412.0, + "learning_rate": 4.2058628750228114e-05, + "loss": 14.5004, + "step": 13537 + }, + { + "epoch": 0.5642949439373098, + "grad_norm": 320.0, + "learning_rate": 4.205196448844675e-05, + "loss": 11.0002, + "step": 13538 + }, + { + "epoch": 0.564336626234838, + "grad_norm": 1072.0, + "learning_rate": 4.204530037151906e-05, + "loss": 23.5048, + "step": 13539 + }, + { + "epoch": 0.5643783085323663, + "grad_norm": 440.0, + "learning_rate": 4.203863639956645e-05, + "loss": 13.8128, + "step": 13540 + }, + { + "epoch": 0.5644199908298946, + "grad_norm": 132.0, + "learning_rate": 4.203197257271041e-05, + "loss": 7.0005, + "step": 13541 + }, + { + "epoch": 0.5644616731274228, + "grad_norm": 366.0, + "learning_rate": 4.202530889107238e-05, + "loss": 13.6251, + "step": 13542 + }, + { + "epoch": 0.564503355424951, + "grad_norm": 235.0, + "learning_rate": 4.20186453547738e-05, + "loss": 9.8753, + "step": 13543 + }, + { + "epoch": 0.5645450377224792, + "grad_norm": 272.0, + "learning_rate": 4.201198196393611e-05, + "loss": 12.5628, + "step": 13544 + }, + { + "epoch": 0.5645867200200075, + "grad_norm": 572.0, + "learning_rate": 4.200531871868078e-05, + "loss": 18.8753, + "step": 13545 + }, + { + "epoch": 0.5646284023175357, + "grad_norm": 242.0, + "learning_rate": 4.1998655619129204e-05, + "loss": 12.313, + "step": 13546 + }, + { + "epoch": 0.564670084615064, + "grad_norm": 151.0, + "learning_rate": 4.199199266540286e-05, + "loss": 9.7504, + "step": 13547 + }, + { + "epoch": 0.5647117669125922, + "grad_norm": 474.0, + "learning_rate": 4.1985329857623135e-05, + "loss": 15.8756, + "step": 13548 + }, + { + "epoch": 0.5647534492101205, + "grad_norm": 99.5, + "learning_rate": 4.197866719591151e-05, + "loss": 7.2814, + "step": 13549 + }, + { + "epoch": 0.5647951315076487, + "grad_norm": 294.0, + "learning_rate": 4.197200468038937e-05, + "loss": 13.1251, + "step": 13550 + }, + { + "epoch": 0.5648368138051769, + "grad_norm": 408.0, + "learning_rate": 4.196534231117817e-05, + "loss": 14.4414, + "step": 13551 + }, + { + "epoch": 0.5648784961027051, + "grad_norm": 438.0, + "learning_rate": 4.195868008839931e-05, + "loss": 17.0026, + "step": 13552 + }, + { + "epoch": 0.5649201784002335, + "grad_norm": 173.0, + "learning_rate": 4.195201801217423e-05, + "loss": 11.0005, + "step": 13553 + }, + { + "epoch": 0.5649618606977617, + "grad_norm": 536.0, + "learning_rate": 4.194535608262432e-05, + "loss": 17.3754, + "step": 13554 + }, + { + "epoch": 0.5650035429952899, + "grad_norm": 260.0, + "learning_rate": 4.193869429987102e-05, + "loss": 13.0628, + "step": 13555 + }, + { + "epoch": 0.5650452252928181, + "grad_norm": 484.0, + "learning_rate": 4.193203266403572e-05, + "loss": 16.5003, + "step": 13556 + }, + { + "epoch": 0.5650869075903464, + "grad_norm": 430.0, + "learning_rate": 4.1925371175239866e-05, + "loss": 15.8753, + "step": 13557 + }, + { + "epoch": 0.5651285898878746, + "grad_norm": 143.0, + "learning_rate": 4.191870983360481e-05, + "loss": 10.6251, + "step": 13558 + }, + { + "epoch": 0.5651702721854028, + "grad_norm": 2040.0, + "learning_rate": 4.191204863925202e-05, + "loss": 39.5075, + "step": 13559 + }, + { + "epoch": 0.565211954482931, + "grad_norm": 446.0, + "learning_rate": 4.190538759230282e-05, + "loss": 15.7502, + "step": 13560 + }, + { + "epoch": 0.5652536367804594, + "grad_norm": 568.0, + "learning_rate": 4.189872669287869e-05, + "loss": 18.5003, + "step": 13561 + }, + { + "epoch": 0.5652953190779876, + "grad_norm": 358.0, + "learning_rate": 4.189206594110095e-05, + "loss": 15.6252, + "step": 13562 + }, + { + "epoch": 0.5653370013755158, + "grad_norm": 286.0, + "learning_rate": 4.1885405337091064e-05, + "loss": 13.6878, + "step": 13563 + }, + { + "epoch": 0.5653786836730441, + "grad_norm": 294.0, + "learning_rate": 4.1878744880970355e-05, + "loss": 13.2502, + "step": 13564 + }, + { + "epoch": 0.5654203659705723, + "grad_norm": 358.0, + "learning_rate": 4.187208457286026e-05, + "loss": 13.5626, + "step": 13565 + }, + { + "epoch": 0.5654620482681005, + "grad_norm": 195.0, + "learning_rate": 4.186542441288213e-05, + "loss": 10.313, + "step": 13566 + }, + { + "epoch": 0.5655037305656287, + "grad_norm": 262.0, + "learning_rate": 4.1858764401157367e-05, + "loss": 13.4379, + "step": 13567 + }, + { + "epoch": 0.5655454128631571, + "grad_norm": 2048.0, + "learning_rate": 4.185210453780735e-05, + "loss": 37.0037, + "step": 13568 + }, + { + "epoch": 0.5655870951606853, + "grad_norm": 145.0, + "learning_rate": 4.184544482295346e-05, + "loss": 10.1255, + "step": 13569 + }, + { + "epoch": 0.5656287774582135, + "grad_norm": 143.0, + "learning_rate": 4.1838785256717034e-05, + "loss": 11.3752, + "step": 13570 + }, + { + "epoch": 0.5656704597557417, + "grad_norm": 648.0, + "learning_rate": 4.1832125839219506e-05, + "loss": 19.1251, + "step": 13571 + }, + { + "epoch": 0.56571214205327, + "grad_norm": 180.0, + "learning_rate": 4.182546657058218e-05, + "loss": 11.6892, + "step": 13572 + }, + { + "epoch": 0.5657538243507982, + "grad_norm": 420.0, + "learning_rate": 4.181880745092647e-05, + "loss": 15.1879, + "step": 13573 + }, + { + "epoch": 0.5657955066483265, + "grad_norm": 215.0, + "learning_rate": 4.1812148480373706e-05, + "loss": 8.3753, + "step": 13574 + }, + { + "epoch": 0.5658371889458547, + "grad_norm": 358.0, + "learning_rate": 4.180548965904528e-05, + "loss": 14.5003, + "step": 13575 + }, + { + "epoch": 0.565878871243383, + "grad_norm": 312.0, + "learning_rate": 4.179883098706252e-05, + "loss": 12.7504, + "step": 13576 + }, + { + "epoch": 0.5659205535409112, + "grad_norm": 378.0, + "learning_rate": 4.179217246454681e-05, + "loss": 14.5627, + "step": 13577 + }, + { + "epoch": 0.5659622358384394, + "grad_norm": 374.0, + "learning_rate": 4.178551409161946e-05, + "loss": 10.1887, + "step": 13578 + }, + { + "epoch": 0.5660039181359676, + "grad_norm": 229.0, + "learning_rate": 4.177885586840186e-05, + "loss": 5.6574, + "step": 13579 + }, + { + "epoch": 0.5660456004334959, + "grad_norm": 708.0, + "learning_rate": 4.177219779501534e-05, + "loss": 20.1253, + "step": 13580 + }, + { + "epoch": 0.5660872827310242, + "grad_norm": 904.0, + "learning_rate": 4.176553987158124e-05, + "loss": 20.001, + "step": 13581 + }, + { + "epoch": 0.5661289650285524, + "grad_norm": 97.0, + "learning_rate": 4.1758882098220906e-05, + "loss": 8.8139, + "step": 13582 + }, + { + "epoch": 0.5661706473260806, + "grad_norm": 1472.0, + "learning_rate": 4.175222447505569e-05, + "loss": 32.7508, + "step": 13583 + }, + { + "epoch": 0.5662123296236089, + "grad_norm": 107.0, + "learning_rate": 4.1745567002206906e-05, + "loss": 7.8751, + "step": 13584 + }, + { + "epoch": 0.5662540119211371, + "grad_norm": 177.0, + "learning_rate": 4.1738909679795906e-05, + "loss": 8.1881, + "step": 13585 + }, + { + "epoch": 0.5662956942186653, + "grad_norm": 176.0, + "learning_rate": 4.173225250794399e-05, + "loss": 12.0629, + "step": 13586 + }, + { + "epoch": 0.5663373765161935, + "grad_norm": 182.0, + "learning_rate": 4.1725595486772534e-05, + "loss": 10.2502, + "step": 13587 + }, + { + "epoch": 0.5663790588137219, + "grad_norm": 232.0, + "learning_rate": 4.171893861640281e-05, + "loss": 11.5003, + "step": 13588 + }, + { + "epoch": 0.5664207411112501, + "grad_norm": 109.0, + "learning_rate": 4.171228189695619e-05, + "loss": 9.1877, + "step": 13589 + }, + { + "epoch": 0.5664624234087783, + "grad_norm": 310.0, + "learning_rate": 4.1705625328553934e-05, + "loss": 13.9378, + "step": 13590 + }, + { + "epoch": 0.5665041057063065, + "grad_norm": 270.0, + "learning_rate": 4.169896891131743e-05, + "loss": 12.5628, + "step": 13591 + }, + { + "epoch": 0.5665457880038348, + "grad_norm": 56.75, + "learning_rate": 4.169231264536792e-05, + "loss": 7.4066, + "step": 13592 + }, + { + "epoch": 0.566587470301363, + "grad_norm": 171.0, + "learning_rate": 4.1685656530826765e-05, + "loss": 10.8755, + "step": 13593 + }, + { + "epoch": 0.5666291525988912, + "grad_norm": 1048.0, + "learning_rate": 4.167900056781524e-05, + "loss": 26.8753, + "step": 13594 + }, + { + "epoch": 0.5666708348964195, + "grad_norm": 217.0, + "learning_rate": 4.1672344756454704e-05, + "loss": 11.6887, + "step": 13595 + }, + { + "epoch": 0.5667125171939478, + "grad_norm": 238.0, + "learning_rate": 4.16656890968664e-05, + "loss": 11.2504, + "step": 13596 + }, + { + "epoch": 0.566754199491476, + "grad_norm": 236.0, + "learning_rate": 4.165903358917167e-05, + "loss": 11.3754, + "step": 13597 + }, + { + "epoch": 0.5667958817890042, + "grad_norm": 179.0, + "learning_rate": 4.165237823349177e-05, + "loss": 11.0002, + "step": 13598 + }, + { + "epoch": 0.5668375640865324, + "grad_norm": 386.0, + "learning_rate": 4.164572302994804e-05, + "loss": 15.0005, + "step": 13599 + }, + { + "epoch": 0.5668792463840607, + "grad_norm": 150.0, + "learning_rate": 4.163906797866173e-05, + "loss": 9.1893, + "step": 13600 + }, + { + "epoch": 0.5669209286815889, + "grad_norm": 229.0, + "learning_rate": 4.163241307975417e-05, + "loss": 11.7502, + "step": 13601 + }, + { + "epoch": 0.5669626109791172, + "grad_norm": 286.0, + "learning_rate": 4.16257583333466e-05, + "loss": 12.9377, + "step": 13602 + }, + { + "epoch": 0.5670042932766454, + "grad_norm": 266.0, + "learning_rate": 4.161910373956035e-05, + "loss": 12.6879, + "step": 13603 + }, + { + "epoch": 0.5670459755741737, + "grad_norm": 418.0, + "learning_rate": 4.161244929851666e-05, + "loss": 14.7503, + "step": 13604 + }, + { + "epoch": 0.5670876578717019, + "grad_norm": 137.0, + "learning_rate": 4.160579501033683e-05, + "loss": 7.6877, + "step": 13605 + }, + { + "epoch": 0.5671293401692301, + "grad_norm": 370.0, + "learning_rate": 4.159914087514214e-05, + "loss": 15.1267, + "step": 13606 + }, + { + "epoch": 0.5671710224667583, + "grad_norm": 828.0, + "learning_rate": 4.1592486893053854e-05, + "loss": 24.7503, + "step": 13607 + }, + { + "epoch": 0.5672127047642866, + "grad_norm": 536.0, + "learning_rate": 4.158583306419322e-05, + "loss": 20.2513, + "step": 13608 + }, + { + "epoch": 0.5672543870618149, + "grad_norm": 290.0, + "learning_rate": 4.157917938868155e-05, + "loss": 12.7503, + "step": 13609 + }, + { + "epoch": 0.5672960693593431, + "grad_norm": 162.0, + "learning_rate": 4.157252586664006e-05, + "loss": 10.8127, + "step": 13610 + }, + { + "epoch": 0.5673377516568713, + "grad_norm": 816.0, + "learning_rate": 4.156587249819006e-05, + "loss": 21.1285, + "step": 13611 + }, + { + "epoch": 0.5673794339543996, + "grad_norm": 510.0, + "learning_rate": 4.155921928345276e-05, + "loss": 16.8767, + "step": 13612 + }, + { + "epoch": 0.5674211162519278, + "grad_norm": 324.0, + "learning_rate": 4.155256622254946e-05, + "loss": 12.3128, + "step": 13613 + }, + { + "epoch": 0.567462798549456, + "grad_norm": 210.0, + "learning_rate": 4.154591331560137e-05, + "loss": 11.438, + "step": 13614 + }, + { + "epoch": 0.5675044808469842, + "grad_norm": 378.0, + "learning_rate": 4.153926056272978e-05, + "loss": 14.5627, + "step": 13615 + }, + { + "epoch": 0.5675461631445126, + "grad_norm": 106.0, + "learning_rate": 4.15326079640559e-05, + "loss": 8.7505, + "step": 13616 + }, + { + "epoch": 0.5675878454420408, + "grad_norm": 202.0, + "learning_rate": 4.1525955519701e-05, + "loss": 11.813, + "step": 13617 + }, + { + "epoch": 0.567629527739569, + "grad_norm": 282.0, + "learning_rate": 4.1519303229786305e-05, + "loss": 11.6254, + "step": 13618 + }, + { + "epoch": 0.5676712100370972, + "grad_norm": 452.0, + "learning_rate": 4.1512651094433066e-05, + "loss": 17.0009, + "step": 13619 + }, + { + "epoch": 0.5677128923346255, + "grad_norm": 212.0, + "learning_rate": 4.1505999113762504e-05, + "loss": 12.1269, + "step": 13620 + }, + { + "epoch": 0.5677545746321537, + "grad_norm": 442.0, + "learning_rate": 4.149934728789589e-05, + "loss": 13.7504, + "step": 13621 + }, + { + "epoch": 0.5677962569296819, + "grad_norm": 672.0, + "learning_rate": 4.14926956169544e-05, + "loss": 20.3756, + "step": 13622 + }, + { + "epoch": 0.5678379392272102, + "grad_norm": 492.0, + "learning_rate": 4.148604410105931e-05, + "loss": 18.1254, + "step": 13623 + }, + { + "epoch": 0.5678796215247385, + "grad_norm": 552.0, + "learning_rate": 4.147939274033179e-05, + "loss": 19.0002, + "step": 13624 + }, + { + "epoch": 0.5679213038222667, + "grad_norm": 123.0, + "learning_rate": 4.147274153489313e-05, + "loss": 7.2819, + "step": 13625 + }, + { + "epoch": 0.5679629861197949, + "grad_norm": 386.0, + "learning_rate": 4.146609048486449e-05, + "loss": 14.5003, + "step": 13626 + }, + { + "epoch": 0.5680046684173231, + "grad_norm": 228.0, + "learning_rate": 4.145943959036712e-05, + "loss": 11.6252, + "step": 13627 + }, + { + "epoch": 0.5680463507148514, + "grad_norm": 79.5, + "learning_rate": 4.1452788851522206e-05, + "loss": 7.0631, + "step": 13628 + }, + { + "epoch": 0.5680880330123796, + "grad_norm": 118.5, + "learning_rate": 4.1446138268450986e-05, + "loss": 9.3131, + "step": 13629 + }, + { + "epoch": 0.5681297153099079, + "grad_norm": 214.0, + "learning_rate": 4.1439487841274654e-05, + "loss": 11.8752, + "step": 13630 + }, + { + "epoch": 0.5681713976074361, + "grad_norm": 580.0, + "learning_rate": 4.143283757011442e-05, + "loss": 17.1253, + "step": 13631 + }, + { + "epoch": 0.5682130799049644, + "grad_norm": 540.0, + "learning_rate": 4.142618745509147e-05, + "loss": 17.1284, + "step": 13632 + }, + { + "epoch": 0.5682547622024926, + "grad_norm": 304.0, + "learning_rate": 4.1419537496327026e-05, + "loss": 14.3755, + "step": 13633 + }, + { + "epoch": 0.5682964445000208, + "grad_norm": 278.0, + "learning_rate": 4.141288769394226e-05, + "loss": 13.0627, + "step": 13634 + }, + { + "epoch": 0.568338126797549, + "grad_norm": 600.0, + "learning_rate": 4.14062380480584e-05, + "loss": 23.3752, + "step": 13635 + }, + { + "epoch": 0.5683798090950773, + "grad_norm": 1304.0, + "learning_rate": 4.139958855879659e-05, + "loss": 27.1307, + "step": 13636 + }, + { + "epoch": 0.5684214913926056, + "grad_norm": 226.0, + "learning_rate": 4.1392939226278063e-05, + "loss": 12.1879, + "step": 13637 + }, + { + "epoch": 0.5684631736901338, + "grad_norm": 776.0, + "learning_rate": 4.138629005062397e-05, + "loss": 19.0034, + "step": 13638 + }, + { + "epoch": 0.5685048559876621, + "grad_norm": 254.0, + "learning_rate": 4.1379641031955515e-05, + "loss": 13.4379, + "step": 13639 + }, + { + "epoch": 0.5685465382851903, + "grad_norm": 380.0, + "learning_rate": 4.137299217039385e-05, + "loss": 14.7503, + "step": 13640 + }, + { + "epoch": 0.5685882205827185, + "grad_norm": 253.0, + "learning_rate": 4.136634346606019e-05, + "loss": 12.0637, + "step": 13641 + }, + { + "epoch": 0.5686299028802467, + "grad_norm": 100.0, + "learning_rate": 4.135969491907567e-05, + "loss": 9.5004, + "step": 13642 + }, + { + "epoch": 0.568671585177775, + "grad_norm": 640.0, + "learning_rate": 4.1353046529561476e-05, + "loss": 19.379, + "step": 13643 + }, + { + "epoch": 0.5687132674753033, + "grad_norm": 696.0, + "learning_rate": 4.134639829763879e-05, + "loss": 18.5002, + "step": 13644 + }, + { + "epoch": 0.5687549497728315, + "grad_norm": 205.0, + "learning_rate": 4.1339750223428755e-05, + "loss": 10.5627, + "step": 13645 + }, + { + "epoch": 0.5687966320703597, + "grad_norm": 568.0, + "learning_rate": 4.133310230705254e-05, + "loss": 18.0006, + "step": 13646 + }, + { + "epoch": 0.568838314367888, + "grad_norm": 342.0, + "learning_rate": 4.1326454548631316e-05, + "loss": 14.0003, + "step": 13647 + }, + { + "epoch": 0.5688799966654162, + "grad_norm": 338.0, + "learning_rate": 4.131980694828621e-05, + "loss": 13.9379, + "step": 13648 + }, + { + "epoch": 0.5689216789629444, + "grad_norm": 596.0, + "learning_rate": 4.131315950613841e-05, + "loss": 18.2508, + "step": 13649 + }, + { + "epoch": 0.5689633612604726, + "grad_norm": 1048.0, + "learning_rate": 4.1306512222309036e-05, + "loss": 26.3754, + "step": 13650 + }, + { + "epoch": 0.569005043558001, + "grad_norm": 221.0, + "learning_rate": 4.129986509691926e-05, + "loss": 12.0003, + "step": 13651 + }, + { + "epoch": 0.5690467258555292, + "grad_norm": 306.0, + "learning_rate": 4.12932181300902e-05, + "loss": 13.5002, + "step": 13652 + }, + { + "epoch": 0.5690884081530574, + "grad_norm": 576.0, + "learning_rate": 4.128657132194303e-05, + "loss": 17.0011, + "step": 13653 + }, + { + "epoch": 0.5691300904505856, + "grad_norm": 414.0, + "learning_rate": 4.1279924672598855e-05, + "loss": 16.501, + "step": 13654 + }, + { + "epoch": 0.5691717727481139, + "grad_norm": 340.0, + "learning_rate": 4.127327818217884e-05, + "loss": 14.3127, + "step": 13655 + }, + { + "epoch": 0.5692134550456421, + "grad_norm": 340.0, + "learning_rate": 4.12666318508041e-05, + "loss": 15.1255, + "step": 13656 + }, + { + "epoch": 0.5692551373431703, + "grad_norm": 224.0, + "learning_rate": 4.125998567859577e-05, + "loss": 12.1256, + "step": 13657 + }, + { + "epoch": 0.5692968196406986, + "grad_norm": 932.0, + "learning_rate": 4.125333966567497e-05, + "loss": 24.253, + "step": 13658 + }, + { + "epoch": 0.5693385019382269, + "grad_norm": 149.0, + "learning_rate": 4.1246693812162844e-05, + "loss": 10.8133, + "step": 13659 + }, + { + "epoch": 0.5693801842357551, + "grad_norm": 98.0, + "learning_rate": 4.1240048118180486e-05, + "loss": 9.0635, + "step": 13660 + }, + { + "epoch": 0.5694218665332833, + "grad_norm": 219.0, + "learning_rate": 4.1233402583849046e-05, + "loss": 10.7502, + "step": 13661 + }, + { + "epoch": 0.5694635488308115, + "grad_norm": 146.0, + "learning_rate": 4.12267572092896e-05, + "loss": 9.5007, + "step": 13662 + }, + { + "epoch": 0.5695052311283398, + "grad_norm": 126.0, + "learning_rate": 4.1220111994623314e-05, + "loss": 10.5002, + "step": 13663 + }, + { + "epoch": 0.569546913425868, + "grad_norm": 227.0, + "learning_rate": 4.121346693997123e-05, + "loss": 12.3752, + "step": 13664 + }, + { + "epoch": 0.5695885957233963, + "grad_norm": 700.0, + "learning_rate": 4.1206822045454526e-05, + "loss": 21.0002, + "step": 13665 + }, + { + "epoch": 0.5696302780209245, + "grad_norm": 780.0, + "learning_rate": 4.1200177311194236e-05, + "loss": 21.0038, + "step": 13666 + }, + { + "epoch": 0.5696719603184528, + "grad_norm": 444.0, + "learning_rate": 4.119353273731152e-05, + "loss": 16.3757, + "step": 13667 + }, + { + "epoch": 0.569713642615981, + "grad_norm": 186.0, + "learning_rate": 4.118688832392744e-05, + "loss": 10.1251, + "step": 13668 + }, + { + "epoch": 0.5697553249135092, + "grad_norm": 358.0, + "learning_rate": 4.118024407116311e-05, + "loss": 14.1878, + "step": 13669 + }, + { + "epoch": 0.5697970072110374, + "grad_norm": 324.0, + "learning_rate": 4.1173599979139606e-05, + "loss": 13.1879, + "step": 13670 + }, + { + "epoch": 0.5698386895085658, + "grad_norm": 276.0, + "learning_rate": 4.116695604797804e-05, + "loss": 12.1877, + "step": 13671 + }, + { + "epoch": 0.569880371806094, + "grad_norm": 700.0, + "learning_rate": 4.116031227779947e-05, + "loss": 20.8759, + "step": 13672 + }, + { + "epoch": 0.5699220541036222, + "grad_norm": 764.0, + "learning_rate": 4.115366866872501e-05, + "loss": 20.2543, + "step": 13673 + }, + { + "epoch": 0.5699637364011504, + "grad_norm": 352.0, + "learning_rate": 4.11470252208757e-05, + "loss": 14.4378, + "step": 13674 + }, + { + "epoch": 0.5700054186986787, + "grad_norm": 86.0, + "learning_rate": 4.114038193437267e-05, + "loss": 8.7502, + "step": 13675 + }, + { + "epoch": 0.5700471009962069, + "grad_norm": 60.5, + "learning_rate": 4.113373880933694e-05, + "loss": 8.313, + "step": 13676 + }, + { + "epoch": 0.5700887832937351, + "grad_norm": 85.5, + "learning_rate": 4.112709584588963e-05, + "loss": 7.5626, + "step": 13677 + }, + { + "epoch": 0.5701304655912633, + "grad_norm": 119.5, + "learning_rate": 4.112045304415176e-05, + "loss": 7.3755, + "step": 13678 + }, + { + "epoch": 0.5701721478887917, + "grad_norm": 52.75, + "learning_rate": 4.111381040424445e-05, + "loss": 7.844, + "step": 13679 + }, + { + "epoch": 0.5702138301863199, + "grad_norm": 61.0, + "learning_rate": 4.1107167926288704e-05, + "loss": 8.0001, + "step": 13680 + }, + { + "epoch": 0.5702555124838481, + "grad_norm": 354.0, + "learning_rate": 4.110052561040563e-05, + "loss": 13.0641, + "step": 13681 + }, + { + "epoch": 0.5702971947813763, + "grad_norm": 458.0, + "learning_rate": 4.109388345671625e-05, + "loss": 18.5005, + "step": 13682 + }, + { + "epoch": 0.5703388770789046, + "grad_norm": 203.0, + "learning_rate": 4.108724146534167e-05, + "loss": 10.1254, + "step": 13683 + }, + { + "epoch": 0.5703805593764328, + "grad_norm": 208.0, + "learning_rate": 4.1080599636402875e-05, + "loss": 11.3127, + "step": 13684 + }, + { + "epoch": 0.570422241673961, + "grad_norm": 127.0, + "learning_rate": 4.107395797002096e-05, + "loss": 8.6253, + "step": 13685 + }, + { + "epoch": 0.5704639239714893, + "grad_norm": 358.0, + "learning_rate": 4.1067316466316936e-05, + "loss": 14.8752, + "step": 13686 + }, + { + "epoch": 0.5705056062690176, + "grad_norm": 398.0, + "learning_rate": 4.1060675125411884e-05, + "loss": 16.0007, + "step": 13687 + }, + { + "epoch": 0.5705472885665458, + "grad_norm": 166.0, + "learning_rate": 4.1054033947426796e-05, + "loss": 9.5004, + "step": 13688 + }, + { + "epoch": 0.570588970864074, + "grad_norm": 276.0, + "learning_rate": 4.104739293248276e-05, + "loss": 13.0003, + "step": 13689 + }, + { + "epoch": 0.5706306531616022, + "grad_norm": 97.0, + "learning_rate": 4.104075208070076e-05, + "loss": 7.1572, + "step": 13690 + }, + { + "epoch": 0.5706723354591305, + "grad_norm": 172.0, + "learning_rate": 4.1034111392201866e-05, + "loss": 11.0627, + "step": 13691 + }, + { + "epoch": 0.5707140177566588, + "grad_norm": 226.0, + "learning_rate": 4.102747086710708e-05, + "loss": 11.313, + "step": 13692 + }, + { + "epoch": 0.570755700054187, + "grad_norm": 116.0, + "learning_rate": 4.102083050553743e-05, + "loss": 9.5634, + "step": 13693 + }, + { + "epoch": 0.5707973823517152, + "grad_norm": 460.0, + "learning_rate": 4.101419030761395e-05, + "loss": 17.2503, + "step": 13694 + }, + { + "epoch": 0.5708390646492435, + "grad_norm": 1280.0, + "learning_rate": 4.100755027345764e-05, + "loss": 36.5177, + "step": 13695 + }, + { + "epoch": 0.5708807469467717, + "grad_norm": 222.0, + "learning_rate": 4.1000910403189516e-05, + "loss": 10.6877, + "step": 13696 + }, + { + "epoch": 0.5709224292442999, + "grad_norm": 201.0, + "learning_rate": 4.0994270696930624e-05, + "loss": 11.6252, + "step": 13697 + }, + { + "epoch": 0.5709641115418281, + "grad_norm": 400.0, + "learning_rate": 4.098763115480193e-05, + "loss": 15.1254, + "step": 13698 + }, + { + "epoch": 0.5710057938393565, + "grad_norm": 470.0, + "learning_rate": 4.098099177692448e-05, + "loss": 16.7514, + "step": 13699 + }, + { + "epoch": 0.5710474761368847, + "grad_norm": 258.0, + "learning_rate": 4.0974352563419226e-05, + "loss": 12.4378, + "step": 13700 + }, + { + "epoch": 0.5710891584344129, + "grad_norm": 624.0, + "learning_rate": 4.096771351440722e-05, + "loss": 19.3753, + "step": 13701 + }, + { + "epoch": 0.5711308407319411, + "grad_norm": 272.0, + "learning_rate": 4.0961074630009424e-05, + "loss": 13.1877, + "step": 13702 + }, + { + "epoch": 0.5711725230294694, + "grad_norm": 215.0, + "learning_rate": 4.095443591034686e-05, + "loss": 8.3754, + "step": 13703 + }, + { + "epoch": 0.5712142053269976, + "grad_norm": 233.0, + "learning_rate": 4.094779735554049e-05, + "loss": 13.1878, + "step": 13704 + }, + { + "epoch": 0.5712558876245258, + "grad_norm": 262.0, + "learning_rate": 4.0941158965711315e-05, + "loss": 12.1253, + "step": 13705 + }, + { + "epoch": 0.571297569922054, + "grad_norm": 146.0, + "learning_rate": 4.093452074098033e-05, + "loss": 8.3754, + "step": 13706 + }, + { + "epoch": 0.5713392522195824, + "grad_norm": 270.0, + "learning_rate": 4.092788268146851e-05, + "loss": 10.6878, + "step": 13707 + }, + { + "epoch": 0.5713809345171106, + "grad_norm": 221.0, + "learning_rate": 4.092124478729682e-05, + "loss": 12.0004, + "step": 13708 + }, + { + "epoch": 0.5714226168146388, + "grad_norm": 249.0, + "learning_rate": 4.0914607058586276e-05, + "loss": 12.8752, + "step": 13709 + }, + { + "epoch": 0.5714642991121671, + "grad_norm": 768.0, + "learning_rate": 4.090796949545779e-05, + "loss": 21.6257, + "step": 13710 + }, + { + "epoch": 0.5715059814096953, + "grad_norm": 149.0, + "learning_rate": 4.0901332098032403e-05, + "loss": 10.7502, + "step": 13711 + }, + { + "epoch": 0.5715476637072235, + "grad_norm": 424.0, + "learning_rate": 4.089469486643102e-05, + "loss": 16.1252, + "step": 13712 + }, + { + "epoch": 0.5715893460047518, + "grad_norm": 1104.0, + "learning_rate": 4.088805780077466e-05, + "loss": 29.1262, + "step": 13713 + }, + { + "epoch": 0.5716310283022801, + "grad_norm": 166.0, + "learning_rate": 4.088142090118422e-05, + "loss": 10.6251, + "step": 13714 + }, + { + "epoch": 0.5716727105998083, + "grad_norm": 382.0, + "learning_rate": 4.087478416778072e-05, + "loss": 12.2505, + "step": 13715 + }, + { + "epoch": 0.5717143928973365, + "grad_norm": 684.0, + "learning_rate": 4.086814760068507e-05, + "loss": 18.8754, + "step": 13716 + }, + { + "epoch": 0.5717560751948647, + "grad_norm": 652.0, + "learning_rate": 4.086151120001825e-05, + "loss": 20.0003, + "step": 13717 + }, + { + "epoch": 0.571797757492393, + "grad_norm": 258.0, + "learning_rate": 4.0854874965901204e-05, + "loss": 12.0631, + "step": 13718 + }, + { + "epoch": 0.5718394397899212, + "grad_norm": 223.0, + "learning_rate": 4.0848238898454864e-05, + "loss": 13.0003, + "step": 13719 + }, + { + "epoch": 0.5718811220874495, + "grad_norm": 280.0, + "learning_rate": 4.0841602997800174e-05, + "loss": 12.2505, + "step": 13720 + }, + { + "epoch": 0.5719228043849777, + "grad_norm": 249.0, + "learning_rate": 4.083496726405811e-05, + "loss": 11.2503, + "step": 13721 + }, + { + "epoch": 0.571964486682506, + "grad_norm": 308.0, + "learning_rate": 4.082833169734956e-05, + "loss": 12.6877, + "step": 13722 + }, + { + "epoch": 0.5720061689800342, + "grad_norm": 111.0, + "learning_rate": 4.08216962977955e-05, + "loss": 9.0636, + "step": 13723 + }, + { + "epoch": 0.5720478512775624, + "grad_norm": 310.0, + "learning_rate": 4.0815061065516814e-05, + "loss": 14.1255, + "step": 13724 + }, + { + "epoch": 0.5720895335750906, + "grad_norm": 312.0, + "learning_rate": 4.080842600063447e-05, + "loss": 13.8755, + "step": 13725 + }, + { + "epoch": 0.572131215872619, + "grad_norm": 208.0, + "learning_rate": 4.080179110326937e-05, + "loss": 11.8129, + "step": 13726 + }, + { + "epoch": 0.5721728981701472, + "grad_norm": 716.0, + "learning_rate": 4.079515637354246e-05, + "loss": 19.8752, + "step": 13727 + }, + { + "epoch": 0.5722145804676754, + "grad_norm": 292.0, + "learning_rate": 4.078852181157462e-05, + "loss": 12.3127, + "step": 13728 + }, + { + "epoch": 0.5722562627652036, + "grad_norm": 244.0, + "learning_rate": 4.078188741748681e-05, + "loss": 12.5002, + "step": 13729 + }, + { + "epoch": 0.5722979450627319, + "grad_norm": 103.0, + "learning_rate": 4.0775253191399895e-05, + "loss": 10.1881, + "step": 13730 + }, + { + "epoch": 0.5723396273602601, + "grad_norm": 888.0, + "learning_rate": 4.0768619133434835e-05, + "loss": 23.5007, + "step": 13731 + }, + { + "epoch": 0.5723813096577883, + "grad_norm": 302.0, + "learning_rate": 4.07619852437125e-05, + "loss": 13.6255, + "step": 13732 + }, + { + "epoch": 0.5724229919553165, + "grad_norm": 95.0, + "learning_rate": 4.07553515223538e-05, + "loss": 9.6256, + "step": 13733 + }, + { + "epoch": 0.5724646742528449, + "grad_norm": 472.0, + "learning_rate": 4.0748717969479635e-05, + "loss": 15.6256, + "step": 13734 + }, + { + "epoch": 0.5725063565503731, + "grad_norm": 156.0, + "learning_rate": 4.074208458521092e-05, + "loss": 9.438, + "step": 13735 + }, + { + "epoch": 0.5725480388479013, + "grad_norm": 69.5, + "learning_rate": 4.073545136966852e-05, + "loss": 7.9066, + "step": 13736 + }, + { + "epoch": 0.5725897211454295, + "grad_norm": 402.0, + "learning_rate": 4.072881832297336e-05, + "loss": 15.4376, + "step": 13737 + }, + { + "epoch": 0.5726314034429578, + "grad_norm": 336.0, + "learning_rate": 4.072218544524629e-05, + "loss": 12.8126, + "step": 13738 + }, + { + "epoch": 0.572673085740486, + "grad_norm": 348.0, + "learning_rate": 4.071555273660824e-05, + "loss": 13.4377, + "step": 13739 + }, + { + "epoch": 0.5727147680380142, + "grad_norm": 171.0, + "learning_rate": 4.0708920197180035e-05, + "loss": 9.5007, + "step": 13740 + }, + { + "epoch": 0.5727564503355425, + "grad_norm": 132.0, + "learning_rate": 4.070228782708261e-05, + "loss": 8.8758, + "step": 13741 + }, + { + "epoch": 0.5727981326330708, + "grad_norm": 304.0, + "learning_rate": 4.069565562643679e-05, + "loss": 14.8136, + "step": 13742 + }, + { + "epoch": 0.572839814930599, + "grad_norm": 205.0, + "learning_rate": 4.0689023595363486e-05, + "loss": 10.5003, + "step": 13743 + }, + { + "epoch": 0.5728814972281272, + "grad_norm": 274.0, + "learning_rate": 4.068239173398356e-05, + "loss": 12.5626, + "step": 13744 + }, + { + "epoch": 0.5729231795256554, + "grad_norm": 304.0, + "learning_rate": 4.0675760042417857e-05, + "loss": 14.0003, + "step": 13745 + }, + { + "epoch": 0.5729648618231837, + "grad_norm": 241.0, + "learning_rate": 4.0669128520787254e-05, + "loss": 12.3755, + "step": 13746 + }, + { + "epoch": 0.573006544120712, + "grad_norm": 185.0, + "learning_rate": 4.066249716921263e-05, + "loss": 11.938, + "step": 13747 + }, + { + "epoch": 0.5730482264182402, + "grad_norm": 160.0, + "learning_rate": 4.0655865987814806e-05, + "loss": 10.5003, + "step": 13748 + }, + { + "epoch": 0.5730899087157684, + "grad_norm": 474.0, + "learning_rate": 4.064923497671467e-05, + "loss": 17.1254, + "step": 13749 + }, + { + "epoch": 0.5731315910132967, + "grad_norm": 450.0, + "learning_rate": 4.0642604136033045e-05, + "loss": 16.5001, + "step": 13750 + }, + { + "epoch": 0.5731732733108249, + "grad_norm": 392.0, + "learning_rate": 4.06359734658908e-05, + "loss": 14.1252, + "step": 13751 + }, + { + "epoch": 0.5732149556083531, + "grad_norm": 181.0, + "learning_rate": 4.062934296640876e-05, + "loss": 10.4378, + "step": 13752 + }, + { + "epoch": 0.5732566379058813, + "grad_norm": 466.0, + "learning_rate": 4.062271263770779e-05, + "loss": 18.1254, + "step": 13753 + }, + { + "epoch": 0.5732983202034097, + "grad_norm": 844.0, + "learning_rate": 4.061608247990869e-05, + "loss": 19.3768, + "step": 13754 + }, + { + "epoch": 0.5733400025009379, + "grad_norm": 218.0, + "learning_rate": 4.0609452493132336e-05, + "loss": 10.4378, + "step": 13755 + }, + { + "epoch": 0.5733816847984661, + "grad_norm": 310.0, + "learning_rate": 4.0602822677499544e-05, + "loss": 13.1877, + "step": 13756 + }, + { + "epoch": 0.5734233670959943, + "grad_norm": 140.0, + "learning_rate": 4.059619303313115e-05, + "loss": 10.6251, + "step": 13757 + }, + { + "epoch": 0.5734650493935226, + "grad_norm": 406.0, + "learning_rate": 4.058956356014795e-05, + "loss": 14.7502, + "step": 13758 + }, + { + "epoch": 0.5735067316910508, + "grad_norm": 676.0, + "learning_rate": 4.058293425867081e-05, + "loss": 20.0005, + "step": 13759 + }, + { + "epoch": 0.573548413988579, + "grad_norm": 134.0, + "learning_rate": 4.057630512882051e-05, + "loss": 9.9379, + "step": 13760 + }, + { + "epoch": 0.5735900962861072, + "grad_norm": 560.0, + "learning_rate": 4.056967617071792e-05, + "loss": 18.3753, + "step": 13761 + }, + { + "epoch": 0.5736317785836356, + "grad_norm": 346.0, + "learning_rate": 4.056304738448378e-05, + "loss": 14.0627, + "step": 13762 + }, + { + "epoch": 0.5736734608811638, + "grad_norm": 254.0, + "learning_rate": 4.055641877023897e-05, + "loss": 11.5627, + "step": 13763 + }, + { + "epoch": 0.573715143178692, + "grad_norm": 460.0, + "learning_rate": 4.0549790328104245e-05, + "loss": 16.7504, + "step": 13764 + }, + { + "epoch": 0.5737568254762202, + "grad_norm": 312.0, + "learning_rate": 4.0543162058200445e-05, + "loss": 14.3753, + "step": 13765 + }, + { + "epoch": 0.5737985077737485, + "grad_norm": 444.0, + "learning_rate": 4.053653396064834e-05, + "loss": 13.5004, + "step": 13766 + }, + { + "epoch": 0.5738401900712767, + "grad_norm": 1352.0, + "learning_rate": 4.0529906035568766e-05, + "loss": 29.7505, + "step": 13767 + }, + { + "epoch": 0.573881872368805, + "grad_norm": 430.0, + "learning_rate": 4.0523278283082475e-05, + "loss": 16.2501, + "step": 13768 + }, + { + "epoch": 0.5739235546663332, + "grad_norm": 364.0, + "learning_rate": 4.051665070331028e-05, + "loss": 15.0004, + "step": 13769 + }, + { + "epoch": 0.5739652369638615, + "grad_norm": 214.0, + "learning_rate": 4.051002329637298e-05, + "loss": 12.3138, + "step": 13770 + }, + { + "epoch": 0.5740069192613897, + "grad_norm": 308.0, + "learning_rate": 4.050339606239134e-05, + "loss": 13.3129, + "step": 13771 + }, + { + "epoch": 0.5740486015589179, + "grad_norm": 592.0, + "learning_rate": 4.049676900148614e-05, + "loss": 17.8753, + "step": 13772 + }, + { + "epoch": 0.5740902838564461, + "grad_norm": 194.0, + "learning_rate": 4.049014211377819e-05, + "loss": 11.3127, + "step": 13773 + }, + { + "epoch": 0.5741319661539744, + "grad_norm": 103.5, + "learning_rate": 4.0483515399388226e-05, + "loss": 8.9376, + "step": 13774 + }, + { + "epoch": 0.5741736484515026, + "grad_norm": 636.0, + "learning_rate": 4.047688885843706e-05, + "loss": 18.13, + "step": 13775 + }, + { + "epoch": 0.5742153307490309, + "grad_norm": 135.0, + "learning_rate": 4.047026249104541e-05, + "loss": 9.8752, + "step": 13776 + }, + { + "epoch": 0.5742570130465591, + "grad_norm": 258.0, + "learning_rate": 4.04636362973341e-05, + "loss": 12.3127, + "step": 13777 + }, + { + "epoch": 0.5742986953440874, + "grad_norm": 101.5, + "learning_rate": 4.0457010277423846e-05, + "loss": 10.0003, + "step": 13778 + }, + { + "epoch": 0.5743403776416156, + "grad_norm": 354.0, + "learning_rate": 4.045038443143544e-05, + "loss": 11.7506, + "step": 13779 + }, + { + "epoch": 0.5743820599391438, + "grad_norm": 119.0, + "learning_rate": 4.044375875948961e-05, + "loss": 9.9378, + "step": 13780 + }, + { + "epoch": 0.574423742236672, + "grad_norm": 480.0, + "learning_rate": 4.0437133261707136e-05, + "loss": 16.3761, + "step": 13781 + }, + { + "epoch": 0.5744654245342004, + "grad_norm": 135.0, + "learning_rate": 4.0430507938208764e-05, + "loss": 10.3128, + "step": 13782 + }, + { + "epoch": 0.5745071068317286, + "grad_norm": 77.0, + "learning_rate": 4.042388278911523e-05, + "loss": 7.7505, + "step": 13783 + }, + { + "epoch": 0.5745487891292568, + "grad_norm": 600.0, + "learning_rate": 4.041725781454726e-05, + "loss": 19.001, + "step": 13784 + }, + { + "epoch": 0.5745904714267851, + "grad_norm": 456.0, + "learning_rate": 4.041063301462565e-05, + "loss": 16.5002, + "step": 13785 + }, + { + "epoch": 0.5746321537243133, + "grad_norm": 141.0, + "learning_rate": 4.040400838947108e-05, + "loss": 11.0632, + "step": 13786 + }, + { + "epoch": 0.5746738360218415, + "grad_norm": 956.0, + "learning_rate": 4.039738393920433e-05, + "loss": 23.1293, + "step": 13787 + }, + { + "epoch": 0.5747155183193697, + "grad_norm": 83.0, + "learning_rate": 4.0390759663946085e-05, + "loss": 7.9064, + "step": 13788 + }, + { + "epoch": 0.5747572006168981, + "grad_norm": 436.0, + "learning_rate": 4.038413556381713e-05, + "loss": 15.5627, + "step": 13789 + }, + { + "epoch": 0.5747988829144263, + "grad_norm": 276.0, + "learning_rate": 4.037751163893813e-05, + "loss": 14.1257, + "step": 13790 + }, + { + "epoch": 0.5748405652119545, + "grad_norm": 93.0, + "learning_rate": 4.0370887889429854e-05, + "loss": 8.188, + "step": 13791 + }, + { + "epoch": 0.5748822475094827, + "grad_norm": 888.0, + "learning_rate": 4.036426431541298e-05, + "loss": 23.626, + "step": 13792 + }, + { + "epoch": 0.574923929807011, + "grad_norm": 456.0, + "learning_rate": 4.035764091700826e-05, + "loss": 15.438, + "step": 13793 + }, + { + "epoch": 0.5749656121045392, + "grad_norm": 448.0, + "learning_rate": 4.035101769433639e-05, + "loss": 16.5001, + "step": 13794 + }, + { + "epoch": 0.5750072944020674, + "grad_norm": 668.0, + "learning_rate": 4.034439464751807e-05, + "loss": 17.7509, + "step": 13795 + }, + { + "epoch": 0.5750489766995956, + "grad_norm": 294.0, + "learning_rate": 4.033777177667401e-05, + "loss": 10.5006, + "step": 13796 + }, + { + "epoch": 0.575090658997124, + "grad_norm": 346.0, + "learning_rate": 4.0331149081924944e-05, + "loss": 16.2503, + "step": 13797 + }, + { + "epoch": 0.5751323412946522, + "grad_norm": 368.0, + "learning_rate": 4.032452656339151e-05, + "loss": 14.3127, + "step": 13798 + }, + { + "epoch": 0.5751740235921804, + "grad_norm": 640.0, + "learning_rate": 4.031790422119447e-05, + "loss": 18.5004, + "step": 13799 + }, + { + "epoch": 0.5752157058897086, + "grad_norm": 264.0, + "learning_rate": 4.031128205545447e-05, + "loss": 7.751, + "step": 13800 + }, + { + "epoch": 0.5752573881872369, + "grad_norm": 532.0, + "learning_rate": 4.030466006629222e-05, + "loss": 17.1285, + "step": 13801 + }, + { + "epoch": 0.5752990704847651, + "grad_norm": 512.0, + "learning_rate": 4.029803825382839e-05, + "loss": 17.3756, + "step": 13802 + }, + { + "epoch": 0.5753407527822934, + "grad_norm": 584.0, + "learning_rate": 4.029141661818369e-05, + "loss": 19.5019, + "step": 13803 + }, + { + "epoch": 0.5753824350798216, + "grad_norm": 189.0, + "learning_rate": 4.0284795159478764e-05, + "loss": 11.0004, + "step": 13804 + }, + { + "epoch": 0.5754241173773499, + "grad_norm": 516.0, + "learning_rate": 4.027817387783433e-05, + "loss": 17.5006, + "step": 13805 + }, + { + "epoch": 0.5754657996748781, + "grad_norm": 556.0, + "learning_rate": 4.0271552773371016e-05, + "loss": 18.8751, + "step": 13806 + }, + { + "epoch": 0.5755074819724063, + "grad_norm": 380.0, + "learning_rate": 4.0264931846209536e-05, + "loss": 14.9381, + "step": 13807 + }, + { + "epoch": 0.5755491642699345, + "grad_norm": 310.0, + "learning_rate": 4.025831109647052e-05, + "loss": 13.5011, + "step": 13808 + }, + { + "epoch": 0.5755908465674628, + "grad_norm": 612.0, + "learning_rate": 4.0251690524274674e-05, + "loss": 19.3751, + "step": 13809 + }, + { + "epoch": 0.575632528864991, + "grad_norm": 237.0, + "learning_rate": 4.024507012974261e-05, + "loss": 12.5014, + "step": 13810 + }, + { + "epoch": 0.5756742111625193, + "grad_norm": 278.0, + "learning_rate": 4.0238449912995026e-05, + "loss": 13.3134, + "step": 13811 + }, + { + "epoch": 0.5757158934600475, + "grad_norm": 209.0, + "learning_rate": 4.0231829874152546e-05, + "loss": 11.0003, + "step": 13812 + }, + { + "epoch": 0.5757575757575758, + "grad_norm": 374.0, + "learning_rate": 4.0225210013335846e-05, + "loss": 14.5627, + "step": 13813 + }, + { + "epoch": 0.575799258055104, + "grad_norm": 284.0, + "learning_rate": 4.021859033066554e-05, + "loss": 13.2503, + "step": 13814 + }, + { + "epoch": 0.5758409403526322, + "grad_norm": 105.0, + "learning_rate": 4.021197082626232e-05, + "loss": 8.6878, + "step": 13815 + }, + { + "epoch": 0.5758826226501604, + "grad_norm": 258.0, + "learning_rate": 4.020535150024678e-05, + "loss": 11.0629, + "step": 13816 + }, + { + "epoch": 0.5759243049476888, + "grad_norm": 145.0, + "learning_rate": 4.01987323527396e-05, + "loss": 10.1257, + "step": 13817 + }, + { + "epoch": 0.575965987245217, + "grad_norm": 360.0, + "learning_rate": 4.019211338386137e-05, + "loss": 13.5006, + "step": 13818 + }, + { + "epoch": 0.5760076695427452, + "grad_norm": 366.0, + "learning_rate": 4.018549459373275e-05, + "loss": 11.3129, + "step": 13819 + }, + { + "epoch": 0.5760493518402734, + "grad_norm": 864.0, + "learning_rate": 4.017887598247437e-05, + "loss": 20.7547, + "step": 13820 + }, + { + "epoch": 0.5760910341378017, + "grad_norm": 880.0, + "learning_rate": 4.017225755020685e-05, + "loss": 23.0015, + "step": 13821 + }, + { + "epoch": 0.5761327164353299, + "grad_norm": 340.0, + "learning_rate": 4.016563929705079e-05, + "loss": 14.0627, + "step": 13822 + }, + { + "epoch": 0.5761743987328581, + "grad_norm": 182.0, + "learning_rate": 4.0159021223126846e-05, + "loss": 9.6251, + "step": 13823 + }, + { + "epoch": 0.5762160810303864, + "grad_norm": 298.0, + "learning_rate": 4.0152403328555597e-05, + "loss": 11.8757, + "step": 13824 + }, + { + "epoch": 0.5762577633279147, + "grad_norm": 360.0, + "learning_rate": 4.01457856134577e-05, + "loss": 14.7509, + "step": 13825 + }, + { + "epoch": 0.5762994456254429, + "grad_norm": 251.0, + "learning_rate": 4.0139168077953705e-05, + "loss": 12.4378, + "step": 13826 + }, + { + "epoch": 0.5763411279229711, + "grad_norm": 217.0, + "learning_rate": 4.013255072216427e-05, + "loss": 8.0628, + "step": 13827 + }, + { + "epoch": 0.5763828102204993, + "grad_norm": 284.0, + "learning_rate": 4.0125933546209947e-05, + "loss": 12.8129, + "step": 13828 + }, + { + "epoch": 0.5764244925180276, + "grad_norm": 213.0, + "learning_rate": 4.01193165502114e-05, + "loss": 9.6881, + "step": 13829 + }, + { + "epoch": 0.5764661748155558, + "grad_norm": 1368.0, + "learning_rate": 4.0112699734289145e-05, + "loss": 27.0048, + "step": 13830 + }, + { + "epoch": 0.576507857113084, + "grad_norm": 296.0, + "learning_rate": 4.0106083098563836e-05, + "loss": 12.5006, + "step": 13831 + }, + { + "epoch": 0.5765495394106123, + "grad_norm": 230.0, + "learning_rate": 4.009946664315604e-05, + "loss": 11.6252, + "step": 13832 + }, + { + "epoch": 0.5765912217081406, + "grad_norm": 326.0, + "learning_rate": 4.0092850368186344e-05, + "loss": 14.0003, + "step": 13833 + }, + { + "epoch": 0.5766329040056688, + "grad_norm": 264.0, + "learning_rate": 4.0086234273775315e-05, + "loss": 12.8127, + "step": 13834 + }, + { + "epoch": 0.576674586303197, + "grad_norm": 340.0, + "learning_rate": 4.007961836004357e-05, + "loss": 14.188, + "step": 13835 + }, + { + "epoch": 0.5767162686007252, + "grad_norm": 478.0, + "learning_rate": 4.0073002627111637e-05, + "loss": 15.1884, + "step": 13836 + }, + { + "epoch": 0.5767579508982535, + "grad_norm": 556.0, + "learning_rate": 4.0066387075100134e-05, + "loss": 18.3753, + "step": 13837 + }, + { + "epoch": 0.5767996331957818, + "grad_norm": 536.0, + "learning_rate": 4.005977170412959e-05, + "loss": 16.7505, + "step": 13838 + }, + { + "epoch": 0.57684131549331, + "grad_norm": 336.0, + "learning_rate": 4.005315651432061e-05, + "loss": 13.3129, + "step": 13839 + }, + { + "epoch": 0.5768829977908382, + "grad_norm": 306.0, + "learning_rate": 4.004654150579371e-05, + "loss": 14.3126, + "step": 13840 + }, + { + "epoch": 0.5769246800883665, + "grad_norm": 358.0, + "learning_rate": 4.00399266786695e-05, + "loss": 13.0627, + "step": 13841 + }, + { + "epoch": 0.5769663623858947, + "grad_norm": 848.0, + "learning_rate": 4.003331203306849e-05, + "loss": 20.6284, + "step": 13842 + }, + { + "epoch": 0.5770080446834229, + "grad_norm": 286.0, + "learning_rate": 4.0026697569111265e-05, + "loss": 13.8752, + "step": 13843 + }, + { + "epoch": 0.5770497269809511, + "grad_norm": 452.0, + "learning_rate": 4.002008328691836e-05, + "loss": 16.8752, + "step": 13844 + }, + { + "epoch": 0.5770914092784795, + "grad_norm": 1160.0, + "learning_rate": 4.001346918661032e-05, + "loss": 27.3752, + "step": 13845 + }, + { + "epoch": 0.5771330915760077, + "grad_norm": 215.0, + "learning_rate": 4.000685526830768e-05, + "loss": 12.3133, + "step": 13846 + }, + { + "epoch": 0.5771747738735359, + "grad_norm": 468.0, + "learning_rate": 4.000024153213102e-05, + "loss": 14.8754, + "step": 13847 + }, + { + "epoch": 0.5772164561710641, + "grad_norm": 175.0, + "learning_rate": 3.9993627978200814e-05, + "loss": 10.2505, + "step": 13848 + }, + { + "epoch": 0.5772581384685924, + "grad_norm": 232.0, + "learning_rate": 3.998701460663765e-05, + "loss": 11.6252, + "step": 13849 + }, + { + "epoch": 0.5772998207661206, + "grad_norm": 432.0, + "learning_rate": 3.998040141756202e-05, + "loss": 15.8752, + "step": 13850 + }, + { + "epoch": 0.5773415030636488, + "grad_norm": 251.0, + "learning_rate": 3.997378841109448e-05, + "loss": 12.8128, + "step": 13851 + }, + { + "epoch": 0.577383185361177, + "grad_norm": 207.0, + "learning_rate": 3.996717558735551e-05, + "loss": 12.6258, + "step": 13852 + }, + { + "epoch": 0.5774248676587054, + "grad_norm": 1296.0, + "learning_rate": 3.9960562946465685e-05, + "loss": 30.5005, + "step": 13853 + }, + { + "epoch": 0.5774665499562336, + "grad_norm": 704.0, + "learning_rate": 3.9953950488545464e-05, + "loss": 19.6253, + "step": 13854 + }, + { + "epoch": 0.5775082322537618, + "grad_norm": 45.5, + "learning_rate": 3.994733821371541e-05, + "loss": 7.6568, + "step": 13855 + }, + { + "epoch": 0.5775499145512901, + "grad_norm": 502.0, + "learning_rate": 3.994072612209599e-05, + "loss": 16.6295, + "step": 13856 + }, + { + "epoch": 0.5775915968488183, + "grad_norm": 1096.0, + "learning_rate": 3.993411421380774e-05, + "loss": 28.1251, + "step": 13857 + }, + { + "epoch": 0.5776332791463465, + "grad_norm": 444.0, + "learning_rate": 3.9927502488971154e-05, + "loss": 15.6876, + "step": 13858 + }, + { + "epoch": 0.5776749614438748, + "grad_norm": 106.5, + "learning_rate": 3.992089094770672e-05, + "loss": 8.8128, + "step": 13859 + }, + { + "epoch": 0.5777166437414031, + "grad_norm": 262.0, + "learning_rate": 3.9914279590134936e-05, + "loss": 12.9398, + "step": 13860 + }, + { + "epoch": 0.5777583260389313, + "grad_norm": 215.0, + "learning_rate": 3.9907668416376335e-05, + "loss": 13.3131, + "step": 13861 + }, + { + "epoch": 0.5778000083364595, + "grad_norm": 238.0, + "learning_rate": 3.9901057426551344e-05, + "loss": 11.6252, + "step": 13862 + }, + { + "epoch": 0.5778416906339877, + "grad_norm": 328.0, + "learning_rate": 3.9894446620780494e-05, + "loss": 13.3763, + "step": 13863 + }, + { + "epoch": 0.577883372931516, + "grad_norm": 320.0, + "learning_rate": 3.988783599918424e-05, + "loss": 14.4376, + "step": 13864 + }, + { + "epoch": 0.5779250552290442, + "grad_norm": 544.0, + "learning_rate": 3.988122556188308e-05, + "loss": 17.0002, + "step": 13865 + }, + { + "epoch": 0.5779667375265725, + "grad_norm": 244.0, + "learning_rate": 3.987461530899747e-05, + "loss": 11.5002, + "step": 13866 + }, + { + "epoch": 0.5780084198241007, + "grad_norm": 254.0, + "learning_rate": 3.986800524064792e-05, + "loss": 13.1254, + "step": 13867 + }, + { + "epoch": 0.578050102121629, + "grad_norm": 296.0, + "learning_rate": 3.9861395356954846e-05, + "loss": 13.0627, + "step": 13868 + }, + { + "epoch": 0.5780917844191572, + "grad_norm": 354.0, + "learning_rate": 3.985478565803875e-05, + "loss": 13.6253, + "step": 13869 + }, + { + "epoch": 0.5781334667166854, + "grad_norm": 310.0, + "learning_rate": 3.9848176144020094e-05, + "loss": 13.4377, + "step": 13870 + }, + { + "epoch": 0.5781751490142136, + "grad_norm": 278.0, + "learning_rate": 3.9841566815019325e-05, + "loss": 13.0002, + "step": 13871 + }, + { + "epoch": 0.578216831311742, + "grad_norm": 784.0, + "learning_rate": 3.983495767115689e-05, + "loss": 23.8752, + "step": 13872 + }, + { + "epoch": 0.5782585136092702, + "grad_norm": 1080.0, + "learning_rate": 3.9828348712553284e-05, + "loss": 28.0003, + "step": 13873 + }, + { + "epoch": 0.5783001959067984, + "grad_norm": 264.0, + "learning_rate": 3.9821739939328895e-05, + "loss": 12.1277, + "step": 13874 + }, + { + "epoch": 0.5783418782043266, + "grad_norm": 81.5, + "learning_rate": 3.9815131351604226e-05, + "loss": 8.5627, + "step": 13875 + }, + { + "epoch": 0.5783835605018549, + "grad_norm": 318.0, + "learning_rate": 3.980852294949966e-05, + "loss": 12.2503, + "step": 13876 + }, + { + "epoch": 0.5784252427993831, + "grad_norm": 524.0, + "learning_rate": 3.98019147331357e-05, + "loss": 17.2502, + "step": 13877 + }, + { + "epoch": 0.5784669250969113, + "grad_norm": 264.0, + "learning_rate": 3.979530670263273e-05, + "loss": 10.1879, + "step": 13878 + }, + { + "epoch": 0.5785086073944395, + "grad_norm": 364.0, + "learning_rate": 3.978869885811122e-05, + "loss": 15.751, + "step": 13879 + }, + { + "epoch": 0.5785502896919679, + "grad_norm": 272.0, + "learning_rate": 3.978209119969155e-05, + "loss": 11.8753, + "step": 13880 + }, + { + "epoch": 0.5785919719894961, + "grad_norm": 207.0, + "learning_rate": 3.977548372749419e-05, + "loss": 11.8127, + "step": 13881 + }, + { + "epoch": 0.5786336542870243, + "grad_norm": 223.0, + "learning_rate": 3.976887644163955e-05, + "loss": 13.3757, + "step": 13882 + }, + { + "epoch": 0.5786753365845525, + "grad_norm": 516.0, + "learning_rate": 3.9762269342248046e-05, + "loss": 18.0002, + "step": 13883 + }, + { + "epoch": 0.5787170188820808, + "grad_norm": 1416.0, + "learning_rate": 3.975566242944008e-05, + "loss": 29.129, + "step": 13884 + }, + { + "epoch": 0.578758701179609, + "grad_norm": 472.0, + "learning_rate": 3.974905570333609e-05, + "loss": 16.0008, + "step": 13885 + }, + { + "epoch": 0.5788003834771372, + "grad_norm": 224.0, + "learning_rate": 3.974244916405646e-05, + "loss": 11.6253, + "step": 13886 + }, + { + "epoch": 0.5788420657746655, + "grad_norm": 177.0, + "learning_rate": 3.9735842811721616e-05, + "loss": 10.3752, + "step": 13887 + }, + { + "epoch": 0.5788837480721938, + "grad_norm": 528.0, + "learning_rate": 3.972923664645193e-05, + "loss": 17.5011, + "step": 13888 + }, + { + "epoch": 0.578925430369722, + "grad_norm": 458.0, + "learning_rate": 3.972263066836784e-05, + "loss": 15.626, + "step": 13889 + }, + { + "epoch": 0.5789671126672502, + "grad_norm": 896.0, + "learning_rate": 3.9716024877589704e-05, + "loss": 23.005, + "step": 13890 + }, + { + "epoch": 0.5790087949647784, + "grad_norm": 80.5, + "learning_rate": 3.970941927423794e-05, + "loss": 8.6878, + "step": 13891 + }, + { + "epoch": 0.5790504772623067, + "grad_norm": 384.0, + "learning_rate": 3.970281385843291e-05, + "loss": 15.2502, + "step": 13892 + }, + { + "epoch": 0.579092159559835, + "grad_norm": 728.0, + "learning_rate": 3.969620863029502e-05, + "loss": 22.0003, + "step": 13893 + }, + { + "epoch": 0.5791338418573632, + "grad_norm": 502.0, + "learning_rate": 3.968960358994463e-05, + "loss": 18.0005, + "step": 13894 + }, + { + "epoch": 0.5791755241548914, + "grad_norm": 158.0, + "learning_rate": 3.968299873750214e-05, + "loss": 10.0628, + "step": 13895 + }, + { + "epoch": 0.5792172064524197, + "grad_norm": 442.0, + "learning_rate": 3.9676394073087914e-05, + "loss": 16.5004, + "step": 13896 + }, + { + "epoch": 0.5792588887499479, + "grad_norm": 146.0, + "learning_rate": 3.966978959682232e-05, + "loss": 11.6258, + "step": 13897 + }, + { + "epoch": 0.5793005710474761, + "grad_norm": 1152.0, + "learning_rate": 3.9663185308825714e-05, + "loss": 22.755, + "step": 13898 + }, + { + "epoch": 0.5793422533450043, + "grad_norm": 892.0, + "learning_rate": 3.9656581209218504e-05, + "loss": 25.5015, + "step": 13899 + }, + { + "epoch": 0.5793839356425327, + "grad_norm": 752.0, + "learning_rate": 3.964997729812099e-05, + "loss": 20.1256, + "step": 13900 + }, + { + "epoch": 0.5794256179400609, + "grad_norm": 324.0, + "learning_rate": 3.9643373575653586e-05, + "loss": 13.8752, + "step": 13901 + }, + { + "epoch": 0.5794673002375891, + "grad_norm": 548.0, + "learning_rate": 3.9636770041936585e-05, + "loss": 17.7501, + "step": 13902 + }, + { + "epoch": 0.5795089825351173, + "grad_norm": 462.0, + "learning_rate": 3.963016669709041e-05, + "loss": 17.5005, + "step": 13903 + }, + { + "epoch": 0.5795506648326456, + "grad_norm": 223.0, + "learning_rate": 3.9623563541235334e-05, + "loss": 10.5003, + "step": 13904 + }, + { + "epoch": 0.5795923471301738, + "grad_norm": 1768.0, + "learning_rate": 3.9616960574491756e-05, + "loss": 34.5032, + "step": 13905 + }, + { + "epoch": 0.579634029427702, + "grad_norm": 512.0, + "learning_rate": 3.9610357796979966e-05, + "loss": 17.7502, + "step": 13906 + }, + { + "epoch": 0.5796757117252302, + "grad_norm": 191.0, + "learning_rate": 3.9603755208820346e-05, + "loss": 12.3132, + "step": 13907 + }, + { + "epoch": 0.5797173940227586, + "grad_norm": 516.0, + "learning_rate": 3.9597152810133214e-05, + "loss": 16.3753, + "step": 13908 + }, + { + "epoch": 0.5797590763202868, + "grad_norm": 568.0, + "learning_rate": 3.959055060103889e-05, + "loss": 18.3754, + "step": 13909 + }, + { + "epoch": 0.579800758617815, + "grad_norm": 304.0, + "learning_rate": 3.95839485816577e-05, + "loss": 13.126, + "step": 13910 + }, + { + "epoch": 0.5798424409153432, + "grad_norm": 612.0, + "learning_rate": 3.957734675210999e-05, + "loss": 19.2505, + "step": 13911 + }, + { + "epoch": 0.5798841232128715, + "grad_norm": 316.0, + "learning_rate": 3.9570745112516035e-05, + "loss": 12.9379, + "step": 13912 + }, + { + "epoch": 0.5799258055103997, + "grad_norm": 418.0, + "learning_rate": 3.95641436629962e-05, + "loss": 16.7501, + "step": 13913 + }, + { + "epoch": 0.579967487807928, + "grad_norm": 376.0, + "learning_rate": 3.955754240367075e-05, + "loss": 14.5022, + "step": 13914 + }, + { + "epoch": 0.5800091701054562, + "grad_norm": 1280.0, + "learning_rate": 3.955094133466004e-05, + "loss": 26.5044, + "step": 13915 + }, + { + "epoch": 0.5800508524029845, + "grad_norm": 193.0, + "learning_rate": 3.954434045608433e-05, + "loss": 11.7501, + "step": 13916 + }, + { + "epoch": 0.5800925347005127, + "grad_norm": 150.0, + "learning_rate": 3.953773976806397e-05, + "loss": 11.3127, + "step": 13917 + }, + { + "epoch": 0.5801342169980409, + "grad_norm": 408.0, + "learning_rate": 3.95311392707192e-05, + "loss": 14.438, + "step": 13918 + }, + { + "epoch": 0.5801758992955691, + "grad_norm": 336.0, + "learning_rate": 3.952453896417037e-05, + "loss": 14.563, + "step": 13919 + }, + { + "epoch": 0.5802175815930974, + "grad_norm": 354.0, + "learning_rate": 3.951793884853773e-05, + "loss": 14.6253, + "step": 13920 + }, + { + "epoch": 0.5802592638906257, + "grad_norm": 266.0, + "learning_rate": 3.95113389239416e-05, + "loss": 12.6254, + "step": 13921 + }, + { + "epoch": 0.5803009461881539, + "grad_norm": 272.0, + "learning_rate": 3.950473919050223e-05, + "loss": 11.6879, + "step": 13922 + }, + { + "epoch": 0.5803426284856821, + "grad_norm": 406.0, + "learning_rate": 3.949813964833995e-05, + "loss": 15.3759, + "step": 13923 + }, + { + "epoch": 0.5803843107832104, + "grad_norm": 756.0, + "learning_rate": 3.949154029757498e-05, + "loss": 20.1292, + "step": 13924 + }, + { + "epoch": 0.5804259930807386, + "grad_norm": 165.0, + "learning_rate": 3.948494113832764e-05, + "loss": 9.3751, + "step": 13925 + }, + { + "epoch": 0.5804676753782668, + "grad_norm": 528.0, + "learning_rate": 3.947834217071816e-05, + "loss": 17.1254, + "step": 13926 + }, + { + "epoch": 0.580509357675795, + "grad_norm": 712.0, + "learning_rate": 3.947174339486685e-05, + "loss": 21.2501, + "step": 13927 + }, + { + "epoch": 0.5805510399733234, + "grad_norm": 454.0, + "learning_rate": 3.946514481089394e-05, + "loss": 14.7514, + "step": 13928 + }, + { + "epoch": 0.5805927222708516, + "grad_norm": 157.0, + "learning_rate": 3.9458546418919715e-05, + "loss": 10.0004, + "step": 13929 + }, + { + "epoch": 0.5806344045683798, + "grad_norm": 151.0, + "learning_rate": 3.9451948219064396e-05, + "loss": 10.313, + "step": 13930 + }, + { + "epoch": 0.5806760868659081, + "grad_norm": 206.0, + "learning_rate": 3.9445350211448274e-05, + "loss": 11.8755, + "step": 13931 + }, + { + "epoch": 0.5807177691634363, + "grad_norm": 1576.0, + "learning_rate": 3.9438752396191565e-05, + "loss": 31.754, + "step": 13932 + }, + { + "epoch": 0.5807594514609645, + "grad_norm": 748.0, + "learning_rate": 3.9432154773414535e-05, + "loss": 21.6252, + "step": 13933 + }, + { + "epoch": 0.5808011337584927, + "grad_norm": 364.0, + "learning_rate": 3.9425557343237415e-05, + "loss": 16.2501, + "step": 13934 + }, + { + "epoch": 0.5808428160560211, + "grad_norm": 100.0, + "learning_rate": 3.941896010578048e-05, + "loss": 7.8753, + "step": 13935 + }, + { + "epoch": 0.5808844983535493, + "grad_norm": 454.0, + "learning_rate": 3.941236306116391e-05, + "loss": 17.2501, + "step": 13936 + }, + { + "epoch": 0.5809261806510775, + "grad_norm": 502.0, + "learning_rate": 3.9405766209507984e-05, + "loss": 17.3756, + "step": 13937 + }, + { + "epoch": 0.5809678629486057, + "grad_norm": 482.0, + "learning_rate": 3.9399169550932884e-05, + "loss": 18.2503, + "step": 13938 + }, + { + "epoch": 0.581009545246134, + "grad_norm": 147.0, + "learning_rate": 3.9392573085558885e-05, + "loss": 8.8753, + "step": 13939 + }, + { + "epoch": 0.5810512275436622, + "grad_norm": 540.0, + "learning_rate": 3.938597681350616e-05, + "loss": 16.3762, + "step": 13940 + }, + { + "epoch": 0.5810929098411904, + "grad_norm": 462.0, + "learning_rate": 3.937938073489498e-05, + "loss": 15.9387, + "step": 13941 + }, + { + "epoch": 0.5811345921387187, + "grad_norm": 167.0, + "learning_rate": 3.93727848498455e-05, + "loss": 8.5006, + "step": 13942 + }, + { + "epoch": 0.581176274436247, + "grad_norm": 404.0, + "learning_rate": 3.9366189158477987e-05, + "loss": 16.2507, + "step": 13943 + }, + { + "epoch": 0.5812179567337752, + "grad_norm": 476.0, + "learning_rate": 3.9359593660912584e-05, + "loss": 15.5662, + "step": 13944 + }, + { + "epoch": 0.5812596390313034, + "grad_norm": 249.0, + "learning_rate": 3.9352998357269555e-05, + "loss": 12.0004, + "step": 13945 + }, + { + "epoch": 0.5813013213288316, + "grad_norm": 161.0, + "learning_rate": 3.9346403247669074e-05, + "loss": 10.813, + "step": 13946 + }, + { + "epoch": 0.5813430036263599, + "grad_norm": 414.0, + "learning_rate": 3.9339808332231334e-05, + "loss": 15.3128, + "step": 13947 + }, + { + "epoch": 0.5813846859238881, + "grad_norm": 346.0, + "learning_rate": 3.933321361107653e-05, + "loss": 13.5641, + "step": 13948 + }, + { + "epoch": 0.5814263682214164, + "grad_norm": 488.0, + "learning_rate": 3.932661908432487e-05, + "loss": 17.0003, + "step": 13949 + }, + { + "epoch": 0.5814680505189446, + "grad_norm": 324.0, + "learning_rate": 3.9320024752096514e-05, + "loss": 14.7515, + "step": 13950 + }, + { + "epoch": 0.5815097328164729, + "grad_norm": 169.0, + "learning_rate": 3.931343061451167e-05, + "loss": 10.6881, + "step": 13951 + }, + { + "epoch": 0.5815514151140011, + "grad_norm": 416.0, + "learning_rate": 3.930683667169047e-05, + "loss": 17.6254, + "step": 13952 + }, + { + "epoch": 0.5815930974115293, + "grad_norm": 137.0, + "learning_rate": 3.9300242923753155e-05, + "loss": 9.7502, + "step": 13953 + }, + { + "epoch": 0.5816347797090575, + "grad_norm": 213.0, + "learning_rate": 3.9293649370819844e-05, + "loss": 10.5002, + "step": 13954 + }, + { + "epoch": 0.5816764620065858, + "grad_norm": 370.0, + "learning_rate": 3.9287056013010736e-05, + "loss": 14.6255, + "step": 13955 + }, + { + "epoch": 0.5817181443041141, + "grad_norm": 245.0, + "learning_rate": 3.928046285044596e-05, + "loss": 9.0629, + "step": 13956 + }, + { + "epoch": 0.5817598266016423, + "grad_norm": 198.0, + "learning_rate": 3.927386988324572e-05, + "loss": 10.8133, + "step": 13957 + }, + { + "epoch": 0.5818015088991705, + "grad_norm": 382.0, + "learning_rate": 3.926727711153015e-05, + "loss": 13.9376, + "step": 13958 + }, + { + "epoch": 0.5818431911966988, + "grad_norm": 448.0, + "learning_rate": 3.9260684535419404e-05, + "loss": 16.7502, + "step": 13959 + }, + { + "epoch": 0.581884873494227, + "grad_norm": 314.0, + "learning_rate": 3.9254092155033625e-05, + "loss": 13.8752, + "step": 13960 + }, + { + "epoch": 0.5819265557917552, + "grad_norm": 196.0, + "learning_rate": 3.9247499970493e-05, + "loss": 11.0627, + "step": 13961 + }, + { + "epoch": 0.5819682380892834, + "grad_norm": 552.0, + "learning_rate": 3.9240907981917616e-05, + "loss": 18.3755, + "step": 13962 + }, + { + "epoch": 0.5820099203868118, + "grad_norm": 848.0, + "learning_rate": 3.923431618942766e-05, + "loss": 23.0006, + "step": 13963 + }, + { + "epoch": 0.58205160268434, + "grad_norm": 712.0, + "learning_rate": 3.922772459314322e-05, + "loss": 23.1252, + "step": 13964 + }, + { + "epoch": 0.5820932849818682, + "grad_norm": 322.0, + "learning_rate": 3.922113319318449e-05, + "loss": 13.6257, + "step": 13965 + }, + { + "epoch": 0.5821349672793964, + "grad_norm": 392.0, + "learning_rate": 3.9214541989671535e-05, + "loss": 15.0633, + "step": 13966 + }, + { + "epoch": 0.5821766495769247, + "grad_norm": 432.0, + "learning_rate": 3.9207950982724527e-05, + "loss": 16.5004, + "step": 13967 + }, + { + "epoch": 0.5822183318744529, + "grad_norm": 404.0, + "learning_rate": 3.9201360172463556e-05, + "loss": 14.6252, + "step": 13968 + }, + { + "epoch": 0.5822600141719811, + "grad_norm": 404.0, + "learning_rate": 3.9194769559008767e-05, + "loss": 15.5633, + "step": 13969 + }, + { + "epoch": 0.5823016964695094, + "grad_norm": 168.0, + "learning_rate": 3.9188179142480254e-05, + "loss": 11.1253, + "step": 13970 + }, + { + "epoch": 0.5823433787670377, + "grad_norm": 126.0, + "learning_rate": 3.918158892299814e-05, + "loss": 9.3753, + "step": 13971 + }, + { + "epoch": 0.5823850610645659, + "grad_norm": 532.0, + "learning_rate": 3.917499890068252e-05, + "loss": 13.8791, + "step": 13972 + }, + { + "epoch": 0.5824267433620941, + "grad_norm": 350.0, + "learning_rate": 3.916840907565352e-05, + "loss": 14.3752, + "step": 13973 + }, + { + "epoch": 0.5824684256596223, + "grad_norm": 506.0, + "learning_rate": 3.9161819448031213e-05, + "loss": 16.3753, + "step": 13974 + }, + { + "epoch": 0.5825101079571506, + "grad_norm": 201.0, + "learning_rate": 3.915523001793573e-05, + "loss": 11.5635, + "step": 13975 + }, + { + "epoch": 0.5825517902546788, + "grad_norm": 804.0, + "learning_rate": 3.914864078548711e-05, + "loss": 21.3752, + "step": 13976 + }, + { + "epoch": 0.5825934725522071, + "grad_norm": 412.0, + "learning_rate": 3.914205175080551e-05, + "loss": 14.6884, + "step": 13977 + }, + { + "epoch": 0.5826351548497353, + "grad_norm": 366.0, + "learning_rate": 3.9135462914010954e-05, + "loss": 14.6283, + "step": 13978 + }, + { + "epoch": 0.5826768371472636, + "grad_norm": 524.0, + "learning_rate": 3.912887427522357e-05, + "loss": 14.255, + "step": 13979 + }, + { + "epoch": 0.5827185194447918, + "grad_norm": 528.0, + "learning_rate": 3.91222858345634e-05, + "loss": 18.8757, + "step": 13980 + }, + { + "epoch": 0.58276020174232, + "grad_norm": 644.0, + "learning_rate": 3.9115697592150556e-05, + "loss": 20.5007, + "step": 13981 + }, + { + "epoch": 0.5828018840398482, + "grad_norm": 213.0, + "learning_rate": 3.9109109548105065e-05, + "loss": 11.1877, + "step": 13982 + }, + { + "epoch": 0.5828435663373766, + "grad_norm": 119.5, + "learning_rate": 3.9102521702547034e-05, + "loss": 9.8132, + "step": 13983 + }, + { + "epoch": 0.5828852486349048, + "grad_norm": 528.0, + "learning_rate": 3.909593405559651e-05, + "loss": 18.2514, + "step": 13984 + }, + { + "epoch": 0.582926930932433, + "grad_norm": 125.5, + "learning_rate": 3.908934660737356e-05, + "loss": 9.4378, + "step": 13985 + }, + { + "epoch": 0.5829686132299612, + "grad_norm": 428.0, + "learning_rate": 3.9082759357998224e-05, + "loss": 15.8762, + "step": 13986 + }, + { + "epoch": 0.5830102955274895, + "grad_norm": 536.0, + "learning_rate": 3.907617230759059e-05, + "loss": 18.6255, + "step": 13987 + }, + { + "epoch": 0.5830519778250177, + "grad_norm": 189.0, + "learning_rate": 3.906958545627066e-05, + "loss": 11.0627, + "step": 13988 + }, + { + "epoch": 0.5830936601225459, + "grad_norm": 390.0, + "learning_rate": 3.906299880415853e-05, + "loss": 14.3134, + "step": 13989 + }, + { + "epoch": 0.5831353424200741, + "grad_norm": 260.0, + "learning_rate": 3.90564123513742e-05, + "loss": 11.6258, + "step": 13990 + }, + { + "epoch": 0.5831770247176025, + "grad_norm": 448.0, + "learning_rate": 3.904982609803773e-05, + "loss": 16.0003, + "step": 13991 + }, + { + "epoch": 0.5832187070151307, + "grad_norm": 237.0, + "learning_rate": 3.904324004426915e-05, + "loss": 12.0007, + "step": 13992 + }, + { + "epoch": 0.5832603893126589, + "grad_norm": 205.0, + "learning_rate": 3.903665419018851e-05, + "loss": 11.5628, + "step": 13993 + }, + { + "epoch": 0.5833020716101871, + "grad_norm": 322.0, + "learning_rate": 3.903006853591579e-05, + "loss": 13.1253, + "step": 13994 + }, + { + "epoch": 0.5833437539077154, + "grad_norm": 262.0, + "learning_rate": 3.9023483081571065e-05, + "loss": 12.7503, + "step": 13995 + }, + { + "epoch": 0.5833854362052436, + "grad_norm": 616.0, + "learning_rate": 3.901689782727433e-05, + "loss": 18.1254, + "step": 13996 + }, + { + "epoch": 0.5834271185027718, + "grad_norm": 1024.0, + "learning_rate": 3.9010312773145614e-05, + "loss": 27.1268, + "step": 13997 + }, + { + "epoch": 0.5834688008003001, + "grad_norm": 242.0, + "learning_rate": 3.900372791930491e-05, + "loss": 11.689, + "step": 13998 + }, + { + "epoch": 0.5835104830978284, + "grad_norm": 330.0, + "learning_rate": 3.899714326587226e-05, + "loss": 11.9379, + "step": 13999 + }, + { + "epoch": 0.5835521653953566, + "grad_norm": 145.0, + "learning_rate": 3.8990558812967624e-05, + "loss": 12.0629, + "step": 14000 + }, + { + "epoch": 0.5835938476928848, + "grad_norm": 161.0, + "learning_rate": 3.8983974560711065e-05, + "loss": 10.8128, + "step": 14001 + }, + { + "epoch": 0.5836355299904131, + "grad_norm": 302.0, + "learning_rate": 3.8977390509222516e-05, + "loss": 12.5626, + "step": 14002 + }, + { + "epoch": 0.5836772122879413, + "grad_norm": 173.0, + "learning_rate": 3.897080665862203e-05, + "loss": 5.1572, + "step": 14003 + }, + { + "epoch": 0.5837188945854695, + "grad_norm": 808.0, + "learning_rate": 3.8964223009029546e-05, + "loss": 20.5001, + "step": 14004 + }, + { + "epoch": 0.5837605768829978, + "grad_norm": 404.0, + "learning_rate": 3.89576395605651e-05, + "loss": 15.3753, + "step": 14005 + }, + { + "epoch": 0.5838022591805261, + "grad_norm": 280.0, + "learning_rate": 3.895105631334863e-05, + "loss": 13.1252, + "step": 14006 + }, + { + "epoch": 0.5838439414780543, + "grad_norm": 223.0, + "learning_rate": 3.894447326750016e-05, + "loss": 11.5626, + "step": 14007 + }, + { + "epoch": 0.5838856237755825, + "grad_norm": 258.0, + "learning_rate": 3.893789042313964e-05, + "loss": 11.0003, + "step": 14008 + }, + { + "epoch": 0.5839273060731107, + "grad_norm": 848.0, + "learning_rate": 3.893130778038705e-05, + "loss": 20.2504, + "step": 14009 + }, + { + "epoch": 0.583968988370639, + "grad_norm": 716.0, + "learning_rate": 3.8924725339362346e-05, + "loss": 21.7507, + "step": 14010 + }, + { + "epoch": 0.5840106706681673, + "grad_norm": 752.0, + "learning_rate": 3.891814310018552e-05, + "loss": 23.1252, + "step": 14011 + }, + { + "epoch": 0.5840523529656955, + "grad_norm": 364.0, + "learning_rate": 3.891156106297651e-05, + "loss": 15.3762, + "step": 14012 + }, + { + "epoch": 0.5840940352632237, + "grad_norm": 1112.0, + "learning_rate": 3.8904979227855295e-05, + "loss": 28.0015, + "step": 14013 + }, + { + "epoch": 0.584135717560752, + "grad_norm": 140.0, + "learning_rate": 3.889839759494181e-05, + "loss": 10.3128, + "step": 14014 + }, + { + "epoch": 0.5841773998582802, + "grad_norm": 266.0, + "learning_rate": 3.8891816164356026e-05, + "loss": 12.6878, + "step": 14015 + }, + { + "epoch": 0.5842190821558084, + "grad_norm": 492.0, + "learning_rate": 3.8885234936217864e-05, + "loss": 16.6255, + "step": 14016 + }, + { + "epoch": 0.5842607644533366, + "grad_norm": 380.0, + "learning_rate": 3.88786539106473e-05, + "loss": 14.8751, + "step": 14017 + }, + { + "epoch": 0.584302446750865, + "grad_norm": 205.0, + "learning_rate": 3.887207308776423e-05, + "loss": 11.5006, + "step": 14018 + }, + { + "epoch": 0.5843441290483932, + "grad_norm": 154.0, + "learning_rate": 3.886549246768864e-05, + "loss": 10.6267, + "step": 14019 + }, + { + "epoch": 0.5843858113459214, + "grad_norm": 432.0, + "learning_rate": 3.885891205054042e-05, + "loss": 16.2505, + "step": 14020 + }, + { + "epoch": 0.5844274936434496, + "grad_norm": 106.5, + "learning_rate": 3.8852331836439525e-05, + "loss": 8.8754, + "step": 14021 + }, + { + "epoch": 0.5844691759409779, + "grad_norm": 372.0, + "learning_rate": 3.884575182550586e-05, + "loss": 11.3779, + "step": 14022 + }, + { + "epoch": 0.5845108582385061, + "grad_norm": 80.5, + "learning_rate": 3.883917201785938e-05, + "loss": 9.6258, + "step": 14023 + }, + { + "epoch": 0.5845525405360343, + "grad_norm": 196.0, + "learning_rate": 3.883259241361996e-05, + "loss": 11.0002, + "step": 14024 + }, + { + "epoch": 0.5845942228335625, + "grad_norm": 278.0, + "learning_rate": 3.8826013012907555e-05, + "loss": 9.626, + "step": 14025 + }, + { + "epoch": 0.5846359051310909, + "grad_norm": 400.0, + "learning_rate": 3.881943381584204e-05, + "loss": 14.9378, + "step": 14026 + }, + { + "epoch": 0.5846775874286191, + "grad_norm": 490.0, + "learning_rate": 3.881285482254335e-05, + "loss": 15.316, + "step": 14027 + }, + { + "epoch": 0.5847192697261473, + "grad_norm": 207.0, + "learning_rate": 3.880627603313136e-05, + "loss": 9.0002, + "step": 14028 + }, + { + "epoch": 0.5847609520236755, + "grad_norm": 548.0, + "learning_rate": 3.8799697447725996e-05, + "loss": 18.7501, + "step": 14029 + }, + { + "epoch": 0.5848026343212038, + "grad_norm": 506.0, + "learning_rate": 3.8793119066447124e-05, + "loss": 16.1252, + "step": 14030 + }, + { + "epoch": 0.584844316618732, + "grad_norm": 260.0, + "learning_rate": 3.878654088941467e-05, + "loss": 12.3126, + "step": 14031 + }, + { + "epoch": 0.5848859989162603, + "grad_norm": 360.0, + "learning_rate": 3.877996291674848e-05, + "loss": 14.1876, + "step": 14032 + }, + { + "epoch": 0.5849276812137885, + "grad_norm": 224.0, + "learning_rate": 3.8773385148568484e-05, + "loss": 11.7505, + "step": 14033 + }, + { + "epoch": 0.5849693635113168, + "grad_norm": 368.0, + "learning_rate": 3.876680758499453e-05, + "loss": 14.2515, + "step": 14034 + }, + { + "epoch": 0.585011045808845, + "grad_norm": 348.0, + "learning_rate": 3.876023022614651e-05, + "loss": 14.6881, + "step": 14035 + }, + { + "epoch": 0.5850527281063732, + "grad_norm": 52.75, + "learning_rate": 3.875365307214428e-05, + "loss": 7.0006, + "step": 14036 + }, + { + "epoch": 0.5850944104039014, + "grad_norm": 268.0, + "learning_rate": 3.8747076123107736e-05, + "loss": 10.8753, + "step": 14037 + }, + { + "epoch": 0.5851360927014297, + "grad_norm": 412.0, + "learning_rate": 3.874049937915671e-05, + "loss": 15.0644, + "step": 14038 + }, + { + "epoch": 0.585177774998958, + "grad_norm": 500.0, + "learning_rate": 3.8733922840411113e-05, + "loss": 16.1258, + "step": 14039 + }, + { + "epoch": 0.5852194572964862, + "grad_norm": 394.0, + "learning_rate": 3.872734650699075e-05, + "loss": 13.9377, + "step": 14040 + }, + { + "epoch": 0.5852611395940144, + "grad_norm": 217.0, + "learning_rate": 3.8720770379015506e-05, + "loss": 11.8752, + "step": 14041 + }, + { + "epoch": 0.5853028218915427, + "grad_norm": 716.0, + "learning_rate": 3.871419445660521e-05, + "loss": 20.3754, + "step": 14042 + }, + { + "epoch": 0.5853445041890709, + "grad_norm": 155.0, + "learning_rate": 3.870761873987975e-05, + "loss": 9.8126, + "step": 14043 + }, + { + "epoch": 0.5853861864865991, + "grad_norm": 141.0, + "learning_rate": 3.8701043228958906e-05, + "loss": 10.6255, + "step": 14044 + }, + { + "epoch": 0.5854278687841273, + "grad_norm": 628.0, + "learning_rate": 3.869446792396257e-05, + "loss": 18.6258, + "step": 14045 + }, + { + "epoch": 0.5854695510816557, + "grad_norm": 268.0, + "learning_rate": 3.8687892825010556e-05, + "loss": 13.4382, + "step": 14046 + }, + { + "epoch": 0.5855112333791839, + "grad_norm": 202.0, + "learning_rate": 3.868131793222271e-05, + "loss": 11.0004, + "step": 14047 + }, + { + "epoch": 0.5855529156767121, + "grad_norm": 171.0, + "learning_rate": 3.8674743245718825e-05, + "loss": 7.595, + "step": 14048 + }, + { + "epoch": 0.5855945979742403, + "grad_norm": 184.0, + "learning_rate": 3.866816876561878e-05, + "loss": 10.0008, + "step": 14049 + }, + { + "epoch": 0.5856362802717686, + "grad_norm": 348.0, + "learning_rate": 3.866159449204233e-05, + "loss": 14.3753, + "step": 14050 + }, + { + "epoch": 0.5856779625692968, + "grad_norm": 226.0, + "learning_rate": 3.8655020425109366e-05, + "loss": 12.6877, + "step": 14051 + }, + { + "epoch": 0.585719644866825, + "grad_norm": 58.75, + "learning_rate": 3.8648446564939636e-05, + "loss": 8.1254, + "step": 14052 + }, + { + "epoch": 0.5857613271643533, + "grad_norm": 358.0, + "learning_rate": 3.864187291165299e-05, + "loss": 15.2503, + "step": 14053 + }, + { + "epoch": 0.5858030094618816, + "grad_norm": 227.0, + "learning_rate": 3.86352994653692e-05, + "loss": 11.9378, + "step": 14054 + }, + { + "epoch": 0.5858446917594098, + "grad_norm": 145.0, + "learning_rate": 3.8628726226208113e-05, + "loss": 10.4378, + "step": 14055 + }, + { + "epoch": 0.585886374056938, + "grad_norm": 458.0, + "learning_rate": 3.862215319428947e-05, + "loss": 15.7509, + "step": 14056 + }, + { + "epoch": 0.5859280563544662, + "grad_norm": 488.0, + "learning_rate": 3.861558036973312e-05, + "loss": 16.3752, + "step": 14057 + }, + { + "epoch": 0.5859697386519945, + "grad_norm": 258.0, + "learning_rate": 3.860900775265881e-05, + "loss": 12.3127, + "step": 14058 + }, + { + "epoch": 0.5860114209495227, + "grad_norm": 404.0, + "learning_rate": 3.860243534318635e-05, + "loss": 16.6266, + "step": 14059 + }, + { + "epoch": 0.586053103247051, + "grad_norm": 540.0, + "learning_rate": 3.8595863141435515e-05, + "loss": 16.1269, + "step": 14060 + }, + { + "epoch": 0.5860947855445792, + "grad_norm": 536.0, + "learning_rate": 3.858929114752611e-05, + "loss": 18.3752, + "step": 14061 + }, + { + "epoch": 0.5861364678421075, + "grad_norm": 536.0, + "learning_rate": 3.858271936157785e-05, + "loss": 17.5004, + "step": 14062 + }, + { + "epoch": 0.5861781501396357, + "grad_norm": 175.0, + "learning_rate": 3.8576147783710573e-05, + "loss": 12.0629, + "step": 14063 + }, + { + "epoch": 0.5862198324371639, + "grad_norm": 392.0, + "learning_rate": 3.8569576414044e-05, + "loss": 15.6877, + "step": 14064 + }, + { + "epoch": 0.5862615147346921, + "grad_norm": 172.0, + "learning_rate": 3.8563005252697925e-05, + "loss": 8.1883, + "step": 14065 + }, + { + "epoch": 0.5863031970322204, + "grad_norm": 370.0, + "learning_rate": 3.8556434299792074e-05, + "loss": 14.9382, + "step": 14066 + }, + { + "epoch": 0.5863448793297487, + "grad_norm": 91.5, + "learning_rate": 3.8549863555446253e-05, + "loss": 8.5633, + "step": 14067 + }, + { + "epoch": 0.5863865616272769, + "grad_norm": 908.0, + "learning_rate": 3.854329301978016e-05, + "loss": 23.6253, + "step": 14068 + }, + { + "epoch": 0.5864282439248051, + "grad_norm": 89.5, + "learning_rate": 3.853672269291359e-05, + "loss": 8.7514, + "step": 14069 + }, + { + "epoch": 0.5864699262223334, + "grad_norm": 198.0, + "learning_rate": 3.8530152574966244e-05, + "loss": 11.1255, + "step": 14070 + }, + { + "epoch": 0.5865116085198616, + "grad_norm": 151.0, + "learning_rate": 3.8523582666057896e-05, + "loss": 10.1253, + "step": 14071 + }, + { + "epoch": 0.5865532908173898, + "grad_norm": 180.0, + "learning_rate": 3.8517012966308275e-05, + "loss": 11.2505, + "step": 14072 + }, + { + "epoch": 0.586594973114918, + "grad_norm": 840.0, + "learning_rate": 3.85104434758371e-05, + "loss": 23.7511, + "step": 14073 + }, + { + "epoch": 0.5866366554124464, + "grad_norm": 338.0, + "learning_rate": 3.850387419476412e-05, + "loss": 13.938, + "step": 14074 + }, + { + "epoch": 0.5866783377099746, + "grad_norm": 282.0, + "learning_rate": 3.849730512320906e-05, + "loss": 14.0633, + "step": 14075 + }, + { + "epoch": 0.5867200200075028, + "grad_norm": 223.0, + "learning_rate": 3.8490736261291625e-05, + "loss": 11.3129, + "step": 14076 + }, + { + "epoch": 0.5867617023050311, + "grad_norm": 336.0, + "learning_rate": 3.848416760913156e-05, + "loss": 14.5628, + "step": 14077 + }, + { + "epoch": 0.5868033846025593, + "grad_norm": 324.0, + "learning_rate": 3.8477599166848546e-05, + "loss": 12.442, + "step": 14078 + }, + { + "epoch": 0.5868450669000875, + "grad_norm": 199.0, + "learning_rate": 3.8471030934562324e-05, + "loss": 10.5006, + "step": 14079 + }, + { + "epoch": 0.5868867491976157, + "grad_norm": 179.0, + "learning_rate": 3.846446291239257e-05, + "loss": 10.9378, + "step": 14080 + }, + { + "epoch": 0.5869284314951441, + "grad_norm": 69.5, + "learning_rate": 3.845789510045902e-05, + "loss": 8.3137, + "step": 14081 + }, + { + "epoch": 0.5869701137926723, + "grad_norm": 96.5, + "learning_rate": 3.845132749888134e-05, + "loss": 8.7503, + "step": 14082 + }, + { + "epoch": 0.5870117960902005, + "grad_norm": 748.0, + "learning_rate": 3.844476010777925e-05, + "loss": 22.6256, + "step": 14083 + }, + { + "epoch": 0.5870534783877287, + "grad_norm": 346.0, + "learning_rate": 3.843819292727243e-05, + "loss": 14.2541, + "step": 14084 + }, + { + "epoch": 0.587095160685257, + "grad_norm": 632.0, + "learning_rate": 3.843162595748058e-05, + "loss": 19.1251, + "step": 14085 + }, + { + "epoch": 0.5871368429827852, + "grad_norm": 117.5, + "learning_rate": 3.842505919852335e-05, + "loss": 10.4378, + "step": 14086 + }, + { + "epoch": 0.5871785252803134, + "grad_norm": 195.0, + "learning_rate": 3.841849265052048e-05, + "loss": 11.5003, + "step": 14087 + }, + { + "epoch": 0.5872202075778417, + "grad_norm": 280.0, + "learning_rate": 3.8411926313591575e-05, + "loss": 14.3754, + "step": 14088 + }, + { + "epoch": 0.58726188987537, + "grad_norm": 376.0, + "learning_rate": 3.8405360187856363e-05, + "loss": 14.5003, + "step": 14089 + }, + { + "epoch": 0.5873035721728982, + "grad_norm": 636.0, + "learning_rate": 3.839879427343448e-05, + "loss": 19.7504, + "step": 14090 + }, + { + "epoch": 0.5873452544704264, + "grad_norm": 278.0, + "learning_rate": 3.839222857044561e-05, + "loss": 12.7502, + "step": 14091 + }, + { + "epoch": 0.5873869367679546, + "grad_norm": 223.0, + "learning_rate": 3.838566307900939e-05, + "loss": 11.1257, + "step": 14092 + }, + { + "epoch": 0.5874286190654829, + "grad_norm": 235.0, + "learning_rate": 3.8379097799245515e-05, + "loss": 13.0627, + "step": 14093 + }, + { + "epoch": 0.5874703013630111, + "grad_norm": 280.0, + "learning_rate": 3.837253273127359e-05, + "loss": 11.7502, + "step": 14094 + }, + { + "epoch": 0.5875119836605394, + "grad_norm": 960.0, + "learning_rate": 3.836596787521331e-05, + "loss": 25.0003, + "step": 14095 + }, + { + "epoch": 0.5875536659580676, + "grad_norm": 544.0, + "learning_rate": 3.835940323118428e-05, + "loss": 18.3752, + "step": 14096 + }, + { + "epoch": 0.5875953482555959, + "grad_norm": 233.0, + "learning_rate": 3.8352838799306165e-05, + "loss": 11.0008, + "step": 14097 + }, + { + "epoch": 0.5876370305531241, + "grad_norm": 442.0, + "learning_rate": 3.834627457969859e-05, + "loss": 14.9388, + "step": 14098 + }, + { + "epoch": 0.5876787128506523, + "grad_norm": 209.0, + "learning_rate": 3.833971057248122e-05, + "loss": 9.5002, + "step": 14099 + }, + { + "epoch": 0.5877203951481805, + "grad_norm": 356.0, + "learning_rate": 3.833314677777363e-05, + "loss": 13.5002, + "step": 14100 + }, + { + "epoch": 0.5877620774457089, + "grad_norm": 76.5, + "learning_rate": 3.8326583195695504e-05, + "loss": 8.3755, + "step": 14101 + }, + { + "epoch": 0.5878037597432371, + "grad_norm": 1024.0, + "learning_rate": 3.832001982636641e-05, + "loss": 21.8789, + "step": 14102 + }, + { + "epoch": 0.5878454420407653, + "grad_norm": 242.0, + "learning_rate": 3.8313456669906016e-05, + "loss": 11.6877, + "step": 14103 + }, + { + "epoch": 0.5878871243382935, + "grad_norm": 308.0, + "learning_rate": 3.830689372643389e-05, + "loss": 13.5631, + "step": 14104 + }, + { + "epoch": 0.5879288066358218, + "grad_norm": 608.0, + "learning_rate": 3.830033099606968e-05, + "loss": 18.1252, + "step": 14105 + }, + { + "epoch": 0.58797048893335, + "grad_norm": 227.0, + "learning_rate": 3.829376847893296e-05, + "loss": 11.0006, + "step": 14106 + }, + { + "epoch": 0.5880121712308782, + "grad_norm": 237.0, + "learning_rate": 3.828720617514337e-05, + "loss": 12.4377, + "step": 14107 + }, + { + "epoch": 0.5880538535284064, + "grad_norm": 560.0, + "learning_rate": 3.828064408482046e-05, + "loss": 18.8752, + "step": 14108 + }, + { + "epoch": 0.5880955358259348, + "grad_norm": 456.0, + "learning_rate": 3.827408220808387e-05, + "loss": 15.1285, + "step": 14109 + }, + { + "epoch": 0.588137218123463, + "grad_norm": 318.0, + "learning_rate": 3.826752054505317e-05, + "loss": 13.7505, + "step": 14110 + }, + { + "epoch": 0.5881789004209912, + "grad_norm": 1424.0, + "learning_rate": 3.826095909584795e-05, + "loss": 25.5047, + "step": 14111 + }, + { + "epoch": 0.5882205827185194, + "grad_norm": 438.0, + "learning_rate": 3.825439786058778e-05, + "loss": 16.5002, + "step": 14112 + }, + { + "epoch": 0.5882622650160477, + "grad_norm": 404.0, + "learning_rate": 3.824783683939228e-05, + "loss": 14.6252, + "step": 14113 + }, + { + "epoch": 0.5883039473135759, + "grad_norm": 464.0, + "learning_rate": 3.824127603238096e-05, + "loss": 17.5006, + "step": 14114 + }, + { + "epoch": 0.5883456296111041, + "grad_norm": 528.0, + "learning_rate": 3.823471543967346e-05, + "loss": 17.6252, + "step": 14115 + }, + { + "epoch": 0.5883873119086324, + "grad_norm": 1456.0, + "learning_rate": 3.82281550613893e-05, + "loss": 29.0037, + "step": 14116 + }, + { + "epoch": 0.5884289942061607, + "grad_norm": 128.0, + "learning_rate": 3.822159489764807e-05, + "loss": 7.3442, + "step": 14117 + }, + { + "epoch": 0.5884706765036889, + "grad_norm": 704.0, + "learning_rate": 3.82150349485693e-05, + "loss": 17.3799, + "step": 14118 + }, + { + "epoch": 0.5885123588012171, + "grad_norm": 209.0, + "learning_rate": 3.8208475214272586e-05, + "loss": 11.1878, + "step": 14119 + }, + { + "epoch": 0.5885540410987453, + "grad_norm": 231.0, + "learning_rate": 3.8201915694877436e-05, + "loss": 9.8129, + "step": 14120 + }, + { + "epoch": 0.5885957233962736, + "grad_norm": 1576.0, + "learning_rate": 3.8195356390503436e-05, + "loss": 35.2504, + "step": 14121 + }, + { + "epoch": 0.5886374056938019, + "grad_norm": 688.0, + "learning_rate": 3.8188797301270105e-05, + "loss": 19.8789, + "step": 14122 + }, + { + "epoch": 0.5886790879913301, + "grad_norm": 95.0, + "learning_rate": 3.8182238427297004e-05, + "loss": 9.6886, + "step": 14123 + }, + { + "epoch": 0.5887207702888583, + "grad_norm": 189.0, + "learning_rate": 3.817567976870363e-05, + "loss": 9.6256, + "step": 14124 + }, + { + "epoch": 0.5887624525863866, + "grad_norm": 151.0, + "learning_rate": 3.816912132560957e-05, + "loss": 10.2503, + "step": 14125 + }, + { + "epoch": 0.5888041348839148, + "grad_norm": 556.0, + "learning_rate": 3.816256309813431e-05, + "loss": 15.5628, + "step": 14126 + }, + { + "epoch": 0.588845817181443, + "grad_norm": 720.0, + "learning_rate": 3.815600508639741e-05, + "loss": 19.1281, + "step": 14127 + }, + { + "epoch": 0.5888874994789712, + "grad_norm": 181.0, + "learning_rate": 3.8149447290518335e-05, + "loss": 11.0627, + "step": 14128 + }, + { + "epoch": 0.5889291817764996, + "grad_norm": 155.0, + "learning_rate": 3.8142889710616665e-05, + "loss": 10.1881, + "step": 14129 + }, + { + "epoch": 0.5889708640740278, + "grad_norm": 436.0, + "learning_rate": 3.813633234681186e-05, + "loss": 15.1264, + "step": 14130 + }, + { + "epoch": 0.589012546371556, + "grad_norm": 310.0, + "learning_rate": 3.8129775199223464e-05, + "loss": 12.1883, + "step": 14131 + }, + { + "epoch": 0.5890542286690842, + "grad_norm": 438.0, + "learning_rate": 3.812321826797096e-05, + "loss": 15.3752, + "step": 14132 + }, + { + "epoch": 0.5890959109666125, + "grad_norm": 255.0, + "learning_rate": 3.811666155317386e-05, + "loss": 12.7503, + "step": 14133 + }, + { + "epoch": 0.5891375932641407, + "grad_norm": 326.0, + "learning_rate": 3.811010505495166e-05, + "loss": 14.5004, + "step": 14134 + }, + { + "epoch": 0.5891792755616689, + "grad_norm": 410.0, + "learning_rate": 3.810354877342385e-05, + "loss": 15.8752, + "step": 14135 + }, + { + "epoch": 0.5892209578591971, + "grad_norm": 392.0, + "learning_rate": 3.8096992708709915e-05, + "loss": 15.3127, + "step": 14136 + }, + { + "epoch": 0.5892626401567255, + "grad_norm": 107.5, + "learning_rate": 3.8090436860929365e-05, + "loss": 7.2509, + "step": 14137 + }, + { + "epoch": 0.5893043224542537, + "grad_norm": 228.0, + "learning_rate": 3.808388123020163e-05, + "loss": 12.1253, + "step": 14138 + }, + { + "epoch": 0.5893460047517819, + "grad_norm": 508.0, + "learning_rate": 3.807732581664625e-05, + "loss": 16.8754, + "step": 14139 + }, + { + "epoch": 0.5893876870493101, + "grad_norm": 676.0, + "learning_rate": 3.807077062038264e-05, + "loss": 22.1252, + "step": 14140 + }, + { + "epoch": 0.5894293693468384, + "grad_norm": 544.0, + "learning_rate": 3.806421564153031e-05, + "loss": 17.8752, + "step": 14141 + }, + { + "epoch": 0.5894710516443666, + "grad_norm": 394.0, + "learning_rate": 3.80576608802087e-05, + "loss": 15.8752, + "step": 14142 + }, + { + "epoch": 0.5895127339418949, + "grad_norm": 173.0, + "learning_rate": 3.805110633653729e-05, + "loss": 10.6253, + "step": 14143 + }, + { + "epoch": 0.5895544162394231, + "grad_norm": 470.0, + "learning_rate": 3.804455201063551e-05, + "loss": 15.4385, + "step": 14144 + }, + { + "epoch": 0.5895960985369514, + "grad_norm": 133.0, + "learning_rate": 3.8037997902622855e-05, + "loss": 9.6884, + "step": 14145 + }, + { + "epoch": 0.5896377808344796, + "grad_norm": 242.0, + "learning_rate": 3.803144401261872e-05, + "loss": 12.0006, + "step": 14146 + }, + { + "epoch": 0.5896794631320078, + "grad_norm": 454.0, + "learning_rate": 3.80248903407426e-05, + "loss": 14.8752, + "step": 14147 + }, + { + "epoch": 0.5897211454295361, + "grad_norm": 316.0, + "learning_rate": 3.801833688711391e-05, + "loss": 14.942, + "step": 14148 + }, + { + "epoch": 0.5897628277270643, + "grad_norm": 564.0, + "learning_rate": 3.801178365185211e-05, + "loss": 18.3752, + "step": 14149 + }, + { + "epoch": 0.5898045100245926, + "grad_norm": 228.0, + "learning_rate": 3.8005230635076595e-05, + "loss": 11.0627, + "step": 14150 + }, + { + "epoch": 0.5898461923221208, + "grad_norm": 1216.0, + "learning_rate": 3.799867783690684e-05, + "loss": 28.3781, + "step": 14151 + }, + { + "epoch": 0.5898878746196491, + "grad_norm": 233.0, + "learning_rate": 3.799212525746222e-05, + "loss": 12.063, + "step": 14152 + }, + { + "epoch": 0.5899295569171773, + "grad_norm": 344.0, + "learning_rate": 3.798557289686221e-05, + "loss": 13.6261, + "step": 14153 + }, + { + "epoch": 0.5899712392147055, + "grad_norm": 346.0, + "learning_rate": 3.7979020755226175e-05, + "loss": 13.1256, + "step": 14154 + }, + { + "epoch": 0.5900129215122337, + "grad_norm": 296.0, + "learning_rate": 3.797246883267357e-05, + "loss": 11.8128, + "step": 14155 + }, + { + "epoch": 0.590054603809762, + "grad_norm": 255.0, + "learning_rate": 3.796591712932378e-05, + "loss": 12.8127, + "step": 14156 + }, + { + "epoch": 0.5900962861072903, + "grad_norm": 450.0, + "learning_rate": 3.7959365645296234e-05, + "loss": 15.1302, + "step": 14157 + }, + { + "epoch": 0.5901379684048185, + "grad_norm": 146.0, + "learning_rate": 3.79528143807103e-05, + "loss": 9.6254, + "step": 14158 + }, + { + "epoch": 0.5901796507023467, + "grad_norm": 740.0, + "learning_rate": 3.7946263335685396e-05, + "loss": 17.8783, + "step": 14159 + }, + { + "epoch": 0.590221332999875, + "grad_norm": 251.0, + "learning_rate": 3.793971251034092e-05, + "loss": 13.188, + "step": 14160 + }, + { + "epoch": 0.5902630152974032, + "grad_norm": 232.0, + "learning_rate": 3.793316190479625e-05, + "loss": 9.438, + "step": 14161 + }, + { + "epoch": 0.5903046975949314, + "grad_norm": 856.0, + "learning_rate": 3.7926611519170766e-05, + "loss": 22.3753, + "step": 14162 + }, + { + "epoch": 0.5903463798924596, + "grad_norm": 796.0, + "learning_rate": 3.792006135358388e-05, + "loss": 22.7502, + "step": 14163 + }, + { + "epoch": 0.590388062189988, + "grad_norm": 390.0, + "learning_rate": 3.791351140815493e-05, + "loss": 13.0009, + "step": 14164 + }, + { + "epoch": 0.5904297444875162, + "grad_norm": 772.0, + "learning_rate": 3.790696168300333e-05, + "loss": 23.1252, + "step": 14165 + }, + { + "epoch": 0.5904714267850444, + "grad_norm": 410.0, + "learning_rate": 3.79004121782484e-05, + "loss": 14.8753, + "step": 14166 + }, + { + "epoch": 0.5905131090825726, + "grad_norm": 222.0, + "learning_rate": 3.789386289400955e-05, + "loss": 12.5628, + "step": 14167 + }, + { + "epoch": 0.5905547913801009, + "grad_norm": 438.0, + "learning_rate": 3.788731383040611e-05, + "loss": 17.5007, + "step": 14168 + }, + { + "epoch": 0.5905964736776291, + "grad_norm": 206.0, + "learning_rate": 3.788076498755747e-05, + "loss": 11.0003, + "step": 14169 + }, + { + "epoch": 0.5906381559751573, + "grad_norm": 332.0, + "learning_rate": 3.7874216365582945e-05, + "loss": 13.6877, + "step": 14170 + }, + { + "epoch": 0.5906798382726856, + "grad_norm": 246.0, + "learning_rate": 3.786766796460191e-05, + "loss": 11.8126, + "step": 14171 + }, + { + "epoch": 0.5907215205702139, + "grad_norm": 396.0, + "learning_rate": 3.78611197847337e-05, + "loss": 14.5011, + "step": 14172 + }, + { + "epoch": 0.5907632028677421, + "grad_norm": 118.0, + "learning_rate": 3.785457182609767e-05, + "loss": 6.7506, + "step": 14173 + }, + { + "epoch": 0.5908048851652703, + "grad_norm": 73.0, + "learning_rate": 3.7848024088813125e-05, + "loss": 7.2504, + "step": 14174 + }, + { + "epoch": 0.5908465674627985, + "grad_norm": 836.0, + "learning_rate": 3.784147657299945e-05, + "loss": 23.3753, + "step": 14175 + }, + { + "epoch": 0.5908882497603268, + "grad_norm": 368.0, + "learning_rate": 3.7834929278775916e-05, + "loss": 15.5004, + "step": 14176 + }, + { + "epoch": 0.590929932057855, + "grad_norm": 384.0, + "learning_rate": 3.7828382206261904e-05, + "loss": 15.0002, + "step": 14177 + }, + { + "epoch": 0.5909716143553833, + "grad_norm": 502.0, + "learning_rate": 3.782183535557668e-05, + "loss": 16.8755, + "step": 14178 + }, + { + "epoch": 0.5910132966529115, + "grad_norm": 592.0, + "learning_rate": 3.7815288726839606e-05, + "loss": 17.6254, + "step": 14179 + }, + { + "epoch": 0.5910549789504398, + "grad_norm": 262.0, + "learning_rate": 3.780874232016996e-05, + "loss": 11.1879, + "step": 14180 + }, + { + "epoch": 0.591096661247968, + "grad_norm": 302.0, + "learning_rate": 3.780219613568709e-05, + "loss": 12.7503, + "step": 14181 + }, + { + "epoch": 0.5911383435454962, + "grad_norm": 312.0, + "learning_rate": 3.779565017351026e-05, + "loss": 11.5032, + "step": 14182 + }, + { + "epoch": 0.5911800258430244, + "grad_norm": 612.0, + "learning_rate": 3.7789104433758794e-05, + "loss": 20.3754, + "step": 14183 + }, + { + "epoch": 0.5912217081405527, + "grad_norm": 308.0, + "learning_rate": 3.778255891655198e-05, + "loss": 13.9378, + "step": 14184 + }, + { + "epoch": 0.591263390438081, + "grad_norm": 372.0, + "learning_rate": 3.777601362200912e-05, + "loss": 15.313, + "step": 14185 + }, + { + "epoch": 0.5913050727356092, + "grad_norm": 880.0, + "learning_rate": 3.7769468550249486e-05, + "loss": 22.2502, + "step": 14186 + }, + { + "epoch": 0.5913467550331374, + "grad_norm": 141.0, + "learning_rate": 3.7762923701392395e-05, + "loss": 11.6256, + "step": 14187 + }, + { + "epoch": 0.5913884373306657, + "grad_norm": 450.0, + "learning_rate": 3.775637907555708e-05, + "loss": 16.876, + "step": 14188 + }, + { + "epoch": 0.5914301196281939, + "grad_norm": 47.5, + "learning_rate": 3.774983467286287e-05, + "loss": 7.4065, + "step": 14189 + }, + { + "epoch": 0.5914718019257221, + "grad_norm": 410.0, + "learning_rate": 3.774329049342898e-05, + "loss": 15.2502, + "step": 14190 + }, + { + "epoch": 0.5915134842232503, + "grad_norm": 288.0, + "learning_rate": 3.7736746537374744e-05, + "loss": 14.1252, + "step": 14191 + }, + { + "epoch": 0.5915551665207787, + "grad_norm": 104.5, + "learning_rate": 3.773020280481936e-05, + "loss": 8.2505, + "step": 14192 + }, + { + "epoch": 0.5915968488183069, + "grad_norm": 243.0, + "learning_rate": 3.772365929588215e-05, + "loss": 11.7507, + "step": 14193 + }, + { + "epoch": 0.5916385311158351, + "grad_norm": 390.0, + "learning_rate": 3.771711601068231e-05, + "loss": 13.1289, + "step": 14194 + }, + { + "epoch": 0.5916802134133633, + "grad_norm": 61.5, + "learning_rate": 3.771057294933914e-05, + "loss": 8.5004, + "step": 14195 + }, + { + "epoch": 0.5917218957108916, + "grad_norm": 412.0, + "learning_rate": 3.770403011197185e-05, + "loss": 14.1876, + "step": 14196 + }, + { + "epoch": 0.5917635780084198, + "grad_norm": 156.0, + "learning_rate": 3.769748749869972e-05, + "loss": 8.2513, + "step": 14197 + }, + { + "epoch": 0.591805260305948, + "grad_norm": 552.0, + "learning_rate": 3.769094510964196e-05, + "loss": 16.1256, + "step": 14198 + }, + { + "epoch": 0.5918469426034763, + "grad_norm": 278.0, + "learning_rate": 3.768440294491783e-05, + "loss": 11.69, + "step": 14199 + }, + { + "epoch": 0.5918886249010046, + "grad_norm": 126.0, + "learning_rate": 3.767786100464653e-05, + "loss": 8.7508, + "step": 14200 + }, + { + "epoch": 0.5919303071985328, + "grad_norm": 73.5, + "learning_rate": 3.767131928894734e-05, + "loss": 9.2503, + "step": 14201 + }, + { + "epoch": 0.591971989496061, + "grad_norm": 386.0, + "learning_rate": 3.766477779793942e-05, + "loss": 13.0008, + "step": 14202 + }, + { + "epoch": 0.5920136717935892, + "grad_norm": 352.0, + "learning_rate": 3.765823653174204e-05, + "loss": 15.2502, + "step": 14203 + }, + { + "epoch": 0.5920553540911175, + "grad_norm": 1400.0, + "learning_rate": 3.765169549047438e-05, + "loss": 32.2503, + "step": 14204 + }, + { + "epoch": 0.5920970363886457, + "grad_norm": 572.0, + "learning_rate": 3.764515467425568e-05, + "loss": 18.7502, + "step": 14205 + }, + { + "epoch": 0.592138718686174, + "grad_norm": 632.0, + "learning_rate": 3.763861408320512e-05, + "loss": 18.5004, + "step": 14206 + }, + { + "epoch": 0.5921804009837022, + "grad_norm": 1008.0, + "learning_rate": 3.763207371744193e-05, + "loss": 25.8788, + "step": 14207 + }, + { + "epoch": 0.5922220832812305, + "grad_norm": 61.25, + "learning_rate": 3.762553357708527e-05, + "loss": 7.0939, + "step": 14208 + }, + { + "epoch": 0.5922637655787587, + "grad_norm": 171.0, + "learning_rate": 3.761899366225438e-05, + "loss": 10.8757, + "step": 14209 + }, + { + "epoch": 0.5923054478762869, + "grad_norm": 402.0, + "learning_rate": 3.761245397306842e-05, + "loss": 16.5002, + "step": 14210 + }, + { + "epoch": 0.5923471301738151, + "grad_norm": 183.0, + "learning_rate": 3.760591450964659e-05, + "loss": 11.2505, + "step": 14211 + }, + { + "epoch": 0.5923888124713435, + "grad_norm": 732.0, + "learning_rate": 3.759937527210806e-05, + "loss": 21.3753, + "step": 14212 + }, + { + "epoch": 0.5924304947688717, + "grad_norm": 151.0, + "learning_rate": 3.7592836260572036e-05, + "loss": 10.4378, + "step": 14213 + }, + { + "epoch": 0.5924721770663999, + "grad_norm": 138.0, + "learning_rate": 3.758629747515765e-05, + "loss": 10.4382, + "step": 14214 + }, + { + "epoch": 0.5925138593639281, + "grad_norm": 88.0, + "learning_rate": 3.757975891598412e-05, + "loss": 7.6564, + "step": 14215 + }, + { + "epoch": 0.5925555416614564, + "grad_norm": 588.0, + "learning_rate": 3.757322058317056e-05, + "loss": 18.6254, + "step": 14216 + }, + { + "epoch": 0.5925972239589846, + "grad_norm": 1064.0, + "learning_rate": 3.756668247683618e-05, + "loss": 25.3755, + "step": 14217 + }, + { + "epoch": 0.5926389062565128, + "grad_norm": 528.0, + "learning_rate": 3.7560144597100094e-05, + "loss": 17.6251, + "step": 14218 + }, + { + "epoch": 0.592680588554041, + "grad_norm": 376.0, + "learning_rate": 3.75536069440815e-05, + "loss": 12.8139, + "step": 14219 + }, + { + "epoch": 0.5927222708515694, + "grad_norm": 502.0, + "learning_rate": 3.7547069517899505e-05, + "loss": 17.6252, + "step": 14220 + }, + { + "epoch": 0.5927639531490976, + "grad_norm": 103.5, + "learning_rate": 3.754053231867328e-05, + "loss": 9.3764, + "step": 14221 + }, + { + "epoch": 0.5928056354466258, + "grad_norm": 167.0, + "learning_rate": 3.753399534652197e-05, + "loss": 9.1253, + "step": 14222 + }, + { + "epoch": 0.5928473177441541, + "grad_norm": 520.0, + "learning_rate": 3.7527458601564684e-05, + "loss": 18.2502, + "step": 14223 + }, + { + "epoch": 0.5928890000416823, + "grad_norm": 520.0, + "learning_rate": 3.752092208392057e-05, + "loss": 16.0004, + "step": 14224 + }, + { + "epoch": 0.5929306823392105, + "grad_norm": 592.0, + "learning_rate": 3.751438579370878e-05, + "loss": 21.0003, + "step": 14225 + }, + { + "epoch": 0.5929723646367387, + "grad_norm": 342.0, + "learning_rate": 3.75078497310484e-05, + "loss": 11.6253, + "step": 14226 + }, + { + "epoch": 0.5930140469342671, + "grad_norm": 197.0, + "learning_rate": 3.7501313896058586e-05, + "loss": 12.0002, + "step": 14227 + }, + { + "epoch": 0.5930557292317953, + "grad_norm": 484.0, + "learning_rate": 3.7494778288858413e-05, + "loss": 18.0005, + "step": 14228 + }, + { + "epoch": 0.5930974115293235, + "grad_norm": 71.0, + "learning_rate": 3.748824290956704e-05, + "loss": 8.1252, + "step": 14229 + }, + { + "epoch": 0.5931390938268517, + "grad_norm": 1216.0, + "learning_rate": 3.748170775830353e-05, + "loss": 32.7506, + "step": 14230 + }, + { + "epoch": 0.59318077612438, + "grad_norm": 90.5, + "learning_rate": 3.747517283518702e-05, + "loss": 8.9386, + "step": 14231 + }, + { + "epoch": 0.5932224584219082, + "grad_norm": 352.0, + "learning_rate": 3.746863814033659e-05, + "loss": 14.0652, + "step": 14232 + }, + { + "epoch": 0.5932641407194365, + "grad_norm": 580.0, + "learning_rate": 3.746210367387135e-05, + "loss": 19.5006, + "step": 14233 + }, + { + "epoch": 0.5933058230169647, + "grad_norm": 406.0, + "learning_rate": 3.745556943591037e-05, + "loss": 17.2505, + "step": 14234 + }, + { + "epoch": 0.593347505314493, + "grad_norm": 628.0, + "learning_rate": 3.744903542657276e-05, + "loss": 19.1251, + "step": 14235 + }, + { + "epoch": 0.5933891876120212, + "grad_norm": 147.0, + "learning_rate": 3.744250164597759e-05, + "loss": 10.3127, + "step": 14236 + }, + { + "epoch": 0.5934308699095494, + "grad_norm": 242.0, + "learning_rate": 3.7435968094243946e-05, + "loss": 12.7504, + "step": 14237 + }, + { + "epoch": 0.5934725522070776, + "grad_norm": 243.0, + "learning_rate": 3.742943477149089e-05, + "loss": 9.5002, + "step": 14238 + }, + { + "epoch": 0.5935142345046059, + "grad_norm": 314.0, + "learning_rate": 3.742290167783752e-05, + "loss": 13.2504, + "step": 14239 + }, + { + "epoch": 0.5935559168021342, + "grad_norm": 390.0, + "learning_rate": 3.7416368813402855e-05, + "loss": 15.3126, + "step": 14240 + }, + { + "epoch": 0.5935975990996624, + "grad_norm": 354.0, + "learning_rate": 3.740983617830602e-05, + "loss": 14.2502, + "step": 14241 + }, + { + "epoch": 0.5936392813971906, + "grad_norm": 168.0, + "learning_rate": 3.740330377266601e-05, + "loss": 9.1878, + "step": 14242 + }, + { + "epoch": 0.5936809636947189, + "grad_norm": 1272.0, + "learning_rate": 3.7396771596601925e-05, + "loss": 28.754, + "step": 14243 + }, + { + "epoch": 0.5937226459922471, + "grad_norm": 274.0, + "learning_rate": 3.7390239650232784e-05, + "loss": 13.8753, + "step": 14244 + }, + { + "epoch": 0.5937643282897753, + "grad_norm": 498.0, + "learning_rate": 3.738370793367766e-05, + "loss": 17.6254, + "step": 14245 + }, + { + "epoch": 0.5938060105873035, + "grad_norm": 174.0, + "learning_rate": 3.737717644705556e-05, + "loss": 11.6878, + "step": 14246 + }, + { + "epoch": 0.5938476928848319, + "grad_norm": 548.0, + "learning_rate": 3.737064519048554e-05, + "loss": 17.2504, + "step": 14247 + }, + { + "epoch": 0.5938893751823601, + "grad_norm": 264.0, + "learning_rate": 3.736411416408665e-05, + "loss": 13.1881, + "step": 14248 + }, + { + "epoch": 0.5939310574798883, + "grad_norm": 456.0, + "learning_rate": 3.735758336797789e-05, + "loss": 15.8751, + "step": 14249 + }, + { + "epoch": 0.5939727397774165, + "grad_norm": 169.0, + "learning_rate": 3.7351052802278285e-05, + "loss": 11.3133, + "step": 14250 + }, + { + "epoch": 0.5940144220749448, + "grad_norm": 104.5, + "learning_rate": 3.734452246710689e-05, + "loss": 10.5003, + "step": 14251 + }, + { + "epoch": 0.594056104372473, + "grad_norm": 296.0, + "learning_rate": 3.733799236258268e-05, + "loss": 12.8127, + "step": 14252 + }, + { + "epoch": 0.5940977866700012, + "grad_norm": 604.0, + "learning_rate": 3.7331462488824695e-05, + "loss": 18.6259, + "step": 14253 + }, + { + "epoch": 0.5941394689675294, + "grad_norm": 201.0, + "learning_rate": 3.7324932845951916e-05, + "loss": 11.2506, + "step": 14254 + }, + { + "epoch": 0.5941811512650578, + "grad_norm": 436.0, + "learning_rate": 3.731840343408338e-05, + "loss": 16.6256, + "step": 14255 + }, + { + "epoch": 0.594222833562586, + "grad_norm": 354.0, + "learning_rate": 3.731187425333804e-05, + "loss": 11.5628, + "step": 14256 + }, + { + "epoch": 0.5942645158601142, + "grad_norm": 181.0, + "learning_rate": 3.7305345303834946e-05, + "loss": 11.3131, + "step": 14257 + }, + { + "epoch": 0.5943061981576424, + "grad_norm": 242.0, + "learning_rate": 3.729881658569304e-05, + "loss": 11.1252, + "step": 14258 + }, + { + "epoch": 0.5943478804551707, + "grad_norm": 342.0, + "learning_rate": 3.729228809903134e-05, + "loss": 15.6883, + "step": 14259 + }, + { + "epoch": 0.5943895627526989, + "grad_norm": 145.0, + "learning_rate": 3.728575984396882e-05, + "loss": 9.1878, + "step": 14260 + }, + { + "epoch": 0.5944312450502272, + "grad_norm": 336.0, + "learning_rate": 3.727923182062445e-05, + "loss": 14.0001, + "step": 14261 + }, + { + "epoch": 0.5944729273477554, + "grad_norm": 406.0, + "learning_rate": 3.72727040291172e-05, + "loss": 15.939, + "step": 14262 + }, + { + "epoch": 0.5945146096452837, + "grad_norm": 370.0, + "learning_rate": 3.726617646956607e-05, + "loss": 13.4378, + "step": 14263 + }, + { + "epoch": 0.5945562919428119, + "grad_norm": 254.0, + "learning_rate": 3.725964914208998e-05, + "loss": 12.3127, + "step": 14264 + }, + { + "epoch": 0.5945979742403401, + "grad_norm": 229.0, + "learning_rate": 3.725312204680794e-05, + "loss": 11.8759, + "step": 14265 + }, + { + "epoch": 0.5946396565378683, + "grad_norm": 640.0, + "learning_rate": 3.724659518383886e-05, + "loss": 20.0006, + "step": 14266 + }, + { + "epoch": 0.5946813388353966, + "grad_norm": 161.0, + "learning_rate": 3.7240068553301744e-05, + "loss": 9.8752, + "step": 14267 + }, + { + "epoch": 0.5947230211329249, + "grad_norm": 184.0, + "learning_rate": 3.723354215531548e-05, + "loss": 11.5002, + "step": 14268 + }, + { + "epoch": 0.5947647034304531, + "grad_norm": 216.0, + "learning_rate": 3.722701598999907e-05, + "loss": 11.6877, + "step": 14269 + }, + { + "epoch": 0.5948063857279813, + "grad_norm": 193.0, + "learning_rate": 3.72204900574714e-05, + "loss": 10.1253, + "step": 14270 + }, + { + "epoch": 0.5948480680255096, + "grad_norm": 390.0, + "learning_rate": 3.721396435785146e-05, + "loss": 15.0009, + "step": 14271 + }, + { + "epoch": 0.5948897503230378, + "grad_norm": 684.0, + "learning_rate": 3.720743889125813e-05, + "loss": 18.8807, + "step": 14272 + }, + { + "epoch": 0.594931432620566, + "grad_norm": 132.0, + "learning_rate": 3.7200913657810375e-05, + "loss": 10.0007, + "step": 14273 + }, + { + "epoch": 0.5949731149180942, + "grad_norm": 1608.0, + "learning_rate": 3.71943886576271e-05, + "loss": 31.5092, + "step": 14274 + }, + { + "epoch": 0.5950147972156226, + "grad_norm": 219.0, + "learning_rate": 3.718786389082724e-05, + "loss": 12.1877, + "step": 14275 + }, + { + "epoch": 0.5950564795131508, + "grad_norm": 454.0, + "learning_rate": 3.718133935752968e-05, + "loss": 13.5004, + "step": 14276 + }, + { + "epoch": 0.595098161810679, + "grad_norm": 145.0, + "learning_rate": 3.7174815057853376e-05, + "loss": 10.1252, + "step": 14277 + }, + { + "epoch": 0.5951398441082072, + "grad_norm": 340.0, + "learning_rate": 3.7168290991917174e-05, + "loss": 13.0004, + "step": 14278 + }, + { + "epoch": 0.5951815264057355, + "grad_norm": 428.0, + "learning_rate": 3.7161767159840034e-05, + "loss": 15.3753, + "step": 14279 + }, + { + "epoch": 0.5952232087032637, + "grad_norm": 576.0, + "learning_rate": 3.71552435617408e-05, + "loss": 18.7505, + "step": 14280 + }, + { + "epoch": 0.5952648910007919, + "grad_norm": 512.0, + "learning_rate": 3.7148720197738426e-05, + "loss": 17.3751, + "step": 14281 + }, + { + "epoch": 0.5953065732983202, + "grad_norm": 141.0, + "learning_rate": 3.714219706795173e-05, + "loss": 9.2503, + "step": 14282 + }, + { + "epoch": 0.5953482555958485, + "grad_norm": 508.0, + "learning_rate": 3.713567417249967e-05, + "loss": 14.5036, + "step": 14283 + }, + { + "epoch": 0.5953899378933767, + "grad_norm": 446.0, + "learning_rate": 3.712915151150106e-05, + "loss": 15.5004, + "step": 14284 + }, + { + "epoch": 0.5954316201909049, + "grad_norm": 105.5, + "learning_rate": 3.7122629085074826e-05, + "loss": 6.8129, + "step": 14285 + }, + { + "epoch": 0.5954733024884331, + "grad_norm": 580.0, + "learning_rate": 3.711610689333982e-05, + "loss": 19.6251, + "step": 14286 + }, + { + "epoch": 0.5955149847859614, + "grad_norm": 306.0, + "learning_rate": 3.71095849364149e-05, + "loss": 13.5003, + "step": 14287 + }, + { + "epoch": 0.5955566670834896, + "grad_norm": 304.0, + "learning_rate": 3.710306321441893e-05, + "loss": 11.814, + "step": 14288 + }, + { + "epoch": 0.5955983493810179, + "grad_norm": 1048.0, + "learning_rate": 3.70965417274708e-05, + "loss": 26.0001, + "step": 14289 + }, + { + "epoch": 0.5956400316785461, + "grad_norm": 112.5, + "learning_rate": 3.7090020475689326e-05, + "loss": 10.2507, + "step": 14290 + }, + { + "epoch": 0.5956817139760744, + "grad_norm": 245.0, + "learning_rate": 3.708349945919339e-05, + "loss": 12.4391, + "step": 14291 + }, + { + "epoch": 0.5957233962736026, + "grad_norm": 506.0, + "learning_rate": 3.7076978678101805e-05, + "loss": 16.7502, + "step": 14292 + }, + { + "epoch": 0.5957650785711308, + "grad_norm": 221.0, + "learning_rate": 3.7070458132533445e-05, + "loss": 9.3135, + "step": 14293 + }, + { + "epoch": 0.5958067608686591, + "grad_norm": 168.0, + "learning_rate": 3.706393782260712e-05, + "loss": 9.9378, + "step": 14294 + }, + { + "epoch": 0.5958484431661873, + "grad_norm": 536.0, + "learning_rate": 3.7057417748441695e-05, + "loss": 17.5002, + "step": 14295 + }, + { + "epoch": 0.5958901254637156, + "grad_norm": 244.0, + "learning_rate": 3.705089791015596e-05, + "loss": 13.4377, + "step": 14296 + }, + { + "epoch": 0.5959318077612438, + "grad_norm": 247.0, + "learning_rate": 3.7044378307868775e-05, + "loss": 12.6253, + "step": 14297 + }, + { + "epoch": 0.5959734900587721, + "grad_norm": 231.0, + "learning_rate": 3.703785894169895e-05, + "loss": 12.5002, + "step": 14298 + }, + { + "epoch": 0.5960151723563003, + "grad_norm": 298.0, + "learning_rate": 3.703133981176527e-05, + "loss": 10.9378, + "step": 14299 + }, + { + "epoch": 0.5960568546538285, + "grad_norm": 84.5, + "learning_rate": 3.702482091818659e-05, + "loss": 8.0626, + "step": 14300 + }, + { + "epoch": 0.5960985369513567, + "grad_norm": 280.0, + "learning_rate": 3.7018302261081714e-05, + "loss": 11.5664, + "step": 14301 + }, + { + "epoch": 0.596140219248885, + "grad_norm": 424.0, + "learning_rate": 3.7011783840569405e-05, + "loss": 15.5001, + "step": 14302 + }, + { + "epoch": 0.5961819015464133, + "grad_norm": 386.0, + "learning_rate": 3.700526565676852e-05, + "loss": 15.5005, + "step": 14303 + }, + { + "epoch": 0.5962235838439415, + "grad_norm": 228.0, + "learning_rate": 3.6998747709797784e-05, + "loss": 13.813, + "step": 14304 + }, + { + "epoch": 0.5962652661414697, + "grad_norm": 604.0, + "learning_rate": 3.699222999977606e-05, + "loss": 18.2502, + "step": 14305 + }, + { + "epoch": 0.596306948438998, + "grad_norm": 378.0, + "learning_rate": 3.698571252682208e-05, + "loss": 14.2507, + "step": 14306 + }, + { + "epoch": 0.5963486307365262, + "grad_norm": 302.0, + "learning_rate": 3.6979195291054656e-05, + "loss": 13.4377, + "step": 14307 + }, + { + "epoch": 0.5963903130340544, + "grad_norm": 251.0, + "learning_rate": 3.697267829259254e-05, + "loss": 12.7502, + "step": 14308 + }, + { + "epoch": 0.5964319953315826, + "grad_norm": 404.0, + "learning_rate": 3.696616153155452e-05, + "loss": 14.3128, + "step": 14309 + }, + { + "epoch": 0.596473677629111, + "grad_norm": 198.0, + "learning_rate": 3.695964500805938e-05, + "loss": 11.2502, + "step": 14310 + }, + { + "epoch": 0.5965153599266392, + "grad_norm": 604.0, + "learning_rate": 3.695312872222585e-05, + "loss": 20.1254, + "step": 14311 + }, + { + "epoch": 0.5965570422241674, + "grad_norm": 136.0, + "learning_rate": 3.69466126741727e-05, + "loss": 10.3149, + "step": 14312 + }, + { + "epoch": 0.5965987245216956, + "grad_norm": 190.0, + "learning_rate": 3.694009686401872e-05, + "loss": 11.1255, + "step": 14313 + }, + { + "epoch": 0.5966404068192239, + "grad_norm": 628.0, + "learning_rate": 3.693358129188261e-05, + "loss": 19.5017, + "step": 14314 + }, + { + "epoch": 0.5966820891167521, + "grad_norm": 78.0, + "learning_rate": 3.692706595788316e-05, + "loss": 6.9689, + "step": 14315 + }, + { + "epoch": 0.5967237714142803, + "grad_norm": 324.0, + "learning_rate": 3.692055086213907e-05, + "loss": 11.7501, + "step": 14316 + }, + { + "epoch": 0.5967654537118086, + "grad_norm": 49.75, + "learning_rate": 3.691403600476914e-05, + "loss": 7.6253, + "step": 14317 + }, + { + "epoch": 0.5968071360093369, + "grad_norm": 316.0, + "learning_rate": 3.6907521385892025e-05, + "loss": 12.7516, + "step": 14318 + }, + { + "epoch": 0.5968488183068651, + "grad_norm": 247.0, + "learning_rate": 3.690100700562652e-05, + "loss": 12.2502, + "step": 14319 + }, + { + "epoch": 0.5968905006043933, + "grad_norm": 404.0, + "learning_rate": 3.689449286409131e-05, + "loss": 15.6878, + "step": 14320 + }, + { + "epoch": 0.5969321829019215, + "grad_norm": 540.0, + "learning_rate": 3.6887978961405146e-05, + "loss": 18.6251, + "step": 14321 + }, + { + "epoch": 0.5969738651994498, + "grad_norm": 226.0, + "learning_rate": 3.6881465297686714e-05, + "loss": 12.2504, + "step": 14322 + }, + { + "epoch": 0.597015547496978, + "grad_norm": 448.0, + "learning_rate": 3.687495187305475e-05, + "loss": 14.7503, + "step": 14323 + }, + { + "epoch": 0.5970572297945063, + "grad_norm": 620.0, + "learning_rate": 3.686843868762795e-05, + "loss": 18.8808, + "step": 14324 + }, + { + "epoch": 0.5970989120920345, + "grad_norm": 344.0, + "learning_rate": 3.686192574152502e-05, + "loss": 14.8128, + "step": 14325 + }, + { + "epoch": 0.5971405943895628, + "grad_norm": 724.0, + "learning_rate": 3.685541303486465e-05, + "loss": 20.5003, + "step": 14326 + }, + { + "epoch": 0.597182276687091, + "grad_norm": 149.0, + "learning_rate": 3.6848900567765574e-05, + "loss": 8.6252, + "step": 14327 + }, + { + "epoch": 0.5972239589846192, + "grad_norm": 346.0, + "learning_rate": 3.684238834034642e-05, + "loss": 14.2502, + "step": 14328 + }, + { + "epoch": 0.5972656412821474, + "grad_norm": 876.0, + "learning_rate": 3.683587635272594e-05, + "loss": 22.2507, + "step": 14329 + }, + { + "epoch": 0.5973073235796758, + "grad_norm": 308.0, + "learning_rate": 3.682936460502275e-05, + "loss": 13.9383, + "step": 14330 + }, + { + "epoch": 0.597349005877204, + "grad_norm": 372.0, + "learning_rate": 3.682285309735558e-05, + "loss": 14.8128, + "step": 14331 + }, + { + "epoch": 0.5973906881747322, + "grad_norm": 532.0, + "learning_rate": 3.681634182984307e-05, + "loss": 18.8752, + "step": 14332 + }, + { + "epoch": 0.5974323704722604, + "grad_norm": 462.0, + "learning_rate": 3.680983080260392e-05, + "loss": 16.6257, + "step": 14333 + }, + { + "epoch": 0.5974740527697887, + "grad_norm": 972.0, + "learning_rate": 3.6803320015756746e-05, + "loss": 24.7504, + "step": 14334 + }, + { + "epoch": 0.5975157350673169, + "grad_norm": 540.0, + "learning_rate": 3.679680946942024e-05, + "loss": 17.8752, + "step": 14335 + }, + { + "epoch": 0.5975574173648451, + "grad_norm": 700.0, + "learning_rate": 3.6790299163713074e-05, + "loss": 18.5034, + "step": 14336 + }, + { + "epoch": 0.5975990996623733, + "grad_norm": 258.0, + "learning_rate": 3.6783789098753865e-05, + "loss": 13.0001, + "step": 14337 + }, + { + "epoch": 0.5976407819599017, + "grad_norm": 110.0, + "learning_rate": 3.677727927466127e-05, + "loss": 9.6253, + "step": 14338 + }, + { + "epoch": 0.5976824642574299, + "grad_norm": 169.0, + "learning_rate": 3.677076969155395e-05, + "loss": 10.8126, + "step": 14339 + }, + { + "epoch": 0.5977241465549581, + "grad_norm": 1088.0, + "learning_rate": 3.676426034955051e-05, + "loss": 23.3754, + "step": 14340 + }, + { + "epoch": 0.5977658288524863, + "grad_norm": 1472.0, + "learning_rate": 3.6757751248769614e-05, + "loss": 30.7545, + "step": 14341 + }, + { + "epoch": 0.5978075111500146, + "grad_norm": 93.5, + "learning_rate": 3.675124238932986e-05, + "loss": 8.1253, + "step": 14342 + }, + { + "epoch": 0.5978491934475428, + "grad_norm": 246.0, + "learning_rate": 3.674473377134991e-05, + "loss": 4.5315, + "step": 14343 + }, + { + "epoch": 0.597890875745071, + "grad_norm": 434.0, + "learning_rate": 3.6738225394948344e-05, + "loss": 16.6256, + "step": 14344 + }, + { + "epoch": 0.5979325580425993, + "grad_norm": 167.0, + "learning_rate": 3.673171726024381e-05, + "loss": 11.0628, + "step": 14345 + }, + { + "epoch": 0.5979742403401276, + "grad_norm": 548.0, + "learning_rate": 3.672520936735489e-05, + "loss": 17.5003, + "step": 14346 + }, + { + "epoch": 0.5980159226376558, + "grad_norm": 143.0, + "learning_rate": 3.671870171640023e-05, + "loss": 9.5003, + "step": 14347 + }, + { + "epoch": 0.598057604935184, + "grad_norm": 127.5, + "learning_rate": 3.6712194307498406e-05, + "loss": 8.5002, + "step": 14348 + }, + { + "epoch": 0.5980992872327122, + "grad_norm": 170.0, + "learning_rate": 3.6705687140768014e-05, + "loss": 8.9382, + "step": 14349 + }, + { + "epoch": 0.5981409695302405, + "grad_norm": 201.0, + "learning_rate": 3.669918021632764e-05, + "loss": 12.0003, + "step": 14350 + }, + { + "epoch": 0.5981826518277688, + "grad_norm": 364.0, + "learning_rate": 3.6692673534295916e-05, + "loss": 10.44, + "step": 14351 + }, + { + "epoch": 0.598224334125297, + "grad_norm": 736.0, + "learning_rate": 3.668616709479138e-05, + "loss": 22.5011, + "step": 14352 + }, + { + "epoch": 0.5982660164228252, + "grad_norm": 137.0, + "learning_rate": 3.6679660897932646e-05, + "loss": 7.8757, + "step": 14353 + }, + { + "epoch": 0.5983076987203535, + "grad_norm": 482.0, + "learning_rate": 3.667315494383825e-05, + "loss": 15.4383, + "step": 14354 + }, + { + "epoch": 0.5983493810178817, + "grad_norm": 424.0, + "learning_rate": 3.666664923262682e-05, + "loss": 15.9377, + "step": 14355 + }, + { + "epoch": 0.5983910633154099, + "grad_norm": 346.0, + "learning_rate": 3.6660143764416866e-05, + "loss": 15.1254, + "step": 14356 + }, + { + "epoch": 0.5984327456129381, + "grad_norm": 394.0, + "learning_rate": 3.6653638539326994e-05, + "loss": 13.8145, + "step": 14357 + }, + { + "epoch": 0.5984744279104665, + "grad_norm": 238.0, + "learning_rate": 3.6647133557475724e-05, + "loss": 12.4378, + "step": 14358 + }, + { + "epoch": 0.5985161102079947, + "grad_norm": 564.0, + "learning_rate": 3.664062881898165e-05, + "loss": 17.7501, + "step": 14359 + }, + { + "epoch": 0.5985577925055229, + "grad_norm": 160.0, + "learning_rate": 3.663412432396328e-05, + "loss": 10.8128, + "step": 14360 + }, + { + "epoch": 0.5985994748030511, + "grad_norm": 256.0, + "learning_rate": 3.662762007253919e-05, + "loss": 9.8771, + "step": 14361 + }, + { + "epoch": 0.5986411571005794, + "grad_norm": 490.0, + "learning_rate": 3.662111606482791e-05, + "loss": 17.0001, + "step": 14362 + }, + { + "epoch": 0.5986828393981076, + "grad_norm": 616.0, + "learning_rate": 3.6614612300947994e-05, + "loss": 19.5033, + "step": 14363 + }, + { + "epoch": 0.5987245216956358, + "grad_norm": 960.0, + "learning_rate": 3.660810878101794e-05, + "loss": 22.3753, + "step": 14364 + }, + { + "epoch": 0.598766203993164, + "grad_norm": 280.0, + "learning_rate": 3.66016055051563e-05, + "loss": 12.1253, + "step": 14365 + }, + { + "epoch": 0.5988078862906924, + "grad_norm": 540.0, + "learning_rate": 3.659510247348158e-05, + "loss": 17.8768, + "step": 14366 + }, + { + "epoch": 0.5988495685882206, + "grad_norm": 104.0, + "learning_rate": 3.658859968611232e-05, + "loss": 8.8133, + "step": 14367 + }, + { + "epoch": 0.5988912508857488, + "grad_norm": 276.0, + "learning_rate": 3.658209714316701e-05, + "loss": 13.4379, + "step": 14368 + }, + { + "epoch": 0.5989329331832771, + "grad_norm": 264.0, + "learning_rate": 3.6575594844764186e-05, + "loss": 11.6252, + "step": 14369 + }, + { + "epoch": 0.5989746154808053, + "grad_norm": 314.0, + "learning_rate": 3.656909279102232e-05, + "loss": 12.7502, + "step": 14370 + }, + { + "epoch": 0.5990162977783335, + "grad_norm": 225.0, + "learning_rate": 3.656259098205995e-05, + "loss": 12.0632, + "step": 14371 + }, + { + "epoch": 0.5990579800758618, + "grad_norm": 302.0, + "learning_rate": 3.6556089417995524e-05, + "loss": 13.3126, + "step": 14372 + }, + { + "epoch": 0.5990996623733901, + "grad_norm": 450.0, + "learning_rate": 3.6549588098947584e-05, + "loss": 14.2504, + "step": 14373 + }, + { + "epoch": 0.5991413446709183, + "grad_norm": 103.5, + "learning_rate": 3.6543087025034584e-05, + "loss": 9.0634, + "step": 14374 + }, + { + "epoch": 0.5991830269684465, + "grad_norm": 508.0, + "learning_rate": 3.653658619637502e-05, + "loss": 17.3752, + "step": 14375 + }, + { + "epoch": 0.5992247092659747, + "grad_norm": 202.0, + "learning_rate": 3.6530085613087363e-05, + "loss": 9.7502, + "step": 14376 + }, + { + "epoch": 0.599266391563503, + "grad_norm": 572.0, + "learning_rate": 3.6523585275290115e-05, + "loss": 16.2543, + "step": 14377 + }, + { + "epoch": 0.5993080738610312, + "grad_norm": 241.0, + "learning_rate": 3.651708518310169e-05, + "loss": 13.1879, + "step": 14378 + }, + { + "epoch": 0.5993497561585595, + "grad_norm": 344.0, + "learning_rate": 3.6510585336640615e-05, + "loss": 14.8129, + "step": 14379 + }, + { + "epoch": 0.5993914384560877, + "grad_norm": 628.0, + "learning_rate": 3.650408573602529e-05, + "loss": 19.6293, + "step": 14380 + }, + { + "epoch": 0.599433120753616, + "grad_norm": 616.0, + "learning_rate": 3.649758638137423e-05, + "loss": 19.5001, + "step": 14381 + }, + { + "epoch": 0.5994748030511442, + "grad_norm": 344.0, + "learning_rate": 3.649108727280583e-05, + "loss": 14.2502, + "step": 14382 + }, + { + "epoch": 0.5995164853486724, + "grad_norm": 1160.0, + "learning_rate": 3.648458841043858e-05, + "loss": 31.2512, + "step": 14383 + }, + { + "epoch": 0.5995581676462006, + "grad_norm": 248.0, + "learning_rate": 3.6478089794390895e-05, + "loss": 11.5007, + "step": 14384 + }, + { + "epoch": 0.599599849943729, + "grad_norm": 380.0, + "learning_rate": 3.647159142478123e-05, + "loss": 15.3752, + "step": 14385 + }, + { + "epoch": 0.5996415322412572, + "grad_norm": 600.0, + "learning_rate": 3.6465093301728015e-05, + "loss": 19.5007, + "step": 14386 + }, + { + "epoch": 0.5996832145387854, + "grad_norm": 420.0, + "learning_rate": 3.645859542534967e-05, + "loss": 14.7503, + "step": 14387 + }, + { + "epoch": 0.5997248968363136, + "grad_norm": 172.0, + "learning_rate": 3.645209779576462e-05, + "loss": 10.876, + "step": 14388 + }, + { + "epoch": 0.5997665791338419, + "grad_norm": 266.0, + "learning_rate": 3.6445600413091316e-05, + "loss": 12.1252, + "step": 14389 + }, + { + "epoch": 0.5998082614313701, + "grad_norm": 115.5, + "learning_rate": 3.643910327744812e-05, + "loss": 9.0638, + "step": 14390 + }, + { + "epoch": 0.5998499437288983, + "grad_norm": 122.5, + "learning_rate": 3.6432606388953495e-05, + "loss": 8.8139, + "step": 14391 + }, + { + "epoch": 0.5998916260264265, + "grad_norm": 69.5, + "learning_rate": 3.642610974772579e-05, + "loss": 8.1878, + "step": 14392 + }, + { + "epoch": 0.5999333083239549, + "grad_norm": 492.0, + "learning_rate": 3.6419613353883476e-05, + "loss": 17.2504, + "step": 14393 + }, + { + "epoch": 0.5999749906214831, + "grad_norm": 420.0, + "learning_rate": 3.641311720754489e-05, + "loss": 13.2508, + "step": 14394 + }, + { + "epoch": 0.6000166729190113, + "grad_norm": 139.0, + "learning_rate": 3.640662130882847e-05, + "loss": 8.8131, + "step": 14395 + }, + { + "epoch": 0.6000583552165395, + "grad_norm": 356.0, + "learning_rate": 3.6400125657852556e-05, + "loss": 15.6253, + "step": 14396 + }, + { + "epoch": 0.6001000375140678, + "grad_norm": 334.0, + "learning_rate": 3.639363025473559e-05, + "loss": 12.6876, + "step": 14397 + }, + { + "epoch": 0.600141719811596, + "grad_norm": 63.5, + "learning_rate": 3.6387135099595894e-05, + "loss": 7.3753, + "step": 14398 + }, + { + "epoch": 0.6001834021091242, + "grad_norm": 540.0, + "learning_rate": 3.638064019255187e-05, + "loss": 17.2502, + "step": 14399 + }, + { + "epoch": 0.6002250844066525, + "grad_norm": 129.0, + "learning_rate": 3.6374145533721895e-05, + "loss": 10.1255, + "step": 14400 + }, + { + "epoch": 0.6002667667041808, + "grad_norm": 360.0, + "learning_rate": 3.6367651123224334e-05, + "loss": 14.2503, + "step": 14401 + }, + { + "epoch": 0.600308449001709, + "grad_norm": 648.0, + "learning_rate": 3.636115696117753e-05, + "loss": 19.5004, + "step": 14402 + }, + { + "epoch": 0.6003501312992372, + "grad_norm": 588.0, + "learning_rate": 3.6354663047699866e-05, + "loss": 18.8756, + "step": 14403 + }, + { + "epoch": 0.6003918135967654, + "grad_norm": 936.0, + "learning_rate": 3.634816938290966e-05, + "loss": 30.0005, + "step": 14404 + }, + { + "epoch": 0.6004334958942937, + "grad_norm": 458.0, + "learning_rate": 3.63416759669253e-05, + "loss": 16.7506, + "step": 14405 + }, + { + "epoch": 0.600475178191822, + "grad_norm": 382.0, + "learning_rate": 3.6335182799865086e-05, + "loss": 15.5003, + "step": 14406 + }, + { + "epoch": 0.6005168604893502, + "grad_norm": 478.0, + "learning_rate": 3.6328689881847404e-05, + "loss": 17.0003, + "step": 14407 + }, + { + "epoch": 0.6005585427868784, + "grad_norm": 488.0, + "learning_rate": 3.6322197212990535e-05, + "loss": 16.1256, + "step": 14408 + }, + { + "epoch": 0.6006002250844067, + "grad_norm": 620.0, + "learning_rate": 3.631570479341286e-05, + "loss": 18.8758, + "step": 14409 + }, + { + "epoch": 0.6006419073819349, + "grad_norm": 88.0, + "learning_rate": 3.630921262323266e-05, + "loss": 9.0632, + "step": 14410 + }, + { + "epoch": 0.6006835896794631, + "grad_norm": 276.0, + "learning_rate": 3.6302720702568295e-05, + "loss": 12.0626, + "step": 14411 + }, + { + "epoch": 0.6007252719769913, + "grad_norm": 230.0, + "learning_rate": 3.629622903153805e-05, + "loss": 10.7502, + "step": 14412 + }, + { + "epoch": 0.6007669542745196, + "grad_norm": 254.0, + "learning_rate": 3.628973761026024e-05, + "loss": 11.7504, + "step": 14413 + }, + { + "epoch": 0.6008086365720479, + "grad_norm": 198.0, + "learning_rate": 3.628324643885318e-05, + "loss": 11.3128, + "step": 14414 + }, + { + "epoch": 0.6008503188695761, + "grad_norm": 278.0, + "learning_rate": 3.627675551743519e-05, + "loss": 12.1254, + "step": 14415 + }, + { + "epoch": 0.6008920011671043, + "grad_norm": 644.0, + "learning_rate": 3.6270264846124534e-05, + "loss": 17.7528, + "step": 14416 + }, + { + "epoch": 0.6009336834646326, + "grad_norm": 182.0, + "learning_rate": 3.626377442503953e-05, + "loss": 9.5634, + "step": 14417 + }, + { + "epoch": 0.6009753657621608, + "grad_norm": 97.5, + "learning_rate": 3.625728425429844e-05, + "loss": 6.969, + "step": 14418 + }, + { + "epoch": 0.601017048059689, + "grad_norm": 209.0, + "learning_rate": 3.625079433401959e-05, + "loss": 11.5004, + "step": 14419 + }, + { + "epoch": 0.6010587303572172, + "grad_norm": 1288.0, + "learning_rate": 3.6244304664321204e-05, + "loss": 27.1297, + "step": 14420 + }, + { + "epoch": 0.6011004126547456, + "grad_norm": 352.0, + "learning_rate": 3.6237815245321614e-05, + "loss": 15.2503, + "step": 14421 + }, + { + "epoch": 0.6011420949522738, + "grad_norm": 386.0, + "learning_rate": 3.623132607713904e-05, + "loss": 15.8753, + "step": 14422 + }, + { + "epoch": 0.601183777249802, + "grad_norm": 176.0, + "learning_rate": 3.622483715989178e-05, + "loss": 11.1877, + "step": 14423 + }, + { + "epoch": 0.6012254595473302, + "grad_norm": 298.0, + "learning_rate": 3.621834849369809e-05, + "loss": 14.001, + "step": 14424 + }, + { + "epoch": 0.6012671418448585, + "grad_norm": 253.0, + "learning_rate": 3.621186007867622e-05, + "loss": 12.4385, + "step": 14425 + }, + { + "epoch": 0.6013088241423867, + "grad_norm": 564.0, + "learning_rate": 3.620537191494441e-05, + "loss": 16.7507, + "step": 14426 + }, + { + "epoch": 0.601350506439915, + "grad_norm": 146.0, + "learning_rate": 3.619888400262095e-05, + "loss": 11.0628, + "step": 14427 + }, + { + "epoch": 0.6013921887374432, + "grad_norm": 256.0, + "learning_rate": 3.619239634182403e-05, + "loss": 12.0628, + "step": 14428 + }, + { + "epoch": 0.6014338710349715, + "grad_norm": 412.0, + "learning_rate": 3.618590893267193e-05, + "loss": 14.8751, + "step": 14429 + }, + { + "epoch": 0.6014755533324997, + "grad_norm": 2272.0, + "learning_rate": 3.617942177528284e-05, + "loss": 43.5045, + "step": 14430 + }, + { + "epoch": 0.6015172356300279, + "grad_norm": 370.0, + "learning_rate": 3.6172934869775044e-05, + "loss": 16.1254, + "step": 14431 + }, + { + "epoch": 0.6015589179275561, + "grad_norm": 656.0, + "learning_rate": 3.616644821626671e-05, + "loss": 20.0003, + "step": 14432 + }, + { + "epoch": 0.6016006002250844, + "grad_norm": 348.0, + "learning_rate": 3.615996181487612e-05, + "loss": 15.1253, + "step": 14433 + }, + { + "epoch": 0.6016422825226126, + "grad_norm": 151.0, + "learning_rate": 3.615347566572143e-05, + "loss": 9.5627, + "step": 14434 + }, + { + "epoch": 0.6016839648201409, + "grad_norm": 336.0, + "learning_rate": 3.614698976892088e-05, + "loss": 13.6252, + "step": 14435 + }, + { + "epoch": 0.6017256471176691, + "grad_norm": 440.0, + "learning_rate": 3.6140504124592666e-05, + "loss": 17.2502, + "step": 14436 + }, + { + "epoch": 0.6017673294151974, + "grad_norm": 109.5, + "learning_rate": 3.6134018732855e-05, + "loss": 10.7504, + "step": 14437 + }, + { + "epoch": 0.6018090117127256, + "grad_norm": 390.0, + "learning_rate": 3.612753359382606e-05, + "loss": 14.8754, + "step": 14438 + }, + { + "epoch": 0.6018506940102538, + "grad_norm": 368.0, + "learning_rate": 3.6121048707624084e-05, + "loss": 14.1878, + "step": 14439 + }, + { + "epoch": 0.6018923763077821, + "grad_norm": 396.0, + "learning_rate": 3.61145640743672e-05, + "loss": 15.2502, + "step": 14440 + }, + { + "epoch": 0.6019340586053104, + "grad_norm": 336.0, + "learning_rate": 3.610807969417364e-05, + "loss": 12.1257, + "step": 14441 + }, + { + "epoch": 0.6019757409028386, + "grad_norm": 442.0, + "learning_rate": 3.6101595567161525e-05, + "loss": 13.8778, + "step": 14442 + }, + { + "epoch": 0.6020174232003668, + "grad_norm": 1012.0, + "learning_rate": 3.60951116934491e-05, + "loss": 23.2548, + "step": 14443 + }, + { + "epoch": 0.6020591054978951, + "grad_norm": 920.0, + "learning_rate": 3.608862807315448e-05, + "loss": 20.2551, + "step": 14444 + }, + { + "epoch": 0.6021007877954233, + "grad_norm": 332.0, + "learning_rate": 3.6082144706395864e-05, + "loss": 14.3753, + "step": 14445 + }, + { + "epoch": 0.6021424700929515, + "grad_norm": 211.0, + "learning_rate": 3.607566159329138e-05, + "loss": 11.0631, + "step": 14446 + }, + { + "epoch": 0.6021841523904797, + "grad_norm": 1312.0, + "learning_rate": 3.606917873395922e-05, + "loss": 29.38, + "step": 14447 + }, + { + "epoch": 0.602225834688008, + "grad_norm": 494.0, + "learning_rate": 3.606269612851749e-05, + "loss": 15.9408, + "step": 14448 + }, + { + "epoch": 0.6022675169855363, + "grad_norm": 366.0, + "learning_rate": 3.605621377708437e-05, + "loss": 12.8751, + "step": 14449 + }, + { + "epoch": 0.6023091992830645, + "grad_norm": 336.0, + "learning_rate": 3.604973167977799e-05, + "loss": 14.7507, + "step": 14450 + }, + { + "epoch": 0.6023508815805927, + "grad_norm": 117.5, + "learning_rate": 3.6043249836716494e-05, + "loss": 8.438, + "step": 14451 + }, + { + "epoch": 0.602392563878121, + "grad_norm": 424.0, + "learning_rate": 3.6036768248017996e-05, + "loss": 15.6878, + "step": 14452 + }, + { + "epoch": 0.6024342461756492, + "grad_norm": 212.0, + "learning_rate": 3.603028691380066e-05, + "loss": 10.9378, + "step": 14453 + }, + { + "epoch": 0.6024759284731774, + "grad_norm": 155.0, + "learning_rate": 3.6023805834182555e-05, + "loss": 10.6878, + "step": 14454 + }, + { + "epoch": 0.6025176107707056, + "grad_norm": 133.0, + "learning_rate": 3.6017325009281855e-05, + "loss": 9.6253, + "step": 14455 + }, + { + "epoch": 0.602559293068234, + "grad_norm": 216.0, + "learning_rate": 3.601084443921663e-05, + "loss": 10.938, + "step": 14456 + }, + { + "epoch": 0.6026009753657622, + "grad_norm": 138.0, + "learning_rate": 3.600436412410503e-05, + "loss": 11.7517, + "step": 14457 + }, + { + "epoch": 0.6026426576632904, + "grad_norm": 136.0, + "learning_rate": 3.599788406406511e-05, + "loss": 10.4378, + "step": 14458 + }, + { + "epoch": 0.6026843399608186, + "grad_norm": 1080.0, + "learning_rate": 3.599140425921502e-05, + "loss": 31.3751, + "step": 14459 + }, + { + "epoch": 0.6027260222583469, + "grad_norm": 196.0, + "learning_rate": 3.5984924709672806e-05, + "loss": 10.7507, + "step": 14460 + }, + { + "epoch": 0.6027677045558751, + "grad_norm": 430.0, + "learning_rate": 3.59784454155566e-05, + "loss": 17.0003, + "step": 14461 + }, + { + "epoch": 0.6028093868534034, + "grad_norm": 250.0, + "learning_rate": 3.597196637698447e-05, + "loss": 11.1878, + "step": 14462 + }, + { + "epoch": 0.6028510691509316, + "grad_norm": 310.0, + "learning_rate": 3.596548759407449e-05, + "loss": 13.3127, + "step": 14463 + }, + { + "epoch": 0.6028927514484599, + "grad_norm": 67.5, + "learning_rate": 3.595900906694474e-05, + "loss": 7.7502, + "step": 14464 + }, + { + "epoch": 0.6029344337459881, + "grad_norm": 94.5, + "learning_rate": 3.5952530795713315e-05, + "loss": 10.6263, + "step": 14465 + }, + { + "epoch": 0.6029761160435163, + "grad_norm": 122.0, + "learning_rate": 3.5946052780498245e-05, + "loss": 10.5631, + "step": 14466 + }, + { + "epoch": 0.6030177983410445, + "grad_norm": 880.0, + "learning_rate": 3.593957502141763e-05, + "loss": 23.6263, + "step": 14467 + }, + { + "epoch": 0.6030594806385728, + "grad_norm": 328.0, + "learning_rate": 3.5933097518589486e-05, + "loss": 12.3129, + "step": 14468 + }, + { + "epoch": 0.603101162936101, + "grad_norm": 340.0, + "learning_rate": 3.592662027213192e-05, + "loss": 15.3134, + "step": 14469 + }, + { + "epoch": 0.6031428452336293, + "grad_norm": 92.0, + "learning_rate": 3.592014328216292e-05, + "loss": 7.5003, + "step": 14470 + }, + { + "epoch": 0.6031845275311575, + "grad_norm": 154.0, + "learning_rate": 3.591366654880057e-05, + "loss": 9.0627, + "step": 14471 + }, + { + "epoch": 0.6032262098286858, + "grad_norm": 536.0, + "learning_rate": 3.590719007216289e-05, + "loss": 17.376, + "step": 14472 + }, + { + "epoch": 0.603267892126214, + "grad_norm": 438.0, + "learning_rate": 3.590071385236793e-05, + "loss": 16.7505, + "step": 14473 + }, + { + "epoch": 0.6033095744237422, + "grad_norm": 278.0, + "learning_rate": 3.5894237889533714e-05, + "loss": 10.3776, + "step": 14474 + }, + { + "epoch": 0.6033512567212704, + "grad_norm": 256.0, + "learning_rate": 3.588776218377825e-05, + "loss": 12.6877, + "step": 14475 + }, + { + "epoch": 0.6033929390187988, + "grad_norm": 175.0, + "learning_rate": 3.588128673521958e-05, + "loss": 10.0001, + "step": 14476 + }, + { + "epoch": 0.603434621316327, + "grad_norm": 648.0, + "learning_rate": 3.587481154397573e-05, + "loss": 20.2509, + "step": 14477 + }, + { + "epoch": 0.6034763036138552, + "grad_norm": 188.0, + "learning_rate": 3.5868336610164665e-05, + "loss": 10.8761, + "step": 14478 + }, + { + "epoch": 0.6035179859113834, + "grad_norm": 162.0, + "learning_rate": 3.5861861933904446e-05, + "loss": 5.4378, + "step": 14479 + }, + { + "epoch": 0.6035596682089117, + "grad_norm": 400.0, + "learning_rate": 3.585538751531302e-05, + "loss": 15.0001, + "step": 14480 + }, + { + "epoch": 0.6036013505064399, + "grad_norm": 1800.0, + "learning_rate": 3.5848913354508446e-05, + "loss": 35.2547, + "step": 14481 + }, + { + "epoch": 0.6036430328039681, + "grad_norm": 804.0, + "learning_rate": 3.5842439451608654e-05, + "loss": 23.5001, + "step": 14482 + }, + { + "epoch": 0.6036847151014963, + "grad_norm": 1016.0, + "learning_rate": 3.583596580673168e-05, + "loss": 22.8795, + "step": 14483 + }, + { + "epoch": 0.6037263973990247, + "grad_norm": 378.0, + "learning_rate": 3.582949241999547e-05, + "loss": 12.8752, + "step": 14484 + }, + { + "epoch": 0.6037680796965529, + "grad_norm": 153.0, + "learning_rate": 3.5823019291518035e-05, + "loss": 7.5629, + "step": 14485 + }, + { + "epoch": 0.6038097619940811, + "grad_norm": 488.0, + "learning_rate": 3.5816546421417313e-05, + "loss": 15.8126, + "step": 14486 + }, + { + "epoch": 0.6038514442916093, + "grad_norm": 183.0, + "learning_rate": 3.58100738098113e-05, + "loss": 11.9381, + "step": 14487 + }, + { + "epoch": 0.6038931265891376, + "grad_norm": 118.5, + "learning_rate": 3.5803601456817947e-05, + "loss": 10.0003, + "step": 14488 + }, + { + "epoch": 0.6039348088866658, + "grad_norm": 260.0, + "learning_rate": 3.579712936255523e-05, + "loss": 13.7504, + "step": 14489 + }, + { + "epoch": 0.603976491184194, + "grad_norm": 468.0, + "learning_rate": 3.579065752714108e-05, + "loss": 14.5631, + "step": 14490 + }, + { + "epoch": 0.6040181734817223, + "grad_norm": 700.0, + "learning_rate": 3.578418595069347e-05, + "loss": 19.5005, + "step": 14491 + }, + { + "epoch": 0.6040598557792506, + "grad_norm": 416.0, + "learning_rate": 3.5777714633330315e-05, + "loss": 14.9386, + "step": 14492 + }, + { + "epoch": 0.6041015380767788, + "grad_norm": 348.0, + "learning_rate": 3.5771243575169596e-05, + "loss": 13.7509, + "step": 14493 + }, + { + "epoch": 0.604143220374307, + "grad_norm": 231.0, + "learning_rate": 3.57647727763292e-05, + "loss": 11.6877, + "step": 14494 + }, + { + "epoch": 0.6041849026718352, + "grad_norm": 274.0, + "learning_rate": 3.575830223692711e-05, + "loss": 12.7502, + "step": 14495 + }, + { + "epoch": 0.6042265849693635, + "grad_norm": 308.0, + "learning_rate": 3.5751831957081206e-05, + "loss": 12.5628, + "step": 14496 + }, + { + "epoch": 0.6042682672668918, + "grad_norm": 422.0, + "learning_rate": 3.574536193690945e-05, + "loss": 16.8756, + "step": 14497 + }, + { + "epoch": 0.60430994956442, + "grad_norm": 520.0, + "learning_rate": 3.573889217652971e-05, + "loss": 18.2502, + "step": 14498 + }, + { + "epoch": 0.6043516318619482, + "grad_norm": 221.0, + "learning_rate": 3.573242267605995e-05, + "loss": 10.6263, + "step": 14499 + }, + { + "epoch": 0.6043933141594765, + "grad_norm": 224.0, + "learning_rate": 3.572595343561804e-05, + "loss": 12.3129, + "step": 14500 + }, + { + "epoch": 0.6044349964570047, + "grad_norm": 290.0, + "learning_rate": 3.57194844553219e-05, + "loss": 14.1255, + "step": 14501 + }, + { + "epoch": 0.6044766787545329, + "grad_norm": 292.0, + "learning_rate": 3.5713015735289416e-05, + "loss": 12.5006, + "step": 14502 + }, + { + "epoch": 0.6045183610520611, + "grad_norm": 700.0, + "learning_rate": 3.570654727563851e-05, + "loss": 21.0003, + "step": 14503 + }, + { + "epoch": 0.6045600433495895, + "grad_norm": 251.0, + "learning_rate": 3.5700079076487024e-05, + "loss": 12.8759, + "step": 14504 + }, + { + "epoch": 0.6046017256471177, + "grad_norm": 292.0, + "learning_rate": 3.5693611137952885e-05, + "loss": 11.5011, + "step": 14505 + }, + { + "epoch": 0.6046434079446459, + "grad_norm": 436.0, + "learning_rate": 3.568714346015394e-05, + "loss": 16.5002, + "step": 14506 + }, + { + "epoch": 0.6046850902421741, + "grad_norm": 232.0, + "learning_rate": 3.568067604320809e-05, + "loss": 12.252, + "step": 14507 + }, + { + "epoch": 0.6047267725397024, + "grad_norm": 316.0, + "learning_rate": 3.567420888723317e-05, + "loss": 12.627, + "step": 14508 + }, + { + "epoch": 0.6047684548372306, + "grad_norm": 660.0, + "learning_rate": 3.566774199234709e-05, + "loss": 19.7506, + "step": 14509 + }, + { + "epoch": 0.6048101371347588, + "grad_norm": 462.0, + "learning_rate": 3.566127535866767e-05, + "loss": 17.501, + "step": 14510 + }, + { + "epoch": 0.604851819432287, + "grad_norm": 250.0, + "learning_rate": 3.565480898631277e-05, + "loss": 13.1257, + "step": 14511 + }, + { + "epoch": 0.6048935017298154, + "grad_norm": 350.0, + "learning_rate": 3.564834287540027e-05, + "loss": 13.8143, + "step": 14512 + }, + { + "epoch": 0.6049351840273436, + "grad_norm": 408.0, + "learning_rate": 3.564187702604798e-05, + "loss": 16.0003, + "step": 14513 + }, + { + "epoch": 0.6049768663248718, + "grad_norm": 196.0, + "learning_rate": 3.5635411438373755e-05, + "loss": 10.688, + "step": 14514 + }, + { + "epoch": 0.6050185486224001, + "grad_norm": 79.5, + "learning_rate": 3.562894611249545e-05, + "loss": 9.8759, + "step": 14515 + }, + { + "epoch": 0.6050602309199283, + "grad_norm": 512.0, + "learning_rate": 3.5622481048530856e-05, + "loss": 17.8754, + "step": 14516 + }, + { + "epoch": 0.6051019132174565, + "grad_norm": 241.0, + "learning_rate": 3.5616016246597837e-05, + "loss": 12.2502, + "step": 14517 + }, + { + "epoch": 0.6051435955149848, + "grad_norm": 712.0, + "learning_rate": 3.560955170681418e-05, + "loss": 20.2504, + "step": 14518 + }, + { + "epoch": 0.6051852778125131, + "grad_norm": 144.0, + "learning_rate": 3.560308742929775e-05, + "loss": 9.5004, + "step": 14519 + }, + { + "epoch": 0.6052269601100413, + "grad_norm": 2064.0, + "learning_rate": 3.55966234141663e-05, + "loss": 35.5092, + "step": 14520 + }, + { + "epoch": 0.6052686424075695, + "grad_norm": 348.0, + "learning_rate": 3.559015966153769e-05, + "loss": 13.8127, + "step": 14521 + }, + { + "epoch": 0.6053103247050977, + "grad_norm": 414.0, + "learning_rate": 3.5583696171529686e-05, + "loss": 16.001, + "step": 14522 + }, + { + "epoch": 0.605352007002626, + "grad_norm": 326.0, + "learning_rate": 3.557723294426011e-05, + "loss": 14.5011, + "step": 14523 + }, + { + "epoch": 0.6053936893001542, + "grad_norm": 848.0, + "learning_rate": 3.557076997984673e-05, + "loss": 21.7501, + "step": 14524 + }, + { + "epoch": 0.6054353715976825, + "grad_norm": 298.0, + "learning_rate": 3.556430727840735e-05, + "loss": 13.8129, + "step": 14525 + }, + { + "epoch": 0.6054770538952107, + "grad_norm": 229.0, + "learning_rate": 3.555784484005975e-05, + "loss": 10.9378, + "step": 14526 + }, + { + "epoch": 0.605518736192739, + "grad_norm": 274.0, + "learning_rate": 3.555138266492173e-05, + "loss": 12.8134, + "step": 14527 + }, + { + "epoch": 0.6055604184902672, + "grad_norm": 356.0, + "learning_rate": 3.5544920753111014e-05, + "loss": 15.1911, + "step": 14528 + }, + { + "epoch": 0.6056021007877954, + "grad_norm": 197.0, + "learning_rate": 3.553845910474542e-05, + "loss": 11.6878, + "step": 14529 + }, + { + "epoch": 0.6056437830853236, + "grad_norm": 404.0, + "learning_rate": 3.553199771994269e-05, + "loss": 15.5002, + "step": 14530 + }, + { + "epoch": 0.605685465382852, + "grad_norm": 412.0, + "learning_rate": 3.552553659882059e-05, + "loss": 16.2505, + "step": 14531 + }, + { + "epoch": 0.6057271476803802, + "grad_norm": 804.0, + "learning_rate": 3.551907574149685e-05, + "loss": 20.2521, + "step": 14532 + }, + { + "epoch": 0.6057688299779084, + "grad_norm": 143.0, + "learning_rate": 3.5512615148089274e-05, + "loss": 10.0626, + "step": 14533 + }, + { + "epoch": 0.6058105122754366, + "grad_norm": 51.25, + "learning_rate": 3.550615481871554e-05, + "loss": 7.1565, + "step": 14534 + }, + { + "epoch": 0.6058521945729649, + "grad_norm": 414.0, + "learning_rate": 3.549969475349345e-05, + "loss": 15.3126, + "step": 14535 + }, + { + "epoch": 0.6058938768704931, + "grad_norm": 304.0, + "learning_rate": 3.549323495254068e-05, + "loss": 14.5628, + "step": 14536 + }, + { + "epoch": 0.6059355591680213, + "grad_norm": 418.0, + "learning_rate": 3.548677541597501e-05, + "loss": 16.2503, + "step": 14537 + }, + { + "epoch": 0.6059772414655495, + "grad_norm": 892.0, + "learning_rate": 3.548031614391415e-05, + "loss": 21.382, + "step": 14538 + }, + { + "epoch": 0.6060189237630779, + "grad_norm": 772.0, + "learning_rate": 3.54738571364758e-05, + "loss": 20.2534, + "step": 14539 + }, + { + "epoch": 0.6060606060606061, + "grad_norm": 266.0, + "learning_rate": 3.5467398393777696e-05, + "loss": 12.6252, + "step": 14540 + }, + { + "epoch": 0.6061022883581343, + "grad_norm": 139.0, + "learning_rate": 3.5460939915937564e-05, + "loss": 9.3757, + "step": 14541 + }, + { + "epoch": 0.6061439706556625, + "grad_norm": 294.0, + "learning_rate": 3.545448170307307e-05, + "loss": 12.4383, + "step": 14542 + }, + { + "epoch": 0.6061856529531908, + "grad_norm": 418.0, + "learning_rate": 3.544802375530196e-05, + "loss": 15.0631, + "step": 14543 + }, + { + "epoch": 0.606227335250719, + "grad_norm": 203.0, + "learning_rate": 3.544156607274189e-05, + "loss": 11.4388, + "step": 14544 + }, + { + "epoch": 0.6062690175482472, + "grad_norm": 328.0, + "learning_rate": 3.5435108655510596e-05, + "loss": 13.1252, + "step": 14545 + }, + { + "epoch": 0.6063106998457755, + "grad_norm": 418.0, + "learning_rate": 3.5428651503725704e-05, + "loss": 14.938, + "step": 14546 + }, + { + "epoch": 0.6063523821433038, + "grad_norm": 376.0, + "learning_rate": 3.542219461750497e-05, + "loss": 14.3758, + "step": 14547 + }, + { + "epoch": 0.606394064440832, + "grad_norm": 75.5, + "learning_rate": 3.5415737996966e-05, + "loss": 7.3753, + "step": 14548 + }, + { + "epoch": 0.6064357467383602, + "grad_norm": 458.0, + "learning_rate": 3.540928164222652e-05, + "loss": 14.5627, + "step": 14549 + }, + { + "epoch": 0.6064774290358884, + "grad_norm": 290.0, + "learning_rate": 3.540282555340417e-05, + "loss": 12.3753, + "step": 14550 + }, + { + "epoch": 0.6065191113334167, + "grad_norm": 135.0, + "learning_rate": 3.5396369730616624e-05, + "loss": 10.3135, + "step": 14551 + }, + { + "epoch": 0.606560793630945, + "grad_norm": 708.0, + "learning_rate": 3.538991417398152e-05, + "loss": 19.2504, + "step": 14552 + }, + { + "epoch": 0.6066024759284732, + "grad_norm": 386.0, + "learning_rate": 3.5383458883616554e-05, + "loss": 14.0005, + "step": 14553 + }, + { + "epoch": 0.6066441582260014, + "grad_norm": 72.0, + "learning_rate": 3.5377003859639324e-05, + "loss": 7.1565, + "step": 14554 + }, + { + "epoch": 0.6066858405235297, + "grad_norm": 692.0, + "learning_rate": 3.537054910216752e-05, + "loss": 21.3755, + "step": 14555 + }, + { + "epoch": 0.6067275228210579, + "grad_norm": 226.0, + "learning_rate": 3.5364094611318734e-05, + "loss": 11.8127, + "step": 14556 + }, + { + "epoch": 0.6067692051185861, + "grad_norm": 412.0, + "learning_rate": 3.535764038721065e-05, + "loss": 15.563, + "step": 14557 + }, + { + "epoch": 0.6068108874161143, + "grad_norm": 354.0, + "learning_rate": 3.535118642996084e-05, + "loss": 13.9377, + "step": 14558 + }, + { + "epoch": 0.6068525697136427, + "grad_norm": 374.0, + "learning_rate": 3.534473273968698e-05, + "loss": 15.0004, + "step": 14559 + }, + { + "epoch": 0.6068942520111709, + "grad_norm": 316.0, + "learning_rate": 3.533827931650665e-05, + "loss": 14.0006, + "step": 14560 + }, + { + "epoch": 0.6069359343086991, + "grad_norm": 496.0, + "learning_rate": 3.533182616053749e-05, + "loss": 17.251, + "step": 14561 + }, + { + "epoch": 0.6069776166062273, + "grad_norm": 139.0, + "learning_rate": 3.53253732718971e-05, + "loss": 10.3754, + "step": 14562 + }, + { + "epoch": 0.6070192989037556, + "grad_norm": 217.0, + "learning_rate": 3.531892065070308e-05, + "loss": 12.5002, + "step": 14563 + }, + { + "epoch": 0.6070609812012838, + "grad_norm": 241.0, + "learning_rate": 3.531246829707303e-05, + "loss": 12.7505, + "step": 14564 + }, + { + "epoch": 0.607102663498812, + "grad_norm": 168.0, + "learning_rate": 3.530601621112457e-05, + "loss": 11.5005, + "step": 14565 + }, + { + "epoch": 0.6071443457963402, + "grad_norm": 118.5, + "learning_rate": 3.529956439297525e-05, + "loss": 9.938, + "step": 14566 + }, + { + "epoch": 0.6071860280938686, + "grad_norm": 217.0, + "learning_rate": 3.529311284274269e-05, + "loss": 10.2512, + "step": 14567 + }, + { + "epoch": 0.6072277103913968, + "grad_norm": 247.0, + "learning_rate": 3.528666156054443e-05, + "loss": 13.1879, + "step": 14568 + }, + { + "epoch": 0.607269392688925, + "grad_norm": 290.0, + "learning_rate": 3.528021054649809e-05, + "loss": 11.9378, + "step": 14569 + }, + { + "epoch": 0.6073110749864532, + "grad_norm": 544.0, + "learning_rate": 3.5273759800721206e-05, + "loss": 19.0002, + "step": 14570 + }, + { + "epoch": 0.6073527572839815, + "grad_norm": 1448.0, + "learning_rate": 3.526730932333138e-05, + "loss": 29.629, + "step": 14571 + }, + { + "epoch": 0.6073944395815097, + "grad_norm": 156.0, + "learning_rate": 3.526085911444612e-05, + "loss": 9.1258, + "step": 14572 + }, + { + "epoch": 0.607436121879038, + "grad_norm": 728.0, + "learning_rate": 3.525440917418303e-05, + "loss": 19.0004, + "step": 14573 + }, + { + "epoch": 0.6074778041765662, + "grad_norm": 39.5, + "learning_rate": 3.5247959502659634e-05, + "loss": 6.8754, + "step": 14574 + }, + { + "epoch": 0.6075194864740945, + "grad_norm": 418.0, + "learning_rate": 3.524151009999349e-05, + "loss": 15.2506, + "step": 14575 + }, + { + "epoch": 0.6075611687716227, + "grad_norm": 276.0, + "learning_rate": 3.523506096630213e-05, + "loss": 11.8128, + "step": 14576 + }, + { + "epoch": 0.6076028510691509, + "grad_norm": 384.0, + "learning_rate": 3.522861210170309e-05, + "loss": 16.6261, + "step": 14577 + }, + { + "epoch": 0.6076445333666791, + "grad_norm": 294.0, + "learning_rate": 3.522216350631391e-05, + "loss": 13.8128, + "step": 14578 + }, + { + "epoch": 0.6076862156642074, + "grad_norm": 77.5, + "learning_rate": 3.521571518025213e-05, + "loss": 7.751, + "step": 14579 + }, + { + "epoch": 0.6077278979617357, + "grad_norm": 324.0, + "learning_rate": 3.5209267123635224e-05, + "loss": 13.5627, + "step": 14580 + }, + { + "epoch": 0.6077695802592639, + "grad_norm": 260.0, + "learning_rate": 3.520281933658076e-05, + "loss": 14.6882, + "step": 14581 + }, + { + "epoch": 0.6078112625567921, + "grad_norm": 972.0, + "learning_rate": 3.5196371819206215e-05, + "loss": 24.2536, + "step": 14582 + }, + { + "epoch": 0.6078529448543204, + "grad_norm": 372.0, + "learning_rate": 3.518992457162912e-05, + "loss": 16.6254, + "step": 14583 + }, + { + "epoch": 0.6078946271518486, + "grad_norm": 164.0, + "learning_rate": 3.518347759396694e-05, + "loss": 10.313, + "step": 14584 + }, + { + "epoch": 0.6079363094493768, + "grad_norm": 328.0, + "learning_rate": 3.517703088633723e-05, + "loss": 13.6258, + "step": 14585 + }, + { + "epoch": 0.6079779917469051, + "grad_norm": 215.0, + "learning_rate": 3.5170584448857424e-05, + "loss": 11.8752, + "step": 14586 + }, + { + "epoch": 0.6080196740444334, + "grad_norm": 394.0, + "learning_rate": 3.5164138281645036e-05, + "loss": 15.3753, + "step": 14587 + }, + { + "epoch": 0.6080613563419616, + "grad_norm": 1488.0, + "learning_rate": 3.5157692384817546e-05, + "loss": 33.2506, + "step": 14588 + }, + { + "epoch": 0.6081030386394898, + "grad_norm": 476.0, + "learning_rate": 3.515124675849243e-05, + "loss": 16.0021, + "step": 14589 + }, + { + "epoch": 0.6081447209370181, + "grad_norm": 278.0, + "learning_rate": 3.514480140278716e-05, + "loss": 11.5636, + "step": 14590 + }, + { + "epoch": 0.6081864032345463, + "grad_norm": 568.0, + "learning_rate": 3.513835631781921e-05, + "loss": 17.1256, + "step": 14591 + }, + { + "epoch": 0.6082280855320745, + "grad_norm": 45.5, + "learning_rate": 3.513191150370603e-05, + "loss": 6.8753, + "step": 14592 + }, + { + "epoch": 0.6082697678296027, + "grad_norm": 276.0, + "learning_rate": 3.512546696056509e-05, + "loss": 12.1878, + "step": 14593 + }, + { + "epoch": 0.6083114501271311, + "grad_norm": 87.0, + "learning_rate": 3.5119022688513815e-05, + "loss": 8.3752, + "step": 14594 + }, + { + "epoch": 0.6083531324246593, + "grad_norm": 442.0, + "learning_rate": 3.5112578687669695e-05, + "loss": 17.2503, + "step": 14595 + }, + { + "epoch": 0.6083948147221875, + "grad_norm": 450.0, + "learning_rate": 3.510613495815013e-05, + "loss": 15.7502, + "step": 14596 + }, + { + "epoch": 0.6084364970197157, + "grad_norm": 1120.0, + "learning_rate": 3.5099691500072606e-05, + "loss": 26.6254, + "step": 14597 + }, + { + "epoch": 0.608478179317244, + "grad_norm": 394.0, + "learning_rate": 3.5093248313554497e-05, + "loss": 16.7503, + "step": 14598 + }, + { + "epoch": 0.6085198616147722, + "grad_norm": 256.0, + "learning_rate": 3.508680539871327e-05, + "loss": 12.6877, + "step": 14599 + }, + { + "epoch": 0.6085615439123004, + "grad_norm": 446.0, + "learning_rate": 3.508036275566635e-05, + "loss": 16.3754, + "step": 14600 + }, + { + "epoch": 0.6086032262098287, + "grad_norm": 440.0, + "learning_rate": 3.5073920384531136e-05, + "loss": 16.3761, + "step": 14601 + }, + { + "epoch": 0.608644908507357, + "grad_norm": 254.0, + "learning_rate": 3.5067478285425034e-05, + "loss": 10.6258, + "step": 14602 + }, + { + "epoch": 0.6086865908048852, + "grad_norm": 165.0, + "learning_rate": 3.506103645846549e-05, + "loss": 9.9378, + "step": 14603 + }, + { + "epoch": 0.6087282731024134, + "grad_norm": 114.0, + "learning_rate": 3.505459490376986e-05, + "loss": 10.6253, + "step": 14604 + }, + { + "epoch": 0.6087699553999416, + "grad_norm": 53.25, + "learning_rate": 3.504815362145559e-05, + "loss": 8.0635, + "step": 14605 + }, + { + "epoch": 0.6088116376974699, + "grad_norm": 126.0, + "learning_rate": 3.504171261164002e-05, + "loss": 8.7501, + "step": 14606 + }, + { + "epoch": 0.6088533199949981, + "grad_norm": 201.0, + "learning_rate": 3.503527187444059e-05, + "loss": 10.1884, + "step": 14607 + }, + { + "epoch": 0.6088950022925264, + "grad_norm": 154.0, + "learning_rate": 3.502883140997464e-05, + "loss": 10.7505, + "step": 14608 + }, + { + "epoch": 0.6089366845900546, + "grad_norm": 672.0, + "learning_rate": 3.5022391218359585e-05, + "loss": 17.2547, + "step": 14609 + }, + { + "epoch": 0.6089783668875829, + "grad_norm": 384.0, + "learning_rate": 3.501595129971276e-05, + "loss": 15.1255, + "step": 14610 + }, + { + "epoch": 0.6090200491851111, + "grad_norm": 162.0, + "learning_rate": 3.500951165415157e-05, + "loss": 10.0002, + "step": 14611 + }, + { + "epoch": 0.6090617314826393, + "grad_norm": 202.0, + "learning_rate": 3.500307228179335e-05, + "loss": 9.1252, + "step": 14612 + }, + { + "epoch": 0.6091034137801675, + "grad_norm": 528.0, + "learning_rate": 3.499663318275547e-05, + "loss": 17.5025, + "step": 14613 + }, + { + "epoch": 0.6091450960776958, + "grad_norm": 364.0, + "learning_rate": 3.499019435715527e-05, + "loss": 15.0002, + "step": 14614 + }, + { + "epoch": 0.6091867783752241, + "grad_norm": 125.5, + "learning_rate": 3.4983755805110135e-05, + "loss": 9.7504, + "step": 14615 + }, + { + "epoch": 0.6092284606727523, + "grad_norm": 780.0, + "learning_rate": 3.497731752673736e-05, + "loss": 23.1254, + "step": 14616 + }, + { + "epoch": 0.6092701429702805, + "grad_norm": 484.0, + "learning_rate": 3.497087952215433e-05, + "loss": 17.5004, + "step": 14617 + }, + { + "epoch": 0.6093118252678088, + "grad_norm": 684.0, + "learning_rate": 3.496444179147834e-05, + "loss": 20.6256, + "step": 14618 + }, + { + "epoch": 0.609353507565337, + "grad_norm": 492.0, + "learning_rate": 3.495800433482674e-05, + "loss": 17.7501, + "step": 14619 + }, + { + "epoch": 0.6093951898628652, + "grad_norm": 394.0, + "learning_rate": 3.495156715231684e-05, + "loss": 15.1915, + "step": 14620 + }, + { + "epoch": 0.6094368721603934, + "grad_norm": 188.0, + "learning_rate": 3.4945130244065985e-05, + "loss": 8.8754, + "step": 14621 + }, + { + "epoch": 0.6094785544579218, + "grad_norm": 171.0, + "learning_rate": 3.4938693610191435e-05, + "loss": 10.0006, + "step": 14622 + }, + { + "epoch": 0.60952023675545, + "grad_norm": 324.0, + "learning_rate": 3.493225725081056e-05, + "loss": 14.0027, + "step": 14623 + }, + { + "epoch": 0.6095619190529782, + "grad_norm": 440.0, + "learning_rate": 3.4925821166040604e-05, + "loss": 15.5628, + "step": 14624 + }, + { + "epoch": 0.6096036013505064, + "grad_norm": 330.0, + "learning_rate": 3.491938535599892e-05, + "loss": 13.0031, + "step": 14625 + }, + { + "epoch": 0.6096452836480347, + "grad_norm": 139.0, + "learning_rate": 3.4912949820802766e-05, + "loss": 10.3129, + "step": 14626 + }, + { + "epoch": 0.6096869659455629, + "grad_norm": 358.0, + "learning_rate": 3.490651456056945e-05, + "loss": 13.8785, + "step": 14627 + }, + { + "epoch": 0.6097286482430911, + "grad_norm": 532.0, + "learning_rate": 3.490007957541623e-05, + "loss": 17.3753, + "step": 14628 + }, + { + "epoch": 0.6097703305406194, + "grad_norm": 237.0, + "learning_rate": 3.4893644865460414e-05, + "loss": 9.9379, + "step": 14629 + }, + { + "epoch": 0.6098120128381477, + "grad_norm": 147.0, + "learning_rate": 3.488721043081925e-05, + "loss": 10.6877, + "step": 14630 + }, + { + "epoch": 0.6098536951356759, + "grad_norm": 135.0, + "learning_rate": 3.488077627161004e-05, + "loss": 9.8128, + "step": 14631 + }, + { + "epoch": 0.6098953774332041, + "grad_norm": 288.0, + "learning_rate": 3.487434238795e-05, + "loss": 12.0631, + "step": 14632 + }, + { + "epoch": 0.6099370597307323, + "grad_norm": 163.0, + "learning_rate": 3.486790877995643e-05, + "loss": 9.0004, + "step": 14633 + }, + { + "epoch": 0.6099787420282606, + "grad_norm": 936.0, + "learning_rate": 3.486147544774655e-05, + "loss": 21.3756, + "step": 14634 + }, + { + "epoch": 0.6100204243257888, + "grad_norm": 278.0, + "learning_rate": 3.485504239143764e-05, + "loss": 12.0628, + "step": 14635 + }, + { + "epoch": 0.6100621066233171, + "grad_norm": 350.0, + "learning_rate": 3.484860961114691e-05, + "loss": 14.5003, + "step": 14636 + }, + { + "epoch": 0.6101037889208453, + "grad_norm": 472.0, + "learning_rate": 3.4842177106991625e-05, + "loss": 15.9377, + "step": 14637 + }, + { + "epoch": 0.6101454712183736, + "grad_norm": 416.0, + "learning_rate": 3.483574487908901e-05, + "loss": 15.8768, + "step": 14638 + }, + { + "epoch": 0.6101871535159018, + "grad_norm": 434.0, + "learning_rate": 3.4829312927556285e-05, + "loss": 16.5002, + "step": 14639 + }, + { + "epoch": 0.61022883581343, + "grad_norm": 66.5, + "learning_rate": 3.4822881252510675e-05, + "loss": 8.6263, + "step": 14640 + }, + { + "epoch": 0.6102705181109582, + "grad_norm": 237.0, + "learning_rate": 3.4816449854069414e-05, + "loss": 12.1878, + "step": 14641 + }, + { + "epoch": 0.6103122004084865, + "grad_norm": 346.0, + "learning_rate": 3.481001873234968e-05, + "loss": 14.751, + "step": 14642 + }, + { + "epoch": 0.6103538827060148, + "grad_norm": 280.0, + "learning_rate": 3.480358788746874e-05, + "loss": 13.5004, + "step": 14643 + }, + { + "epoch": 0.610395565003543, + "grad_norm": 223.0, + "learning_rate": 3.479715731954373e-05, + "loss": 11.6877, + "step": 14644 + }, + { + "epoch": 0.6104372473010712, + "grad_norm": 402.0, + "learning_rate": 3.479072702869189e-05, + "loss": 15.8753, + "step": 14645 + }, + { + "epoch": 0.6104789295985995, + "grad_norm": 648.0, + "learning_rate": 3.4784297015030386e-05, + "loss": 20.0001, + "step": 14646 + }, + { + "epoch": 0.6105206118961277, + "grad_norm": 220.0, + "learning_rate": 3.477786727867644e-05, + "loss": 11.3128, + "step": 14647 + }, + { + "epoch": 0.6105622941936559, + "grad_norm": 230.0, + "learning_rate": 3.47714378197472e-05, + "loss": 11.9376, + "step": 14648 + }, + { + "epoch": 0.6106039764911841, + "grad_norm": 348.0, + "learning_rate": 3.476500863835986e-05, + "loss": 14.5627, + "step": 14649 + }, + { + "epoch": 0.6106456587887125, + "grad_norm": 420.0, + "learning_rate": 3.475857973463159e-05, + "loss": 16.5002, + "step": 14650 + }, + { + "epoch": 0.6106873410862407, + "grad_norm": 320.0, + "learning_rate": 3.475215110867957e-05, + "loss": 12.3128, + "step": 14651 + }, + { + "epoch": 0.6107290233837689, + "grad_norm": 604.0, + "learning_rate": 3.474572276062092e-05, + "loss": 19.1264, + "step": 14652 + }, + { + "epoch": 0.6107707056812971, + "grad_norm": 776.0, + "learning_rate": 3.4739294690572855e-05, + "loss": 22.2503, + "step": 14653 + }, + { + "epoch": 0.6108123879788254, + "grad_norm": 107.0, + "learning_rate": 3.4732866898652485e-05, + "loss": 9.188, + "step": 14654 + }, + { + "epoch": 0.6108540702763536, + "grad_norm": 92.5, + "learning_rate": 3.472643938497698e-05, + "loss": 9.7526, + "step": 14655 + }, + { + "epoch": 0.6108957525738818, + "grad_norm": 448.0, + "learning_rate": 3.4720012149663464e-05, + "loss": 17.2544, + "step": 14656 + }, + { + "epoch": 0.6109374348714101, + "grad_norm": 198.0, + "learning_rate": 3.4713585192829095e-05, + "loss": 11.5007, + "step": 14657 + }, + { + "epoch": 0.6109791171689384, + "grad_norm": 266.0, + "learning_rate": 3.470715851459098e-05, + "loss": 12.6252, + "step": 14658 + }, + { + "epoch": 0.6110207994664666, + "grad_norm": 239.0, + "learning_rate": 3.470073211506627e-05, + "loss": 11.3753, + "step": 14659 + }, + { + "epoch": 0.6110624817639948, + "grad_norm": 504.0, + "learning_rate": 3.4694305994372056e-05, + "loss": 16.2502, + "step": 14660 + }, + { + "epoch": 0.6111041640615231, + "grad_norm": 288.0, + "learning_rate": 3.46878801526255e-05, + "loss": 13.3127, + "step": 14661 + }, + { + "epoch": 0.6111458463590513, + "grad_norm": 208.0, + "learning_rate": 3.4681454589943666e-05, + "loss": 11.6883, + "step": 14662 + }, + { + "epoch": 0.6111875286565795, + "grad_norm": 644.0, + "learning_rate": 3.4675029306443695e-05, + "loss": 19.251, + "step": 14663 + }, + { + "epoch": 0.6112292109541078, + "grad_norm": 564.0, + "learning_rate": 3.4668604302242667e-05, + "loss": 19.7503, + "step": 14664 + }, + { + "epoch": 0.6112708932516361, + "grad_norm": 900.0, + "learning_rate": 3.4662179577457696e-05, + "loss": 22.6254, + "step": 14665 + }, + { + "epoch": 0.6113125755491643, + "grad_norm": 442.0, + "learning_rate": 3.465575513220585e-05, + "loss": 15.6255, + "step": 14666 + }, + { + "epoch": 0.6113542578466925, + "grad_norm": 175.0, + "learning_rate": 3.464933096660424e-05, + "loss": 10.8754, + "step": 14667 + }, + { + "epoch": 0.6113959401442207, + "grad_norm": 236.0, + "learning_rate": 3.4642907080769924e-05, + "loss": 12.5008, + "step": 14668 + }, + { + "epoch": 0.611437622441749, + "grad_norm": 129.0, + "learning_rate": 3.463648347482001e-05, + "loss": 10.813, + "step": 14669 + }, + { + "epoch": 0.6114793047392773, + "grad_norm": 156.0, + "learning_rate": 3.463006014887153e-05, + "loss": 11.3752, + "step": 14670 + }, + { + "epoch": 0.6115209870368055, + "grad_norm": 512.0, + "learning_rate": 3.462363710304159e-05, + "loss": 15.9381, + "step": 14671 + }, + { + "epoch": 0.6115626693343337, + "grad_norm": 680.0, + "learning_rate": 3.46172143374472e-05, + "loss": 19.7505, + "step": 14672 + }, + { + "epoch": 0.611604351631862, + "grad_norm": 384.0, + "learning_rate": 3.461079185220547e-05, + "loss": 15.0004, + "step": 14673 + }, + { + "epoch": 0.6116460339293902, + "grad_norm": 240.0, + "learning_rate": 3.460436964743341e-05, + "loss": 12.5041, + "step": 14674 + }, + { + "epoch": 0.6116877162269184, + "grad_norm": 362.0, + "learning_rate": 3.459794772324808e-05, + "loss": 13.6253, + "step": 14675 + }, + { + "epoch": 0.6117293985244466, + "grad_norm": 454.0, + "learning_rate": 3.459152607976652e-05, + "loss": 15.3757, + "step": 14676 + }, + { + "epoch": 0.611771080821975, + "grad_norm": 223.0, + "learning_rate": 3.458510471710578e-05, + "loss": 11.0003, + "step": 14677 + }, + { + "epoch": 0.6118127631195032, + "grad_norm": 708.0, + "learning_rate": 3.457868363538285e-05, + "loss": 19.2502, + "step": 14678 + }, + { + "epoch": 0.6118544454170314, + "grad_norm": 364.0, + "learning_rate": 3.45722628347148e-05, + "loss": 14.2501, + "step": 14679 + }, + { + "epoch": 0.6118961277145596, + "grad_norm": 536.0, + "learning_rate": 3.456584231521861e-05, + "loss": 15.8779, + "step": 14680 + }, + { + "epoch": 0.6119378100120879, + "grad_norm": 404.0, + "learning_rate": 3.455942207701134e-05, + "loss": 14.9377, + "step": 14681 + }, + { + "epoch": 0.6119794923096161, + "grad_norm": 133.0, + "learning_rate": 3.455300212020995e-05, + "loss": 10.5006, + "step": 14682 + }, + { + "epoch": 0.6120211746071443, + "grad_norm": 264.0, + "learning_rate": 3.454658244493149e-05, + "loss": 13.313, + "step": 14683 + }, + { + "epoch": 0.6120628569046725, + "grad_norm": 207.0, + "learning_rate": 3.454016305129292e-05, + "loss": 11.7503, + "step": 14684 + }, + { + "epoch": 0.6121045392022009, + "grad_norm": 290.0, + "learning_rate": 3.4533743939411264e-05, + "loss": 14.0627, + "step": 14685 + }, + { + "epoch": 0.6121462214997291, + "grad_norm": 322.0, + "learning_rate": 3.452732510940347e-05, + "loss": 13.6879, + "step": 14686 + }, + { + "epoch": 0.6121879037972573, + "grad_norm": 294.0, + "learning_rate": 3.4520906561386576e-05, + "loss": 13.1251, + "step": 14687 + }, + { + "epoch": 0.6122295860947855, + "grad_norm": 342.0, + "learning_rate": 3.451448829547753e-05, + "loss": 13.4386, + "step": 14688 + }, + { + "epoch": 0.6122712683923138, + "grad_norm": 132.0, + "learning_rate": 3.45080703117933e-05, + "loss": 10.9381, + "step": 14689 + }, + { + "epoch": 0.612312950689842, + "grad_norm": 231.0, + "learning_rate": 3.450165261045086e-05, + "loss": 13.1881, + "step": 14690 + }, + { + "epoch": 0.6123546329873703, + "grad_norm": 96.0, + "learning_rate": 3.44952351915672e-05, + "loss": 7.3761, + "step": 14691 + }, + { + "epoch": 0.6123963152848985, + "grad_norm": 376.0, + "learning_rate": 3.4488818055259234e-05, + "loss": 14.2502, + "step": 14692 + }, + { + "epoch": 0.6124379975824268, + "grad_norm": 398.0, + "learning_rate": 3.4482401201643955e-05, + "loss": 14.9403, + "step": 14693 + }, + { + "epoch": 0.612479679879955, + "grad_norm": 672.0, + "learning_rate": 3.4475984630838266e-05, + "loss": 18.6253, + "step": 14694 + }, + { + "epoch": 0.6125213621774832, + "grad_norm": 235.0, + "learning_rate": 3.446956834295916e-05, + "loss": 12.2573, + "step": 14695 + }, + { + "epoch": 0.6125630444750114, + "grad_norm": 588.0, + "learning_rate": 3.4463152338123526e-05, + "loss": 21.8751, + "step": 14696 + }, + { + "epoch": 0.6126047267725397, + "grad_norm": 318.0, + "learning_rate": 3.445673661644835e-05, + "loss": 13.2504, + "step": 14697 + }, + { + "epoch": 0.612646409070068, + "grad_norm": 424.0, + "learning_rate": 3.44503211780505e-05, + "loss": 16.1255, + "step": 14698 + }, + { + "epoch": 0.6126880913675962, + "grad_norm": 129.0, + "learning_rate": 3.444390602304695e-05, + "loss": 8.6255, + "step": 14699 + }, + { + "epoch": 0.6127297736651244, + "grad_norm": 816.0, + "learning_rate": 3.443749115155457e-05, + "loss": 23.7501, + "step": 14700 + }, + { + "epoch": 0.6127714559626527, + "grad_norm": 492.0, + "learning_rate": 3.4431076563690314e-05, + "loss": 17.2502, + "step": 14701 + }, + { + "epoch": 0.6128131382601809, + "grad_norm": 332.0, + "learning_rate": 3.442466225957106e-05, + "loss": 12.8128, + "step": 14702 + }, + { + "epoch": 0.6128548205577091, + "grad_norm": 332.0, + "learning_rate": 3.441824823931372e-05, + "loss": 13.5629, + "step": 14703 + }, + { + "epoch": 0.6128965028552373, + "grad_norm": 680.0, + "learning_rate": 3.441183450303518e-05, + "loss": 20.1256, + "step": 14704 + }, + { + "epoch": 0.6129381851527657, + "grad_norm": 560.0, + "learning_rate": 3.440542105085237e-05, + "loss": 18.6252, + "step": 14705 + }, + { + "epoch": 0.6129798674502939, + "grad_norm": 908.0, + "learning_rate": 3.439900788288212e-05, + "loss": 24.1253, + "step": 14706 + }, + { + "epoch": 0.6130215497478221, + "grad_norm": 368.0, + "learning_rate": 3.439259499924134e-05, + "loss": 14.3128, + "step": 14707 + }, + { + "epoch": 0.6130632320453503, + "grad_norm": 316.0, + "learning_rate": 3.438618240004691e-05, + "loss": 13.7502, + "step": 14708 + }, + { + "epoch": 0.6131049143428786, + "grad_norm": 133.0, + "learning_rate": 3.4379770085415694e-05, + "loss": 10.0006, + "step": 14709 + }, + { + "epoch": 0.6131465966404068, + "grad_norm": 160.0, + "learning_rate": 3.4373358055464534e-05, + "loss": 10.7506, + "step": 14710 + }, + { + "epoch": 0.613188278937935, + "grad_norm": 620.0, + "learning_rate": 3.4366946310310334e-05, + "loss": 18.7533, + "step": 14711 + }, + { + "epoch": 0.6132299612354633, + "grad_norm": 460.0, + "learning_rate": 3.436053485006991e-05, + "loss": 16.3757, + "step": 14712 + }, + { + "epoch": 0.6132716435329916, + "grad_norm": 264.0, + "learning_rate": 3.435412367486013e-05, + "loss": 10.7503, + "step": 14713 + }, + { + "epoch": 0.6133133258305198, + "grad_norm": 247.0, + "learning_rate": 3.4347712784797834e-05, + "loss": 13.0013, + "step": 14714 + }, + { + "epoch": 0.613355008128048, + "grad_norm": 400.0, + "learning_rate": 3.434130217999987e-05, + "loss": 15.7506, + "step": 14715 + }, + { + "epoch": 0.6133966904255762, + "grad_norm": 366.0, + "learning_rate": 3.433489186058305e-05, + "loss": 15.2502, + "step": 14716 + }, + { + "epoch": 0.6134383727231045, + "grad_norm": 362.0, + "learning_rate": 3.432848182666424e-05, + "loss": 14.0002, + "step": 14717 + }, + { + "epoch": 0.6134800550206327, + "grad_norm": 153.0, + "learning_rate": 3.4322072078360215e-05, + "loss": 9.1255, + "step": 14718 + }, + { + "epoch": 0.613521737318161, + "grad_norm": 204.0, + "learning_rate": 3.431566261578783e-05, + "loss": 11.3754, + "step": 14719 + }, + { + "epoch": 0.6135634196156892, + "grad_norm": 446.0, + "learning_rate": 3.4309253439063884e-05, + "loss": 16.2507, + "step": 14720 + }, + { + "epoch": 0.6136051019132175, + "grad_norm": 756.0, + "learning_rate": 3.43028445483052e-05, + "loss": 21.2513, + "step": 14721 + }, + { + "epoch": 0.6136467842107457, + "grad_norm": 288.0, + "learning_rate": 3.4296435943628545e-05, + "loss": 12.8127, + "step": 14722 + }, + { + "epoch": 0.6136884665082739, + "grad_norm": 111.0, + "learning_rate": 3.4290027625150755e-05, + "loss": 8.313, + "step": 14723 + }, + { + "epoch": 0.6137301488058021, + "grad_norm": 150.0, + "learning_rate": 3.428361959298859e-05, + "loss": 8.7503, + "step": 14724 + }, + { + "epoch": 0.6137718311033304, + "grad_norm": 124.5, + "learning_rate": 3.427721184725887e-05, + "loss": 7.7189, + "step": 14725 + }, + { + "epoch": 0.6138135134008587, + "grad_norm": 832.0, + "learning_rate": 3.427080438807835e-05, + "loss": 20.0049, + "step": 14726 + }, + { + "epoch": 0.6138551956983869, + "grad_norm": 272.0, + "learning_rate": 3.426439721556383e-05, + "loss": 13.8753, + "step": 14727 + }, + { + "epoch": 0.6138968779959151, + "grad_norm": 480.0, + "learning_rate": 3.4257990329832043e-05, + "loss": 16.7502, + "step": 14728 + }, + { + "epoch": 0.6139385602934434, + "grad_norm": 426.0, + "learning_rate": 3.4251583730999804e-05, + "loss": 16.3754, + "step": 14729 + }, + { + "epoch": 0.6139802425909716, + "grad_norm": 360.0, + "learning_rate": 3.4245177419183825e-05, + "loss": 14.0627, + "step": 14730 + }, + { + "epoch": 0.6140219248884998, + "grad_norm": 692.0, + "learning_rate": 3.4238771394500915e-05, + "loss": 20.3751, + "step": 14731 + }, + { + "epoch": 0.6140636071860281, + "grad_norm": 133.0, + "learning_rate": 3.423236565706777e-05, + "loss": 9.1877, + "step": 14732 + }, + { + "epoch": 0.6141052894835564, + "grad_norm": 426.0, + "learning_rate": 3.422596020700118e-05, + "loss": 17.1255, + "step": 14733 + }, + { + "epoch": 0.6141469717810846, + "grad_norm": 684.0, + "learning_rate": 3.421955504441785e-05, + "loss": 20.1253, + "step": 14734 + }, + { + "epoch": 0.6141886540786128, + "grad_norm": 256.0, + "learning_rate": 3.421315016943455e-05, + "loss": 12.876, + "step": 14735 + }, + { + "epoch": 0.6142303363761411, + "grad_norm": 480.0, + "learning_rate": 3.420674558216796e-05, + "loss": 11.6882, + "step": 14736 + }, + { + "epoch": 0.6142720186736693, + "grad_norm": 620.0, + "learning_rate": 3.420034128273487e-05, + "loss": 18.7501, + "step": 14737 + }, + { + "epoch": 0.6143137009711975, + "grad_norm": 226.0, + "learning_rate": 3.419393727125193e-05, + "loss": 11.7505, + "step": 14738 + }, + { + "epoch": 0.6143553832687257, + "grad_norm": 322.0, + "learning_rate": 3.418753354783591e-05, + "loss": 12.5001, + "step": 14739 + }, + { + "epoch": 0.6143970655662541, + "grad_norm": 239.0, + "learning_rate": 3.418113011260347e-05, + "loss": 12.8752, + "step": 14740 + }, + { + "epoch": 0.6144387478637823, + "grad_norm": 119.0, + "learning_rate": 3.4174726965671364e-05, + "loss": 9.1878, + "step": 14741 + }, + { + "epoch": 0.6144804301613105, + "grad_norm": 243.0, + "learning_rate": 3.416832410715625e-05, + "loss": 11.6251, + "step": 14742 + }, + { + "epoch": 0.6145221124588387, + "grad_norm": 832.0, + "learning_rate": 3.4161921537174844e-05, + "loss": 24.1256, + "step": 14743 + }, + { + "epoch": 0.614563794756367, + "grad_norm": 306.0, + "learning_rate": 3.41555192558438e-05, + "loss": 12.0002, + "step": 14744 + }, + { + "epoch": 0.6146054770538952, + "grad_norm": 107.0, + "learning_rate": 3.4149117263279864e-05, + "loss": 8.5629, + "step": 14745 + }, + { + "epoch": 0.6146471593514234, + "grad_norm": 644.0, + "learning_rate": 3.414271555959964e-05, + "loss": 20.0002, + "step": 14746 + }, + { + "epoch": 0.6146888416489517, + "grad_norm": 255.0, + "learning_rate": 3.413631414491985e-05, + "loss": 13.0635, + "step": 14747 + }, + { + "epoch": 0.61473052394648, + "grad_norm": 2080.0, + "learning_rate": 3.412991301935713e-05, + "loss": 38.7534, + "step": 14748 + }, + { + "epoch": 0.6147722062440082, + "grad_norm": 468.0, + "learning_rate": 3.4123512183028166e-05, + "loss": 16.5018, + "step": 14749 + }, + { + "epoch": 0.6148138885415364, + "grad_norm": 462.0, + "learning_rate": 3.4117111636049585e-05, + "loss": 18.2503, + "step": 14750 + }, + { + "epoch": 0.6148555708390646, + "grad_norm": 147.0, + "learning_rate": 3.411071137853807e-05, + "loss": 10.3133, + "step": 14751 + }, + { + "epoch": 0.6148972531365929, + "grad_norm": 216.0, + "learning_rate": 3.410431141061025e-05, + "loss": 11.6877, + "step": 14752 + }, + { + "epoch": 0.6149389354341211, + "grad_norm": 282.0, + "learning_rate": 3.4097911732382756e-05, + "loss": 11.4377, + "step": 14753 + }, + { + "epoch": 0.6149806177316494, + "grad_norm": 140.0, + "learning_rate": 3.409151234397223e-05, + "loss": 9.5001, + "step": 14754 + }, + { + "epoch": 0.6150223000291776, + "grad_norm": 412.0, + "learning_rate": 3.408511324549532e-05, + "loss": 17.1275, + "step": 14755 + }, + { + "epoch": 0.6150639823267059, + "grad_norm": 560.0, + "learning_rate": 3.4078714437068616e-05, + "loss": 17.0028, + "step": 14756 + }, + { + "epoch": 0.6151056646242341, + "grad_norm": 177.0, + "learning_rate": 3.407231591880878e-05, + "loss": 10.8753, + "step": 14757 + }, + { + "epoch": 0.6151473469217623, + "grad_norm": 840.0, + "learning_rate": 3.406591769083237e-05, + "loss": 22.6263, + "step": 14758 + }, + { + "epoch": 0.6151890292192905, + "grad_norm": 208.0, + "learning_rate": 3.405951975325605e-05, + "loss": 10.8751, + "step": 14759 + }, + { + "epoch": 0.6152307115168189, + "grad_norm": 280.0, + "learning_rate": 3.405312210619638e-05, + "loss": 13.4384, + "step": 14760 + }, + { + "epoch": 0.6152723938143471, + "grad_norm": 660.0, + "learning_rate": 3.404672474976999e-05, + "loss": 19.7502, + "step": 14761 + }, + { + "epoch": 0.6153140761118753, + "grad_norm": 215.0, + "learning_rate": 3.404032768409344e-05, + "loss": 10.0003, + "step": 14762 + }, + { + "epoch": 0.6153557584094035, + "grad_norm": 960.0, + "learning_rate": 3.403393090928335e-05, + "loss": 24.5032, + "step": 14763 + }, + { + "epoch": 0.6153974407069318, + "grad_norm": 185.0, + "learning_rate": 3.402753442545628e-05, + "loss": 6.0021, + "step": 14764 + }, + { + "epoch": 0.61543912300446, + "grad_norm": 466.0, + "learning_rate": 3.402113823272881e-05, + "loss": 15.5006, + "step": 14765 + }, + { + "epoch": 0.6154808053019882, + "grad_norm": 410.0, + "learning_rate": 3.4014742331217516e-05, + "loss": 15.938, + "step": 14766 + }, + { + "epoch": 0.6155224875995164, + "grad_norm": 544.0, + "learning_rate": 3.4008346721038975e-05, + "loss": 16.8752, + "step": 14767 + }, + { + "epoch": 0.6155641698970448, + "grad_norm": 260.0, + "learning_rate": 3.400195140230971e-05, + "loss": 14.3758, + "step": 14768 + }, + { + "epoch": 0.615605852194573, + "grad_norm": 145.0, + "learning_rate": 3.399555637514633e-05, + "loss": 8.6878, + "step": 14769 + }, + { + "epoch": 0.6156475344921012, + "grad_norm": 250.0, + "learning_rate": 3.3989161639665326e-05, + "loss": 12.6257, + "step": 14770 + }, + { + "epoch": 0.6156892167896294, + "grad_norm": 328.0, + "learning_rate": 3.39827671959833e-05, + "loss": 12.1879, + "step": 14771 + }, + { + "epoch": 0.6157308990871577, + "grad_norm": 62.25, + "learning_rate": 3.397637304421674e-05, + "loss": 8.4385, + "step": 14772 + }, + { + "epoch": 0.6157725813846859, + "grad_norm": 110.5, + "learning_rate": 3.396997918448223e-05, + "loss": 8.8128, + "step": 14773 + }, + { + "epoch": 0.6158142636822141, + "grad_norm": 604.0, + "learning_rate": 3.3963585616896244e-05, + "loss": 18.1256, + "step": 14774 + }, + { + "epoch": 0.6158559459797424, + "grad_norm": 504.0, + "learning_rate": 3.395719234157535e-05, + "loss": 19.1253, + "step": 14775 + }, + { + "epoch": 0.6158976282772707, + "grad_norm": 612.0, + "learning_rate": 3.3950799358636045e-05, + "loss": 20.1252, + "step": 14776 + }, + { + "epoch": 0.6159393105747989, + "grad_norm": 788.0, + "learning_rate": 3.394440666819485e-05, + "loss": 20.7526, + "step": 14777 + }, + { + "epoch": 0.6159809928723271, + "grad_norm": 79.5, + "learning_rate": 3.393801427036826e-05, + "loss": 8.564, + "step": 14778 + }, + { + "epoch": 0.6160226751698553, + "grad_norm": 338.0, + "learning_rate": 3.3931622165272803e-05, + "loss": 15.1885, + "step": 14779 + }, + { + "epoch": 0.6160643574673836, + "grad_norm": 202.0, + "learning_rate": 3.3925230353024935e-05, + "loss": 11.0015, + "step": 14780 + }, + { + "epoch": 0.6161060397649119, + "grad_norm": 460.0, + "learning_rate": 3.391883883374119e-05, + "loss": 16.6252, + "step": 14781 + }, + { + "epoch": 0.6161477220624401, + "grad_norm": 402.0, + "learning_rate": 3.391244760753802e-05, + "loss": 12.6916, + "step": 14782 + }, + { + "epoch": 0.6161894043599683, + "grad_norm": 696.0, + "learning_rate": 3.390605667453195e-05, + "loss": 22.0003, + "step": 14783 + }, + { + "epoch": 0.6162310866574966, + "grad_norm": 216.0, + "learning_rate": 3.389966603483939e-05, + "loss": 12.1256, + "step": 14784 + }, + { + "epoch": 0.6162727689550248, + "grad_norm": 704.0, + "learning_rate": 3.389327568857687e-05, + "loss": 18.7555, + "step": 14785 + }, + { + "epoch": 0.616314451252553, + "grad_norm": 224.0, + "learning_rate": 3.388688563586081e-05, + "loss": 12.8753, + "step": 14786 + }, + { + "epoch": 0.6163561335500812, + "grad_norm": 436.0, + "learning_rate": 3.3880495876807716e-05, + "loss": 15.6253, + "step": 14787 + }, + { + "epoch": 0.6163978158476096, + "grad_norm": 444.0, + "learning_rate": 3.387410641153399e-05, + "loss": 16.5005, + "step": 14788 + }, + { + "epoch": 0.6164394981451378, + "grad_norm": 1656.0, + "learning_rate": 3.3867717240156115e-05, + "loss": 31.2534, + "step": 14789 + }, + { + "epoch": 0.616481180442666, + "grad_norm": 231.0, + "learning_rate": 3.3861328362790524e-05, + "loss": 11.938, + "step": 14790 + }, + { + "epoch": 0.6165228627401942, + "grad_norm": 372.0, + "learning_rate": 3.385493977955366e-05, + "loss": 14.3754, + "step": 14791 + }, + { + "epoch": 0.6165645450377225, + "grad_norm": 476.0, + "learning_rate": 3.384855149056194e-05, + "loss": 17.7502, + "step": 14792 + }, + { + "epoch": 0.6166062273352507, + "grad_norm": 406.0, + "learning_rate": 3.3842163495931826e-05, + "loss": 17.1281, + "step": 14793 + }, + { + "epoch": 0.6166479096327789, + "grad_norm": 209.0, + "learning_rate": 3.38357757957797e-05, + "loss": 11.3754, + "step": 14794 + }, + { + "epoch": 0.6166895919303071, + "grad_norm": 972.0, + "learning_rate": 3.3829388390222006e-05, + "loss": 27.2505, + "step": 14795 + }, + { + "epoch": 0.6167312742278355, + "grad_norm": 124.5, + "learning_rate": 3.382300127937513e-05, + "loss": 9.5626, + "step": 14796 + }, + { + "epoch": 0.6167729565253637, + "grad_norm": 284.0, + "learning_rate": 3.381661446335551e-05, + "loss": 11.6253, + "step": 14797 + }, + { + "epoch": 0.6168146388228919, + "grad_norm": 320.0, + "learning_rate": 3.38102279422795e-05, + "loss": 14.5627, + "step": 14798 + }, + { + "epoch": 0.6168563211204201, + "grad_norm": 145.0, + "learning_rate": 3.3803841716263564e-05, + "loss": 8.1255, + "step": 14799 + }, + { + "epoch": 0.6168980034179484, + "grad_norm": 342.0, + "learning_rate": 3.379745578542401e-05, + "loss": 14.0641, + "step": 14800 + }, + { + "epoch": 0.6169396857154766, + "grad_norm": 528.0, + "learning_rate": 3.379107014987728e-05, + "loss": 17.0003, + "step": 14801 + }, + { + "epoch": 0.6169813680130048, + "grad_norm": 506.0, + "learning_rate": 3.3784684809739745e-05, + "loss": 18.0002, + "step": 14802 + }, + { + "epoch": 0.6170230503105332, + "grad_norm": 544.0, + "learning_rate": 3.3778299765127753e-05, + "loss": 17.6254, + "step": 14803 + }, + { + "epoch": 0.6170647326080614, + "grad_norm": 556.0, + "learning_rate": 3.3771915016157684e-05, + "loss": 16.8786, + "step": 14804 + }, + { + "epoch": 0.6171064149055896, + "grad_norm": 90.0, + "learning_rate": 3.3765530562945926e-05, + "loss": 10.3133, + "step": 14805 + }, + { + "epoch": 0.6171480972031178, + "grad_norm": 189.0, + "learning_rate": 3.3759146405608794e-05, + "loss": 11.5004, + "step": 14806 + }, + { + "epoch": 0.6171897795006461, + "grad_norm": 338.0, + "learning_rate": 3.3752762544262676e-05, + "loss": 14.8128, + "step": 14807 + }, + { + "epoch": 0.6172314617981743, + "grad_norm": 392.0, + "learning_rate": 3.374637897902389e-05, + "loss": 15.5626, + "step": 14808 + }, + { + "epoch": 0.6172731440957026, + "grad_norm": 464.0, + "learning_rate": 3.373999571000881e-05, + "loss": 15.8753, + "step": 14809 + }, + { + "epoch": 0.6173148263932308, + "grad_norm": 224.0, + "learning_rate": 3.373361273733373e-05, + "loss": 12.0001, + "step": 14810 + }, + { + "epoch": 0.6173565086907591, + "grad_norm": 192.0, + "learning_rate": 3.372723006111501e-05, + "loss": 11.126, + "step": 14811 + }, + { + "epoch": 0.6173981909882873, + "grad_norm": 628.0, + "learning_rate": 3.372084768146896e-05, + "loss": 18.7535, + "step": 14812 + }, + { + "epoch": 0.6174398732858155, + "grad_norm": 768.0, + "learning_rate": 3.371446559851191e-05, + "loss": 21.5003, + "step": 14813 + }, + { + "epoch": 0.6174815555833437, + "grad_norm": 304.0, + "learning_rate": 3.370808381236018e-05, + "loss": 14.5627, + "step": 14814 + }, + { + "epoch": 0.617523237880872, + "grad_norm": 784.0, + "learning_rate": 3.370170232313006e-05, + "loss": 19.5043, + "step": 14815 + }, + { + "epoch": 0.6175649201784003, + "grad_norm": 255.0, + "learning_rate": 3.369532113093785e-05, + "loss": 11.9377, + "step": 14816 + }, + { + "epoch": 0.6176066024759285, + "grad_norm": 334.0, + "learning_rate": 3.3688940235899894e-05, + "loss": 14.2503, + "step": 14817 + }, + { + "epoch": 0.6176482847734567, + "grad_norm": 138.0, + "learning_rate": 3.368255963813241e-05, + "loss": 9.6257, + "step": 14818 + }, + { + "epoch": 0.617689967070985, + "grad_norm": 238.0, + "learning_rate": 3.3676179337751756e-05, + "loss": 11.4377, + "step": 14819 + }, + { + "epoch": 0.6177316493685132, + "grad_norm": 164.0, + "learning_rate": 3.366979933487416e-05, + "loss": 9.1881, + "step": 14820 + }, + { + "epoch": 0.6177733316660414, + "grad_norm": 338.0, + "learning_rate": 3.3663419629615946e-05, + "loss": 14.0005, + "step": 14821 + }, + { + "epoch": 0.6178150139635696, + "grad_norm": 310.0, + "learning_rate": 3.3657040222093336e-05, + "loss": 13.1254, + "step": 14822 + }, + { + "epoch": 0.617856696261098, + "grad_norm": 300.0, + "learning_rate": 3.3650661112422644e-05, + "loss": 13.1885, + "step": 14823 + }, + { + "epoch": 0.6178983785586262, + "grad_norm": 97.0, + "learning_rate": 3.364428230072008e-05, + "loss": 7.2815, + "step": 14824 + }, + { + "epoch": 0.6179400608561544, + "grad_norm": 350.0, + "learning_rate": 3.3637903787101944e-05, + "loss": 15.3752, + "step": 14825 + }, + { + "epoch": 0.6179817431536826, + "grad_norm": 332.0, + "learning_rate": 3.3631525571684444e-05, + "loss": 14.1252, + "step": 14826 + }, + { + "epoch": 0.6180234254512109, + "grad_norm": 560.0, + "learning_rate": 3.362514765458384e-05, + "loss": 16.2512, + "step": 14827 + }, + { + "epoch": 0.6180651077487391, + "grad_norm": 668.0, + "learning_rate": 3.361877003591638e-05, + "loss": 21.2504, + "step": 14828 + }, + { + "epoch": 0.6181067900462673, + "grad_norm": 212.0, + "learning_rate": 3.3612392715798306e-05, + "loss": 10.5014, + "step": 14829 + }, + { + "epoch": 0.6181484723437956, + "grad_norm": 227.0, + "learning_rate": 3.360601569434581e-05, + "loss": 12.0006, + "step": 14830 + }, + { + "epoch": 0.6181901546413239, + "grad_norm": 216.0, + "learning_rate": 3.359963897167515e-05, + "loss": 12.3753, + "step": 14831 + }, + { + "epoch": 0.6182318369388521, + "grad_norm": 808.0, + "learning_rate": 3.3593262547902515e-05, + "loss": 20.8756, + "step": 14832 + }, + { + "epoch": 0.6182735192363803, + "grad_norm": 272.0, + "learning_rate": 3.3586886423144135e-05, + "loss": 10.6258, + "step": 14833 + }, + { + "epoch": 0.6183152015339085, + "grad_norm": 85.5, + "learning_rate": 3.358051059751619e-05, + "loss": 9.6877, + "step": 14834 + }, + { + "epoch": 0.6183568838314368, + "grad_norm": 114.5, + "learning_rate": 3.357413507113493e-05, + "loss": 9.6252, + "step": 14835 + }, + { + "epoch": 0.618398566128965, + "grad_norm": 796.0, + "learning_rate": 3.356775984411647e-05, + "loss": 22.6274, + "step": 14836 + }, + { + "epoch": 0.6184402484264933, + "grad_norm": 320.0, + "learning_rate": 3.3561384916577086e-05, + "loss": 13.0003, + "step": 14837 + }, + { + "epoch": 0.6184819307240215, + "grad_norm": 904.0, + "learning_rate": 3.35550102886329e-05, + "loss": 24.7503, + "step": 14838 + }, + { + "epoch": 0.6185236130215498, + "grad_norm": 90.5, + "learning_rate": 3.354863596040012e-05, + "loss": 10.1881, + "step": 14839 + }, + { + "epoch": 0.618565295319078, + "grad_norm": 213.0, + "learning_rate": 3.3542261931994905e-05, + "loss": 12.438, + "step": 14840 + }, + { + "epoch": 0.6186069776166062, + "grad_norm": 185.0, + "learning_rate": 3.353588820353343e-05, + "loss": 10.7503, + "step": 14841 + }, + { + "epoch": 0.6186486599141344, + "grad_norm": 91.0, + "learning_rate": 3.352951477513184e-05, + "loss": 10.5005, + "step": 14842 + }, + { + "epoch": 0.6186903422116627, + "grad_norm": 1112.0, + "learning_rate": 3.352314164690633e-05, + "loss": 30.5008, + "step": 14843 + }, + { + "epoch": 0.618732024509191, + "grad_norm": 416.0, + "learning_rate": 3.351676881897301e-05, + "loss": 15.7503, + "step": 14844 + }, + { + "epoch": 0.6187737068067192, + "grad_norm": 836.0, + "learning_rate": 3.351039629144805e-05, + "loss": 24.8755, + "step": 14845 + }, + { + "epoch": 0.6188153891042474, + "grad_norm": 540.0, + "learning_rate": 3.3504024064447575e-05, + "loss": 16.5032, + "step": 14846 + }, + { + "epoch": 0.6188570714017757, + "grad_norm": 274.0, + "learning_rate": 3.349765213808774e-05, + "loss": 12.6876, + "step": 14847 + }, + { + "epoch": 0.6188987536993039, + "grad_norm": 150.0, + "learning_rate": 3.3491280512484634e-05, + "loss": 9.5627, + "step": 14848 + }, + { + "epoch": 0.6189404359968321, + "grad_norm": 524.0, + "learning_rate": 3.3484909187754434e-05, + "loss": 17.3752, + "step": 14849 + }, + { + "epoch": 0.6189821182943603, + "grad_norm": 368.0, + "learning_rate": 3.3478538164013204e-05, + "loss": 14.8127, + "step": 14850 + }, + { + "epoch": 0.6190238005918887, + "grad_norm": 352.0, + "learning_rate": 3.34721674413771e-05, + "loss": 14.313, + "step": 14851 + }, + { + "epoch": 0.6190654828894169, + "grad_norm": 1064.0, + "learning_rate": 3.346579701996221e-05, + "loss": 27.7502, + "step": 14852 + }, + { + "epoch": 0.6191071651869451, + "grad_norm": 472.0, + "learning_rate": 3.345942689988463e-05, + "loss": 16.8751, + "step": 14853 + }, + { + "epoch": 0.6191488474844733, + "grad_norm": 446.0, + "learning_rate": 3.345305708126046e-05, + "loss": 16.6251, + "step": 14854 + }, + { + "epoch": 0.6191905297820016, + "grad_norm": 262.0, + "learning_rate": 3.344668756420581e-05, + "loss": 10.8757, + "step": 14855 + }, + { + "epoch": 0.6192322120795298, + "grad_norm": 186.0, + "learning_rate": 3.344031834883673e-05, + "loss": 9.5008, + "step": 14856 + }, + { + "epoch": 0.619273894377058, + "grad_norm": 462.0, + "learning_rate": 3.343394943526934e-05, + "loss": 16.2503, + "step": 14857 + }, + { + "epoch": 0.6193155766745863, + "grad_norm": 364.0, + "learning_rate": 3.342758082361967e-05, + "loss": 14.7504, + "step": 14858 + }, + { + "epoch": 0.6193572589721146, + "grad_norm": 185.0, + "learning_rate": 3.342121251400383e-05, + "loss": 9.7501, + "step": 14859 + }, + { + "epoch": 0.6193989412696428, + "grad_norm": 1304.0, + "learning_rate": 3.341484450653784e-05, + "loss": 27.6297, + "step": 14860 + }, + { + "epoch": 0.619440623567171, + "grad_norm": 83.0, + "learning_rate": 3.3408476801337815e-05, + "loss": 7.3449, + "step": 14861 + }, + { + "epoch": 0.6194823058646992, + "grad_norm": 624.0, + "learning_rate": 3.340210939851974e-05, + "loss": 17.5001, + "step": 14862 + }, + { + "epoch": 0.6195239881622275, + "grad_norm": 326.0, + "learning_rate": 3.3395742298199715e-05, + "loss": 14.4377, + "step": 14863 + }, + { + "epoch": 0.6195656704597557, + "grad_norm": 464.0, + "learning_rate": 3.3389375500493744e-05, + "loss": 17.3755, + "step": 14864 + }, + { + "epoch": 0.619607352757284, + "grad_norm": 368.0, + "learning_rate": 3.33830090055179e-05, + "loss": 14.8761, + "step": 14865 + }, + { + "epoch": 0.6196490350548122, + "grad_norm": 276.0, + "learning_rate": 3.3376642813388165e-05, + "loss": 13.2502, + "step": 14866 + }, + { + "epoch": 0.6196907173523405, + "grad_norm": 180.0, + "learning_rate": 3.3370276924220616e-05, + "loss": 11.4379, + "step": 14867 + }, + { + "epoch": 0.6197323996498687, + "grad_norm": 382.0, + "learning_rate": 3.336391133813123e-05, + "loss": 16.3754, + "step": 14868 + }, + { + "epoch": 0.6197740819473969, + "grad_norm": 248.0, + "learning_rate": 3.3357546055236055e-05, + "loss": 12.9379, + "step": 14869 + }, + { + "epoch": 0.6198157642449251, + "grad_norm": 158.0, + "learning_rate": 3.3351181075651055e-05, + "loss": 9.6876, + "step": 14870 + }, + { + "epoch": 0.6198574465424534, + "grad_norm": 57.5, + "learning_rate": 3.3344816399492285e-05, + "loss": 8.0008, + "step": 14871 + }, + { + "epoch": 0.6198991288399817, + "grad_norm": 296.0, + "learning_rate": 3.3338452026875686e-05, + "loss": 13.2502, + "step": 14872 + }, + { + "epoch": 0.6199408111375099, + "grad_norm": 430.0, + "learning_rate": 3.333208795791731e-05, + "loss": 15.688, + "step": 14873 + }, + { + "epoch": 0.6199824934350381, + "grad_norm": 132.0, + "learning_rate": 3.332572419273308e-05, + "loss": 9.3753, + "step": 14874 + }, + { + "epoch": 0.6200241757325664, + "grad_norm": 312.0, + "learning_rate": 3.3319360731439034e-05, + "loss": 13.4378, + "step": 14875 + }, + { + "epoch": 0.6200658580300946, + "grad_norm": 784.0, + "learning_rate": 3.3312997574151095e-05, + "loss": 22.1252, + "step": 14876 + }, + { + "epoch": 0.6201075403276228, + "grad_norm": 676.0, + "learning_rate": 3.3306634720985266e-05, + "loss": 19.5002, + "step": 14877 + }, + { + "epoch": 0.6201492226251512, + "grad_norm": 454.0, + "learning_rate": 3.3300272172057505e-05, + "loss": 16.3755, + "step": 14878 + }, + { + "epoch": 0.6201909049226794, + "grad_norm": 486.0, + "learning_rate": 3.329390992748377e-05, + "loss": 17.5002, + "step": 14879 + }, + { + "epoch": 0.6202325872202076, + "grad_norm": 448.0, + "learning_rate": 3.328754798737998e-05, + "loss": 15.6878, + "step": 14880 + }, + { + "epoch": 0.6202742695177358, + "grad_norm": 406.0, + "learning_rate": 3.328118635186215e-05, + "loss": 16.6253, + "step": 14881 + }, + { + "epoch": 0.6203159518152641, + "grad_norm": 434.0, + "learning_rate": 3.3274825021046164e-05, + "loss": 14.2519, + "step": 14882 + }, + { + "epoch": 0.6203576341127923, + "grad_norm": 240.0, + "learning_rate": 3.326846399504799e-05, + "loss": 11.4381, + "step": 14883 + }, + { + "epoch": 0.6203993164103205, + "grad_norm": 156.0, + "learning_rate": 3.326210327398352e-05, + "loss": 10.1253, + "step": 14884 + }, + { + "epoch": 0.6204409987078487, + "grad_norm": 105.0, + "learning_rate": 3.3255742857968734e-05, + "loss": 8.5629, + "step": 14885 + }, + { + "epoch": 0.6204826810053771, + "grad_norm": 370.0, + "learning_rate": 3.324938274711949e-05, + "loss": 17.3752, + "step": 14886 + }, + { + "epoch": 0.6205243633029053, + "grad_norm": 258.0, + "learning_rate": 3.324302294155177e-05, + "loss": 10.6253, + "step": 14887 + }, + { + "epoch": 0.6205660456004335, + "grad_norm": 272.0, + "learning_rate": 3.3236663441381413e-05, + "loss": 11.2502, + "step": 14888 + }, + { + "epoch": 0.6206077278979617, + "grad_norm": 254.0, + "learning_rate": 3.3230304246724364e-05, + "loss": 12.3131, + "step": 14889 + }, + { + "epoch": 0.62064941019549, + "grad_norm": 864.0, + "learning_rate": 3.3223945357696506e-05, + "loss": 22.377, + "step": 14890 + }, + { + "epoch": 0.6206910924930182, + "grad_norm": 181.0, + "learning_rate": 3.3217586774413737e-05, + "loss": 11.6878, + "step": 14891 + }, + { + "epoch": 0.6207327747905464, + "grad_norm": 354.0, + "learning_rate": 3.321122849699193e-05, + "loss": 14.5642, + "step": 14892 + }, + { + "epoch": 0.6207744570880747, + "grad_norm": 270.0, + "learning_rate": 3.320487052554699e-05, + "loss": 10.7519, + "step": 14893 + }, + { + "epoch": 0.620816139385603, + "grad_norm": 147.0, + "learning_rate": 3.319851286019475e-05, + "loss": 10.2502, + "step": 14894 + }, + { + "epoch": 0.6208578216831312, + "grad_norm": 410.0, + "learning_rate": 3.319215550105114e-05, + "loss": 15.4387, + "step": 14895 + }, + { + "epoch": 0.6208995039806594, + "grad_norm": 123.5, + "learning_rate": 3.318579844823195e-05, + "loss": 10.5631, + "step": 14896 + }, + { + "epoch": 0.6209411862781876, + "grad_norm": 494.0, + "learning_rate": 3.317944170185311e-05, + "loss": 14.4423, + "step": 14897 + }, + { + "epoch": 0.6209828685757159, + "grad_norm": 183.0, + "learning_rate": 3.317308526203041e-05, + "loss": 11.0002, + "step": 14898 + }, + { + "epoch": 0.6210245508732442, + "grad_norm": 159.0, + "learning_rate": 3.316672912887975e-05, + "loss": 10.5629, + "step": 14899 + }, + { + "epoch": 0.6210662331707724, + "grad_norm": 400.0, + "learning_rate": 3.316037330251693e-05, + "loss": 16.0003, + "step": 14900 + }, + { + "epoch": 0.6211079154683006, + "grad_norm": 1120.0, + "learning_rate": 3.3154017783057804e-05, + "loss": 23.8801, + "step": 14901 + }, + { + "epoch": 0.6211495977658289, + "grad_norm": 338.0, + "learning_rate": 3.3147662570618196e-05, + "loss": 15.0014, + "step": 14902 + }, + { + "epoch": 0.6211912800633571, + "grad_norm": 57.5, + "learning_rate": 3.314130766531395e-05, + "loss": 7.469, + "step": 14903 + }, + { + "epoch": 0.6212329623608853, + "grad_norm": 390.0, + "learning_rate": 3.3134953067260845e-05, + "loss": 14.2502, + "step": 14904 + }, + { + "epoch": 0.6212746446584135, + "grad_norm": 428.0, + "learning_rate": 3.3128598776574735e-05, + "loss": 15.9379, + "step": 14905 + }, + { + "epoch": 0.6213163269559419, + "grad_norm": 82.5, + "learning_rate": 3.312224479337139e-05, + "loss": 8.1892, + "step": 14906 + }, + { + "epoch": 0.6213580092534701, + "grad_norm": 508.0, + "learning_rate": 3.3115891117766665e-05, + "loss": 18.0009, + "step": 14907 + }, + { + "epoch": 0.6213996915509983, + "grad_norm": 274.0, + "learning_rate": 3.3109537749876296e-05, + "loss": 14.5639, + "step": 14908 + }, + { + "epoch": 0.6214413738485265, + "grad_norm": 224.0, + "learning_rate": 3.310318468981612e-05, + "loss": 10.8754, + "step": 14909 + }, + { + "epoch": 0.6214830561460548, + "grad_norm": 548.0, + "learning_rate": 3.309683193770188e-05, + "loss": 18.0005, + "step": 14910 + }, + { + "epoch": 0.621524738443583, + "grad_norm": 378.0, + "learning_rate": 3.30904794936494e-05, + "loss": 14.9401, + "step": 14911 + }, + { + "epoch": 0.6215664207411112, + "grad_norm": 231.0, + "learning_rate": 3.308412735777442e-05, + "loss": 10.3135, + "step": 14912 + }, + { + "epoch": 0.6216081030386394, + "grad_norm": 402.0, + "learning_rate": 3.307777553019273e-05, + "loss": 15.5006, + "step": 14913 + }, + { + "epoch": 0.6216497853361678, + "grad_norm": 99.0, + "learning_rate": 3.307142401102007e-05, + "loss": 9.5638, + "step": 14914 + }, + { + "epoch": 0.621691467633696, + "grad_norm": 348.0, + "learning_rate": 3.306507280037221e-05, + "loss": 14.7502, + "step": 14915 + }, + { + "epoch": 0.6217331499312242, + "grad_norm": 101.0, + "learning_rate": 3.305872189836491e-05, + "loss": 7.6565, + "step": 14916 + }, + { + "epoch": 0.6217748322287524, + "grad_norm": 156.0, + "learning_rate": 3.305237130511391e-05, + "loss": 8.5005, + "step": 14917 + }, + { + "epoch": 0.6218165145262807, + "grad_norm": 157.0, + "learning_rate": 3.304602102073493e-05, + "loss": 9.6877, + "step": 14918 + }, + { + "epoch": 0.6218581968238089, + "grad_norm": 416.0, + "learning_rate": 3.3039671045343756e-05, + "loss": 15.6253, + "step": 14919 + }, + { + "epoch": 0.6218998791213372, + "grad_norm": 204.0, + "learning_rate": 3.303332137905605e-05, + "loss": 10.9376, + "step": 14920 + }, + { + "epoch": 0.6219415614188654, + "grad_norm": 316.0, + "learning_rate": 3.302697202198759e-05, + "loss": 13.5629, + "step": 14921 + }, + { + "epoch": 0.6219832437163937, + "grad_norm": 1336.0, + "learning_rate": 3.302062297425406e-05, + "loss": 28.6302, + "step": 14922 + }, + { + "epoch": 0.6220249260139219, + "grad_norm": 470.0, + "learning_rate": 3.301427423597119e-05, + "loss": 15.5652, + "step": 14923 + }, + { + "epoch": 0.6220666083114501, + "grad_norm": 264.0, + "learning_rate": 3.300792580725466e-05, + "loss": 12.5002, + "step": 14924 + }, + { + "epoch": 0.6221082906089783, + "grad_norm": 736.0, + "learning_rate": 3.300157768822022e-05, + "loss": 21.8754, + "step": 14925 + }, + { + "epoch": 0.6221499729065066, + "grad_norm": 412.0, + "learning_rate": 3.2995229878983516e-05, + "loss": 16.1264, + "step": 14926 + }, + { + "epoch": 0.6221916552040349, + "grad_norm": 312.0, + "learning_rate": 3.2988882379660254e-05, + "loss": 13.6255, + "step": 14927 + }, + { + "epoch": 0.6222333375015631, + "grad_norm": 126.0, + "learning_rate": 3.2982535190366136e-05, + "loss": 7.3753, + "step": 14928 + }, + { + "epoch": 0.6222750197990913, + "grad_norm": 170.0, + "learning_rate": 3.2976188311216823e-05, + "loss": 11.8753, + "step": 14929 + }, + { + "epoch": 0.6223167020966196, + "grad_norm": 306.0, + "learning_rate": 3.2969841742327975e-05, + "loss": 13.1888, + "step": 14930 + }, + { + "epoch": 0.6223583843941478, + "grad_norm": 284.0, + "learning_rate": 3.296349548381529e-05, + "loss": 12.0004, + "step": 14931 + }, + { + "epoch": 0.622400066691676, + "grad_norm": 213.0, + "learning_rate": 3.2957149535794395e-05, + "loss": 11.0626, + "step": 14932 + }, + { + "epoch": 0.6224417489892042, + "grad_norm": 516.0, + "learning_rate": 3.2950803898380984e-05, + "loss": 18.0004, + "step": 14933 + }, + { + "epoch": 0.6224834312867326, + "grad_norm": 195.0, + "learning_rate": 3.294445857169066e-05, + "loss": 12.1883, + "step": 14934 + }, + { + "epoch": 0.6225251135842608, + "grad_norm": 608.0, + "learning_rate": 3.2938113555839125e-05, + "loss": 19.5003, + "step": 14935 + }, + { + "epoch": 0.622566795881789, + "grad_norm": 430.0, + "learning_rate": 3.293176885094196e-05, + "loss": 17.1255, + "step": 14936 + }, + { + "epoch": 0.6226084781793172, + "grad_norm": 197.0, + "learning_rate": 3.2925424457114836e-05, + "loss": 12.2508, + "step": 14937 + }, + { + "epoch": 0.6226501604768455, + "grad_norm": 332.0, + "learning_rate": 3.291908037447335e-05, + "loss": 14.4377, + "step": 14938 + }, + { + "epoch": 0.6226918427743737, + "grad_norm": 204.0, + "learning_rate": 3.291273660313316e-05, + "loss": 12.1878, + "step": 14939 + }, + { + "epoch": 0.6227335250719019, + "grad_norm": 195.0, + "learning_rate": 3.290639314320985e-05, + "loss": 11.5001, + "step": 14940 + }, + { + "epoch": 0.6227752073694302, + "grad_norm": 572.0, + "learning_rate": 3.2900049994819035e-05, + "loss": 18.1253, + "step": 14941 + }, + { + "epoch": 0.6228168896669585, + "grad_norm": 170.0, + "learning_rate": 3.289370715807634e-05, + "loss": 10.8752, + "step": 14942 + }, + { + "epoch": 0.6228585719644867, + "grad_norm": 123.5, + "learning_rate": 3.288736463309735e-05, + "loss": 9.5002, + "step": 14943 + }, + { + "epoch": 0.6229002542620149, + "grad_norm": 466.0, + "learning_rate": 3.2881022419997654e-05, + "loss": 15.6878, + "step": 14944 + }, + { + "epoch": 0.6229419365595431, + "grad_norm": 1656.0, + "learning_rate": 3.2874680518892855e-05, + "loss": 33.2502, + "step": 14945 + }, + { + "epoch": 0.6229836188570714, + "grad_norm": 247.0, + "learning_rate": 3.28683389298985e-05, + "loss": 11.3754, + "step": 14946 + }, + { + "epoch": 0.6230253011545996, + "grad_norm": 516.0, + "learning_rate": 3.2861997653130216e-05, + "loss": 16.3755, + "step": 14947 + }, + { + "epoch": 0.6230669834521279, + "grad_norm": 438.0, + "learning_rate": 3.285565668870353e-05, + "loss": 16.5002, + "step": 14948 + }, + { + "epoch": 0.6231086657496562, + "grad_norm": 394.0, + "learning_rate": 3.284931603673404e-05, + "loss": 15.1877, + "step": 14949 + }, + { + "epoch": 0.6231503480471844, + "grad_norm": 74.0, + "learning_rate": 3.2842975697337264e-05, + "loss": 8.8752, + "step": 14950 + }, + { + "epoch": 0.6231920303447126, + "grad_norm": 800.0, + "learning_rate": 3.28366356706288e-05, + "loss": 21.7508, + "step": 14951 + }, + { + "epoch": 0.6232337126422408, + "grad_norm": 197.0, + "learning_rate": 3.283029595672416e-05, + "loss": 10.5013, + "step": 14952 + }, + { + "epoch": 0.6232753949397691, + "grad_norm": 181.0, + "learning_rate": 3.28239565557389e-05, + "loss": 9.3129, + "step": 14953 + }, + { + "epoch": 0.6233170772372973, + "grad_norm": 672.0, + "learning_rate": 3.281761746778855e-05, + "loss": 20.7503, + "step": 14954 + }, + { + "epoch": 0.6233587595348256, + "grad_norm": 222.0, + "learning_rate": 3.281127869298867e-05, + "loss": 11.0631, + "step": 14955 + }, + { + "epoch": 0.6234004418323538, + "grad_norm": 358.0, + "learning_rate": 3.2804940231454746e-05, + "loss": 14.5002, + "step": 14956 + }, + { + "epoch": 0.6234421241298821, + "grad_norm": 138.0, + "learning_rate": 3.279860208330233e-05, + "loss": 8.6261, + "step": 14957 + }, + { + "epoch": 0.6234838064274103, + "grad_norm": 418.0, + "learning_rate": 3.279226424864689e-05, + "loss": 16.8751, + "step": 14958 + }, + { + "epoch": 0.6235254887249385, + "grad_norm": 234.0, + "learning_rate": 3.278592672760399e-05, + "loss": 11.4379, + "step": 14959 + }, + { + "epoch": 0.6235671710224667, + "grad_norm": 346.0, + "learning_rate": 3.277958952028908e-05, + "loss": 15.1253, + "step": 14960 + }, + { + "epoch": 0.623608853319995, + "grad_norm": 688.0, + "learning_rate": 3.2773252626817705e-05, + "loss": 20.6255, + "step": 14961 + }, + { + "epoch": 0.6236505356175233, + "grad_norm": 336.0, + "learning_rate": 3.27669160473053e-05, + "loss": 14.4379, + "step": 14962 + }, + { + "epoch": 0.6236922179150515, + "grad_norm": 332.0, + "learning_rate": 3.2760579781867405e-05, + "loss": 15.2505, + "step": 14963 + }, + { + "epoch": 0.6237339002125797, + "grad_norm": 568.0, + "learning_rate": 3.275424383061946e-05, + "loss": 19.3762, + "step": 14964 + }, + { + "epoch": 0.623775582510108, + "grad_norm": 296.0, + "learning_rate": 3.274790819367696e-05, + "loss": 10.313, + "step": 14965 + }, + { + "epoch": 0.6238172648076362, + "grad_norm": 117.5, + "learning_rate": 3.2741572871155356e-05, + "loss": 8.1254, + "step": 14966 + }, + { + "epoch": 0.6238589471051644, + "grad_norm": 308.0, + "learning_rate": 3.2735237863170123e-05, + "loss": 13.4379, + "step": 14967 + }, + { + "epoch": 0.6239006294026926, + "grad_norm": 524.0, + "learning_rate": 3.27289031698367e-05, + "loss": 18.0005, + "step": 14968 + }, + { + "epoch": 0.623942311700221, + "grad_norm": 165.0, + "learning_rate": 3.272256879127058e-05, + "loss": 12.3755, + "step": 14969 + }, + { + "epoch": 0.6239839939977492, + "grad_norm": 143.0, + "learning_rate": 3.271623472758715e-05, + "loss": 10.0631, + "step": 14970 + }, + { + "epoch": 0.6240256762952774, + "grad_norm": 340.0, + "learning_rate": 3.27099009789019e-05, + "loss": 14.5006, + "step": 14971 + }, + { + "epoch": 0.6240673585928056, + "grad_norm": 133.0, + "learning_rate": 3.270356754533021e-05, + "loss": 9.6878, + "step": 14972 + }, + { + "epoch": 0.6241090408903339, + "grad_norm": 1032.0, + "learning_rate": 3.269723442698757e-05, + "loss": 23.6282, + "step": 14973 + }, + { + "epoch": 0.6241507231878621, + "grad_norm": 176.0, + "learning_rate": 3.2690901623989337e-05, + "loss": 11.8126, + "step": 14974 + }, + { + "epoch": 0.6241924054853903, + "grad_norm": 144.0, + "learning_rate": 3.268456913645098e-05, + "loss": 10.0627, + "step": 14975 + }, + { + "epoch": 0.6242340877829186, + "grad_norm": 302.0, + "learning_rate": 3.2678236964487876e-05, + "loss": 13.126, + "step": 14976 + }, + { + "epoch": 0.6242757700804469, + "grad_norm": 122.0, + "learning_rate": 3.267190510821545e-05, + "loss": 6.5316, + "step": 14977 + }, + { + "epoch": 0.6243174523779751, + "grad_norm": 426.0, + "learning_rate": 3.266557356774909e-05, + "loss": 15.9381, + "step": 14978 + }, + { + "epoch": 0.6243591346755033, + "grad_norm": 135.0, + "learning_rate": 3.265924234320418e-05, + "loss": 10.7508, + "step": 14979 + }, + { + "epoch": 0.6244008169730315, + "grad_norm": 228.0, + "learning_rate": 3.265291143469612e-05, + "loss": 10.063, + "step": 14980 + }, + { + "epoch": 0.6244424992705598, + "grad_norm": 404.0, + "learning_rate": 3.26465808423403e-05, + "loss": 14.7503, + "step": 14981 + }, + { + "epoch": 0.624484181568088, + "grad_norm": 294.0, + "learning_rate": 3.264025056625207e-05, + "loss": 11.3128, + "step": 14982 + }, + { + "epoch": 0.6245258638656163, + "grad_norm": 158.0, + "learning_rate": 3.2633920606546843e-05, + "loss": 8.3753, + "step": 14983 + }, + { + "epoch": 0.6245675461631445, + "grad_norm": 235.0, + "learning_rate": 3.262759096333993e-05, + "loss": 11.3753, + "step": 14984 + }, + { + "epoch": 0.6246092284606728, + "grad_norm": 88.5, + "learning_rate": 3.2621261636746724e-05, + "loss": 8.313, + "step": 14985 + }, + { + "epoch": 0.624650910758201, + "grad_norm": 148.0, + "learning_rate": 3.261493262688256e-05, + "loss": 10.0629, + "step": 14986 + }, + { + "epoch": 0.6246925930557292, + "grad_norm": 588.0, + "learning_rate": 3.26086039338628e-05, + "loss": 19.5004, + "step": 14987 + }, + { + "epoch": 0.6247342753532574, + "grad_norm": 296.0, + "learning_rate": 3.260227555780276e-05, + "loss": 14.1255, + "step": 14988 + }, + { + "epoch": 0.6247759576507858, + "grad_norm": 368.0, + "learning_rate": 3.2595947498817804e-05, + "loss": 13.6258, + "step": 14989 + }, + { + "epoch": 0.624817639948314, + "grad_norm": 322.0, + "learning_rate": 3.258961975702325e-05, + "loss": 13.8754, + "step": 14990 + }, + { + "epoch": 0.6248593222458422, + "grad_norm": 382.0, + "learning_rate": 3.2583292332534424e-05, + "loss": 15.0003, + "step": 14991 + }, + { + "epoch": 0.6249010045433704, + "grad_norm": 374.0, + "learning_rate": 3.257696522546663e-05, + "loss": 14.1251, + "step": 14992 + }, + { + "epoch": 0.6249426868408987, + "grad_norm": 354.0, + "learning_rate": 3.257063843593522e-05, + "loss": 14.2505, + "step": 14993 + }, + { + "epoch": 0.6249843691384269, + "grad_norm": 1536.0, + "learning_rate": 3.256431196405544e-05, + "loss": 28.0047, + "step": 14994 + }, + { + "epoch": 0.6250260514359551, + "grad_norm": 1624.0, + "learning_rate": 3.255798580994264e-05, + "loss": 31.7588, + "step": 14995 + }, + { + "epoch": 0.6250677337334833, + "grad_norm": 480.0, + "learning_rate": 3.255165997371208e-05, + "loss": 16.8754, + "step": 14996 + }, + { + "epoch": 0.6251094160310117, + "grad_norm": 510.0, + "learning_rate": 3.2545334455479094e-05, + "loss": 16.7516, + "step": 14997 + }, + { + "epoch": 0.6251510983285399, + "grad_norm": 430.0, + "learning_rate": 3.253900925535891e-05, + "loss": 15.0003, + "step": 14998 + }, + { + "epoch": 0.6251927806260681, + "grad_norm": 1112.0, + "learning_rate": 3.253268437346685e-05, + "loss": 29.3753, + "step": 14999 + }, + { + "epoch": 0.6252344629235963, + "grad_norm": 276.0, + "learning_rate": 3.2526359809918154e-05, + "loss": 12.6252, + "step": 15000 + }, + { + "epoch": 0.6252761452211246, + "grad_norm": 264.0, + "learning_rate": 3.252003556482812e-05, + "loss": 13.0627, + "step": 15001 + }, + { + "epoch": 0.6253178275186528, + "grad_norm": 276.0, + "learning_rate": 3.251371163831197e-05, + "loss": 7.6253, + "step": 15002 + }, + { + "epoch": 0.625359509816181, + "grad_norm": 332.0, + "learning_rate": 3.250738803048499e-05, + "loss": 12.3752, + "step": 15003 + }, + { + "epoch": 0.6254011921137093, + "grad_norm": 372.0, + "learning_rate": 3.250106474146241e-05, + "loss": 12.0024, + "step": 15004 + }, + { + "epoch": 0.6254428744112376, + "grad_norm": 366.0, + "learning_rate": 3.249474177135948e-05, + "loss": 16.1255, + "step": 15005 + }, + { + "epoch": 0.6254845567087658, + "grad_norm": 86.0, + "learning_rate": 3.248841912029142e-05, + "loss": 8.6257, + "step": 15006 + }, + { + "epoch": 0.625526239006294, + "grad_norm": 252.0, + "learning_rate": 3.2482096788373504e-05, + "loss": 10.6878, + "step": 15007 + }, + { + "epoch": 0.6255679213038222, + "grad_norm": 800.0, + "learning_rate": 3.247577477572091e-05, + "loss": 21.7503, + "step": 15008 + }, + { + "epoch": 0.6256096036013505, + "grad_norm": 832.0, + "learning_rate": 3.2469453082448896e-05, + "loss": 23.5034, + "step": 15009 + }, + { + "epoch": 0.6256512858988788, + "grad_norm": 260.0, + "learning_rate": 3.246313170867263e-05, + "loss": 12.5005, + "step": 15010 + }, + { + "epoch": 0.625692968196407, + "grad_norm": 346.0, + "learning_rate": 3.245681065450738e-05, + "loss": 15.1254, + "step": 15011 + }, + { + "epoch": 0.6257346504939352, + "grad_norm": 153.0, + "learning_rate": 3.2450489920068275e-05, + "loss": 9.7501, + "step": 15012 + }, + { + "epoch": 0.6257763327914635, + "grad_norm": 536.0, + "learning_rate": 3.2444169505470576e-05, + "loss": 18.0003, + "step": 15013 + }, + { + "epoch": 0.6258180150889917, + "grad_norm": 414.0, + "learning_rate": 3.243784941082942e-05, + "loss": 15.1879, + "step": 15014 + }, + { + "epoch": 0.6258596973865199, + "grad_norm": 1304.0, + "learning_rate": 3.2431529636260035e-05, + "loss": 31.0002, + "step": 15015 + }, + { + "epoch": 0.6259013796840481, + "grad_norm": 124.0, + "learning_rate": 3.242521018187759e-05, + "loss": 5.688, + "step": 15016 + }, + { + "epoch": 0.6259430619815765, + "grad_norm": 246.0, + "learning_rate": 3.241889104779724e-05, + "loss": 12.5004, + "step": 15017 + }, + { + "epoch": 0.6259847442791047, + "grad_norm": 122.0, + "learning_rate": 3.2412572234134156e-05, + "loss": 6.4377, + "step": 15018 + }, + { + "epoch": 0.6260264265766329, + "grad_norm": 600.0, + "learning_rate": 3.240625374100351e-05, + "loss": 19.7502, + "step": 15019 + }, + { + "epoch": 0.6260681088741611, + "grad_norm": 236.0, + "learning_rate": 3.239993556852045e-05, + "loss": 12.9381, + "step": 15020 + }, + { + "epoch": 0.6261097911716894, + "grad_norm": 458.0, + "learning_rate": 3.239361771680014e-05, + "loss": 17.2505, + "step": 15021 + }, + { + "epoch": 0.6261514734692176, + "grad_norm": 358.0, + "learning_rate": 3.238730018595768e-05, + "loss": 14.3753, + "step": 15022 + }, + { + "epoch": 0.6261931557667458, + "grad_norm": 238.0, + "learning_rate": 3.238098297610827e-05, + "loss": 12.2504, + "step": 15023 + }, + { + "epoch": 0.6262348380642742, + "grad_norm": 470.0, + "learning_rate": 3.237466608736699e-05, + "loss": 17.6253, + "step": 15024 + }, + { + "epoch": 0.6262765203618024, + "grad_norm": 280.0, + "learning_rate": 3.2368349519848996e-05, + "loss": 13.0005, + "step": 15025 + }, + { + "epoch": 0.6263182026593306, + "grad_norm": 884.0, + "learning_rate": 3.236203327366938e-05, + "loss": 24.6253, + "step": 15026 + }, + { + "epoch": 0.6263598849568588, + "grad_norm": 296.0, + "learning_rate": 3.2355717348943285e-05, + "loss": 12.0005, + "step": 15027 + }, + { + "epoch": 0.6264015672543871, + "grad_norm": 468.0, + "learning_rate": 3.234940174578581e-05, + "loss": 17.3759, + "step": 15028 + }, + { + "epoch": 0.6264432495519153, + "grad_norm": 400.0, + "learning_rate": 3.2343086464312054e-05, + "loss": 15.7505, + "step": 15029 + }, + { + "epoch": 0.6264849318494435, + "grad_norm": 334.0, + "learning_rate": 3.23367715046371e-05, + "loss": 14.5003, + "step": 15030 + }, + { + "epoch": 0.6265266141469717, + "grad_norm": 248.0, + "learning_rate": 3.233045686687608e-05, + "loss": 11.6878, + "step": 15031 + }, + { + "epoch": 0.6265682964445001, + "grad_norm": 314.0, + "learning_rate": 3.232414255114403e-05, + "loss": 11.8129, + "step": 15032 + }, + { + "epoch": 0.6266099787420283, + "grad_norm": 418.0, + "learning_rate": 3.231782855755607e-05, + "loss": 16.5005, + "step": 15033 + }, + { + "epoch": 0.6266516610395565, + "grad_norm": 528.0, + "learning_rate": 3.231151488622724e-05, + "loss": 18.6259, + "step": 15034 + }, + { + "epoch": 0.6266933433370847, + "grad_norm": 74.5, + "learning_rate": 3.230520153727263e-05, + "loss": 6.6254, + "step": 15035 + }, + { + "epoch": 0.626735025634613, + "grad_norm": 312.0, + "learning_rate": 3.229888851080728e-05, + "loss": 12.563, + "step": 15036 + }, + { + "epoch": 0.6267767079321412, + "grad_norm": 490.0, + "learning_rate": 3.229257580694628e-05, + "loss": 16.2502, + "step": 15037 + }, + { + "epoch": 0.6268183902296695, + "grad_norm": 298.0, + "learning_rate": 3.228626342580463e-05, + "loss": 12.2503, + "step": 15038 + }, + { + "epoch": 0.6268600725271977, + "grad_norm": 478.0, + "learning_rate": 3.227995136749743e-05, + "loss": 16.6251, + "step": 15039 + }, + { + "epoch": 0.626901754824726, + "grad_norm": 251.0, + "learning_rate": 3.227363963213966e-05, + "loss": 13.2504, + "step": 15040 + }, + { + "epoch": 0.6269434371222542, + "grad_norm": 195.0, + "learning_rate": 3.226732821984639e-05, + "loss": 11.0627, + "step": 15041 + }, + { + "epoch": 0.6269851194197824, + "grad_norm": 95.5, + "learning_rate": 3.226101713073266e-05, + "loss": 7.8754, + "step": 15042 + }, + { + "epoch": 0.6270268017173106, + "grad_norm": 892.0, + "learning_rate": 3.225470636491344e-05, + "loss": 22.6252, + "step": 15043 + }, + { + "epoch": 0.627068484014839, + "grad_norm": 174.0, + "learning_rate": 3.224839592250378e-05, + "loss": 10.3752, + "step": 15044 + }, + { + "epoch": 0.6271101663123672, + "grad_norm": 115.5, + "learning_rate": 3.224208580361868e-05, + "loss": 9.4377, + "step": 15045 + }, + { + "epoch": 0.6271518486098954, + "grad_norm": 474.0, + "learning_rate": 3.223577600837315e-05, + "loss": 16.2505, + "step": 15046 + }, + { + "epoch": 0.6271935309074236, + "grad_norm": 412.0, + "learning_rate": 3.222946653688217e-05, + "loss": 15.4381, + "step": 15047 + }, + { + "epoch": 0.6272352132049519, + "grad_norm": 600.0, + "learning_rate": 3.2223157389260756e-05, + "loss": 19.7502, + "step": 15048 + }, + { + "epoch": 0.6272768955024801, + "grad_norm": 596.0, + "learning_rate": 3.221684856562386e-05, + "loss": 18.8755, + "step": 15049 + }, + { + "epoch": 0.6273185778000083, + "grad_norm": 332.0, + "learning_rate": 3.2210540066086495e-05, + "loss": 13.3133, + "step": 15050 + }, + { + "epoch": 0.6273602600975365, + "grad_norm": 292.0, + "learning_rate": 3.2204231890763595e-05, + "loss": 13.0635, + "step": 15051 + }, + { + "epoch": 0.6274019423950649, + "grad_norm": 76.5, + "learning_rate": 3.2197924039770167e-05, + "loss": 8.3129, + "step": 15052 + }, + { + "epoch": 0.6274436246925931, + "grad_norm": 115.5, + "learning_rate": 3.2191616513221134e-05, + "loss": 10.2502, + "step": 15053 + }, + { + "epoch": 0.6274853069901213, + "grad_norm": 1312.0, + "learning_rate": 3.218530931123149e-05, + "loss": 24.8762, + "step": 15054 + }, + { + "epoch": 0.6275269892876495, + "grad_norm": 80.5, + "learning_rate": 3.217900243391615e-05, + "loss": 8.9379, + "step": 15055 + }, + { + "epoch": 0.6275686715851778, + "grad_norm": 238.0, + "learning_rate": 3.217269588139008e-05, + "loss": 12.2506, + "step": 15056 + }, + { + "epoch": 0.627610353882706, + "grad_norm": 129.0, + "learning_rate": 3.216638965376821e-05, + "loss": 8.8127, + "step": 15057 + }, + { + "epoch": 0.6276520361802342, + "grad_norm": 241.0, + "learning_rate": 3.216008375116548e-05, + "loss": 12.438, + "step": 15058 + }, + { + "epoch": 0.6276937184777625, + "grad_norm": 588.0, + "learning_rate": 3.215377817369679e-05, + "loss": 19.1255, + "step": 15059 + }, + { + "epoch": 0.6277354007752908, + "grad_norm": 198.0, + "learning_rate": 3.2147472921477094e-05, + "loss": 10.7506, + "step": 15060 + }, + { + "epoch": 0.627777083072819, + "grad_norm": 176.0, + "learning_rate": 3.214116799462127e-05, + "loss": 11.5002, + "step": 15061 + }, + { + "epoch": 0.6278187653703472, + "grad_norm": 600.0, + "learning_rate": 3.213486339324426e-05, + "loss": 18.6253, + "step": 15062 + }, + { + "epoch": 0.6278604476678754, + "grad_norm": 330.0, + "learning_rate": 3.212855911746094e-05, + "loss": 14.9379, + "step": 15063 + }, + { + "epoch": 0.6279021299654037, + "grad_norm": 173.0, + "learning_rate": 3.212225516738624e-05, + "loss": 10.3127, + "step": 15064 + }, + { + "epoch": 0.6279438122629319, + "grad_norm": 1012.0, + "learning_rate": 3.2115951543134996e-05, + "loss": 26.6258, + "step": 15065 + }, + { + "epoch": 0.6279854945604602, + "grad_norm": 96.0, + "learning_rate": 3.210964824482215e-05, + "loss": 8.8128, + "step": 15066 + }, + { + "epoch": 0.6280271768579884, + "grad_norm": 668.0, + "learning_rate": 3.210334527256252e-05, + "loss": 21.7502, + "step": 15067 + }, + { + "epoch": 0.6280688591555167, + "grad_norm": 438.0, + "learning_rate": 3.209704262647104e-05, + "loss": 16.5009, + "step": 15068 + }, + { + "epoch": 0.6281105414530449, + "grad_norm": 82.0, + "learning_rate": 3.2090740306662536e-05, + "loss": 7.6571, + "step": 15069 + }, + { + "epoch": 0.6281522237505731, + "grad_norm": 328.0, + "learning_rate": 3.2084438313251884e-05, + "loss": 13.7502, + "step": 15070 + }, + { + "epoch": 0.6281939060481013, + "grad_norm": 708.0, + "learning_rate": 3.207813664635392e-05, + "loss": 20.6252, + "step": 15071 + }, + { + "epoch": 0.6282355883456296, + "grad_norm": 360.0, + "learning_rate": 3.207183530608353e-05, + "loss": 14.0659, + "step": 15072 + }, + { + "epoch": 0.6282772706431579, + "grad_norm": 484.0, + "learning_rate": 3.206553429255551e-05, + "loss": 15.7531, + "step": 15073 + }, + { + "epoch": 0.6283189529406861, + "grad_norm": 396.0, + "learning_rate": 3.2059233605884744e-05, + "loss": 14.6879, + "step": 15074 + }, + { + "epoch": 0.6283606352382143, + "grad_norm": 186.0, + "learning_rate": 3.205293324618601e-05, + "loss": 11.5629, + "step": 15075 + }, + { + "epoch": 0.6284023175357426, + "grad_norm": 440.0, + "learning_rate": 3.20466332135742e-05, + "loss": 16.2509, + "step": 15076 + }, + { + "epoch": 0.6284439998332708, + "grad_norm": 408.0, + "learning_rate": 3.2040333508164056e-05, + "loss": 16.3764, + "step": 15077 + }, + { + "epoch": 0.628485682130799, + "grad_norm": 186.0, + "learning_rate": 3.203403413007045e-05, + "loss": 11.0006, + "step": 15078 + }, + { + "epoch": 0.6285273644283272, + "grad_norm": 440.0, + "learning_rate": 3.202773507940815e-05, + "loss": 16.5004, + "step": 15079 + }, + { + "epoch": 0.6285690467258556, + "grad_norm": 340.0, + "learning_rate": 3.202143635629198e-05, + "loss": 13.9376, + "step": 15080 + }, + { + "epoch": 0.6286107290233838, + "grad_norm": 376.0, + "learning_rate": 3.2015137960836736e-05, + "loss": 14.7504, + "step": 15081 + }, + { + "epoch": 0.628652411320912, + "grad_norm": 200.0, + "learning_rate": 3.2008839893157196e-05, + "loss": 11.0011, + "step": 15082 + }, + { + "epoch": 0.6286940936184402, + "grad_norm": 184.0, + "learning_rate": 3.2002542153368135e-05, + "loss": 12.439, + "step": 15083 + }, + { + "epoch": 0.6287357759159685, + "grad_norm": 270.0, + "learning_rate": 3.1996244741584356e-05, + "loss": 12.4377, + "step": 15084 + }, + { + "epoch": 0.6287774582134967, + "grad_norm": 382.0, + "learning_rate": 3.1989947657920596e-05, + "loss": 14.9379, + "step": 15085 + }, + { + "epoch": 0.6288191405110249, + "grad_norm": 320.0, + "learning_rate": 3.1983650902491664e-05, + "loss": 14.2507, + "step": 15086 + }, + { + "epoch": 0.6288608228085532, + "grad_norm": 248.0, + "learning_rate": 3.197735447541227e-05, + "loss": 13.1254, + "step": 15087 + }, + { + "epoch": 0.6289025051060815, + "grad_norm": 139.0, + "learning_rate": 3.1971058376797214e-05, + "loss": 10.8129, + "step": 15088 + }, + { + "epoch": 0.6289441874036097, + "grad_norm": 488.0, + "learning_rate": 3.19647626067612e-05, + "loss": 16.6253, + "step": 15089 + }, + { + "epoch": 0.6289858697011379, + "grad_norm": 976.0, + "learning_rate": 3.1958467165419e-05, + "loss": 26.7503, + "step": 15090 + }, + { + "epoch": 0.6290275519986661, + "grad_norm": 241.0, + "learning_rate": 3.195217205288533e-05, + "loss": 11.2508, + "step": 15091 + }, + { + "epoch": 0.6290692342961944, + "grad_norm": 117.0, + "learning_rate": 3.194587726927494e-05, + "loss": 8.9382, + "step": 15092 + }, + { + "epoch": 0.6291109165937226, + "grad_norm": 316.0, + "learning_rate": 3.193958281470252e-05, + "loss": 14.0627, + "step": 15093 + }, + { + "epoch": 0.6291525988912509, + "grad_norm": 76.0, + "learning_rate": 3.1933288689282814e-05, + "loss": 7.8439, + "step": 15094 + }, + { + "epoch": 0.6291942811887792, + "grad_norm": 204.0, + "learning_rate": 3.1926994893130525e-05, + "loss": 11.6256, + "step": 15095 + }, + { + "epoch": 0.6292359634863074, + "grad_norm": 450.0, + "learning_rate": 3.192070142636037e-05, + "loss": 15.1892, + "step": 15096 + }, + { + "epoch": 0.6292776457838356, + "grad_norm": 326.0, + "learning_rate": 3.191440828908701e-05, + "loss": 14.0003, + "step": 15097 + }, + { + "epoch": 0.6293193280813638, + "grad_norm": 1104.0, + "learning_rate": 3.19081154814252e-05, + "loss": 29.7503, + "step": 15098 + }, + { + "epoch": 0.6293610103788921, + "grad_norm": 232.0, + "learning_rate": 3.1901823003489555e-05, + "loss": 11.6877, + "step": 15099 + }, + { + "epoch": 0.6294026926764204, + "grad_norm": 229.0, + "learning_rate": 3.1895530855394825e-05, + "loss": 14.0629, + "step": 15100 + }, + { + "epoch": 0.6294443749739486, + "grad_norm": 142.0, + "learning_rate": 3.1889239037255626e-05, + "loss": 10.063, + "step": 15101 + }, + { + "epoch": 0.6294860572714768, + "grad_norm": 548.0, + "learning_rate": 3.1882947549186674e-05, + "loss": 18.8756, + "step": 15102 + }, + { + "epoch": 0.6295277395690051, + "grad_norm": 434.0, + "learning_rate": 3.1876656391302597e-05, + "loss": 15.8751, + "step": 15103 + }, + { + "epoch": 0.6295694218665333, + "grad_norm": 55.25, + "learning_rate": 3.187036556371808e-05, + "loss": 8.3752, + "step": 15104 + }, + { + "epoch": 0.6296111041640615, + "grad_norm": 202.0, + "learning_rate": 3.186407506654774e-05, + "loss": 10.188, + "step": 15105 + }, + { + "epoch": 0.6296527864615897, + "grad_norm": 180.0, + "learning_rate": 3.185778489990625e-05, + "loss": 10.8758, + "step": 15106 + }, + { + "epoch": 0.629694468759118, + "grad_norm": 354.0, + "learning_rate": 3.185149506390825e-05, + "loss": 14.0032, + "step": 15107 + }, + { + "epoch": 0.6297361510566463, + "grad_norm": 482.0, + "learning_rate": 3.184520555866836e-05, + "loss": 16.7503, + "step": 15108 + }, + { + "epoch": 0.6297778333541745, + "grad_norm": 203.0, + "learning_rate": 3.1838916384301194e-05, + "loss": 10.2511, + "step": 15109 + }, + { + "epoch": 0.6298195156517027, + "grad_norm": 988.0, + "learning_rate": 3.183262754092141e-05, + "loss": 27.7545, + "step": 15110 + }, + { + "epoch": 0.629861197949231, + "grad_norm": 198.0, + "learning_rate": 3.1826339028643595e-05, + "loss": 10.8753, + "step": 15111 + }, + { + "epoch": 0.6299028802467592, + "grad_norm": 264.0, + "learning_rate": 3.1820050847582374e-05, + "loss": 11.8753, + "step": 15112 + }, + { + "epoch": 0.6299445625442874, + "grad_norm": 1056.0, + "learning_rate": 3.181376299785231e-05, + "loss": 26.7516, + "step": 15113 + }, + { + "epoch": 0.6299862448418156, + "grad_norm": 298.0, + "learning_rate": 3.180747547956807e-05, + "loss": 13.1252, + "step": 15114 + }, + { + "epoch": 0.630027927139344, + "grad_norm": 824.0, + "learning_rate": 3.1801188292844176e-05, + "loss": 23.6253, + "step": 15115 + }, + { + "epoch": 0.6300696094368722, + "grad_norm": 126.0, + "learning_rate": 3.179490143779525e-05, + "loss": 10.3754, + "step": 15116 + }, + { + "epoch": 0.6301112917344004, + "grad_norm": 880.0, + "learning_rate": 3.1788614914535856e-05, + "loss": 22.5046, + "step": 15117 + }, + { + "epoch": 0.6301529740319286, + "grad_norm": 175.0, + "learning_rate": 3.1782328723180575e-05, + "loss": 9.313, + "step": 15118 + }, + { + "epoch": 0.6301946563294569, + "grad_norm": 684.0, + "learning_rate": 3.177604286384398e-05, + "loss": 19.7505, + "step": 15119 + }, + { + "epoch": 0.6302363386269851, + "grad_norm": 274.0, + "learning_rate": 3.176975733664061e-05, + "loss": 14.0008, + "step": 15120 + }, + { + "epoch": 0.6302780209245133, + "grad_norm": 418.0, + "learning_rate": 3.176347214168502e-05, + "loss": 15.6876, + "step": 15121 + }, + { + "epoch": 0.6303197032220416, + "grad_norm": 198.0, + "learning_rate": 3.17571872790918e-05, + "loss": 11.5003, + "step": 15122 + }, + { + "epoch": 0.6303613855195699, + "grad_norm": 284.0, + "learning_rate": 3.175090274897544e-05, + "loss": 13.0002, + "step": 15123 + }, + { + "epoch": 0.6304030678170981, + "grad_norm": 250.0, + "learning_rate": 3.174461855145051e-05, + "loss": 12.5631, + "step": 15124 + }, + { + "epoch": 0.6304447501146263, + "grad_norm": 376.0, + "learning_rate": 3.173833468663151e-05, + "loss": 14.5024, + "step": 15125 + }, + { + "epoch": 0.6304864324121545, + "grad_norm": 228.0, + "learning_rate": 3.1732051154633e-05, + "loss": 10.188, + "step": 15126 + }, + { + "epoch": 0.6305281147096828, + "grad_norm": 494.0, + "learning_rate": 3.172576795556946e-05, + "loss": 15.8129, + "step": 15127 + }, + { + "epoch": 0.630569797007211, + "grad_norm": 376.0, + "learning_rate": 3.1719485089555444e-05, + "loss": 15.7503, + "step": 15128 + }, + { + "epoch": 0.6306114793047393, + "grad_norm": 508.0, + "learning_rate": 3.171320255670541e-05, + "loss": 14.5004, + "step": 15129 + }, + { + "epoch": 0.6306531616022675, + "grad_norm": 173.0, + "learning_rate": 3.1706920357133906e-05, + "loss": 8.6255, + "step": 15130 + }, + { + "epoch": 0.6306948438997958, + "grad_norm": 308.0, + "learning_rate": 3.1700638490955384e-05, + "loss": 13.5628, + "step": 15131 + }, + { + "epoch": 0.630736526197324, + "grad_norm": 121.5, + "learning_rate": 3.169435695828436e-05, + "loss": 11.1253, + "step": 15132 + }, + { + "epoch": 0.6307782084948522, + "grad_norm": 66.5, + "learning_rate": 3.168807575923529e-05, + "loss": 6.7821, + "step": 15133 + }, + { + "epoch": 0.6308198907923804, + "grad_norm": 572.0, + "learning_rate": 3.1681794893922695e-05, + "loss": 18.6252, + "step": 15134 + }, + { + "epoch": 0.6308615730899088, + "grad_norm": 214.0, + "learning_rate": 3.167551436246099e-05, + "loss": 12.1253, + "step": 15135 + }, + { + "epoch": 0.630903255387437, + "grad_norm": 508.0, + "learning_rate": 3.166923416496468e-05, + "loss": 19.376, + "step": 15136 + }, + { + "epoch": 0.6309449376849652, + "grad_norm": 175.0, + "learning_rate": 3.166295430154819e-05, + "loss": 10.6253, + "step": 15137 + }, + { + "epoch": 0.6309866199824934, + "grad_norm": 716.0, + "learning_rate": 3.1656674772326e-05, + "loss": 20.8753, + "step": 15138 + }, + { + "epoch": 0.6310283022800217, + "grad_norm": 132.0, + "learning_rate": 3.165039557741252e-05, + "loss": 9.4376, + "step": 15139 + }, + { + "epoch": 0.6310699845775499, + "grad_norm": 240.0, + "learning_rate": 3.164411671692223e-05, + "loss": 11.8126, + "step": 15140 + }, + { + "epoch": 0.6311116668750781, + "grad_norm": 376.0, + "learning_rate": 3.163783819096952e-05, + "loss": 12.1878, + "step": 15141 + }, + { + "epoch": 0.6311533491726063, + "grad_norm": 274.0, + "learning_rate": 3.1631559999668865e-05, + "loss": 9.5008, + "step": 15142 + }, + { + "epoch": 0.6311950314701347, + "grad_norm": 314.0, + "learning_rate": 3.162528214313464e-05, + "loss": 12.5628, + "step": 15143 + }, + { + "epoch": 0.6312367137676629, + "grad_norm": 494.0, + "learning_rate": 3.161900462148129e-05, + "loss": 17.0003, + "step": 15144 + }, + { + "epoch": 0.6312783960651911, + "grad_norm": 180.0, + "learning_rate": 3.16127274348232e-05, + "loss": 10.2504, + "step": 15145 + }, + { + "epoch": 0.6313200783627193, + "grad_norm": 432.0, + "learning_rate": 3.1606450583274795e-05, + "loss": 16.0003, + "step": 15146 + }, + { + "epoch": 0.6313617606602476, + "grad_norm": 312.0, + "learning_rate": 3.160017406695045e-05, + "loss": 13.4377, + "step": 15147 + }, + { + "epoch": 0.6314034429577758, + "grad_norm": 306.0, + "learning_rate": 3.159389788596459e-05, + "loss": 14.0008, + "step": 15148 + }, + { + "epoch": 0.631445125255304, + "grad_norm": 113.5, + "learning_rate": 3.158762204043155e-05, + "loss": 9.0003, + "step": 15149 + }, + { + "epoch": 0.6314868075528323, + "grad_norm": 288.0, + "learning_rate": 3.158134653046575e-05, + "loss": 12.313, + "step": 15150 + }, + { + "epoch": 0.6315284898503606, + "grad_norm": 138.0, + "learning_rate": 3.157507135618153e-05, + "loss": 9.8129, + "step": 15151 + }, + { + "epoch": 0.6315701721478888, + "grad_norm": 231.0, + "learning_rate": 3.156879651769329e-05, + "loss": 11.563, + "step": 15152 + }, + { + "epoch": 0.631611854445417, + "grad_norm": 660.0, + "learning_rate": 3.1562522015115345e-05, + "loss": 19.2508, + "step": 15153 + }, + { + "epoch": 0.6316535367429452, + "grad_norm": 448.0, + "learning_rate": 3.1556247848562096e-05, + "loss": 16.3754, + "step": 15154 + }, + { + "epoch": 0.6316952190404735, + "grad_norm": 332.0, + "learning_rate": 3.154997401814784e-05, + "loss": 14.0627, + "step": 15155 + }, + { + "epoch": 0.6317369013380018, + "grad_norm": 179.0, + "learning_rate": 3.154370052398697e-05, + "loss": 10.3764, + "step": 15156 + }, + { + "epoch": 0.63177858363553, + "grad_norm": 107.0, + "learning_rate": 3.1537427366193784e-05, + "loss": 8.1885, + "step": 15157 + }, + { + "epoch": 0.6318202659330582, + "grad_norm": 414.0, + "learning_rate": 3.1531154544882635e-05, + "loss": 15.7505, + "step": 15158 + }, + { + "epoch": 0.6318619482305865, + "grad_norm": 228.0, + "learning_rate": 3.152488206016782e-05, + "loss": 10.6876, + "step": 15159 + }, + { + "epoch": 0.6319036305281147, + "grad_norm": 294.0, + "learning_rate": 3.1518609912163673e-05, + "loss": 13.1253, + "step": 15160 + }, + { + "epoch": 0.6319453128256429, + "grad_norm": 660.0, + "learning_rate": 3.15123381009845e-05, + "loss": 20.0003, + "step": 15161 + }, + { + "epoch": 0.6319869951231711, + "grad_norm": 444.0, + "learning_rate": 3.150606662674462e-05, + "loss": 17.2502, + "step": 15162 + }, + { + "epoch": 0.6320286774206995, + "grad_norm": 155.0, + "learning_rate": 3.1499795489558304e-05, + "loss": 11.6885, + "step": 15163 + }, + { + "epoch": 0.6320703597182277, + "grad_norm": 508.0, + "learning_rate": 3.149352468953987e-05, + "loss": 15.5646, + "step": 15164 + }, + { + "epoch": 0.6321120420157559, + "grad_norm": 306.0, + "learning_rate": 3.1487254226803575e-05, + "loss": 13.8754, + "step": 15165 + }, + { + "epoch": 0.6321537243132841, + "grad_norm": 206.0, + "learning_rate": 3.148098410146373e-05, + "loss": 11.6879, + "step": 15166 + }, + { + "epoch": 0.6321954066108124, + "grad_norm": 382.0, + "learning_rate": 3.147471431363458e-05, + "loss": 15.0629, + "step": 15167 + }, + { + "epoch": 0.6322370889083406, + "grad_norm": 1264.0, + "learning_rate": 3.1468444863430426e-05, + "loss": 26.2553, + "step": 15168 + }, + { + "epoch": 0.6322787712058688, + "grad_norm": 296.0, + "learning_rate": 3.146217575096548e-05, + "loss": 11.3753, + "step": 15169 + }, + { + "epoch": 0.6323204535033972, + "grad_norm": 234.0, + "learning_rate": 3.145590697635404e-05, + "loss": 10.6252, + "step": 15170 + }, + { + "epoch": 0.6323621358009254, + "grad_norm": 131.0, + "learning_rate": 3.144963853971034e-05, + "loss": 9.1879, + "step": 15171 + }, + { + "epoch": 0.6324038180984536, + "grad_norm": 302.0, + "learning_rate": 3.144337044114864e-05, + "loss": 13.8752, + "step": 15172 + }, + { + "epoch": 0.6324455003959818, + "grad_norm": 416.0, + "learning_rate": 3.143710268078314e-05, + "loss": 14.7508, + "step": 15173 + }, + { + "epoch": 0.6324871826935101, + "grad_norm": 420.0, + "learning_rate": 3.143083525872811e-05, + "loss": 15.3127, + "step": 15174 + }, + { + "epoch": 0.6325288649910383, + "grad_norm": 126.0, + "learning_rate": 3.142456817509773e-05, + "loss": 8.7502, + "step": 15175 + }, + { + "epoch": 0.6325705472885665, + "grad_norm": 458.0, + "learning_rate": 3.141830143000628e-05, + "loss": 16.0002, + "step": 15176 + }, + { + "epoch": 0.6326122295860948, + "grad_norm": 264.0, + "learning_rate": 3.141203502356791e-05, + "loss": 12.5004, + "step": 15177 + }, + { + "epoch": 0.6326539118836231, + "grad_norm": 190.0, + "learning_rate": 3.140576895589687e-05, + "loss": 10.8128, + "step": 15178 + }, + { + "epoch": 0.6326955941811513, + "grad_norm": 120.5, + "learning_rate": 3.139950322710732e-05, + "loss": 5.8138, + "step": 15179 + }, + { + "epoch": 0.6327372764786795, + "grad_norm": 230.0, + "learning_rate": 3.139323783731349e-05, + "loss": 11.5004, + "step": 15180 + }, + { + "epoch": 0.6327789587762077, + "grad_norm": 636.0, + "learning_rate": 3.138697278662954e-05, + "loss": 18.0003, + "step": 15181 + }, + { + "epoch": 0.632820641073736, + "grad_norm": 392.0, + "learning_rate": 3.138070807516967e-05, + "loss": 14.3752, + "step": 15182 + }, + { + "epoch": 0.6328623233712642, + "grad_norm": 184.0, + "learning_rate": 3.137444370304805e-05, + "loss": 10.4387, + "step": 15183 + }, + { + "epoch": 0.6329040056687925, + "grad_norm": 239.0, + "learning_rate": 3.136817967037885e-05, + "loss": 12.1878, + "step": 15184 + }, + { + "epoch": 0.6329456879663207, + "grad_norm": 300.0, + "learning_rate": 3.136191597727621e-05, + "loss": 13.9377, + "step": 15185 + }, + { + "epoch": 0.632987370263849, + "grad_norm": 185.0, + "learning_rate": 3.135565262385434e-05, + "loss": 11.3128, + "step": 15186 + }, + { + "epoch": 0.6330290525613772, + "grad_norm": 462.0, + "learning_rate": 3.134938961022733e-05, + "loss": 15.8128, + "step": 15187 + }, + { + "epoch": 0.6330707348589054, + "grad_norm": 77.5, + "learning_rate": 3.134312693650937e-05, + "loss": 8.5632, + "step": 15188 + }, + { + "epoch": 0.6331124171564336, + "grad_norm": 278.0, + "learning_rate": 3.1336864602814554e-05, + "loss": 12.9376, + "step": 15189 + }, + { + "epoch": 0.633154099453962, + "grad_norm": 164.0, + "learning_rate": 3.133060260925706e-05, + "loss": 9.688, + "step": 15190 + }, + { + "epoch": 0.6331957817514902, + "grad_norm": 248.0, + "learning_rate": 3.1324340955950966e-05, + "loss": 12.1879, + "step": 15191 + }, + { + "epoch": 0.6332374640490184, + "grad_norm": 888.0, + "learning_rate": 3.131807964301044e-05, + "loss": 24.3752, + "step": 15192 + }, + { + "epoch": 0.6332791463465466, + "grad_norm": 446.0, + "learning_rate": 3.131181867054955e-05, + "loss": 16.1256, + "step": 15193 + }, + { + "epoch": 0.6333208286440749, + "grad_norm": 732.0, + "learning_rate": 3.1305558038682435e-05, + "loss": 20.0023, + "step": 15194 + }, + { + "epoch": 0.6333625109416031, + "grad_norm": 256.0, + "learning_rate": 3.129929774752318e-05, + "loss": 10.8126, + "step": 15195 + }, + { + "epoch": 0.6334041932391313, + "grad_norm": 284.0, + "learning_rate": 3.1293037797185886e-05, + "loss": 13.8752, + "step": 15196 + }, + { + "epoch": 0.6334458755366595, + "grad_norm": 186.0, + "learning_rate": 3.128677818778463e-05, + "loss": 9.8148, + "step": 15197 + }, + { + "epoch": 0.6334875578341879, + "grad_norm": 250.0, + "learning_rate": 3.1280518919433524e-05, + "loss": 13.3754, + "step": 15198 + }, + { + "epoch": 0.6335292401317161, + "grad_norm": 1448.0, + "learning_rate": 3.12742599922466e-05, + "loss": 36.2503, + "step": 15199 + }, + { + "epoch": 0.6335709224292443, + "grad_norm": 668.0, + "learning_rate": 3.126800140633798e-05, + "loss": 19.6259, + "step": 15200 + }, + { + "epoch": 0.6336126047267725, + "grad_norm": 195.0, + "learning_rate": 3.1261743161821664e-05, + "loss": 10.2504, + "step": 15201 + }, + { + "epoch": 0.6336542870243008, + "grad_norm": 1272.0, + "learning_rate": 3.125548525881177e-05, + "loss": 29.0032, + "step": 15202 + }, + { + "epoch": 0.633695969321829, + "grad_norm": 160.0, + "learning_rate": 3.1249227697422296e-05, + "loss": 10.3753, + "step": 15203 + }, + { + "epoch": 0.6337376516193572, + "grad_norm": 123.0, + "learning_rate": 3.124297047776733e-05, + "loss": 9.1879, + "step": 15204 + }, + { + "epoch": 0.6337793339168855, + "grad_norm": 230.0, + "learning_rate": 3.123671359996088e-05, + "loss": 11.4378, + "step": 15205 + }, + { + "epoch": 0.6338210162144138, + "grad_norm": 316.0, + "learning_rate": 3.1230457064117e-05, + "loss": 13.1888, + "step": 15206 + }, + { + "epoch": 0.633862698511942, + "grad_norm": 450.0, + "learning_rate": 3.1224200870349696e-05, + "loss": 15.2502, + "step": 15207 + }, + { + "epoch": 0.6339043808094702, + "grad_norm": 444.0, + "learning_rate": 3.121794501877301e-05, + "loss": 15.1254, + "step": 15208 + }, + { + "epoch": 0.6339460631069984, + "grad_norm": 89.5, + "learning_rate": 3.1211689509500924e-05, + "loss": 9.1878, + "step": 15209 + }, + { + "epoch": 0.6339877454045267, + "grad_norm": 101.0, + "learning_rate": 3.120543434264749e-05, + "loss": 10.3767, + "step": 15210 + }, + { + "epoch": 0.634029427702055, + "grad_norm": 456.0, + "learning_rate": 3.119917951832666e-05, + "loss": 15.7516, + "step": 15211 + }, + { + "epoch": 0.6340711099995832, + "grad_norm": 255.0, + "learning_rate": 3.119292503665248e-05, + "loss": 12.0632, + "step": 15212 + }, + { + "epoch": 0.6341127922971114, + "grad_norm": 256.0, + "learning_rate": 3.1186670897738876e-05, + "loss": 11.9377, + "step": 15213 + }, + { + "epoch": 0.6341544745946397, + "grad_norm": 372.0, + "learning_rate": 3.11804171016999e-05, + "loss": 14.3754, + "step": 15214 + }, + { + "epoch": 0.6341961568921679, + "grad_norm": 88.5, + "learning_rate": 3.1174163648649465e-05, + "loss": 8.0003, + "step": 15215 + }, + { + "epoch": 0.6342378391896961, + "grad_norm": 408.0, + "learning_rate": 3.1167910538701595e-05, + "loss": 15.3763, + "step": 15216 + }, + { + "epoch": 0.6342795214872243, + "grad_norm": 316.0, + "learning_rate": 3.11616577719702e-05, + "loss": 14.8129, + "step": 15217 + }, + { + "epoch": 0.6343212037847527, + "grad_norm": 564.0, + "learning_rate": 3.115540534856929e-05, + "loss": 18.3769, + "step": 15218 + }, + { + "epoch": 0.6343628860822809, + "grad_norm": 140.0, + "learning_rate": 3.114915326861276e-05, + "loss": 10.6879, + "step": 15219 + }, + { + "epoch": 0.6344045683798091, + "grad_norm": 364.0, + "learning_rate": 3.1142901532214605e-05, + "loss": 14.5631, + "step": 15220 + }, + { + "epoch": 0.6344462506773373, + "grad_norm": 246.0, + "learning_rate": 3.113665013948874e-05, + "loss": 10.7504, + "step": 15221 + }, + { + "epoch": 0.6344879329748656, + "grad_norm": 1400.0, + "learning_rate": 3.113039909054911e-05, + "loss": 38.0004, + "step": 15222 + }, + { + "epoch": 0.6345296152723938, + "grad_norm": 368.0, + "learning_rate": 3.112414838550961e-05, + "loss": 15.1272, + "step": 15223 + }, + { + "epoch": 0.634571297569922, + "grad_norm": 292.0, + "learning_rate": 3.11178980244842e-05, + "loss": 11.3129, + "step": 15224 + }, + { + "epoch": 0.6346129798674502, + "grad_norm": 496.0, + "learning_rate": 3.1111648007586766e-05, + "loss": 15.8752, + "step": 15225 + }, + { + "epoch": 0.6346546621649786, + "grad_norm": 716.0, + "learning_rate": 3.110539833493124e-05, + "loss": 20.8753, + "step": 15226 + }, + { + "epoch": 0.6346963444625068, + "grad_norm": 72.5, + "learning_rate": 3.1099149006631484e-05, + "loss": 5.9064, + "step": 15227 + }, + { + "epoch": 0.634738026760035, + "grad_norm": 564.0, + "learning_rate": 3.109290002280144e-05, + "loss": 18.2502, + "step": 15228 + }, + { + "epoch": 0.6347797090575632, + "grad_norm": 392.0, + "learning_rate": 3.1086651383554944e-05, + "loss": 11.3754, + "step": 15229 + }, + { + "epoch": 0.6348213913550915, + "grad_norm": 498.0, + "learning_rate": 3.108040308900593e-05, + "loss": 17.1254, + "step": 15230 + }, + { + "epoch": 0.6348630736526197, + "grad_norm": 464.0, + "learning_rate": 3.107415513926823e-05, + "loss": 16.3753, + "step": 15231 + }, + { + "epoch": 0.634904755950148, + "grad_norm": 588.0, + "learning_rate": 3.106790753445573e-05, + "loss": 19.3751, + "step": 15232 + }, + { + "epoch": 0.6349464382476762, + "grad_norm": 163.0, + "learning_rate": 3.1061660274682314e-05, + "loss": 9.4378, + "step": 15233 + }, + { + "epoch": 0.6349881205452045, + "grad_norm": 212.0, + "learning_rate": 3.105541336006182e-05, + "loss": 12.1251, + "step": 15234 + }, + { + "epoch": 0.6350298028427327, + "grad_norm": 608.0, + "learning_rate": 3.1049166790708076e-05, + "loss": 19.1259, + "step": 15235 + }, + { + "epoch": 0.6350714851402609, + "grad_norm": 352.0, + "learning_rate": 3.1042920566734975e-05, + "loss": 16.8752, + "step": 15236 + }, + { + "epoch": 0.6351131674377891, + "grad_norm": 290.0, + "learning_rate": 3.1036674688256306e-05, + "loss": 12.8753, + "step": 15237 + }, + { + "epoch": 0.6351548497353174, + "grad_norm": 356.0, + "learning_rate": 3.103042915538595e-05, + "loss": 13.1254, + "step": 15238 + }, + { + "epoch": 0.6351965320328457, + "grad_norm": 101.5, + "learning_rate": 3.1024183968237684e-05, + "loss": 9.7508, + "step": 15239 + }, + { + "epoch": 0.6352382143303739, + "grad_norm": 260.0, + "learning_rate": 3.101793912692538e-05, + "loss": 13.3752, + "step": 15240 + }, + { + "epoch": 0.6352798966279022, + "grad_norm": 1392.0, + "learning_rate": 3.1011694631562785e-05, + "loss": 26.8802, + "step": 15241 + }, + { + "epoch": 0.6353215789254304, + "grad_norm": 350.0, + "learning_rate": 3.100545048226377e-05, + "loss": 13.188, + "step": 15242 + }, + { + "epoch": 0.6353632612229586, + "grad_norm": 276.0, + "learning_rate": 3.099920667914208e-05, + "loss": 12.5639, + "step": 15243 + }, + { + "epoch": 0.6354049435204868, + "grad_norm": 466.0, + "learning_rate": 3.0992963222311554e-05, + "loss": 16.6254, + "step": 15244 + }, + { + "epoch": 0.6354466258180151, + "grad_norm": 111.5, + "learning_rate": 3.098672011188595e-05, + "loss": 10.064, + "step": 15245 + }, + { + "epoch": 0.6354883081155434, + "grad_norm": 484.0, + "learning_rate": 3.098047734797907e-05, + "loss": 17.2511, + "step": 15246 + }, + { + "epoch": 0.6355299904130716, + "grad_norm": 258.0, + "learning_rate": 3.097423493070466e-05, + "loss": 11.4379, + "step": 15247 + }, + { + "epoch": 0.6355716727105998, + "grad_norm": 528.0, + "learning_rate": 3.096799286017653e-05, + "loss": 16.0005, + "step": 15248 + }, + { + "epoch": 0.6356133550081281, + "grad_norm": 142.0, + "learning_rate": 3.0961751136508404e-05, + "loss": 9.7503, + "step": 15249 + }, + { + "epoch": 0.6356550373056563, + "grad_norm": 108.0, + "learning_rate": 3.095550975981407e-05, + "loss": 9.6878, + "step": 15250 + }, + { + "epoch": 0.6356967196031845, + "grad_norm": 592.0, + "learning_rate": 3.094926873020724e-05, + "loss": 18.501, + "step": 15251 + }, + { + "epoch": 0.6357384019007127, + "grad_norm": 524.0, + "learning_rate": 3.09430280478017e-05, + "loss": 17.1296, + "step": 15252 + }, + { + "epoch": 0.6357800841982411, + "grad_norm": 736.0, + "learning_rate": 3.093678771271114e-05, + "loss": 21.7512, + "step": 15253 + }, + { + "epoch": 0.6358217664957693, + "grad_norm": 628.0, + "learning_rate": 3.0930547725049354e-05, + "loss": 19.0006, + "step": 15254 + }, + { + "epoch": 0.6358634487932975, + "grad_norm": 520.0, + "learning_rate": 3.092430808493e-05, + "loss": 17.5004, + "step": 15255 + }, + { + "epoch": 0.6359051310908257, + "grad_norm": 336.0, + "learning_rate": 3.091806879246684e-05, + "loss": 13.8128, + "step": 15256 + }, + { + "epoch": 0.635946813388354, + "grad_norm": 728.0, + "learning_rate": 3.091182984777354e-05, + "loss": 20.3799, + "step": 15257 + }, + { + "epoch": 0.6359884956858822, + "grad_norm": 185.0, + "learning_rate": 3.090559125096386e-05, + "loss": 11.814, + "step": 15258 + }, + { + "epoch": 0.6360301779834104, + "grad_norm": 368.0, + "learning_rate": 3.0899353002151466e-05, + "loss": 13.3754, + "step": 15259 + }, + { + "epoch": 0.6360718602809387, + "grad_norm": 1360.0, + "learning_rate": 3.0893115101450076e-05, + "loss": 30.0016, + "step": 15260 + }, + { + "epoch": 0.636113542578467, + "grad_norm": 270.0, + "learning_rate": 3.088687754897334e-05, + "loss": 13.3752, + "step": 15261 + }, + { + "epoch": 0.6361552248759952, + "grad_norm": 158.0, + "learning_rate": 3.088064034483498e-05, + "loss": 10.4377, + "step": 15262 + }, + { + "epoch": 0.6361969071735234, + "grad_norm": 322.0, + "learning_rate": 3.087440348914862e-05, + "loss": 13.1885, + "step": 15263 + }, + { + "epoch": 0.6362385894710516, + "grad_norm": 608.0, + "learning_rate": 3.086816698202797e-05, + "loss": 19.5036, + "step": 15264 + }, + { + "epoch": 0.6362802717685799, + "grad_norm": 708.0, + "learning_rate": 3.086193082358666e-05, + "loss": 20.7502, + "step": 15265 + }, + { + "epoch": 0.6363219540661081, + "grad_norm": 344.0, + "learning_rate": 3.0855695013938384e-05, + "loss": 12.1252, + "step": 15266 + }, + { + "epoch": 0.6363636363636364, + "grad_norm": 260.0, + "learning_rate": 3.084945955319675e-05, + "loss": 13.4376, + "step": 15267 + }, + { + "epoch": 0.6364053186611646, + "grad_norm": 232.0, + "learning_rate": 3.0843224441475424e-05, + "loss": 11.8127, + "step": 15268 + }, + { + "epoch": 0.6364470009586929, + "grad_norm": 446.0, + "learning_rate": 3.0836989678888016e-05, + "loss": 16.0002, + "step": 15269 + }, + { + "epoch": 0.6364886832562211, + "grad_norm": 189.0, + "learning_rate": 3.083075526554818e-05, + "loss": 11.0632, + "step": 15270 + }, + { + "epoch": 0.6365303655537493, + "grad_norm": 247.0, + "learning_rate": 3.082452120156954e-05, + "loss": 11.9379, + "step": 15271 + }, + { + "epoch": 0.6365720478512775, + "grad_norm": 800.0, + "learning_rate": 3.08182874870657e-05, + "loss": 22.501, + "step": 15272 + }, + { + "epoch": 0.6366137301488058, + "grad_norm": 272.0, + "learning_rate": 3.0812054122150266e-05, + "loss": 8.4376, + "step": 15273 + }, + { + "epoch": 0.6366554124463341, + "grad_norm": 334.0, + "learning_rate": 3.080582110693686e-05, + "loss": 12.2502, + "step": 15274 + }, + { + "epoch": 0.6366970947438623, + "grad_norm": 180.0, + "learning_rate": 3.0799588441539054e-05, + "loss": 10.813, + "step": 15275 + }, + { + "epoch": 0.6367387770413905, + "grad_norm": 176.0, + "learning_rate": 3.0793356126070475e-05, + "loss": 9.7502, + "step": 15276 + }, + { + "epoch": 0.6367804593389188, + "grad_norm": 844.0, + "learning_rate": 3.0787124160644665e-05, + "loss": 22.5053, + "step": 15277 + }, + { + "epoch": 0.636822141636447, + "grad_norm": 147.0, + "learning_rate": 3.078089254537524e-05, + "loss": 10.4379, + "step": 15278 + }, + { + "epoch": 0.6368638239339752, + "grad_norm": 258.0, + "learning_rate": 3.077466128037574e-05, + "loss": 12.8131, + "step": 15279 + }, + { + "epoch": 0.6369055062315034, + "grad_norm": 223.0, + "learning_rate": 3.076843036575976e-05, + "loss": 10.5631, + "step": 15280 + }, + { + "epoch": 0.6369471885290318, + "grad_norm": 68.0, + "learning_rate": 3.076219980164082e-05, + "loss": 7.8129, + "step": 15281 + }, + { + "epoch": 0.63698887082656, + "grad_norm": 252.0, + "learning_rate": 3.075596958813251e-05, + "loss": 13.3755, + "step": 15282 + }, + { + "epoch": 0.6370305531240882, + "grad_norm": 292.0, + "learning_rate": 3.0749739725348365e-05, + "loss": 11.8752, + "step": 15283 + }, + { + "epoch": 0.6370722354216164, + "grad_norm": 199.0, + "learning_rate": 3.0743510213401916e-05, + "loss": 10.1881, + "step": 15284 + }, + { + "epoch": 0.6371139177191447, + "grad_norm": 282.0, + "learning_rate": 3.07372810524067e-05, + "loss": 13.5627, + "step": 15285 + }, + { + "epoch": 0.6371556000166729, + "grad_norm": 234.0, + "learning_rate": 3.073105224247626e-05, + "loss": 11.7502, + "step": 15286 + }, + { + "epoch": 0.6371972823142011, + "grad_norm": 308.0, + "learning_rate": 3.072482378372409e-05, + "loss": 11.8752, + "step": 15287 + }, + { + "epoch": 0.6372389646117294, + "grad_norm": 636.0, + "learning_rate": 3.071859567626374e-05, + "loss": 18.751, + "step": 15288 + }, + { + "epoch": 0.6372806469092577, + "grad_norm": 266.0, + "learning_rate": 3.071236792020867e-05, + "loss": 12.3758, + "step": 15289 + }, + { + "epoch": 0.6373223292067859, + "grad_norm": 434.0, + "learning_rate": 3.070614051567243e-05, + "loss": 16.0003, + "step": 15290 + }, + { + "epoch": 0.6373640115043141, + "grad_norm": 149.0, + "learning_rate": 3.0699913462768464e-05, + "loss": 9.3131, + "step": 15291 + }, + { + "epoch": 0.6374056938018423, + "grad_norm": 276.0, + "learning_rate": 3.069368676161032e-05, + "loss": 13.3753, + "step": 15292 + }, + { + "epoch": 0.6374473760993706, + "grad_norm": 298.0, + "learning_rate": 3.068746041231142e-05, + "loss": 13.5662, + "step": 15293 + }, + { + "epoch": 0.6374890583968988, + "grad_norm": 482.0, + "learning_rate": 3.068123441498528e-05, + "loss": 17.7503, + "step": 15294 + }, + { + "epoch": 0.6375307406944271, + "grad_norm": 334.0, + "learning_rate": 3.067500876974536e-05, + "loss": 14.7502, + "step": 15295 + }, + { + "epoch": 0.6375724229919553, + "grad_norm": 60.0, + "learning_rate": 3.066878347670512e-05, + "loss": 7.5944, + "step": 15296 + }, + { + "epoch": 0.6376141052894836, + "grad_norm": 172.0, + "learning_rate": 3.0662558535978006e-05, + "loss": 12.1881, + "step": 15297 + }, + { + "epoch": 0.6376557875870118, + "grad_norm": 820.0, + "learning_rate": 3.0656333947677494e-05, + "loss": 23.5007, + "step": 15298 + }, + { + "epoch": 0.63769746988454, + "grad_norm": 640.0, + "learning_rate": 3.0650109711917e-05, + "loss": 18.5004, + "step": 15299 + }, + { + "epoch": 0.6377391521820682, + "grad_norm": 384.0, + "learning_rate": 3.0643885828809994e-05, + "loss": 15.3126, + "step": 15300 + }, + { + "epoch": 0.6377808344795965, + "grad_norm": 193.0, + "learning_rate": 3.063766229846987e-05, + "loss": 11.3753, + "step": 15301 + }, + { + "epoch": 0.6378225167771248, + "grad_norm": 186.0, + "learning_rate": 3.063143912101009e-05, + "loss": 10.2505, + "step": 15302 + }, + { + "epoch": 0.637864199074653, + "grad_norm": 314.0, + "learning_rate": 3.062521629654402e-05, + "loss": 13.1883, + "step": 15303 + }, + { + "epoch": 0.6379058813721812, + "grad_norm": 344.0, + "learning_rate": 3.0618993825185135e-05, + "loss": 13.3126, + "step": 15304 + }, + { + "epoch": 0.6379475636697095, + "grad_norm": 564.0, + "learning_rate": 3.061277170704678e-05, + "loss": 17.8761, + "step": 15305 + }, + { + "epoch": 0.6379892459672377, + "grad_norm": 175.0, + "learning_rate": 3.0606549942242405e-05, + "loss": 10.0628, + "step": 15306 + }, + { + "epoch": 0.6380309282647659, + "grad_norm": 434.0, + "learning_rate": 3.0600328530885356e-05, + "loss": 17.1252, + "step": 15307 + }, + { + "epoch": 0.6380726105622941, + "grad_norm": 154.0, + "learning_rate": 3.0594107473089055e-05, + "loss": 10.3752, + "step": 15308 + }, + { + "epoch": 0.6381142928598225, + "grad_norm": 88.0, + "learning_rate": 3.058788676896687e-05, + "loss": 8.8753, + "step": 15309 + }, + { + "epoch": 0.6381559751573507, + "grad_norm": 490.0, + "learning_rate": 3.058166641863217e-05, + "loss": 15.6273, + "step": 15310 + }, + { + "epoch": 0.6381976574548789, + "grad_norm": 239.0, + "learning_rate": 3.057544642219831e-05, + "loss": 10.8133, + "step": 15311 + }, + { + "epoch": 0.6382393397524071, + "grad_norm": 632.0, + "learning_rate": 3.056922677977869e-05, + "loss": 14.5674, + "step": 15312 + }, + { + "epoch": 0.6382810220499354, + "grad_norm": 296.0, + "learning_rate": 3.05630074914866e-05, + "loss": 13.6253, + "step": 15313 + }, + { + "epoch": 0.6383227043474636, + "grad_norm": 154.0, + "learning_rate": 3.0556788557435465e-05, + "loss": 9.3132, + "step": 15314 + }, + { + "epoch": 0.6383643866449918, + "grad_norm": 196.0, + "learning_rate": 3.055056997773856e-05, + "loss": 11.3757, + "step": 15315 + }, + { + "epoch": 0.6384060689425202, + "grad_norm": 153.0, + "learning_rate": 3.054435175250926e-05, + "loss": 8.688, + "step": 15316 + }, + { + "epoch": 0.6384477512400484, + "grad_norm": 772.0, + "learning_rate": 3.053813388186085e-05, + "loss": 17.8753, + "step": 15317 + }, + { + "epoch": 0.6384894335375766, + "grad_norm": 660.0, + "learning_rate": 3.053191636590671e-05, + "loss": 18.5002, + "step": 15318 + }, + { + "epoch": 0.6385311158351048, + "grad_norm": 204.0, + "learning_rate": 3.052569920476009e-05, + "loss": 10.1881, + "step": 15319 + }, + { + "epoch": 0.6385727981326331, + "grad_norm": 760.0, + "learning_rate": 3.051948239853435e-05, + "loss": 21.8808, + "step": 15320 + }, + { + "epoch": 0.6386144804301613, + "grad_norm": 316.0, + "learning_rate": 3.0513265947342772e-05, + "loss": 13.3132, + "step": 15321 + }, + { + "epoch": 0.6386561627276895, + "grad_norm": 176.0, + "learning_rate": 3.050704985129865e-05, + "loss": 10.1881, + "step": 15322 + }, + { + "epoch": 0.6386978450252178, + "grad_norm": 247.0, + "learning_rate": 3.0500834110515263e-05, + "loss": 12.0005, + "step": 15323 + }, + { + "epoch": 0.6387395273227461, + "grad_norm": 236.0, + "learning_rate": 3.049461872510593e-05, + "loss": 10.7503, + "step": 15324 + }, + { + "epoch": 0.6387812096202743, + "grad_norm": 276.0, + "learning_rate": 3.0488403695183883e-05, + "loss": 13.1253, + "step": 15325 + }, + { + "epoch": 0.6388228919178025, + "grad_norm": 290.0, + "learning_rate": 3.048218902086243e-05, + "loss": 13.1878, + "step": 15326 + }, + { + "epoch": 0.6388645742153307, + "grad_norm": 218.0, + "learning_rate": 3.0475974702254782e-05, + "loss": 11.8753, + "step": 15327 + }, + { + "epoch": 0.638906256512859, + "grad_norm": 118.0, + "learning_rate": 3.0469760739474262e-05, + "loss": 7.5005, + "step": 15328 + }, + { + "epoch": 0.6389479388103873, + "grad_norm": 89.0, + "learning_rate": 3.0463547132634063e-05, + "loss": 6.0317, + "step": 15329 + }, + { + "epoch": 0.6389896211079155, + "grad_norm": 768.0, + "learning_rate": 3.0457333881847473e-05, + "loss": 22.1287, + "step": 15330 + }, + { + "epoch": 0.6390313034054437, + "grad_norm": 372.0, + "learning_rate": 3.045112098722769e-05, + "loss": 14.3757, + "step": 15331 + }, + { + "epoch": 0.639072985702972, + "grad_norm": 370.0, + "learning_rate": 3.0444908448887966e-05, + "loss": 14.3762, + "step": 15332 + }, + { + "epoch": 0.6391146680005002, + "grad_norm": 314.0, + "learning_rate": 3.0438696266941525e-05, + "loss": 14.0011, + "step": 15333 + }, + { + "epoch": 0.6391563502980284, + "grad_norm": 448.0, + "learning_rate": 3.0432484441501575e-05, + "loss": 16.7511, + "step": 15334 + }, + { + "epoch": 0.6391980325955566, + "grad_norm": 736.0, + "learning_rate": 3.042627297268133e-05, + "loss": 21.1281, + "step": 15335 + }, + { + "epoch": 0.639239714893085, + "grad_norm": 460.0, + "learning_rate": 3.042006186059402e-05, + "loss": 17.1252, + "step": 15336 + }, + { + "epoch": 0.6392813971906132, + "grad_norm": 208.0, + "learning_rate": 3.04138511053528e-05, + "loss": 12.1253, + "step": 15337 + }, + { + "epoch": 0.6393230794881414, + "grad_norm": 280.0, + "learning_rate": 3.0407640707070896e-05, + "loss": 12.8128, + "step": 15338 + }, + { + "epoch": 0.6393647617856696, + "grad_norm": 468.0, + "learning_rate": 3.040143066586146e-05, + "loss": 15.2502, + "step": 15339 + }, + { + "epoch": 0.6394064440831979, + "grad_norm": 264.0, + "learning_rate": 3.0395220981837714e-05, + "loss": 12.8127, + "step": 15340 + }, + { + "epoch": 0.6394481263807261, + "grad_norm": 340.0, + "learning_rate": 3.038901165511278e-05, + "loss": 14.3126, + "step": 15341 + }, + { + "epoch": 0.6394898086782543, + "grad_norm": 221.0, + "learning_rate": 3.0382802685799875e-05, + "loss": 10.6879, + "step": 15342 + }, + { + "epoch": 0.6395314909757825, + "grad_norm": 140.0, + "learning_rate": 3.037659407401211e-05, + "loss": 10.9379, + "step": 15343 + }, + { + "epoch": 0.6395731732733109, + "grad_norm": 344.0, + "learning_rate": 3.0370385819862684e-05, + "loss": 11.6255, + "step": 15344 + }, + { + "epoch": 0.6396148555708391, + "grad_norm": 536.0, + "learning_rate": 3.0364177923464698e-05, + "loss": 16.1271, + "step": 15345 + }, + { + "epoch": 0.6396565378683673, + "grad_norm": 424.0, + "learning_rate": 3.0357970384931322e-05, + "loss": 15.5001, + "step": 15346 + }, + { + "epoch": 0.6396982201658955, + "grad_norm": 544.0, + "learning_rate": 3.0351763204375672e-05, + "loss": 16.7502, + "step": 15347 + }, + { + "epoch": 0.6397399024634238, + "grad_norm": 74.5, + "learning_rate": 3.034555638191089e-05, + "loss": 8.2501, + "step": 15348 + }, + { + "epoch": 0.639781584760952, + "grad_norm": 464.0, + "learning_rate": 3.0339349917650077e-05, + "loss": 17.0011, + "step": 15349 + }, + { + "epoch": 0.6398232670584802, + "grad_norm": 244.0, + "learning_rate": 3.0333143811706378e-05, + "loss": 11.7503, + "step": 15350 + }, + { + "epoch": 0.6398649493560085, + "grad_norm": 660.0, + "learning_rate": 3.0326938064192856e-05, + "loss": 21.1258, + "step": 15351 + }, + { + "epoch": 0.6399066316535368, + "grad_norm": 238.0, + "learning_rate": 3.032073267522265e-05, + "loss": 12.6881, + "step": 15352 + }, + { + "epoch": 0.639948313951065, + "grad_norm": 728.0, + "learning_rate": 3.031452764490883e-05, + "loss": 21.6256, + "step": 15353 + }, + { + "epoch": 0.6399899962485932, + "grad_norm": 66.0, + "learning_rate": 3.03083229733645e-05, + "loss": 7.7504, + "step": 15354 + }, + { + "epoch": 0.6400316785461214, + "grad_norm": 214.0, + "learning_rate": 3.0302118660702716e-05, + "loss": 11.1877, + "step": 15355 + }, + { + "epoch": 0.6400733608436497, + "grad_norm": 212.0, + "learning_rate": 3.029591470703659e-05, + "loss": 11.8752, + "step": 15356 + }, + { + "epoch": 0.640115043141178, + "grad_norm": 536.0, + "learning_rate": 3.0289711112479147e-05, + "loss": 17.8764, + "step": 15357 + }, + { + "epoch": 0.6401567254387062, + "grad_norm": 167.0, + "learning_rate": 3.028350787714348e-05, + "loss": 10.2503, + "step": 15358 + }, + { + "epoch": 0.6401984077362344, + "grad_norm": 306.0, + "learning_rate": 3.0277305001142626e-05, + "loss": 13.5628, + "step": 15359 + }, + { + "epoch": 0.6402400900337627, + "grad_norm": 584.0, + "learning_rate": 3.027110248458964e-05, + "loss": 17.6253, + "step": 15360 + }, + { + "epoch": 0.6402817723312909, + "grad_norm": 302.0, + "learning_rate": 3.0264900327597557e-05, + "loss": 13.0631, + "step": 15361 + }, + { + "epoch": 0.6403234546288191, + "grad_norm": 412.0, + "learning_rate": 3.025869853027944e-05, + "loss": 14.1257, + "step": 15362 + }, + { + "epoch": 0.6403651369263473, + "grad_norm": 157.0, + "learning_rate": 3.0252497092748265e-05, + "loss": 11.4377, + "step": 15363 + }, + { + "epoch": 0.6404068192238757, + "grad_norm": 136.0, + "learning_rate": 3.0246296015117113e-05, + "loss": 9.3759, + "step": 15364 + }, + { + "epoch": 0.6404485015214039, + "grad_norm": 194.0, + "learning_rate": 3.024009529749895e-05, + "loss": 11.1877, + "step": 15365 + }, + { + "epoch": 0.6404901838189321, + "grad_norm": 162.0, + "learning_rate": 3.0233894940006813e-05, + "loss": 9.6254, + "step": 15366 + }, + { + "epoch": 0.6405318661164603, + "grad_norm": 242.0, + "learning_rate": 3.0227694942753683e-05, + "loss": 13.7514, + "step": 15367 + }, + { + "epoch": 0.6405735484139886, + "grad_norm": 48.5, + "learning_rate": 3.0221495305852587e-05, + "loss": 6.0947, + "step": 15368 + }, + { + "epoch": 0.6406152307115168, + "grad_norm": 604.0, + "learning_rate": 3.021529602941648e-05, + "loss": 19.7505, + "step": 15369 + }, + { + "epoch": 0.640656913009045, + "grad_norm": 498.0, + "learning_rate": 3.020909711355836e-05, + "loss": 15.3126, + "step": 15370 + }, + { + "epoch": 0.6406985953065732, + "grad_norm": 182.0, + "learning_rate": 3.0202898558391213e-05, + "loss": 11.2502, + "step": 15371 + }, + { + "epoch": 0.6407402776041016, + "grad_norm": 844.0, + "learning_rate": 3.0196700364027986e-05, + "loss": 21.8755, + "step": 15372 + }, + { + "epoch": 0.6407819599016298, + "grad_norm": 452.0, + "learning_rate": 3.019050253058165e-05, + "loss": 15.0009, + "step": 15373 + }, + { + "epoch": 0.640823642199158, + "grad_norm": 176.0, + "learning_rate": 3.0184305058165185e-05, + "loss": 11.6255, + "step": 15374 + }, + { + "epoch": 0.6408653244966862, + "grad_norm": 172.0, + "learning_rate": 3.0178107946891492e-05, + "loss": 11.0002, + "step": 15375 + }, + { + "epoch": 0.6409070067942145, + "grad_norm": 382.0, + "learning_rate": 3.0171911196873566e-05, + "loss": 15.8128, + "step": 15376 + }, + { + "epoch": 0.6409486890917427, + "grad_norm": 326.0, + "learning_rate": 3.01657148082243e-05, + "loss": 14.6877, + "step": 15377 + }, + { + "epoch": 0.640990371389271, + "grad_norm": 184.0, + "learning_rate": 3.015951878105666e-05, + "loss": 11.1255, + "step": 15378 + }, + { + "epoch": 0.6410320536867992, + "grad_norm": 346.0, + "learning_rate": 3.0153323115483535e-05, + "loss": 14.1876, + "step": 15379 + }, + { + "epoch": 0.6410737359843275, + "grad_norm": 364.0, + "learning_rate": 3.014712781161787e-05, + "loss": 15.3752, + "step": 15380 + }, + { + "epoch": 0.6411154182818557, + "grad_norm": 318.0, + "learning_rate": 3.0140932869572547e-05, + "loss": 12.5008, + "step": 15381 + }, + { + "epoch": 0.6411571005793839, + "grad_norm": 740.0, + "learning_rate": 3.0134738289460508e-05, + "loss": 21.3755, + "step": 15382 + }, + { + "epoch": 0.6411987828769121, + "grad_norm": 173.0, + "learning_rate": 3.0128544071394603e-05, + "loss": 10.1254, + "step": 15383 + }, + { + "epoch": 0.6412404651744404, + "grad_norm": 396.0, + "learning_rate": 3.012235021548776e-05, + "loss": 14.6255, + "step": 15384 + }, + { + "epoch": 0.6412821474719687, + "grad_norm": 158.0, + "learning_rate": 3.011615672185284e-05, + "loss": 10.3753, + "step": 15385 + }, + { + "epoch": 0.6413238297694969, + "grad_norm": 676.0, + "learning_rate": 3.0109963590602745e-05, + "loss": 21.1256, + "step": 15386 + }, + { + "epoch": 0.6413655120670252, + "grad_norm": 680.0, + "learning_rate": 3.0103770821850308e-05, + "loss": 21.2503, + "step": 15387 + }, + { + "epoch": 0.6414071943645534, + "grad_norm": 498.0, + "learning_rate": 3.0097578415708437e-05, + "loss": 16.2511, + "step": 15388 + }, + { + "epoch": 0.6414488766620816, + "grad_norm": 536.0, + "learning_rate": 3.0091386372289947e-05, + "loss": 16.8753, + "step": 15389 + }, + { + "epoch": 0.6414905589596098, + "grad_norm": 476.0, + "learning_rate": 3.008519469170773e-05, + "loss": 17.2502, + "step": 15390 + }, + { + "epoch": 0.6415322412571381, + "grad_norm": 324.0, + "learning_rate": 3.0079003374074584e-05, + "loss": 13.4378, + "step": 15391 + }, + { + "epoch": 0.6415739235546664, + "grad_norm": 247.0, + "learning_rate": 3.0072812419503395e-05, + "loss": 11.5627, + "step": 15392 + }, + { + "epoch": 0.6416156058521946, + "grad_norm": 314.0, + "learning_rate": 3.006662182810694e-05, + "loss": 11.5014, + "step": 15393 + }, + { + "epoch": 0.6416572881497228, + "grad_norm": 302.0, + "learning_rate": 3.0060431599998095e-05, + "loss": 13.9377, + "step": 15394 + }, + { + "epoch": 0.6416989704472511, + "grad_norm": 278.0, + "learning_rate": 3.0054241735289633e-05, + "loss": 12.2503, + "step": 15395 + }, + { + "epoch": 0.6417406527447793, + "grad_norm": 304.0, + "learning_rate": 3.0048052234094404e-05, + "loss": 12.7509, + "step": 15396 + }, + { + "epoch": 0.6417823350423075, + "grad_norm": 328.0, + "learning_rate": 3.004186309652518e-05, + "loss": 15.0631, + "step": 15397 + }, + { + "epoch": 0.6418240173398357, + "grad_norm": 132.0, + "learning_rate": 3.0035674322694786e-05, + "loss": 8.8752, + "step": 15398 + }, + { + "epoch": 0.6418656996373641, + "grad_norm": 211.0, + "learning_rate": 3.002948591271598e-05, + "loss": 10.6255, + "step": 15399 + }, + { + "epoch": 0.6419073819348923, + "grad_norm": 230.0, + "learning_rate": 3.0023297866701594e-05, + "loss": 9.8132, + "step": 15400 + }, + { + "epoch": 0.6419490642324205, + "grad_norm": 464.0, + "learning_rate": 3.0017110184764352e-05, + "loss": 15.7503, + "step": 15401 + }, + { + "epoch": 0.6419907465299487, + "grad_norm": 876.0, + "learning_rate": 3.0010922867017078e-05, + "loss": 22.6262, + "step": 15402 + }, + { + "epoch": 0.642032428827477, + "grad_norm": 180.0, + "learning_rate": 3.0004735913572478e-05, + "loss": 11.1252, + "step": 15403 + }, + { + "epoch": 0.6420741111250052, + "grad_norm": 452.0, + "learning_rate": 2.9998549324543372e-05, + "loss": 16.2502, + "step": 15404 + }, + { + "epoch": 0.6421157934225334, + "grad_norm": 370.0, + "learning_rate": 2.9992363100042455e-05, + "loss": 15.876, + "step": 15405 + }, + { + "epoch": 0.6421574757200617, + "grad_norm": 442.0, + "learning_rate": 2.9986177240182523e-05, + "loss": 14.9393, + "step": 15406 + }, + { + "epoch": 0.64219915801759, + "grad_norm": 151.0, + "learning_rate": 2.997999174507626e-05, + "loss": 10.6253, + "step": 15407 + }, + { + "epoch": 0.6422408403151182, + "grad_norm": 1408.0, + "learning_rate": 2.997380661483643e-05, + "loss": 32.2505, + "step": 15408 + }, + { + "epoch": 0.6422825226126464, + "grad_norm": 322.0, + "learning_rate": 2.9967621849575766e-05, + "loss": 13.1878, + "step": 15409 + }, + { + "epoch": 0.6423242049101746, + "grad_norm": 280.0, + "learning_rate": 2.996143744940696e-05, + "loss": 11.5007, + "step": 15410 + }, + { + "epoch": 0.6423658872077029, + "grad_norm": 264.0, + "learning_rate": 2.995525341444273e-05, + "loss": 12.2502, + "step": 15411 + }, + { + "epoch": 0.6424075695052311, + "grad_norm": 210.0, + "learning_rate": 2.994906974479581e-05, + "loss": 11.8753, + "step": 15412 + }, + { + "epoch": 0.6424492518027594, + "grad_norm": 168.0, + "learning_rate": 2.994288644057885e-05, + "loss": 9.4377, + "step": 15413 + }, + { + "epoch": 0.6424909341002876, + "grad_norm": 160.0, + "learning_rate": 2.9936703501904585e-05, + "loss": 10.5007, + "step": 15414 + }, + { + "epoch": 0.6425326163978159, + "grad_norm": 167.0, + "learning_rate": 2.9930520928885656e-05, + "loss": 9.8131, + "step": 15415 + }, + { + "epoch": 0.6425742986953441, + "grad_norm": 500.0, + "learning_rate": 2.9924338721634793e-05, + "loss": 18.1252, + "step": 15416 + }, + { + "epoch": 0.6426159809928723, + "grad_norm": 552.0, + "learning_rate": 2.9918156880264615e-05, + "loss": 18.6253, + "step": 15417 + }, + { + "epoch": 0.6426576632904005, + "grad_norm": 328.0, + "learning_rate": 2.9911975404887826e-05, + "loss": 13.0639, + "step": 15418 + }, + { + "epoch": 0.6426993455879288, + "grad_norm": 688.0, + "learning_rate": 2.990579429561705e-05, + "loss": 20.7539, + "step": 15419 + }, + { + "epoch": 0.6427410278854571, + "grad_norm": 458.0, + "learning_rate": 2.9899613552564975e-05, + "loss": 16.2503, + "step": 15420 + }, + { + "epoch": 0.6427827101829853, + "grad_norm": 206.0, + "learning_rate": 2.989343317584422e-05, + "loss": 11.7505, + "step": 15421 + }, + { + "epoch": 0.6428243924805135, + "grad_norm": 436.0, + "learning_rate": 2.9887253165567432e-05, + "loss": 17.1255, + "step": 15422 + }, + { + "epoch": 0.6428660747780418, + "grad_norm": 132.0, + "learning_rate": 2.9881073521847235e-05, + "loss": 9.0628, + "step": 15423 + }, + { + "epoch": 0.64290775707557, + "grad_norm": 122.5, + "learning_rate": 2.987489424479627e-05, + "loss": 10.0005, + "step": 15424 + }, + { + "epoch": 0.6429494393730982, + "grad_norm": 532.0, + "learning_rate": 2.986871533452713e-05, + "loss": 15.6268, + "step": 15425 + }, + { + "epoch": 0.6429911216706264, + "grad_norm": 290.0, + "learning_rate": 2.9862536791152462e-05, + "loss": 13.6252, + "step": 15426 + }, + { + "epoch": 0.6430328039681548, + "grad_norm": 384.0, + "learning_rate": 2.9856358614784824e-05, + "loss": 11.8754, + "step": 15427 + }, + { + "epoch": 0.643074486265683, + "grad_norm": 584.0, + "learning_rate": 2.985018080553686e-05, + "loss": 19.6253, + "step": 15428 + }, + { + "epoch": 0.6431161685632112, + "grad_norm": 448.0, + "learning_rate": 2.9844003363521115e-05, + "loss": 16.3753, + "step": 15429 + }, + { + "epoch": 0.6431578508607394, + "grad_norm": 760.0, + "learning_rate": 2.9837826288850224e-05, + "loss": 21.8752, + "step": 15430 + }, + { + "epoch": 0.6431995331582677, + "grad_norm": 155.0, + "learning_rate": 2.9831649581636723e-05, + "loss": 9.9378, + "step": 15431 + }, + { + "epoch": 0.6432412154557959, + "grad_norm": 346.0, + "learning_rate": 2.9825473241993207e-05, + "loss": 13.7536, + "step": 15432 + }, + { + "epoch": 0.6432828977533241, + "grad_norm": 536.0, + "learning_rate": 2.981929727003222e-05, + "loss": 17.5003, + "step": 15433 + }, + { + "epoch": 0.6433245800508524, + "grad_norm": 262.0, + "learning_rate": 2.981312166586634e-05, + "loss": 13.1258, + "step": 15434 + }, + { + "epoch": 0.6433662623483807, + "grad_norm": 135.0, + "learning_rate": 2.9806946429608108e-05, + "loss": 9.0005, + "step": 15435 + }, + { + "epoch": 0.6434079446459089, + "grad_norm": 344.0, + "learning_rate": 2.980077156137007e-05, + "loss": 15.7504, + "step": 15436 + }, + { + "epoch": 0.6434496269434371, + "grad_norm": 332.0, + "learning_rate": 2.9794597061264757e-05, + "loss": 12.6884, + "step": 15437 + }, + { + "epoch": 0.6434913092409653, + "grad_norm": 468.0, + "learning_rate": 2.9788422929404724e-05, + "loss": 16.5002, + "step": 15438 + }, + { + "epoch": 0.6435329915384936, + "grad_norm": 145.0, + "learning_rate": 2.9782249165902453e-05, + "loss": 9.8128, + "step": 15439 + }, + { + "epoch": 0.6435746738360218, + "grad_norm": 181.0, + "learning_rate": 2.9776075770870515e-05, + "loss": 7.7511, + "step": 15440 + }, + { + "epoch": 0.6436163561335501, + "grad_norm": 388.0, + "learning_rate": 2.9769902744421363e-05, + "loss": 16.2505, + "step": 15441 + }, + { + "epoch": 0.6436580384310783, + "grad_norm": 418.0, + "learning_rate": 2.9763730086667557e-05, + "loss": 16.5003, + "step": 15442 + }, + { + "epoch": 0.6436997207286066, + "grad_norm": 572.0, + "learning_rate": 2.9757557797721542e-05, + "loss": 16.6274, + "step": 15443 + }, + { + "epoch": 0.6437414030261348, + "grad_norm": 560.0, + "learning_rate": 2.9751385877695852e-05, + "loss": 18.8751, + "step": 15444 + }, + { + "epoch": 0.643783085323663, + "grad_norm": 474.0, + "learning_rate": 2.9745214326702935e-05, + "loss": 16.7533, + "step": 15445 + }, + { + "epoch": 0.6438247676211912, + "grad_norm": 516.0, + "learning_rate": 2.9739043144855294e-05, + "loss": 16.8767, + "step": 15446 + }, + { + "epoch": 0.6438664499187196, + "grad_norm": 220.0, + "learning_rate": 2.9732872332265392e-05, + "loss": 11.8126, + "step": 15447 + }, + { + "epoch": 0.6439081322162478, + "grad_norm": 179.0, + "learning_rate": 2.9726701889045684e-05, + "loss": 9.3756, + "step": 15448 + }, + { + "epoch": 0.643949814513776, + "grad_norm": 696.0, + "learning_rate": 2.972053181530863e-05, + "loss": 21.8753, + "step": 15449 + }, + { + "epoch": 0.6439914968113042, + "grad_norm": 596.0, + "learning_rate": 2.9714362111166705e-05, + "loss": 18.2506, + "step": 15450 + }, + { + "epoch": 0.6440331791088325, + "grad_norm": 556.0, + "learning_rate": 2.970819277673231e-05, + "loss": 19.2503, + "step": 15451 + }, + { + "epoch": 0.6440748614063607, + "grad_norm": 384.0, + "learning_rate": 2.9702023812117918e-05, + "loss": 16.8754, + "step": 15452 + }, + { + "epoch": 0.6441165437038889, + "grad_norm": 1240.0, + "learning_rate": 2.969585521743593e-05, + "loss": 27.8752, + "step": 15453 + }, + { + "epoch": 0.6441582260014171, + "grad_norm": 173.0, + "learning_rate": 2.96896869927988e-05, + "loss": 10.7507, + "step": 15454 + }, + { + "epoch": 0.6441999082989455, + "grad_norm": 81.0, + "learning_rate": 2.9683519138318905e-05, + "loss": 8.2506, + "step": 15455 + }, + { + "epoch": 0.6442415905964737, + "grad_norm": 170.0, + "learning_rate": 2.9677351654108702e-05, + "loss": 8.563, + "step": 15456 + }, + { + "epoch": 0.6442832728940019, + "grad_norm": 740.0, + "learning_rate": 2.967118454028055e-05, + "loss": 20.8755, + "step": 15457 + }, + { + "epoch": 0.6443249551915301, + "grad_norm": 312.0, + "learning_rate": 2.966501779694687e-05, + "loss": 9.3757, + "step": 15458 + }, + { + "epoch": 0.6443666374890584, + "grad_norm": 992.0, + "learning_rate": 2.9658851424220047e-05, + "loss": 22.7543, + "step": 15459 + }, + { + "epoch": 0.6444083197865866, + "grad_norm": 312.0, + "learning_rate": 2.9652685422212467e-05, + "loss": 13.188, + "step": 15460 + }, + { + "epoch": 0.6444500020841148, + "grad_norm": 266.0, + "learning_rate": 2.9646519791036487e-05, + "loss": 11.8134, + "step": 15461 + }, + { + "epoch": 0.6444916843816432, + "grad_norm": 414.0, + "learning_rate": 2.9640354530804515e-05, + "loss": 15.6894, + "step": 15462 + }, + { + "epoch": 0.6445333666791714, + "grad_norm": 416.0, + "learning_rate": 2.9634189641628862e-05, + "loss": 14.313, + "step": 15463 + }, + { + "epoch": 0.6445750489766996, + "grad_norm": 378.0, + "learning_rate": 2.9628025123621938e-05, + "loss": 15.2501, + "step": 15464 + }, + { + "epoch": 0.6446167312742278, + "grad_norm": 552.0, + "learning_rate": 2.9621860976896036e-05, + "loss": 18.5007, + "step": 15465 + }, + { + "epoch": 0.6446584135717561, + "grad_norm": 136.0, + "learning_rate": 2.9615697201563552e-05, + "loss": 9.5628, + "step": 15466 + }, + { + "epoch": 0.6447000958692843, + "grad_norm": 194.0, + "learning_rate": 2.9609533797736777e-05, + "loss": 12.1882, + "step": 15467 + }, + { + "epoch": 0.6447417781668126, + "grad_norm": 284.0, + "learning_rate": 2.9603370765528083e-05, + "loss": 12.1255, + "step": 15468 + }, + { + "epoch": 0.6447834604643408, + "grad_norm": 596.0, + "learning_rate": 2.9597208105049735e-05, + "loss": 20.3753, + "step": 15469 + }, + { + "epoch": 0.6448251427618691, + "grad_norm": 330.0, + "learning_rate": 2.959104581641411e-05, + "loss": 13.5001, + "step": 15470 + }, + { + "epoch": 0.6448668250593973, + "grad_norm": 270.0, + "learning_rate": 2.9584883899733462e-05, + "loss": 12.8768, + "step": 15471 + }, + { + "epoch": 0.6449085073569255, + "grad_norm": 684.0, + "learning_rate": 2.957872235512013e-05, + "loss": 20.6252, + "step": 15472 + }, + { + "epoch": 0.6449501896544537, + "grad_norm": 157.0, + "learning_rate": 2.9572561182686388e-05, + "loss": 9.064, + "step": 15473 + }, + { + "epoch": 0.644991871951982, + "grad_norm": 169.0, + "learning_rate": 2.9566400382544547e-05, + "loss": 9.7502, + "step": 15474 + }, + { + "epoch": 0.6450335542495103, + "grad_norm": 138.0, + "learning_rate": 2.9560239954806857e-05, + "loss": 10.1878, + "step": 15475 + }, + { + "epoch": 0.6450752365470385, + "grad_norm": 145.0, + "learning_rate": 2.955407989958563e-05, + "loss": 10.0631, + "step": 15476 + }, + { + "epoch": 0.6451169188445667, + "grad_norm": 139.0, + "learning_rate": 2.9547920216993087e-05, + "loss": 10.1878, + "step": 15477 + }, + { + "epoch": 0.645158601142095, + "grad_norm": 85.5, + "learning_rate": 2.9541760907141535e-05, + "loss": 7.4064, + "step": 15478 + }, + { + "epoch": 0.6452002834396232, + "grad_norm": 732.0, + "learning_rate": 2.9535601970143184e-05, + "loss": 16.8788, + "step": 15479 + }, + { + "epoch": 0.6452419657371514, + "grad_norm": 51.5, + "learning_rate": 2.9529443406110326e-05, + "loss": 6.7816, + "step": 15480 + }, + { + "epoch": 0.6452836480346796, + "grad_norm": 904.0, + "learning_rate": 2.9523285215155162e-05, + "loss": 21.8802, + "step": 15481 + }, + { + "epoch": 0.645325330332208, + "grad_norm": 300.0, + "learning_rate": 2.9517127397389955e-05, + "loss": 13.8754, + "step": 15482 + }, + { + "epoch": 0.6453670126297362, + "grad_norm": 728.0, + "learning_rate": 2.951096995292691e-05, + "loss": 24.5002, + "step": 15483 + }, + { + "epoch": 0.6454086949272644, + "grad_norm": 320.0, + "learning_rate": 2.9504812881878262e-05, + "loss": 13.2501, + "step": 15484 + }, + { + "epoch": 0.6454503772247926, + "grad_norm": 238.0, + "learning_rate": 2.9498656184356215e-05, + "loss": 13.3752, + "step": 15485 + }, + { + "epoch": 0.6454920595223209, + "grad_norm": 310.0, + "learning_rate": 2.949249986047298e-05, + "loss": 13.6257, + "step": 15486 + }, + { + "epoch": 0.6455337418198491, + "grad_norm": 350.0, + "learning_rate": 2.9486343910340752e-05, + "loss": 13.8752, + "step": 15487 + }, + { + "epoch": 0.6455754241173773, + "grad_norm": 348.0, + "learning_rate": 2.948018833407174e-05, + "loss": 15.1258, + "step": 15488 + }, + { + "epoch": 0.6456171064149056, + "grad_norm": 320.0, + "learning_rate": 2.94740331317781e-05, + "loss": 13.7502, + "step": 15489 + }, + { + "epoch": 0.6456587887124339, + "grad_norm": 310.0, + "learning_rate": 2.946787830357205e-05, + "loss": 13.3755, + "step": 15490 + }, + { + "epoch": 0.6457004710099621, + "grad_norm": 496.0, + "learning_rate": 2.9461723849565714e-05, + "loss": 17.6259, + "step": 15491 + }, + { + "epoch": 0.6457421533074903, + "grad_norm": 354.0, + "learning_rate": 2.945556976987131e-05, + "loss": 13.8752, + "step": 15492 + }, + { + "epoch": 0.6457838356050185, + "grad_norm": 480.0, + "learning_rate": 2.9449416064600953e-05, + "loss": 15.188, + "step": 15493 + }, + { + "epoch": 0.6458255179025468, + "grad_norm": 780.0, + "learning_rate": 2.9443262733866827e-05, + "loss": 19.6293, + "step": 15494 + }, + { + "epoch": 0.645867200200075, + "grad_norm": 247.0, + "learning_rate": 2.9437109777781046e-05, + "loss": 12.0626, + "step": 15495 + }, + { + "epoch": 0.6459088824976033, + "grad_norm": 202.0, + "learning_rate": 2.9430957196455772e-05, + "loss": 9.5626, + "step": 15496 + }, + { + "epoch": 0.6459505647951315, + "grad_norm": 462.0, + "learning_rate": 2.942480499000313e-05, + "loss": 15.2502, + "step": 15497 + }, + { + "epoch": 0.6459922470926598, + "grad_norm": 764.0, + "learning_rate": 2.941865315853525e-05, + "loss": 21.3756, + "step": 15498 + }, + { + "epoch": 0.646033929390188, + "grad_norm": 1024.0, + "learning_rate": 2.941250170216422e-05, + "loss": 22.6283, + "step": 15499 + }, + { + "epoch": 0.6460756116877162, + "grad_norm": 472.0, + "learning_rate": 2.9406350621002205e-05, + "loss": 17.8759, + "step": 15500 + }, + { + "epoch": 0.6461172939852444, + "grad_norm": 206.0, + "learning_rate": 2.940019991516125e-05, + "loss": 11.7514, + "step": 15501 + }, + { + "epoch": 0.6461589762827727, + "grad_norm": 454.0, + "learning_rate": 2.939404958475351e-05, + "loss": 17.3753, + "step": 15502 + }, + { + "epoch": 0.646200658580301, + "grad_norm": 227.0, + "learning_rate": 2.938789962989102e-05, + "loss": 9.7527, + "step": 15503 + }, + { + "epoch": 0.6462423408778292, + "grad_norm": 298.0, + "learning_rate": 2.9381750050685906e-05, + "loss": 13.0003, + "step": 15504 + }, + { + "epoch": 0.6462840231753574, + "grad_norm": 864.0, + "learning_rate": 2.937560084725021e-05, + "loss": 21.6256, + "step": 15505 + }, + { + "epoch": 0.6463257054728857, + "grad_norm": 912.0, + "learning_rate": 2.9369452019696043e-05, + "loss": 24.0033, + "step": 15506 + }, + { + "epoch": 0.6463673877704139, + "grad_norm": 1704.0, + "learning_rate": 2.9363303568135425e-05, + "loss": 34.504, + "step": 15507 + }, + { + "epoch": 0.6464090700679421, + "grad_norm": 136.0, + "learning_rate": 2.9357155492680445e-05, + "loss": 10.9382, + "step": 15508 + }, + { + "epoch": 0.6464507523654703, + "grad_norm": 596.0, + "learning_rate": 2.935100779344312e-05, + "loss": 19.1253, + "step": 15509 + }, + { + "epoch": 0.6464924346629987, + "grad_norm": 139.0, + "learning_rate": 2.9344860470535528e-05, + "loss": 10.1254, + "step": 15510 + }, + { + "epoch": 0.6465341169605269, + "grad_norm": 280.0, + "learning_rate": 2.9338713524069672e-05, + "loss": 10.3755, + "step": 15511 + }, + { + "epoch": 0.6465757992580551, + "grad_norm": 308.0, + "learning_rate": 2.9332566954157615e-05, + "loss": 13.1252, + "step": 15512 + }, + { + "epoch": 0.6466174815555833, + "grad_norm": 107.5, + "learning_rate": 2.9326420760911342e-05, + "loss": 9.5005, + "step": 15513 + }, + { + "epoch": 0.6466591638531116, + "grad_norm": 324.0, + "learning_rate": 2.9320274944442905e-05, + "loss": 12.5003, + "step": 15514 + }, + { + "epoch": 0.6467008461506398, + "grad_norm": 1264.0, + "learning_rate": 2.9314129504864275e-05, + "loss": 29.6254, + "step": 15515 + }, + { + "epoch": 0.646742528448168, + "grad_norm": 270.0, + "learning_rate": 2.9307984442287484e-05, + "loss": 12.6254, + "step": 15516 + }, + { + "epoch": 0.6467842107456963, + "grad_norm": 516.0, + "learning_rate": 2.9301839756824502e-05, + "loss": 16.8759, + "step": 15517 + }, + { + "epoch": 0.6468258930432246, + "grad_norm": 266.0, + "learning_rate": 2.9295695448587346e-05, + "loss": 11.5635, + "step": 15518 + }, + { + "epoch": 0.6468675753407528, + "grad_norm": 160.0, + "learning_rate": 2.9289551517687958e-05, + "loss": 12.438, + "step": 15519 + }, + { + "epoch": 0.646909257638281, + "grad_norm": 596.0, + "learning_rate": 2.928340796423835e-05, + "loss": 20.0003, + "step": 15520 + }, + { + "epoch": 0.6469509399358092, + "grad_norm": 334.0, + "learning_rate": 2.9277264788350457e-05, + "loss": 14.6879, + "step": 15521 + }, + { + "epoch": 0.6469926222333375, + "grad_norm": 178.0, + "learning_rate": 2.9271121990136252e-05, + "loss": 11.8127, + "step": 15522 + }, + { + "epoch": 0.6470343045308657, + "grad_norm": 436.0, + "learning_rate": 2.9264979569707697e-05, + "loss": 16.5004, + "step": 15523 + }, + { + "epoch": 0.647075986828394, + "grad_norm": 1192.0, + "learning_rate": 2.925883752717673e-05, + "loss": 31.8752, + "step": 15524 + }, + { + "epoch": 0.6471176691259222, + "grad_norm": 300.0, + "learning_rate": 2.9252695862655276e-05, + "loss": 11.8752, + "step": 15525 + }, + { + "epoch": 0.6471593514234505, + "grad_norm": 324.0, + "learning_rate": 2.9246554576255304e-05, + "loss": 14.1254, + "step": 15526 + }, + { + "epoch": 0.6472010337209787, + "grad_norm": 127.5, + "learning_rate": 2.9240413668088697e-05, + "loss": 9.5015, + "step": 15527 + }, + { + "epoch": 0.6472427160185069, + "grad_norm": 142.0, + "learning_rate": 2.9234273138267414e-05, + "loss": 10.6253, + "step": 15528 + }, + { + "epoch": 0.6472843983160351, + "grad_norm": 1824.0, + "learning_rate": 2.9228132986903328e-05, + "loss": 38.0001, + "step": 15529 + }, + { + "epoch": 0.6473260806135634, + "grad_norm": 83.0, + "learning_rate": 2.9221993214108377e-05, + "loss": 9.1256, + "step": 15530 + }, + { + "epoch": 0.6473677629110917, + "grad_norm": 588.0, + "learning_rate": 2.9215853819994433e-05, + "loss": 18.5028, + "step": 15531 + }, + { + "epoch": 0.6474094452086199, + "grad_norm": 354.0, + "learning_rate": 2.920971480467341e-05, + "loss": 15.3753, + "step": 15532 + }, + { + "epoch": 0.6474511275061482, + "grad_norm": 179.0, + "learning_rate": 2.9203576168257163e-05, + "loss": 10.3791, + "step": 15533 + }, + { + "epoch": 0.6474928098036764, + "grad_norm": 156.0, + "learning_rate": 2.9197437910857595e-05, + "loss": 10.5008, + "step": 15534 + }, + { + "epoch": 0.6475344921012046, + "grad_norm": 54.0, + "learning_rate": 2.9191300032586567e-05, + "loss": 6.9066, + "step": 15535 + }, + { + "epoch": 0.6475761743987328, + "grad_norm": 458.0, + "learning_rate": 2.9185162533555966e-05, + "loss": 16.6252, + "step": 15536 + }, + { + "epoch": 0.6476178566962612, + "grad_norm": 244.0, + "learning_rate": 2.9179025413877582e-05, + "loss": 12.064, + "step": 15537 + }, + { + "epoch": 0.6476595389937894, + "grad_norm": 199.0, + "learning_rate": 2.9172888673663352e-05, + "loss": 12.0632, + "step": 15538 + }, + { + "epoch": 0.6477012212913176, + "grad_norm": 196.0, + "learning_rate": 2.9166752313025036e-05, + "loss": 11.3131, + "step": 15539 + }, + { + "epoch": 0.6477429035888458, + "grad_norm": 382.0, + "learning_rate": 2.9160616332074524e-05, + "loss": 13.8787, + "step": 15540 + }, + { + "epoch": 0.6477845858863741, + "grad_norm": 716.0, + "learning_rate": 2.9154480730923635e-05, + "loss": 21.5001, + "step": 15541 + }, + { + "epoch": 0.6478262681839023, + "grad_norm": 300.0, + "learning_rate": 2.9148345509684183e-05, + "loss": 13.6878, + "step": 15542 + }, + { + "epoch": 0.6478679504814305, + "grad_norm": 456.0, + "learning_rate": 2.9142210668467984e-05, + "loss": 16.2503, + "step": 15543 + }, + { + "epoch": 0.6479096327789587, + "grad_norm": 1568.0, + "learning_rate": 2.9136076207386847e-05, + "loss": 32.7551, + "step": 15544 + }, + { + "epoch": 0.6479513150764871, + "grad_norm": 262.0, + "learning_rate": 2.9129942126552572e-05, + "loss": 12.1251, + "step": 15545 + }, + { + "epoch": 0.6479929973740153, + "grad_norm": 1104.0, + "learning_rate": 2.912380842607696e-05, + "loss": 27.6259, + "step": 15546 + }, + { + "epoch": 0.6480346796715435, + "grad_norm": 209.0, + "learning_rate": 2.9117675106071784e-05, + "loss": 10.8756, + "step": 15547 + }, + { + "epoch": 0.6480763619690717, + "grad_norm": 274.0, + "learning_rate": 2.9111542166648842e-05, + "loss": 13.2504, + "step": 15548 + }, + { + "epoch": 0.6481180442666, + "grad_norm": 468.0, + "learning_rate": 2.9105409607919892e-05, + "loss": 16.8753, + "step": 15549 + }, + { + "epoch": 0.6481597265641282, + "grad_norm": 1160.0, + "learning_rate": 2.9099277429996707e-05, + "loss": 28.0006, + "step": 15550 + }, + { + "epoch": 0.6482014088616564, + "grad_norm": 250.0, + "learning_rate": 2.9093145632991053e-05, + "loss": 12.6257, + "step": 15551 + }, + { + "epoch": 0.6482430911591847, + "grad_norm": 868.0, + "learning_rate": 2.9087014217014675e-05, + "loss": 24.8753, + "step": 15552 + }, + { + "epoch": 0.648284773456713, + "grad_norm": 207.0, + "learning_rate": 2.908088318217931e-05, + "loss": 11.7504, + "step": 15553 + }, + { + "epoch": 0.6483264557542412, + "grad_norm": 1600.0, + "learning_rate": 2.9074752528596715e-05, + "loss": 41.2501, + "step": 15554 + }, + { + "epoch": 0.6483681380517694, + "grad_norm": 196.0, + "learning_rate": 2.9068622256378586e-05, + "loss": 11.7502, + "step": 15555 + }, + { + "epoch": 0.6484098203492976, + "grad_norm": 400.0, + "learning_rate": 2.9062492365636717e-05, + "loss": 14.7501, + "step": 15556 + }, + { + "epoch": 0.6484515026468259, + "grad_norm": 260.0, + "learning_rate": 2.9056362856482743e-05, + "loss": 13.3128, + "step": 15557 + }, + { + "epoch": 0.6484931849443542, + "grad_norm": 260.0, + "learning_rate": 2.9050233729028463e-05, + "loss": 13.1253, + "step": 15558 + }, + { + "epoch": 0.6485348672418824, + "grad_norm": 318.0, + "learning_rate": 2.9044104983385485e-05, + "loss": 13.2503, + "step": 15559 + }, + { + "epoch": 0.6485765495394106, + "grad_norm": 1048.0, + "learning_rate": 2.9037976619665597e-05, + "loss": 31.8753, + "step": 15560 + }, + { + "epoch": 0.6486182318369389, + "grad_norm": 91.0, + "learning_rate": 2.90318486379804e-05, + "loss": 8.5002, + "step": 15561 + }, + { + "epoch": 0.6486599141344671, + "grad_norm": 712.0, + "learning_rate": 2.9025721038441665e-05, + "loss": 17.6295, + "step": 15562 + }, + { + "epoch": 0.6487015964319953, + "grad_norm": 480.0, + "learning_rate": 2.9019593821160997e-05, + "loss": 16.5024, + "step": 15563 + }, + { + "epoch": 0.6487432787295235, + "grad_norm": 2048.0, + "learning_rate": 2.901346698625012e-05, + "loss": 40.0052, + "step": 15564 + }, + { + "epoch": 0.6487849610270519, + "grad_norm": 240.0, + "learning_rate": 2.9007340533820636e-05, + "loss": 11.1876, + "step": 15565 + }, + { + "epoch": 0.6488266433245801, + "grad_norm": 354.0, + "learning_rate": 2.9001214463984256e-05, + "loss": 14.1254, + "step": 15566 + }, + { + "epoch": 0.6488683256221083, + "grad_norm": 374.0, + "learning_rate": 2.899508877685261e-05, + "loss": 14.1254, + "step": 15567 + }, + { + "epoch": 0.6489100079196365, + "grad_norm": 306.0, + "learning_rate": 2.898896347253732e-05, + "loss": 10.8758, + "step": 15568 + }, + { + "epoch": 0.6489516902171648, + "grad_norm": 1004.0, + "learning_rate": 2.8982838551150048e-05, + "loss": 23.3798, + "step": 15569 + }, + { + "epoch": 0.648993372514693, + "grad_norm": 175.0, + "learning_rate": 2.897671401280241e-05, + "loss": 9.313, + "step": 15570 + }, + { + "epoch": 0.6490350548122212, + "grad_norm": 310.0, + "learning_rate": 2.8970589857606022e-05, + "loss": 14.1882, + "step": 15571 + }, + { + "epoch": 0.6490767371097494, + "grad_norm": 1096.0, + "learning_rate": 2.8964466085672504e-05, + "loss": 21.5045, + "step": 15572 + }, + { + "epoch": 0.6491184194072778, + "grad_norm": 836.0, + "learning_rate": 2.8958342697113454e-05, + "loss": 26.3753, + "step": 15573 + }, + { + "epoch": 0.649160101704806, + "grad_norm": 548.0, + "learning_rate": 2.8952219692040482e-05, + "loss": 18.8754, + "step": 15574 + }, + { + "epoch": 0.6492017840023342, + "grad_norm": 230.0, + "learning_rate": 2.894609707056517e-05, + "loss": 12.4378, + "step": 15575 + }, + { + "epoch": 0.6492434662998624, + "grad_norm": 1176.0, + "learning_rate": 2.893997483279911e-05, + "loss": 31.2509, + "step": 15576 + }, + { + "epoch": 0.6492851485973907, + "grad_norm": 388.0, + "learning_rate": 2.8933852978853876e-05, + "loss": 16.3754, + "step": 15577 + }, + { + "epoch": 0.6493268308949189, + "grad_norm": 300.0, + "learning_rate": 2.892773150884104e-05, + "loss": 13.3752, + "step": 15578 + }, + { + "epoch": 0.6493685131924471, + "grad_norm": 448.0, + "learning_rate": 2.8921610422872168e-05, + "loss": 15.438, + "step": 15579 + }, + { + "epoch": 0.6494101954899754, + "grad_norm": 184.0, + "learning_rate": 2.8915489721058824e-05, + "loss": 7.1569, + "step": 15580 + }, + { + "epoch": 0.6494518777875037, + "grad_norm": 159.0, + "learning_rate": 2.890936940351253e-05, + "loss": 12.0629, + "step": 15581 + }, + { + "epoch": 0.6494935600850319, + "grad_norm": 326.0, + "learning_rate": 2.8903249470344885e-05, + "loss": 14.6881, + "step": 15582 + }, + { + "epoch": 0.6495352423825601, + "grad_norm": 264.0, + "learning_rate": 2.8897129921667356e-05, + "loss": 12.7508, + "step": 15583 + }, + { + "epoch": 0.6495769246800883, + "grad_norm": 105.0, + "learning_rate": 2.8891010757591546e-05, + "loss": 8.876, + "step": 15584 + }, + { + "epoch": 0.6496186069776166, + "grad_norm": 384.0, + "learning_rate": 2.8884891978228902e-05, + "loss": 14.2522, + "step": 15585 + }, + { + "epoch": 0.6496602892751449, + "grad_norm": 604.0, + "learning_rate": 2.887877358369101e-05, + "loss": 19.1252, + "step": 15586 + }, + { + "epoch": 0.6497019715726731, + "grad_norm": 229.0, + "learning_rate": 2.8872655574089315e-05, + "loss": 12.4378, + "step": 15587 + }, + { + "epoch": 0.6497436538702013, + "grad_norm": 444.0, + "learning_rate": 2.8866537949535376e-05, + "loss": 17.7517, + "step": 15588 + }, + { + "epoch": 0.6497853361677296, + "grad_norm": 332.0, + "learning_rate": 2.8860420710140623e-05, + "loss": 14.0002, + "step": 15589 + }, + { + "epoch": 0.6498270184652578, + "grad_norm": 308.0, + "learning_rate": 2.8854303856016596e-05, + "loss": 13.1288, + "step": 15590 + }, + { + "epoch": 0.649868700762786, + "grad_norm": 198.0, + "learning_rate": 2.884818738727476e-05, + "loss": 12.6255, + "step": 15591 + }, + { + "epoch": 0.6499103830603142, + "grad_norm": 316.0, + "learning_rate": 2.8842071304026596e-05, + "loss": 14.0004, + "step": 15592 + }, + { + "epoch": 0.6499520653578426, + "grad_norm": 728.0, + "learning_rate": 2.8835955606383548e-05, + "loss": 18.6291, + "step": 15593 + }, + { + "epoch": 0.6499937476553708, + "grad_norm": 219.0, + "learning_rate": 2.8829840294457095e-05, + "loss": 11.0629, + "step": 15594 + }, + { + "epoch": 0.650035429952899, + "grad_norm": 194.0, + "learning_rate": 2.882372536835868e-05, + "loss": 7.5001, + "step": 15595 + }, + { + "epoch": 0.6500771122504272, + "grad_norm": 296.0, + "learning_rate": 2.881761082819975e-05, + "loss": 12.8129, + "step": 15596 + }, + { + "epoch": 0.6501187945479555, + "grad_norm": 482.0, + "learning_rate": 2.8811496674091743e-05, + "loss": 15.1251, + "step": 15597 + }, + { + "epoch": 0.6501604768454837, + "grad_norm": 452.0, + "learning_rate": 2.8805382906146094e-05, + "loss": 17.5004, + "step": 15598 + }, + { + "epoch": 0.6502021591430119, + "grad_norm": 144.0, + "learning_rate": 2.8799269524474214e-05, + "loss": 11.0003, + "step": 15599 + }, + { + "epoch": 0.6502438414405401, + "grad_norm": 516.0, + "learning_rate": 2.8793156529187537e-05, + "loss": 18.3752, + "step": 15600 + }, + { + "epoch": 0.6502855237380685, + "grad_norm": 206.0, + "learning_rate": 2.878704392039746e-05, + "loss": 10.6878, + "step": 15601 + }, + { + "epoch": 0.6503272060355967, + "grad_norm": 288.0, + "learning_rate": 2.8780931698215397e-05, + "loss": 12.6914, + "step": 15602 + }, + { + "epoch": 0.6503688883331249, + "grad_norm": 716.0, + "learning_rate": 2.8774819862752733e-05, + "loss": 21.1283, + "step": 15603 + }, + { + "epoch": 0.6504105706306531, + "grad_norm": 536.0, + "learning_rate": 2.876870841412086e-05, + "loss": 17.5003, + "step": 15604 + }, + { + "epoch": 0.6504522529281814, + "grad_norm": 748.0, + "learning_rate": 2.8762597352431165e-05, + "loss": 20.6285, + "step": 15605 + }, + { + "epoch": 0.6504939352257096, + "grad_norm": 864.0, + "learning_rate": 2.8756486677795012e-05, + "loss": 20.8796, + "step": 15606 + }, + { + "epoch": 0.6505356175232379, + "grad_norm": 336.0, + "learning_rate": 2.8750376390323762e-05, + "loss": 14.5033, + "step": 15607 + }, + { + "epoch": 0.6505772998207662, + "grad_norm": 120.0, + "learning_rate": 2.8744266490128824e-05, + "loss": 9.2504, + "step": 15608 + }, + { + "epoch": 0.6506189821182944, + "grad_norm": 226.0, + "learning_rate": 2.8738156977321473e-05, + "loss": 8.5004, + "step": 15609 + }, + { + "epoch": 0.6506606644158226, + "grad_norm": 376.0, + "learning_rate": 2.8732047852013146e-05, + "loss": 14.5629, + "step": 15610 + }, + { + "epoch": 0.6507023467133508, + "grad_norm": 318.0, + "learning_rate": 2.872593911431509e-05, + "loss": 13.8127, + "step": 15611 + }, + { + "epoch": 0.6507440290108791, + "grad_norm": 336.0, + "learning_rate": 2.8719830764338728e-05, + "loss": 14.8127, + "step": 15612 + }, + { + "epoch": 0.6507857113084073, + "grad_norm": 466.0, + "learning_rate": 2.8713722802195298e-05, + "loss": 16.7505, + "step": 15613 + }, + { + "epoch": 0.6508273936059356, + "grad_norm": 436.0, + "learning_rate": 2.8707615227996198e-05, + "loss": 15.1905, + "step": 15614 + }, + { + "epoch": 0.6508690759034638, + "grad_norm": 320.0, + "learning_rate": 2.8701508041852655e-05, + "loss": 13.6252, + "step": 15615 + }, + { + "epoch": 0.6509107582009921, + "grad_norm": 334.0, + "learning_rate": 2.8695401243876048e-05, + "loss": 14.5005, + "step": 15616 + }, + { + "epoch": 0.6509524404985203, + "grad_norm": 772.0, + "learning_rate": 2.8689294834177637e-05, + "loss": 22.3756, + "step": 15617 + }, + { + "epoch": 0.6509941227960485, + "grad_norm": 336.0, + "learning_rate": 2.868318881286872e-05, + "loss": 14.6882, + "step": 15618 + }, + { + "epoch": 0.6510358050935767, + "grad_norm": 636.0, + "learning_rate": 2.8677083180060587e-05, + "loss": 21.5003, + "step": 15619 + }, + { + "epoch": 0.651077487391105, + "grad_norm": 468.0, + "learning_rate": 2.8670977935864502e-05, + "loss": 15.879, + "step": 15620 + }, + { + "epoch": 0.6511191696886333, + "grad_norm": 964.0, + "learning_rate": 2.8664873080391734e-05, + "loss": 26.5003, + "step": 15621 + }, + { + "epoch": 0.6511608519861615, + "grad_norm": 418.0, + "learning_rate": 2.865876861375355e-05, + "loss": 14.9377, + "step": 15622 + }, + { + "epoch": 0.6512025342836897, + "grad_norm": 256.0, + "learning_rate": 2.8652664536061202e-05, + "loss": 12.6252, + "step": 15623 + }, + { + "epoch": 0.651244216581218, + "grad_norm": 123.5, + "learning_rate": 2.8646560847425942e-05, + "loss": 5.9065, + "step": 15624 + }, + { + "epoch": 0.6512858988787462, + "grad_norm": 356.0, + "learning_rate": 2.8640457547959004e-05, + "loss": 13.9378, + "step": 15625 + }, + { + "epoch": 0.6513275811762744, + "grad_norm": 430.0, + "learning_rate": 2.863435463777162e-05, + "loss": 16.5007, + "step": 15626 + }, + { + "epoch": 0.6513692634738026, + "grad_norm": 692.0, + "learning_rate": 2.8628252116975023e-05, + "loss": 23.2514, + "step": 15627 + }, + { + "epoch": 0.651410945771331, + "grad_norm": 506.0, + "learning_rate": 2.862214998568043e-05, + "loss": 17.6261, + "step": 15628 + }, + { + "epoch": 0.6514526280688592, + "grad_norm": 262.0, + "learning_rate": 2.8616048243999048e-05, + "loss": 12.3752, + "step": 15629 + }, + { + "epoch": 0.6514943103663874, + "grad_norm": 520.0, + "learning_rate": 2.8609946892042084e-05, + "loss": 16.5004, + "step": 15630 + }, + { + "epoch": 0.6515359926639156, + "grad_norm": 95.0, + "learning_rate": 2.860384592992072e-05, + "loss": 7.6255, + "step": 15631 + }, + { + "epoch": 0.6515776749614439, + "grad_norm": 322.0, + "learning_rate": 2.8597745357746197e-05, + "loss": 11.1897, + "step": 15632 + }, + { + "epoch": 0.6516193572589721, + "grad_norm": 338.0, + "learning_rate": 2.8591645175629634e-05, + "loss": 14.0643, + "step": 15633 + }, + { + "epoch": 0.6516610395565003, + "grad_norm": 139.0, + "learning_rate": 2.858554538368228e-05, + "loss": 8.5001, + "step": 15634 + }, + { + "epoch": 0.6517027218540286, + "grad_norm": 139.0, + "learning_rate": 2.8579445982015206e-05, + "loss": 9.8756, + "step": 15635 + }, + { + "epoch": 0.6517444041515569, + "grad_norm": 253.0, + "learning_rate": 2.8573346970739686e-05, + "loss": 13.5002, + "step": 15636 + }, + { + "epoch": 0.6517860864490851, + "grad_norm": 488.0, + "learning_rate": 2.8567248349966768e-05, + "loss": 15.7515, + "step": 15637 + }, + { + "epoch": 0.6518277687466133, + "grad_norm": 352.0, + "learning_rate": 2.85611501198077e-05, + "loss": 14.2502, + "step": 15638 + }, + { + "epoch": 0.6518694510441415, + "grad_norm": 416.0, + "learning_rate": 2.8555052280373536e-05, + "loss": 16.0002, + "step": 15639 + }, + { + "epoch": 0.6519111333416698, + "grad_norm": 334.0, + "learning_rate": 2.8548954831775483e-05, + "loss": 14.0629, + "step": 15640 + }, + { + "epoch": 0.651952815639198, + "grad_norm": 426.0, + "learning_rate": 2.8542857774124593e-05, + "loss": 16.2502, + "step": 15641 + }, + { + "epoch": 0.6519944979367263, + "grad_norm": 1240.0, + "learning_rate": 2.8536761107532047e-05, + "loss": 28.5048, + "step": 15642 + }, + { + "epoch": 0.6520361802342545, + "grad_norm": 482.0, + "learning_rate": 2.8530664832108934e-05, + "loss": 17.5036, + "step": 15643 + }, + { + "epoch": 0.6520778625317828, + "grad_norm": 720.0, + "learning_rate": 2.8524568947966356e-05, + "loss": 21.5003, + "step": 15644 + }, + { + "epoch": 0.652119544829311, + "grad_norm": 394.0, + "learning_rate": 2.8518473455215422e-05, + "loss": 15.5001, + "step": 15645 + }, + { + "epoch": 0.6521612271268392, + "grad_norm": 163.0, + "learning_rate": 2.8512378353967206e-05, + "loss": 9.0002, + "step": 15646 + }, + { + "epoch": 0.6522029094243674, + "grad_norm": 556.0, + "learning_rate": 2.8506283644332808e-05, + "loss": 18.2503, + "step": 15647 + }, + { + "epoch": 0.6522445917218958, + "grad_norm": 804.0, + "learning_rate": 2.8500189326423305e-05, + "loss": 21.5005, + "step": 15648 + }, + { + "epoch": 0.652286274019424, + "grad_norm": 316.0, + "learning_rate": 2.849409540034975e-05, + "loss": 12.6255, + "step": 15649 + }, + { + "epoch": 0.6523279563169522, + "grad_norm": 254.0, + "learning_rate": 2.8488001866223222e-05, + "loss": 12.6254, + "step": 15650 + }, + { + "epoch": 0.6523696386144804, + "grad_norm": 368.0, + "learning_rate": 2.848190872415477e-05, + "loss": 12.9378, + "step": 15651 + }, + { + "epoch": 0.6524113209120087, + "grad_norm": 286.0, + "learning_rate": 2.8475815974255443e-05, + "loss": 13.8752, + "step": 15652 + }, + { + "epoch": 0.6524530032095369, + "grad_norm": 156.0, + "learning_rate": 2.8469723616636278e-05, + "loss": 9.0627, + "step": 15653 + }, + { + "epoch": 0.6524946855070651, + "grad_norm": 628.0, + "learning_rate": 2.8463631651408307e-05, + "loss": 20.1266, + "step": 15654 + }, + { + "epoch": 0.6525363678045933, + "grad_norm": 348.0, + "learning_rate": 2.8457540078682567e-05, + "loss": 14.6251, + "step": 15655 + }, + { + "epoch": 0.6525780501021217, + "grad_norm": 312.0, + "learning_rate": 2.8451448898570065e-05, + "loss": 14.5629, + "step": 15656 + }, + { + "epoch": 0.6526197323996499, + "grad_norm": 146.0, + "learning_rate": 2.8445358111181808e-05, + "loss": 8.6878, + "step": 15657 + }, + { + "epoch": 0.6526614146971781, + "grad_norm": 308.0, + "learning_rate": 2.8439267716628847e-05, + "loss": 13.3752, + "step": 15658 + }, + { + "epoch": 0.6527030969947063, + "grad_norm": 231.0, + "learning_rate": 2.8433177715022108e-05, + "loss": 12.6256, + "step": 15659 + }, + { + "epoch": 0.6527447792922346, + "grad_norm": 354.0, + "learning_rate": 2.8427088106472666e-05, + "loss": 13.0003, + "step": 15660 + }, + { + "epoch": 0.6527864615897628, + "grad_norm": 386.0, + "learning_rate": 2.842099889109141e-05, + "loss": 12.8153, + "step": 15661 + }, + { + "epoch": 0.652828143887291, + "grad_norm": 668.0, + "learning_rate": 2.8414910068989402e-05, + "loss": 22.6252, + "step": 15662 + }, + { + "epoch": 0.6528698261848193, + "grad_norm": 306.0, + "learning_rate": 2.840882164027754e-05, + "loss": 13.6878, + "step": 15663 + }, + { + "epoch": 0.6529115084823476, + "grad_norm": 300.0, + "learning_rate": 2.840273360506686e-05, + "loss": 13.0004, + "step": 15664 + }, + { + "epoch": 0.6529531907798758, + "grad_norm": 390.0, + "learning_rate": 2.839664596346824e-05, + "loss": 14.9376, + "step": 15665 + }, + { + "epoch": 0.652994873077404, + "grad_norm": 204.0, + "learning_rate": 2.8390558715592676e-05, + "loss": 11.5005, + "step": 15666 + }, + { + "epoch": 0.6530365553749322, + "grad_norm": 452.0, + "learning_rate": 2.838447186155111e-05, + "loss": 15.313, + "step": 15667 + }, + { + "epoch": 0.6530782376724605, + "grad_norm": 190.0, + "learning_rate": 2.8378385401454455e-05, + "loss": 10.8757, + "step": 15668 + }, + { + "epoch": 0.6531199199699887, + "grad_norm": 1120.0, + "learning_rate": 2.8372299335413643e-05, + "loss": 25.0005, + "step": 15669 + }, + { + "epoch": 0.653161602267517, + "grad_norm": 1104.0, + "learning_rate": 2.83662136635396e-05, + "loss": 27.5048, + "step": 15670 + }, + { + "epoch": 0.6532032845650452, + "grad_norm": 664.0, + "learning_rate": 2.836012838594323e-05, + "loss": 20.0004, + "step": 15671 + }, + { + "epoch": 0.6532449668625735, + "grad_norm": 1464.0, + "learning_rate": 2.835404350273545e-05, + "loss": 29.5057, + "step": 15672 + }, + { + "epoch": 0.6532866491601017, + "grad_norm": 166.0, + "learning_rate": 2.8347959014027138e-05, + "loss": 10.7524, + "step": 15673 + }, + { + "epoch": 0.6533283314576299, + "grad_norm": 272.0, + "learning_rate": 2.8341874919929202e-05, + "loss": 13.3753, + "step": 15674 + }, + { + "epoch": 0.6533700137551581, + "grad_norm": 322.0, + "learning_rate": 2.8335791220552515e-05, + "loss": 11.5645, + "step": 15675 + }, + { + "epoch": 0.6534116960526865, + "grad_norm": 1728.0, + "learning_rate": 2.8329707916007958e-05, + "loss": 35.2502, + "step": 15676 + }, + { + "epoch": 0.6534533783502147, + "grad_norm": 142.0, + "learning_rate": 2.8323625006406394e-05, + "loss": 9.5627, + "step": 15677 + }, + { + "epoch": 0.6534950606477429, + "grad_norm": 1488.0, + "learning_rate": 2.8317542491858696e-05, + "loss": 32.0002, + "step": 15678 + }, + { + "epoch": 0.6535367429452712, + "grad_norm": 60.0, + "learning_rate": 2.8311460372475707e-05, + "loss": 7.6571, + "step": 15679 + }, + { + "epoch": 0.6535784252427994, + "grad_norm": 143.0, + "learning_rate": 2.8305378648368276e-05, + "loss": 9.0001, + "step": 15680 + }, + { + "epoch": 0.6536201075403276, + "grad_norm": 434.0, + "learning_rate": 2.829929731964723e-05, + "loss": 16.1253, + "step": 15681 + }, + { + "epoch": 0.6536617898378558, + "grad_norm": 200.0, + "learning_rate": 2.829321638642345e-05, + "loss": 10.0006, + "step": 15682 + }, + { + "epoch": 0.6537034721353842, + "grad_norm": 374.0, + "learning_rate": 2.8287135848807693e-05, + "loss": 14.0626, + "step": 15683 + }, + { + "epoch": 0.6537451544329124, + "grad_norm": 134.0, + "learning_rate": 2.828105570691085e-05, + "loss": 7.0318, + "step": 15684 + }, + { + "epoch": 0.6537868367304406, + "grad_norm": 107.5, + "learning_rate": 2.8274975960843663e-05, + "loss": 8.5627, + "step": 15685 + }, + { + "epoch": 0.6538285190279688, + "grad_norm": 262.0, + "learning_rate": 2.8268896610716998e-05, + "loss": 13.9378, + "step": 15686 + }, + { + "epoch": 0.6538702013254971, + "grad_norm": 498.0, + "learning_rate": 2.8262817656641584e-05, + "loss": 17.6253, + "step": 15687 + }, + { + "epoch": 0.6539118836230253, + "grad_norm": 828.0, + "learning_rate": 2.825673909872829e-05, + "loss": 19.2538, + "step": 15688 + }, + { + "epoch": 0.6539535659205535, + "grad_norm": 462.0, + "learning_rate": 2.8250660937087814e-05, + "loss": 17.7502, + "step": 15689 + }, + { + "epoch": 0.6539952482180817, + "grad_norm": 81.0, + "learning_rate": 2.8244583171831012e-05, + "loss": 5.5626, + "step": 15690 + }, + { + "epoch": 0.6540369305156101, + "grad_norm": 776.0, + "learning_rate": 2.8238505803068577e-05, + "loss": 19.3797, + "step": 15691 + }, + { + "epoch": 0.6540786128131383, + "grad_norm": 308.0, + "learning_rate": 2.8232428830911323e-05, + "loss": 11.8751, + "step": 15692 + }, + { + "epoch": 0.6541202951106665, + "grad_norm": 226.0, + "learning_rate": 2.8226352255469985e-05, + "loss": 12.1252, + "step": 15693 + }, + { + "epoch": 0.6541619774081947, + "grad_norm": 600.0, + "learning_rate": 2.8220276076855313e-05, + "loss": 18.3753, + "step": 15694 + }, + { + "epoch": 0.654203659705723, + "grad_norm": 1272.0, + "learning_rate": 2.8214200295178038e-05, + "loss": 28.627, + "step": 15695 + }, + { + "epoch": 0.6542453420032512, + "grad_norm": 544.0, + "learning_rate": 2.82081249105489e-05, + "loss": 19.0004, + "step": 15696 + }, + { + "epoch": 0.6542870243007795, + "grad_norm": 300.0, + "learning_rate": 2.8202049923078623e-05, + "loss": 13.0027, + "step": 15697 + }, + { + "epoch": 0.6543287065983077, + "grad_norm": 77.5, + "learning_rate": 2.8195975332877915e-05, + "loss": 6.3751, + "step": 15698 + }, + { + "epoch": 0.654370388895836, + "grad_norm": 336.0, + "learning_rate": 2.8189901140057496e-05, + "loss": 15.1878, + "step": 15699 + }, + { + "epoch": 0.6544120711933642, + "grad_norm": 204.0, + "learning_rate": 2.8183827344728064e-05, + "loss": 10.8129, + "step": 15700 + }, + { + "epoch": 0.6544537534908924, + "grad_norm": 372.0, + "learning_rate": 2.817775394700032e-05, + "loss": 14.9378, + "step": 15701 + }, + { + "epoch": 0.6544954357884206, + "grad_norm": 458.0, + "learning_rate": 2.8171680946984934e-05, + "loss": 15.3752, + "step": 15702 + }, + { + "epoch": 0.6545371180859489, + "grad_norm": 228.0, + "learning_rate": 2.8165608344792617e-05, + "loss": 12.9379, + "step": 15703 + }, + { + "epoch": 0.6545788003834772, + "grad_norm": 488.0, + "learning_rate": 2.8159536140534017e-05, + "loss": 15.128, + "step": 15704 + }, + { + "epoch": 0.6546204826810054, + "grad_norm": 512.0, + "learning_rate": 2.8153464334319814e-05, + "loss": 17.6253, + "step": 15705 + }, + { + "epoch": 0.6546621649785336, + "grad_norm": 448.0, + "learning_rate": 2.814739292626065e-05, + "loss": 16.5004, + "step": 15706 + }, + { + "epoch": 0.6547038472760619, + "grad_norm": 924.0, + "learning_rate": 2.814132191646719e-05, + "loss": 19.5052, + "step": 15707 + }, + { + "epoch": 0.6547455295735901, + "grad_norm": 724.0, + "learning_rate": 2.8135251305050104e-05, + "loss": 21.7504, + "step": 15708 + }, + { + "epoch": 0.6547872118711183, + "grad_norm": 460.0, + "learning_rate": 2.8129181092119972e-05, + "loss": 16.1252, + "step": 15709 + }, + { + "epoch": 0.6548288941686465, + "grad_norm": 432.0, + "learning_rate": 2.812311127778749e-05, + "loss": 16.2531, + "step": 15710 + }, + { + "epoch": 0.6548705764661749, + "grad_norm": 138.0, + "learning_rate": 2.8117041862163206e-05, + "loss": 9.3757, + "step": 15711 + }, + { + "epoch": 0.6549122587637031, + "grad_norm": 114.5, + "learning_rate": 2.811097284535782e-05, + "loss": 8.8753, + "step": 15712 + }, + { + "epoch": 0.6549539410612313, + "grad_norm": 540.0, + "learning_rate": 2.810490422748186e-05, + "loss": 18.7504, + "step": 15713 + }, + { + "epoch": 0.6549956233587595, + "grad_norm": 141.0, + "learning_rate": 2.8098836008646002e-05, + "loss": 10.5002, + "step": 15714 + }, + { + "epoch": 0.6550373056562878, + "grad_norm": 458.0, + "learning_rate": 2.809276818896076e-05, + "loss": 16.3764, + "step": 15715 + }, + { + "epoch": 0.655078987953816, + "grad_norm": 406.0, + "learning_rate": 2.8086700768536788e-05, + "loss": 14.3127, + "step": 15716 + }, + { + "epoch": 0.6551206702513442, + "grad_norm": 604.0, + "learning_rate": 2.808063374748463e-05, + "loss": 19.8752, + "step": 15717 + }, + { + "epoch": 0.6551623525488725, + "grad_norm": 664.0, + "learning_rate": 2.8074567125914873e-05, + "loss": 19.7508, + "step": 15718 + }, + { + "epoch": 0.6552040348464008, + "grad_norm": 540.0, + "learning_rate": 2.8068500903938078e-05, + "loss": 17.5003, + "step": 15719 + }, + { + "epoch": 0.655245717143929, + "grad_norm": 113.0, + "learning_rate": 2.80624350816648e-05, + "loss": 9.5004, + "step": 15720 + }, + { + "epoch": 0.6552873994414572, + "grad_norm": 780.0, + "learning_rate": 2.8056369659205593e-05, + "loss": 22.5009, + "step": 15721 + }, + { + "epoch": 0.6553290817389854, + "grad_norm": 154.0, + "learning_rate": 2.8050304636670992e-05, + "loss": 9.1252, + "step": 15722 + }, + { + "epoch": 0.6553707640365137, + "grad_norm": 126.5, + "learning_rate": 2.804424001417154e-05, + "loss": 9.5629, + "step": 15723 + }, + { + "epoch": 0.6554124463340419, + "grad_norm": 205.0, + "learning_rate": 2.8038175791817767e-05, + "loss": 11.627, + "step": 15724 + }, + { + "epoch": 0.6554541286315702, + "grad_norm": 104.5, + "learning_rate": 2.8032111969720183e-05, + "loss": 8.5633, + "step": 15725 + }, + { + "epoch": 0.6554958109290984, + "grad_norm": 584.0, + "learning_rate": 2.8026048547989307e-05, + "loss": 16.1254, + "step": 15726 + }, + { + "epoch": 0.6555374932266267, + "grad_norm": 107.5, + "learning_rate": 2.8019985526735653e-05, + "loss": 9.3129, + "step": 15727 + }, + { + "epoch": 0.6555791755241549, + "grad_norm": 374.0, + "learning_rate": 2.801392290606971e-05, + "loss": 14.5007, + "step": 15728 + }, + { + "epoch": 0.6556208578216831, + "grad_norm": 258.0, + "learning_rate": 2.8007860686101973e-05, + "loss": 11.4406, + "step": 15729 + }, + { + "epoch": 0.6556625401192113, + "grad_norm": 154.0, + "learning_rate": 2.800179886694293e-05, + "loss": 10.5008, + "step": 15730 + }, + { + "epoch": 0.6557042224167396, + "grad_norm": 232.0, + "learning_rate": 2.7995737448703057e-05, + "loss": 13.0629, + "step": 15731 + }, + { + "epoch": 0.6557459047142679, + "grad_norm": 266.0, + "learning_rate": 2.7989676431492822e-05, + "loss": 12.192, + "step": 15732 + }, + { + "epoch": 0.6557875870117961, + "grad_norm": 292.0, + "learning_rate": 2.798361581542267e-05, + "loss": 13.8127, + "step": 15733 + }, + { + "epoch": 0.6558292693093243, + "grad_norm": 362.0, + "learning_rate": 2.7977555600603106e-05, + "loss": 15.0627, + "step": 15734 + }, + { + "epoch": 0.6558709516068526, + "grad_norm": 532.0, + "learning_rate": 2.7971495787144507e-05, + "loss": 18.0033, + "step": 15735 + }, + { + "epoch": 0.6559126339043808, + "grad_norm": 294.0, + "learning_rate": 2.79654363751574e-05, + "loss": 13.6877, + "step": 15736 + }, + { + "epoch": 0.655954316201909, + "grad_norm": 304.0, + "learning_rate": 2.7959377364752125e-05, + "loss": 14.2506, + "step": 15737 + }, + { + "epoch": 0.6559959984994372, + "grad_norm": 135.0, + "learning_rate": 2.7953318756039204e-05, + "loss": 9.5003, + "step": 15738 + }, + { + "epoch": 0.6560376807969656, + "grad_norm": 322.0, + "learning_rate": 2.7947260549128956e-05, + "loss": 12.2502, + "step": 15739 + }, + { + "epoch": 0.6560793630944938, + "grad_norm": 123.0, + "learning_rate": 2.7941202744131883e-05, + "loss": 10.3755, + "step": 15740 + }, + { + "epoch": 0.656121045392022, + "grad_norm": 260.0, + "learning_rate": 2.7935145341158308e-05, + "loss": 10.9379, + "step": 15741 + }, + { + "epoch": 0.6561627276895502, + "grad_norm": 151.0, + "learning_rate": 2.7929088340318692e-05, + "loss": 10.5629, + "step": 15742 + }, + { + "epoch": 0.6562044099870785, + "grad_norm": 220.0, + "learning_rate": 2.79230317417234e-05, + "loss": 6.0006, + "step": 15743 + }, + { + "epoch": 0.6562460922846067, + "grad_norm": 227.0, + "learning_rate": 2.7916975545482817e-05, + "loss": 12.6877, + "step": 15744 + }, + { + "epoch": 0.6562877745821349, + "grad_norm": 684.0, + "learning_rate": 2.791091975170731e-05, + "loss": 22.0004, + "step": 15745 + }, + { + "epoch": 0.6563294568796632, + "grad_norm": 482.0, + "learning_rate": 2.790486436050725e-05, + "loss": 16.3754, + "step": 15746 + }, + { + "epoch": 0.6563711391771915, + "grad_norm": 700.0, + "learning_rate": 2.7898809371992997e-05, + "loss": 18.8756, + "step": 15747 + }, + { + "epoch": 0.6564128214747197, + "grad_norm": 206.0, + "learning_rate": 2.789275478627491e-05, + "loss": 11.3128, + "step": 15748 + }, + { + "epoch": 0.6564545037722479, + "grad_norm": 560.0, + "learning_rate": 2.788670060346333e-05, + "loss": 18.2515, + "step": 15749 + }, + { + "epoch": 0.6564961860697761, + "grad_norm": 376.0, + "learning_rate": 2.7880646823668588e-05, + "loss": 15.7504, + "step": 15750 + }, + { + "epoch": 0.6565378683673044, + "grad_norm": 278.0, + "learning_rate": 2.7874593447001028e-05, + "loss": 10.4378, + "step": 15751 + }, + { + "epoch": 0.6565795506648326, + "grad_norm": 140.0, + "learning_rate": 2.786854047357097e-05, + "loss": 7.6252, + "step": 15752 + }, + { + "epoch": 0.6566212329623609, + "grad_norm": 352.0, + "learning_rate": 2.786248790348872e-05, + "loss": 14.8752, + "step": 15753 + }, + { + "epoch": 0.6566629152598892, + "grad_norm": 564.0, + "learning_rate": 2.7856435736864595e-05, + "loss": 18.6252, + "step": 15754 + }, + { + "epoch": 0.6567045975574174, + "grad_norm": 70.5, + "learning_rate": 2.785038397380889e-05, + "loss": 9.0006, + "step": 15755 + }, + { + "epoch": 0.6567462798549456, + "grad_norm": 200.0, + "learning_rate": 2.784433261443191e-05, + "loss": 12.2503, + "step": 15756 + }, + { + "epoch": 0.6567879621524738, + "grad_norm": 127.5, + "learning_rate": 2.7838281658843913e-05, + "loss": 9.5005, + "step": 15757 + }, + { + "epoch": 0.6568296444500021, + "grad_norm": 544.0, + "learning_rate": 2.7832231107155237e-05, + "loss": 17.6252, + "step": 15758 + }, + { + "epoch": 0.6568713267475303, + "grad_norm": 486.0, + "learning_rate": 2.782618095947608e-05, + "loss": 17.0002, + "step": 15759 + }, + { + "epoch": 0.6569130090450586, + "grad_norm": 192.0, + "learning_rate": 2.782013121591678e-05, + "loss": 11.814, + "step": 15760 + }, + { + "epoch": 0.6569546913425868, + "grad_norm": 688.0, + "learning_rate": 2.7814081876587523e-05, + "loss": 22.2516, + "step": 15761 + }, + { + "epoch": 0.6569963736401151, + "grad_norm": 226.0, + "learning_rate": 2.780803294159863e-05, + "loss": 12.6253, + "step": 15762 + }, + { + "epoch": 0.6570380559376433, + "grad_norm": 227.0, + "learning_rate": 2.7801984411060267e-05, + "loss": 11.0003, + "step": 15763 + }, + { + "epoch": 0.6570797382351715, + "grad_norm": 376.0, + "learning_rate": 2.7795936285082746e-05, + "loss": 12.5628, + "step": 15764 + }, + { + "epoch": 0.6571214205326997, + "grad_norm": 153.0, + "learning_rate": 2.7789888563776222e-05, + "loss": 10.1877, + "step": 15765 + }, + { + "epoch": 0.657163102830228, + "grad_norm": 920.0, + "learning_rate": 2.7783841247250986e-05, + "loss": 26.2503, + "step": 15766 + }, + { + "epoch": 0.6572047851277563, + "grad_norm": 340.0, + "learning_rate": 2.777779433561718e-05, + "loss": 14.3754, + "step": 15767 + }, + { + "epoch": 0.6572464674252845, + "grad_norm": 67.5, + "learning_rate": 2.7771747828985063e-05, + "loss": 7.0636, + "step": 15768 + }, + { + "epoch": 0.6572881497228127, + "grad_norm": 502.0, + "learning_rate": 2.7765701727464814e-05, + "loss": 17.6252, + "step": 15769 + }, + { + "epoch": 0.657329832020341, + "grad_norm": 466.0, + "learning_rate": 2.7759656031166626e-05, + "loss": 17.1253, + "step": 15770 + }, + { + "epoch": 0.6573715143178692, + "grad_norm": 211.0, + "learning_rate": 2.7753610740200685e-05, + "loss": 11.8756, + "step": 15771 + }, + { + "epoch": 0.6574131966153974, + "grad_norm": 226.0, + "learning_rate": 2.774756585467716e-05, + "loss": 10.3127, + "step": 15772 + }, + { + "epoch": 0.6574548789129256, + "grad_norm": 250.0, + "learning_rate": 2.774152137470622e-05, + "loss": 13.8133, + "step": 15773 + }, + { + "epoch": 0.657496561210454, + "grad_norm": 502.0, + "learning_rate": 2.773547730039803e-05, + "loss": 17.3772, + "step": 15774 + }, + { + "epoch": 0.6575382435079822, + "grad_norm": 233.0, + "learning_rate": 2.7729433631862746e-05, + "loss": 10.5628, + "step": 15775 + }, + { + "epoch": 0.6575799258055104, + "grad_norm": 306.0, + "learning_rate": 2.7723390369210507e-05, + "loss": 12.6877, + "step": 15776 + }, + { + "epoch": 0.6576216081030386, + "grad_norm": 456.0, + "learning_rate": 2.7717347512551463e-05, + "loss": 14.8755, + "step": 15777 + }, + { + "epoch": 0.6576632904005669, + "grad_norm": 828.0, + "learning_rate": 2.7711305061995728e-05, + "loss": 18.3793, + "step": 15778 + }, + { + "epoch": 0.6577049726980951, + "grad_norm": 136.0, + "learning_rate": 2.7705263017653443e-05, + "loss": 10.688, + "step": 15779 + }, + { + "epoch": 0.6577466549956233, + "grad_norm": 664.0, + "learning_rate": 2.769922137963472e-05, + "loss": 18.8767, + "step": 15780 + }, + { + "epoch": 0.6577883372931516, + "grad_norm": 424.0, + "learning_rate": 2.7693180148049663e-05, + "loss": 16.5006, + "step": 15781 + }, + { + "epoch": 0.6578300195906799, + "grad_norm": 156.0, + "learning_rate": 2.7687139323008382e-05, + "loss": 11.1269, + "step": 15782 + }, + { + "epoch": 0.6578717018882081, + "grad_norm": 356.0, + "learning_rate": 2.7681098904620938e-05, + "loss": 14.3128, + "step": 15783 + }, + { + "epoch": 0.6579133841857363, + "grad_norm": 125.0, + "learning_rate": 2.7675058892997496e-05, + "loss": 9.6253, + "step": 15784 + }, + { + "epoch": 0.6579550664832645, + "grad_norm": 276.0, + "learning_rate": 2.7669019288248054e-05, + "loss": 12.6257, + "step": 15785 + }, + { + "epoch": 0.6579967487807928, + "grad_norm": 260.0, + "learning_rate": 2.7662980090482747e-05, + "loss": 10.9377, + "step": 15786 + }, + { + "epoch": 0.658038431078321, + "grad_norm": 366.0, + "learning_rate": 2.765694129981158e-05, + "loss": 14.3759, + "step": 15787 + }, + { + "epoch": 0.6580801133758493, + "grad_norm": 108.5, + "learning_rate": 2.7650902916344666e-05, + "loss": 9.5629, + "step": 15788 + }, + { + "epoch": 0.6581217956733775, + "grad_norm": 494.0, + "learning_rate": 2.7644864940192004e-05, + "loss": 17.8752, + "step": 15789 + }, + { + "epoch": 0.6581634779709058, + "grad_norm": 660.0, + "learning_rate": 2.7638827371463698e-05, + "loss": 16.5038, + "step": 15790 + }, + { + "epoch": 0.658205160268434, + "grad_norm": 258.0, + "learning_rate": 2.763279021026971e-05, + "loss": 11.6253, + "step": 15791 + }, + { + "epoch": 0.6582468425659622, + "grad_norm": 322.0, + "learning_rate": 2.7626753456720127e-05, + "loss": 13.9386, + "step": 15792 + }, + { + "epoch": 0.6582885248634904, + "grad_norm": 360.0, + "learning_rate": 2.7620717110924942e-05, + "loss": 15.1251, + "step": 15793 + }, + { + "epoch": 0.6583302071610188, + "grad_norm": 452.0, + "learning_rate": 2.7614681172994184e-05, + "loss": 16.1278, + "step": 15794 + }, + { + "epoch": 0.658371889458547, + "grad_norm": 1200.0, + "learning_rate": 2.7608645643037844e-05, + "loss": 29.5002, + "step": 15795 + }, + { + "epoch": 0.6584135717560752, + "grad_norm": 880.0, + "learning_rate": 2.7602610521165928e-05, + "loss": 20.8817, + "step": 15796 + }, + { + "epoch": 0.6584552540536034, + "grad_norm": 189.0, + "learning_rate": 2.759657580748842e-05, + "loss": 8.1254, + "step": 15797 + }, + { + "epoch": 0.6584969363511317, + "grad_norm": 262.0, + "learning_rate": 2.759054150211531e-05, + "loss": 12.0628, + "step": 15798 + }, + { + "epoch": 0.6585386186486599, + "grad_norm": 404.0, + "learning_rate": 2.7584507605156562e-05, + "loss": 15.5003, + "step": 15799 + }, + { + "epoch": 0.6585803009461881, + "grad_norm": 328.0, + "learning_rate": 2.757847411672216e-05, + "loss": 12.6883, + "step": 15800 + }, + { + "epoch": 0.6586219832437163, + "grad_norm": 448.0, + "learning_rate": 2.7572441036922054e-05, + "loss": 16.7501, + "step": 15801 + }, + { + "epoch": 0.6586636655412447, + "grad_norm": 338.0, + "learning_rate": 2.7566408365866204e-05, + "loss": 14.3764, + "step": 15802 + }, + { + "epoch": 0.6587053478387729, + "grad_norm": 288.0, + "learning_rate": 2.7560376103664553e-05, + "loss": 13.3757, + "step": 15803 + }, + { + "epoch": 0.6587470301363011, + "grad_norm": 572.0, + "learning_rate": 2.7554344250427034e-05, + "loss": 21.1255, + "step": 15804 + }, + { + "epoch": 0.6587887124338293, + "grad_norm": 272.0, + "learning_rate": 2.7548312806263586e-05, + "loss": 12.3761, + "step": 15805 + }, + { + "epoch": 0.6588303947313576, + "grad_norm": 804.0, + "learning_rate": 2.754228177128414e-05, + "loss": 23.6252, + "step": 15806 + }, + { + "epoch": 0.6588720770288858, + "grad_norm": 176.0, + "learning_rate": 2.753625114559857e-05, + "loss": 7.0009, + "step": 15807 + }, + { + "epoch": 0.658913759326414, + "grad_norm": 201.0, + "learning_rate": 2.7530220929316863e-05, + "loss": 11.6254, + "step": 15808 + }, + { + "epoch": 0.6589554416239423, + "grad_norm": 374.0, + "learning_rate": 2.7524191122548837e-05, + "loss": 15.4377, + "step": 15809 + }, + { + "epoch": 0.6589971239214706, + "grad_norm": 201.0, + "learning_rate": 2.7518161725404458e-05, + "loss": 11.1877, + "step": 15810 + }, + { + "epoch": 0.6590388062189988, + "grad_norm": 488.0, + "learning_rate": 2.7512132737993533e-05, + "loss": 18.0002, + "step": 15811 + }, + { + "epoch": 0.659080488516527, + "grad_norm": 394.0, + "learning_rate": 2.7506104160426033e-05, + "loss": 15.2505, + "step": 15812 + }, + { + "epoch": 0.6591221708140552, + "grad_norm": 600.0, + "learning_rate": 2.7500075992811735e-05, + "loss": 18.1256, + "step": 15813 + }, + { + "epoch": 0.6591638531115835, + "grad_norm": 230.0, + "learning_rate": 2.7494048235260595e-05, + "loss": 12.0002, + "step": 15814 + }, + { + "epoch": 0.6592055354091118, + "grad_norm": 560.0, + "learning_rate": 2.748802088788238e-05, + "loss": 18.1253, + "step": 15815 + }, + { + "epoch": 0.65924721770664, + "grad_norm": 600.0, + "learning_rate": 2.7481993950787032e-05, + "loss": 17.8771, + "step": 15816 + }, + { + "epoch": 0.6592889000041682, + "grad_norm": 368.0, + "learning_rate": 2.7475967424084293e-05, + "loss": 14.8752, + "step": 15817 + }, + { + "epoch": 0.6593305823016965, + "grad_norm": 241.0, + "learning_rate": 2.7469941307884074e-05, + "loss": 11.7504, + "step": 15818 + }, + { + "epoch": 0.6593722645992247, + "grad_norm": 209.0, + "learning_rate": 2.746391560229617e-05, + "loss": 5.6252, + "step": 15819 + }, + { + "epoch": 0.6594139468967529, + "grad_norm": 110.0, + "learning_rate": 2.74578903074304e-05, + "loss": 10.1257, + "step": 15820 + }, + { + "epoch": 0.6594556291942811, + "grad_norm": 278.0, + "learning_rate": 2.7451865423396593e-05, + "loss": 13.6257, + "step": 15821 + }, + { + "epoch": 0.6594973114918095, + "grad_norm": 808.0, + "learning_rate": 2.744584095030453e-05, + "loss": 21.7502, + "step": 15822 + }, + { + "epoch": 0.6595389937893377, + "grad_norm": 268.0, + "learning_rate": 2.7439816888264026e-05, + "loss": 12.3128, + "step": 15823 + }, + { + "epoch": 0.6595806760868659, + "grad_norm": 740.0, + "learning_rate": 2.7433793237384852e-05, + "loss": 24.7505, + "step": 15824 + }, + { + "epoch": 0.6596223583843942, + "grad_norm": 348.0, + "learning_rate": 2.742776999777681e-05, + "loss": 13.9377, + "step": 15825 + }, + { + "epoch": 0.6596640406819224, + "grad_norm": 181.0, + "learning_rate": 2.7421747169549654e-05, + "loss": 10.8753, + "step": 15826 + }, + { + "epoch": 0.6597057229794506, + "grad_norm": 592.0, + "learning_rate": 2.7415724752813164e-05, + "loss": 16.8763, + "step": 15827 + }, + { + "epoch": 0.6597474052769788, + "grad_norm": 520.0, + "learning_rate": 2.7409702747677096e-05, + "loss": 18.7503, + "step": 15828 + }, + { + "epoch": 0.6597890875745072, + "grad_norm": 226.0, + "learning_rate": 2.7403681154251203e-05, + "loss": 12.0004, + "step": 15829 + }, + { + "epoch": 0.6598307698720354, + "grad_norm": 524.0, + "learning_rate": 2.7397659972645224e-05, + "loss": 17.6252, + "step": 15830 + }, + { + "epoch": 0.6598724521695636, + "grad_norm": 364.0, + "learning_rate": 2.7391639202968898e-05, + "loss": 15.0626, + "step": 15831 + }, + { + "epoch": 0.6599141344670918, + "grad_norm": 190.0, + "learning_rate": 2.7385618845331956e-05, + "loss": 9.6253, + "step": 15832 + }, + { + "epoch": 0.6599558167646201, + "grad_norm": 600.0, + "learning_rate": 2.7379598899844095e-05, + "loss": 20.2519, + "step": 15833 + }, + { + "epoch": 0.6599974990621483, + "grad_norm": 292.0, + "learning_rate": 2.7373579366615098e-05, + "loss": 13.1885, + "step": 15834 + }, + { + "epoch": 0.6600391813596765, + "grad_norm": 504.0, + "learning_rate": 2.736756024575458e-05, + "loss": 16.3754, + "step": 15835 + }, + { + "epoch": 0.6600808636572048, + "grad_norm": 90.0, + "learning_rate": 2.7361541537372326e-05, + "loss": 8.5628, + "step": 15836 + }, + { + "epoch": 0.6601225459547331, + "grad_norm": 88.5, + "learning_rate": 2.7355523241577953e-05, + "loss": 8.8754, + "step": 15837 + }, + { + "epoch": 0.6601642282522613, + "grad_norm": 560.0, + "learning_rate": 2.7349505358481213e-05, + "loss": 18.5002, + "step": 15838 + }, + { + "epoch": 0.6602059105497895, + "grad_norm": 66.0, + "learning_rate": 2.734348788819171e-05, + "loss": 9.1252, + "step": 15839 + }, + { + "epoch": 0.6602475928473177, + "grad_norm": 462.0, + "learning_rate": 2.7337470830819197e-05, + "loss": 17.3754, + "step": 15840 + }, + { + "epoch": 0.660289275144846, + "grad_norm": 672.0, + "learning_rate": 2.7331454186473245e-05, + "loss": 21.3753, + "step": 15841 + }, + { + "epoch": 0.6603309574423742, + "grad_norm": 183.0, + "learning_rate": 2.7325437955263574e-05, + "loss": 10.8142, + "step": 15842 + }, + { + "epoch": 0.6603726397399025, + "grad_norm": 159.0, + "learning_rate": 2.731942213729981e-05, + "loss": 10.2502, + "step": 15843 + }, + { + "epoch": 0.6604143220374307, + "grad_norm": 536.0, + "learning_rate": 2.731340673269158e-05, + "loss": 18.2503, + "step": 15844 + }, + { + "epoch": 0.660456004334959, + "grad_norm": 58.25, + "learning_rate": 2.7307391741548537e-05, + "loss": 8.2505, + "step": 15845 + }, + { + "epoch": 0.6604976866324872, + "grad_norm": 366.0, + "learning_rate": 2.7301377163980296e-05, + "loss": 14.6259, + "step": 15846 + }, + { + "epoch": 0.6605393689300154, + "grad_norm": 1280.0, + "learning_rate": 2.729536300009647e-05, + "loss": 26.6311, + "step": 15847 + }, + { + "epoch": 0.6605810512275436, + "grad_norm": 206.0, + "learning_rate": 2.7289349250006667e-05, + "loss": 11.6254, + "step": 15848 + }, + { + "epoch": 0.660622733525072, + "grad_norm": 416.0, + "learning_rate": 2.7283335913820485e-05, + "loss": 15.876, + "step": 15849 + }, + { + "epoch": 0.6606644158226002, + "grad_norm": 59.5, + "learning_rate": 2.727732299164753e-05, + "loss": 7.7816, + "step": 15850 + }, + { + "epoch": 0.6607060981201284, + "grad_norm": 296.0, + "learning_rate": 2.727131048359738e-05, + "loss": 10.8752, + "step": 15851 + }, + { + "epoch": 0.6607477804176566, + "grad_norm": 60.0, + "learning_rate": 2.7265298389779608e-05, + "loss": 8.0628, + "step": 15852 + }, + { + "epoch": 0.6607894627151849, + "grad_norm": 480.0, + "learning_rate": 2.7259286710303798e-05, + "loss": 17.0006, + "step": 15853 + }, + { + "epoch": 0.6608311450127131, + "grad_norm": 892.0, + "learning_rate": 2.72532754452795e-05, + "loss": 24.2535, + "step": 15854 + }, + { + "epoch": 0.6608728273102413, + "grad_norm": 177.0, + "learning_rate": 2.724726459481628e-05, + "loss": 10.813, + "step": 15855 + }, + { + "epoch": 0.6609145096077695, + "grad_norm": 114.0, + "learning_rate": 2.7241254159023684e-05, + "loss": 10.0004, + "step": 15856 + }, + { + "epoch": 0.6609561919052979, + "grad_norm": 392.0, + "learning_rate": 2.7235244138011252e-05, + "loss": 14.8754, + "step": 15857 + }, + { + "epoch": 0.6609978742028261, + "grad_norm": 79.0, + "learning_rate": 2.7229234531888515e-05, + "loss": 6.9702, + "step": 15858 + }, + { + "epoch": 0.6610395565003543, + "grad_norm": 258.0, + "learning_rate": 2.722322534076498e-05, + "loss": 12.2502, + "step": 15859 + }, + { + "epoch": 0.6610812387978825, + "grad_norm": 454.0, + "learning_rate": 2.721721656475022e-05, + "loss": 16.2503, + "step": 15860 + }, + { + "epoch": 0.6611229210954108, + "grad_norm": 215.0, + "learning_rate": 2.7211208203953675e-05, + "loss": 11.4379, + "step": 15861 + }, + { + "epoch": 0.661164603392939, + "grad_norm": 684.0, + "learning_rate": 2.720520025848492e-05, + "loss": 23.2504, + "step": 15862 + }, + { + "epoch": 0.6612062856904672, + "grad_norm": 380.0, + "learning_rate": 2.7199192728453383e-05, + "loss": 11.8752, + "step": 15863 + }, + { + "epoch": 0.6612479679879955, + "grad_norm": 398.0, + "learning_rate": 2.7193185613968615e-05, + "loss": 14.063, + "step": 15864 + }, + { + "epoch": 0.6612896502855238, + "grad_norm": 430.0, + "learning_rate": 2.718717891514002e-05, + "loss": 15.813, + "step": 15865 + }, + { + "epoch": 0.661331332583052, + "grad_norm": 298.0, + "learning_rate": 2.7181172632077168e-05, + "loss": 14.5005, + "step": 15866 + }, + { + "epoch": 0.6613730148805802, + "grad_norm": 318.0, + "learning_rate": 2.717516676488942e-05, + "loss": 13.2502, + "step": 15867 + }, + { + "epoch": 0.6614146971781084, + "grad_norm": 378.0, + "learning_rate": 2.716916131368631e-05, + "loss": 13.5007, + "step": 15868 + }, + { + "epoch": 0.6614563794756367, + "grad_norm": 217.0, + "learning_rate": 2.716315627857725e-05, + "loss": 11.8129, + "step": 15869 + }, + { + "epoch": 0.661498061773165, + "grad_norm": 412.0, + "learning_rate": 2.715715165967171e-05, + "loss": 14.5628, + "step": 15870 + }, + { + "epoch": 0.6615397440706932, + "grad_norm": 506.0, + "learning_rate": 2.7151147457079097e-05, + "loss": 17.2502, + "step": 15871 + }, + { + "epoch": 0.6615814263682214, + "grad_norm": 310.0, + "learning_rate": 2.7145143670908858e-05, + "loss": 13.8758, + "step": 15872 + }, + { + "epoch": 0.6616231086657497, + "grad_norm": 342.0, + "learning_rate": 2.7139140301270394e-05, + "loss": 14.7503, + "step": 15873 + }, + { + "epoch": 0.6616647909632779, + "grad_norm": 314.0, + "learning_rate": 2.7133137348273142e-05, + "loss": 13.5007, + "step": 15874 + }, + { + "epoch": 0.6617064732608061, + "grad_norm": 592.0, + "learning_rate": 2.7127134812026478e-05, + "loss": 19.6255, + "step": 15875 + }, + { + "epoch": 0.6617481555583343, + "grad_norm": 496.0, + "learning_rate": 2.7121132692639818e-05, + "loss": 18.2503, + "step": 15876 + }, + { + "epoch": 0.6617898378558627, + "grad_norm": 76.0, + "learning_rate": 2.711513099022255e-05, + "loss": 9.3768, + "step": 15877 + }, + { + "epoch": 0.6618315201533909, + "grad_norm": 208.0, + "learning_rate": 2.7109129704884044e-05, + "loss": 10.9377, + "step": 15878 + }, + { + "epoch": 0.6618732024509191, + "grad_norm": 460.0, + "learning_rate": 2.7103128836733682e-05, + "loss": 16.0002, + "step": 15879 + }, + { + "epoch": 0.6619148847484473, + "grad_norm": 229.0, + "learning_rate": 2.7097128385880833e-05, + "loss": 11.9398, + "step": 15880 + }, + { + "epoch": 0.6619565670459756, + "grad_norm": 233.0, + "learning_rate": 2.7091128352434857e-05, + "loss": 11.1252, + "step": 15881 + }, + { + "epoch": 0.6619982493435038, + "grad_norm": 171.0, + "learning_rate": 2.7085128736505094e-05, + "loss": 10.1257, + "step": 15882 + }, + { + "epoch": 0.662039931641032, + "grad_norm": 171.0, + "learning_rate": 2.7079129538200875e-05, + "loss": 11.0002, + "step": 15883 + }, + { + "epoch": 0.6620816139385602, + "grad_norm": 776.0, + "learning_rate": 2.7073130757631593e-05, + "loss": 21.7502, + "step": 15884 + }, + { + "epoch": 0.6621232962360886, + "grad_norm": 284.0, + "learning_rate": 2.7067132394906504e-05, + "loss": 13.7508, + "step": 15885 + }, + { + "epoch": 0.6621649785336168, + "grad_norm": 390.0, + "learning_rate": 2.7061134450134996e-05, + "loss": 15.8754, + "step": 15886 + }, + { + "epoch": 0.662206660831145, + "grad_norm": 464.0, + "learning_rate": 2.705513692342631e-05, + "loss": 15.1254, + "step": 15887 + }, + { + "epoch": 0.6622483431286732, + "grad_norm": 182.0, + "learning_rate": 2.7049139814889834e-05, + "loss": 10.6252, + "step": 15888 + }, + { + "epoch": 0.6622900254262015, + "grad_norm": 156.0, + "learning_rate": 2.704314312463478e-05, + "loss": 11.4379, + "step": 15889 + }, + { + "epoch": 0.6623317077237297, + "grad_norm": 480.0, + "learning_rate": 2.7037146852770523e-05, + "loss": 17.0002, + "step": 15890 + }, + { + "epoch": 0.662373390021258, + "grad_norm": 252.0, + "learning_rate": 2.7031150999406263e-05, + "loss": 10.6883, + "step": 15891 + }, + { + "epoch": 0.6624150723187862, + "grad_norm": 322.0, + "learning_rate": 2.702515556465135e-05, + "loss": 12.8128, + "step": 15892 + }, + { + "epoch": 0.6624567546163145, + "grad_norm": 422.0, + "learning_rate": 2.701916054861498e-05, + "loss": 15.0628, + "step": 15893 + }, + { + "epoch": 0.6624984369138427, + "grad_norm": 252.0, + "learning_rate": 2.7013165951406473e-05, + "loss": 11.7501, + "step": 15894 + }, + { + "epoch": 0.6625401192113709, + "grad_norm": 1080.0, + "learning_rate": 2.7007171773135047e-05, + "loss": 27.5005, + "step": 15895 + }, + { + "epoch": 0.6625818015088991, + "grad_norm": 1248.0, + "learning_rate": 2.7001178013909966e-05, + "loss": 30.8753, + "step": 15896 + }, + { + "epoch": 0.6626234838064274, + "grad_norm": 131.0, + "learning_rate": 2.699518467384045e-05, + "loss": 11.2504, + "step": 15897 + }, + { + "epoch": 0.6626651661039556, + "grad_norm": 210.0, + "learning_rate": 2.6989191753035737e-05, + "loss": 11.5631, + "step": 15898 + }, + { + "epoch": 0.6627068484014839, + "grad_norm": 448.0, + "learning_rate": 2.6983199251605052e-05, + "loss": 16.7503, + "step": 15899 + }, + { + "epoch": 0.6627485306990122, + "grad_norm": 636.0, + "learning_rate": 2.6977207169657605e-05, + "loss": 19.7513, + "step": 15900 + }, + { + "epoch": 0.6627902129965404, + "grad_norm": 472.0, + "learning_rate": 2.6971215507302596e-05, + "loss": 14.5001, + "step": 15901 + }, + { + "epoch": 0.6628318952940686, + "grad_norm": 480.0, + "learning_rate": 2.6965224264649223e-05, + "loss": 16.2501, + "step": 15902 + }, + { + "epoch": 0.6628735775915968, + "grad_norm": 274.0, + "learning_rate": 2.6959233441806698e-05, + "loss": 12.5629, + "step": 15903 + }, + { + "epoch": 0.6629152598891251, + "grad_norm": 450.0, + "learning_rate": 2.6953243038884178e-05, + "loss": 15.752, + "step": 15904 + }, + { + "epoch": 0.6629569421866534, + "grad_norm": 378.0, + "learning_rate": 2.694725305599085e-05, + "loss": 15.6877, + "step": 15905 + }, + { + "epoch": 0.6629986244841816, + "grad_norm": 183.0, + "learning_rate": 2.6941263493235885e-05, + "loss": 10.6254, + "step": 15906 + }, + { + "epoch": 0.6630403067817098, + "grad_norm": 117.5, + "learning_rate": 2.6935274350728434e-05, + "loss": 9.5004, + "step": 15907 + }, + { + "epoch": 0.6630819890792381, + "grad_norm": 233.0, + "learning_rate": 2.692928562857766e-05, + "loss": 10.0636, + "step": 15908 + }, + { + "epoch": 0.6631236713767663, + "grad_norm": 150.0, + "learning_rate": 2.6923297326892686e-05, + "loss": 10.813, + "step": 15909 + }, + { + "epoch": 0.6631653536742945, + "grad_norm": 402.0, + "learning_rate": 2.691730944578271e-05, + "loss": 14.3126, + "step": 15910 + }, + { + "epoch": 0.6632070359718227, + "grad_norm": 732.0, + "learning_rate": 2.691132198535677e-05, + "loss": 21.3752, + "step": 15911 + }, + { + "epoch": 0.6632487182693511, + "grad_norm": 111.0, + "learning_rate": 2.690533494572408e-05, + "loss": 8.7511, + "step": 15912 + }, + { + "epoch": 0.6632904005668793, + "grad_norm": 249.0, + "learning_rate": 2.6899348326993667e-05, + "loss": 12.2503, + "step": 15913 + }, + { + "epoch": 0.6633320828644075, + "grad_norm": 286.0, + "learning_rate": 2.6893362129274722e-05, + "loss": 12.7514, + "step": 15914 + }, + { + "epoch": 0.6633737651619357, + "grad_norm": 229.0, + "learning_rate": 2.688737635267627e-05, + "loss": 9.2506, + "step": 15915 + }, + { + "epoch": 0.663415447459464, + "grad_norm": 712.0, + "learning_rate": 2.688139099730747e-05, + "loss": 23.1254, + "step": 15916 + }, + { + "epoch": 0.6634571297569922, + "grad_norm": 440.0, + "learning_rate": 2.6875406063277332e-05, + "loss": 16.1252, + "step": 15917 + }, + { + "epoch": 0.6634988120545204, + "grad_norm": 117.5, + "learning_rate": 2.6869421550694996e-05, + "loss": 7.7819, + "step": 15918 + }, + { + "epoch": 0.6635404943520486, + "grad_norm": 362.0, + "learning_rate": 2.68634374596695e-05, + "loss": 14.3752, + "step": 15919 + }, + { + "epoch": 0.663582176649577, + "grad_norm": 127.0, + "learning_rate": 2.6857453790309912e-05, + "loss": 10.0002, + "step": 15920 + }, + { + "epoch": 0.6636238589471052, + "grad_norm": 336.0, + "learning_rate": 2.685147054272528e-05, + "loss": 15.2502, + "step": 15921 + }, + { + "epoch": 0.6636655412446334, + "grad_norm": 262.0, + "learning_rate": 2.684548771702466e-05, + "loss": 13.8755, + "step": 15922 + }, + { + "epoch": 0.6637072235421616, + "grad_norm": 258.0, + "learning_rate": 2.6839505313317077e-05, + "loss": 12.0629, + "step": 15923 + }, + { + "epoch": 0.6637489058396899, + "grad_norm": 264.0, + "learning_rate": 2.6833523331711563e-05, + "loss": 11.2512, + "step": 15924 + }, + { + "epoch": 0.6637905881372181, + "grad_norm": 334.0, + "learning_rate": 2.6827541772317142e-05, + "loss": 14.1254, + "step": 15925 + }, + { + "epoch": 0.6638322704347464, + "grad_norm": 195.0, + "learning_rate": 2.6821560635242836e-05, + "loss": 10.6877, + "step": 15926 + }, + { + "epoch": 0.6638739527322746, + "grad_norm": 169.0, + "learning_rate": 2.6815579920597644e-05, + "loss": 9.7506, + "step": 15927 + }, + { + "epoch": 0.6639156350298029, + "grad_norm": 201.0, + "learning_rate": 2.6809599628490568e-05, + "loss": 11.8758, + "step": 15928 + }, + { + "epoch": 0.6639573173273311, + "grad_norm": 77.0, + "learning_rate": 2.6803619759030592e-05, + "loss": 8.2503, + "step": 15929 + }, + { + "epoch": 0.6639989996248593, + "grad_norm": 237.0, + "learning_rate": 2.6797640312326704e-05, + "loss": 12.1254, + "step": 15930 + }, + { + "epoch": 0.6640406819223875, + "grad_norm": 352.0, + "learning_rate": 2.6791661288487885e-05, + "loss": 15.2502, + "step": 15931 + }, + { + "epoch": 0.6640823642199158, + "grad_norm": 91.0, + "learning_rate": 2.6785682687623092e-05, + "loss": 8.313, + "step": 15932 + }, + { + "epoch": 0.664124046517444, + "grad_norm": 232.0, + "learning_rate": 2.6779704509841286e-05, + "loss": 10.6877, + "step": 15933 + }, + { + "epoch": 0.6641657288149723, + "grad_norm": 258.0, + "learning_rate": 2.677372675525146e-05, + "loss": 12.688, + "step": 15934 + }, + { + "epoch": 0.6642074111125005, + "grad_norm": 163.0, + "learning_rate": 2.6767749423962486e-05, + "loss": 10.4383, + "step": 15935 + }, + { + "epoch": 0.6642490934100288, + "grad_norm": 91.5, + "learning_rate": 2.6761772516083377e-05, + "loss": 9.1879, + "step": 15936 + }, + { + "epoch": 0.664290775707557, + "grad_norm": 294.0, + "learning_rate": 2.675579603172299e-05, + "loss": 14.0003, + "step": 15937 + }, + { + "epoch": 0.6643324580050852, + "grad_norm": 215.0, + "learning_rate": 2.6749819970990318e-05, + "loss": 12.2504, + "step": 15938 + }, + { + "epoch": 0.6643741403026134, + "grad_norm": 442.0, + "learning_rate": 2.6743844333994215e-05, + "loss": 14.6877, + "step": 15939 + }, + { + "epoch": 0.6644158226001418, + "grad_norm": 422.0, + "learning_rate": 2.6737869120843638e-05, + "loss": 14.2523, + "step": 15940 + }, + { + "epoch": 0.66445750489767, + "grad_norm": 462.0, + "learning_rate": 2.6731894331647434e-05, + "loss": 15.5033, + "step": 15941 + }, + { + "epoch": 0.6644991871951982, + "grad_norm": 334.0, + "learning_rate": 2.6725919966514557e-05, + "loss": 13.5627, + "step": 15942 + }, + { + "epoch": 0.6645408694927264, + "grad_norm": 1144.0, + "learning_rate": 2.671994602555381e-05, + "loss": 27.2508, + "step": 15943 + }, + { + "epoch": 0.6645825517902547, + "grad_norm": 728.0, + "learning_rate": 2.671397250887414e-05, + "loss": 20.8753, + "step": 15944 + }, + { + "epoch": 0.6646242340877829, + "grad_norm": 528.0, + "learning_rate": 2.6707999416584383e-05, + "loss": 18.0002, + "step": 15945 + }, + { + "epoch": 0.6646659163853111, + "grad_norm": 494.0, + "learning_rate": 2.6702026748793406e-05, + "loss": 16.3757, + "step": 15946 + }, + { + "epoch": 0.6647075986828394, + "grad_norm": 181.0, + "learning_rate": 2.6696054505610064e-05, + "loss": 8.8753, + "step": 15947 + }, + { + "epoch": 0.6647492809803677, + "grad_norm": 348.0, + "learning_rate": 2.669008268714319e-05, + "loss": 13.1254, + "step": 15948 + }, + { + "epoch": 0.6647909632778959, + "grad_norm": 166.0, + "learning_rate": 2.6684111293501633e-05, + "loss": 11.3755, + "step": 15949 + }, + { + "epoch": 0.6648326455754241, + "grad_norm": 716.0, + "learning_rate": 2.6678140324794222e-05, + "loss": 21.2504, + "step": 15950 + }, + { + "epoch": 0.6648743278729523, + "grad_norm": 264.0, + "learning_rate": 2.6672169781129763e-05, + "loss": 12.9377, + "step": 15951 + }, + { + "epoch": 0.6649160101704806, + "grad_norm": 124.5, + "learning_rate": 2.6666199662617086e-05, + "loss": 10.7509, + "step": 15952 + }, + { + "epoch": 0.6649576924680088, + "grad_norm": 116.0, + "learning_rate": 2.666022996936499e-05, + "loss": 10.2505, + "step": 15953 + }, + { + "epoch": 0.664999374765537, + "grad_norm": 233.0, + "learning_rate": 2.6654260701482282e-05, + "loss": 12.4398, + "step": 15954 + }, + { + "epoch": 0.6650410570630653, + "grad_norm": 266.0, + "learning_rate": 2.664829185907774e-05, + "loss": 12.5004, + "step": 15955 + }, + { + "epoch": 0.6650827393605936, + "grad_norm": 368.0, + "learning_rate": 2.6642323442260154e-05, + "loss": 16.0005, + "step": 15956 + }, + { + "epoch": 0.6651244216581218, + "grad_norm": 474.0, + "learning_rate": 2.663635545113829e-05, + "loss": 17.8754, + "step": 15957 + }, + { + "epoch": 0.66516610395565, + "grad_norm": 174.0, + "learning_rate": 2.663038788582093e-05, + "loss": 9.7505, + "step": 15958 + }, + { + "epoch": 0.6652077862531782, + "grad_norm": 580.0, + "learning_rate": 2.66244207464168e-05, + "loss": 17.6255, + "step": 15959 + }, + { + "epoch": 0.6652494685507065, + "grad_norm": 284.0, + "learning_rate": 2.661845403303472e-05, + "loss": 12.5001, + "step": 15960 + }, + { + "epoch": 0.6652911508482348, + "grad_norm": 1112.0, + "learning_rate": 2.6612487745783342e-05, + "loss": 25.3816, + "step": 15961 + }, + { + "epoch": 0.665332833145763, + "grad_norm": 302.0, + "learning_rate": 2.66065218847715e-05, + "loss": 12.8136, + "step": 15962 + }, + { + "epoch": 0.6653745154432912, + "grad_norm": 304.0, + "learning_rate": 2.660055645010784e-05, + "loss": 11.9385, + "step": 15963 + }, + { + "epoch": 0.6654161977408195, + "grad_norm": 684.0, + "learning_rate": 2.659459144190114e-05, + "loss": 23.3756, + "step": 15964 + }, + { + "epoch": 0.6654578800383477, + "grad_norm": 178.0, + "learning_rate": 2.6588626860260057e-05, + "loss": 11.2504, + "step": 15965 + }, + { + "epoch": 0.6654995623358759, + "grad_norm": 422.0, + "learning_rate": 2.658266270529337e-05, + "loss": 17.0003, + "step": 15966 + }, + { + "epoch": 0.6655412446334041, + "grad_norm": 302.0, + "learning_rate": 2.6576698977109693e-05, + "loss": 12.6255, + "step": 15967 + }, + { + "epoch": 0.6655829269309325, + "grad_norm": 592.0, + "learning_rate": 2.657073567581777e-05, + "loss": 18.7509, + "step": 15968 + }, + { + "epoch": 0.6656246092284607, + "grad_norm": 334.0, + "learning_rate": 2.656477280152628e-05, + "loss": 13.9378, + "step": 15969 + }, + { + "epoch": 0.6656662915259889, + "grad_norm": 720.0, + "learning_rate": 2.6558810354343878e-05, + "loss": 19.0039, + "step": 15970 + }, + { + "epoch": 0.6657079738235172, + "grad_norm": 454.0, + "learning_rate": 2.6552848334379238e-05, + "loss": 17.5002, + "step": 15971 + }, + { + "epoch": 0.6657496561210454, + "grad_norm": 113.0, + "learning_rate": 2.6546886741741023e-05, + "loss": 9.9377, + "step": 15972 + }, + { + "epoch": 0.6657913384185736, + "grad_norm": 330.0, + "learning_rate": 2.654092557653788e-05, + "loss": 14.0637, + "step": 15973 + }, + { + "epoch": 0.6658330207161018, + "grad_norm": 227.0, + "learning_rate": 2.653496483887844e-05, + "loss": 11.9381, + "step": 15974 + }, + { + "epoch": 0.6658747030136302, + "grad_norm": 304.0, + "learning_rate": 2.652900452887136e-05, + "loss": 13.8755, + "step": 15975 + }, + { + "epoch": 0.6659163853111584, + "grad_norm": 362.0, + "learning_rate": 2.652304464662525e-05, + "loss": 15.188, + "step": 15976 + }, + { + "epoch": 0.6659580676086866, + "grad_norm": 508.0, + "learning_rate": 2.6517085192248743e-05, + "loss": 16.1286, + "step": 15977 + }, + { + "epoch": 0.6659997499062148, + "grad_norm": 247.0, + "learning_rate": 2.6511126165850436e-05, + "loss": 13.5009, + "step": 15978 + }, + { + "epoch": 0.6660414322037431, + "grad_norm": 266.0, + "learning_rate": 2.650516756753894e-05, + "loss": 11.6878, + "step": 15979 + }, + { + "epoch": 0.6660831145012713, + "grad_norm": 245.0, + "learning_rate": 2.649920939742285e-05, + "loss": 12.1876, + "step": 15980 + }, + { + "epoch": 0.6661247967987995, + "grad_norm": 86.0, + "learning_rate": 2.6493251655610764e-05, + "loss": 8.5628, + "step": 15981 + }, + { + "epoch": 0.6661664790963278, + "grad_norm": 185.0, + "learning_rate": 2.6487294342211245e-05, + "loss": 8.2505, + "step": 15982 + }, + { + "epoch": 0.6662081613938561, + "grad_norm": 748.0, + "learning_rate": 2.6481337457332876e-05, + "loss": 21.7542, + "step": 15983 + }, + { + "epoch": 0.6662498436913843, + "grad_norm": 804.0, + "learning_rate": 2.647538100108422e-05, + "loss": 23.0006, + "step": 15984 + }, + { + "epoch": 0.6662915259889125, + "grad_norm": 456.0, + "learning_rate": 2.6469424973573815e-05, + "loss": 15.8787, + "step": 15985 + }, + { + "epoch": 0.6663332082864407, + "grad_norm": 1232.0, + "learning_rate": 2.6463469374910267e-05, + "loss": 29.7502, + "step": 15986 + }, + { + "epoch": 0.666374890583969, + "grad_norm": 250.0, + "learning_rate": 2.645751420520204e-05, + "loss": 13.3752, + "step": 15987 + }, + { + "epoch": 0.6664165728814972, + "grad_norm": 72.0, + "learning_rate": 2.645155946455774e-05, + "loss": 5.7819, + "step": 15988 + }, + { + "epoch": 0.6664582551790255, + "grad_norm": 170.0, + "learning_rate": 2.644560515308583e-05, + "loss": 11.063, + "step": 15989 + }, + { + "epoch": 0.6664999374765537, + "grad_norm": 238.0, + "learning_rate": 2.643965127089489e-05, + "loss": 11.6253, + "step": 15990 + }, + { + "epoch": 0.666541619774082, + "grad_norm": 256.0, + "learning_rate": 2.643369781809336e-05, + "loss": 12.1885, + "step": 15991 + }, + { + "epoch": 0.6665833020716102, + "grad_norm": 189.0, + "learning_rate": 2.6427744794789817e-05, + "loss": 11.1879, + "step": 15992 + }, + { + "epoch": 0.6666249843691384, + "grad_norm": 380.0, + "learning_rate": 2.6421792201092676e-05, + "loss": 14.4378, + "step": 15993 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 464.0, + "learning_rate": 2.641584003711049e-05, + "loss": 16.8753, + "step": 15994 + }, + { + "epoch": 0.666708348964195, + "grad_norm": 146.0, + "learning_rate": 2.6409888302951715e-05, + "loss": 9.6253, + "step": 15995 + }, + { + "epoch": 0.6667500312617232, + "grad_norm": 160.0, + "learning_rate": 2.640393699872481e-05, + "loss": 10.5009, + "step": 15996 + }, + { + "epoch": 0.6667917135592514, + "grad_norm": 164.0, + "learning_rate": 2.6397986124538253e-05, + "loss": 10.9377, + "step": 15997 + }, + { + "epoch": 0.6668333958567796, + "grad_norm": 704.0, + "learning_rate": 2.63920356805005e-05, + "loss": 20.8753, + "step": 15998 + }, + { + "epoch": 0.6668750781543079, + "grad_norm": 53.75, + "learning_rate": 2.6386085666719986e-05, + "loss": 8.2503, + "step": 15999 + }, + { + "epoch": 0.6669167604518361, + "grad_norm": 398.0, + "learning_rate": 2.638013608330516e-05, + "loss": 12.8129, + "step": 16000 + }, + { + "epoch": 0.6669584427493643, + "grad_norm": 260.0, + "learning_rate": 2.6374186930364446e-05, + "loss": 12.1252, + "step": 16001 + }, + { + "epoch": 0.6670001250468925, + "grad_norm": 168.0, + "learning_rate": 2.636823820800628e-05, + "loss": 10.3752, + "step": 16002 + }, + { + "epoch": 0.6670418073444209, + "grad_norm": 436.0, + "learning_rate": 2.6362289916339067e-05, + "loss": 16.2503, + "step": 16003 + }, + { + "epoch": 0.6670834896419491, + "grad_norm": 223.0, + "learning_rate": 2.6356342055471217e-05, + "loss": 12.876, + "step": 16004 + }, + { + "epoch": 0.6671251719394773, + "grad_norm": 272.0, + "learning_rate": 2.6350394625511132e-05, + "loss": 12.7505, + "step": 16005 + }, + { + "epoch": 0.6671668542370055, + "grad_norm": 952.0, + "learning_rate": 2.6344447626567205e-05, + "loss": 28.0004, + "step": 16006 + }, + { + "epoch": 0.6672085365345338, + "grad_norm": 123.5, + "learning_rate": 2.6338501058747818e-05, + "loss": 9.7502, + "step": 16007 + }, + { + "epoch": 0.667250218832062, + "grad_norm": 292.0, + "learning_rate": 2.633255492216135e-05, + "loss": 14.5004, + "step": 16008 + }, + { + "epoch": 0.6672919011295902, + "grad_norm": 98.5, + "learning_rate": 2.6326609216916153e-05, + "loss": 8.2503, + "step": 16009 + }, + { + "epoch": 0.6673335834271185, + "grad_norm": 596.0, + "learning_rate": 2.6320663943120638e-05, + "loss": 18.1261, + "step": 16010 + }, + { + "epoch": 0.6673752657246468, + "grad_norm": 276.0, + "learning_rate": 2.6314719100883094e-05, + "loss": 13.1879, + "step": 16011 + }, + { + "epoch": 0.667416948022175, + "grad_norm": 370.0, + "learning_rate": 2.6308774690311933e-05, + "loss": 15.7503, + "step": 16012 + }, + { + "epoch": 0.6674586303197032, + "grad_norm": 382.0, + "learning_rate": 2.6302830711515415e-05, + "loss": 15.6255, + "step": 16013 + }, + { + "epoch": 0.6675003126172314, + "grad_norm": 668.0, + "learning_rate": 2.629688716460195e-05, + "loss": 19.0013, + "step": 16014 + }, + { + "epoch": 0.6675419949147597, + "grad_norm": 350.0, + "learning_rate": 2.629094404967979e-05, + "loss": 12.3789, + "step": 16015 + }, + { + "epoch": 0.667583677212288, + "grad_norm": 177.0, + "learning_rate": 2.628500136685731e-05, + "loss": 10.3752, + "step": 16016 + }, + { + "epoch": 0.6676253595098162, + "grad_norm": 356.0, + "learning_rate": 2.6279059116242744e-05, + "loss": 12.7507, + "step": 16017 + }, + { + "epoch": 0.6676670418073444, + "grad_norm": 304.0, + "learning_rate": 2.627311729794447e-05, + "loss": 13.1877, + "step": 16018 + }, + { + "epoch": 0.6677087241048727, + "grad_norm": 414.0, + "learning_rate": 2.626717591207071e-05, + "loss": 15.8772, + "step": 16019 + }, + { + "epoch": 0.6677504064024009, + "grad_norm": 304.0, + "learning_rate": 2.6261234958729784e-05, + "loss": 14.0628, + "step": 16020 + }, + { + "epoch": 0.6677920886999291, + "grad_norm": 100.5, + "learning_rate": 2.625529443802997e-05, + "loss": 10.6258, + "step": 16021 + }, + { + "epoch": 0.6678337709974573, + "grad_norm": 264.0, + "learning_rate": 2.6249354350079514e-05, + "loss": 10.1254, + "step": 16022 + }, + { + "epoch": 0.6678754532949857, + "grad_norm": 274.0, + "learning_rate": 2.624341469498669e-05, + "loss": 13.9379, + "step": 16023 + }, + { + "epoch": 0.6679171355925139, + "grad_norm": 154.0, + "learning_rate": 2.623747547285974e-05, + "loss": 9.2505, + "step": 16024 + }, + { + "epoch": 0.6679588178900421, + "grad_norm": 296.0, + "learning_rate": 2.62315366838069e-05, + "loss": 12.9378, + "step": 16025 + }, + { + "epoch": 0.6680005001875703, + "grad_norm": 464.0, + "learning_rate": 2.622559832793643e-05, + "loss": 16.1256, + "step": 16026 + }, + { + "epoch": 0.6680421824850986, + "grad_norm": 390.0, + "learning_rate": 2.6219660405356527e-05, + "loss": 16.2502, + "step": 16027 + }, + { + "epoch": 0.6680838647826268, + "grad_norm": 266.0, + "learning_rate": 2.6213722916175433e-05, + "loss": 11.5001, + "step": 16028 + }, + { + "epoch": 0.668125547080155, + "grad_norm": 472.0, + "learning_rate": 2.6207785860501343e-05, + "loss": 15.188, + "step": 16029 + }, + { + "epoch": 0.6681672293776832, + "grad_norm": 312.0, + "learning_rate": 2.6201849238442473e-05, + "loss": 14.3753, + "step": 16030 + }, + { + "epoch": 0.6682089116752116, + "grad_norm": 332.0, + "learning_rate": 2.6195913050107013e-05, + "loss": 12.8131, + "step": 16031 + }, + { + "epoch": 0.6682505939727398, + "grad_norm": 91.5, + "learning_rate": 2.618997729560315e-05, + "loss": 9.3754, + "step": 16032 + }, + { + "epoch": 0.668292276270268, + "grad_norm": 167.0, + "learning_rate": 2.6184041975039064e-05, + "loss": 10.3137, + "step": 16033 + }, + { + "epoch": 0.6683339585677962, + "grad_norm": 394.0, + "learning_rate": 2.617810708852293e-05, + "loss": 14.8755, + "step": 16034 + }, + { + "epoch": 0.6683756408653245, + "grad_norm": 300.0, + "learning_rate": 2.6172172636162885e-05, + "loss": 13.4377, + "step": 16035 + }, + { + "epoch": 0.6684173231628527, + "grad_norm": 462.0, + "learning_rate": 2.6166238618067152e-05, + "loss": 16.6252, + "step": 16036 + }, + { + "epoch": 0.668459005460381, + "grad_norm": 370.0, + "learning_rate": 2.616030503434379e-05, + "loss": 14.0002, + "step": 16037 + }, + { + "epoch": 0.6685006877579092, + "grad_norm": 340.0, + "learning_rate": 2.6154371885101033e-05, + "loss": 13.8154, + "step": 16038 + }, + { + "epoch": 0.6685423700554375, + "grad_norm": 696.0, + "learning_rate": 2.614843917044692e-05, + "loss": 18.7522, + "step": 16039 + }, + { + "epoch": 0.6685840523529657, + "grad_norm": 270.0, + "learning_rate": 2.614250689048966e-05, + "loss": 13.6254, + "step": 16040 + }, + { + "epoch": 0.6686257346504939, + "grad_norm": 245.0, + "learning_rate": 2.6136575045337298e-05, + "loss": 12.7502, + "step": 16041 + }, + { + "epoch": 0.6686674169480221, + "grad_norm": 632.0, + "learning_rate": 2.6130643635098007e-05, + "loss": 17.8752, + "step": 16042 + }, + { + "epoch": 0.6687090992455504, + "grad_norm": 312.0, + "learning_rate": 2.612471265987981e-05, + "loss": 12.8768, + "step": 16043 + }, + { + "epoch": 0.6687507815430787, + "grad_norm": 492.0, + "learning_rate": 2.6118782119790874e-05, + "loss": 17.6255, + "step": 16044 + }, + { + "epoch": 0.6687924638406069, + "grad_norm": 142.0, + "learning_rate": 2.611285201493925e-05, + "loss": 9.2502, + "step": 16045 + }, + { + "epoch": 0.6688341461381352, + "grad_norm": 696.0, + "learning_rate": 2.6106922345433015e-05, + "loss": 20.8756, + "step": 16046 + }, + { + "epoch": 0.6688758284356634, + "grad_norm": 450.0, + "learning_rate": 2.6100993111380246e-05, + "loss": 15.8129, + "step": 16047 + }, + { + "epoch": 0.6689175107331916, + "grad_norm": 490.0, + "learning_rate": 2.6095064312889002e-05, + "loss": 15.3768, + "step": 16048 + }, + { + "epoch": 0.6689591930307198, + "grad_norm": 324.0, + "learning_rate": 2.608913595006733e-05, + "loss": 13.9378, + "step": 16049 + }, + { + "epoch": 0.6690008753282481, + "grad_norm": 952.0, + "learning_rate": 2.6083208023023277e-05, + "loss": 25.1251, + "step": 16050 + }, + { + "epoch": 0.6690425576257764, + "grad_norm": 1648.0, + "learning_rate": 2.607728053186488e-05, + "loss": 31.8752, + "step": 16051 + }, + { + "epoch": 0.6690842399233046, + "grad_norm": 1072.0, + "learning_rate": 2.6071353476700165e-05, + "loss": 25.2504, + "step": 16052 + }, + { + "epoch": 0.6691259222208328, + "grad_norm": 241.0, + "learning_rate": 2.6065426857637167e-05, + "loss": 11.9377, + "step": 16053 + }, + { + "epoch": 0.6691676045183611, + "grad_norm": 468.0, + "learning_rate": 2.605950067478388e-05, + "loss": 17.3756, + "step": 16054 + }, + { + "epoch": 0.6692092868158893, + "grad_norm": 163.0, + "learning_rate": 2.6053574928248326e-05, + "loss": 11.3753, + "step": 16055 + }, + { + "epoch": 0.6692509691134175, + "grad_norm": 576.0, + "learning_rate": 2.6047649618138494e-05, + "loss": 17.751, + "step": 16056 + }, + { + "epoch": 0.6692926514109457, + "grad_norm": 68.5, + "learning_rate": 2.6041724744562368e-05, + "loss": 7.9065, + "step": 16057 + }, + { + "epoch": 0.6693343337084741, + "grad_norm": 231.0, + "learning_rate": 2.603580030762794e-05, + "loss": 11.7503, + "step": 16058 + }, + { + "epoch": 0.6693760160060023, + "grad_norm": 233.0, + "learning_rate": 2.602987630744316e-05, + "loss": 13.0003, + "step": 16059 + }, + { + "epoch": 0.6694176983035305, + "grad_norm": 356.0, + "learning_rate": 2.602395274411604e-05, + "loss": 14.1882, + "step": 16060 + }, + { + "epoch": 0.6694593806010587, + "grad_norm": 195.0, + "learning_rate": 2.6018029617754484e-05, + "loss": 10.3128, + "step": 16061 + }, + { + "epoch": 0.669501062898587, + "grad_norm": 176.0, + "learning_rate": 2.6012106928466506e-05, + "loss": 12.0638, + "step": 16062 + }, + { + "epoch": 0.6695427451961152, + "grad_norm": 76.0, + "learning_rate": 2.6006184676359964e-05, + "loss": 9.7504, + "step": 16063 + }, + { + "epoch": 0.6695844274936434, + "grad_norm": 416.0, + "learning_rate": 2.6000262861542878e-05, + "loss": 15.9376, + "step": 16064 + }, + { + "epoch": 0.6696261097911717, + "grad_norm": 506.0, + "learning_rate": 2.5994341484123092e-05, + "loss": 17.7504, + "step": 16065 + }, + { + "epoch": 0.6696677920887, + "grad_norm": 334.0, + "learning_rate": 2.5988420544208604e-05, + "loss": 10.9386, + "step": 16066 + }, + { + "epoch": 0.6697094743862282, + "grad_norm": 304.0, + "learning_rate": 2.5982500041907244e-05, + "loss": 14.8149, + "step": 16067 + }, + { + "epoch": 0.6697511566837564, + "grad_norm": 916.0, + "learning_rate": 2.5976579977326988e-05, + "loss": 24.2507, + "step": 16068 + }, + { + "epoch": 0.6697928389812846, + "grad_norm": 128.0, + "learning_rate": 2.597066035057566e-05, + "loss": 9.5006, + "step": 16069 + }, + { + "epoch": 0.6698345212788129, + "grad_norm": 294.0, + "learning_rate": 2.5964741161761197e-05, + "loss": 12.5628, + "step": 16070 + }, + { + "epoch": 0.6698762035763411, + "grad_norm": 96.0, + "learning_rate": 2.5958822410991458e-05, + "loss": 9.8754, + "step": 16071 + }, + { + "epoch": 0.6699178858738694, + "grad_norm": 89.5, + "learning_rate": 2.595290409837432e-05, + "loss": 7.3128, + "step": 16072 + }, + { + "epoch": 0.6699595681713976, + "grad_norm": 588.0, + "learning_rate": 2.5946986224017632e-05, + "loss": 18.0003, + "step": 16073 + }, + { + "epoch": 0.6700012504689259, + "grad_norm": 728.0, + "learning_rate": 2.5941068788029266e-05, + "loss": 20.5021, + "step": 16074 + }, + { + "epoch": 0.6700429327664541, + "grad_norm": 177.0, + "learning_rate": 2.5935151790517047e-05, + "loss": 11.7503, + "step": 16075 + }, + { + "epoch": 0.6700846150639823, + "grad_norm": 380.0, + "learning_rate": 2.5929235231588828e-05, + "loss": 15.0002, + "step": 16076 + }, + { + "epoch": 0.6701262973615105, + "grad_norm": 396.0, + "learning_rate": 2.592331911135243e-05, + "loss": 14.0629, + "step": 16077 + }, + { + "epoch": 0.6701679796590388, + "grad_norm": 193.0, + "learning_rate": 2.5917403429915687e-05, + "loss": 11.6252, + "step": 16078 + }, + { + "epoch": 0.6702096619565671, + "grad_norm": 392.0, + "learning_rate": 2.5911488187386397e-05, + "loss": 15.5023, + "step": 16079 + }, + { + "epoch": 0.6702513442540953, + "grad_norm": 672.0, + "learning_rate": 2.590557338387237e-05, + "loss": 20.1253, + "step": 16080 + }, + { + "epoch": 0.6702930265516235, + "grad_norm": 162.0, + "learning_rate": 2.5899659019481415e-05, + "loss": 9.6254, + "step": 16081 + }, + { + "epoch": 0.6703347088491518, + "grad_norm": 225.0, + "learning_rate": 2.589374509432131e-05, + "loss": 11.7503, + "step": 16082 + }, + { + "epoch": 0.67037639114668, + "grad_norm": 294.0, + "learning_rate": 2.5887831608499848e-05, + "loss": 12.8756, + "step": 16083 + }, + { + "epoch": 0.6704180734442082, + "grad_norm": 708.0, + "learning_rate": 2.5881918562124785e-05, + "loss": 21.7506, + "step": 16084 + }, + { + "epoch": 0.6704597557417364, + "grad_norm": 308.0, + "learning_rate": 2.5876005955303885e-05, + "loss": 13.1877, + "step": 16085 + }, + { + "epoch": 0.6705014380392648, + "grad_norm": 568.0, + "learning_rate": 2.587009378814495e-05, + "loss": 19.2502, + "step": 16086 + }, + { + "epoch": 0.670543120336793, + "grad_norm": 664.0, + "learning_rate": 2.586418206075566e-05, + "loss": 19.5007, + "step": 16087 + }, + { + "epoch": 0.6705848026343212, + "grad_norm": 302.0, + "learning_rate": 2.5858270773243842e-05, + "loss": 12.9379, + "step": 16088 + }, + { + "epoch": 0.6706264849318494, + "grad_norm": 88.5, + "learning_rate": 2.5852359925717136e-05, + "loss": 8.3753, + "step": 16089 + }, + { + "epoch": 0.6706681672293777, + "grad_norm": 328.0, + "learning_rate": 2.5846449518283354e-05, + "loss": 14.8753, + "step": 16090 + }, + { + "epoch": 0.6707098495269059, + "grad_norm": 338.0, + "learning_rate": 2.5840539551050136e-05, + "loss": 14.6259, + "step": 16091 + }, + { + "epoch": 0.6707515318244341, + "grad_norm": 438.0, + "learning_rate": 2.5834630024125267e-05, + "loss": 15.7506, + "step": 16092 + }, + { + "epoch": 0.6707932141219624, + "grad_norm": 280.0, + "learning_rate": 2.582872093761637e-05, + "loss": 13.6257, + "step": 16093 + }, + { + "epoch": 0.6708348964194907, + "grad_norm": 308.0, + "learning_rate": 2.5822812291631203e-05, + "loss": 13.5627, + "step": 16094 + }, + { + "epoch": 0.6708765787170189, + "grad_norm": 456.0, + "learning_rate": 2.581690408627743e-05, + "loss": 17.1254, + "step": 16095 + }, + { + "epoch": 0.6709182610145471, + "grad_norm": 146.0, + "learning_rate": 2.5810996321662716e-05, + "loss": 9.3129, + "step": 16096 + }, + { + "epoch": 0.6709599433120753, + "grad_norm": 270.0, + "learning_rate": 2.5805088997894755e-05, + "loss": 12.3127, + "step": 16097 + }, + { + "epoch": 0.6710016256096036, + "grad_norm": 155.0, + "learning_rate": 2.579918211508119e-05, + "loss": 11.4378, + "step": 16098 + }, + { + "epoch": 0.6710433079071318, + "grad_norm": 298.0, + "learning_rate": 2.5793275673329676e-05, + "loss": 14.2503, + "step": 16099 + }, + { + "epoch": 0.6710849902046601, + "grad_norm": 320.0, + "learning_rate": 2.5787369672747863e-05, + "loss": 14.7503, + "step": 16100 + }, + { + "epoch": 0.6711266725021883, + "grad_norm": 314.0, + "learning_rate": 2.578146411344339e-05, + "loss": 13.8751, + "step": 16101 + }, + { + "epoch": 0.6711683547997166, + "grad_norm": 412.0, + "learning_rate": 2.5775558995523884e-05, + "loss": 15.5012, + "step": 16102 + }, + { + "epoch": 0.6712100370972448, + "grad_norm": 668.0, + "learning_rate": 2.5769654319096958e-05, + "loss": 21.3752, + "step": 16103 + }, + { + "epoch": 0.671251719394773, + "grad_norm": 282.0, + "learning_rate": 2.5763750084270243e-05, + "loss": 14.4379, + "step": 16104 + }, + { + "epoch": 0.6712934016923012, + "grad_norm": 1112.0, + "learning_rate": 2.5757846291151328e-05, + "loss": 33.754, + "step": 16105 + }, + { + "epoch": 0.6713350839898296, + "grad_norm": 314.0, + "learning_rate": 2.5751942939847817e-05, + "loss": 14.001, + "step": 16106 + }, + { + "epoch": 0.6713767662873578, + "grad_norm": 466.0, + "learning_rate": 2.57460400304673e-05, + "loss": 17.3751, + "step": 16107 + }, + { + "epoch": 0.671418448584886, + "grad_norm": 596.0, + "learning_rate": 2.5740137563117355e-05, + "loss": 19.8752, + "step": 16108 + }, + { + "epoch": 0.6714601308824142, + "grad_norm": 388.0, + "learning_rate": 2.5734235537905536e-05, + "loss": 13.6877, + "step": 16109 + }, + { + "epoch": 0.6715018131799425, + "grad_norm": 161.0, + "learning_rate": 2.5728333954939476e-05, + "loss": 10.6258, + "step": 16110 + }, + { + "epoch": 0.6715434954774707, + "grad_norm": 127.0, + "learning_rate": 2.5722432814326637e-05, + "loss": 8.8753, + "step": 16111 + }, + { + "epoch": 0.6715851777749989, + "grad_norm": 290.0, + "learning_rate": 2.5716532116174652e-05, + "loss": 13.3756, + "step": 16112 + }, + { + "epoch": 0.6716268600725271, + "grad_norm": 122.5, + "learning_rate": 2.5710631860590996e-05, + "loss": 8.0629, + "step": 16113 + }, + { + "epoch": 0.6716685423700555, + "grad_norm": 532.0, + "learning_rate": 2.570473204768326e-05, + "loss": 17.7509, + "step": 16114 + }, + { + "epoch": 0.6717102246675837, + "grad_norm": 450.0, + "learning_rate": 2.5698832677558903e-05, + "loss": 16.2503, + "step": 16115 + }, + { + "epoch": 0.6717519069651119, + "grad_norm": 237.0, + "learning_rate": 2.569293375032552e-05, + "loss": 13.2508, + "step": 16116 + }, + { + "epoch": 0.6717935892626402, + "grad_norm": 342.0, + "learning_rate": 2.568703526609053e-05, + "loss": 14.8127, + "step": 16117 + }, + { + "epoch": 0.6718352715601684, + "grad_norm": 161.0, + "learning_rate": 2.5681137224961527e-05, + "loss": 10.6259, + "step": 16118 + }, + { + "epoch": 0.6718769538576966, + "grad_norm": 556.0, + "learning_rate": 2.5675239627045922e-05, + "loss": 17.8753, + "step": 16119 + }, + { + "epoch": 0.6719186361552248, + "grad_norm": 266.0, + "learning_rate": 2.566934247245124e-05, + "loss": 11.3128, + "step": 16120 + }, + { + "epoch": 0.6719603184527532, + "grad_norm": 213.0, + "learning_rate": 2.5663445761284965e-05, + "loss": 11.0626, + "step": 16121 + }, + { + "epoch": 0.6720020007502814, + "grad_norm": 1632.0, + "learning_rate": 2.5657549493654542e-05, + "loss": 30.8804, + "step": 16122 + }, + { + "epoch": 0.6720436830478096, + "grad_norm": 502.0, + "learning_rate": 2.565165366966745e-05, + "loss": 15.5662, + "step": 16123 + }, + { + "epoch": 0.6720853653453378, + "grad_norm": 264.0, + "learning_rate": 2.564575828943112e-05, + "loss": 12.1877, + "step": 16124 + }, + { + "epoch": 0.6721270476428661, + "grad_norm": 157.0, + "learning_rate": 2.5639863353053016e-05, + "loss": 10.4379, + "step": 16125 + }, + { + "epoch": 0.6721687299403943, + "grad_norm": 872.0, + "learning_rate": 2.5633968860640557e-05, + "loss": 24.6252, + "step": 16126 + }, + { + "epoch": 0.6722104122379225, + "grad_norm": 478.0, + "learning_rate": 2.5628074812301183e-05, + "loss": 16.8753, + "step": 16127 + }, + { + "epoch": 0.6722520945354508, + "grad_norm": 596.0, + "learning_rate": 2.5622181208142316e-05, + "loss": 19.2502, + "step": 16128 + }, + { + "epoch": 0.6722937768329791, + "grad_norm": 796.0, + "learning_rate": 2.5616288048271352e-05, + "loss": 22.0002, + "step": 16129 + }, + { + "epoch": 0.6723354591305073, + "grad_norm": 494.0, + "learning_rate": 2.56103953327957e-05, + "loss": 19.5004, + "step": 16130 + }, + { + "epoch": 0.6723771414280355, + "grad_norm": 548.0, + "learning_rate": 2.5604503061822767e-05, + "loss": 17.7507, + "step": 16131 + }, + { + "epoch": 0.6724188237255637, + "grad_norm": 214.0, + "learning_rate": 2.5598611235459926e-05, + "loss": 10.5626, + "step": 16132 + }, + { + "epoch": 0.672460506023092, + "grad_norm": 306.0, + "learning_rate": 2.5592719853814562e-05, + "loss": 13.5002, + "step": 16133 + }, + { + "epoch": 0.6725021883206203, + "grad_norm": 436.0, + "learning_rate": 2.5586828916994044e-05, + "loss": 17.1257, + "step": 16134 + }, + { + "epoch": 0.6725438706181485, + "grad_norm": 223.0, + "learning_rate": 2.5580938425105716e-05, + "loss": 11.5632, + "step": 16135 + }, + { + "epoch": 0.6725855529156767, + "grad_norm": 211.0, + "learning_rate": 2.5575048378256992e-05, + "loss": 13.6879, + "step": 16136 + }, + { + "epoch": 0.672627235213205, + "grad_norm": 73.0, + "learning_rate": 2.5569158776555148e-05, + "loss": 8.6879, + "step": 16137 + }, + { + "epoch": 0.6726689175107332, + "grad_norm": 964.0, + "learning_rate": 2.5563269620107588e-05, + "loss": 22.6297, + "step": 16138 + }, + { + "epoch": 0.6727105998082614, + "grad_norm": 366.0, + "learning_rate": 2.5557380909021572e-05, + "loss": 13.6257, + "step": 16139 + }, + { + "epoch": 0.6727522821057896, + "grad_norm": 366.0, + "learning_rate": 2.5551492643404494e-05, + "loss": 12.7537, + "step": 16140 + }, + { + "epoch": 0.672793964403318, + "grad_norm": 356.0, + "learning_rate": 2.5545604823363593e-05, + "loss": 15.3752, + "step": 16141 + }, + { + "epoch": 0.6728356467008462, + "grad_norm": 462.0, + "learning_rate": 2.5539717449006262e-05, + "loss": 16.3752, + "step": 16142 + }, + { + "epoch": 0.6728773289983744, + "grad_norm": 410.0, + "learning_rate": 2.5533830520439704e-05, + "loss": 13.1877, + "step": 16143 + }, + { + "epoch": 0.6729190112959026, + "grad_norm": 330.0, + "learning_rate": 2.55279440377713e-05, + "loss": 12.1877, + "step": 16144 + }, + { + "epoch": 0.6729606935934309, + "grad_norm": 278.0, + "learning_rate": 2.552205800110825e-05, + "loss": 12.5002, + "step": 16145 + }, + { + "epoch": 0.6730023758909591, + "grad_norm": 426.0, + "learning_rate": 2.551617241055788e-05, + "loss": 16.8751, + "step": 16146 + }, + { + "epoch": 0.6730440581884873, + "grad_norm": 254.0, + "learning_rate": 2.5510287266227446e-05, + "loss": 13.7502, + "step": 16147 + }, + { + "epoch": 0.6730857404860155, + "grad_norm": 217.0, + "learning_rate": 2.55044025682242e-05, + "loss": 11.1255, + "step": 16148 + }, + { + "epoch": 0.6731274227835439, + "grad_norm": 292.0, + "learning_rate": 2.549851831665539e-05, + "loss": 13.7502, + "step": 16149 + }, + { + "epoch": 0.6731691050810721, + "grad_norm": 1272.0, + "learning_rate": 2.549263451162826e-05, + "loss": 30.3755, + "step": 16150 + }, + { + "epoch": 0.6732107873786003, + "grad_norm": 354.0, + "learning_rate": 2.5486751153250043e-05, + "loss": 13.4378, + "step": 16151 + }, + { + "epoch": 0.6732524696761285, + "grad_norm": 428.0, + "learning_rate": 2.5480868241627964e-05, + "loss": 16.8751, + "step": 16152 + }, + { + "epoch": 0.6732941519736568, + "grad_norm": 462.0, + "learning_rate": 2.5474985776869232e-05, + "loss": 17.1252, + "step": 16153 + }, + { + "epoch": 0.673335834271185, + "grad_norm": 155.0, + "learning_rate": 2.5469103759081066e-05, + "loss": 9.4376, + "step": 16154 + }, + { + "epoch": 0.6733775165687133, + "grad_norm": 486.0, + "learning_rate": 2.546322218837066e-05, + "loss": 17.1252, + "step": 16155 + }, + { + "epoch": 0.6734191988662415, + "grad_norm": 83.0, + "learning_rate": 2.5457341064845207e-05, + "loss": 9.5627, + "step": 16156 + }, + { + "epoch": 0.6734608811637698, + "grad_norm": 120.0, + "learning_rate": 2.545146038861189e-05, + "loss": 9.2514, + "step": 16157 + }, + { + "epoch": 0.673502563461298, + "grad_norm": 398.0, + "learning_rate": 2.5445580159777893e-05, + "loss": 14.8129, + "step": 16158 + }, + { + "epoch": 0.6735442457588262, + "grad_norm": 276.0, + "learning_rate": 2.543970037845037e-05, + "loss": 12.7512, + "step": 16159 + }, + { + "epoch": 0.6735859280563544, + "grad_norm": 116.5, + "learning_rate": 2.5433821044736493e-05, + "loss": 9.0006, + "step": 16160 + }, + { + "epoch": 0.6736276103538827, + "grad_norm": 588.0, + "learning_rate": 2.542794215874339e-05, + "loss": 19.8752, + "step": 16161 + }, + { + "epoch": 0.673669292651411, + "grad_norm": 330.0, + "learning_rate": 2.5422063720578254e-05, + "loss": 13.6888, + "step": 16162 + }, + { + "epoch": 0.6737109749489392, + "grad_norm": 1032.0, + "learning_rate": 2.5416185730348153e-05, + "loss": 26.7504, + "step": 16163 + }, + { + "epoch": 0.6737526572464674, + "grad_norm": 376.0, + "learning_rate": 2.5410308188160293e-05, + "loss": 17.6256, + "step": 16164 + }, + { + "epoch": 0.6737943395439957, + "grad_norm": 404.0, + "learning_rate": 2.5404431094121707e-05, + "loss": 14.7505, + "step": 16165 + }, + { + "epoch": 0.6738360218415239, + "grad_norm": 201.0, + "learning_rate": 2.5398554448339586e-05, + "loss": 11.5627, + "step": 16166 + }, + { + "epoch": 0.6738777041390521, + "grad_norm": 732.0, + "learning_rate": 2.5392678250920952e-05, + "loss": 20.8754, + "step": 16167 + }, + { + "epoch": 0.6739193864365803, + "grad_norm": 482.0, + "learning_rate": 2.5386802501972983e-05, + "loss": 17.8769, + "step": 16168 + }, + { + "epoch": 0.6739610687341087, + "grad_norm": 360.0, + "learning_rate": 2.5380927201602688e-05, + "loss": 13.8754, + "step": 16169 + }, + { + "epoch": 0.6740027510316369, + "grad_norm": 338.0, + "learning_rate": 2.53750523499172e-05, + "loss": 12.752, + "step": 16170 + }, + { + "epoch": 0.6740444333291651, + "grad_norm": 306.0, + "learning_rate": 2.5369177947023565e-05, + "loss": 13.1253, + "step": 16171 + }, + { + "epoch": 0.6740861156266933, + "grad_norm": 676.0, + "learning_rate": 2.5363303993028854e-05, + "loss": 21.2502, + "step": 16172 + }, + { + "epoch": 0.6741277979242216, + "grad_norm": 227.0, + "learning_rate": 2.5357430488040107e-05, + "loss": 12.2502, + "step": 16173 + }, + { + "epoch": 0.6741694802217498, + "grad_norm": 228.0, + "learning_rate": 2.535155743216438e-05, + "loss": 11.6252, + "step": 16174 + }, + { + "epoch": 0.674211162519278, + "grad_norm": 312.0, + "learning_rate": 2.5345684825508708e-05, + "loss": 12.3751, + "step": 16175 + }, + { + "epoch": 0.6742528448168063, + "grad_norm": 106.0, + "learning_rate": 2.5339812668180118e-05, + "loss": 9.6252, + "step": 16176 + }, + { + "epoch": 0.6742945271143346, + "grad_norm": 1264.0, + "learning_rate": 2.533394096028563e-05, + "loss": 32.5006, + "step": 16177 + }, + { + "epoch": 0.6743362094118628, + "grad_norm": 426.0, + "learning_rate": 2.5328069701932257e-05, + "loss": 14.8127, + "step": 16178 + }, + { + "epoch": 0.674377891709391, + "grad_norm": 336.0, + "learning_rate": 2.5322198893227005e-05, + "loss": 13.5002, + "step": 16179 + }, + { + "epoch": 0.6744195740069192, + "grad_norm": 852.0, + "learning_rate": 2.5316328534276867e-05, + "loss": 23.0006, + "step": 16180 + }, + { + "epoch": 0.6744612563044475, + "grad_norm": 508.0, + "learning_rate": 2.5310458625188838e-05, + "loss": 17.1253, + "step": 16181 + }, + { + "epoch": 0.6745029386019757, + "grad_norm": 800.0, + "learning_rate": 2.530458916606989e-05, + "loss": 19.6283, + "step": 16182 + }, + { + "epoch": 0.674544620899504, + "grad_norm": 103.5, + "learning_rate": 2.5298720157027e-05, + "loss": 10.7504, + "step": 16183 + }, + { + "epoch": 0.6745863031970322, + "grad_norm": 318.0, + "learning_rate": 2.529285159816712e-05, + "loss": 14.3777, + "step": 16184 + }, + { + "epoch": 0.6746279854945605, + "grad_norm": 330.0, + "learning_rate": 2.52869834895972e-05, + "loss": 14.0004, + "step": 16185 + }, + { + "epoch": 0.6746696677920887, + "grad_norm": 187.0, + "learning_rate": 2.5281115831424236e-05, + "loss": 10.5628, + "step": 16186 + }, + { + "epoch": 0.6747113500896169, + "grad_norm": 362.0, + "learning_rate": 2.527524862375509e-05, + "loss": 15.0005, + "step": 16187 + }, + { + "epoch": 0.6747530323871451, + "grad_norm": 432.0, + "learning_rate": 2.5269381866696773e-05, + "loss": 14.6254, + "step": 16188 + }, + { + "epoch": 0.6747947146846734, + "grad_norm": 282.0, + "learning_rate": 2.526351556035613e-05, + "loss": 12.6252, + "step": 16189 + }, + { + "epoch": 0.6748363969822017, + "grad_norm": 420.0, + "learning_rate": 2.525764970484015e-05, + "loss": 17.1259, + "step": 16190 + }, + { + "epoch": 0.6748780792797299, + "grad_norm": 72.0, + "learning_rate": 2.525178430025566e-05, + "loss": 8.5005, + "step": 16191 + }, + { + "epoch": 0.6749197615772582, + "grad_norm": 864.0, + "learning_rate": 2.5245919346709634e-05, + "loss": 24.6252, + "step": 16192 + }, + { + "epoch": 0.6749614438747864, + "grad_norm": 536.0, + "learning_rate": 2.5240054844308893e-05, + "loss": 18.0011, + "step": 16193 + }, + { + "epoch": 0.6750031261723146, + "grad_norm": 712.0, + "learning_rate": 2.5234190793160384e-05, + "loss": 21.1257, + "step": 16194 + }, + { + "epoch": 0.6750448084698428, + "grad_norm": 330.0, + "learning_rate": 2.5228327193370914e-05, + "loss": 13.0626, + "step": 16195 + }, + { + "epoch": 0.6750864907673712, + "grad_norm": 284.0, + "learning_rate": 2.5222464045047393e-05, + "loss": 11.9381, + "step": 16196 + }, + { + "epoch": 0.6751281730648994, + "grad_norm": 203.0, + "learning_rate": 2.521660134829667e-05, + "loss": 11.8133, + "step": 16197 + }, + { + "epoch": 0.6751698553624276, + "grad_norm": 216.0, + "learning_rate": 2.5210739103225583e-05, + "loss": 10.5627, + "step": 16198 + }, + { + "epoch": 0.6752115376599558, + "grad_norm": 262.0, + "learning_rate": 2.5204877309940977e-05, + "loss": 12.3127, + "step": 16199 + }, + { + "epoch": 0.6752532199574841, + "grad_norm": 270.0, + "learning_rate": 2.5199015968549688e-05, + "loss": 13.0003, + "step": 16200 + }, + { + "epoch": 0.6752949022550123, + "grad_norm": 214.0, + "learning_rate": 2.5193155079158527e-05, + "loss": 9.7502, + "step": 16201 + }, + { + "epoch": 0.6753365845525405, + "grad_norm": 96.0, + "learning_rate": 2.518729464187432e-05, + "loss": 9.5627, + "step": 16202 + }, + { + "epoch": 0.6753782668500687, + "grad_norm": 260.0, + "learning_rate": 2.518143465680387e-05, + "loss": 12.9377, + "step": 16203 + }, + { + "epoch": 0.6754199491475971, + "grad_norm": 372.0, + "learning_rate": 2.517557512405398e-05, + "loss": 15.2503, + "step": 16204 + }, + { + "epoch": 0.6754616314451253, + "grad_norm": 1576.0, + "learning_rate": 2.5169716043731435e-05, + "loss": 35.7502, + "step": 16205 + }, + { + "epoch": 0.6755033137426535, + "grad_norm": 136.0, + "learning_rate": 2.516385741594302e-05, + "loss": 7.5005, + "step": 16206 + }, + { + "epoch": 0.6755449960401817, + "grad_norm": 141.0, + "learning_rate": 2.5157999240795505e-05, + "loss": 6.2191, + "step": 16207 + }, + { + "epoch": 0.67558667833771, + "grad_norm": 67.5, + "learning_rate": 2.5152141518395668e-05, + "loss": 7.1573, + "step": 16208 + }, + { + "epoch": 0.6756283606352382, + "grad_norm": 864.0, + "learning_rate": 2.514628424885025e-05, + "loss": 25.0007, + "step": 16209 + }, + { + "epoch": 0.6756700429327664, + "grad_norm": 53.0, + "learning_rate": 2.5140427432266007e-05, + "loss": 7.6259, + "step": 16210 + }, + { + "epoch": 0.6757117252302947, + "grad_norm": 406.0, + "learning_rate": 2.5134571068749662e-05, + "loss": 15.2502, + "step": 16211 + }, + { + "epoch": 0.675753407527823, + "grad_norm": 300.0, + "learning_rate": 2.5128715158408012e-05, + "loss": 9.7518, + "step": 16212 + }, + { + "epoch": 0.6757950898253512, + "grad_norm": 179.0, + "learning_rate": 2.512285970134769e-05, + "loss": 9.1887, + "step": 16213 + }, + { + "epoch": 0.6758367721228794, + "grad_norm": 428.0, + "learning_rate": 2.51170046976755e-05, + "loss": 16.5002, + "step": 16214 + }, + { + "epoch": 0.6758784544204076, + "grad_norm": 148.0, + "learning_rate": 2.511115014749807e-05, + "loss": 9.063, + "step": 16215 + }, + { + "epoch": 0.6759201367179359, + "grad_norm": 194.0, + "learning_rate": 2.5105296050922178e-05, + "loss": 11.5633, + "step": 16216 + }, + { + "epoch": 0.6759618190154641, + "grad_norm": 186.0, + "learning_rate": 2.509944240805443e-05, + "loss": 10.7504, + "step": 16217 + }, + { + "epoch": 0.6760035013129924, + "grad_norm": 454.0, + "learning_rate": 2.50935892190016e-05, + "loss": 16.6259, + "step": 16218 + }, + { + "epoch": 0.6760451836105206, + "grad_norm": 1336.0, + "learning_rate": 2.5087736483870274e-05, + "loss": 32.0003, + "step": 16219 + }, + { + "epoch": 0.6760868659080489, + "grad_norm": 474.0, + "learning_rate": 2.5081884202767186e-05, + "loss": 17.5005, + "step": 16220 + }, + { + "epoch": 0.6761285482055771, + "grad_norm": 140.0, + "learning_rate": 2.507603237579897e-05, + "loss": 11.688, + "step": 16221 + }, + { + "epoch": 0.6761702305031053, + "grad_norm": 270.0, + "learning_rate": 2.5070181003072274e-05, + "loss": 13.5013, + "step": 16222 + }, + { + "epoch": 0.6762119128006335, + "grad_norm": 176.0, + "learning_rate": 2.5064330084693742e-05, + "loss": 9.8753, + "step": 16223 + }, + { + "epoch": 0.6762535950981619, + "grad_norm": 164.0, + "learning_rate": 2.5058479620770013e-05, + "loss": 10.6877, + "step": 16224 + }, + { + "epoch": 0.6762952773956901, + "grad_norm": 332.0, + "learning_rate": 2.50526296114077e-05, + "loss": 14.3131, + "step": 16225 + }, + { + "epoch": 0.6763369596932183, + "grad_norm": 644.0, + "learning_rate": 2.504678005671343e-05, + "loss": 19.5003, + "step": 16226 + }, + { + "epoch": 0.6763786419907465, + "grad_norm": 89.5, + "learning_rate": 2.5040930956793806e-05, + "loss": 5.7825, + "step": 16227 + }, + { + "epoch": 0.6764203242882748, + "grad_norm": 251.0, + "learning_rate": 2.503508231175544e-05, + "loss": 11.9376, + "step": 16228 + }, + { + "epoch": 0.676462006585803, + "grad_norm": 135.0, + "learning_rate": 2.502923412170491e-05, + "loss": 10.1253, + "step": 16229 + }, + { + "epoch": 0.6765036888833312, + "grad_norm": 560.0, + "learning_rate": 2.5023386386748803e-05, + "loss": 15.8168, + "step": 16230 + }, + { + "epoch": 0.6765453711808594, + "grad_norm": 216.0, + "learning_rate": 2.5017539106993694e-05, + "loss": 12.4377, + "step": 16231 + }, + { + "epoch": 0.6765870534783878, + "grad_norm": 205.0, + "learning_rate": 2.5011692282546157e-05, + "loss": 10.5007, + "step": 16232 + }, + { + "epoch": 0.676628735775916, + "grad_norm": 286.0, + "learning_rate": 2.500584591351275e-05, + "loss": 15.8135, + "step": 16233 + }, + { + "epoch": 0.6766704180734442, + "grad_norm": 122.5, + "learning_rate": 2.500000000000001e-05, + "loss": 7.4377, + "step": 16234 + }, + { + "epoch": 0.6767121003709724, + "grad_norm": 736.0, + "learning_rate": 2.499415454211448e-05, + "loss": 19.6291, + "step": 16235 + }, + { + "epoch": 0.6767537826685007, + "grad_norm": 162.0, + "learning_rate": 2.4988309539962735e-05, + "loss": 11.4378, + "step": 16236 + }, + { + "epoch": 0.6767954649660289, + "grad_norm": 868.0, + "learning_rate": 2.4982464993651235e-05, + "loss": 24.6253, + "step": 16237 + }, + { + "epoch": 0.6768371472635571, + "grad_norm": 368.0, + "learning_rate": 2.4976620903286562e-05, + "loss": 14.7502, + "step": 16238 + }, + { + "epoch": 0.6768788295610854, + "grad_norm": 62.5, + "learning_rate": 2.4970777268975165e-05, + "loss": 8.3757, + "step": 16239 + }, + { + "epoch": 0.6769205118586137, + "grad_norm": 229.0, + "learning_rate": 2.4964934090823604e-05, + "loss": 13.0007, + "step": 16240 + }, + { + "epoch": 0.6769621941561419, + "grad_norm": 414.0, + "learning_rate": 2.4959091368938307e-05, + "loss": 15.0007, + "step": 16241 + }, + { + "epoch": 0.6770038764536701, + "grad_norm": 398.0, + "learning_rate": 2.4953249103425834e-05, + "loss": 17.7508, + "step": 16242 + }, + { + "epoch": 0.6770455587511983, + "grad_norm": 138.0, + "learning_rate": 2.4947407294392583e-05, + "loss": 11.2507, + "step": 16243 + }, + { + "epoch": 0.6770872410487266, + "grad_norm": 494.0, + "learning_rate": 2.4941565941945096e-05, + "loss": 18.1257, + "step": 16244 + }, + { + "epoch": 0.6771289233462549, + "grad_norm": 564.0, + "learning_rate": 2.493572504618975e-05, + "loss": 18.7508, + "step": 16245 + }, + { + "epoch": 0.6771706056437831, + "grad_norm": 231.0, + "learning_rate": 2.492988460723306e-05, + "loss": 12.0627, + "step": 16246 + }, + { + "epoch": 0.6772122879413113, + "grad_norm": 516.0, + "learning_rate": 2.4924044625181453e-05, + "loss": 18.1254, + "step": 16247 + }, + { + "epoch": 0.6772539702388396, + "grad_norm": 264.0, + "learning_rate": 2.4918205100141356e-05, + "loss": 12.7504, + "step": 16248 + }, + { + "epoch": 0.6772956525363678, + "grad_norm": 406.0, + "learning_rate": 2.4912366032219198e-05, + "loss": 13.3755, + "step": 16249 + }, + { + "epoch": 0.677337334833896, + "grad_norm": 212.0, + "learning_rate": 2.4906527421521396e-05, + "loss": 10.6253, + "step": 16250 + }, + { + "epoch": 0.6773790171314242, + "grad_norm": 322.0, + "learning_rate": 2.4900689268154358e-05, + "loss": 12.2503, + "step": 16251 + }, + { + "epoch": 0.6774206994289526, + "grad_norm": 324.0, + "learning_rate": 2.4894851572224485e-05, + "loss": 13.8756, + "step": 16252 + }, + { + "epoch": 0.6774623817264808, + "grad_norm": 220.0, + "learning_rate": 2.488901433383818e-05, + "loss": 11.5627, + "step": 16253 + }, + { + "epoch": 0.677504064024009, + "grad_norm": 207.0, + "learning_rate": 2.488317755310181e-05, + "loss": 12.8751, + "step": 16254 + }, + { + "epoch": 0.6775457463215372, + "grad_norm": 1184.0, + "learning_rate": 2.487734123012176e-05, + "loss": 30.7502, + "step": 16255 + }, + { + "epoch": 0.6775874286190655, + "grad_norm": 434.0, + "learning_rate": 2.4871505365004394e-05, + "loss": 17.1252, + "step": 16256 + }, + { + "epoch": 0.6776291109165937, + "grad_norm": 636.0, + "learning_rate": 2.4865669957856075e-05, + "loss": 19.2546, + "step": 16257 + }, + { + "epoch": 0.6776707932141219, + "grad_norm": 482.0, + "learning_rate": 2.4859835008783157e-05, + "loss": 17.2502, + "step": 16258 + }, + { + "epoch": 0.6777124755116501, + "grad_norm": 66.0, + "learning_rate": 2.4854000517891973e-05, + "loss": 8.8753, + "step": 16259 + }, + { + "epoch": 0.6777541578091785, + "grad_norm": 274.0, + "learning_rate": 2.484816648528886e-05, + "loss": 12.8755, + "step": 16260 + }, + { + "epoch": 0.6777958401067067, + "grad_norm": 157.0, + "learning_rate": 2.484233291108013e-05, + "loss": 9.8752, + "step": 16261 + }, + { + "epoch": 0.6778375224042349, + "grad_norm": 776.0, + "learning_rate": 2.4836499795372153e-05, + "loss": 22.6253, + "step": 16262 + }, + { + "epoch": 0.6778792047017632, + "grad_norm": 330.0, + "learning_rate": 2.483066713827116e-05, + "loss": 14.4396, + "step": 16263 + }, + { + "epoch": 0.6779208869992914, + "grad_norm": 118.5, + "learning_rate": 2.482483493988354e-05, + "loss": 8.3753, + "step": 16264 + }, + { + "epoch": 0.6779625692968196, + "grad_norm": 53.0, + "learning_rate": 2.4819003200315495e-05, + "loss": 7.6251, + "step": 16265 + }, + { + "epoch": 0.6780042515943479, + "grad_norm": 242.0, + "learning_rate": 2.481317191967339e-05, + "loss": 12.3127, + "step": 16266 + }, + { + "epoch": 0.6780459338918762, + "grad_norm": 422.0, + "learning_rate": 2.4807341098063418e-05, + "loss": 13.8751, + "step": 16267 + }, + { + "epoch": 0.6780876161894044, + "grad_norm": 366.0, + "learning_rate": 2.4801510735591936e-05, + "loss": 14.7503, + "step": 16268 + }, + { + "epoch": 0.6781292984869326, + "grad_norm": 191.0, + "learning_rate": 2.479568083236512e-05, + "loss": 9.6255, + "step": 16269 + }, + { + "epoch": 0.6781709807844608, + "grad_norm": 131.0, + "learning_rate": 2.4789851388489277e-05, + "loss": 9.5634, + "step": 16270 + }, + { + "epoch": 0.6782126630819891, + "grad_norm": 744.0, + "learning_rate": 2.478402240407063e-05, + "loss": 20.2502, + "step": 16271 + }, + { + "epoch": 0.6782543453795173, + "grad_norm": 322.0, + "learning_rate": 2.4778193879215412e-05, + "loss": 12.6879, + "step": 16272 + }, + { + "epoch": 0.6782960276770456, + "grad_norm": 572.0, + "learning_rate": 2.477236581402985e-05, + "loss": 18.1254, + "step": 16273 + }, + { + "epoch": 0.6783377099745738, + "grad_norm": 310.0, + "learning_rate": 2.476653820862016e-05, + "loss": 13.813, + "step": 16274 + }, + { + "epoch": 0.6783793922721021, + "grad_norm": 184.0, + "learning_rate": 2.4760711063092552e-05, + "loss": 11.2505, + "step": 16275 + }, + { + "epoch": 0.6784210745696303, + "grad_norm": 304.0, + "learning_rate": 2.475488437755323e-05, + "loss": 13.5003, + "step": 16276 + }, + { + "epoch": 0.6784627568671585, + "grad_norm": 310.0, + "learning_rate": 2.4749058152108373e-05, + "loss": 13.5003, + "step": 16277 + }, + { + "epoch": 0.6785044391646867, + "grad_norm": 236.0, + "learning_rate": 2.4743232386864174e-05, + "loss": 11.8127, + "step": 16278 + }, + { + "epoch": 0.678546121462215, + "grad_norm": 224.0, + "learning_rate": 2.4737407081926813e-05, + "loss": 12.0662, + "step": 16279 + }, + { + "epoch": 0.6785878037597433, + "grad_norm": 64.5, + "learning_rate": 2.4731582237402447e-05, + "loss": 6.5941, + "step": 16280 + }, + { + "epoch": 0.6786294860572715, + "grad_norm": 211.0, + "learning_rate": 2.4725757853397236e-05, + "loss": 10.5632, + "step": 16281 + }, + { + "epoch": 0.6786711683547997, + "grad_norm": 588.0, + "learning_rate": 2.471993393001733e-05, + "loss": 18.6252, + "step": 16282 + }, + { + "epoch": 0.678712850652328, + "grad_norm": 1112.0, + "learning_rate": 2.471411046736888e-05, + "loss": 27.1253, + "step": 16283 + }, + { + "epoch": 0.6787545329498562, + "grad_norm": 442.0, + "learning_rate": 2.4708287465558005e-05, + "loss": 16.1253, + "step": 16284 + }, + { + "epoch": 0.6787962152473844, + "grad_norm": 156.0, + "learning_rate": 2.4702464924690836e-05, + "loss": 11.0626, + "step": 16285 + }, + { + "epoch": 0.6788378975449126, + "grad_norm": 1080.0, + "learning_rate": 2.4696642844873496e-05, + "loss": 24.5014, + "step": 16286 + }, + { + "epoch": 0.678879579842441, + "grad_norm": 732.0, + "learning_rate": 2.4690821226212063e-05, + "loss": 22.8753, + "step": 16287 + }, + { + "epoch": 0.6789212621399692, + "grad_norm": 210.0, + "learning_rate": 2.4685000068812697e-05, + "loss": 11.1877, + "step": 16288 + }, + { + "epoch": 0.6789629444374974, + "grad_norm": 700.0, + "learning_rate": 2.4679179372781414e-05, + "loss": 20.1256, + "step": 16289 + }, + { + "epoch": 0.6790046267350256, + "grad_norm": 752.0, + "learning_rate": 2.4673359138224377e-05, + "loss": 21.6253, + "step": 16290 + }, + { + "epoch": 0.6790463090325539, + "grad_norm": 848.0, + "learning_rate": 2.4667539365247577e-05, + "loss": 21.7509, + "step": 16291 + }, + { + "epoch": 0.6790879913300821, + "grad_norm": 494.0, + "learning_rate": 2.4661720053957154e-05, + "loss": 16.2503, + "step": 16292 + }, + { + "epoch": 0.6791296736276103, + "grad_norm": 190.0, + "learning_rate": 2.46559012044591e-05, + "loss": 10.5007, + "step": 16293 + }, + { + "epoch": 0.6791713559251386, + "grad_norm": 852.0, + "learning_rate": 2.4650082816859537e-05, + "loss": 23.6252, + "step": 16294 + }, + { + "epoch": 0.6792130382226669, + "grad_norm": 241.0, + "learning_rate": 2.4644264891264424e-05, + "loss": 12.2503, + "step": 16295 + }, + { + "epoch": 0.6792547205201951, + "grad_norm": 264.0, + "learning_rate": 2.4638447427779848e-05, + "loss": 12.8764, + "step": 16296 + }, + { + "epoch": 0.6792964028177233, + "grad_norm": 210.0, + "learning_rate": 2.4632630426511827e-05, + "loss": 11.8752, + "step": 16297 + }, + { + "epoch": 0.6793380851152515, + "grad_norm": 194.0, + "learning_rate": 2.4626813887566362e-05, + "loss": 10.6878, + "step": 16298 + }, + { + "epoch": 0.6793797674127798, + "grad_norm": 1216.0, + "learning_rate": 2.4620997811049472e-05, + "loss": 32.7521, + "step": 16299 + }, + { + "epoch": 0.679421449710308, + "grad_norm": 260.0, + "learning_rate": 2.461518219706715e-05, + "loss": 13.0011, + "step": 16300 + }, + { + "epoch": 0.6794631320078363, + "grad_norm": 420.0, + "learning_rate": 2.4609367045725378e-05, + "loss": 15.5005, + "step": 16301 + }, + { + "epoch": 0.6795048143053645, + "grad_norm": 480.0, + "learning_rate": 2.4603552357130156e-05, + "loss": 17.5006, + "step": 16302 + }, + { + "epoch": 0.6795464966028928, + "grad_norm": 1040.0, + "learning_rate": 2.4597738131387438e-05, + "loss": 24.7555, + "step": 16303 + }, + { + "epoch": 0.679588178900421, + "grad_norm": 227.0, + "learning_rate": 2.4591924368603202e-05, + "loss": 10.1268, + "step": 16304 + }, + { + "epoch": 0.6796298611979492, + "grad_norm": 532.0, + "learning_rate": 2.45861110688834e-05, + "loss": 17.2502, + "step": 16305 + }, + { + "epoch": 0.6796715434954774, + "grad_norm": 408.0, + "learning_rate": 2.4580298232333982e-05, + "loss": 16.3758, + "step": 16306 + }, + { + "epoch": 0.6797132257930057, + "grad_norm": 206.0, + "learning_rate": 2.4574485859060882e-05, + "loss": 11.5002, + "step": 16307 + }, + { + "epoch": 0.679754908090534, + "grad_norm": 472.0, + "learning_rate": 2.4568673949170034e-05, + "loss": 17.2505, + "step": 16308 + }, + { + "epoch": 0.6797965903880622, + "grad_norm": 111.0, + "learning_rate": 2.4562862502767357e-05, + "loss": 6.7501, + "step": 16309 + }, + { + "epoch": 0.6798382726855904, + "grad_norm": 320.0, + "learning_rate": 2.455705151995878e-05, + "loss": 14.4378, + "step": 16310 + }, + { + "epoch": 0.6798799549831187, + "grad_norm": 314.0, + "learning_rate": 2.4551241000850165e-05, + "loss": 13.5002, + "step": 16311 + }, + { + "epoch": 0.6799216372806469, + "grad_norm": 189.0, + "learning_rate": 2.4545430945547493e-05, + "loss": 10.5024, + "step": 16312 + }, + { + "epoch": 0.6799633195781751, + "grad_norm": 346.0, + "learning_rate": 2.4539621354156556e-05, + "loss": 14.9377, + "step": 16313 + }, + { + "epoch": 0.6800050018757033, + "grad_norm": 502.0, + "learning_rate": 2.453381222678332e-05, + "loss": 17.5001, + "step": 16314 + }, + { + "epoch": 0.6800466841732317, + "grad_norm": 239.0, + "learning_rate": 2.452800356353358e-05, + "loss": 12.1258, + "step": 16315 + }, + { + "epoch": 0.6800883664707599, + "grad_norm": 720.0, + "learning_rate": 2.4522195364513273e-05, + "loss": 22.7502, + "step": 16316 + }, + { + "epoch": 0.6801300487682881, + "grad_norm": 354.0, + "learning_rate": 2.451638762982818e-05, + "loss": 14.6253, + "step": 16317 + }, + { + "epoch": 0.6801717310658163, + "grad_norm": 234.0, + "learning_rate": 2.4510580359584223e-05, + "loss": 10.5641, + "step": 16318 + }, + { + "epoch": 0.6802134133633446, + "grad_norm": 552.0, + "learning_rate": 2.4504773553887162e-05, + "loss": 17.6269, + "step": 16319 + }, + { + "epoch": 0.6802550956608728, + "grad_norm": 378.0, + "learning_rate": 2.4498967212842903e-05, + "loss": 16.0004, + "step": 16320 + }, + { + "epoch": 0.680296777958401, + "grad_norm": 147.0, + "learning_rate": 2.4493161336557196e-05, + "loss": 10.7503, + "step": 16321 + }, + { + "epoch": 0.6803384602559293, + "grad_norm": 222.0, + "learning_rate": 2.4487355925135903e-05, + "loss": 12.0003, + "step": 16322 + }, + { + "epoch": 0.6803801425534576, + "grad_norm": 520.0, + "learning_rate": 2.4481550978684814e-05, + "loss": 17.1254, + "step": 16323 + }, + { + "epoch": 0.6804218248509858, + "grad_norm": 194.0, + "learning_rate": 2.4475746497309726e-05, + "loss": 11.0007, + "step": 16324 + }, + { + "epoch": 0.680463507148514, + "grad_norm": 422.0, + "learning_rate": 2.4469942481116424e-05, + "loss": 15.5003, + "step": 16325 + }, + { + "epoch": 0.6805051894460422, + "grad_norm": 612.0, + "learning_rate": 2.4464138930210684e-05, + "loss": 19.0001, + "step": 16326 + }, + { + "epoch": 0.6805468717435705, + "grad_norm": 193.0, + "learning_rate": 2.4458335844698282e-05, + "loss": 11.8127, + "step": 16327 + }, + { + "epoch": 0.6805885540410987, + "grad_norm": 422.0, + "learning_rate": 2.4452533224684976e-05, + "loss": 16.8753, + "step": 16328 + }, + { + "epoch": 0.680630236338627, + "grad_norm": 716.0, + "learning_rate": 2.4446731070276517e-05, + "loss": 18.1278, + "step": 16329 + }, + { + "epoch": 0.6806719186361552, + "grad_norm": 235.0, + "learning_rate": 2.4440929381578654e-05, + "loss": 11.6252, + "step": 16330 + }, + { + "epoch": 0.6807136009336835, + "grad_norm": 62.25, + "learning_rate": 2.4435128158697124e-05, + "loss": 8.6881, + "step": 16331 + }, + { + "epoch": 0.6807552832312117, + "grad_norm": 474.0, + "learning_rate": 2.442932740173765e-05, + "loss": 18.0018, + "step": 16332 + }, + { + "epoch": 0.6807969655287399, + "grad_norm": 124.5, + "learning_rate": 2.4423527110805955e-05, + "loss": 9.6878, + "step": 16333 + }, + { + "epoch": 0.6808386478262681, + "grad_norm": 776.0, + "learning_rate": 2.441772728600775e-05, + "loss": 21.8752, + "step": 16334 + }, + { + "epoch": 0.6808803301237965, + "grad_norm": 316.0, + "learning_rate": 2.4411927927448737e-05, + "loss": 13.3761, + "step": 16335 + }, + { + "epoch": 0.6809220124213247, + "grad_norm": 504.0, + "learning_rate": 2.4406129035234608e-05, + "loss": 18.2502, + "step": 16336 + }, + { + "epoch": 0.6809636947188529, + "grad_norm": 194.0, + "learning_rate": 2.4400330609471027e-05, + "loss": 11.3127, + "step": 16337 + }, + { + "epoch": 0.6810053770163812, + "grad_norm": 102.5, + "learning_rate": 2.4394532650263733e-05, + "loss": 8.8127, + "step": 16338 + }, + { + "epoch": 0.6810470593139094, + "grad_norm": 312.0, + "learning_rate": 2.4388735157718322e-05, + "loss": 15.5031, + "step": 16339 + }, + { + "epoch": 0.6810887416114376, + "grad_norm": 173.0, + "learning_rate": 2.438293813194052e-05, + "loss": 11.3143, + "step": 16340 + }, + { + "epoch": 0.6811304239089658, + "grad_norm": 222.0, + "learning_rate": 2.4377141573035905e-05, + "loss": 10.6255, + "step": 16341 + }, + { + "epoch": 0.6811721062064942, + "grad_norm": 236.0, + "learning_rate": 2.4371345481110202e-05, + "loss": 11.7504, + "step": 16342 + }, + { + "epoch": 0.6812137885040224, + "grad_norm": 138.0, + "learning_rate": 2.436554985626896e-05, + "loss": 10.3131, + "step": 16343 + }, + { + "epoch": 0.6812554708015506, + "grad_norm": 484.0, + "learning_rate": 2.435975469861789e-05, + "loss": 13.5626, + "step": 16344 + }, + { + "epoch": 0.6812971530990788, + "grad_norm": 139.0, + "learning_rate": 2.4353960008262523e-05, + "loss": 11.3756, + "step": 16345 + }, + { + "epoch": 0.6813388353966071, + "grad_norm": 197.0, + "learning_rate": 2.4348165785308534e-05, + "loss": 11.4377, + "step": 16346 + }, + { + "epoch": 0.6813805176941353, + "grad_norm": 137.0, + "learning_rate": 2.43423720298615e-05, + "loss": 8.5006, + "step": 16347 + }, + { + "epoch": 0.6814221999916635, + "grad_norm": 100.5, + "learning_rate": 2.4336578742027018e-05, + "loss": 9.7511, + "step": 16348 + }, + { + "epoch": 0.6814638822891917, + "grad_norm": 364.0, + "learning_rate": 2.433078592191066e-05, + "loss": 15.0633, + "step": 16349 + }, + { + "epoch": 0.6815055645867201, + "grad_norm": 394.0, + "learning_rate": 2.432499356961801e-05, + "loss": 14.438, + "step": 16350 + }, + { + "epoch": 0.6815472468842483, + "grad_norm": 92.5, + "learning_rate": 2.4319201685254633e-05, + "loss": 9.8134, + "step": 16351 + }, + { + "epoch": 0.6815889291817765, + "grad_norm": 238.0, + "learning_rate": 2.431341026892608e-05, + "loss": 12.1254, + "step": 16352 + }, + { + "epoch": 0.6816306114793047, + "grad_norm": 392.0, + "learning_rate": 2.4307619320737912e-05, + "loss": 15.2503, + "step": 16353 + }, + { + "epoch": 0.681672293776833, + "grad_norm": 776.0, + "learning_rate": 2.4301828840795652e-05, + "loss": 19.876, + "step": 16354 + }, + { + "epoch": 0.6817139760743612, + "grad_norm": 476.0, + "learning_rate": 2.4296038829204855e-05, + "loss": 16.1261, + "step": 16355 + }, + { + "epoch": 0.6817556583718895, + "grad_norm": 214.0, + "learning_rate": 2.4290249286071026e-05, + "loss": 11.8129, + "step": 16356 + }, + { + "epoch": 0.6817973406694177, + "grad_norm": 478.0, + "learning_rate": 2.4284460211499687e-05, + "loss": 15.8755, + "step": 16357 + }, + { + "epoch": 0.681839022966946, + "grad_norm": 644.0, + "learning_rate": 2.4278671605596342e-05, + "loss": 19.0004, + "step": 16358 + }, + { + "epoch": 0.6818807052644742, + "grad_norm": 1128.0, + "learning_rate": 2.4272883468466496e-05, + "loss": 25.1263, + "step": 16359 + }, + { + "epoch": 0.6819223875620024, + "grad_norm": 181.0, + "learning_rate": 2.4267095800215627e-05, + "loss": 10.1253, + "step": 16360 + }, + { + "epoch": 0.6819640698595306, + "grad_norm": 214.0, + "learning_rate": 2.42613086009492e-05, + "loss": 9.8128, + "step": 16361 + }, + { + "epoch": 0.6820057521570589, + "grad_norm": 268.0, + "learning_rate": 2.4255521870772752e-05, + "loss": 13.0004, + "step": 16362 + }, + { + "epoch": 0.6820474344545872, + "grad_norm": 75.5, + "learning_rate": 2.424973560979166e-05, + "loss": 8.2505, + "step": 16363 + }, + { + "epoch": 0.6820891167521154, + "grad_norm": 201.0, + "learning_rate": 2.4243949818111465e-05, + "loss": 11.7501, + "step": 16364 + }, + { + "epoch": 0.6821307990496436, + "grad_norm": 215.0, + "learning_rate": 2.4238164495837535e-05, + "loss": 11.6878, + "step": 16365 + }, + { + "epoch": 0.6821724813471719, + "grad_norm": 700.0, + "learning_rate": 2.423237964307538e-05, + "loss": 22.7502, + "step": 16366 + }, + { + "epoch": 0.6822141636447001, + "grad_norm": 209.0, + "learning_rate": 2.4226595259930358e-05, + "loss": 12.0628, + "step": 16367 + }, + { + "epoch": 0.6822558459422283, + "grad_norm": 450.0, + "learning_rate": 2.4220811346507955e-05, + "loss": 16.3753, + "step": 16368 + }, + { + "epoch": 0.6822975282397565, + "grad_norm": 198.0, + "learning_rate": 2.421502790291352e-05, + "loss": 11.1252, + "step": 16369 + }, + { + "epoch": 0.6823392105372849, + "grad_norm": 143.0, + "learning_rate": 2.4209244929252534e-05, + "loss": 9.6254, + "step": 16370 + }, + { + "epoch": 0.6823808928348131, + "grad_norm": 206.0, + "learning_rate": 2.42034624256303e-05, + "loss": 11.2502, + "step": 16371 + }, + { + "epoch": 0.6824225751323413, + "grad_norm": 292.0, + "learning_rate": 2.4197680392152283e-05, + "loss": 13.564, + "step": 16372 + }, + { + "epoch": 0.6824642574298695, + "grad_norm": 213.0, + "learning_rate": 2.4191898828923826e-05, + "loss": 12.1253, + "step": 16373 + }, + { + "epoch": 0.6825059397273978, + "grad_norm": 488.0, + "learning_rate": 2.4186117736050308e-05, + "loss": 17.3752, + "step": 16374 + }, + { + "epoch": 0.682547622024926, + "grad_norm": 704.0, + "learning_rate": 2.418033711363708e-05, + "loss": 19.6303, + "step": 16375 + }, + { + "epoch": 0.6825893043224542, + "grad_norm": 780.0, + "learning_rate": 2.4174556961789503e-05, + "loss": 22.5022, + "step": 16376 + }, + { + "epoch": 0.6826309866199824, + "grad_norm": 239.0, + "learning_rate": 2.4168777280612913e-05, + "loss": 11.9379, + "step": 16377 + }, + { + "epoch": 0.6826726689175108, + "grad_norm": 187.0, + "learning_rate": 2.4162998070212655e-05, + "loss": 10.2502, + "step": 16378 + }, + { + "epoch": 0.682714351215039, + "grad_norm": 200.0, + "learning_rate": 2.4157219330694055e-05, + "loss": 11.0628, + "step": 16379 + }, + { + "epoch": 0.6827560335125672, + "grad_norm": 952.0, + "learning_rate": 2.4151441062162423e-05, + "loss": 21.6302, + "step": 16380 + }, + { + "epoch": 0.6827977158100954, + "grad_norm": 157.0, + "learning_rate": 2.414566326472307e-05, + "loss": 10.0018, + "step": 16381 + }, + { + "epoch": 0.6828393981076237, + "grad_norm": 278.0, + "learning_rate": 2.41398859384813e-05, + "loss": 11.1254, + "step": 16382 + }, + { + "epoch": 0.6828810804051519, + "grad_norm": 302.0, + "learning_rate": 2.4134109083542407e-05, + "loss": 13.9381, + "step": 16383 + }, + { + "epoch": 0.6829227627026802, + "grad_norm": 262.0, + "learning_rate": 2.4128332700011668e-05, + "loss": 11.3126, + "step": 16384 + }, + { + "epoch": 0.6829644450002084, + "grad_norm": 270.0, + "learning_rate": 2.4122556787994366e-05, + "loss": 13.0002, + "step": 16385 + }, + { + "epoch": 0.6830061272977367, + "grad_norm": 228.0, + "learning_rate": 2.4116781347595763e-05, + "loss": 12.0006, + "step": 16386 + }, + { + "epoch": 0.6830478095952649, + "grad_norm": 422.0, + "learning_rate": 2.4111006378921097e-05, + "loss": 15.8753, + "step": 16387 + }, + { + "epoch": 0.6830894918927931, + "grad_norm": 748.0, + "learning_rate": 2.4105231882075675e-05, + "loss": 20.2514, + "step": 16388 + }, + { + "epoch": 0.6831311741903213, + "grad_norm": 290.0, + "learning_rate": 2.4099457857164665e-05, + "loss": 12.5002, + "step": 16389 + }, + { + "epoch": 0.6831728564878496, + "grad_norm": 258.0, + "learning_rate": 2.4093684304293373e-05, + "loss": 12.5001, + "step": 16390 + }, + { + "epoch": 0.6832145387853779, + "grad_norm": 524.0, + "learning_rate": 2.4087911223566944e-05, + "loss": 19.0004, + "step": 16391 + }, + { + "epoch": 0.6832562210829061, + "grad_norm": 388.0, + "learning_rate": 2.4082138615090673e-05, + "loss": 15.5001, + "step": 16392 + }, + { + "epoch": 0.6832979033804343, + "grad_norm": 192.0, + "learning_rate": 2.407636647896969e-05, + "loss": 12.314, + "step": 16393 + }, + { + "epoch": 0.6833395856779626, + "grad_norm": 392.0, + "learning_rate": 2.4070594815309266e-05, + "loss": 14.3752, + "step": 16394 + }, + { + "epoch": 0.6833812679754908, + "grad_norm": 203.0, + "learning_rate": 2.4064823624214517e-05, + "loss": 10.7505, + "step": 16395 + }, + { + "epoch": 0.683422950273019, + "grad_norm": 1032.0, + "learning_rate": 2.4059052905790674e-05, + "loss": 22.7543, + "step": 16396 + }, + { + "epoch": 0.6834646325705472, + "grad_norm": 684.0, + "learning_rate": 2.4053282660142896e-05, + "loss": 18.8754, + "step": 16397 + }, + { + "epoch": 0.6835063148680756, + "grad_norm": 174.0, + "learning_rate": 2.404751288737635e-05, + "loss": 5.9377, + "step": 16398 + }, + { + "epoch": 0.6835479971656038, + "grad_norm": 147.0, + "learning_rate": 2.4041743587596176e-05, + "loss": 9.1256, + "step": 16399 + }, + { + "epoch": 0.683589679463132, + "grad_norm": 223.0, + "learning_rate": 2.4035974760907536e-05, + "loss": 11.5627, + "step": 16400 + }, + { + "epoch": 0.6836313617606602, + "grad_norm": 163.0, + "learning_rate": 2.4030206407415562e-05, + "loss": 10.8128, + "step": 16401 + }, + { + "epoch": 0.6836730440581885, + "grad_norm": 205.0, + "learning_rate": 2.402443852722538e-05, + "loss": 12.0002, + "step": 16402 + }, + { + "epoch": 0.6837147263557167, + "grad_norm": 206.0, + "learning_rate": 2.401867112044211e-05, + "loss": 11.1878, + "step": 16403 + }, + { + "epoch": 0.6837564086532449, + "grad_norm": 268.0, + "learning_rate": 2.4012904187170872e-05, + "loss": 12.4377, + "step": 16404 + }, + { + "epoch": 0.6837980909507732, + "grad_norm": 288.0, + "learning_rate": 2.4007137727516755e-05, + "loss": 13.1878, + "step": 16405 + }, + { + "epoch": 0.6838397732483015, + "grad_norm": 112.5, + "learning_rate": 2.400137174158486e-05, + "loss": 8.7504, + "step": 16406 + }, + { + "epoch": 0.6838814555458297, + "grad_norm": 151.0, + "learning_rate": 2.3995606229480278e-05, + "loss": 9.9386, + "step": 16407 + }, + { + "epoch": 0.6839231378433579, + "grad_norm": 764.0, + "learning_rate": 2.3989841191308077e-05, + "loss": 23.2501, + "step": 16408 + }, + { + "epoch": 0.6839648201408862, + "grad_norm": 298.0, + "learning_rate": 2.398407662717333e-05, + "loss": 12.2504, + "step": 16409 + }, + { + "epoch": 0.6840065024384144, + "grad_norm": 332.0, + "learning_rate": 2.3978312537181095e-05, + "loss": 14.4377, + "step": 16410 + }, + { + "epoch": 0.6840481847359426, + "grad_norm": 211.0, + "learning_rate": 2.397254892143643e-05, + "loss": 11.0009, + "step": 16411 + }, + { + "epoch": 0.6840898670334709, + "grad_norm": 180.0, + "learning_rate": 2.3966785780044365e-05, + "loss": 9.5008, + "step": 16412 + }, + { + "epoch": 0.6841315493309992, + "grad_norm": 274.0, + "learning_rate": 2.3961023113109926e-05, + "loss": 13.6255, + "step": 16413 + }, + { + "epoch": 0.6841732316285274, + "grad_norm": 450.0, + "learning_rate": 2.3955260920738188e-05, + "loss": 16.0001, + "step": 16414 + }, + { + "epoch": 0.6842149139260556, + "grad_norm": 502.0, + "learning_rate": 2.394949920303409e-05, + "loss": 16.8761, + "step": 16415 + }, + { + "epoch": 0.6842565962235838, + "grad_norm": 380.0, + "learning_rate": 2.3943737960102725e-05, + "loss": 14.2518, + "step": 16416 + }, + { + "epoch": 0.6842982785211121, + "grad_norm": 101.5, + "learning_rate": 2.3937977192049004e-05, + "loss": 8.2508, + "step": 16417 + }, + { + "epoch": 0.6843399608186403, + "grad_norm": 600.0, + "learning_rate": 2.3932216898978007e-05, + "loss": 20.3758, + "step": 16418 + }, + { + "epoch": 0.6843816431161686, + "grad_norm": 68.5, + "learning_rate": 2.392645708099463e-05, + "loss": 7.6879, + "step": 16419 + }, + { + "epoch": 0.6844233254136968, + "grad_norm": 552.0, + "learning_rate": 2.392069773820393e-05, + "loss": 18.8757, + "step": 16420 + }, + { + "epoch": 0.6844650077112251, + "grad_norm": 239.0, + "learning_rate": 2.3914938870710785e-05, + "loss": 12.0002, + "step": 16421 + }, + { + "epoch": 0.6845066900087533, + "grad_norm": 300.0, + "learning_rate": 2.3909180478620212e-05, + "loss": 14.6252, + "step": 16422 + }, + { + "epoch": 0.6845483723062815, + "grad_norm": 556.0, + "learning_rate": 2.3903422562037147e-05, + "loss": 14.6897, + "step": 16423 + }, + { + "epoch": 0.6845900546038097, + "grad_norm": 620.0, + "learning_rate": 2.389766512106652e-05, + "loss": 19.6257, + "step": 16424 + }, + { + "epoch": 0.684631736901338, + "grad_norm": 220.0, + "learning_rate": 2.3891908155813265e-05, + "loss": 11.6252, + "step": 16425 + }, + { + "epoch": 0.6846734191988663, + "grad_norm": 1664.0, + "learning_rate": 2.38861516663823e-05, + "loss": 37.0006, + "step": 16426 + }, + { + "epoch": 0.6847151014963945, + "grad_norm": 336.0, + "learning_rate": 2.388039565287854e-05, + "loss": 13.9377, + "step": 16427 + }, + { + "epoch": 0.6847567837939227, + "grad_norm": 75.0, + "learning_rate": 2.3874640115406887e-05, + "loss": 8.3137, + "step": 16428 + }, + { + "epoch": 0.684798466091451, + "grad_norm": 156.0, + "learning_rate": 2.386888505407224e-05, + "loss": 10.0627, + "step": 16429 + }, + { + "epoch": 0.6848401483889792, + "grad_norm": 286.0, + "learning_rate": 2.3863130468979477e-05, + "loss": 12.5627, + "step": 16430 + }, + { + "epoch": 0.6848818306865074, + "grad_norm": 210.0, + "learning_rate": 2.3857376360233484e-05, + "loss": 10.6256, + "step": 16431 + }, + { + "epoch": 0.6849235129840356, + "grad_norm": 988.0, + "learning_rate": 2.3851622727939126e-05, + "loss": 26.5004, + "step": 16432 + }, + { + "epoch": 0.684965195281564, + "grad_norm": 378.0, + "learning_rate": 2.384586957220127e-05, + "loss": 14.1878, + "step": 16433 + }, + { + "epoch": 0.6850068775790922, + "grad_norm": 452.0, + "learning_rate": 2.384011689312476e-05, + "loss": 16.6254, + "step": 16434 + }, + { + "epoch": 0.6850485598766204, + "grad_norm": 620.0, + "learning_rate": 2.3834364690814442e-05, + "loss": 20.0006, + "step": 16435 + }, + { + "epoch": 0.6850902421741486, + "grad_norm": 1048.0, + "learning_rate": 2.3828612965375153e-05, + "loss": 27.626, + "step": 16436 + }, + { + "epoch": 0.6851319244716769, + "grad_norm": 358.0, + "learning_rate": 2.3822861716911694e-05, + "loss": 13.9394, + "step": 16437 + }, + { + "epoch": 0.6851736067692051, + "grad_norm": 170.0, + "learning_rate": 2.3817110945528937e-05, + "loss": 10.8128, + "step": 16438 + }, + { + "epoch": 0.6852152890667333, + "grad_norm": 280.0, + "learning_rate": 2.3811360651331627e-05, + "loss": 11.7502, + "step": 16439 + }, + { + "epoch": 0.6852569713642616, + "grad_norm": 242.0, + "learning_rate": 2.380561083442463e-05, + "loss": 11.0004, + "step": 16440 + }, + { + "epoch": 0.6852986536617899, + "grad_norm": 452.0, + "learning_rate": 2.3799861494912663e-05, + "loss": 15.5023, + "step": 16441 + }, + { + "epoch": 0.6853403359593181, + "grad_norm": 176.0, + "learning_rate": 2.3794112632900585e-05, + "loss": 10.8754, + "step": 16442 + }, + { + "epoch": 0.6853820182568463, + "grad_norm": 482.0, + "learning_rate": 2.378836424849309e-05, + "loss": 17.0006, + "step": 16443 + }, + { + "epoch": 0.6854237005543745, + "grad_norm": 87.0, + "learning_rate": 2.378261634179502e-05, + "loss": 9.1256, + "step": 16444 + }, + { + "epoch": 0.6854653828519028, + "grad_norm": 99.0, + "learning_rate": 2.377686891291106e-05, + "loss": 6.6877, + "step": 16445 + }, + { + "epoch": 0.685507065149431, + "grad_norm": 1296.0, + "learning_rate": 2.3771121961946025e-05, + "loss": 26.0049, + "step": 16446 + }, + { + "epoch": 0.6855487474469593, + "grad_norm": 278.0, + "learning_rate": 2.3765375489004588e-05, + "loss": 13.3754, + "step": 16447 + }, + { + "epoch": 0.6855904297444875, + "grad_norm": 568.0, + "learning_rate": 2.3759629494191532e-05, + "loss": 18.2506, + "step": 16448 + }, + { + "epoch": 0.6856321120420158, + "grad_norm": 736.0, + "learning_rate": 2.3753883977611553e-05, + "loss": 21.3794, + "step": 16449 + }, + { + "epoch": 0.685673794339544, + "grad_norm": 464.0, + "learning_rate": 2.374813893936937e-05, + "loss": 17.0003, + "step": 16450 + }, + { + "epoch": 0.6857154766370722, + "grad_norm": 434.0, + "learning_rate": 2.374239437956969e-05, + "loss": 15.3754, + "step": 16451 + }, + { + "epoch": 0.6857571589346004, + "grad_norm": 676.0, + "learning_rate": 2.3736650298317197e-05, + "loss": 19.2537, + "step": 16452 + }, + { + "epoch": 0.6857988412321288, + "grad_norm": 152.0, + "learning_rate": 2.3730906695716586e-05, + "loss": 8.0007, + "step": 16453 + }, + { + "epoch": 0.685840523529657, + "grad_norm": 390.0, + "learning_rate": 2.3725163571872532e-05, + "loss": 15.8131, + "step": 16454 + }, + { + "epoch": 0.6858822058271852, + "grad_norm": 332.0, + "learning_rate": 2.3719420926889702e-05, + "loss": 12.7507, + "step": 16455 + }, + { + "epoch": 0.6859238881247134, + "grad_norm": 386.0, + "learning_rate": 2.3713678760872765e-05, + "loss": 15.6878, + "step": 16456 + }, + { + "epoch": 0.6859655704222417, + "grad_norm": 278.0, + "learning_rate": 2.370793707392636e-05, + "loss": 13.2503, + "step": 16457 + }, + { + "epoch": 0.6860072527197699, + "grad_norm": 744.0, + "learning_rate": 2.370219586615514e-05, + "loss": 22.0005, + "step": 16458 + }, + { + "epoch": 0.6860489350172981, + "grad_norm": 310.0, + "learning_rate": 2.369645513766373e-05, + "loss": 14.1272, + "step": 16459 + }, + { + "epoch": 0.6860906173148263, + "grad_norm": 560.0, + "learning_rate": 2.3690714888556764e-05, + "loss": 20.376, + "step": 16460 + }, + { + "epoch": 0.6861322996123547, + "grad_norm": 356.0, + "learning_rate": 2.3684975118938858e-05, + "loss": 13.6253, + "step": 16461 + }, + { + "epoch": 0.6861739819098829, + "grad_norm": 129.0, + "learning_rate": 2.367923582891461e-05, + "loss": 8.5635, + "step": 16462 + }, + { + "epoch": 0.6862156642074111, + "grad_norm": 312.0, + "learning_rate": 2.3673497018588607e-05, + "loss": 13.8766, + "step": 16463 + }, + { + "epoch": 0.6862573465049393, + "grad_norm": 330.0, + "learning_rate": 2.36677586880655e-05, + "loss": 15.1263, + "step": 16464 + }, + { + "epoch": 0.6862990288024676, + "grad_norm": 239.0, + "learning_rate": 2.366202083744979e-05, + "loss": 13.2508, + "step": 16465 + }, + { + "epoch": 0.6863407110999958, + "grad_norm": 232.0, + "learning_rate": 2.3656283466846125e-05, + "loss": 10.3759, + "step": 16466 + }, + { + "epoch": 0.686382393397524, + "grad_norm": 306.0, + "learning_rate": 2.3650546576358994e-05, + "loss": 14.0629, + "step": 16467 + }, + { + "epoch": 0.6864240756950523, + "grad_norm": 430.0, + "learning_rate": 2.364481016609303e-05, + "loss": 16.2502, + "step": 16468 + }, + { + "epoch": 0.6864657579925806, + "grad_norm": 278.0, + "learning_rate": 2.3639074236152702e-05, + "loss": 11.0017, + "step": 16469 + }, + { + "epoch": 0.6865074402901088, + "grad_norm": 354.0, + "learning_rate": 2.3633338786642622e-05, + "loss": 15.5627, + "step": 16470 + }, + { + "epoch": 0.686549122587637, + "grad_norm": 988.0, + "learning_rate": 2.362760381766725e-05, + "loss": 25.3754, + "step": 16471 + }, + { + "epoch": 0.6865908048851652, + "grad_norm": 760.0, + "learning_rate": 2.3621869329331153e-05, + "loss": 21.5007, + "step": 16472 + }, + { + "epoch": 0.6866324871826935, + "grad_norm": 524.0, + "learning_rate": 2.361613532173883e-05, + "loss": 18.876, + "step": 16473 + }, + { + "epoch": 0.6866741694802218, + "grad_norm": 300.0, + "learning_rate": 2.3610401794994786e-05, + "loss": 14.5008, + "step": 16474 + }, + { + "epoch": 0.68671585177775, + "grad_norm": 138.0, + "learning_rate": 2.3604668749203512e-05, + "loss": 10.563, + "step": 16475 + }, + { + "epoch": 0.6867575340752782, + "grad_norm": 460.0, + "learning_rate": 2.359893618446949e-05, + "loss": 17.0008, + "step": 16476 + }, + { + "epoch": 0.6867992163728065, + "grad_norm": 700.0, + "learning_rate": 2.3593204100897203e-05, + "loss": 19.3751, + "step": 16477 + }, + { + "epoch": 0.6868408986703347, + "grad_norm": 600.0, + "learning_rate": 2.3587472498591112e-05, + "loss": 19.5003, + "step": 16478 + }, + { + "epoch": 0.6868825809678629, + "grad_norm": 354.0, + "learning_rate": 2.358174137765568e-05, + "loss": 13.6251, + "step": 16479 + }, + { + "epoch": 0.6869242632653911, + "grad_norm": 332.0, + "learning_rate": 2.357601073819536e-05, + "loss": 13.8754, + "step": 16480 + }, + { + "epoch": 0.6869659455629195, + "grad_norm": 672.0, + "learning_rate": 2.3570280580314587e-05, + "loss": 19.3753, + "step": 16481 + }, + { + "epoch": 0.6870076278604477, + "grad_norm": 436.0, + "learning_rate": 2.35645509041178e-05, + "loss": 14.8752, + "step": 16482 + }, + { + "epoch": 0.6870493101579759, + "grad_norm": 139.0, + "learning_rate": 2.3558821709709418e-05, + "loss": 10.2502, + "step": 16483 + }, + { + "epoch": 0.6870909924555042, + "grad_norm": 78.0, + "learning_rate": 2.3553092997193855e-05, + "loss": 8.688, + "step": 16484 + }, + { + "epoch": 0.6871326747530324, + "grad_norm": 904.0, + "learning_rate": 2.3547364766675524e-05, + "loss": 23.1252, + "step": 16485 + }, + { + "epoch": 0.6871743570505606, + "grad_norm": 89.5, + "learning_rate": 2.354163701825881e-05, + "loss": 7.2815, + "step": 16486 + }, + { + "epoch": 0.6872160393480888, + "grad_norm": 1248.0, + "learning_rate": 2.3535909752048096e-05, + "loss": 31.8753, + "step": 16487 + }, + { + "epoch": 0.6872577216456172, + "grad_norm": 372.0, + "learning_rate": 2.3530182968147818e-05, + "loss": 15.3129, + "step": 16488 + }, + { + "epoch": 0.6872994039431454, + "grad_norm": 152.0, + "learning_rate": 2.3524456666662266e-05, + "loss": 9.5028, + "step": 16489 + }, + { + "epoch": 0.6873410862406736, + "grad_norm": 520.0, + "learning_rate": 2.3518730847695874e-05, + "loss": 17.1285, + "step": 16490 + }, + { + "epoch": 0.6873827685382018, + "grad_norm": 294.0, + "learning_rate": 2.351300551135293e-05, + "loss": 14.438, + "step": 16491 + }, + { + "epoch": 0.6874244508357301, + "grad_norm": 1256.0, + "learning_rate": 2.350728065773785e-05, + "loss": 28.5011, + "step": 16492 + }, + { + "epoch": 0.6874661331332583, + "grad_norm": 186.0, + "learning_rate": 2.3501556286954896e-05, + "loss": 12.127, + "step": 16493 + }, + { + "epoch": 0.6875078154307865, + "grad_norm": 270.0, + "learning_rate": 2.3495832399108464e-05, + "loss": 13.1257, + "step": 16494 + }, + { + "epoch": 0.6875494977283148, + "grad_norm": 314.0, + "learning_rate": 2.34901089943028e-05, + "loss": 13.2505, + "step": 16495 + }, + { + "epoch": 0.6875911800258431, + "grad_norm": 125.5, + "learning_rate": 2.3484386072642294e-05, + "loss": 9.4378, + "step": 16496 + }, + { + "epoch": 0.6876328623233713, + "grad_norm": 156.0, + "learning_rate": 2.3478663634231167e-05, + "loss": 8.9377, + "step": 16497 + }, + { + "epoch": 0.6876745446208995, + "grad_norm": 205.0, + "learning_rate": 2.347294167917377e-05, + "loss": 9.7504, + "step": 16498 + }, + { + "epoch": 0.6877162269184277, + "grad_norm": 772.0, + "learning_rate": 2.346722020757437e-05, + "loss": 24.0003, + "step": 16499 + }, + { + "epoch": 0.687757909215956, + "grad_norm": 118.0, + "learning_rate": 2.346149921953723e-05, + "loss": 7.1257, + "step": 16500 + }, + { + "epoch": 0.6877995915134842, + "grad_norm": 944.0, + "learning_rate": 2.3455778715166627e-05, + "loss": 21.6302, + "step": 16501 + }, + { + "epoch": 0.6878412738110125, + "grad_norm": 172.0, + "learning_rate": 2.3450058694566806e-05, + "loss": 7.219, + "step": 16502 + }, + { + "epoch": 0.6878829561085407, + "grad_norm": 203.0, + "learning_rate": 2.3444339157842034e-05, + "loss": 11.5007, + "step": 16503 + }, + { + "epoch": 0.687924638406069, + "grad_norm": 147.0, + "learning_rate": 2.3438620105096525e-05, + "loss": 5.5317, + "step": 16504 + }, + { + "epoch": 0.6879663207035972, + "grad_norm": 89.5, + "learning_rate": 2.343290153643453e-05, + "loss": 9.3133, + "step": 16505 + }, + { + "epoch": 0.6880080030011254, + "grad_norm": 251.0, + "learning_rate": 2.3427183451960265e-05, + "loss": 12.0004, + "step": 16506 + }, + { + "epoch": 0.6880496852986536, + "grad_norm": 235.0, + "learning_rate": 2.3421465851777935e-05, + "loss": 12.6254, + "step": 16507 + }, + { + "epoch": 0.688091367596182, + "grad_norm": 736.0, + "learning_rate": 2.3415748735991754e-05, + "loss": 22.3796, + "step": 16508 + }, + { + "epoch": 0.6881330498937102, + "grad_norm": 132.0, + "learning_rate": 2.341003210470591e-05, + "loss": 9.688, + "step": 16509 + }, + { + "epoch": 0.6881747321912384, + "grad_norm": 184.0, + "learning_rate": 2.340431595802459e-05, + "loss": 9.8133, + "step": 16510 + }, + { + "epoch": 0.6882164144887666, + "grad_norm": 163.0, + "learning_rate": 2.3398600296051974e-05, + "loss": 10.3129, + "step": 16511 + }, + { + "epoch": 0.6882580967862949, + "grad_norm": 640.0, + "learning_rate": 2.339288511889223e-05, + "loss": 19.3753, + "step": 16512 + }, + { + "epoch": 0.6882997790838231, + "grad_norm": 246.0, + "learning_rate": 2.3387170426649496e-05, + "loss": 11.0023, + "step": 16513 + }, + { + "epoch": 0.6883414613813513, + "grad_norm": 334.0, + "learning_rate": 2.3381456219427984e-05, + "loss": 12.8775, + "step": 16514 + }, + { + "epoch": 0.6883831436788795, + "grad_norm": 290.0, + "learning_rate": 2.3375742497331755e-05, + "loss": 13.5001, + "step": 16515 + }, + { + "epoch": 0.6884248259764079, + "grad_norm": 728.0, + "learning_rate": 2.337002926046502e-05, + "loss": 21.7502, + "step": 16516 + }, + { + "epoch": 0.6884665082739361, + "grad_norm": 330.0, + "learning_rate": 2.336431650893183e-05, + "loss": 11.8153, + "step": 16517 + }, + { + "epoch": 0.6885081905714643, + "grad_norm": 560.0, + "learning_rate": 2.3358604242836375e-05, + "loss": 18.3752, + "step": 16518 + }, + { + "epoch": 0.6885498728689925, + "grad_norm": 648.0, + "learning_rate": 2.3352892462282684e-05, + "loss": 18.8754, + "step": 16519 + }, + { + "epoch": 0.6885915551665208, + "grad_norm": 424.0, + "learning_rate": 2.3347181167374927e-05, + "loss": 16.7503, + "step": 16520 + }, + { + "epoch": 0.688633237464049, + "grad_norm": 480.0, + "learning_rate": 2.3341470358217126e-05, + "loss": 17.0005, + "step": 16521 + }, + { + "epoch": 0.6886749197615772, + "grad_norm": 450.0, + "learning_rate": 2.3335760034913412e-05, + "loss": 16.5009, + "step": 16522 + }, + { + "epoch": 0.6887166020591055, + "grad_norm": 189.0, + "learning_rate": 2.3330050197567838e-05, + "loss": 10.9377, + "step": 16523 + }, + { + "epoch": 0.6887582843566338, + "grad_norm": 696.0, + "learning_rate": 2.3324340846284464e-05, + "loss": 18.8767, + "step": 16524 + }, + { + "epoch": 0.688799966654162, + "grad_norm": 422.0, + "learning_rate": 2.331863198116735e-05, + "loss": 15.5001, + "step": 16525 + }, + { + "epoch": 0.6888416489516902, + "grad_norm": 249.0, + "learning_rate": 2.3312923602320536e-05, + "loss": 13.6252, + "step": 16526 + }, + { + "epoch": 0.6888833312492184, + "grad_norm": 300.0, + "learning_rate": 2.3307215709848057e-05, + "loss": 13.9381, + "step": 16527 + }, + { + "epoch": 0.6889250135467467, + "grad_norm": 302.0, + "learning_rate": 2.3301508303853943e-05, + "loss": 14.0631, + "step": 16528 + }, + { + "epoch": 0.688966695844275, + "grad_norm": 120.0, + "learning_rate": 2.329580138444221e-05, + "loss": 9.5636, + "step": 16529 + }, + { + "epoch": 0.6890083781418032, + "grad_norm": 420.0, + "learning_rate": 2.3290094951716868e-05, + "loss": 15.9379, + "step": 16530 + }, + { + "epoch": 0.6890500604393314, + "grad_norm": 620.0, + "learning_rate": 2.3284389005781915e-05, + "loss": 17.753, + "step": 16531 + }, + { + "epoch": 0.6890917427368597, + "grad_norm": 628.0, + "learning_rate": 2.3278683546741348e-05, + "loss": 19.501, + "step": 16532 + }, + { + "epoch": 0.6891334250343879, + "grad_norm": 312.0, + "learning_rate": 2.3272978574699138e-05, + "loss": 14.6261, + "step": 16533 + }, + { + "epoch": 0.6891751073319161, + "grad_norm": 448.0, + "learning_rate": 2.3267274089759274e-05, + "loss": 16.5004, + "step": 16534 + }, + { + "epoch": 0.6892167896294443, + "grad_norm": 246.0, + "learning_rate": 2.3261570092025707e-05, + "loss": 11.9384, + "step": 16535 + }, + { + "epoch": 0.6892584719269726, + "grad_norm": 428.0, + "learning_rate": 2.3255866581602402e-05, + "loss": 15.4378, + "step": 16536 + }, + { + "epoch": 0.6893001542245009, + "grad_norm": 292.0, + "learning_rate": 2.32501635585933e-05, + "loss": 13.5638, + "step": 16537 + }, + { + "epoch": 0.6893418365220291, + "grad_norm": 1032.0, + "learning_rate": 2.3244461023102343e-05, + "loss": 24.3756, + "step": 16538 + }, + { + "epoch": 0.6893835188195573, + "grad_norm": 284.0, + "learning_rate": 2.3238758975233444e-05, + "loss": 10.0001, + "step": 16539 + }, + { + "epoch": 0.6894252011170856, + "grad_norm": 253.0, + "learning_rate": 2.323305741509057e-05, + "loss": 12.6253, + "step": 16540 + }, + { + "epoch": 0.6894668834146138, + "grad_norm": 231.0, + "learning_rate": 2.3227356342777568e-05, + "loss": 11.9379, + "step": 16541 + }, + { + "epoch": 0.689508565712142, + "grad_norm": 286.0, + "learning_rate": 2.322165575839841e-05, + "loss": 13.8756, + "step": 16542 + }, + { + "epoch": 0.6895502480096702, + "grad_norm": 540.0, + "learning_rate": 2.321595566205691e-05, + "loss": 15.7505, + "step": 16543 + }, + { + "epoch": 0.6895919303071986, + "grad_norm": 358.0, + "learning_rate": 2.3210256053857038e-05, + "loss": 14.9394, + "step": 16544 + }, + { + "epoch": 0.6896336126047268, + "grad_norm": 159.0, + "learning_rate": 2.3204556933902587e-05, + "loss": 10.1259, + "step": 16545 + }, + { + "epoch": 0.689675294902255, + "grad_norm": 231.0, + "learning_rate": 2.3198858302297505e-05, + "loss": 11.9376, + "step": 16546 + }, + { + "epoch": 0.6897169771997832, + "grad_norm": 255.0, + "learning_rate": 2.3193160159145572e-05, + "loss": 12.6883, + "step": 16547 + }, + { + "epoch": 0.6897586594973115, + "grad_norm": 376.0, + "learning_rate": 2.3187462504550693e-05, + "loss": 15.1891, + "step": 16548 + }, + { + "epoch": 0.6898003417948397, + "grad_norm": 612.0, + "learning_rate": 2.318176533861669e-05, + "loss": 21.3763, + "step": 16549 + }, + { + "epoch": 0.689842024092368, + "grad_norm": 260.0, + "learning_rate": 2.3176068661447397e-05, + "loss": 11.6876, + "step": 16550 + }, + { + "epoch": 0.6898837063898962, + "grad_norm": 664.0, + "learning_rate": 2.317037247314663e-05, + "loss": 19.8752, + "step": 16551 + }, + { + "epoch": 0.6899253886874245, + "grad_norm": 239.0, + "learning_rate": 2.316467677381821e-05, + "loss": 12.5629, + "step": 16552 + }, + { + "epoch": 0.6899670709849527, + "grad_norm": 91.0, + "learning_rate": 2.3158981563565936e-05, + "loss": 9.2504, + "step": 16553 + }, + { + "epoch": 0.6900087532824809, + "grad_norm": 400.0, + "learning_rate": 2.3153286842493605e-05, + "loss": 15.1877, + "step": 16554 + }, + { + "epoch": 0.6900504355800092, + "grad_norm": 219.0, + "learning_rate": 2.3147592610705005e-05, + "loss": 12.3127, + "step": 16555 + }, + { + "epoch": 0.6900921178775374, + "grad_norm": 166.0, + "learning_rate": 2.3141898868303914e-05, + "loss": 12.4379, + "step": 16556 + }, + { + "epoch": 0.6901338001750656, + "grad_norm": 308.0, + "learning_rate": 2.3136205615394103e-05, + "loss": 13.2504, + "step": 16557 + }, + { + "epoch": 0.6901754824725939, + "grad_norm": 856.0, + "learning_rate": 2.3130512852079323e-05, + "loss": 23.8752, + "step": 16558 + }, + { + "epoch": 0.6902171647701222, + "grad_norm": 856.0, + "learning_rate": 2.3124820578463334e-05, + "loss": 24.751, + "step": 16559 + }, + { + "epoch": 0.6902588470676504, + "grad_norm": 328.0, + "learning_rate": 2.311912879464988e-05, + "loss": 14.4379, + "step": 16560 + }, + { + "epoch": 0.6903005293651786, + "grad_norm": 430.0, + "learning_rate": 2.3113437500742686e-05, + "loss": 15.3132, + "step": 16561 + }, + { + "epoch": 0.6903422116627068, + "grad_norm": 284.0, + "learning_rate": 2.310774669684548e-05, + "loss": 13.8753, + "step": 16562 + }, + { + "epoch": 0.6903838939602351, + "grad_norm": 616.0, + "learning_rate": 2.310205638306196e-05, + "loss": 18.7524, + "step": 16563 + }, + { + "epoch": 0.6904255762577634, + "grad_norm": 324.0, + "learning_rate": 2.3096366559495885e-05, + "loss": 14.0006, + "step": 16564 + }, + { + "epoch": 0.6904672585552916, + "grad_norm": 396.0, + "learning_rate": 2.3090677226250885e-05, + "loss": 14.5002, + "step": 16565 + }, + { + "epoch": 0.6905089408528198, + "grad_norm": 141.0, + "learning_rate": 2.3084988383430718e-05, + "loss": 11.3134, + "step": 16566 + }, + { + "epoch": 0.6905506231503481, + "grad_norm": 151.0, + "learning_rate": 2.307930003113899e-05, + "loss": 4.4069, + "step": 16567 + }, + { + "epoch": 0.6905923054478763, + "grad_norm": 170.0, + "learning_rate": 2.3073612169479443e-05, + "loss": 10.5005, + "step": 16568 + }, + { + "epoch": 0.6906339877454045, + "grad_norm": 396.0, + "learning_rate": 2.3067924798555668e-05, + "loss": 15.813, + "step": 16569 + }, + { + "epoch": 0.6906756700429327, + "grad_norm": 181.0, + "learning_rate": 2.3062237918471396e-05, + "loss": 11.1253, + "step": 16570 + }, + { + "epoch": 0.690717352340461, + "grad_norm": 215.0, + "learning_rate": 2.305655152933019e-05, + "loss": 11.064, + "step": 16571 + }, + { + "epoch": 0.6907590346379893, + "grad_norm": 199.0, + "learning_rate": 2.3050865631235757e-05, + "loss": 12.3752, + "step": 16572 + }, + { + "epoch": 0.6908007169355175, + "grad_norm": 110.0, + "learning_rate": 2.3045180224291657e-05, + "loss": 7.8446, + "step": 16573 + }, + { + "epoch": 0.6908423992330457, + "grad_norm": 548.0, + "learning_rate": 2.3039495308601555e-05, + "loss": 18.8754, + "step": 16574 + }, + { + "epoch": 0.690884081530574, + "grad_norm": 540.0, + "learning_rate": 2.3033810884269048e-05, + "loss": 17.5002, + "step": 16575 + }, + { + "epoch": 0.6909257638281022, + "grad_norm": 460.0, + "learning_rate": 2.3028126951397732e-05, + "loss": 16.7507, + "step": 16576 + }, + { + "epoch": 0.6909674461256304, + "grad_norm": 213.0, + "learning_rate": 2.3022443510091195e-05, + "loss": 9.6877, + "step": 16577 + }, + { + "epoch": 0.6910091284231586, + "grad_norm": 430.0, + "learning_rate": 2.301676056045302e-05, + "loss": 15.2504, + "step": 16578 + }, + { + "epoch": 0.691050810720687, + "grad_norm": 221.0, + "learning_rate": 2.301107810258678e-05, + "loss": 11.0006, + "step": 16579 + }, + { + "epoch": 0.6910924930182152, + "grad_norm": 165.0, + "learning_rate": 2.3005396136596037e-05, + "loss": 10.0629, + "step": 16580 + }, + { + "epoch": 0.6911341753157434, + "grad_norm": 428.0, + "learning_rate": 2.2999714662584348e-05, + "loss": 14.9389, + "step": 16581 + }, + { + "epoch": 0.6911758576132716, + "grad_norm": 600.0, + "learning_rate": 2.2994033680655253e-05, + "loss": 15.9415, + "step": 16582 + }, + { + "epoch": 0.6912175399107999, + "grad_norm": 86.0, + "learning_rate": 2.298835319091229e-05, + "loss": 8.7504, + "step": 16583 + }, + { + "epoch": 0.6912592222083281, + "grad_norm": 121.5, + "learning_rate": 2.2982673193458993e-05, + "loss": 9.0003, + "step": 16584 + }, + { + "epoch": 0.6913009045058564, + "grad_norm": 564.0, + "learning_rate": 2.2976993688398873e-05, + "loss": 19.3757, + "step": 16585 + }, + { + "epoch": 0.6913425868033846, + "grad_norm": 442.0, + "learning_rate": 2.2971314675835442e-05, + "loss": 15.5629, + "step": 16586 + }, + { + "epoch": 0.6913842691009129, + "grad_norm": 1600.0, + "learning_rate": 2.29656361558722e-05, + "loss": 34.0039, + "step": 16587 + }, + { + "epoch": 0.6914259513984411, + "grad_norm": 250.0, + "learning_rate": 2.295995812861264e-05, + "loss": 10.5005, + "step": 16588 + }, + { + "epoch": 0.6914676336959693, + "grad_norm": 552.0, + "learning_rate": 2.2954280594160223e-05, + "loss": 17.0005, + "step": 16589 + }, + { + "epoch": 0.6915093159934975, + "grad_norm": 374.0, + "learning_rate": 2.294860355261848e-05, + "loss": 15.0627, + "step": 16590 + }, + { + "epoch": 0.6915509982910258, + "grad_norm": 60.0, + "learning_rate": 2.2942927004090804e-05, + "loss": 7.4378, + "step": 16591 + }, + { + "epoch": 0.691592680588554, + "grad_norm": 197.0, + "learning_rate": 2.293725094868072e-05, + "loss": 5.0951, + "step": 16592 + }, + { + "epoch": 0.6916343628860823, + "grad_norm": 195.0, + "learning_rate": 2.2931575386491604e-05, + "loss": 11.813, + "step": 16593 + }, + { + "epoch": 0.6916760451836105, + "grad_norm": 1584.0, + "learning_rate": 2.292590031762697e-05, + "loss": 30.7586, + "step": 16594 + }, + { + "epoch": 0.6917177274811388, + "grad_norm": 209.0, + "learning_rate": 2.2920225742190166e-05, + "loss": 11.3779, + "step": 16595 + }, + { + "epoch": 0.691759409778667, + "grad_norm": 444.0, + "learning_rate": 2.2914551660284688e-05, + "loss": 16.3752, + "step": 16596 + }, + { + "epoch": 0.6918010920761952, + "grad_norm": 178.0, + "learning_rate": 2.2908878072013874e-05, + "loss": 10.5627, + "step": 16597 + }, + { + "epoch": 0.6918427743737234, + "grad_norm": 378.0, + "learning_rate": 2.290320497748118e-05, + "loss": 15.9377, + "step": 16598 + }, + { + "epoch": 0.6918844566712518, + "grad_norm": 688.0, + "learning_rate": 2.2897532376789982e-05, + "loss": 21.5003, + "step": 16599 + }, + { + "epoch": 0.69192613896878, + "grad_norm": 588.0, + "learning_rate": 2.2891860270043662e-05, + "loss": 17.7507, + "step": 16600 + }, + { + "epoch": 0.6919678212663082, + "grad_norm": 572.0, + "learning_rate": 2.2886188657345592e-05, + "loss": 18.0002, + "step": 16601 + }, + { + "epoch": 0.6920095035638364, + "grad_norm": 370.0, + "learning_rate": 2.2880517538799144e-05, + "loss": 15.2504, + "step": 16602 + }, + { + "epoch": 0.6920511858613647, + "grad_norm": 392.0, + "learning_rate": 2.2874846914507676e-05, + "loss": 15.3129, + "step": 16603 + }, + { + "epoch": 0.6920928681588929, + "grad_norm": 616.0, + "learning_rate": 2.286917678457452e-05, + "loss": 17.0009, + "step": 16604 + }, + { + "epoch": 0.6921345504564211, + "grad_norm": 536.0, + "learning_rate": 2.2863507149103035e-05, + "loss": 14.4458, + "step": 16605 + }, + { + "epoch": 0.6921762327539493, + "grad_norm": 364.0, + "learning_rate": 2.2857838008196535e-05, + "loss": 14.1253, + "step": 16606 + }, + { + "epoch": 0.6922179150514777, + "grad_norm": 296.0, + "learning_rate": 2.2852169361958354e-05, + "loss": 13.6878, + "step": 16607 + }, + { + "epoch": 0.6922595973490059, + "grad_norm": 154.0, + "learning_rate": 2.284650121049179e-05, + "loss": 10.938, + "step": 16608 + }, + { + "epoch": 0.6923012796465341, + "grad_norm": 416.0, + "learning_rate": 2.2840833553900154e-05, + "loss": 14.8754, + "step": 16609 + }, + { + "epoch": 0.6923429619440623, + "grad_norm": 274.0, + "learning_rate": 2.2835166392286733e-05, + "loss": 12.6877, + "step": 16610 + }, + { + "epoch": 0.6923846442415906, + "grad_norm": 398.0, + "learning_rate": 2.282949972575482e-05, + "loss": 15.1878, + "step": 16611 + }, + { + "epoch": 0.6924263265391188, + "grad_norm": 274.0, + "learning_rate": 2.2823833554407686e-05, + "loss": 12.3752, + "step": 16612 + }, + { + "epoch": 0.692468008836647, + "grad_norm": 174.0, + "learning_rate": 2.2818167878348583e-05, + "loss": 12.1253, + "step": 16613 + }, + { + "epoch": 0.6925096911341753, + "grad_norm": 197.0, + "learning_rate": 2.2812502697680816e-05, + "loss": 11.0006, + "step": 16614 + }, + { + "epoch": 0.6925513734317036, + "grad_norm": 490.0, + "learning_rate": 2.2806838012507563e-05, + "loss": 14.688, + "step": 16615 + }, + { + "epoch": 0.6925930557292318, + "grad_norm": 326.0, + "learning_rate": 2.2801173822932143e-05, + "loss": 11.7524, + "step": 16616 + }, + { + "epoch": 0.69263473802676, + "grad_norm": 492.0, + "learning_rate": 2.2795510129057707e-05, + "loss": 18.7517, + "step": 16617 + }, + { + "epoch": 0.6926764203242882, + "grad_norm": 196.0, + "learning_rate": 2.2789846930987545e-05, + "loss": 9.8753, + "step": 16618 + }, + { + "epoch": 0.6927181026218165, + "grad_norm": 124.5, + "learning_rate": 2.2784184228824806e-05, + "loss": 10.7507, + "step": 16619 + }, + { + "epoch": 0.6927597849193448, + "grad_norm": 416.0, + "learning_rate": 2.2778522022672767e-05, + "loss": 16.1255, + "step": 16620 + }, + { + "epoch": 0.692801467216873, + "grad_norm": 544.0, + "learning_rate": 2.2772860312634535e-05, + "loss": 15.5629, + "step": 16621 + }, + { + "epoch": 0.6928431495144012, + "grad_norm": 536.0, + "learning_rate": 2.276719909881338e-05, + "loss": 18.0001, + "step": 16622 + }, + { + "epoch": 0.6928848318119295, + "grad_norm": 520.0, + "learning_rate": 2.2761538381312403e-05, + "loss": 18.1252, + "step": 16623 + }, + { + "epoch": 0.6929265141094577, + "grad_norm": 129.0, + "learning_rate": 2.275587816023483e-05, + "loss": 10.6255, + "step": 16624 + }, + { + "epoch": 0.6929681964069859, + "grad_norm": 748.0, + "learning_rate": 2.275021843568379e-05, + "loss": 19.2502, + "step": 16625 + }, + { + "epoch": 0.6930098787045142, + "grad_norm": 474.0, + "learning_rate": 2.274455920776244e-05, + "loss": 15.7502, + "step": 16626 + }, + { + "epoch": 0.6930515610020425, + "grad_norm": 97.5, + "learning_rate": 2.2738900476573916e-05, + "loss": 9.8128, + "step": 16627 + }, + { + "epoch": 0.6930932432995707, + "grad_norm": 676.0, + "learning_rate": 2.2733242242221353e-05, + "loss": 20.3752, + "step": 16628 + }, + { + "epoch": 0.6931349255970989, + "grad_norm": 244.0, + "learning_rate": 2.272758450480787e-05, + "loss": 12.9379, + "step": 16629 + }, + { + "epoch": 0.6931766078946272, + "grad_norm": 312.0, + "learning_rate": 2.2721927264436582e-05, + "loss": 14.1251, + "step": 16630 + }, + { + "epoch": 0.6932182901921554, + "grad_norm": 294.0, + "learning_rate": 2.2716270521210593e-05, + "loss": 13.6876, + "step": 16631 + }, + { + "epoch": 0.6932599724896836, + "grad_norm": 952.0, + "learning_rate": 2.271061427523299e-05, + "loss": 26.7503, + "step": 16632 + }, + { + "epoch": 0.6933016547872118, + "grad_norm": 532.0, + "learning_rate": 2.2704958526606867e-05, + "loss": 18.8773, + "step": 16633 + }, + { + "epoch": 0.6933433370847402, + "grad_norm": 61.0, + "learning_rate": 2.2699303275435297e-05, + "loss": 7.594, + "step": 16634 + }, + { + "epoch": 0.6933850193822684, + "grad_norm": 239.0, + "learning_rate": 2.269364852182135e-05, + "loss": 11.0629, + "step": 16635 + }, + { + "epoch": 0.6934267016797966, + "grad_norm": 85.0, + "learning_rate": 2.2687994265868084e-05, + "loss": 8.6883, + "step": 16636 + }, + { + "epoch": 0.6934683839773248, + "grad_norm": 452.0, + "learning_rate": 2.2682340507678546e-05, + "loss": 16.5012, + "step": 16637 + }, + { + "epoch": 0.6935100662748531, + "grad_norm": 644.0, + "learning_rate": 2.2676687247355773e-05, + "loss": 20.8752, + "step": 16638 + }, + { + "epoch": 0.6935517485723813, + "grad_norm": 262.0, + "learning_rate": 2.2671034485002785e-05, + "loss": 12.5003, + "step": 16639 + }, + { + "epoch": 0.6935934308699095, + "grad_norm": 142.0, + "learning_rate": 2.266538222072266e-05, + "loss": 10.3753, + "step": 16640 + }, + { + "epoch": 0.6936351131674378, + "grad_norm": 292.0, + "learning_rate": 2.265973045461833e-05, + "loss": 8.4396, + "step": 16641 + }, + { + "epoch": 0.6936767954649661, + "grad_norm": 354.0, + "learning_rate": 2.2654079186792876e-05, + "loss": 12.5627, + "step": 16642 + }, + { + "epoch": 0.6937184777624943, + "grad_norm": 644.0, + "learning_rate": 2.264842841734922e-05, + "loss": 22.1254, + "step": 16643 + }, + { + "epoch": 0.6937601600600225, + "grad_norm": 306.0, + "learning_rate": 2.264277814639042e-05, + "loss": 13.8756, + "step": 16644 + }, + { + "epoch": 0.6938018423575507, + "grad_norm": 452.0, + "learning_rate": 2.2637128374019385e-05, + "loss": 14.4377, + "step": 16645 + }, + { + "epoch": 0.693843524655079, + "grad_norm": 308.0, + "learning_rate": 2.2631479100339148e-05, + "loss": 11.2501, + "step": 16646 + }, + { + "epoch": 0.6938852069526072, + "grad_norm": 368.0, + "learning_rate": 2.2625830325452598e-05, + "loss": 16.2503, + "step": 16647 + }, + { + "epoch": 0.6939268892501355, + "grad_norm": 168.0, + "learning_rate": 2.2620182049462734e-05, + "loss": 10.8753, + "step": 16648 + }, + { + "epoch": 0.6939685715476637, + "grad_norm": 173.0, + "learning_rate": 2.2614534272472486e-05, + "loss": 9.5004, + "step": 16649 + }, + { + "epoch": 0.694010253845192, + "grad_norm": 592.0, + "learning_rate": 2.2608886994584784e-05, + "loss": 18.3758, + "step": 16650 + }, + { + "epoch": 0.6940519361427202, + "grad_norm": 197.0, + "learning_rate": 2.260324021590255e-05, + "loss": 11.1877, + "step": 16651 + }, + { + "epoch": 0.6940936184402484, + "grad_norm": 175.0, + "learning_rate": 2.259759393652869e-05, + "loss": 5.5944, + "step": 16652 + }, + { + "epoch": 0.6941353007377766, + "grad_norm": 268.0, + "learning_rate": 2.2591948156566113e-05, + "loss": 10.6252, + "step": 16653 + }, + { + "epoch": 0.694176983035305, + "grad_norm": 456.0, + "learning_rate": 2.2586302876117714e-05, + "loss": 16.6257, + "step": 16654 + }, + { + "epoch": 0.6942186653328332, + "grad_norm": 302.0, + "learning_rate": 2.2580658095286382e-05, + "loss": 13.3127, + "step": 16655 + }, + { + "epoch": 0.6942603476303614, + "grad_norm": 101.0, + "learning_rate": 2.257501381417499e-05, + "loss": 9.188, + "step": 16656 + }, + { + "epoch": 0.6943020299278896, + "grad_norm": 398.0, + "learning_rate": 2.2569370032886406e-05, + "loss": 15.3774, + "step": 16657 + }, + { + "epoch": 0.6943437122254179, + "grad_norm": 170.0, + "learning_rate": 2.2563726751523484e-05, + "loss": 8.1253, + "step": 16658 + }, + { + "epoch": 0.6943853945229461, + "grad_norm": 212.0, + "learning_rate": 2.255808397018908e-05, + "loss": 11.8129, + "step": 16659 + }, + { + "epoch": 0.6944270768204743, + "grad_norm": 424.0, + "learning_rate": 2.2552441688986035e-05, + "loss": 15.0002, + "step": 16660 + }, + { + "epoch": 0.6944687591180025, + "grad_norm": 916.0, + "learning_rate": 2.2546799908017174e-05, + "loss": 22.8785, + "step": 16661 + }, + { + "epoch": 0.6945104414155309, + "grad_norm": 464.0, + "learning_rate": 2.2541158627385322e-05, + "loss": 16.3753, + "step": 16662 + }, + { + "epoch": 0.6945521237130591, + "grad_norm": 254.0, + "learning_rate": 2.253551784719329e-05, + "loss": 12.4382, + "step": 16663 + }, + { + "epoch": 0.6945938060105873, + "grad_norm": 568.0, + "learning_rate": 2.2529877567543882e-05, + "loss": 16.8813, + "step": 16664 + }, + { + "epoch": 0.6946354883081155, + "grad_norm": 226.0, + "learning_rate": 2.2524237788539882e-05, + "loss": 11.7503, + "step": 16665 + }, + { + "epoch": 0.6946771706056438, + "grad_norm": 628.0, + "learning_rate": 2.2518598510284123e-05, + "loss": 18.7527, + "step": 16666 + }, + { + "epoch": 0.694718852903172, + "grad_norm": 330.0, + "learning_rate": 2.25129597328793e-05, + "loss": 16.376, + "step": 16667 + }, + { + "epoch": 0.6947605352007002, + "grad_norm": 418.0, + "learning_rate": 2.2507321456428272e-05, + "loss": 15.001, + "step": 16668 + }, + { + "epoch": 0.6948022174982285, + "grad_norm": 219.0, + "learning_rate": 2.2501683681033712e-05, + "loss": 10.9378, + "step": 16669 + }, + { + "epoch": 0.6948438997957568, + "grad_norm": 748.0, + "learning_rate": 2.2496046406798444e-05, + "loss": 18.5037, + "step": 16670 + }, + { + "epoch": 0.694885582093285, + "grad_norm": 70.0, + "learning_rate": 2.249040963382513e-05, + "loss": 8.1254, + "step": 16671 + }, + { + "epoch": 0.6949272643908132, + "grad_norm": 458.0, + "learning_rate": 2.248477336221659e-05, + "loss": 15.1259, + "step": 16672 + }, + { + "epoch": 0.6949689466883414, + "grad_norm": 236.0, + "learning_rate": 2.2479137592075455e-05, + "loss": 12.6257, + "step": 16673 + }, + { + "epoch": 0.6950106289858697, + "grad_norm": 73.5, + "learning_rate": 2.2473502323504498e-05, + "loss": 9.8752, + "step": 16674 + }, + { + "epoch": 0.695052311283398, + "grad_norm": 185.0, + "learning_rate": 2.246786755660641e-05, + "loss": 11.376, + "step": 16675 + }, + { + "epoch": 0.6950939935809262, + "grad_norm": 240.0, + "learning_rate": 2.2462233291483875e-05, + "loss": 12.2507, + "step": 16676 + }, + { + "epoch": 0.6951356758784544, + "grad_norm": 932.0, + "learning_rate": 2.2456599528239587e-05, + "loss": 23.0016, + "step": 16677 + }, + { + "epoch": 0.6951773581759827, + "grad_norm": 422.0, + "learning_rate": 2.2450966266976216e-05, + "loss": 15.3128, + "step": 16678 + }, + { + "epoch": 0.6952190404735109, + "grad_norm": 195.0, + "learning_rate": 2.244533350779642e-05, + "loss": 10.3752, + "step": 16679 + }, + { + "epoch": 0.6952607227710391, + "grad_norm": 256.0, + "learning_rate": 2.2439701250802882e-05, + "loss": 10.4387, + "step": 16680 + }, + { + "epoch": 0.6953024050685673, + "grad_norm": 436.0, + "learning_rate": 2.243406949609823e-05, + "loss": 15.8128, + "step": 16681 + }, + { + "epoch": 0.6953440873660957, + "grad_norm": 386.0, + "learning_rate": 2.2428438243785106e-05, + "loss": 15.8763, + "step": 16682 + }, + { + "epoch": 0.6953857696636239, + "grad_norm": 360.0, + "learning_rate": 2.2422807493966146e-05, + "loss": 14.6253, + "step": 16683 + }, + { + "epoch": 0.6954274519611521, + "grad_norm": 516.0, + "learning_rate": 2.2417177246743964e-05, + "loss": 18.0002, + "step": 16684 + }, + { + "epoch": 0.6954691342586803, + "grad_norm": 115.0, + "learning_rate": 2.2411547502221182e-05, + "loss": 9.3136, + "step": 16685 + }, + { + "epoch": 0.6955108165562086, + "grad_norm": 424.0, + "learning_rate": 2.2405918260500386e-05, + "loss": 15.6298, + "step": 16686 + }, + { + "epoch": 0.6955524988537368, + "grad_norm": 204.0, + "learning_rate": 2.2400289521684187e-05, + "loss": 11.8756, + "step": 16687 + }, + { + "epoch": 0.695594181151265, + "grad_norm": 111.0, + "learning_rate": 2.2394661285875155e-05, + "loss": 10.7506, + "step": 16688 + }, + { + "epoch": 0.6956358634487932, + "grad_norm": 1544.0, + "learning_rate": 2.2389033553175858e-05, + "loss": 35.5002, + "step": 16689 + }, + { + "epoch": 0.6956775457463216, + "grad_norm": 204.0, + "learning_rate": 2.2383406323688917e-05, + "loss": 10.5003, + "step": 16690 + }, + { + "epoch": 0.6957192280438498, + "grad_norm": 528.0, + "learning_rate": 2.23777795975168e-05, + "loss": 17.6295, + "step": 16691 + }, + { + "epoch": 0.695760910341378, + "grad_norm": 188.0, + "learning_rate": 2.237215337476215e-05, + "loss": 7.5638, + "step": 16692 + }, + { + "epoch": 0.6958025926389062, + "grad_norm": 157.0, + "learning_rate": 2.2366527655527415e-05, + "loss": 8.4377, + "step": 16693 + }, + { + "epoch": 0.6958442749364345, + "grad_norm": 98.0, + "learning_rate": 2.2360902439915198e-05, + "loss": 9.9378, + "step": 16694 + }, + { + "epoch": 0.6958859572339627, + "grad_norm": 218.0, + "learning_rate": 2.2355277728027955e-05, + "loss": 12.3753, + "step": 16695 + }, + { + "epoch": 0.695927639531491, + "grad_norm": 167.0, + "learning_rate": 2.2349653519968273e-05, + "loss": 10.6878, + "step": 16696 + }, + { + "epoch": 0.6959693218290192, + "grad_norm": 40.75, + "learning_rate": 2.2344029815838564e-05, + "loss": 6.5628, + "step": 16697 + }, + { + "epoch": 0.6960110041265475, + "grad_norm": 396.0, + "learning_rate": 2.2338406615741408e-05, + "loss": 13.5012, + "step": 16698 + }, + { + "epoch": 0.6960526864240757, + "grad_norm": 398.0, + "learning_rate": 2.233278391977921e-05, + "loss": 15.7506, + "step": 16699 + }, + { + "epoch": 0.6960943687216039, + "grad_norm": 163.0, + "learning_rate": 2.2327161728054497e-05, + "loss": 10.7507, + "step": 16700 + }, + { + "epoch": 0.6961360510191322, + "grad_norm": 432.0, + "learning_rate": 2.232154004066972e-05, + "loss": 15.5627, + "step": 16701 + }, + { + "epoch": 0.6961777333166604, + "grad_norm": 306.0, + "learning_rate": 2.2315918857727335e-05, + "loss": 11.5648, + "step": 16702 + }, + { + "epoch": 0.6962194156141887, + "grad_norm": 1136.0, + "learning_rate": 2.231029817932978e-05, + "loss": 25.8799, + "step": 16703 + }, + { + "epoch": 0.6962610979117169, + "grad_norm": 154.0, + "learning_rate": 2.2304678005579504e-05, + "loss": 8.938, + "step": 16704 + }, + { + "epoch": 0.6963027802092452, + "grad_norm": 388.0, + "learning_rate": 2.2299058336578933e-05, + "loss": 16.0003, + "step": 16705 + }, + { + "epoch": 0.6963444625067734, + "grad_norm": 486.0, + "learning_rate": 2.2293439172430476e-05, + "loss": 17.8758, + "step": 16706 + }, + { + "epoch": 0.6963861448043016, + "grad_norm": 712.0, + "learning_rate": 2.2287820513236553e-05, + "loss": 19.1251, + "step": 16707 + }, + { + "epoch": 0.6964278271018298, + "grad_norm": 238.0, + "learning_rate": 2.2282202359099557e-05, + "loss": 12.0627, + "step": 16708 + }, + { + "epoch": 0.6964695093993581, + "grad_norm": 108.5, + "learning_rate": 2.2276584710121888e-05, + "loss": 9.7503, + "step": 16709 + }, + { + "epoch": 0.6965111916968864, + "grad_norm": 512.0, + "learning_rate": 2.2270967566405925e-05, + "loss": 16.7535, + "step": 16710 + }, + { + "epoch": 0.6965528739944146, + "grad_norm": 740.0, + "learning_rate": 2.226535092805404e-05, + "loss": 19.5015, + "step": 16711 + }, + { + "epoch": 0.6965945562919428, + "grad_norm": 214.0, + "learning_rate": 2.225973479516859e-05, + "loss": 11.7509, + "step": 16712 + }, + { + "epoch": 0.6966362385894711, + "grad_norm": 101.5, + "learning_rate": 2.2254119167851945e-05, + "loss": 7.4378, + "step": 16713 + }, + { + "epoch": 0.6966779208869993, + "grad_norm": 392.0, + "learning_rate": 2.224850404620643e-05, + "loss": 14.5627, + "step": 16714 + }, + { + "epoch": 0.6967196031845275, + "grad_norm": 91.5, + "learning_rate": 2.2242889430334384e-05, + "loss": 8.6256, + "step": 16715 + }, + { + "epoch": 0.6967612854820557, + "grad_norm": 167.0, + "learning_rate": 2.2237275320338174e-05, + "loss": 7.4696, + "step": 16716 + }, + { + "epoch": 0.6968029677795841, + "grad_norm": 98.0, + "learning_rate": 2.2231661716320052e-05, + "loss": 10.3759, + "step": 16717 + }, + { + "epoch": 0.6968446500771123, + "grad_norm": 560.0, + "learning_rate": 2.2226048618382395e-05, + "loss": 18.2502, + "step": 16718 + }, + { + "epoch": 0.6968863323746405, + "grad_norm": 198.0, + "learning_rate": 2.222043602662743e-05, + "loss": 11.2502, + "step": 16719 + }, + { + "epoch": 0.6969280146721687, + "grad_norm": 404.0, + "learning_rate": 2.2214823941157524e-05, + "loss": 15.4377, + "step": 16720 + }, + { + "epoch": 0.696969696969697, + "grad_norm": 217.0, + "learning_rate": 2.2209212362074876e-05, + "loss": 11.813, + "step": 16721 + }, + { + "epoch": 0.6970113792672252, + "grad_norm": 486.0, + "learning_rate": 2.220360128948184e-05, + "loss": 17.126, + "step": 16722 + }, + { + "epoch": 0.6970530615647534, + "grad_norm": 400.0, + "learning_rate": 2.2197990723480604e-05, + "loss": 15.1879, + "step": 16723 + }, + { + "epoch": 0.6970947438622817, + "grad_norm": 824.0, + "learning_rate": 2.2192380664173472e-05, + "loss": 23.5019, + "step": 16724 + }, + { + "epoch": 0.69713642615981, + "grad_norm": 316.0, + "learning_rate": 2.218677111166267e-05, + "loss": 14.2508, + "step": 16725 + }, + { + "epoch": 0.6971781084573382, + "grad_norm": 228.0, + "learning_rate": 2.2181162066050433e-05, + "loss": 11.5626, + "step": 16726 + }, + { + "epoch": 0.6972197907548664, + "grad_norm": 332.0, + "learning_rate": 2.2175553527438986e-05, + "loss": 14.2505, + "step": 16727 + }, + { + "epoch": 0.6972614730523946, + "grad_norm": 322.0, + "learning_rate": 2.216994549593055e-05, + "loss": 13.1877, + "step": 16728 + }, + { + "epoch": 0.6973031553499229, + "grad_norm": 136.0, + "learning_rate": 2.2164337971627325e-05, + "loss": 10.2506, + "step": 16729 + }, + { + "epoch": 0.6973448376474511, + "grad_norm": 512.0, + "learning_rate": 2.2158730954631513e-05, + "loss": 16.2521, + "step": 16730 + }, + { + "epoch": 0.6973865199449794, + "grad_norm": 292.0, + "learning_rate": 2.21531244450453e-05, + "loss": 14.0012, + "step": 16731 + }, + { + "epoch": 0.6974282022425076, + "grad_norm": 234.0, + "learning_rate": 2.2147518442970866e-05, + "loss": 12.3753, + "step": 16732 + }, + { + "epoch": 0.6974698845400359, + "grad_norm": 1504.0, + "learning_rate": 2.214191294851038e-05, + "loss": 30.3789, + "step": 16733 + }, + { + "epoch": 0.6975115668375641, + "grad_norm": 556.0, + "learning_rate": 2.2136307961766002e-05, + "loss": 18.3752, + "step": 16734 + }, + { + "epoch": 0.6975532491350923, + "grad_norm": 122.5, + "learning_rate": 2.2130703482839886e-05, + "loss": 8.0018, + "step": 16735 + }, + { + "epoch": 0.6975949314326205, + "grad_norm": 80.5, + "learning_rate": 2.2125099511834173e-05, + "loss": 7.8445, + "step": 16736 + }, + { + "epoch": 0.6976366137301488, + "grad_norm": 350.0, + "learning_rate": 2.2119496048851002e-05, + "loss": 14.8765, + "step": 16737 + }, + { + "epoch": 0.6976782960276771, + "grad_norm": 81.5, + "learning_rate": 2.2113893093992484e-05, + "loss": 9.0002, + "step": 16738 + }, + { + "epoch": 0.6977199783252053, + "grad_norm": 490.0, + "learning_rate": 2.2108290647360724e-05, + "loss": 17.8751, + "step": 16739 + }, + { + "epoch": 0.6977616606227335, + "grad_norm": 338.0, + "learning_rate": 2.210268870905788e-05, + "loss": 13.3127, + "step": 16740 + }, + { + "epoch": 0.6978033429202618, + "grad_norm": 260.0, + "learning_rate": 2.2097087279185973e-05, + "loss": 13.6887, + "step": 16741 + }, + { + "epoch": 0.69784502521779, + "grad_norm": 104.0, + "learning_rate": 2.2091486357847163e-05, + "loss": 7.6566, + "step": 16742 + }, + { + "epoch": 0.6978867075153182, + "grad_norm": 190.0, + "learning_rate": 2.2085885945143453e-05, + "loss": 11.4384, + "step": 16743 + }, + { + "epoch": 0.6979283898128464, + "grad_norm": 438.0, + "learning_rate": 2.208028604117699e-05, + "loss": 16.0011, + "step": 16744 + }, + { + "epoch": 0.6979700721103748, + "grad_norm": 476.0, + "learning_rate": 2.2074686646049758e-05, + "loss": 17.2504, + "step": 16745 + }, + { + "epoch": 0.698011754407903, + "grad_norm": 344.0, + "learning_rate": 2.206908775986387e-05, + "loss": 12.626, + "step": 16746 + }, + { + "epoch": 0.6980534367054312, + "grad_norm": 196.0, + "learning_rate": 2.206348938272131e-05, + "loss": 9.6252, + "step": 16747 + }, + { + "epoch": 0.6980951190029594, + "grad_norm": 224.0, + "learning_rate": 2.2057891514724165e-05, + "loss": 11.6878, + "step": 16748 + }, + { + "epoch": 0.6981368013004877, + "grad_norm": 924.0, + "learning_rate": 2.2052294155974394e-05, + "loss": 24.2508, + "step": 16749 + }, + { + "epoch": 0.6981784835980159, + "grad_norm": 384.0, + "learning_rate": 2.204669730657406e-05, + "loss": 11.3781, + "step": 16750 + }, + { + "epoch": 0.6982201658955441, + "grad_norm": 420.0, + "learning_rate": 2.204110096662515e-05, + "loss": 14.4377, + "step": 16751 + }, + { + "epoch": 0.6982618481930724, + "grad_norm": 168.0, + "learning_rate": 2.203550513622966e-05, + "loss": 11.3128, + "step": 16752 + }, + { + "epoch": 0.6983035304906007, + "grad_norm": 148.0, + "learning_rate": 2.2029909815489568e-05, + "loss": 11.2503, + "step": 16753 + }, + { + "epoch": 0.6983452127881289, + "grad_norm": 158.0, + "learning_rate": 2.2024315004506852e-05, + "loss": 10.8753, + "step": 16754 + }, + { + "epoch": 0.6983868950856571, + "grad_norm": 334.0, + "learning_rate": 2.201872070338348e-05, + "loss": 14.5627, + "step": 16755 + }, + { + "epoch": 0.6984285773831853, + "grad_norm": 416.0, + "learning_rate": 2.2013126912221405e-05, + "loss": 15.6253, + "step": 16756 + }, + { + "epoch": 0.6984702596807136, + "grad_norm": 178.0, + "learning_rate": 2.2007533631122578e-05, + "loss": 10.5004, + "step": 16757 + }, + { + "epoch": 0.6985119419782418, + "grad_norm": 193.0, + "learning_rate": 2.2001940860188934e-05, + "loss": 9.7503, + "step": 16758 + }, + { + "epoch": 0.6985536242757701, + "grad_norm": 242.0, + "learning_rate": 2.1996348599522408e-05, + "loss": 15.0006, + "step": 16759 + }, + { + "epoch": 0.6985953065732983, + "grad_norm": 86.0, + "learning_rate": 2.1990756849224915e-05, + "loss": 8.7502, + "step": 16760 + }, + { + "epoch": 0.6986369888708266, + "grad_norm": 472.0, + "learning_rate": 2.1985165609398357e-05, + "loss": 15.4383, + "step": 16761 + }, + { + "epoch": 0.6986786711683548, + "grad_norm": 552.0, + "learning_rate": 2.197957488014465e-05, + "loss": 17.6252, + "step": 16762 + }, + { + "epoch": 0.698720353465883, + "grad_norm": 258.0, + "learning_rate": 2.197398466156567e-05, + "loss": 9.7505, + "step": 16763 + }, + { + "epoch": 0.6987620357634112, + "grad_norm": 133.0, + "learning_rate": 2.1968394953763315e-05, + "loss": 10.563, + "step": 16764 + }, + { + "epoch": 0.6988037180609395, + "grad_norm": 246.0, + "learning_rate": 2.1962805756839432e-05, + "loss": 11.5627, + "step": 16765 + }, + { + "epoch": 0.6988454003584678, + "grad_norm": 270.0, + "learning_rate": 2.1957217070895936e-05, + "loss": 12.8752, + "step": 16766 + }, + { + "epoch": 0.698887082655996, + "grad_norm": 306.0, + "learning_rate": 2.1951628896034615e-05, + "loss": 11.0007, + "step": 16767 + }, + { + "epoch": 0.6989287649535242, + "grad_norm": 416.0, + "learning_rate": 2.1946041232357385e-05, + "loss": 14.7537, + "step": 16768 + }, + { + "epoch": 0.6989704472510525, + "grad_norm": 225.0, + "learning_rate": 2.1940454079966e-05, + "loss": 11.438, + "step": 16769 + }, + { + "epoch": 0.6990121295485807, + "grad_norm": 600.0, + "learning_rate": 2.1934867438962376e-05, + "loss": 17.6251, + "step": 16770 + }, + { + "epoch": 0.6990538118461089, + "grad_norm": 564.0, + "learning_rate": 2.1929281309448242e-05, + "loss": 18.5002, + "step": 16771 + }, + { + "epoch": 0.6990954941436373, + "grad_norm": 157.0, + "learning_rate": 2.1923695691525485e-05, + "loss": 9.8127, + "step": 16772 + }, + { + "epoch": 0.6991371764411655, + "grad_norm": 458.0, + "learning_rate": 2.191811058529583e-05, + "loss": 16.2503, + "step": 16773 + }, + { + "epoch": 0.6991788587386937, + "grad_norm": 280.0, + "learning_rate": 2.1912525990861123e-05, + "loss": 13.8127, + "step": 16774 + }, + { + "epoch": 0.6992205410362219, + "grad_norm": 140.0, + "learning_rate": 2.190694190832312e-05, + "loss": 10.3128, + "step": 16775 + }, + { + "epoch": 0.6992622233337502, + "grad_norm": 216.0, + "learning_rate": 2.1901358337783595e-05, + "loss": 8.6265, + "step": 16776 + }, + { + "epoch": 0.6993039056312784, + "grad_norm": 81.5, + "learning_rate": 2.189577527934431e-05, + "loss": 8.6259, + "step": 16777 + }, + { + "epoch": 0.6993455879288066, + "grad_norm": 402.0, + "learning_rate": 2.1890192733107017e-05, + "loss": 14.8129, + "step": 16778 + }, + { + "epoch": 0.6993872702263348, + "grad_norm": 155.0, + "learning_rate": 2.1884610699173458e-05, + "loss": 9.8752, + "step": 16779 + }, + { + "epoch": 0.6994289525238632, + "grad_norm": 210.0, + "learning_rate": 2.187902917764536e-05, + "loss": 11.0628, + "step": 16780 + }, + { + "epoch": 0.6994706348213914, + "grad_norm": 352.0, + "learning_rate": 2.1873448168624454e-05, + "loss": 14.0627, + "step": 16781 + }, + { + "epoch": 0.6995123171189196, + "grad_norm": 83.0, + "learning_rate": 2.186786767221245e-05, + "loss": 8.1255, + "step": 16782 + }, + { + "epoch": 0.6995539994164478, + "grad_norm": 556.0, + "learning_rate": 2.1862287688511057e-05, + "loss": 17.0002, + "step": 16783 + }, + { + "epoch": 0.6995956817139761, + "grad_norm": 728.0, + "learning_rate": 2.1856708217621967e-05, + "loss": 21.0004, + "step": 16784 + }, + { + "epoch": 0.6996373640115043, + "grad_norm": 276.0, + "learning_rate": 2.1851129259646875e-05, + "loss": 11.0011, + "step": 16785 + }, + { + "epoch": 0.6996790463090325, + "grad_norm": 624.0, + "learning_rate": 2.1845550814687442e-05, + "loss": 20.3755, + "step": 16786 + }, + { + "epoch": 0.6997207286065608, + "grad_norm": 200.0, + "learning_rate": 2.183997288284535e-05, + "loss": 10.8753, + "step": 16787 + }, + { + "epoch": 0.6997624109040891, + "grad_norm": 205.0, + "learning_rate": 2.1834395464222253e-05, + "loss": 12.5004, + "step": 16788 + }, + { + "epoch": 0.6998040932016173, + "grad_norm": 424.0, + "learning_rate": 2.1828818558919796e-05, + "loss": 15.8761, + "step": 16789 + }, + { + "epoch": 0.6998457754991455, + "grad_norm": 189.0, + "learning_rate": 2.182324216703962e-05, + "loss": 11.3127, + "step": 16790 + }, + { + "epoch": 0.6998874577966737, + "grad_norm": 704.0, + "learning_rate": 2.1817666288683343e-05, + "loss": 21.3754, + "step": 16791 + }, + { + "epoch": 0.699929140094202, + "grad_norm": 656.0, + "learning_rate": 2.1812090923952633e-05, + "loss": 19.0005, + "step": 16792 + }, + { + "epoch": 0.6999708223917303, + "grad_norm": 59.25, + "learning_rate": 2.1806516072949028e-05, + "loss": 7.2507, + "step": 16793 + }, + { + "epoch": 0.7000125046892585, + "grad_norm": 111.0, + "learning_rate": 2.1800941735774216e-05, + "loss": 9.3761, + "step": 16794 + }, + { + "epoch": 0.7000541869867867, + "grad_norm": 258.0, + "learning_rate": 2.1795367912529703e-05, + "loss": 11.6881, + "step": 16795 + }, + { + "epoch": 0.700095869284315, + "grad_norm": 165.0, + "learning_rate": 2.178979460331715e-05, + "loss": 10.7513, + "step": 16796 + }, + { + "epoch": 0.7001375515818432, + "grad_norm": 236.0, + "learning_rate": 2.178422180823806e-05, + "loss": 12.688, + "step": 16797 + }, + { + "epoch": 0.7001792338793714, + "grad_norm": 940.0, + "learning_rate": 2.177864952739407e-05, + "loss": 21.7547, + "step": 16798 + }, + { + "epoch": 0.7002209161768996, + "grad_norm": 75.0, + "learning_rate": 2.1773077760886658e-05, + "loss": 6.4689, + "step": 16799 + }, + { + "epoch": 0.700262598474428, + "grad_norm": 128.0, + "learning_rate": 2.1767506508817426e-05, + "loss": 8.8757, + "step": 16800 + }, + { + "epoch": 0.7003042807719562, + "grad_norm": 246.0, + "learning_rate": 2.1761935771287895e-05, + "loss": 12.3751, + "step": 16801 + }, + { + "epoch": 0.7003459630694844, + "grad_norm": 360.0, + "learning_rate": 2.1756365548399594e-05, + "loss": 15.377, + "step": 16802 + }, + { + "epoch": 0.7003876453670126, + "grad_norm": 458.0, + "learning_rate": 2.1750795840254036e-05, + "loss": 15.5001, + "step": 16803 + }, + { + "epoch": 0.7004293276645409, + "grad_norm": 264.0, + "learning_rate": 2.174522664695273e-05, + "loss": 14.0628, + "step": 16804 + }, + { + "epoch": 0.7004710099620691, + "grad_norm": 430.0, + "learning_rate": 2.173965796859718e-05, + "loss": 14.5631, + "step": 16805 + }, + { + "epoch": 0.7005126922595973, + "grad_norm": 892.0, + "learning_rate": 2.1734089805288872e-05, + "loss": 23.7504, + "step": 16806 + }, + { + "epoch": 0.7005543745571255, + "grad_norm": 680.0, + "learning_rate": 2.1728522157129288e-05, + "loss": 17.6284, + "step": 16807 + }, + { + "epoch": 0.7005960568546539, + "grad_norm": 1392.0, + "learning_rate": 2.1722955024219893e-05, + "loss": 25.5005, + "step": 16808 + }, + { + "epoch": 0.7006377391521821, + "grad_norm": 214.0, + "learning_rate": 2.1717388406662155e-05, + "loss": 12.3133, + "step": 16809 + }, + { + "epoch": 0.7006794214497103, + "grad_norm": 236.0, + "learning_rate": 2.1711822304557528e-05, + "loss": 11.1876, + "step": 16810 + }, + { + "epoch": 0.7007211037472385, + "grad_norm": 434.0, + "learning_rate": 2.1706256718007446e-05, + "loss": 13.7506, + "step": 16811 + }, + { + "epoch": 0.7007627860447668, + "grad_norm": 408.0, + "learning_rate": 2.1700691647113347e-05, + "loss": 14.3757, + "step": 16812 + }, + { + "epoch": 0.700804468342295, + "grad_norm": 404.0, + "learning_rate": 2.1695127091976654e-05, + "loss": 17.2503, + "step": 16813 + }, + { + "epoch": 0.7008461506398233, + "grad_norm": 262.0, + "learning_rate": 2.1689563052698787e-05, + "loss": 12.0006, + "step": 16814 + }, + { + "epoch": 0.7008878329373515, + "grad_norm": 932.0, + "learning_rate": 2.1683999529381123e-05, + "loss": 26.5002, + "step": 16815 + }, + { + "epoch": 0.7009295152348798, + "grad_norm": 123.5, + "learning_rate": 2.1678436522125123e-05, + "loss": 10.9378, + "step": 16816 + }, + { + "epoch": 0.700971197532408, + "grad_norm": 95.0, + "learning_rate": 2.167287403103209e-05, + "loss": 9.9378, + "step": 16817 + }, + { + "epoch": 0.7010128798299362, + "grad_norm": 584.0, + "learning_rate": 2.166731205620348e-05, + "loss": 19.751, + "step": 16818 + }, + { + "epoch": 0.7010545621274644, + "grad_norm": 736.0, + "learning_rate": 2.1661750597740586e-05, + "loss": 18.8756, + "step": 16819 + }, + { + "epoch": 0.7010962444249927, + "grad_norm": 588.0, + "learning_rate": 2.1656189655744845e-05, + "loss": 19.1251, + "step": 16820 + }, + { + "epoch": 0.701137926722521, + "grad_norm": 580.0, + "learning_rate": 2.1650629230317527e-05, + "loss": 18.8765, + "step": 16821 + }, + { + "epoch": 0.7011796090200492, + "grad_norm": 328.0, + "learning_rate": 2.1645069321560042e-05, + "loss": 14.8134, + "step": 16822 + }, + { + "epoch": 0.7012212913175774, + "grad_norm": 162.0, + "learning_rate": 2.163950992957364e-05, + "loss": 10.6254, + "step": 16823 + }, + { + "epoch": 0.7012629736151057, + "grad_norm": 680.0, + "learning_rate": 2.163395105445974e-05, + "loss": 21.1253, + "step": 16824 + }, + { + "epoch": 0.7013046559126339, + "grad_norm": 428.0, + "learning_rate": 2.162839269631955e-05, + "loss": 16.3757, + "step": 16825 + }, + { + "epoch": 0.7013463382101621, + "grad_norm": 386.0, + "learning_rate": 2.1622834855254448e-05, + "loss": 15.2502, + "step": 16826 + }, + { + "epoch": 0.7013880205076903, + "grad_norm": 318.0, + "learning_rate": 2.1617277531365697e-05, + "loss": 14.2505, + "step": 16827 + }, + { + "epoch": 0.7014297028052187, + "grad_norm": 1288.0, + "learning_rate": 2.161172072475458e-05, + "loss": 26.2536, + "step": 16828 + }, + { + "epoch": 0.7014713851027469, + "grad_norm": 326.0, + "learning_rate": 2.160616443552238e-05, + "loss": 13.6257, + "step": 16829 + }, + { + "epoch": 0.7015130674002751, + "grad_norm": 502.0, + "learning_rate": 2.160060866377035e-05, + "loss": 16.7509, + "step": 16830 + }, + { + "epoch": 0.7015547496978033, + "grad_norm": 502.0, + "learning_rate": 2.1595053409599747e-05, + "loss": 16.3755, + "step": 16831 + }, + { + "epoch": 0.7015964319953316, + "grad_norm": 672.0, + "learning_rate": 2.1589498673111803e-05, + "loss": 20.7503, + "step": 16832 + }, + { + "epoch": 0.7016381142928598, + "grad_norm": 418.0, + "learning_rate": 2.1583944454407795e-05, + "loss": 15.0004, + "step": 16833 + }, + { + "epoch": 0.701679796590388, + "grad_norm": 197.0, + "learning_rate": 2.1578390753588895e-05, + "loss": 10.5003, + "step": 16834 + }, + { + "epoch": 0.7017214788879163, + "grad_norm": 276.0, + "learning_rate": 2.1572837570756376e-05, + "loss": 11.9383, + "step": 16835 + }, + { + "epoch": 0.7017631611854446, + "grad_norm": 162.0, + "learning_rate": 2.1567284906011386e-05, + "loss": 10.8757, + "step": 16836 + }, + { + "epoch": 0.7018048434829728, + "grad_norm": 692.0, + "learning_rate": 2.1561732759455183e-05, + "loss": 21.6252, + "step": 16837 + }, + { + "epoch": 0.701846525780501, + "grad_norm": 81.5, + "learning_rate": 2.1556181131188897e-05, + "loss": 8.0006, + "step": 16838 + }, + { + "epoch": 0.7018882080780292, + "grad_norm": 382.0, + "learning_rate": 2.155063002131376e-05, + "loss": 16.3753, + "step": 16839 + }, + { + "epoch": 0.7019298903755575, + "grad_norm": 412.0, + "learning_rate": 2.1545079429930885e-05, + "loss": 15.5003, + "step": 16840 + }, + { + "epoch": 0.7019715726730857, + "grad_norm": 268.0, + "learning_rate": 2.1539529357141487e-05, + "loss": 12.8754, + "step": 16841 + }, + { + "epoch": 0.702013254970614, + "grad_norm": 185.0, + "learning_rate": 2.153397980304669e-05, + "loss": 11.9392, + "step": 16842 + }, + { + "epoch": 0.7020549372681422, + "grad_norm": 162.0, + "learning_rate": 2.1528430767747632e-05, + "loss": 9.5002, + "step": 16843 + }, + { + "epoch": 0.7020966195656705, + "grad_norm": 324.0, + "learning_rate": 2.1522882251345454e-05, + "loss": 11.8127, + "step": 16844 + }, + { + "epoch": 0.7021383018631987, + "grad_norm": 187.0, + "learning_rate": 2.151733425394128e-05, + "loss": 10.5009, + "step": 16845 + }, + { + "epoch": 0.7021799841607269, + "grad_norm": 728.0, + "learning_rate": 2.1511786775636213e-05, + "loss": 20.5027, + "step": 16846 + }, + { + "epoch": 0.7022216664582552, + "grad_norm": 266.0, + "learning_rate": 2.1506239816531366e-05, + "loss": 11.6878, + "step": 16847 + }, + { + "epoch": 0.7022633487557834, + "grad_norm": 226.0, + "learning_rate": 2.150069337672782e-05, + "loss": 12.1878, + "step": 16848 + }, + { + "epoch": 0.7023050310533117, + "grad_norm": 298.0, + "learning_rate": 2.149514745632667e-05, + "loss": 12.2528, + "step": 16849 + }, + { + "epoch": 0.7023467133508399, + "grad_norm": 608.0, + "learning_rate": 2.1489602055428993e-05, + "loss": 19.0005, + "step": 16850 + }, + { + "epoch": 0.7023883956483682, + "grad_norm": 1004.0, + "learning_rate": 2.148405717413584e-05, + "loss": 25.7524, + "step": 16851 + }, + { + "epoch": 0.7024300779458964, + "grad_norm": 234.0, + "learning_rate": 2.1478512812548285e-05, + "loss": 8.8751, + "step": 16852 + }, + { + "epoch": 0.7024717602434246, + "grad_norm": 358.0, + "learning_rate": 2.147296897076736e-05, + "loss": 15.0005, + "step": 16853 + }, + { + "epoch": 0.7025134425409528, + "grad_norm": 176.0, + "learning_rate": 2.146742564889411e-05, + "loss": 11.5627, + "step": 16854 + }, + { + "epoch": 0.7025551248384811, + "grad_norm": 176.0, + "learning_rate": 2.1461882847029562e-05, + "loss": 12.6881, + "step": 16855 + }, + { + "epoch": 0.7025968071360094, + "grad_norm": 360.0, + "learning_rate": 2.1456340565274718e-05, + "loss": 13.8751, + "step": 16856 + }, + { + "epoch": 0.7026384894335376, + "grad_norm": 532.0, + "learning_rate": 2.1450798803730633e-05, + "loss": 16.1251, + "step": 16857 + }, + { + "epoch": 0.7026801717310658, + "grad_norm": 147.0, + "learning_rate": 2.1445257562498234e-05, + "loss": 9.3126, + "step": 16858 + }, + { + "epoch": 0.7027218540285941, + "grad_norm": 468.0, + "learning_rate": 2.1439716841678592e-05, + "loss": 14.5627, + "step": 16859 + }, + { + "epoch": 0.7027635363261223, + "grad_norm": 398.0, + "learning_rate": 2.1434176641372612e-05, + "loss": 14.0037, + "step": 16860 + }, + { + "epoch": 0.7028052186236505, + "grad_norm": 316.0, + "learning_rate": 2.1428636961681332e-05, + "loss": 13.376, + "step": 16861 + }, + { + "epoch": 0.7028469009211787, + "grad_norm": 596.0, + "learning_rate": 2.142309780270564e-05, + "loss": 20.1252, + "step": 16862 + }, + { + "epoch": 0.7028885832187071, + "grad_norm": 175.0, + "learning_rate": 2.1417559164546563e-05, + "loss": 10.3135, + "step": 16863 + }, + { + "epoch": 0.7029302655162353, + "grad_norm": 516.0, + "learning_rate": 2.1412021047304976e-05, + "loss": 18.7509, + "step": 16864 + }, + { + "epoch": 0.7029719478137635, + "grad_norm": 460.0, + "learning_rate": 2.140648345108186e-05, + "loss": 14.8753, + "step": 16865 + }, + { + "epoch": 0.7030136301112917, + "grad_norm": 364.0, + "learning_rate": 2.1400946375978125e-05, + "loss": 14.6877, + "step": 16866 + }, + { + "epoch": 0.70305531240882, + "grad_norm": 155.0, + "learning_rate": 2.1395409822094674e-05, + "loss": 7.7819, + "step": 16867 + }, + { + "epoch": 0.7030969947063482, + "grad_norm": 1088.0, + "learning_rate": 2.1389873789532423e-05, + "loss": 22.3801, + "step": 16868 + }, + { + "epoch": 0.7031386770038764, + "grad_norm": 664.0, + "learning_rate": 2.1384338278392264e-05, + "loss": 20.6256, + "step": 16869 + }, + { + "epoch": 0.7031803593014047, + "grad_norm": 180.0, + "learning_rate": 2.1378803288775074e-05, + "loss": 10.5627, + "step": 16870 + }, + { + "epoch": 0.703222041598933, + "grad_norm": 97.0, + "learning_rate": 2.137326882078174e-05, + "loss": 9.5002, + "step": 16871 + }, + { + "epoch": 0.7032637238964612, + "grad_norm": 166.0, + "learning_rate": 2.1367734874513124e-05, + "loss": 11.5628, + "step": 16872 + }, + { + "epoch": 0.7033054061939894, + "grad_norm": 111.5, + "learning_rate": 2.1362201450070075e-05, + "loss": 8.3129, + "step": 16873 + }, + { + "epoch": 0.7033470884915176, + "grad_norm": 892.0, + "learning_rate": 2.135666854755345e-05, + "loss": 25.0007, + "step": 16874 + }, + { + "epoch": 0.7033887707890459, + "grad_norm": 105.0, + "learning_rate": 2.1351136167064084e-05, + "loss": 11.251, + "step": 16875 + }, + { + "epoch": 0.7034304530865741, + "grad_norm": 169.0, + "learning_rate": 2.1345604308702803e-05, + "loss": 10.6257, + "step": 16876 + }, + { + "epoch": 0.7034721353841024, + "grad_norm": 203.0, + "learning_rate": 2.134007297257043e-05, + "loss": 11.6252, + "step": 16877 + }, + { + "epoch": 0.7035138176816306, + "grad_norm": 296.0, + "learning_rate": 2.1334542158767773e-05, + "loss": 13.5628, + "step": 16878 + }, + { + "epoch": 0.7035554999791589, + "grad_norm": 336.0, + "learning_rate": 2.1329011867395625e-05, + "loss": 14.2502, + "step": 16879 + }, + { + "epoch": 0.7035971822766871, + "grad_norm": 118.0, + "learning_rate": 2.1323482098554764e-05, + "loss": 9.0003, + "step": 16880 + }, + { + "epoch": 0.7036388645742153, + "grad_norm": 302.0, + "learning_rate": 2.1317952852346023e-05, + "loss": 14.0005, + "step": 16881 + }, + { + "epoch": 0.7036805468717435, + "grad_norm": 636.0, + "learning_rate": 2.1312424128870105e-05, + "loss": 18.0026, + "step": 16882 + }, + { + "epoch": 0.7037222291692719, + "grad_norm": 286.0, + "learning_rate": 2.130689592822784e-05, + "loss": 12.5636, + "step": 16883 + }, + { + "epoch": 0.7037639114668001, + "grad_norm": 270.0, + "learning_rate": 2.1301368250519903e-05, + "loss": 12.9381, + "step": 16884 + }, + { + "epoch": 0.7038055937643283, + "grad_norm": 306.0, + "learning_rate": 2.129584109584712e-05, + "loss": 11.4392, + "step": 16885 + }, + { + "epoch": 0.7038472760618565, + "grad_norm": 860.0, + "learning_rate": 2.129031446431014e-05, + "loss": 23.3783, + "step": 16886 + }, + { + "epoch": 0.7038889583593848, + "grad_norm": 159.0, + "learning_rate": 2.128478835600976e-05, + "loss": 9.8751, + "step": 16887 + }, + { + "epoch": 0.703930640656913, + "grad_norm": 216.0, + "learning_rate": 2.1279262771046632e-05, + "loss": 11.6265, + "step": 16888 + }, + { + "epoch": 0.7039723229544412, + "grad_norm": 264.0, + "learning_rate": 2.1273737709521523e-05, + "loss": 12.4377, + "step": 16889 + }, + { + "epoch": 0.7040140052519694, + "grad_norm": 170.0, + "learning_rate": 2.126821317153506e-05, + "loss": 8.6252, + "step": 16890 + }, + { + "epoch": 0.7040556875494978, + "grad_norm": 142.0, + "learning_rate": 2.1262689157187982e-05, + "loss": 11.6882, + "step": 16891 + }, + { + "epoch": 0.704097369847026, + "grad_norm": 120.0, + "learning_rate": 2.125716566658094e-05, + "loss": 8.2502, + "step": 16892 + }, + { + "epoch": 0.7041390521445542, + "grad_norm": 400.0, + "learning_rate": 2.1251642699814606e-05, + "loss": 14.8753, + "step": 16893 + }, + { + "epoch": 0.7041807344420824, + "grad_norm": 444.0, + "learning_rate": 2.1246120256989638e-05, + "loss": 15.6252, + "step": 16894 + }, + { + "epoch": 0.7042224167396107, + "grad_norm": 664.0, + "learning_rate": 2.124059833820668e-05, + "loss": 20.0004, + "step": 16895 + }, + { + "epoch": 0.7042640990371389, + "grad_norm": 338.0, + "learning_rate": 2.1235076943566372e-05, + "loss": 12.8127, + "step": 16896 + }, + { + "epoch": 0.7043057813346671, + "grad_norm": 266.0, + "learning_rate": 2.122955607316934e-05, + "loss": 12.1257, + "step": 16897 + }, + { + "epoch": 0.7043474636321954, + "grad_norm": 880.0, + "learning_rate": 2.1224035727116204e-05, + "loss": 22.3754, + "step": 16898 + }, + { + "epoch": 0.7043891459297237, + "grad_norm": 142.0, + "learning_rate": 2.121851590550757e-05, + "loss": 10.7512, + "step": 16899 + }, + { + "epoch": 0.7044308282272519, + "grad_norm": 350.0, + "learning_rate": 2.1212996608444035e-05, + "loss": 13.8131, + "step": 16900 + }, + { + "epoch": 0.7044725105247801, + "grad_norm": 229.0, + "learning_rate": 2.1207477836026195e-05, + "loss": 12.0012, + "step": 16901 + }, + { + "epoch": 0.7045141928223083, + "grad_norm": 290.0, + "learning_rate": 2.1201959588354624e-05, + "loss": 13.8795, + "step": 16902 + }, + { + "epoch": 0.7045558751198366, + "grad_norm": 294.0, + "learning_rate": 2.1196441865529902e-05, + "loss": 12.2503, + "step": 16903 + }, + { + "epoch": 0.7045975574173649, + "grad_norm": 374.0, + "learning_rate": 2.1190924667652585e-05, + "loss": 15.1253, + "step": 16904 + }, + { + "epoch": 0.7046392397148931, + "grad_norm": 308.0, + "learning_rate": 2.118540799482322e-05, + "loss": 14.1253, + "step": 16905 + }, + { + "epoch": 0.7046809220124213, + "grad_norm": 468.0, + "learning_rate": 2.117989184714233e-05, + "loss": 16.1252, + "step": 16906 + }, + { + "epoch": 0.7047226043099496, + "grad_norm": 492.0, + "learning_rate": 2.1174376224710512e-05, + "loss": 18.6258, + "step": 16907 + }, + { + "epoch": 0.7047642866074778, + "grad_norm": 1568.0, + "learning_rate": 2.116886112762821e-05, + "loss": 35.2501, + "step": 16908 + }, + { + "epoch": 0.704805968905006, + "grad_norm": 640.0, + "learning_rate": 2.1163346555996005e-05, + "loss": 18.6253, + "step": 16909 + }, + { + "epoch": 0.7048476512025342, + "grad_norm": 92.5, + "learning_rate": 2.1157832509914332e-05, + "loss": 7.9386, + "step": 16910 + }, + { + "epoch": 0.7048893335000626, + "grad_norm": 85.5, + "learning_rate": 2.1152318989483767e-05, + "loss": 8.1254, + "step": 16911 + }, + { + "epoch": 0.7049310157975908, + "grad_norm": 454.0, + "learning_rate": 2.11468059948047e-05, + "loss": 17.2505, + "step": 16912 + }, + { + "epoch": 0.704972698095119, + "grad_norm": 424.0, + "learning_rate": 2.11412935259777e-05, + "loss": 16.0003, + "step": 16913 + }, + { + "epoch": 0.7050143803926472, + "grad_norm": 536.0, + "learning_rate": 2.1135781583103152e-05, + "loss": 16.2503, + "step": 16914 + }, + { + "epoch": 0.7050560626901755, + "grad_norm": 460.0, + "learning_rate": 2.1130270166281564e-05, + "loss": 16.376, + "step": 16915 + }, + { + "epoch": 0.7050977449877037, + "grad_norm": 524.0, + "learning_rate": 2.1124759275613367e-05, + "loss": 18.2504, + "step": 16916 + }, + { + "epoch": 0.7051394272852319, + "grad_norm": 223.0, + "learning_rate": 2.1119248911198987e-05, + "loss": 11.6253, + "step": 16917 + }, + { + "epoch": 0.7051811095827603, + "grad_norm": 143.0, + "learning_rate": 2.1113739073138867e-05, + "loss": 9.4377, + "step": 16918 + }, + { + "epoch": 0.7052227918802885, + "grad_norm": 544.0, + "learning_rate": 2.1108229761533416e-05, + "loss": 18.0004, + "step": 16919 + }, + { + "epoch": 0.7052644741778167, + "grad_norm": 181.0, + "learning_rate": 2.1102720976483036e-05, + "loss": 10.1892, + "step": 16920 + }, + { + "epoch": 0.7053061564753449, + "grad_norm": 620.0, + "learning_rate": 2.1097212718088137e-05, + "loss": 19.8755, + "step": 16921 + }, + { + "epoch": 0.7053478387728732, + "grad_norm": 124.0, + "learning_rate": 2.10917049864491e-05, + "loss": 10.4379, + "step": 16922 + }, + { + "epoch": 0.7053895210704014, + "grad_norm": 712.0, + "learning_rate": 2.108619778166631e-05, + "loss": 20.253, + "step": 16923 + }, + { + "epoch": 0.7054312033679296, + "grad_norm": 584.0, + "learning_rate": 2.108069110384013e-05, + "loss": 16.379, + "step": 16924 + }, + { + "epoch": 0.7054728856654578, + "grad_norm": 178.0, + "learning_rate": 2.107518495307092e-05, + "loss": 11.3752, + "step": 16925 + }, + { + "epoch": 0.7055145679629862, + "grad_norm": 190.0, + "learning_rate": 2.1069679329459037e-05, + "loss": 11.3753, + "step": 16926 + }, + { + "epoch": 0.7055562502605144, + "grad_norm": 145.0, + "learning_rate": 2.1064174233104812e-05, + "loss": 8.7502, + "step": 16927 + }, + { + "epoch": 0.7055979325580426, + "grad_norm": 476.0, + "learning_rate": 2.1058669664108583e-05, + "loss": 17.002, + "step": 16928 + }, + { + "epoch": 0.7056396148555708, + "grad_norm": 532.0, + "learning_rate": 2.1053165622570664e-05, + "loss": 18.1252, + "step": 16929 + }, + { + "epoch": 0.7056812971530991, + "grad_norm": 177.0, + "learning_rate": 2.1047662108591377e-05, + "loss": 9.8127, + "step": 16930 + }, + { + "epoch": 0.7057229794506273, + "grad_norm": 217.0, + "learning_rate": 2.104215912227101e-05, + "loss": 12.5627, + "step": 16931 + }, + { + "epoch": 0.7057646617481556, + "grad_norm": 1208.0, + "learning_rate": 2.103665666370985e-05, + "loss": 26.1293, + "step": 16932 + }, + { + "epoch": 0.7058063440456838, + "grad_norm": 60.25, + "learning_rate": 2.1031154733008235e-05, + "loss": 8.0633, + "step": 16933 + }, + { + "epoch": 0.7058480263432121, + "grad_norm": 436.0, + "learning_rate": 2.1025653330266355e-05, + "loss": 15.6251, + "step": 16934 + }, + { + "epoch": 0.7058897086407403, + "grad_norm": 516.0, + "learning_rate": 2.1020152455584553e-05, + "loss": 16.2503, + "step": 16935 + }, + { + "epoch": 0.7059313909382685, + "grad_norm": 173.0, + "learning_rate": 2.1014652109063006e-05, + "loss": 10.6253, + "step": 16936 + }, + { + "epoch": 0.7059730732357967, + "grad_norm": 290.0, + "learning_rate": 2.1009152290802038e-05, + "loss": 11.5627, + "step": 16937 + }, + { + "epoch": 0.706014755533325, + "grad_norm": 302.0, + "learning_rate": 2.10036530009018e-05, + "loss": 14.0628, + "step": 16938 + }, + { + "epoch": 0.7060564378308533, + "grad_norm": 468.0, + "learning_rate": 2.0998154239462603e-05, + "loss": 15.6256, + "step": 16939 + }, + { + "epoch": 0.7060981201283815, + "grad_norm": 478.0, + "learning_rate": 2.0992656006584583e-05, + "loss": 15.2504, + "step": 16940 + }, + { + "epoch": 0.7061398024259097, + "grad_norm": 1336.0, + "learning_rate": 2.0987158302367997e-05, + "loss": 29.6304, + "step": 16941 + }, + { + "epoch": 0.706181484723438, + "grad_norm": 470.0, + "learning_rate": 2.0981661126913026e-05, + "loss": 15.1254, + "step": 16942 + }, + { + "epoch": 0.7062231670209662, + "grad_norm": 94.0, + "learning_rate": 2.097616448031986e-05, + "loss": 8.9377, + "step": 16943 + }, + { + "epoch": 0.7062648493184944, + "grad_norm": 354.0, + "learning_rate": 2.097066836268867e-05, + "loss": 15.1877, + "step": 16944 + }, + { + "epoch": 0.7063065316160226, + "grad_norm": 346.0, + "learning_rate": 2.096517277411963e-05, + "loss": 14.6255, + "step": 16945 + }, + { + "epoch": 0.706348213913551, + "grad_norm": 572.0, + "learning_rate": 2.0959677714712895e-05, + "loss": 18.5003, + "step": 16946 + }, + { + "epoch": 0.7063898962110792, + "grad_norm": 246.0, + "learning_rate": 2.0954183184568605e-05, + "loss": 13.3759, + "step": 16947 + }, + { + "epoch": 0.7064315785086074, + "grad_norm": 122.5, + "learning_rate": 2.0948689183786913e-05, + "loss": 8.563, + "step": 16948 + }, + { + "epoch": 0.7064732608061356, + "grad_norm": 520.0, + "learning_rate": 2.0943195712467933e-05, + "loss": 18.1251, + "step": 16949 + }, + { + "epoch": 0.7065149431036639, + "grad_norm": 266.0, + "learning_rate": 2.0937702770711792e-05, + "loss": 13.0002, + "step": 16950 + }, + { + "epoch": 0.7065566254011921, + "grad_norm": 172.0, + "learning_rate": 2.0932210358618598e-05, + "loss": 11.6252, + "step": 16951 + }, + { + "epoch": 0.7065983076987203, + "grad_norm": 282.0, + "learning_rate": 2.0926718476288452e-05, + "loss": 5.8767, + "step": 16952 + }, + { + "epoch": 0.7066399899962486, + "grad_norm": 916.0, + "learning_rate": 2.092122712382144e-05, + "loss": 23.5005, + "step": 16953 + }, + { + "epoch": 0.7066816722937769, + "grad_norm": 220.0, + "learning_rate": 2.0915736301317645e-05, + "loss": 12.3756, + "step": 16954 + }, + { + "epoch": 0.7067233545913051, + "grad_norm": 205.0, + "learning_rate": 2.0910246008877143e-05, + "loss": 11.8127, + "step": 16955 + }, + { + "epoch": 0.7067650368888333, + "grad_norm": 540.0, + "learning_rate": 2.0904756246599964e-05, + "loss": 17.8754, + "step": 16956 + }, + { + "epoch": 0.7068067191863615, + "grad_norm": 252.0, + "learning_rate": 2.0899267014586234e-05, + "loss": 12.8133, + "step": 16957 + }, + { + "epoch": 0.7068484014838898, + "grad_norm": 984.0, + "learning_rate": 2.0893778312935895e-05, + "loss": 23.5061, + "step": 16958 + }, + { + "epoch": 0.706890083781418, + "grad_norm": 112.0, + "learning_rate": 2.0888290141749077e-05, + "loss": 11.001, + "step": 16959 + }, + { + "epoch": 0.7069317660789463, + "grad_norm": 326.0, + "learning_rate": 2.0882802501125713e-05, + "loss": 14.5628, + "step": 16960 + }, + { + "epoch": 0.7069734483764745, + "grad_norm": 752.0, + "learning_rate": 2.08773153911659e-05, + "loss": 21.5017, + "step": 16961 + }, + { + "epoch": 0.7070151306740028, + "grad_norm": 157.0, + "learning_rate": 2.0871828811969556e-05, + "loss": 11.1252, + "step": 16962 + }, + { + "epoch": 0.707056812971531, + "grad_norm": 688.0, + "learning_rate": 2.086634276363676e-05, + "loss": 20.7505, + "step": 16963 + }, + { + "epoch": 0.7070984952690592, + "grad_norm": 192.0, + "learning_rate": 2.0860857246267414e-05, + "loss": 11.0627, + "step": 16964 + }, + { + "epoch": 0.7071401775665874, + "grad_norm": 296.0, + "learning_rate": 2.085537225996157e-05, + "loss": 14.4381, + "step": 16965 + }, + { + "epoch": 0.7071818598641157, + "grad_norm": 50.75, + "learning_rate": 2.084988780481911e-05, + "loss": 7.3754, + "step": 16966 + }, + { + "epoch": 0.707223542161644, + "grad_norm": 952.0, + "learning_rate": 2.084440388094006e-05, + "loss": 23.6288, + "step": 16967 + }, + { + "epoch": 0.7072652244591722, + "grad_norm": 1256.0, + "learning_rate": 2.0838920488424333e-05, + "loss": 29.1252, + "step": 16968 + }, + { + "epoch": 0.7073069067567004, + "grad_norm": 116.0, + "learning_rate": 2.083343762737187e-05, + "loss": 10.1903, + "step": 16969 + }, + { + "epoch": 0.7073485890542287, + "grad_norm": 171.0, + "learning_rate": 2.08279552978826e-05, + "loss": 11.5645, + "step": 16970 + }, + { + "epoch": 0.7073902713517569, + "grad_norm": 564.0, + "learning_rate": 2.0822473500056426e-05, + "loss": 20.0009, + "step": 16971 + }, + { + "epoch": 0.7074319536492851, + "grad_norm": 512.0, + "learning_rate": 2.081699223399327e-05, + "loss": 18.5003, + "step": 16972 + }, + { + "epoch": 0.7074736359468133, + "grad_norm": 416.0, + "learning_rate": 2.081151149979302e-05, + "loss": 16.2505, + "step": 16973 + }, + { + "epoch": 0.7075153182443417, + "grad_norm": 189.0, + "learning_rate": 2.0806031297555554e-05, + "loss": 9.7504, + "step": 16974 + }, + { + "epoch": 0.7075570005418699, + "grad_norm": 120.0, + "learning_rate": 2.080055162738077e-05, + "loss": 11.0629, + "step": 16975 + }, + { + "epoch": 0.7075986828393981, + "grad_norm": 173.0, + "learning_rate": 2.0795072489368522e-05, + "loss": 12.1254, + "step": 16976 + }, + { + "epoch": 0.7076403651369263, + "grad_norm": 153.0, + "learning_rate": 2.0789593883618668e-05, + "loss": 10.6252, + "step": 16977 + }, + { + "epoch": 0.7076820474344546, + "grad_norm": 127.5, + "learning_rate": 2.0784115810231064e-05, + "loss": 9.6879, + "step": 16978 + }, + { + "epoch": 0.7077237297319828, + "grad_norm": 1408.0, + "learning_rate": 2.077863826930554e-05, + "loss": 28.2503, + "step": 16979 + }, + { + "epoch": 0.707765412029511, + "grad_norm": 426.0, + "learning_rate": 2.0773161260941926e-05, + "loss": 15.4377, + "step": 16980 + }, + { + "epoch": 0.7078070943270393, + "grad_norm": 432.0, + "learning_rate": 2.0767684785240044e-05, + "loss": 15.4377, + "step": 16981 + }, + { + "epoch": 0.7078487766245676, + "grad_norm": 492.0, + "learning_rate": 2.0762208842299684e-05, + "loss": 17.5001, + "step": 16982 + }, + { + "epoch": 0.7078904589220958, + "grad_norm": 302.0, + "learning_rate": 2.0756733432220697e-05, + "loss": 13.3753, + "step": 16983 + }, + { + "epoch": 0.707932141219624, + "grad_norm": 844.0, + "learning_rate": 2.075125855510281e-05, + "loss": 23.6255, + "step": 16984 + }, + { + "epoch": 0.7079738235171522, + "grad_norm": 235.0, + "learning_rate": 2.074578421104586e-05, + "loss": 12.2502, + "step": 16985 + }, + { + "epoch": 0.7080155058146805, + "grad_norm": 408.0, + "learning_rate": 2.0740310400149558e-05, + "loss": 15.5633, + "step": 16986 + }, + { + "epoch": 0.7080571881122087, + "grad_norm": 326.0, + "learning_rate": 2.0734837122513736e-05, + "loss": 12.5627, + "step": 16987 + }, + { + "epoch": 0.708098870409737, + "grad_norm": 156.0, + "learning_rate": 2.072936437823806e-05, + "loss": 9.8756, + "step": 16988 + }, + { + "epoch": 0.7081405527072652, + "grad_norm": 300.0, + "learning_rate": 2.072389216742236e-05, + "loss": 12.876, + "step": 16989 + }, + { + "epoch": 0.7081822350047935, + "grad_norm": 462.0, + "learning_rate": 2.0718420490166284e-05, + "loss": 17.8757, + "step": 16990 + }, + { + "epoch": 0.7082239173023217, + "grad_norm": 203.0, + "learning_rate": 2.0712949346569616e-05, + "loss": 11.2503, + "step": 16991 + }, + { + "epoch": 0.7082655995998499, + "grad_norm": 668.0, + "learning_rate": 2.0707478736732043e-05, + "loss": 19.6254, + "step": 16992 + }, + { + "epoch": 0.7083072818973782, + "grad_norm": 720.0, + "learning_rate": 2.070200866075327e-05, + "loss": 21.0026, + "step": 16993 + }, + { + "epoch": 0.7083489641949064, + "grad_norm": 1096.0, + "learning_rate": 2.0696539118732993e-05, + "loss": 26.5002, + "step": 16994 + }, + { + "epoch": 0.7083906464924347, + "grad_norm": 812.0, + "learning_rate": 2.0691070110770887e-05, + "loss": 22.0006, + "step": 16995 + }, + { + "epoch": 0.7084323287899629, + "grad_norm": 368.0, + "learning_rate": 2.0685601636966635e-05, + "loss": 13.44, + "step": 16996 + }, + { + "epoch": 0.7084740110874912, + "grad_norm": 282.0, + "learning_rate": 2.0680133697419897e-05, + "loss": 13.3127, + "step": 16997 + }, + { + "epoch": 0.7085156933850194, + "grad_norm": 320.0, + "learning_rate": 2.0674666292230315e-05, + "loss": 13.4377, + "step": 16998 + }, + { + "epoch": 0.7085573756825476, + "grad_norm": 596.0, + "learning_rate": 2.0669199421497553e-05, + "loss": 19.0003, + "step": 16999 + }, + { + "epoch": 0.7085990579800758, + "grad_norm": 444.0, + "learning_rate": 2.0663733085321236e-05, + "loss": 16.0023, + "step": 17000 + }, + { + "epoch": 0.7086407402776042, + "grad_norm": 412.0, + "learning_rate": 2.065826728380098e-05, + "loss": 15.6254, + "step": 17001 + }, + { + "epoch": 0.7086824225751324, + "grad_norm": 386.0, + "learning_rate": 2.065280201703641e-05, + "loss": 15.1879, + "step": 17002 + }, + { + "epoch": 0.7087241048726606, + "grad_norm": 190.0, + "learning_rate": 2.0647337285127132e-05, + "loss": 11.1881, + "step": 17003 + }, + { + "epoch": 0.7087657871701888, + "grad_norm": 856.0, + "learning_rate": 2.0641873088172737e-05, + "loss": 19.1295, + "step": 17004 + }, + { + "epoch": 0.7088074694677171, + "grad_norm": 668.0, + "learning_rate": 2.0636409426272806e-05, + "loss": 21.8753, + "step": 17005 + }, + { + "epoch": 0.7088491517652453, + "grad_norm": 235.0, + "learning_rate": 2.063094629952691e-05, + "loss": 12.0628, + "step": 17006 + }, + { + "epoch": 0.7088908340627735, + "grad_norm": 134.0, + "learning_rate": 2.0625483708034655e-05, + "loss": 9.3751, + "step": 17007 + }, + { + "epoch": 0.7089325163603017, + "grad_norm": 366.0, + "learning_rate": 2.0620021651895533e-05, + "loss": 15.6252, + "step": 17008 + }, + { + "epoch": 0.7089741986578301, + "grad_norm": 636.0, + "learning_rate": 2.061456013120916e-05, + "loss": 19.3752, + "step": 17009 + }, + { + "epoch": 0.7090158809553583, + "grad_norm": 644.0, + "learning_rate": 2.0609099146075002e-05, + "loss": 19.876, + "step": 17010 + }, + { + "epoch": 0.7090575632528865, + "grad_norm": 440.0, + "learning_rate": 2.060363869659266e-05, + "loss": 17.0004, + "step": 17011 + }, + { + "epoch": 0.7090992455504147, + "grad_norm": 213.0, + "learning_rate": 2.059817878286157e-05, + "loss": 10.2503, + "step": 17012 + }, + { + "epoch": 0.709140927847943, + "grad_norm": 163.0, + "learning_rate": 2.059271940498132e-05, + "loss": 9.8752, + "step": 17013 + }, + { + "epoch": 0.7091826101454712, + "grad_norm": 314.0, + "learning_rate": 2.0587260563051337e-05, + "loss": 13.5627, + "step": 17014 + }, + { + "epoch": 0.7092242924429994, + "grad_norm": 656.0, + "learning_rate": 2.0581802257171172e-05, + "loss": 20.8761, + "step": 17015 + }, + { + "epoch": 0.7092659747405277, + "grad_norm": 256.0, + "learning_rate": 2.0576344487440243e-05, + "loss": 12.8753, + "step": 17016 + }, + { + "epoch": 0.709307657038056, + "grad_norm": 784.0, + "learning_rate": 2.0570887253958053e-05, + "loss": 22.1254, + "step": 17017 + }, + { + "epoch": 0.7093493393355842, + "grad_norm": 436.0, + "learning_rate": 2.0565430556824067e-05, + "loss": 16.2502, + "step": 17018 + }, + { + "epoch": 0.7093910216331124, + "grad_norm": 470.0, + "learning_rate": 2.055997439613772e-05, + "loss": 15.3754, + "step": 17019 + }, + { + "epoch": 0.7094327039306406, + "grad_norm": 282.0, + "learning_rate": 2.0554518771998456e-05, + "loss": 12.6881, + "step": 17020 + }, + { + "epoch": 0.7094743862281689, + "grad_norm": 233.0, + "learning_rate": 2.0549063684505693e-05, + "loss": 11.0004, + "step": 17021 + }, + { + "epoch": 0.7095160685256972, + "grad_norm": 510.0, + "learning_rate": 2.054360913375886e-05, + "loss": 16.6253, + "step": 17022 + }, + { + "epoch": 0.7095577508232254, + "grad_norm": 320.0, + "learning_rate": 2.053815511985737e-05, + "loss": 13.5026, + "step": 17023 + }, + { + "epoch": 0.7095994331207536, + "grad_norm": 384.0, + "learning_rate": 2.053270164290062e-05, + "loss": 15.3131, + "step": 17024 + }, + { + "epoch": 0.7096411154182819, + "grad_norm": 194.0, + "learning_rate": 2.0527248702987995e-05, + "loss": 11.0628, + "step": 17025 + }, + { + "epoch": 0.7096827977158101, + "grad_norm": 116.0, + "learning_rate": 2.0521796300218878e-05, + "loss": 7.8439, + "step": 17026 + }, + { + "epoch": 0.7097244800133383, + "grad_norm": 438.0, + "learning_rate": 2.0516344434692642e-05, + "loss": 16.3753, + "step": 17027 + }, + { + "epoch": 0.7097661623108665, + "grad_norm": 516.0, + "learning_rate": 2.0510893106508645e-05, + "loss": 18.7511, + "step": 17028 + }, + { + "epoch": 0.7098078446083949, + "grad_norm": 178.0, + "learning_rate": 2.050544231576624e-05, + "loss": 11.3128, + "step": 17029 + }, + { + "epoch": 0.7098495269059231, + "grad_norm": 346.0, + "learning_rate": 2.0499992062564766e-05, + "loss": 14.5007, + "step": 17030 + }, + { + "epoch": 0.7098912092034513, + "grad_norm": 376.0, + "learning_rate": 2.049454234700356e-05, + "loss": 15.8755, + "step": 17031 + }, + { + "epoch": 0.7099328915009795, + "grad_norm": 536.0, + "learning_rate": 2.048909316918191e-05, + "loss": 16.3794, + "step": 17032 + }, + { + "epoch": 0.7099745737985078, + "grad_norm": 221.0, + "learning_rate": 2.0483644529199204e-05, + "loss": 11.6877, + "step": 17033 + }, + { + "epoch": 0.710016256096036, + "grad_norm": 532.0, + "learning_rate": 2.0478196427154655e-05, + "loss": 17.6262, + "step": 17034 + }, + { + "epoch": 0.7100579383935642, + "grad_norm": 254.0, + "learning_rate": 2.0472748863147633e-05, + "loss": 12.2502, + "step": 17035 + }, + { + "epoch": 0.7100996206910924, + "grad_norm": 248.0, + "learning_rate": 2.0467301837277353e-05, + "loss": 11.5628, + "step": 17036 + }, + { + "epoch": 0.7101413029886208, + "grad_norm": 688.0, + "learning_rate": 2.046185534964315e-05, + "loss": 20.6252, + "step": 17037 + }, + { + "epoch": 0.710182985286149, + "grad_norm": 322.0, + "learning_rate": 2.0456409400344225e-05, + "loss": 12.8757, + "step": 17038 + }, + { + "epoch": 0.7102246675836772, + "grad_norm": 532.0, + "learning_rate": 2.0450963989479887e-05, + "loss": 18.2502, + "step": 17039 + }, + { + "epoch": 0.7102663498812054, + "grad_norm": 227.0, + "learning_rate": 2.0445519117149327e-05, + "loss": 5.4695, + "step": 17040 + }, + { + "epoch": 0.7103080321787337, + "grad_norm": 1320.0, + "learning_rate": 2.0440074783451818e-05, + "loss": 27.1313, + "step": 17041 + }, + { + "epoch": 0.7103497144762619, + "grad_norm": 278.0, + "learning_rate": 2.043463098848657e-05, + "loss": 13.0627, + "step": 17042 + }, + { + "epoch": 0.7103913967737902, + "grad_norm": 348.0, + "learning_rate": 2.04291877323528e-05, + "loss": 15.376, + "step": 17043 + }, + { + "epoch": 0.7104330790713184, + "grad_norm": 246.0, + "learning_rate": 2.0423745015149705e-05, + "loss": 12.6259, + "step": 17044 + }, + { + "epoch": 0.7104747613688467, + "grad_norm": 388.0, + "learning_rate": 2.0418302836976484e-05, + "loss": 14.0002, + "step": 17045 + }, + { + "epoch": 0.7105164436663749, + "grad_norm": 314.0, + "learning_rate": 2.041286119793232e-05, + "loss": 14.3768, + "step": 17046 + }, + { + "epoch": 0.7105581259639031, + "grad_norm": 282.0, + "learning_rate": 2.0407420098116385e-05, + "loss": 13.7505, + "step": 17047 + }, + { + "epoch": 0.7105998082614313, + "grad_norm": 536.0, + "learning_rate": 2.0401979537627852e-05, + "loss": 17.1273, + "step": 17048 + }, + { + "epoch": 0.7106414905589596, + "grad_norm": 234.0, + "learning_rate": 2.0396539516565866e-05, + "loss": 13.438, + "step": 17049 + }, + { + "epoch": 0.7106831728564879, + "grad_norm": 180.0, + "learning_rate": 2.0391100035029575e-05, + "loss": 9.5628, + "step": 17050 + }, + { + "epoch": 0.7107248551540161, + "grad_norm": 374.0, + "learning_rate": 2.0385661093118113e-05, + "loss": 14.6254, + "step": 17051 + }, + { + "epoch": 0.7107665374515443, + "grad_norm": 452.0, + "learning_rate": 2.0380222690930605e-05, + "loss": 17.5002, + "step": 17052 + }, + { + "epoch": 0.7108082197490726, + "grad_norm": 460.0, + "learning_rate": 2.0374784828566175e-05, + "loss": 16.5004, + "step": 17053 + }, + { + "epoch": 0.7108499020466008, + "grad_norm": 245.0, + "learning_rate": 2.0369347506123914e-05, + "loss": 13.3127, + "step": 17054 + }, + { + "epoch": 0.710891584344129, + "grad_norm": 99.0, + "learning_rate": 2.0363910723702928e-05, + "loss": 8.1878, + "step": 17055 + }, + { + "epoch": 0.7109332666416572, + "grad_norm": 318.0, + "learning_rate": 2.0358474481402303e-05, + "loss": 14.3127, + "step": 17056 + }, + { + "epoch": 0.7109749489391856, + "grad_norm": 446.0, + "learning_rate": 2.035303877932111e-05, + "loss": 17.0002, + "step": 17057 + }, + { + "epoch": 0.7110166312367138, + "grad_norm": 87.0, + "learning_rate": 2.0347603617558396e-05, + "loss": 8.8761, + "step": 17058 + }, + { + "epoch": 0.711058313534242, + "grad_norm": 175.0, + "learning_rate": 2.034216899621328e-05, + "loss": 9.0002, + "step": 17059 + }, + { + "epoch": 0.7110999958317702, + "grad_norm": 760.0, + "learning_rate": 2.0336734915384726e-05, + "loss": 20.6274, + "step": 17060 + }, + { + "epoch": 0.7111416781292985, + "grad_norm": 47.5, + "learning_rate": 2.0331301375171847e-05, + "loss": 7.3751, + "step": 17061 + }, + { + "epoch": 0.7111833604268267, + "grad_norm": 480.0, + "learning_rate": 2.03258683756736e-05, + "loss": 17.5002, + "step": 17062 + }, + { + "epoch": 0.7112250427243549, + "grad_norm": 137.0, + "learning_rate": 2.032043591698907e-05, + "loss": 8.3764, + "step": 17063 + }, + { + "epoch": 0.7112667250218833, + "grad_norm": 604.0, + "learning_rate": 2.0315003999217198e-05, + "loss": 20.5002, + "step": 17064 + }, + { + "epoch": 0.7113084073194115, + "grad_norm": 108.0, + "learning_rate": 2.0309572622457045e-05, + "loss": 9.8757, + "step": 17065 + }, + { + "epoch": 0.7113500896169397, + "grad_norm": 892.0, + "learning_rate": 2.0304141786807536e-05, + "loss": 22.6295, + "step": 17066 + }, + { + "epoch": 0.7113917719144679, + "grad_norm": 1544.0, + "learning_rate": 2.0298711492367695e-05, + "loss": 27.2554, + "step": 17067 + }, + { + "epoch": 0.7114334542119962, + "grad_norm": 95.0, + "learning_rate": 2.029328173923647e-05, + "loss": 8.5018, + "step": 17068 + }, + { + "epoch": 0.7114751365095244, + "grad_norm": 414.0, + "learning_rate": 2.028785252751283e-05, + "loss": 15.6254, + "step": 17069 + }, + { + "epoch": 0.7115168188070526, + "grad_norm": 408.0, + "learning_rate": 2.0282423857295707e-05, + "loss": 15.3753, + "step": 17070 + }, + { + "epoch": 0.7115585011045809, + "grad_norm": 420.0, + "learning_rate": 2.027699572868405e-05, + "loss": 16.0002, + "step": 17071 + }, + { + "epoch": 0.7116001834021092, + "grad_norm": 498.0, + "learning_rate": 2.0271568141776788e-05, + "loss": 16.8752, + "step": 17072 + }, + { + "epoch": 0.7116418656996374, + "grad_norm": 732.0, + "learning_rate": 2.026614109667283e-05, + "loss": 20.1286, + "step": 17073 + }, + { + "epoch": 0.7116835479971656, + "grad_norm": 134.0, + "learning_rate": 2.0260714593471096e-05, + "loss": 9.4378, + "step": 17074 + }, + { + "epoch": 0.7117252302946938, + "grad_norm": 494.0, + "learning_rate": 2.0255288632270474e-05, + "loss": 18.3755, + "step": 17075 + }, + { + "epoch": 0.7117669125922221, + "grad_norm": 466.0, + "learning_rate": 2.024986321316986e-05, + "loss": 17.0004, + "step": 17076 + }, + { + "epoch": 0.7118085948897503, + "grad_norm": 398.0, + "learning_rate": 2.024443833626813e-05, + "loss": 15.1878, + "step": 17077 + }, + { + "epoch": 0.7118502771872786, + "grad_norm": 152.0, + "learning_rate": 2.0239014001664154e-05, + "loss": 9.7502, + "step": 17078 + }, + { + "epoch": 0.7118919594848068, + "grad_norm": 37.5, + "learning_rate": 2.023359020945679e-05, + "loss": 6.7503, + "step": 17079 + }, + { + "epoch": 0.7119336417823351, + "grad_norm": 464.0, + "learning_rate": 2.022816695974488e-05, + "loss": 17.0002, + "step": 17080 + }, + { + "epoch": 0.7119753240798633, + "grad_norm": 1864.0, + "learning_rate": 2.0222744252627274e-05, + "loss": 36.5044, + "step": 17081 + }, + { + "epoch": 0.7120170063773915, + "grad_norm": 336.0, + "learning_rate": 2.0217322088202778e-05, + "loss": 14.7504, + "step": 17082 + }, + { + "epoch": 0.7120586886749197, + "grad_norm": 164.0, + "learning_rate": 2.0211900466570273e-05, + "loss": 11.3755, + "step": 17083 + }, + { + "epoch": 0.712100370972448, + "grad_norm": 1640.0, + "learning_rate": 2.0206479387828485e-05, + "loss": 35.5004, + "step": 17084 + }, + { + "epoch": 0.7121420532699763, + "grad_norm": 308.0, + "learning_rate": 2.0201058852076294e-05, + "loss": 13.6254, + "step": 17085 + }, + { + "epoch": 0.7121837355675045, + "grad_norm": 116.5, + "learning_rate": 2.019563885941241e-05, + "loss": 5.1565, + "step": 17086 + }, + { + "epoch": 0.7122254178650327, + "grad_norm": 310.0, + "learning_rate": 2.0190219409935697e-05, + "loss": 14.5007, + "step": 17087 + }, + { + "epoch": 0.712267100162561, + "grad_norm": 378.0, + "learning_rate": 2.018480050374484e-05, + "loss": 14.9383, + "step": 17088 + }, + { + "epoch": 0.7123087824600892, + "grad_norm": 201.0, + "learning_rate": 2.0179382140938675e-05, + "loss": 11.0003, + "step": 17089 + }, + { + "epoch": 0.7123504647576174, + "grad_norm": 450.0, + "learning_rate": 2.0173964321615884e-05, + "loss": 14.8154, + "step": 17090 + }, + { + "epoch": 0.7123921470551456, + "grad_norm": 520.0, + "learning_rate": 2.0168547045875274e-05, + "loss": 16.251, + "step": 17091 + }, + { + "epoch": 0.712433829352674, + "grad_norm": 580.0, + "learning_rate": 2.0163130313815514e-05, + "loss": 20.0012, + "step": 17092 + }, + { + "epoch": 0.7124755116502022, + "grad_norm": 203.0, + "learning_rate": 2.0157714125535365e-05, + "loss": 11.1252, + "step": 17093 + }, + { + "epoch": 0.7125171939477304, + "grad_norm": 330.0, + "learning_rate": 2.0152298481133526e-05, + "loss": 14.5627, + "step": 17094 + }, + { + "epoch": 0.7125588762452586, + "grad_norm": 300.0, + "learning_rate": 2.0146883380708698e-05, + "loss": 12.7502, + "step": 17095 + }, + { + "epoch": 0.7126005585427869, + "grad_norm": 388.0, + "learning_rate": 2.0141468824359572e-05, + "loss": 16.7502, + "step": 17096 + }, + { + "epoch": 0.7126422408403151, + "grad_norm": 880.0, + "learning_rate": 2.0136054812184822e-05, + "loss": 20.8754, + "step": 17097 + }, + { + "epoch": 0.7126839231378433, + "grad_norm": 366.0, + "learning_rate": 2.013064134428313e-05, + "loss": 13.8129, + "step": 17098 + }, + { + "epoch": 0.7127256054353716, + "grad_norm": 183.0, + "learning_rate": 2.0125228420753145e-05, + "loss": 10.3127, + "step": 17099 + }, + { + "epoch": 0.7127672877328999, + "grad_norm": 188.0, + "learning_rate": 2.011981604169353e-05, + "loss": 10.6877, + "step": 17100 + }, + { + "epoch": 0.7128089700304281, + "grad_norm": 876.0, + "learning_rate": 2.011440420720292e-05, + "loss": 25.2514, + "step": 17101 + }, + { + "epoch": 0.7128506523279563, + "grad_norm": 178.0, + "learning_rate": 2.0108992917379943e-05, + "loss": 10.3754, + "step": 17102 + }, + { + "epoch": 0.7128923346254845, + "grad_norm": 548.0, + "learning_rate": 2.010358217232322e-05, + "loss": 17.2511, + "step": 17103 + }, + { + "epoch": 0.7129340169230128, + "grad_norm": 330.0, + "learning_rate": 2.0098171972131373e-05, + "loss": 11.0009, + "step": 17104 + }, + { + "epoch": 0.712975699220541, + "grad_norm": 201.0, + "learning_rate": 2.0092762316902996e-05, + "loss": 11.0003, + "step": 17105 + }, + { + "epoch": 0.7130173815180693, + "grad_norm": 326.0, + "learning_rate": 2.0087353206736675e-05, + "loss": 9.3133, + "step": 17106 + }, + { + "epoch": 0.7130590638155975, + "grad_norm": 314.0, + "learning_rate": 2.0081944641731004e-05, + "loss": 13.8752, + "step": 17107 + }, + { + "epoch": 0.7131007461131258, + "grad_norm": 168.0, + "learning_rate": 2.0076536621984525e-05, + "loss": 11.3134, + "step": 17108 + }, + { + "epoch": 0.713142428410654, + "grad_norm": 182.0, + "learning_rate": 2.007112914759586e-05, + "loss": 11.0005, + "step": 17109 + }, + { + "epoch": 0.7131841107081822, + "grad_norm": 244.0, + "learning_rate": 2.006572221866349e-05, + "loss": 12.563, + "step": 17110 + }, + { + "epoch": 0.7132257930057104, + "grad_norm": 60.25, + "learning_rate": 2.006031583528602e-05, + "loss": 6.1259, + "step": 17111 + }, + { + "epoch": 0.7132674753032388, + "grad_norm": 1632.0, + "learning_rate": 2.005490999756192e-05, + "loss": 36.7503, + "step": 17112 + }, + { + "epoch": 0.713309157600767, + "grad_norm": 384.0, + "learning_rate": 2.0049504705589778e-05, + "loss": 15.3778, + "step": 17113 + }, + { + "epoch": 0.7133508398982952, + "grad_norm": 131.0, + "learning_rate": 2.0044099959468037e-05, + "loss": 8.8131, + "step": 17114 + }, + { + "epoch": 0.7133925221958234, + "grad_norm": 124.0, + "learning_rate": 2.0038695759295267e-05, + "loss": 10.4385, + "step": 17115 + }, + { + "epoch": 0.7134342044933517, + "grad_norm": 278.0, + "learning_rate": 2.0033292105169893e-05, + "loss": 12.1878, + "step": 17116 + }, + { + "epoch": 0.7134758867908799, + "grad_norm": 576.0, + "learning_rate": 2.0027888997190448e-05, + "loss": 18.7504, + "step": 17117 + }, + { + "epoch": 0.7135175690884081, + "grad_norm": 324.0, + "learning_rate": 2.0022486435455385e-05, + "loss": 14.4408, + "step": 17118 + }, + { + "epoch": 0.7135592513859363, + "grad_norm": 322.0, + "learning_rate": 2.001708442006317e-05, + "loss": 12.3753, + "step": 17119 + }, + { + "epoch": 0.7136009336834647, + "grad_norm": 708.0, + "learning_rate": 2.0011682951112254e-05, + "loss": 22.6267, + "step": 17120 + }, + { + "epoch": 0.7136426159809929, + "grad_norm": 318.0, + "learning_rate": 2.0006282028701074e-05, + "loss": 15.2508, + "step": 17121 + }, + { + "epoch": 0.7136842982785211, + "grad_norm": 398.0, + "learning_rate": 2.000088165292807e-05, + "loss": 14.7508, + "step": 17122 + }, + { + "epoch": 0.7137259805760493, + "grad_norm": 366.0, + "learning_rate": 1.9995481823891658e-05, + "loss": 14.6252, + "step": 17123 + }, + { + "epoch": 0.7137676628735776, + "grad_norm": 458.0, + "learning_rate": 1.999008254169026e-05, + "loss": 14.9378, + "step": 17124 + }, + { + "epoch": 0.7138093451711058, + "grad_norm": 660.0, + "learning_rate": 1.9984683806422266e-05, + "loss": 19.1251, + "step": 17125 + }, + { + "epoch": 0.713851027468634, + "grad_norm": 512.0, + "learning_rate": 1.9979285618186077e-05, + "loss": 18.3753, + "step": 17126 + }, + { + "epoch": 0.7138927097661623, + "grad_norm": 184.0, + "learning_rate": 1.997388797708007e-05, + "loss": 11.6253, + "step": 17127 + }, + { + "epoch": 0.7139343920636906, + "grad_norm": 1624.0, + "learning_rate": 1.9968490883202623e-05, + "loss": 35.2502, + "step": 17128 + }, + { + "epoch": 0.7139760743612188, + "grad_norm": 370.0, + "learning_rate": 1.9963094336652095e-05, + "loss": 14.6877, + "step": 17129 + }, + { + "epoch": 0.714017756658747, + "grad_norm": 316.0, + "learning_rate": 1.9957698337526836e-05, + "loss": 15.8132, + "step": 17130 + }, + { + "epoch": 0.7140594389562752, + "grad_norm": 193.0, + "learning_rate": 1.9952302885925194e-05, + "loss": 11.5631, + "step": 17131 + }, + { + "epoch": 0.7141011212538035, + "grad_norm": 398.0, + "learning_rate": 1.9946907981945478e-05, + "loss": 15.063, + "step": 17132 + }, + { + "epoch": 0.7141428035513318, + "grad_norm": 624.0, + "learning_rate": 1.9941513625686075e-05, + "loss": 20.1254, + "step": 17133 + }, + { + "epoch": 0.71418448584886, + "grad_norm": 274.0, + "learning_rate": 1.9936119817245213e-05, + "loss": 12.1254, + "step": 17134 + }, + { + "epoch": 0.7142261681463882, + "grad_norm": 266.0, + "learning_rate": 1.9930726556721275e-05, + "loss": 13.8127, + "step": 17135 + }, + { + "epoch": 0.7142678504439165, + "grad_norm": 426.0, + "learning_rate": 1.992533384421248e-05, + "loss": 16.0002, + "step": 17136 + }, + { + "epoch": 0.7143095327414447, + "grad_norm": 368.0, + "learning_rate": 1.991994167981718e-05, + "loss": 14.438, + "step": 17137 + }, + { + "epoch": 0.7143512150389729, + "grad_norm": 167.0, + "learning_rate": 1.9914550063633574e-05, + "loss": 10.8754, + "step": 17138 + }, + { + "epoch": 0.7143928973365012, + "grad_norm": 1416.0, + "learning_rate": 1.990915899576e-05, + "loss": 29.2581, + "step": 17139 + }, + { + "epoch": 0.7144345796340295, + "grad_norm": 173.0, + "learning_rate": 1.9903768476294642e-05, + "loss": 10.5003, + "step": 17140 + }, + { + "epoch": 0.7144762619315577, + "grad_norm": 350.0, + "learning_rate": 1.9898378505335806e-05, + "loss": 14.4377, + "step": 17141 + }, + { + "epoch": 0.7145179442290859, + "grad_norm": 418.0, + "learning_rate": 1.9892989082981667e-05, + "loss": 13.1881, + "step": 17142 + }, + { + "epoch": 0.7145596265266142, + "grad_norm": 206.0, + "learning_rate": 1.9887600209330487e-05, + "loss": 11.6252, + "step": 17143 + }, + { + "epoch": 0.7146013088241424, + "grad_norm": 524.0, + "learning_rate": 1.9882211884480468e-05, + "loss": 18.6253, + "step": 17144 + }, + { + "epoch": 0.7146429911216706, + "grad_norm": 197.0, + "learning_rate": 1.9876824108529808e-05, + "loss": 8.6892, + "step": 17145 + }, + { + "epoch": 0.7146846734191988, + "grad_norm": 344.0, + "learning_rate": 1.9871436881576705e-05, + "loss": 14.6877, + "step": 17146 + }, + { + "epoch": 0.7147263557167272, + "grad_norm": 328.0, + "learning_rate": 1.9866050203719338e-05, + "loss": 13.3751, + "step": 17147 + }, + { + "epoch": 0.7147680380142554, + "grad_norm": 720.0, + "learning_rate": 1.9860664075055884e-05, + "loss": 22.5002, + "step": 17148 + }, + { + "epoch": 0.7148097203117836, + "grad_norm": 233.0, + "learning_rate": 1.98552784956845e-05, + "loss": 10.0007, + "step": 17149 + }, + { + "epoch": 0.7148514026093118, + "grad_norm": 482.0, + "learning_rate": 1.984989346570334e-05, + "loss": 17.5004, + "step": 17150 + }, + { + "epoch": 0.7148930849068401, + "grad_norm": 187.0, + "learning_rate": 1.984450898521055e-05, + "loss": 10.2504, + "step": 17151 + }, + { + "epoch": 0.7149347672043683, + "grad_norm": 348.0, + "learning_rate": 1.9839125054304264e-05, + "loss": 13.6877, + "step": 17152 + }, + { + "epoch": 0.7149764495018965, + "grad_norm": 180.0, + "learning_rate": 1.9833741673082597e-05, + "loss": 10.9378, + "step": 17153 + }, + { + "epoch": 0.7150181317994247, + "grad_norm": 247.0, + "learning_rate": 1.982835884164367e-05, + "loss": 12.9377, + "step": 17154 + }, + { + "epoch": 0.7150598140969531, + "grad_norm": 788.0, + "learning_rate": 1.9822976560085575e-05, + "loss": 22.2504, + "step": 17155 + }, + { + "epoch": 0.7151014963944813, + "grad_norm": 480.0, + "learning_rate": 1.981759482850642e-05, + "loss": 17.2503, + "step": 17156 + }, + { + "epoch": 0.7151431786920095, + "grad_norm": 244.0, + "learning_rate": 1.981221364700427e-05, + "loss": 12.8779, + "step": 17157 + }, + { + "epoch": 0.7151848609895377, + "grad_norm": 140.0, + "learning_rate": 1.9806833015677196e-05, + "loss": 10.6878, + "step": 17158 + }, + { + "epoch": 0.715226543287066, + "grad_norm": 800.0, + "learning_rate": 1.98014529346233e-05, + "loss": 25.3752, + "step": 17159 + }, + { + "epoch": 0.7152682255845942, + "grad_norm": 330.0, + "learning_rate": 1.9796073403940574e-05, + "loss": 14.8127, + "step": 17160 + }, + { + "epoch": 0.7153099078821225, + "grad_norm": 122.0, + "learning_rate": 1.9790694423727124e-05, + "loss": 7.8775, + "step": 17161 + }, + { + "epoch": 0.7153515901796507, + "grad_norm": 77.0, + "learning_rate": 1.9785315994080912e-05, + "loss": 7.4377, + "step": 17162 + }, + { + "epoch": 0.715393272477179, + "grad_norm": 332.0, + "learning_rate": 1.977993811510004e-05, + "loss": 13.5628, + "step": 17163 + }, + { + "epoch": 0.7154349547747072, + "grad_norm": 312.0, + "learning_rate": 1.977456078688244e-05, + "loss": 13.1252, + "step": 17164 + }, + { + "epoch": 0.7154766370722354, + "grad_norm": 1104.0, + "learning_rate": 1.9769184009526186e-05, + "loss": 24.3753, + "step": 17165 + }, + { + "epoch": 0.7155183193697636, + "grad_norm": 1160.0, + "learning_rate": 1.976380778312921e-05, + "loss": 29.2505, + "step": 17166 + }, + { + "epoch": 0.715560001667292, + "grad_norm": 342.0, + "learning_rate": 1.9758432107789525e-05, + "loss": 13.3752, + "step": 17167 + }, + { + "epoch": 0.7156016839648202, + "grad_norm": 189.0, + "learning_rate": 1.9753056983605113e-05, + "loss": 11.3753, + "step": 17168 + }, + { + "epoch": 0.7156433662623484, + "grad_norm": 189.0, + "learning_rate": 1.974768241067391e-05, + "loss": 11.1255, + "step": 17169 + }, + { + "epoch": 0.7156850485598766, + "grad_norm": 165.0, + "learning_rate": 1.974230838909389e-05, + "loss": 10.1251, + "step": 17170 + }, + { + "epoch": 0.7157267308574049, + "grad_norm": 204.0, + "learning_rate": 1.9736934918962986e-05, + "loss": 12.2503, + "step": 17171 + }, + { + "epoch": 0.7157684131549331, + "grad_norm": 111.5, + "learning_rate": 1.9731562000379127e-05, + "loss": 10.5004, + "step": 17172 + }, + { + "epoch": 0.7158100954524613, + "grad_norm": 430.0, + "learning_rate": 1.9726189633440234e-05, + "loss": 15.688, + "step": 17173 + }, + { + "epoch": 0.7158517777499895, + "grad_norm": 239.0, + "learning_rate": 1.972081781824423e-05, + "loss": 13.064, + "step": 17174 + }, + { + "epoch": 0.7158934600475179, + "grad_norm": 348.0, + "learning_rate": 1.9715446554889007e-05, + "loss": 13.4378, + "step": 17175 + }, + { + "epoch": 0.7159351423450461, + "grad_norm": 434.0, + "learning_rate": 1.9710075843472452e-05, + "loss": 15.3753, + "step": 17176 + }, + { + "epoch": 0.7159768246425743, + "grad_norm": 348.0, + "learning_rate": 1.970470568409246e-05, + "loss": 14.813, + "step": 17177 + }, + { + "epoch": 0.7160185069401025, + "grad_norm": 196.0, + "learning_rate": 1.9699336076846896e-05, + "loss": 12.3129, + "step": 17178 + }, + { + "epoch": 0.7160601892376308, + "grad_norm": 480.0, + "learning_rate": 1.969396702183362e-05, + "loss": 16.7512, + "step": 17179 + }, + { + "epoch": 0.716101871535159, + "grad_norm": 484.0, + "learning_rate": 1.9688598519150486e-05, + "loss": 18.5004, + "step": 17180 + }, + { + "epoch": 0.7161435538326872, + "grad_norm": 266.0, + "learning_rate": 1.9683230568895334e-05, + "loss": 11.6884, + "step": 17181 + }, + { + "epoch": 0.7161852361302155, + "grad_norm": 258.0, + "learning_rate": 1.9677863171166e-05, + "loss": 13.6883, + "step": 17182 + }, + { + "epoch": 0.7162269184277438, + "grad_norm": 164.0, + "learning_rate": 1.9672496326060296e-05, + "loss": 12.1878, + "step": 17183 + }, + { + "epoch": 0.716268600725272, + "grad_norm": 308.0, + "learning_rate": 1.9667130033676023e-05, + "loss": 13.192, + "step": 17184 + }, + { + "epoch": 0.7163102830228002, + "grad_norm": 220.0, + "learning_rate": 1.9661764294111036e-05, + "loss": 11.6252, + "step": 17185 + }, + { + "epoch": 0.7163519653203284, + "grad_norm": 282.0, + "learning_rate": 1.9656399107463054e-05, + "loss": 13.063, + "step": 17186 + }, + { + "epoch": 0.7163936476178567, + "grad_norm": 418.0, + "learning_rate": 1.965103447382992e-05, + "loss": 16.2502, + "step": 17187 + }, + { + "epoch": 0.7164353299153849, + "grad_norm": 209.0, + "learning_rate": 1.9645670393309346e-05, + "loss": 11.876, + "step": 17188 + }, + { + "epoch": 0.7164770122129132, + "grad_norm": 296.0, + "learning_rate": 1.964030686599916e-05, + "loss": 13.8755, + "step": 17189 + }, + { + "epoch": 0.7165186945104414, + "grad_norm": 360.0, + "learning_rate": 1.963494389199704e-05, + "loss": 13.7501, + "step": 17190 + }, + { + "epoch": 0.7165603768079697, + "grad_norm": 442.0, + "learning_rate": 1.96295814714008e-05, + "loss": 16.7502, + "step": 17191 + }, + { + "epoch": 0.7166020591054979, + "grad_norm": 186.0, + "learning_rate": 1.962421960430809e-05, + "loss": 11.3755, + "step": 17192 + }, + { + "epoch": 0.7166437414030261, + "grad_norm": 170.0, + "learning_rate": 1.96188582908167e-05, + "loss": 8.0627, + "step": 17193 + }, + { + "epoch": 0.7166854237005543, + "grad_norm": 182.0, + "learning_rate": 1.9613497531024317e-05, + "loss": 11.3129, + "step": 17194 + }, + { + "epoch": 0.7167271059980826, + "grad_norm": 140.0, + "learning_rate": 1.9608137325028637e-05, + "loss": 9.0626, + "step": 17195 + }, + { + "epoch": 0.7167687882956109, + "grad_norm": 108.5, + "learning_rate": 1.9602777672927346e-05, + "loss": 9.6881, + "step": 17196 + }, + { + "epoch": 0.7168104705931391, + "grad_norm": 119.5, + "learning_rate": 1.959741857481814e-05, + "loss": 10.5002, + "step": 17197 + }, + { + "epoch": 0.7168521528906673, + "grad_norm": 354.0, + "learning_rate": 1.959206003079867e-05, + "loss": 14.8771, + "step": 17198 + }, + { + "epoch": 0.7168938351881956, + "grad_norm": 123.0, + "learning_rate": 1.958670204096661e-05, + "loss": 9.3754, + "step": 17199 + }, + { + "epoch": 0.7169355174857238, + "grad_norm": 93.0, + "learning_rate": 1.9581344605419603e-05, + "loss": 9.1878, + "step": 17200 + }, + { + "epoch": 0.716977199783252, + "grad_norm": 488.0, + "learning_rate": 1.9575987724255296e-05, + "loss": 16.3752, + "step": 17201 + }, + { + "epoch": 0.7170188820807802, + "grad_norm": 254.0, + "learning_rate": 1.957063139757131e-05, + "loss": 12.563, + "step": 17202 + }, + { + "epoch": 0.7170605643783086, + "grad_norm": 494.0, + "learning_rate": 1.9565275625465268e-05, + "loss": 14.4416, + "step": 17203 + }, + { + "epoch": 0.7171022466758368, + "grad_norm": 462.0, + "learning_rate": 1.9559920408034775e-05, + "loss": 15.0657, + "step": 17204 + }, + { + "epoch": 0.717143928973365, + "grad_norm": 820.0, + "learning_rate": 1.955456574537744e-05, + "loss": 18.3794, + "step": 17205 + }, + { + "epoch": 0.7171856112708932, + "grad_norm": 422.0, + "learning_rate": 1.9549211637590847e-05, + "loss": 14.8752, + "step": 17206 + }, + { + "epoch": 0.7172272935684215, + "grad_norm": 384.0, + "learning_rate": 1.9543858084772575e-05, + "loss": 14.5005, + "step": 17207 + }, + { + "epoch": 0.7172689758659497, + "grad_norm": 400.0, + "learning_rate": 1.9538505087020177e-05, + "loss": 15.5004, + "step": 17208 + }, + { + "epoch": 0.7173106581634779, + "grad_norm": 716.0, + "learning_rate": 1.953315264443126e-05, + "loss": 21.1256, + "step": 17209 + }, + { + "epoch": 0.7173523404610063, + "grad_norm": 227.0, + "learning_rate": 1.9527800757103303e-05, + "loss": 11.876, + "step": 17210 + }, + { + "epoch": 0.7173940227585345, + "grad_norm": 724.0, + "learning_rate": 1.9522449425133926e-05, + "loss": 21.1268, + "step": 17211 + }, + { + "epoch": 0.7174357050560627, + "grad_norm": 184.0, + "learning_rate": 1.9517098648620573e-05, + "loss": 7.6585, + "step": 17212 + }, + { + "epoch": 0.7174773873535909, + "grad_norm": 544.0, + "learning_rate": 1.9511748427660836e-05, + "loss": 19.2501, + "step": 17213 + }, + { + "epoch": 0.7175190696511192, + "grad_norm": 312.0, + "learning_rate": 1.950639876235216e-05, + "loss": 14.1253, + "step": 17214 + }, + { + "epoch": 0.7175607519486474, + "grad_norm": 532.0, + "learning_rate": 1.950104965279211e-05, + "loss": 17.5008, + "step": 17215 + }, + { + "epoch": 0.7176024342461756, + "grad_norm": 31.25, + "learning_rate": 1.9495701099078094e-05, + "loss": 6.1877, + "step": 17216 + }, + { + "epoch": 0.7176441165437039, + "grad_norm": 68.0, + "learning_rate": 1.949035310130768e-05, + "loss": 9.0003, + "step": 17217 + }, + { + "epoch": 0.7176857988412322, + "grad_norm": 241.0, + "learning_rate": 1.948500565957824e-05, + "loss": 11.8752, + "step": 17218 + }, + { + "epoch": 0.7177274811387604, + "grad_norm": 138.0, + "learning_rate": 1.9479658773987314e-05, + "loss": 9.6263, + "step": 17219 + }, + { + "epoch": 0.7177691634362886, + "grad_norm": 125.5, + "learning_rate": 1.9474312444632304e-05, + "loss": 10.0627, + "step": 17220 + }, + { + "epoch": 0.7178108457338168, + "grad_norm": 126.0, + "learning_rate": 1.9468966671610665e-05, + "loss": 10.2511, + "step": 17221 + }, + { + "epoch": 0.7178525280313451, + "grad_norm": 868.0, + "learning_rate": 1.946362145501982e-05, + "loss": 22.2529, + "step": 17222 + }, + { + "epoch": 0.7178942103288734, + "grad_norm": 282.0, + "learning_rate": 1.945827679495718e-05, + "loss": 11.8127, + "step": 17223 + }, + { + "epoch": 0.7179358926264016, + "grad_norm": 120.5, + "learning_rate": 1.945293269152016e-05, + "loss": 9.6257, + "step": 17224 + }, + { + "epoch": 0.7179775749239298, + "grad_norm": 227.0, + "learning_rate": 1.9447589144806154e-05, + "loss": 11.2517, + "step": 17225 + }, + { + "epoch": 0.7180192572214581, + "grad_norm": 362.0, + "learning_rate": 1.9442246154912545e-05, + "loss": 14.5627, + "step": 17226 + }, + { + "epoch": 0.7180609395189863, + "grad_norm": 462.0, + "learning_rate": 1.9436903721936716e-05, + "loss": 15.4383, + "step": 17227 + }, + { + "epoch": 0.7181026218165145, + "grad_norm": 588.0, + "learning_rate": 1.9431561845976025e-05, + "loss": 17.7508, + "step": 17228 + }, + { + "epoch": 0.7181443041140427, + "grad_norm": 133.0, + "learning_rate": 1.9426220527127836e-05, + "loss": 10.1252, + "step": 17229 + }, + { + "epoch": 0.718185986411571, + "grad_norm": 628.0, + "learning_rate": 1.9420879765489497e-05, + "loss": 19.6253, + "step": 17230 + }, + { + "epoch": 0.7182276687090993, + "grad_norm": 178.0, + "learning_rate": 1.941553956115833e-05, + "loss": 11.313, + "step": 17231 + }, + { + "epoch": 0.7182693510066275, + "grad_norm": 202.0, + "learning_rate": 1.9410199914231676e-05, + "loss": 11.8755, + "step": 17232 + }, + { + "epoch": 0.7183110333041557, + "grad_norm": 193.0, + "learning_rate": 1.9404860824806842e-05, + "loss": 10.8754, + "step": 17233 + }, + { + "epoch": 0.718352715601684, + "grad_norm": 1072.0, + "learning_rate": 1.9399522292981114e-05, + "loss": 28.7503, + "step": 17234 + }, + { + "epoch": 0.7183943978992122, + "grad_norm": 452.0, + "learning_rate": 1.939418431885185e-05, + "loss": 14.0628, + "step": 17235 + }, + { + "epoch": 0.7184360801967404, + "grad_norm": 370.0, + "learning_rate": 1.9388846902516257e-05, + "loss": 13.2502, + "step": 17236 + }, + { + "epoch": 0.7184777624942686, + "grad_norm": 171.0, + "learning_rate": 1.938351004407168e-05, + "loss": 10.7508, + "step": 17237 + }, + { + "epoch": 0.718519444791797, + "grad_norm": 203.0, + "learning_rate": 1.9378173743615313e-05, + "loss": 11.0009, + "step": 17238 + }, + { + "epoch": 0.7185611270893252, + "grad_norm": 274.0, + "learning_rate": 1.9372838001244487e-05, + "loss": 11.3129, + "step": 17239 + }, + { + "epoch": 0.7186028093868534, + "grad_norm": 1264.0, + "learning_rate": 1.9367502817056373e-05, + "loss": 28.8752, + "step": 17240 + }, + { + "epoch": 0.7186444916843816, + "grad_norm": 177.0, + "learning_rate": 1.9362168191148273e-05, + "loss": 10.7503, + "step": 17241 + }, + { + "epoch": 0.7186861739819099, + "grad_norm": 968.0, + "learning_rate": 1.935683412361734e-05, + "loss": 25.0051, + "step": 17242 + }, + { + "epoch": 0.7187278562794381, + "grad_norm": 368.0, + "learning_rate": 1.9351500614560848e-05, + "loss": 14.3127, + "step": 17243 + }, + { + "epoch": 0.7187695385769663, + "grad_norm": 712.0, + "learning_rate": 1.9346167664075976e-05, + "loss": 21.2507, + "step": 17244 + }, + { + "epoch": 0.7188112208744946, + "grad_norm": 434.0, + "learning_rate": 1.9340835272259915e-05, + "loss": 16.1255, + "step": 17245 + }, + { + "epoch": 0.7188529031720229, + "grad_norm": 67.0, + "learning_rate": 1.9335503439209856e-05, + "loss": 7.4067, + "step": 17246 + }, + { + "epoch": 0.7188945854695511, + "grad_norm": 177.0, + "learning_rate": 1.9330172165022974e-05, + "loss": 11.3754, + "step": 17247 + }, + { + "epoch": 0.7189362677670793, + "grad_norm": 440.0, + "learning_rate": 1.932484144979642e-05, + "loss": 14.9379, + "step": 17248 + }, + { + "epoch": 0.7189779500646075, + "grad_norm": 117.0, + "learning_rate": 1.9319511293627356e-05, + "loss": 8.9378, + "step": 17249 + }, + { + "epoch": 0.7190196323621358, + "grad_norm": 255.0, + "learning_rate": 1.931418169661292e-05, + "loss": 13.3755, + "step": 17250 + }, + { + "epoch": 0.719061314659664, + "grad_norm": 496.0, + "learning_rate": 1.9308852658850252e-05, + "loss": 17.2505, + "step": 17251 + }, + { + "epoch": 0.7191029969571923, + "grad_norm": 35.25, + "learning_rate": 1.9303524180436468e-05, + "loss": 6.1878, + "step": 17252 + }, + { + "epoch": 0.7191446792547205, + "grad_norm": 286.0, + "learning_rate": 1.9298196261468676e-05, + "loss": 13.3128, + "step": 17253 + }, + { + "epoch": 0.7191863615522488, + "grad_norm": 490.0, + "learning_rate": 1.9292868902043985e-05, + "loss": 16.8751, + "step": 17254 + }, + { + "epoch": 0.719228043849777, + "grad_norm": 73.0, + "learning_rate": 1.928754210225949e-05, + "loss": 8.3129, + "step": 17255 + }, + { + "epoch": 0.7192697261473052, + "grad_norm": 560.0, + "learning_rate": 1.9282215862212256e-05, + "loss": 17.8777, + "step": 17256 + }, + { + "epoch": 0.7193114084448334, + "grad_norm": 87.5, + "learning_rate": 1.927689018199937e-05, + "loss": 6.6252, + "step": 17257 + }, + { + "epoch": 0.7193530907423618, + "grad_norm": 374.0, + "learning_rate": 1.927156506171787e-05, + "loss": 15.6877, + "step": 17258 + }, + { + "epoch": 0.71939477303989, + "grad_norm": 696.0, + "learning_rate": 1.926624050146486e-05, + "loss": 19.127, + "step": 17259 + }, + { + "epoch": 0.7194364553374182, + "grad_norm": 172.0, + "learning_rate": 1.926091650133731e-05, + "loss": 11.0627, + "step": 17260 + }, + { + "epoch": 0.7194781376349464, + "grad_norm": 438.0, + "learning_rate": 1.925559306143232e-05, + "loss": 16.8753, + "step": 17261 + }, + { + "epoch": 0.7195198199324747, + "grad_norm": 604.0, + "learning_rate": 1.9250270181846843e-05, + "loss": 19.8759, + "step": 17262 + }, + { + "epoch": 0.7195615022300029, + "grad_norm": 204.0, + "learning_rate": 1.9244947862677952e-05, + "loss": 9.6877, + "step": 17263 + }, + { + "epoch": 0.7196031845275311, + "grad_norm": 498.0, + "learning_rate": 1.923962610402259e-05, + "loss": 14.9413, + "step": 17264 + }, + { + "epoch": 0.7196448668250593, + "grad_norm": 552.0, + "learning_rate": 1.92343049059778e-05, + "loss": 16.3753, + "step": 17265 + }, + { + "epoch": 0.7196865491225877, + "grad_norm": 314.0, + "learning_rate": 1.9228984268640505e-05, + "loss": 11.0004, + "step": 17266 + }, + { + "epoch": 0.7197282314201159, + "grad_norm": 924.0, + "learning_rate": 1.922366419210773e-05, + "loss": 25.0002, + "step": 17267 + }, + { + "epoch": 0.7197699137176441, + "grad_norm": 1096.0, + "learning_rate": 1.921834467647638e-05, + "loss": 30.0009, + "step": 17268 + }, + { + "epoch": 0.7198115960151723, + "grad_norm": 520.0, + "learning_rate": 1.921302572184344e-05, + "loss": 17.2502, + "step": 17269 + }, + { + "epoch": 0.7198532783127006, + "grad_norm": 346.0, + "learning_rate": 1.9207707328305845e-05, + "loss": 15.6878, + "step": 17270 + }, + { + "epoch": 0.7198949606102288, + "grad_norm": 308.0, + "learning_rate": 1.9202389495960523e-05, + "loss": 13.8128, + "step": 17271 + }, + { + "epoch": 0.719936642907757, + "grad_norm": 255.0, + "learning_rate": 1.9197072224904378e-05, + "loss": 12.7508, + "step": 17272 + }, + { + "epoch": 0.7199783252052853, + "grad_norm": 322.0, + "learning_rate": 1.9191755515234328e-05, + "loss": 13.0004, + "step": 17273 + }, + { + "epoch": 0.7200200075028136, + "grad_norm": 456.0, + "learning_rate": 1.918643936704727e-05, + "loss": 16.8755, + "step": 17274 + }, + { + "epoch": 0.7200616898003418, + "grad_norm": 604.0, + "learning_rate": 1.9181123780440092e-05, + "loss": 18.7502, + "step": 17275 + }, + { + "epoch": 0.72010337209787, + "grad_norm": 103.5, + "learning_rate": 1.9175808755509667e-05, + "loss": 8.4379, + "step": 17276 + }, + { + "epoch": 0.7201450543953982, + "grad_norm": 324.0, + "learning_rate": 1.917049429235286e-05, + "loss": 13.1253, + "step": 17277 + }, + { + "epoch": 0.7201867366929265, + "grad_norm": 308.0, + "learning_rate": 1.9165180391066532e-05, + "loss": 13.3753, + "step": 17278 + }, + { + "epoch": 0.7202284189904548, + "grad_norm": 179.0, + "learning_rate": 1.9159867051747532e-05, + "loss": 10.6878, + "step": 17279 + }, + { + "epoch": 0.720270101287983, + "grad_norm": 624.0, + "learning_rate": 1.915455427449269e-05, + "loss": 20.6253, + "step": 17280 + }, + { + "epoch": 0.7203117835855112, + "grad_norm": 328.0, + "learning_rate": 1.914924205939884e-05, + "loss": 15.2524, + "step": 17281 + }, + { + "epoch": 0.7203534658830395, + "grad_norm": 500.0, + "learning_rate": 1.9143930406562788e-05, + "loss": 16.7501, + "step": 17282 + }, + { + "epoch": 0.7203951481805677, + "grad_norm": 76.5, + "learning_rate": 1.9138619316081348e-05, + "loss": 8.3756, + "step": 17283 + }, + { + "epoch": 0.7204368304780959, + "grad_norm": 374.0, + "learning_rate": 1.9133308788051286e-05, + "loss": 14.0626, + "step": 17284 + }, + { + "epoch": 0.7204785127756242, + "grad_norm": 470.0, + "learning_rate": 1.9127998822569448e-05, + "loss": 16.6252, + "step": 17285 + }, + { + "epoch": 0.7205201950731525, + "grad_norm": 380.0, + "learning_rate": 1.9122689419732543e-05, + "loss": 12.9378, + "step": 17286 + }, + { + "epoch": 0.7205618773706807, + "grad_norm": 512.0, + "learning_rate": 1.9117380579637395e-05, + "loss": 18.5002, + "step": 17287 + }, + { + "epoch": 0.7206035596682089, + "grad_norm": 49.25, + "learning_rate": 1.911207230238069e-05, + "loss": 6.0946, + "step": 17288 + }, + { + "epoch": 0.7206452419657372, + "grad_norm": 174.0, + "learning_rate": 1.9106764588059244e-05, + "loss": 10.6878, + "step": 17289 + }, + { + "epoch": 0.7206869242632654, + "grad_norm": 270.0, + "learning_rate": 1.9101457436769726e-05, + "loss": 13.0001, + "step": 17290 + }, + { + "epoch": 0.7207286065607936, + "grad_norm": 490.0, + "learning_rate": 1.9096150848608925e-05, + "loss": 16.004, + "step": 17291 + }, + { + "epoch": 0.7207702888583218, + "grad_norm": 330.0, + "learning_rate": 1.9090844823673477e-05, + "loss": 13.0004, + "step": 17292 + }, + { + "epoch": 0.7208119711558502, + "grad_norm": 760.0, + "learning_rate": 1.9085539362060146e-05, + "loss": 23.2503, + "step": 17293 + }, + { + "epoch": 0.7208536534533784, + "grad_norm": 158.0, + "learning_rate": 1.9080234463865603e-05, + "loss": 9.5003, + "step": 17294 + }, + { + "epoch": 0.7208953357509066, + "grad_norm": 432.0, + "learning_rate": 1.9074930129186536e-05, + "loss": 15.9388, + "step": 17295 + }, + { + "epoch": 0.7209370180484348, + "grad_norm": 502.0, + "learning_rate": 1.9069626358119613e-05, + "loss": 18.5004, + "step": 17296 + }, + { + "epoch": 0.7209787003459631, + "grad_norm": 1012.0, + "learning_rate": 1.9064323150761493e-05, + "loss": 28.5006, + "step": 17297 + }, + { + "epoch": 0.7210203826434913, + "grad_norm": 189.0, + "learning_rate": 1.905902050720883e-05, + "loss": 10.5001, + "step": 17298 + }, + { + "epoch": 0.7210620649410195, + "grad_norm": 79.5, + "learning_rate": 1.9053718427558264e-05, + "loss": 8.3133, + "step": 17299 + }, + { + "epoch": 0.7211037472385478, + "grad_norm": 156.0, + "learning_rate": 1.9048416911906424e-05, + "loss": 10.1259, + "step": 17300 + }, + { + "epoch": 0.7211454295360761, + "grad_norm": 1136.0, + "learning_rate": 1.9043115960349938e-05, + "loss": 29.7502, + "step": 17301 + }, + { + "epoch": 0.7211871118336043, + "grad_norm": 544.0, + "learning_rate": 1.9037815572985412e-05, + "loss": 15.7508, + "step": 17302 + }, + { + "epoch": 0.7212287941311325, + "grad_norm": 112.5, + "learning_rate": 1.903251574990944e-05, + "loss": 10.3753, + "step": 17303 + }, + { + "epoch": 0.7212704764286607, + "grad_norm": 108.0, + "learning_rate": 1.9027216491218625e-05, + "loss": 9.5013, + "step": 17304 + }, + { + "epoch": 0.721312158726189, + "grad_norm": 209.0, + "learning_rate": 1.9021917797009538e-05, + "loss": 12.6251, + "step": 17305 + }, + { + "epoch": 0.7213538410237172, + "grad_norm": 504.0, + "learning_rate": 1.9016619667378742e-05, + "loss": 16.8754, + "step": 17306 + }, + { + "epoch": 0.7213955233212455, + "grad_norm": 144.0, + "learning_rate": 1.9011322102422813e-05, + "loss": 9.0004, + "step": 17307 + }, + { + "epoch": 0.7214372056187737, + "grad_norm": 282.0, + "learning_rate": 1.9006025102238285e-05, + "loss": 12.7504, + "step": 17308 + }, + { + "epoch": 0.721478887916302, + "grad_norm": 207.0, + "learning_rate": 1.9000728666921697e-05, + "loss": 11.5627, + "step": 17309 + }, + { + "epoch": 0.7215205702138302, + "grad_norm": 764.0, + "learning_rate": 1.899543279656957e-05, + "loss": 17.8793, + "step": 17310 + }, + { + "epoch": 0.7215622525113584, + "grad_norm": 474.0, + "learning_rate": 1.8990137491278463e-05, + "loss": 15.7528, + "step": 17311 + }, + { + "epoch": 0.7216039348088866, + "grad_norm": 528.0, + "learning_rate": 1.8984842751144823e-05, + "loss": 18.1252, + "step": 17312 + }, + { + "epoch": 0.721645617106415, + "grad_norm": 233.0, + "learning_rate": 1.8979548576265206e-05, + "loss": 12.501, + "step": 17313 + }, + { + "epoch": 0.7216872994039432, + "grad_norm": 116.0, + "learning_rate": 1.897425496673604e-05, + "loss": 9.3769, + "step": 17314 + }, + { + "epoch": 0.7217289817014714, + "grad_norm": 155.0, + "learning_rate": 1.896896192265386e-05, + "loss": 9.6252, + "step": 17315 + }, + { + "epoch": 0.7217706639989996, + "grad_norm": 386.0, + "learning_rate": 1.896366944411508e-05, + "loss": 16.8751, + "step": 17316 + }, + { + "epoch": 0.7218123462965279, + "grad_norm": 364.0, + "learning_rate": 1.8958377531216203e-05, + "loss": 14.1252, + "step": 17317 + }, + { + "epoch": 0.7218540285940561, + "grad_norm": 384.0, + "learning_rate": 1.895308618405362e-05, + "loss": 15.1884, + "step": 17318 + }, + { + "epoch": 0.7218957108915843, + "grad_norm": 600.0, + "learning_rate": 1.894779540272382e-05, + "loss": 19.3752, + "step": 17319 + }, + { + "epoch": 0.7219373931891125, + "grad_norm": 540.0, + "learning_rate": 1.89425051873232e-05, + "loss": 18.6254, + "step": 17320 + }, + { + "epoch": 0.7219790754866409, + "grad_norm": 131.0, + "learning_rate": 1.893721553794818e-05, + "loss": 10.1252, + "step": 17321 + }, + { + "epoch": 0.7220207577841691, + "grad_norm": 656.0, + "learning_rate": 1.8931926454695165e-05, + "loss": 18.6254, + "step": 17322 + }, + { + "epoch": 0.7220624400816973, + "grad_norm": 234.0, + "learning_rate": 1.8926637937660547e-05, + "loss": 14.0629, + "step": 17323 + }, + { + "epoch": 0.7221041223792255, + "grad_norm": 246.0, + "learning_rate": 1.892134998694071e-05, + "loss": 11.1252, + "step": 17324 + }, + { + "epoch": 0.7221458046767538, + "grad_norm": 126.0, + "learning_rate": 1.8916062602632035e-05, + "loss": 10.2503, + "step": 17325 + }, + { + "epoch": 0.722187486974282, + "grad_norm": 181.0, + "learning_rate": 1.8910775784830876e-05, + "loss": 10.5632, + "step": 17326 + }, + { + "epoch": 0.7222291692718102, + "grad_norm": 392.0, + "learning_rate": 1.8905489533633586e-05, + "loss": 14.6878, + "step": 17327 + }, + { + "epoch": 0.7222708515693385, + "grad_norm": 332.0, + "learning_rate": 1.890020384913651e-05, + "loss": 14.5002, + "step": 17328 + }, + { + "epoch": 0.7223125338668668, + "grad_norm": 832.0, + "learning_rate": 1.889491873143598e-05, + "loss": 27.1259, + "step": 17329 + }, + { + "epoch": 0.722354216164395, + "grad_norm": 560.0, + "learning_rate": 1.8889634180628318e-05, + "loss": 17.7507, + "step": 17330 + }, + { + "epoch": 0.7223958984619232, + "grad_norm": 212.0, + "learning_rate": 1.8884350196809835e-05, + "loss": 11.3137, + "step": 17331 + }, + { + "epoch": 0.7224375807594514, + "grad_norm": 118.5, + "learning_rate": 1.8879066780076833e-05, + "loss": 10.4379, + "step": 17332 + }, + { + "epoch": 0.7224792630569797, + "grad_norm": 384.0, + "learning_rate": 1.8873783930525597e-05, + "loss": 14.7526, + "step": 17333 + }, + { + "epoch": 0.722520945354508, + "grad_norm": 506.0, + "learning_rate": 1.8868501648252397e-05, + "loss": 19.3753, + "step": 17334 + }, + { + "epoch": 0.7225626276520362, + "grad_norm": 596.0, + "learning_rate": 1.8863219933353554e-05, + "loss": 18.8767, + "step": 17335 + }, + { + "epoch": 0.7226043099495644, + "grad_norm": 288.0, + "learning_rate": 1.8857938785925255e-05, + "loss": 13.5635, + "step": 17336 + }, + { + "epoch": 0.7226459922470927, + "grad_norm": 496.0, + "learning_rate": 1.8852658206063822e-05, + "loss": 16.6252, + "step": 17337 + }, + { + "epoch": 0.7226876745446209, + "grad_norm": 748.0, + "learning_rate": 1.8847378193865423e-05, + "loss": 22.2504, + "step": 17338 + }, + { + "epoch": 0.7227293568421491, + "grad_norm": 239.0, + "learning_rate": 1.884209874942636e-05, + "loss": 11.6877, + "step": 17339 + }, + { + "epoch": 0.7227710391396773, + "grad_norm": 193.0, + "learning_rate": 1.8836819872842777e-05, + "loss": 10.0631, + "step": 17340 + }, + { + "epoch": 0.7228127214372057, + "grad_norm": 620.0, + "learning_rate": 1.8831541564210953e-05, + "loss": 18.377, + "step": 17341 + }, + { + "epoch": 0.7228544037347339, + "grad_norm": 352.0, + "learning_rate": 1.882626382362701e-05, + "loss": 15.8753, + "step": 17342 + }, + { + "epoch": 0.7228960860322621, + "grad_norm": 266.0, + "learning_rate": 1.8820986651187217e-05, + "loss": 12.563, + "step": 17343 + }, + { + "epoch": 0.7229377683297903, + "grad_norm": 418.0, + "learning_rate": 1.8815710046987676e-05, + "loss": 17.0004, + "step": 17344 + }, + { + "epoch": 0.7229794506273186, + "grad_norm": 796.0, + "learning_rate": 1.8810434011124607e-05, + "loss": 20.6294, + "step": 17345 + }, + { + "epoch": 0.7230211329248468, + "grad_norm": 644.0, + "learning_rate": 1.880515854369414e-05, + "loss": 19.3753, + "step": 17346 + }, + { + "epoch": 0.723062815222375, + "grad_norm": 450.0, + "learning_rate": 1.8799883644792433e-05, + "loss": 16.3764, + "step": 17347 + }, + { + "epoch": 0.7231044975199032, + "grad_norm": 119.0, + "learning_rate": 1.8794609314515614e-05, + "loss": 11.063, + "step": 17348 + }, + { + "epoch": 0.7231461798174316, + "grad_norm": 470.0, + "learning_rate": 1.8789335552959807e-05, + "loss": 16.1252, + "step": 17349 + }, + { + "epoch": 0.7231878621149598, + "grad_norm": 207.0, + "learning_rate": 1.878406236022114e-05, + "loss": 11.438, + "step": 17350 + }, + { + "epoch": 0.723229544412488, + "grad_norm": 600.0, + "learning_rate": 1.8778789736395696e-05, + "loss": 20.2504, + "step": 17351 + }, + { + "epoch": 0.7232712267100162, + "grad_norm": 720.0, + "learning_rate": 1.8773517681579588e-05, + "loss": 20.3794, + "step": 17352 + }, + { + "epoch": 0.7233129090075445, + "grad_norm": 88.5, + "learning_rate": 1.8768246195868895e-05, + "loss": 10.6879, + "step": 17353 + }, + { + "epoch": 0.7233545913050727, + "grad_norm": 184.0, + "learning_rate": 1.876297527935968e-05, + "loss": 11.1878, + "step": 17354 + }, + { + "epoch": 0.723396273602601, + "grad_norm": 796.0, + "learning_rate": 1.8757704932148018e-05, + "loss": 22.1253, + "step": 17355 + }, + { + "epoch": 0.7234379559001293, + "grad_norm": 494.0, + "learning_rate": 1.8752435154329955e-05, + "loss": 17.3752, + "step": 17356 + }, + { + "epoch": 0.7234796381976575, + "grad_norm": 796.0, + "learning_rate": 1.8747165946001537e-05, + "loss": 22.7576, + "step": 17357 + }, + { + "epoch": 0.7235213204951857, + "grad_norm": 366.0, + "learning_rate": 1.8741897307258792e-05, + "loss": 14.1256, + "step": 17358 + }, + { + "epoch": 0.7235630027927139, + "grad_norm": 386.0, + "learning_rate": 1.8736629238197746e-05, + "loss": 14.7513, + "step": 17359 + }, + { + "epoch": 0.7236046850902422, + "grad_norm": 174.0, + "learning_rate": 1.8731361738914387e-05, + "loss": 11.6885, + "step": 17360 + }, + { + "epoch": 0.7236463673877704, + "grad_norm": 612.0, + "learning_rate": 1.8726094809504773e-05, + "loss": 17.1255, + "step": 17361 + }, + { + "epoch": 0.7236880496852987, + "grad_norm": 248.0, + "learning_rate": 1.872082845006482e-05, + "loss": 10.9378, + "step": 17362 + }, + { + "epoch": 0.7237297319828269, + "grad_norm": 105.5, + "learning_rate": 1.8715562660690585e-05, + "loss": 10.1259, + "step": 17363 + }, + { + "epoch": 0.7237714142803552, + "grad_norm": 245.0, + "learning_rate": 1.8710297441477958e-05, + "loss": 11.6881, + "step": 17364 + }, + { + "epoch": 0.7238130965778834, + "grad_norm": 233.0, + "learning_rate": 1.8705032792522974e-05, + "loss": 10.9383, + "step": 17365 + }, + { + "epoch": 0.7238547788754116, + "grad_norm": 354.0, + "learning_rate": 1.8699768713921512e-05, + "loss": 14.0628, + "step": 17366 + }, + { + "epoch": 0.7238964611729398, + "grad_norm": 133.0, + "learning_rate": 1.869450520576958e-05, + "loss": 9.3127, + "step": 17367 + }, + { + "epoch": 0.7239381434704681, + "grad_norm": 284.0, + "learning_rate": 1.868924226816303e-05, + "loss": 11.9378, + "step": 17368 + }, + { + "epoch": 0.7239798257679964, + "grad_norm": 358.0, + "learning_rate": 1.868397990119784e-05, + "loss": 13.6876, + "step": 17369 + }, + { + "epoch": 0.7240215080655246, + "grad_norm": 656.0, + "learning_rate": 1.86787181049699e-05, + "loss": 21.1254, + "step": 17370 + }, + { + "epoch": 0.7240631903630528, + "grad_norm": 98.0, + "learning_rate": 1.8673456879575096e-05, + "loss": 9.3753, + "step": 17371 + }, + { + "epoch": 0.7241048726605811, + "grad_norm": 344.0, + "learning_rate": 1.8668196225109324e-05, + "loss": 14.4378, + "step": 17372 + }, + { + "epoch": 0.7241465549581093, + "grad_norm": 1256.0, + "learning_rate": 1.8662936141668458e-05, + "loss": 30.0011, + "step": 17373 + }, + { + "epoch": 0.7241882372556375, + "grad_norm": 580.0, + "learning_rate": 1.8657676629348363e-05, + "loss": 17.5004, + "step": 17374 + }, + { + "epoch": 0.7242299195531657, + "grad_norm": 184.0, + "learning_rate": 1.865241768824489e-05, + "loss": 11.0002, + "step": 17375 + }, + { + "epoch": 0.7242716018506941, + "grad_norm": 290.0, + "learning_rate": 1.8647159318453893e-05, + "loss": 12.6254, + "step": 17376 + }, + { + "epoch": 0.7243132841482223, + "grad_norm": 474.0, + "learning_rate": 1.8641901520071197e-05, + "loss": 17.3753, + "step": 17377 + }, + { + "epoch": 0.7243549664457505, + "grad_norm": 412.0, + "learning_rate": 1.863664429319263e-05, + "loss": 16.0002, + "step": 17378 + }, + { + "epoch": 0.7243966487432787, + "grad_norm": 478.0, + "learning_rate": 1.8631387637914004e-05, + "loss": 15.813, + "step": 17379 + }, + { + "epoch": 0.724438331040807, + "grad_norm": 276.0, + "learning_rate": 1.8626131554331127e-05, + "loss": 12.9379, + "step": 17380 + }, + { + "epoch": 0.7244800133383352, + "grad_norm": 302.0, + "learning_rate": 1.862087604253978e-05, + "loss": 13.8753, + "step": 17381 + }, + { + "epoch": 0.7245216956358634, + "grad_norm": 696.0, + "learning_rate": 1.861562110263576e-05, + "loss": 23.7516, + "step": 17382 + }, + { + "epoch": 0.7245633779333917, + "grad_norm": 360.0, + "learning_rate": 1.8610366734714828e-05, + "loss": 13.0002, + "step": 17383 + }, + { + "epoch": 0.72460506023092, + "grad_norm": 227.0, + "learning_rate": 1.8605112938872737e-05, + "loss": 11.8754, + "step": 17384 + }, + { + "epoch": 0.7246467425284482, + "grad_norm": 488.0, + "learning_rate": 1.8599859715205275e-05, + "loss": 14.44, + "step": 17385 + }, + { + "epoch": 0.7246884248259764, + "grad_norm": 159.0, + "learning_rate": 1.859460706380813e-05, + "loss": 10.8755, + "step": 17386 + }, + { + "epoch": 0.7247301071235046, + "grad_norm": 1960.0, + "learning_rate": 1.8589354984777092e-05, + "loss": 42.0003, + "step": 17387 + }, + { + "epoch": 0.7247717894210329, + "grad_norm": 165.0, + "learning_rate": 1.858410347820781e-05, + "loss": 10.8753, + "step": 17388 + }, + { + "epoch": 0.7248134717185611, + "grad_norm": 304.0, + "learning_rate": 1.8578852544196068e-05, + "loss": 14.3756, + "step": 17389 + }, + { + "epoch": 0.7248551540160894, + "grad_norm": 366.0, + "learning_rate": 1.8573602182837496e-05, + "loss": 14.5627, + "step": 17390 + }, + { + "epoch": 0.7248968363136176, + "grad_norm": 216.0, + "learning_rate": 1.8568352394227845e-05, + "loss": 10.0017, + "step": 17391 + }, + { + "epoch": 0.7249385186111459, + "grad_norm": 168.0, + "learning_rate": 1.8563103178462727e-05, + "loss": 9.9378, + "step": 17392 + }, + { + "epoch": 0.7249802009086741, + "grad_norm": 366.0, + "learning_rate": 1.855785453563788e-05, + "loss": 15.2504, + "step": 17393 + }, + { + "epoch": 0.7250218832062023, + "grad_norm": 656.0, + "learning_rate": 1.8552606465848892e-05, + "loss": 19.8753, + "step": 17394 + }, + { + "epoch": 0.7250635655037305, + "grad_norm": 482.0, + "learning_rate": 1.854735896919146e-05, + "loss": 17.1252, + "step": 17395 + }, + { + "epoch": 0.7251052478012588, + "grad_norm": 386.0, + "learning_rate": 1.85421120457612e-05, + "loss": 15.7504, + "step": 17396 + }, + { + "epoch": 0.7251469300987871, + "grad_norm": 660.0, + "learning_rate": 1.8536865695653743e-05, + "loss": 18.1306, + "step": 17397 + }, + { + "epoch": 0.7251886123963153, + "grad_norm": 107.0, + "learning_rate": 1.85316199189647e-05, + "loss": 10.8758, + "step": 17398 + }, + { + "epoch": 0.7252302946938435, + "grad_norm": 1616.0, + "learning_rate": 1.852637471578968e-05, + "loss": 31.6265, + "step": 17399 + }, + { + "epoch": 0.7252719769913718, + "grad_norm": 456.0, + "learning_rate": 1.8521130086224275e-05, + "loss": 14.6878, + "step": 17400 + }, + { + "epoch": 0.7253136592889, + "grad_norm": 516.0, + "learning_rate": 1.8515886030364066e-05, + "loss": 17.8761, + "step": 17401 + }, + { + "epoch": 0.7253553415864282, + "grad_norm": 292.0, + "learning_rate": 1.851064254830463e-05, + "loss": 12.2516, + "step": 17402 + }, + { + "epoch": 0.7253970238839564, + "grad_norm": 165.0, + "learning_rate": 1.850539964014153e-05, + "loss": 9.3753, + "step": 17403 + }, + { + "epoch": 0.7254387061814848, + "grad_norm": 81.5, + "learning_rate": 1.850015730597032e-05, + "loss": 9.0015, + "step": 17404 + }, + { + "epoch": 0.725480388479013, + "grad_norm": 184.0, + "learning_rate": 1.8494915545886533e-05, + "loss": 10.5002, + "step": 17405 + }, + { + "epoch": 0.7255220707765412, + "grad_norm": 470.0, + "learning_rate": 1.848967435998571e-05, + "loss": 17.2502, + "step": 17406 + }, + { + "epoch": 0.7255637530740694, + "grad_norm": 350.0, + "learning_rate": 1.8484433748363366e-05, + "loss": 13.3751, + "step": 17407 + }, + { + "epoch": 0.7256054353715977, + "grad_norm": 422.0, + "learning_rate": 1.8479193711115017e-05, + "loss": 15.5629, + "step": 17408 + }, + { + "epoch": 0.7256471176691259, + "grad_norm": 772.0, + "learning_rate": 1.8473954248336163e-05, + "loss": 20.1297, + "step": 17409 + }, + { + "epoch": 0.7256887999666541, + "grad_norm": 408.0, + "learning_rate": 1.8468715360122267e-05, + "loss": 14.8131, + "step": 17410 + }, + { + "epoch": 0.7257304822641824, + "grad_norm": 412.0, + "learning_rate": 1.8463477046568876e-05, + "loss": 16.5003, + "step": 17411 + }, + { + "epoch": 0.7257721645617107, + "grad_norm": 320.0, + "learning_rate": 1.845823930777138e-05, + "loss": 13.2502, + "step": 17412 + }, + { + "epoch": 0.7258138468592389, + "grad_norm": 676.0, + "learning_rate": 1.8453002143825303e-05, + "loss": 20.8752, + "step": 17413 + }, + { + "epoch": 0.7258555291567671, + "grad_norm": 119.0, + "learning_rate": 1.844776555482603e-05, + "loss": 9.2501, + "step": 17414 + }, + { + "epoch": 0.7258972114542953, + "grad_norm": 840.0, + "learning_rate": 1.8442529540869064e-05, + "loss": 22.6281, + "step": 17415 + }, + { + "epoch": 0.7259388937518236, + "grad_norm": 250.0, + "learning_rate": 1.8437294102049768e-05, + "loss": 12.0003, + "step": 17416 + }, + { + "epoch": 0.7259805760493518, + "grad_norm": 217.0, + "learning_rate": 1.8432059238463617e-05, + "loss": 12.3753, + "step": 17417 + }, + { + "epoch": 0.7260222583468801, + "grad_norm": 280.0, + "learning_rate": 1.8426824950205955e-05, + "loss": 13.7504, + "step": 17418 + }, + { + "epoch": 0.7260639406444083, + "grad_norm": 314.0, + "learning_rate": 1.8421591237372237e-05, + "loss": 13.7504, + "step": 17419 + }, + { + "epoch": 0.7261056229419366, + "grad_norm": 274.0, + "learning_rate": 1.8416358100057814e-05, + "loss": 10.8131, + "step": 17420 + }, + { + "epoch": 0.7261473052394648, + "grad_norm": 396.0, + "learning_rate": 1.841112553835807e-05, + "loss": 15.8756, + "step": 17421 + }, + { + "epoch": 0.726188987536993, + "grad_norm": 154.0, + "learning_rate": 1.840589355236837e-05, + "loss": 10.6258, + "step": 17422 + }, + { + "epoch": 0.7262306698345212, + "grad_norm": 572.0, + "learning_rate": 1.840066214218406e-05, + "loss": 18.3754, + "step": 17423 + }, + { + "epoch": 0.7262723521320495, + "grad_norm": 320.0, + "learning_rate": 1.839543130790049e-05, + "loss": 14.1883, + "step": 17424 + }, + { + "epoch": 0.7263140344295778, + "grad_norm": 756.0, + "learning_rate": 1.839020104961299e-05, + "loss": 23.2502, + "step": 17425 + }, + { + "epoch": 0.726355716727106, + "grad_norm": 94.5, + "learning_rate": 1.838497136741688e-05, + "loss": 8.5629, + "step": 17426 + }, + { + "epoch": 0.7263973990246342, + "grad_norm": 127.0, + "learning_rate": 1.8379742261407477e-05, + "loss": 9.3751, + "step": 17427 + }, + { + "epoch": 0.7264390813221625, + "grad_norm": 1552.0, + "learning_rate": 1.8374513731680078e-05, + "loss": 35.5056, + "step": 17428 + }, + { + "epoch": 0.7264807636196907, + "grad_norm": 330.0, + "learning_rate": 1.836928577832997e-05, + "loss": 13.7535, + "step": 17429 + }, + { + "epoch": 0.7265224459172189, + "grad_norm": 254.0, + "learning_rate": 1.836405840145244e-05, + "loss": 14.5633, + "step": 17430 + }, + { + "epoch": 0.7265641282147473, + "grad_norm": 177.0, + "learning_rate": 1.8358831601142755e-05, + "loss": 11.5003, + "step": 17431 + }, + { + "epoch": 0.7266058105122755, + "grad_norm": 422.0, + "learning_rate": 1.8353605377496168e-05, + "loss": 15.1878, + "step": 17432 + }, + { + "epoch": 0.7266474928098037, + "grad_norm": 396.0, + "learning_rate": 1.834837973060794e-05, + "loss": 13.4379, + "step": 17433 + }, + { + "epoch": 0.7266891751073319, + "grad_norm": 1088.0, + "learning_rate": 1.8343154660573292e-05, + "loss": 23.8787, + "step": 17434 + }, + { + "epoch": 0.7267308574048602, + "grad_norm": 330.0, + "learning_rate": 1.8337930167487465e-05, + "loss": 14.5002, + "step": 17435 + }, + { + "epoch": 0.7267725397023884, + "grad_norm": 432.0, + "learning_rate": 1.833270625144566e-05, + "loss": 17.1264, + "step": 17436 + }, + { + "epoch": 0.7268142219999166, + "grad_norm": 166.0, + "learning_rate": 1.832748291254312e-05, + "loss": 9.8128, + "step": 17437 + }, + { + "epoch": 0.7268559042974448, + "grad_norm": 332.0, + "learning_rate": 1.8322260150874985e-05, + "loss": 13.2503, + "step": 17438 + }, + { + "epoch": 0.7268975865949732, + "grad_norm": 306.0, + "learning_rate": 1.8317037966536505e-05, + "loss": 13.6251, + "step": 17439 + }, + { + "epoch": 0.7269392688925014, + "grad_norm": 868.0, + "learning_rate": 1.8311816359622786e-05, + "loss": 21.7544, + "step": 17440 + }, + { + "epoch": 0.7269809511900296, + "grad_norm": 253.0, + "learning_rate": 1.830659533022906e-05, + "loss": 11.3131, + "step": 17441 + }, + { + "epoch": 0.7270226334875578, + "grad_norm": 156.0, + "learning_rate": 1.8301374878450415e-05, + "loss": 7.063, + "step": 17442 + }, + { + "epoch": 0.7270643157850861, + "grad_norm": 206.0, + "learning_rate": 1.829615500438206e-05, + "loss": 11.1254, + "step": 17443 + }, + { + "epoch": 0.7271059980826143, + "grad_norm": 137.0, + "learning_rate": 1.829093570811906e-05, + "loss": 8.7503, + "step": 17444 + }, + { + "epoch": 0.7271476803801425, + "grad_norm": 253.0, + "learning_rate": 1.828571698975659e-05, + "loss": 11.5628, + "step": 17445 + }, + { + "epoch": 0.7271893626776708, + "grad_norm": 318.0, + "learning_rate": 1.8280498849389742e-05, + "loss": 13.1887, + "step": 17446 + }, + { + "epoch": 0.7272310449751991, + "grad_norm": 76.5, + "learning_rate": 1.827528128711362e-05, + "loss": 9.001, + "step": 17447 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 316.0, + "learning_rate": 1.8270064303023314e-05, + "loss": 14.0003, + "step": 17448 + }, + { + "epoch": 0.7273144095702555, + "grad_norm": 544.0, + "learning_rate": 1.82648478972139e-05, + "loss": 17.3784, + "step": 17449 + }, + { + "epoch": 0.7273560918677837, + "grad_norm": 328.0, + "learning_rate": 1.8259632069780453e-05, + "loss": 14.1881, + "step": 17450 + }, + { + "epoch": 0.727397774165312, + "grad_norm": 47.0, + "learning_rate": 1.8254416820818026e-05, + "loss": 7.6568, + "step": 17451 + }, + { + "epoch": 0.7274394564628403, + "grad_norm": 342.0, + "learning_rate": 1.8249202150421674e-05, + "loss": 12.0627, + "step": 17452 + }, + { + "epoch": 0.7274811387603685, + "grad_norm": 358.0, + "learning_rate": 1.8243988058686428e-05, + "loss": 15.1877, + "step": 17453 + }, + { + "epoch": 0.7275228210578967, + "grad_norm": 402.0, + "learning_rate": 1.823877454570732e-05, + "loss": 15.6252, + "step": 17454 + }, + { + "epoch": 0.727564503355425, + "grad_norm": 60.75, + "learning_rate": 1.8233561611579365e-05, + "loss": 7.7817, + "step": 17455 + }, + { + "epoch": 0.7276061856529532, + "grad_norm": 276.0, + "learning_rate": 1.822834925639757e-05, + "loss": 12.6877, + "step": 17456 + }, + { + "epoch": 0.7276478679504814, + "grad_norm": 193.0, + "learning_rate": 1.822313748025693e-05, + "loss": 12.3133, + "step": 17457 + }, + { + "epoch": 0.7276895502480096, + "grad_norm": 77.0, + "learning_rate": 1.8217926283252434e-05, + "loss": 9.938, + "step": 17458 + }, + { + "epoch": 0.727731232545538, + "grad_norm": 244.0, + "learning_rate": 1.8212715665479045e-05, + "loss": 11.5629, + "step": 17459 + }, + { + "epoch": 0.7277729148430662, + "grad_norm": 376.0, + "learning_rate": 1.8207505627031723e-05, + "loss": 15.3126, + "step": 17460 + }, + { + "epoch": 0.7278145971405944, + "grad_norm": 245.0, + "learning_rate": 1.8202296168005467e-05, + "loss": 12.2505, + "step": 17461 + }, + { + "epoch": 0.7278562794381226, + "grad_norm": 228.0, + "learning_rate": 1.8197087288495153e-05, + "loss": 11.4378, + "step": 17462 + }, + { + "epoch": 0.7278979617356509, + "grad_norm": 888.0, + "learning_rate": 1.819187898859578e-05, + "loss": 24.8752, + "step": 17463 + }, + { + "epoch": 0.7279396440331791, + "grad_norm": 260.0, + "learning_rate": 1.81866712684022e-05, + "loss": 12.7503, + "step": 17464 + }, + { + "epoch": 0.7279813263307073, + "grad_norm": 320.0, + "learning_rate": 1.8181464128009396e-05, + "loss": 12.5025, + "step": 17465 + }, + { + "epoch": 0.7280230086282355, + "grad_norm": 516.0, + "learning_rate": 1.81762575675122e-05, + "loss": 17.1253, + "step": 17466 + }, + { + "epoch": 0.7280646909257639, + "grad_norm": 350.0, + "learning_rate": 1.8171051587005568e-05, + "loss": 14.6252, + "step": 17467 + }, + { + "epoch": 0.7281063732232921, + "grad_norm": 67.5, + "learning_rate": 1.8165846186584313e-05, + "loss": 8.0016, + "step": 17468 + }, + { + "epoch": 0.7281480555208203, + "grad_norm": 300.0, + "learning_rate": 1.8160641366343368e-05, + "loss": 13.9381, + "step": 17469 + }, + { + "epoch": 0.7281897378183485, + "grad_norm": 424.0, + "learning_rate": 1.815543712637753e-05, + "loss": 15.6876, + "step": 17470 + }, + { + "epoch": 0.7282314201158768, + "grad_norm": 187.0, + "learning_rate": 1.815023346678169e-05, + "loss": 11.2506, + "step": 17471 + }, + { + "epoch": 0.728273102413405, + "grad_norm": 288.0, + "learning_rate": 1.8145030387650675e-05, + "loss": 13.8751, + "step": 17472 + }, + { + "epoch": 0.7283147847109332, + "grad_norm": 133.0, + "learning_rate": 1.813982788907931e-05, + "loss": 6.0319, + "step": 17473 + }, + { + "epoch": 0.7283564670084615, + "grad_norm": 416.0, + "learning_rate": 1.813462597116241e-05, + "loss": 14.7505, + "step": 17474 + }, + { + "epoch": 0.7283981493059898, + "grad_norm": 700.0, + "learning_rate": 1.8129424633994778e-05, + "loss": 18.8794, + "step": 17475 + }, + { + "epoch": 0.728439831603518, + "grad_norm": 209.0, + "learning_rate": 1.8124223877671214e-05, + "loss": 11.6879, + "step": 17476 + }, + { + "epoch": 0.7284815139010462, + "grad_norm": 280.0, + "learning_rate": 1.8119023702286496e-05, + "loss": 12.5002, + "step": 17477 + }, + { + "epoch": 0.7285231961985744, + "grad_norm": 368.0, + "learning_rate": 1.8113824107935408e-05, + "loss": 14.6877, + "step": 17478 + }, + { + "epoch": 0.7285648784961027, + "grad_norm": 178.0, + "learning_rate": 1.81086250947127e-05, + "loss": 10.6878, + "step": 17479 + }, + { + "epoch": 0.728606560793631, + "grad_norm": 229.0, + "learning_rate": 1.8103426662713135e-05, + "loss": 11.3753, + "step": 17480 + }, + { + "epoch": 0.7286482430911592, + "grad_norm": 840.0, + "learning_rate": 1.8098228812031447e-05, + "loss": 20.0054, + "step": 17481 + }, + { + "epoch": 0.7286899253886874, + "grad_norm": 280.0, + "learning_rate": 1.8093031542762374e-05, + "loss": 12.7505, + "step": 17482 + }, + { + "epoch": 0.7287316076862157, + "grad_norm": 203.0, + "learning_rate": 1.808783485500063e-05, + "loss": 11.6254, + "step": 17483 + }, + { + "epoch": 0.7287732899837439, + "grad_norm": 394.0, + "learning_rate": 1.808263874884093e-05, + "loss": 15.6255, + "step": 17484 + }, + { + "epoch": 0.7288149722812721, + "grad_norm": 158.0, + "learning_rate": 1.8077443224377976e-05, + "loss": 9.4378, + "step": 17485 + }, + { + "epoch": 0.7288566545788003, + "grad_norm": 280.0, + "learning_rate": 1.8072248281706432e-05, + "loss": 12.3132, + "step": 17486 + }, + { + "epoch": 0.7288983368763287, + "grad_norm": 584.0, + "learning_rate": 1.8067053920921027e-05, + "loss": 19.1252, + "step": 17487 + }, + { + "epoch": 0.7289400191738569, + "grad_norm": 71.0, + "learning_rate": 1.806186014211638e-05, + "loss": 8.1251, + "step": 17488 + }, + { + "epoch": 0.7289817014713851, + "grad_norm": 416.0, + "learning_rate": 1.8056666945387184e-05, + "loss": 15.3128, + "step": 17489 + }, + { + "epoch": 0.7290233837689133, + "grad_norm": 616.0, + "learning_rate": 1.8051474330828044e-05, + "loss": 18.6254, + "step": 17490 + }, + { + "epoch": 0.7290650660664416, + "grad_norm": 243.0, + "learning_rate": 1.804628229853365e-05, + "loss": 12.2502, + "step": 17491 + }, + { + "epoch": 0.7291067483639698, + "grad_norm": 346.0, + "learning_rate": 1.8041090848598567e-05, + "loss": 14.1878, + "step": 17492 + }, + { + "epoch": 0.729148430661498, + "grad_norm": 45.0, + "learning_rate": 1.8035899981117475e-05, + "loss": 7.0316, + "step": 17493 + }, + { + "epoch": 0.7291901129590262, + "grad_norm": 163.0, + "learning_rate": 1.8030709696184912e-05, + "loss": 8.6262, + "step": 17494 + }, + { + "epoch": 0.7292317952565546, + "grad_norm": 430.0, + "learning_rate": 1.802551999389551e-05, + "loss": 15.6886, + "step": 17495 + }, + { + "epoch": 0.7292734775540828, + "grad_norm": 169.0, + "learning_rate": 1.8020330874343854e-05, + "loss": 10.0629, + "step": 17496 + }, + { + "epoch": 0.729315159851611, + "grad_norm": 684.0, + "learning_rate": 1.80151423376245e-05, + "loss": 20.7506, + "step": 17497 + }, + { + "epoch": 0.7293568421491392, + "grad_norm": 1656.0, + "learning_rate": 1.8009954383832024e-05, + "loss": 39.7502, + "step": 17498 + }, + { + "epoch": 0.7293985244466675, + "grad_norm": 137.0, + "learning_rate": 1.8004767013060964e-05, + "loss": 10.0007, + "step": 17499 + }, + { + "epoch": 0.7294402067441957, + "grad_norm": 213.0, + "learning_rate": 1.7999580225405867e-05, + "loss": 8.814, + "step": 17500 + }, + { + "epoch": 0.729481889041724, + "grad_norm": 406.0, + "learning_rate": 1.799439402096126e-05, + "loss": 15.938, + "step": 17501 + }, + { + "epoch": 0.7295235713392523, + "grad_norm": 672.0, + "learning_rate": 1.798920839982166e-05, + "loss": 20.2524, + "step": 17502 + }, + { + "epoch": 0.7295652536367805, + "grad_norm": 222.0, + "learning_rate": 1.7984023362081583e-05, + "loss": 8.5009, + "step": 17503 + }, + { + "epoch": 0.7296069359343087, + "grad_norm": 676.0, + "learning_rate": 1.7978838907835522e-05, + "loss": 20.7505, + "step": 17504 + }, + { + "epoch": 0.7296486182318369, + "grad_norm": 376.0, + "learning_rate": 1.797365503717796e-05, + "loss": 13.5012, + "step": 17505 + }, + { + "epoch": 0.7296903005293652, + "grad_norm": 296.0, + "learning_rate": 1.7968471750203386e-05, + "loss": 12.8127, + "step": 17506 + }, + { + "epoch": 0.7297319828268934, + "grad_norm": 280.0, + "learning_rate": 1.7963289047006254e-05, + "loss": 12.7503, + "step": 17507 + }, + { + "epoch": 0.7297736651244217, + "grad_norm": 378.0, + "learning_rate": 1.7958106927681024e-05, + "loss": 15.3752, + "step": 17508 + }, + { + "epoch": 0.7298153474219499, + "grad_norm": 564.0, + "learning_rate": 1.7952925392322135e-05, + "loss": 17.8751, + "step": 17509 + }, + { + "epoch": 0.7298570297194782, + "grad_norm": 524.0, + "learning_rate": 1.7947744441024012e-05, + "loss": 18.5003, + "step": 17510 + }, + { + "epoch": 0.7298987120170064, + "grad_norm": 342.0, + "learning_rate": 1.794256407388113e-05, + "loss": 13.8128, + "step": 17511 + }, + { + "epoch": 0.7299403943145346, + "grad_norm": 552.0, + "learning_rate": 1.7937384290987823e-05, + "loss": 16.6255, + "step": 17512 + }, + { + "epoch": 0.7299820766120628, + "grad_norm": 211.0, + "learning_rate": 1.793220509243857e-05, + "loss": 12.0002, + "step": 17513 + }, + { + "epoch": 0.7300237589095911, + "grad_norm": 832.0, + "learning_rate": 1.7927026478327692e-05, + "loss": 24.3752, + "step": 17514 + }, + { + "epoch": 0.7300654412071194, + "grad_norm": 692.0, + "learning_rate": 1.792184844874964e-05, + "loss": 18.1297, + "step": 17515 + }, + { + "epoch": 0.7301071235046476, + "grad_norm": 428.0, + "learning_rate": 1.7916671003798707e-05, + "loss": 12.1254, + "step": 17516 + }, + { + "epoch": 0.7301488058021758, + "grad_norm": 184.0, + "learning_rate": 1.7911494143569337e-05, + "loss": 10.4377, + "step": 17517 + }, + { + "epoch": 0.7301904880997041, + "grad_norm": 147.0, + "learning_rate": 1.7906317868155792e-05, + "loss": 9.5003, + "step": 17518 + }, + { + "epoch": 0.7302321703972323, + "grad_norm": 84.5, + "learning_rate": 1.790114217765249e-05, + "loss": 9.1252, + "step": 17519 + }, + { + "epoch": 0.7302738526947605, + "grad_norm": 688.0, + "learning_rate": 1.789596707215369e-05, + "loss": 19.7502, + "step": 17520 + }, + { + "epoch": 0.7303155349922887, + "grad_norm": 201.0, + "learning_rate": 1.7890792551753755e-05, + "loss": 10.8127, + "step": 17521 + }, + { + "epoch": 0.7303572172898171, + "grad_norm": 390.0, + "learning_rate": 1.7885618616546974e-05, + "loss": 15.6266, + "step": 17522 + }, + { + "epoch": 0.7303988995873453, + "grad_norm": 166.0, + "learning_rate": 1.7880445266627648e-05, + "loss": 11.4379, + "step": 17523 + }, + { + "epoch": 0.7304405818848735, + "grad_norm": 177.0, + "learning_rate": 1.7875272502090057e-05, + "loss": 11.0002, + "step": 17524 + }, + { + "epoch": 0.7304822641824017, + "grad_norm": 536.0, + "learning_rate": 1.7870100323028476e-05, + "loss": 16.3757, + "step": 17525 + }, + { + "epoch": 0.73052394647993, + "grad_norm": 242.0, + "learning_rate": 1.7864928729537167e-05, + "loss": 11.313, + "step": 17526 + }, + { + "epoch": 0.7305656287774582, + "grad_norm": 636.0, + "learning_rate": 1.7859757721710384e-05, + "loss": 20.0002, + "step": 17527 + }, + { + "epoch": 0.7306073110749864, + "grad_norm": 1056.0, + "learning_rate": 1.785458729964238e-05, + "loss": 27.8758, + "step": 17528 + }, + { + "epoch": 0.7306489933725147, + "grad_norm": 239.0, + "learning_rate": 1.7849417463427364e-05, + "loss": 13.6877, + "step": 17529 + }, + { + "epoch": 0.730690675670043, + "grad_norm": 150.0, + "learning_rate": 1.7844248213159577e-05, + "loss": 11.1878, + "step": 17530 + }, + { + "epoch": 0.7307323579675712, + "grad_norm": 636.0, + "learning_rate": 1.7839079548933213e-05, + "loss": 21.7502, + "step": 17531 + }, + { + "epoch": 0.7307740402650994, + "grad_norm": 266.0, + "learning_rate": 1.7833911470842483e-05, + "loss": 13.1252, + "step": 17532 + }, + { + "epoch": 0.7308157225626276, + "grad_norm": 410.0, + "learning_rate": 1.782874397898157e-05, + "loss": 17.6253, + "step": 17533 + }, + { + "epoch": 0.7308574048601559, + "grad_norm": 464.0, + "learning_rate": 1.782357707344466e-05, + "loss": 19.7504, + "step": 17534 + }, + { + "epoch": 0.7308990871576841, + "grad_norm": 1200.0, + "learning_rate": 1.781841075432591e-05, + "loss": 27.7549, + "step": 17535 + }, + { + "epoch": 0.7309407694552124, + "grad_norm": 81.0, + "learning_rate": 1.7813245021719467e-05, + "loss": 6.2502, + "step": 17536 + }, + { + "epoch": 0.7309824517527406, + "grad_norm": 524.0, + "learning_rate": 1.780807987571952e-05, + "loss": 15.8753, + "step": 17537 + }, + { + "epoch": 0.7310241340502689, + "grad_norm": 584.0, + "learning_rate": 1.7802915316420145e-05, + "loss": 18.3753, + "step": 17538 + }, + { + "epoch": 0.7310658163477971, + "grad_norm": 346.0, + "learning_rate": 1.7797751343915526e-05, + "loss": 14.5664, + "step": 17539 + }, + { + "epoch": 0.7311074986453253, + "grad_norm": 560.0, + "learning_rate": 1.7792587958299718e-05, + "loss": 18.8753, + "step": 17540 + }, + { + "epoch": 0.7311491809428535, + "grad_norm": 115.0, + "learning_rate": 1.7787425159666886e-05, + "loss": 10.5632, + "step": 17541 + }, + { + "epoch": 0.7311908632403818, + "grad_norm": 424.0, + "learning_rate": 1.7782262948111056e-05, + "loss": 15.6881, + "step": 17542 + }, + { + "epoch": 0.7312325455379101, + "grad_norm": 540.0, + "learning_rate": 1.7777101323726376e-05, + "loss": 18.2502, + "step": 17543 + }, + { + "epoch": 0.7312742278354383, + "grad_norm": 87.5, + "learning_rate": 1.7771940286606853e-05, + "loss": 10.0001, + "step": 17544 + }, + { + "epoch": 0.7313159101329665, + "grad_norm": 324.0, + "learning_rate": 1.7766779836846597e-05, + "loss": 12.5003, + "step": 17545 + }, + { + "epoch": 0.7313575924304948, + "grad_norm": 458.0, + "learning_rate": 1.776161997453964e-05, + "loss": 14.7511, + "step": 17546 + }, + { + "epoch": 0.731399274728023, + "grad_norm": 420.0, + "learning_rate": 1.7756460699780016e-05, + "loss": 16.3768, + "step": 17547 + }, + { + "epoch": 0.7314409570255512, + "grad_norm": 284.0, + "learning_rate": 1.7751302012661763e-05, + "loss": 14.0005, + "step": 17548 + }, + { + "epoch": 0.7314826393230794, + "grad_norm": 172.0, + "learning_rate": 1.774614391327889e-05, + "loss": 10.4378, + "step": 17549 + }, + { + "epoch": 0.7315243216206078, + "grad_norm": 238.0, + "learning_rate": 1.7740986401725413e-05, + "loss": 13.0004, + "step": 17550 + }, + { + "epoch": 0.731566003918136, + "grad_norm": 342.0, + "learning_rate": 1.7735829478095316e-05, + "loss": 15.063, + "step": 17551 + }, + { + "epoch": 0.7316076862156642, + "grad_norm": 362.0, + "learning_rate": 1.7730673142482596e-05, + "loss": 14.3761, + "step": 17552 + }, + { + "epoch": 0.7316493685131924, + "grad_norm": 392.0, + "learning_rate": 1.7725517394981223e-05, + "loss": 14.2503, + "step": 17553 + }, + { + "epoch": 0.7316910508107207, + "grad_norm": 564.0, + "learning_rate": 1.7720362235685156e-05, + "loss": 19.6255, + "step": 17554 + }, + { + "epoch": 0.7317327331082489, + "grad_norm": 86.5, + "learning_rate": 1.7715207664688355e-05, + "loss": 9.0003, + "step": 17555 + }, + { + "epoch": 0.7317744154057771, + "grad_norm": 312.0, + "learning_rate": 1.771005368208476e-05, + "loss": 14.5004, + "step": 17556 + }, + { + "epoch": 0.7318160977033054, + "grad_norm": 478.0, + "learning_rate": 1.7704900287968302e-05, + "loss": 17.2511, + "step": 17557 + }, + { + "epoch": 0.7318577800008337, + "grad_norm": 544.0, + "learning_rate": 1.7699747482432905e-05, + "loss": 19.5007, + "step": 17558 + }, + { + "epoch": 0.7318994622983619, + "grad_norm": 230.0, + "learning_rate": 1.7694595265572477e-05, + "loss": 11.6255, + "step": 17559 + }, + { + "epoch": 0.7319411445958901, + "grad_norm": 131.0, + "learning_rate": 1.76894436374809e-05, + "loss": 9.2505, + "step": 17560 + }, + { + "epoch": 0.7319828268934183, + "grad_norm": 248.0, + "learning_rate": 1.7684292598252118e-05, + "loss": 11.1255, + "step": 17561 + }, + { + "epoch": 0.7320245091909466, + "grad_norm": 171.0, + "learning_rate": 1.767914214797994e-05, + "loss": 8.1254, + "step": 17562 + }, + { + "epoch": 0.7320661914884748, + "grad_norm": 784.0, + "learning_rate": 1.7673992286758297e-05, + "loss": 19.1276, + "step": 17563 + }, + { + "epoch": 0.7321078737860031, + "grad_norm": 138.0, + "learning_rate": 1.7668843014680975e-05, + "loss": 10.0005, + "step": 17564 + }, + { + "epoch": 0.7321495560835313, + "grad_norm": 398.0, + "learning_rate": 1.7663694331841896e-05, + "loss": 13.0661, + "step": 17565 + }, + { + "epoch": 0.7321912383810596, + "grad_norm": 356.0, + "learning_rate": 1.7658546238334827e-05, + "loss": 13.4377, + "step": 17566 + }, + { + "epoch": 0.7322329206785878, + "grad_norm": 182.0, + "learning_rate": 1.7653398734253655e-05, + "loss": 9.5008, + "step": 17567 + }, + { + "epoch": 0.732274602976116, + "grad_norm": 564.0, + "learning_rate": 1.7648251819692134e-05, + "loss": 18.5004, + "step": 17568 + }, + { + "epoch": 0.7323162852736442, + "grad_norm": 752.0, + "learning_rate": 1.7643105494744122e-05, + "loss": 21.5001, + "step": 17569 + }, + { + "epoch": 0.7323579675711726, + "grad_norm": 676.0, + "learning_rate": 1.763795975950336e-05, + "loss": 21.3756, + "step": 17570 + }, + { + "epoch": 0.7323996498687008, + "grad_norm": 756.0, + "learning_rate": 1.7632814614063664e-05, + "loss": 23.7508, + "step": 17571 + }, + { + "epoch": 0.732441332166229, + "grad_norm": 732.0, + "learning_rate": 1.7627670058518798e-05, + "loss": 20.5008, + "step": 17572 + }, + { + "epoch": 0.7324830144637572, + "grad_norm": 205.0, + "learning_rate": 1.7622526092962517e-05, + "loss": 11.1253, + "step": 17573 + }, + { + "epoch": 0.7325246967612855, + "grad_norm": 222.0, + "learning_rate": 1.7617382717488575e-05, + "loss": 11.0627, + "step": 17574 + }, + { + "epoch": 0.7325663790588137, + "grad_norm": 100.5, + "learning_rate": 1.76122399321907e-05, + "loss": 7.7503, + "step": 17575 + }, + { + "epoch": 0.7326080613563419, + "grad_norm": 418.0, + "learning_rate": 1.7607097737162638e-05, + "loss": 16.0008, + "step": 17576 + }, + { + "epoch": 0.7326497436538703, + "grad_norm": 77.5, + "learning_rate": 1.760195613249809e-05, + "loss": 8.1254, + "step": 17577 + }, + { + "epoch": 0.7326914259513985, + "grad_norm": 584.0, + "learning_rate": 1.7596815118290765e-05, + "loss": 18.7512, + "step": 17578 + }, + { + "epoch": 0.7327331082489267, + "grad_norm": 516.0, + "learning_rate": 1.7591674694634365e-05, + "loss": 16.6253, + "step": 17579 + }, + { + "epoch": 0.7327747905464549, + "grad_norm": 112.0, + "learning_rate": 1.7586534861622566e-05, + "loss": 9.6252, + "step": 17580 + }, + { + "epoch": 0.7328164728439832, + "grad_norm": 224.0, + "learning_rate": 1.758139561934905e-05, + "loss": 10.8751, + "step": 17581 + }, + { + "epoch": 0.7328581551415114, + "grad_norm": 249.0, + "learning_rate": 1.757625696790748e-05, + "loss": 12.5627, + "step": 17582 + }, + { + "epoch": 0.7328998374390396, + "grad_norm": 292.0, + "learning_rate": 1.7571118907391503e-05, + "loss": 13.4377, + "step": 17583 + }, + { + "epoch": 0.7329415197365678, + "grad_norm": 864.0, + "learning_rate": 1.756598143789476e-05, + "loss": 22.1254, + "step": 17584 + }, + { + "epoch": 0.7329832020340962, + "grad_norm": 166.0, + "learning_rate": 1.7560844559510886e-05, + "loss": 10.1251, + "step": 17585 + }, + { + "epoch": 0.7330248843316244, + "grad_norm": 247.0, + "learning_rate": 1.7555708272333483e-05, + "loss": 11.2502, + "step": 17586 + }, + { + "epoch": 0.7330665666291526, + "grad_norm": 340.0, + "learning_rate": 1.755057257645621e-05, + "loss": 13.8753, + "step": 17587 + }, + { + "epoch": 0.7331082489266808, + "grad_norm": 161.0, + "learning_rate": 1.7545437471972597e-05, + "loss": 10.6255, + "step": 17588 + }, + { + "epoch": 0.7331499312242091, + "grad_norm": 352.0, + "learning_rate": 1.7540302958976307e-05, + "loss": 14.8149, + "step": 17589 + }, + { + "epoch": 0.7331916135217373, + "grad_norm": 504.0, + "learning_rate": 1.753516903756084e-05, + "loss": 17.3752, + "step": 17590 + }, + { + "epoch": 0.7332332958192656, + "grad_norm": 528.0, + "learning_rate": 1.7530035707819832e-05, + "loss": 17.1251, + "step": 17591 + }, + { + "epoch": 0.7332749781167938, + "grad_norm": 214.0, + "learning_rate": 1.7524902969846773e-05, + "loss": 12.0627, + "step": 17592 + }, + { + "epoch": 0.7333166604143221, + "grad_norm": 266.0, + "learning_rate": 1.751977082373527e-05, + "loss": 13.3134, + "step": 17593 + }, + { + "epoch": 0.7333583427118503, + "grad_norm": 358.0, + "learning_rate": 1.7514639269578796e-05, + "loss": 15.3127, + "step": 17594 + }, + { + "epoch": 0.7334000250093785, + "grad_norm": 186.0, + "learning_rate": 1.750950830747094e-05, + "loss": 10.3753, + "step": 17595 + }, + { + "epoch": 0.7334417073069067, + "grad_norm": 1280.0, + "learning_rate": 1.7504377937505145e-05, + "loss": 33.5001, + "step": 17596 + }, + { + "epoch": 0.733483389604435, + "grad_norm": 476.0, + "learning_rate": 1.7499248159774966e-05, + "loss": 15.3794, + "step": 17597 + }, + { + "epoch": 0.7335250719019633, + "grad_norm": 162.0, + "learning_rate": 1.7494118974373873e-05, + "loss": 10.5627, + "step": 17598 + }, + { + "epoch": 0.7335667541994915, + "grad_norm": 188.0, + "learning_rate": 1.748899038139535e-05, + "loss": 10.813, + "step": 17599 + }, + { + "epoch": 0.7336084364970197, + "grad_norm": 264.0, + "learning_rate": 1.7483862380932858e-05, + "loss": 13.3767, + "step": 17600 + }, + { + "epoch": 0.733650118794548, + "grad_norm": 976.0, + "learning_rate": 1.7478734973079865e-05, + "loss": 24.3752, + "step": 17601 + }, + { + "epoch": 0.7336918010920762, + "grad_norm": 332.0, + "learning_rate": 1.747360815792981e-05, + "loss": 14.3752, + "step": 17602 + }, + { + "epoch": 0.7337334833896044, + "grad_norm": 400.0, + "learning_rate": 1.7468481935576136e-05, + "loss": 15.2536, + "step": 17603 + }, + { + "epoch": 0.7337751656871326, + "grad_norm": 252.0, + "learning_rate": 1.7463356306112265e-05, + "loss": 13.063, + "step": 17604 + }, + { + "epoch": 0.733816847984661, + "grad_norm": 170.0, + "learning_rate": 1.7458231269631614e-05, + "loss": 11.8136, + "step": 17605 + }, + { + "epoch": 0.7338585302821892, + "grad_norm": 100.0, + "learning_rate": 1.745310682622759e-05, + "loss": 7.5628, + "step": 17606 + }, + { + "epoch": 0.7339002125797174, + "grad_norm": 320.0, + "learning_rate": 1.7447982975993575e-05, + "loss": 13.9378, + "step": 17607 + }, + { + "epoch": 0.7339418948772456, + "grad_norm": 247.0, + "learning_rate": 1.744285971902297e-05, + "loss": 12.8129, + "step": 17608 + }, + { + "epoch": 0.7339835771747739, + "grad_norm": 332.0, + "learning_rate": 1.743773705540913e-05, + "loss": 13.1883, + "step": 17609 + }, + { + "epoch": 0.7340252594723021, + "grad_norm": 740.0, + "learning_rate": 1.7432614985245425e-05, + "loss": 23.6264, + "step": 17610 + }, + { + "epoch": 0.7340669417698303, + "grad_norm": 108.0, + "learning_rate": 1.742749350862521e-05, + "loss": 8.938, + "step": 17611 + }, + { + "epoch": 0.7341086240673586, + "grad_norm": 476.0, + "learning_rate": 1.7422372625641792e-05, + "loss": 14.6879, + "step": 17612 + }, + { + "epoch": 0.7341503063648869, + "grad_norm": 344.0, + "learning_rate": 1.7417252336388563e-05, + "loss": 14.3759, + "step": 17613 + }, + { + "epoch": 0.7341919886624151, + "grad_norm": 197.0, + "learning_rate": 1.7412132640958765e-05, + "loss": 12.6255, + "step": 17614 + }, + { + "epoch": 0.7342336709599433, + "grad_norm": 676.0, + "learning_rate": 1.7407013539445777e-05, + "loss": 17.3793, + "step": 17615 + }, + { + "epoch": 0.7342753532574715, + "grad_norm": 198.0, + "learning_rate": 1.740189503194283e-05, + "loss": 12.1878, + "step": 17616 + }, + { + "epoch": 0.7343170355549998, + "grad_norm": 904.0, + "learning_rate": 1.7396777118543274e-05, + "loss": 22.0046, + "step": 17617 + }, + { + "epoch": 0.734358717852528, + "grad_norm": 176.0, + "learning_rate": 1.739165979934031e-05, + "loss": 10.8751, + "step": 17618 + }, + { + "epoch": 0.7344004001500563, + "grad_norm": 151.0, + "learning_rate": 1.7386543074427282e-05, + "loss": 9.8756, + "step": 17619 + }, + { + "epoch": 0.7344420824475845, + "grad_norm": 856.0, + "learning_rate": 1.7381426943897362e-05, + "loss": 23.0004, + "step": 17620 + }, + { + "epoch": 0.7344837647451128, + "grad_norm": 308.0, + "learning_rate": 1.737631140784385e-05, + "loss": 12.8755, + "step": 17621 + }, + { + "epoch": 0.734525447042641, + "grad_norm": 207.0, + "learning_rate": 1.7371196466359955e-05, + "loss": 11.5676, + "step": 17622 + }, + { + "epoch": 0.7345671293401692, + "grad_norm": 500.0, + "learning_rate": 1.7366082119538907e-05, + "loss": 17.3751, + "step": 17623 + }, + { + "epoch": 0.7346088116376974, + "grad_norm": 398.0, + "learning_rate": 1.7360968367473905e-05, + "loss": 14.0003, + "step": 17624 + }, + { + "epoch": 0.7346504939352257, + "grad_norm": 888.0, + "learning_rate": 1.7355855210258153e-05, + "loss": 22.2535, + "step": 17625 + }, + { + "epoch": 0.734692176232754, + "grad_norm": 86.0, + "learning_rate": 1.735074264798484e-05, + "loss": 9.1253, + "step": 17626 + }, + { + "epoch": 0.7347338585302822, + "grad_norm": 160.0, + "learning_rate": 1.734563068074714e-05, + "loss": 10.3751, + "step": 17627 + }, + { + "epoch": 0.7347755408278104, + "grad_norm": 900.0, + "learning_rate": 1.734051930863822e-05, + "loss": 21.8755, + "step": 17628 + }, + { + "epoch": 0.7348172231253387, + "grad_norm": 156.0, + "learning_rate": 1.7335408531751233e-05, + "loss": 9.9377, + "step": 17629 + }, + { + "epoch": 0.7348589054228669, + "grad_norm": 258.0, + "learning_rate": 1.7330298350179325e-05, + "loss": 12.2545, + "step": 17630 + }, + { + "epoch": 0.7349005877203951, + "grad_norm": 438.0, + "learning_rate": 1.7325188764015633e-05, + "loss": 16.2505, + "step": 17631 + }, + { + "epoch": 0.7349422700179233, + "grad_norm": 414.0, + "learning_rate": 1.7320079773353274e-05, + "loss": 14.9409, + "step": 17632 + }, + { + "epoch": 0.7349839523154517, + "grad_norm": 154.0, + "learning_rate": 1.731497137828536e-05, + "loss": 8.7502, + "step": 17633 + }, + { + "epoch": 0.7350256346129799, + "grad_norm": 111.5, + "learning_rate": 1.7309863578905e-05, + "loss": 9.6254, + "step": 17634 + }, + { + "epoch": 0.7350673169105081, + "grad_norm": 338.0, + "learning_rate": 1.7304756375305276e-05, + "loss": 15.1877, + "step": 17635 + }, + { + "epoch": 0.7351089992080363, + "grad_norm": 179.0, + "learning_rate": 1.729964976757925e-05, + "loss": 11.6887, + "step": 17636 + }, + { + "epoch": 0.7351506815055646, + "grad_norm": 488.0, + "learning_rate": 1.7294543755820048e-05, + "loss": 18.251, + "step": 17637 + }, + { + "epoch": 0.7351923638030928, + "grad_norm": 540.0, + "learning_rate": 1.728943834012065e-05, + "loss": 18.1253, + "step": 17638 + }, + { + "epoch": 0.735234046100621, + "grad_norm": 372.0, + "learning_rate": 1.728433352057418e-05, + "loss": 16.5001, + "step": 17639 + }, + { + "epoch": 0.7352757283981493, + "grad_norm": 1264.0, + "learning_rate": 1.7279229297273597e-05, + "loss": 30.8753, + "step": 17640 + }, + { + "epoch": 0.7353174106956776, + "grad_norm": 206.0, + "learning_rate": 1.7274125670312e-05, + "loss": 11.8127, + "step": 17641 + }, + { + "epoch": 0.7353590929932058, + "grad_norm": 468.0, + "learning_rate": 1.726902263978234e-05, + "loss": 16.3752, + "step": 17642 + }, + { + "epoch": 0.735400775290734, + "grad_norm": 560.0, + "learning_rate": 1.726392020577768e-05, + "loss": 18.7505, + "step": 17643 + }, + { + "epoch": 0.7354424575882622, + "grad_norm": 470.0, + "learning_rate": 1.7258818368390943e-05, + "loss": 15.5627, + "step": 17644 + }, + { + "epoch": 0.7354841398857905, + "grad_norm": 260.0, + "learning_rate": 1.7253717127715184e-05, + "loss": 11.3128, + "step": 17645 + }, + { + "epoch": 0.7355258221833187, + "grad_norm": 524.0, + "learning_rate": 1.72486164838433e-05, + "loss": 18.2502, + "step": 17646 + }, + { + "epoch": 0.735567504480847, + "grad_norm": 318.0, + "learning_rate": 1.7243516436868314e-05, + "loss": 13.6255, + "step": 17647 + }, + { + "epoch": 0.7356091867783753, + "grad_norm": 176.0, + "learning_rate": 1.7238416986883136e-05, + "loss": 11.3127, + "step": 17648 + }, + { + "epoch": 0.7356508690759035, + "grad_norm": 316.0, + "learning_rate": 1.7233318133980726e-05, + "loss": 13.5002, + "step": 17649 + }, + { + "epoch": 0.7356925513734317, + "grad_norm": 438.0, + "learning_rate": 1.7228219878254e-05, + "loss": 14.688, + "step": 17650 + }, + { + "epoch": 0.7357342336709599, + "grad_norm": 552.0, + "learning_rate": 1.7223122219795873e-05, + "loss": 18.5003, + "step": 17651 + }, + { + "epoch": 0.7357759159684882, + "grad_norm": 380.0, + "learning_rate": 1.7218025158699258e-05, + "loss": 14.5012, + "step": 17652 + }, + { + "epoch": 0.7358175982660164, + "grad_norm": 189.0, + "learning_rate": 1.721292869505704e-05, + "loss": 11.6878, + "step": 17653 + }, + { + "epoch": 0.7358592805635447, + "grad_norm": 458.0, + "learning_rate": 1.7207832828962105e-05, + "loss": 16.8752, + "step": 17654 + }, + { + "epoch": 0.7359009628610729, + "grad_norm": 656.0, + "learning_rate": 1.7202737560507338e-05, + "loss": 17.2541, + "step": 17655 + }, + { + "epoch": 0.7359426451586012, + "grad_norm": 192.0, + "learning_rate": 1.719764288978558e-05, + "loss": 10.6252, + "step": 17656 + }, + { + "epoch": 0.7359843274561294, + "grad_norm": 432.0, + "learning_rate": 1.7192548816889697e-05, + "loss": 14.6253, + "step": 17657 + }, + { + "epoch": 0.7360260097536576, + "grad_norm": 158.0, + "learning_rate": 1.718745534191252e-05, + "loss": 10.4378, + "step": 17658 + }, + { + "epoch": 0.7360676920511858, + "grad_norm": 656.0, + "learning_rate": 1.7182362464946887e-05, + "loss": 20.628, + "step": 17659 + }, + { + "epoch": 0.7361093743487142, + "grad_norm": 1248.0, + "learning_rate": 1.7177270186085614e-05, + "loss": 26.7535, + "step": 17660 + }, + { + "epoch": 0.7361510566462424, + "grad_norm": 1472.0, + "learning_rate": 1.7172178505421498e-05, + "loss": 28.5049, + "step": 17661 + }, + { + "epoch": 0.7361927389437706, + "grad_norm": 428.0, + "learning_rate": 1.7167087423047334e-05, + "loss": 16.3753, + "step": 17662 + }, + { + "epoch": 0.7362344212412988, + "grad_norm": 1440.0, + "learning_rate": 1.7161996939055947e-05, + "loss": 33.0019, + "step": 17663 + }, + { + "epoch": 0.7362761035388271, + "grad_norm": 296.0, + "learning_rate": 1.7156907053540045e-05, + "loss": 14.8128, + "step": 17664 + }, + { + "epoch": 0.7363177858363553, + "grad_norm": 366.0, + "learning_rate": 1.7151817766592458e-05, + "loss": 15.1253, + "step": 17665 + }, + { + "epoch": 0.7363594681338835, + "grad_norm": 276.0, + "learning_rate": 1.7146729078305884e-05, + "loss": 12.4378, + "step": 17666 + }, + { + "epoch": 0.7364011504314117, + "grad_norm": 88.5, + "learning_rate": 1.7141640988773118e-05, + "loss": 9.6881, + "step": 17667 + }, + { + "epoch": 0.7364428327289401, + "grad_norm": 123.0, + "learning_rate": 1.7136553498086828e-05, + "loss": 8.1879, + "step": 17668 + }, + { + "epoch": 0.7364845150264683, + "grad_norm": 208.0, + "learning_rate": 1.7131466606339798e-05, + "loss": 7.0006, + "step": 17669 + }, + { + "epoch": 0.7365261973239965, + "grad_norm": 532.0, + "learning_rate": 1.712638031362468e-05, + "loss": 18.1253, + "step": 17670 + }, + { + "epoch": 0.7365678796215247, + "grad_norm": 644.0, + "learning_rate": 1.712129462003421e-05, + "loss": 20.3755, + "step": 17671 + }, + { + "epoch": 0.736609561919053, + "grad_norm": 416.0, + "learning_rate": 1.711620952566107e-05, + "loss": 15.6882, + "step": 17672 + }, + { + "epoch": 0.7366512442165812, + "grad_norm": 360.0, + "learning_rate": 1.7111125030597925e-05, + "loss": 14.1876, + "step": 17673 + }, + { + "epoch": 0.7366929265141094, + "grad_norm": 426.0, + "learning_rate": 1.7106041134937452e-05, + "loss": 15.9386, + "step": 17674 + }, + { + "epoch": 0.7367346088116377, + "grad_norm": 1328.0, + "learning_rate": 1.7100957838772292e-05, + "loss": 25.6306, + "step": 17675 + }, + { + "epoch": 0.736776291109166, + "grad_norm": 382.0, + "learning_rate": 1.70958751421951e-05, + "loss": 14.0002, + "step": 17676 + }, + { + "epoch": 0.7368179734066942, + "grad_norm": 446.0, + "learning_rate": 1.7090793045298502e-05, + "loss": 15.6251, + "step": 17677 + }, + { + "epoch": 0.7368596557042224, + "grad_norm": 284.0, + "learning_rate": 1.7085711548175127e-05, + "loss": 14.0002, + "step": 17678 + }, + { + "epoch": 0.7369013380017506, + "grad_norm": 1176.0, + "learning_rate": 1.708063065091758e-05, + "loss": 28.8753, + "step": 17679 + }, + { + "epoch": 0.7369430202992789, + "grad_norm": 268.0, + "learning_rate": 1.7075550353618457e-05, + "loss": 13.3129, + "step": 17680 + }, + { + "epoch": 0.7369847025968072, + "grad_norm": 183.0, + "learning_rate": 1.7070470656370357e-05, + "loss": 12.0639, + "step": 17681 + }, + { + "epoch": 0.7370263848943354, + "grad_norm": 284.0, + "learning_rate": 1.7065391559265846e-05, + "loss": 10.5003, + "step": 17682 + }, + { + "epoch": 0.7370680671918636, + "grad_norm": 516.0, + "learning_rate": 1.7060313062397504e-05, + "loss": 18.3752, + "step": 17683 + }, + { + "epoch": 0.7371097494893919, + "grad_norm": 264.0, + "learning_rate": 1.705523516585788e-05, + "loss": 13.8755, + "step": 17684 + }, + { + "epoch": 0.7371514317869201, + "grad_norm": 1064.0, + "learning_rate": 1.705015786973952e-05, + "loss": 24.6323, + "step": 17685 + }, + { + "epoch": 0.7371931140844483, + "grad_norm": 434.0, + "learning_rate": 1.7045081174134936e-05, + "loss": 14.3752, + "step": 17686 + }, + { + "epoch": 0.7372347963819765, + "grad_norm": 482.0, + "learning_rate": 1.7040005079136713e-05, + "loss": 17.2504, + "step": 17687 + }, + { + "epoch": 0.7372764786795049, + "grad_norm": 188.0, + "learning_rate": 1.7034929584837284e-05, + "loss": 11.6255, + "step": 17688 + }, + { + "epoch": 0.7373181609770331, + "grad_norm": 288.0, + "learning_rate": 1.702985469132922e-05, + "loss": 12.0005, + "step": 17689 + }, + { + "epoch": 0.7373598432745613, + "grad_norm": 194.0, + "learning_rate": 1.702478039870496e-05, + "loss": 11.5628, + "step": 17690 + }, + { + "epoch": 0.7374015255720895, + "grad_norm": 318.0, + "learning_rate": 1.701970670705703e-05, + "loss": 14.7506, + "step": 17691 + }, + { + "epoch": 0.7374432078696178, + "grad_norm": 1512.0, + "learning_rate": 1.701463361647784e-05, + "loss": 29.5041, + "step": 17692 + }, + { + "epoch": 0.737484890167146, + "grad_norm": 134.0, + "learning_rate": 1.7009561127059913e-05, + "loss": 5.9693, + "step": 17693 + }, + { + "epoch": 0.7375265724646742, + "grad_norm": 736.0, + "learning_rate": 1.7004489238895634e-05, + "loss": 20.7506, + "step": 17694 + }, + { + "epoch": 0.7375682547622024, + "grad_norm": 458.0, + "learning_rate": 1.69994179520775e-05, + "loss": 15.4384, + "step": 17695 + }, + { + "epoch": 0.7376099370597308, + "grad_norm": 392.0, + "learning_rate": 1.6994347266697867e-05, + "loss": 11.6887, + "step": 17696 + }, + { + "epoch": 0.737651619357259, + "grad_norm": 536.0, + "learning_rate": 1.6989277182849206e-05, + "loss": 16.8756, + "step": 17697 + }, + { + "epoch": 0.7376933016547872, + "grad_norm": 448.0, + "learning_rate": 1.6984207700623893e-05, + "loss": 15.7504, + "step": 17698 + }, + { + "epoch": 0.7377349839523154, + "grad_norm": 248.0, + "learning_rate": 1.697913882011433e-05, + "loss": 12.8763, + "step": 17699 + }, + { + "epoch": 0.7377766662498437, + "grad_norm": 672.0, + "learning_rate": 1.697407054141289e-05, + "loss": 21.3752, + "step": 17700 + }, + { + "epoch": 0.7378183485473719, + "grad_norm": 596.0, + "learning_rate": 1.696900286461195e-05, + "loss": 17.7546, + "step": 17701 + }, + { + "epoch": 0.7378600308449001, + "grad_norm": 716.0, + "learning_rate": 1.6963935789803865e-05, + "loss": 18.2504, + "step": 17702 + }, + { + "epoch": 0.7379017131424284, + "grad_norm": 1056.0, + "learning_rate": 1.6958869317080983e-05, + "loss": 25.6254, + "step": 17703 + }, + { + "epoch": 0.7379433954399567, + "grad_norm": 136.0, + "learning_rate": 1.695380344653564e-05, + "loss": 10.0629, + "step": 17704 + }, + { + "epoch": 0.7379850777374849, + "grad_norm": 166.0, + "learning_rate": 1.694873817826016e-05, + "loss": 9.4378, + "step": 17705 + }, + { + "epoch": 0.7380267600350131, + "grad_norm": 207.0, + "learning_rate": 1.6943673512346863e-05, + "loss": 11.3127, + "step": 17706 + }, + { + "epoch": 0.7380684423325413, + "grad_norm": 428.0, + "learning_rate": 1.693860944888805e-05, + "loss": 14.5003, + "step": 17707 + }, + { + "epoch": 0.7381101246300696, + "grad_norm": 728.0, + "learning_rate": 1.6933545987976014e-05, + "loss": 22.8756, + "step": 17708 + }, + { + "epoch": 0.7381518069275979, + "grad_norm": 704.0, + "learning_rate": 1.6928483129703037e-05, + "loss": 20.0002, + "step": 17709 + }, + { + "epoch": 0.7381934892251261, + "grad_norm": 684.0, + "learning_rate": 1.6923420874161393e-05, + "loss": 20.7518, + "step": 17710 + }, + { + "epoch": 0.7382351715226543, + "grad_norm": 166.0, + "learning_rate": 1.6918359221443342e-05, + "loss": 7.6877, + "step": 17711 + }, + { + "epoch": 0.7382768538201826, + "grad_norm": 241.0, + "learning_rate": 1.691329817164111e-05, + "loss": 12.0633, + "step": 17712 + }, + { + "epoch": 0.7383185361177108, + "grad_norm": 84.5, + "learning_rate": 1.6908237724846994e-05, + "loss": 9.6258, + "step": 17713 + }, + { + "epoch": 0.738360218415239, + "grad_norm": 292.0, + "learning_rate": 1.6903177881153143e-05, + "loss": 12.3757, + "step": 17714 + }, + { + "epoch": 0.7384019007127672, + "grad_norm": 460.0, + "learning_rate": 1.6898118640651854e-05, + "loss": 15.876, + "step": 17715 + }, + { + "epoch": 0.7384435830102956, + "grad_norm": 132.0, + "learning_rate": 1.689306000343525e-05, + "loss": 10.0628, + "step": 17716 + }, + { + "epoch": 0.7384852653078238, + "grad_norm": 310.0, + "learning_rate": 1.6888001969595607e-05, + "loss": 13.6881, + "step": 17717 + }, + { + "epoch": 0.738526947605352, + "grad_norm": 1912.0, + "learning_rate": 1.688294453922503e-05, + "loss": 37.5003, + "step": 17718 + }, + { + "epoch": 0.7385686299028802, + "grad_norm": 168.0, + "learning_rate": 1.6877887712415764e-05, + "loss": 10.3755, + "step": 17719 + }, + { + "epoch": 0.7386103122004085, + "grad_norm": 196.0, + "learning_rate": 1.6872831489259905e-05, + "loss": 10.1252, + "step": 17720 + }, + { + "epoch": 0.7386519944979367, + "grad_norm": 360.0, + "learning_rate": 1.6867775869849646e-05, + "loss": 14.4386, + "step": 17721 + }, + { + "epoch": 0.7386936767954649, + "grad_norm": 166.0, + "learning_rate": 1.686272085427711e-05, + "loss": 10.6253, + "step": 17722 + }, + { + "epoch": 0.7387353590929933, + "grad_norm": 532.0, + "learning_rate": 1.6857666442634427e-05, + "loss": 17.5004, + "step": 17723 + }, + { + "epoch": 0.7387770413905215, + "grad_norm": 94.5, + "learning_rate": 1.685261263501372e-05, + "loss": 9.2511, + "step": 17724 + }, + { + "epoch": 0.7388187236880497, + "grad_norm": 1680.0, + "learning_rate": 1.6847559431507093e-05, + "loss": 31.3773, + "step": 17725 + }, + { + "epoch": 0.7388604059855779, + "grad_norm": 304.0, + "learning_rate": 1.6842506832206635e-05, + "loss": 12.6877, + "step": 17726 + }, + { + "epoch": 0.7389020882831062, + "grad_norm": 58.5, + "learning_rate": 1.683745483720443e-05, + "loss": 8.0627, + "step": 17727 + }, + { + "epoch": 0.7389437705806344, + "grad_norm": 110.0, + "learning_rate": 1.6832403446592558e-05, + "loss": 9.8133, + "step": 17728 + }, + { + "epoch": 0.7389854528781626, + "grad_norm": 476.0, + "learning_rate": 1.6827352660463074e-05, + "loss": 16.8751, + "step": 17729 + }, + { + "epoch": 0.7390271351756909, + "grad_norm": 876.0, + "learning_rate": 1.6822302478908036e-05, + "loss": 23.1291, + "step": 17730 + }, + { + "epoch": 0.7390688174732192, + "grad_norm": 210.0, + "learning_rate": 1.681725290201948e-05, + "loss": 12.1877, + "step": 17731 + }, + { + "epoch": 0.7391104997707474, + "grad_norm": 360.0, + "learning_rate": 1.6812203929889435e-05, + "loss": 14.4379, + "step": 17732 + }, + { + "epoch": 0.7391521820682756, + "grad_norm": 81.0, + "learning_rate": 1.6807155562609923e-05, + "loss": 8.0627, + "step": 17733 + }, + { + "epoch": 0.7391938643658038, + "grad_norm": 516.0, + "learning_rate": 1.680210780027295e-05, + "loss": 17.7516, + "step": 17734 + }, + { + "epoch": 0.7392355466633321, + "grad_norm": 488.0, + "learning_rate": 1.6797060642970502e-05, + "loss": 17.8751, + "step": 17735 + }, + { + "epoch": 0.7392772289608603, + "grad_norm": 120.5, + "learning_rate": 1.6792014090794572e-05, + "loss": 8.188, + "step": 17736 + }, + { + "epoch": 0.7393189112583886, + "grad_norm": 504.0, + "learning_rate": 1.6786968143837134e-05, + "loss": 17.2502, + "step": 17737 + }, + { + "epoch": 0.7393605935559168, + "grad_norm": 282.0, + "learning_rate": 1.6781922802190135e-05, + "loss": 13.1879, + "step": 17738 + }, + { + "epoch": 0.7394022758534451, + "grad_norm": 274.0, + "learning_rate": 1.6776878065945572e-05, + "loss": 12.5628, + "step": 17739 + }, + { + "epoch": 0.7394439581509733, + "grad_norm": 494.0, + "learning_rate": 1.6771833935195326e-05, + "loss": 16.7508, + "step": 17740 + }, + { + "epoch": 0.7394856404485015, + "grad_norm": 183.0, + "learning_rate": 1.6766790410031387e-05, + "loss": 6.4084, + "step": 17741 + }, + { + "epoch": 0.7395273227460297, + "grad_norm": 156.0, + "learning_rate": 1.676174749054561e-05, + "loss": 9.188, + "step": 17742 + }, + { + "epoch": 0.739569005043558, + "grad_norm": 444.0, + "learning_rate": 1.6756705176829975e-05, + "loss": 14.8137, + "step": 17743 + }, + { + "epoch": 0.7396106873410863, + "grad_norm": 812.0, + "learning_rate": 1.6751663468976298e-05, + "loss": 24.8751, + "step": 17744 + }, + { + "epoch": 0.7396523696386145, + "grad_norm": 318.0, + "learning_rate": 1.674662236707654e-05, + "loss": 13.5626, + "step": 17745 + }, + { + "epoch": 0.7396940519361427, + "grad_norm": 217.0, + "learning_rate": 1.674158187122251e-05, + "loss": 11.0002, + "step": 17746 + }, + { + "epoch": 0.739735734233671, + "grad_norm": 292.0, + "learning_rate": 1.673654198150612e-05, + "loss": 13.188, + "step": 17747 + }, + { + "epoch": 0.7397774165311992, + "grad_norm": 354.0, + "learning_rate": 1.6731502698019204e-05, + "loss": 14.7501, + "step": 17748 + }, + { + "epoch": 0.7398190988287274, + "grad_norm": 306.0, + "learning_rate": 1.6726464020853604e-05, + "loss": 12.9394, + "step": 17749 + }, + { + "epoch": 0.7398607811262556, + "grad_norm": 1320.0, + "learning_rate": 1.6721425950101154e-05, + "loss": 24.7547, + "step": 17750 + }, + { + "epoch": 0.739902463423784, + "grad_norm": 474.0, + "learning_rate": 1.671638848585367e-05, + "loss": 17.0003, + "step": 17751 + }, + { + "epoch": 0.7399441457213122, + "grad_norm": 390.0, + "learning_rate": 1.6711351628202954e-05, + "loss": 15.9379, + "step": 17752 + }, + { + "epoch": 0.7399858280188404, + "grad_norm": 320.0, + "learning_rate": 1.6706315377240818e-05, + "loss": 12.1882, + "step": 17753 + }, + { + "epoch": 0.7400275103163686, + "grad_norm": 314.0, + "learning_rate": 1.6701279733059035e-05, + "loss": 13.3128, + "step": 17754 + }, + { + "epoch": 0.7400691926138969, + "grad_norm": 314.0, + "learning_rate": 1.6696244695749385e-05, + "loss": 13.0003, + "step": 17755 + }, + { + "epoch": 0.7401108749114251, + "grad_norm": 820.0, + "learning_rate": 1.6691210265403633e-05, + "loss": 21.8752, + "step": 17756 + }, + { + "epoch": 0.7401525572089533, + "grad_norm": 77.5, + "learning_rate": 1.6686176442113533e-05, + "loss": 9.4377, + "step": 17757 + }, + { + "epoch": 0.7401942395064816, + "grad_norm": 800.0, + "learning_rate": 1.6681143225970826e-05, + "loss": 21.5002, + "step": 17758 + }, + { + "epoch": 0.7402359218040099, + "grad_norm": 644.0, + "learning_rate": 1.6676110617067238e-05, + "loss": 20.6256, + "step": 17759 + }, + { + "epoch": 0.7402776041015381, + "grad_norm": 306.0, + "learning_rate": 1.667107861549449e-05, + "loss": 14.0627, + "step": 17760 + }, + { + "epoch": 0.7403192863990663, + "grad_norm": 107.5, + "learning_rate": 1.6666047221344293e-05, + "loss": 9.8127, + "step": 17761 + }, + { + "epoch": 0.7403609686965945, + "grad_norm": 238.0, + "learning_rate": 1.6661016434708332e-05, + "loss": 8.0629, + "step": 17762 + }, + { + "epoch": 0.7404026509941228, + "grad_norm": 135.0, + "learning_rate": 1.6655986255678335e-05, + "loss": 9.3751, + "step": 17763 + }, + { + "epoch": 0.740444333291651, + "grad_norm": 498.0, + "learning_rate": 1.665095668434592e-05, + "loss": 17.1253, + "step": 17764 + }, + { + "epoch": 0.7404860155891793, + "grad_norm": 256.0, + "learning_rate": 1.6645927720802812e-05, + "loss": 12.7505, + "step": 17765 + }, + { + "epoch": 0.7405276978867075, + "grad_norm": 147.0, + "learning_rate": 1.66408993651406e-05, + "loss": 10.3127, + "step": 17766 + }, + { + "epoch": 0.7405693801842358, + "grad_norm": 454.0, + "learning_rate": 1.6635871617450988e-05, + "loss": 15.5628, + "step": 17767 + }, + { + "epoch": 0.740611062481764, + "grad_norm": 438.0, + "learning_rate": 1.6630844477825547e-05, + "loss": 18.126, + "step": 17768 + }, + { + "epoch": 0.7406527447792922, + "grad_norm": 233.0, + "learning_rate": 1.662581794635596e-05, + "loss": 11.4378, + "step": 17769 + }, + { + "epoch": 0.7406944270768204, + "grad_norm": 556.0, + "learning_rate": 1.6620792023133767e-05, + "loss": 19.1252, + "step": 17770 + }, + { + "epoch": 0.7407361093743488, + "grad_norm": 224.0, + "learning_rate": 1.6615766708250642e-05, + "loss": 10.063, + "step": 17771 + }, + { + "epoch": 0.740777791671877, + "grad_norm": 306.0, + "learning_rate": 1.6610742001798097e-05, + "loss": 14.188, + "step": 17772 + }, + { + "epoch": 0.7408194739694052, + "grad_norm": 105.0, + "learning_rate": 1.6605717903867756e-05, + "loss": 7.7505, + "step": 17773 + }, + { + "epoch": 0.7408611562669334, + "grad_norm": 125.0, + "learning_rate": 1.6600694414551177e-05, + "loss": 8.3128, + "step": 17774 + }, + { + "epoch": 0.7409028385644617, + "grad_norm": 182.0, + "learning_rate": 1.65956715339399e-05, + "loss": 8.0002, + "step": 17775 + }, + { + "epoch": 0.7409445208619899, + "grad_norm": 470.0, + "learning_rate": 1.659064926212548e-05, + "loss": 16.8759, + "step": 17776 + }, + { + "epoch": 0.7409862031595181, + "grad_norm": 240.0, + "learning_rate": 1.6585627599199445e-05, + "loss": 12.5635, + "step": 17777 + }, + { + "epoch": 0.7410278854570463, + "grad_norm": 410.0, + "learning_rate": 1.6580606545253307e-05, + "loss": 15.8752, + "step": 17778 + }, + { + "epoch": 0.7410695677545747, + "grad_norm": 274.0, + "learning_rate": 1.657558610037858e-05, + "loss": 11.3148, + "step": 17779 + }, + { + "epoch": 0.7411112500521029, + "grad_norm": 430.0, + "learning_rate": 1.657056626466677e-05, + "loss": 15.8127, + "step": 17780 + }, + { + "epoch": 0.7411529323496311, + "grad_norm": 157.0, + "learning_rate": 1.6565547038209356e-05, + "loss": 10.0004, + "step": 17781 + }, + { + "epoch": 0.7411946146471593, + "grad_norm": 408.0, + "learning_rate": 1.6560528421097813e-05, + "loss": 15.626, + "step": 17782 + }, + { + "epoch": 0.7412362969446876, + "grad_norm": 100.5, + "learning_rate": 1.6555510413423614e-05, + "loss": 9.6258, + "step": 17783 + }, + { + "epoch": 0.7412779792422158, + "grad_norm": 390.0, + "learning_rate": 1.6550493015278202e-05, + "loss": 14.3127, + "step": 17784 + }, + { + "epoch": 0.741319661539744, + "grad_norm": 338.0, + "learning_rate": 1.654547622675302e-05, + "loss": 15.5627, + "step": 17785 + }, + { + "epoch": 0.7413613438372723, + "grad_norm": 105.0, + "learning_rate": 1.654046004793951e-05, + "loss": 9.1892, + "step": 17786 + }, + { + "epoch": 0.7414030261348006, + "grad_norm": 1464.0, + "learning_rate": 1.6535444478929086e-05, + "loss": 28.6306, + "step": 17787 + }, + { + "epoch": 0.7414447084323288, + "grad_norm": 170.0, + "learning_rate": 1.6530429519813135e-05, + "loss": 8.1878, + "step": 17788 + }, + { + "epoch": 0.741486390729857, + "grad_norm": 208.0, + "learning_rate": 1.6525415170683116e-05, + "loss": 11.7579, + "step": 17789 + }, + { + "epoch": 0.7415280730273852, + "grad_norm": 520.0, + "learning_rate": 1.652040143163034e-05, + "loss": 17.6251, + "step": 17790 + }, + { + "epoch": 0.7415697553249135, + "grad_norm": 506.0, + "learning_rate": 1.6515388302746253e-05, + "loss": 16.3753, + "step": 17791 + }, + { + "epoch": 0.7416114376224417, + "grad_norm": 350.0, + "learning_rate": 1.6510375784122155e-05, + "loss": 13.6878, + "step": 17792 + }, + { + "epoch": 0.74165311991997, + "grad_norm": 512.0, + "learning_rate": 1.6505363875849467e-05, + "loss": 16.3757, + "step": 17793 + }, + { + "epoch": 0.7416948022174983, + "grad_norm": 440.0, + "learning_rate": 1.650035257801946e-05, + "loss": 16.1252, + "step": 17794 + }, + { + "epoch": 0.7417364845150265, + "grad_norm": 764.0, + "learning_rate": 1.6495341890723537e-05, + "loss": 22.2503, + "step": 17795 + }, + { + "epoch": 0.7417781668125547, + "grad_norm": 192.0, + "learning_rate": 1.6490331814052945e-05, + "loss": 6.7821, + "step": 17796 + }, + { + "epoch": 0.7418198491100829, + "grad_norm": 588.0, + "learning_rate": 1.6485322348099052e-05, + "loss": 18.5002, + "step": 17797 + }, + { + "epoch": 0.7418615314076112, + "grad_norm": 171.0, + "learning_rate": 1.6480313492953132e-05, + "loss": 9.8128, + "step": 17798 + }, + { + "epoch": 0.7419032137051395, + "grad_norm": 824.0, + "learning_rate": 1.6475305248706473e-05, + "loss": 22.8763, + "step": 17799 + }, + { + "epoch": 0.7419448960026677, + "grad_norm": 1624.0, + "learning_rate": 1.6470297615450354e-05, + "loss": 34.7507, + "step": 17800 + }, + { + "epoch": 0.7419865783001959, + "grad_norm": 320.0, + "learning_rate": 1.6465290593276034e-05, + "loss": 14.6881, + "step": 17801 + }, + { + "epoch": 0.7420282605977242, + "grad_norm": 167.0, + "learning_rate": 1.646028418227477e-05, + "loss": 10.0004, + "step": 17802 + }, + { + "epoch": 0.7420699428952524, + "grad_norm": 984.0, + "learning_rate": 1.645527838253781e-05, + "loss": 21.506, + "step": 17803 + }, + { + "epoch": 0.7421116251927806, + "grad_norm": 96.0, + "learning_rate": 1.6450273194156374e-05, + "loss": 9.3753, + "step": 17804 + }, + { + "epoch": 0.7421533074903088, + "grad_norm": 332.0, + "learning_rate": 1.6445268617221686e-05, + "loss": 12.5631, + "step": 17805 + }, + { + "epoch": 0.7421949897878372, + "grad_norm": 284.0, + "learning_rate": 1.644026465182496e-05, + "loss": 11.0627, + "step": 17806 + }, + { + "epoch": 0.7422366720853654, + "grad_norm": 87.0, + "learning_rate": 1.643526129805739e-05, + "loss": 8.8753, + "step": 17807 + }, + { + "epoch": 0.7422783543828936, + "grad_norm": 94.5, + "learning_rate": 1.6430258556010163e-05, + "loss": 8.1254, + "step": 17808 + }, + { + "epoch": 0.7423200366804218, + "grad_norm": 246.0, + "learning_rate": 1.642525642577445e-05, + "loss": 11.1879, + "step": 17809 + }, + { + "epoch": 0.7423617189779501, + "grad_norm": 312.0, + "learning_rate": 1.6420254907441423e-05, + "loss": 14.1253, + "step": 17810 + }, + { + "epoch": 0.7424034012754783, + "grad_norm": 121.0, + "learning_rate": 1.6415254001102233e-05, + "loss": 11.0005, + "step": 17811 + }, + { + "epoch": 0.7424450835730065, + "grad_norm": 764.0, + "learning_rate": 1.6410253706847994e-05, + "loss": 21.8758, + "step": 17812 + }, + { + "epoch": 0.7424867658705347, + "grad_norm": 209.0, + "learning_rate": 1.6405254024769908e-05, + "loss": 12.6881, + "step": 17813 + }, + { + "epoch": 0.7425284481680631, + "grad_norm": 406.0, + "learning_rate": 1.640025495495901e-05, + "loss": 16.6253, + "step": 17814 + }, + { + "epoch": 0.7425701304655913, + "grad_norm": 632.0, + "learning_rate": 1.6395256497506477e-05, + "loss": 20.6252, + "step": 17815 + }, + { + "epoch": 0.7426118127631195, + "grad_norm": 458.0, + "learning_rate": 1.6390258652503354e-05, + "loss": 17.2502, + "step": 17816 + }, + { + "epoch": 0.7426534950606477, + "grad_norm": 456.0, + "learning_rate": 1.6385261420040776e-05, + "loss": 16.2502, + "step": 17817 + }, + { + "epoch": 0.742695177358176, + "grad_norm": 520.0, + "learning_rate": 1.6380264800209767e-05, + "loss": 17.3763, + "step": 17818 + }, + { + "epoch": 0.7427368596557042, + "grad_norm": 572.0, + "learning_rate": 1.6375268793101446e-05, + "loss": 18.6252, + "step": 17819 + }, + { + "epoch": 0.7427785419532325, + "grad_norm": 232.0, + "learning_rate": 1.6370273398806802e-05, + "loss": 11.6253, + "step": 17820 + }, + { + "epoch": 0.7428202242507607, + "grad_norm": 456.0, + "learning_rate": 1.6365278617416946e-05, + "loss": 16.7503, + "step": 17821 + }, + { + "epoch": 0.742861906548289, + "grad_norm": 388.0, + "learning_rate": 1.6360284449022837e-05, + "loss": 14.6293, + "step": 17822 + }, + { + "epoch": 0.7429035888458172, + "grad_norm": 89.5, + "learning_rate": 1.6355290893715547e-05, + "loss": 9.1255, + "step": 17823 + }, + { + "epoch": 0.7429452711433454, + "grad_norm": 396.0, + "learning_rate": 1.635029795158607e-05, + "loss": 16.0003, + "step": 17824 + }, + { + "epoch": 0.7429869534408736, + "grad_norm": 392.0, + "learning_rate": 1.63453056227254e-05, + "loss": 15.6252, + "step": 17825 + }, + { + "epoch": 0.7430286357384019, + "grad_norm": 564.0, + "learning_rate": 1.634031390722452e-05, + "loss": 18.1251, + "step": 17826 + }, + { + "epoch": 0.7430703180359302, + "grad_norm": 278.0, + "learning_rate": 1.6335322805174408e-05, + "loss": 8.9377, + "step": 17827 + }, + { + "epoch": 0.7431120003334584, + "grad_norm": 516.0, + "learning_rate": 1.6330332316666025e-05, + "loss": 18.2523, + "step": 17828 + }, + { + "epoch": 0.7431536826309866, + "grad_norm": 932.0, + "learning_rate": 1.6325342441790324e-05, + "loss": 27.7524, + "step": 17829 + }, + { + "epoch": 0.7431953649285149, + "grad_norm": 119.0, + "learning_rate": 1.6320353180638255e-05, + "loss": 9.3133, + "step": 17830 + }, + { + "epoch": 0.7432370472260431, + "grad_norm": 352.0, + "learning_rate": 1.631536453330073e-05, + "loss": 13.8758, + "step": 17831 + }, + { + "epoch": 0.7432787295235713, + "grad_norm": 241.0, + "learning_rate": 1.631037649986868e-05, + "loss": 12.1877, + "step": 17832 + }, + { + "epoch": 0.7433204118210995, + "grad_norm": 278.0, + "learning_rate": 1.630538908043301e-05, + "loss": 12.0627, + "step": 17833 + }, + { + "epoch": 0.7433620941186279, + "grad_norm": 109.0, + "learning_rate": 1.6300402275084615e-05, + "loss": 8.1881, + "step": 17834 + }, + { + "epoch": 0.7434037764161561, + "grad_norm": 1608.0, + "learning_rate": 1.629541608391438e-05, + "loss": 33.2545, + "step": 17835 + }, + { + "epoch": 0.7434454587136843, + "grad_norm": 114.0, + "learning_rate": 1.629043050701317e-05, + "loss": 8.0627, + "step": 17836 + }, + { + "epoch": 0.7434871410112125, + "grad_norm": 173.0, + "learning_rate": 1.6285445544471865e-05, + "loss": 9.3752, + "step": 17837 + }, + { + "epoch": 0.7435288233087408, + "grad_norm": 235.0, + "learning_rate": 1.628046119638129e-05, + "loss": 11.8127, + "step": 17838 + }, + { + "epoch": 0.743570505606269, + "grad_norm": 262.0, + "learning_rate": 1.627547746283234e-05, + "loss": 12.4378, + "step": 17839 + }, + { + "epoch": 0.7436121879037972, + "grad_norm": 95.5, + "learning_rate": 1.627049434391577e-05, + "loss": 6.7206, + "step": 17840 + }, + { + "epoch": 0.7436538702013255, + "grad_norm": 149.0, + "learning_rate": 1.626551183972247e-05, + "loss": 8.4379, + "step": 17841 + }, + { + "epoch": 0.7436955524988538, + "grad_norm": 37.5, + "learning_rate": 1.6260529950343175e-05, + "loss": 6.4064, + "step": 17842 + }, + { + "epoch": 0.743737234796382, + "grad_norm": 228.0, + "learning_rate": 1.6255548675868753e-05, + "loss": 11.2504, + "step": 17843 + }, + { + "epoch": 0.7437789170939102, + "grad_norm": 444.0, + "learning_rate": 1.6250568016389917e-05, + "loss": 16.501, + "step": 17844 + }, + { + "epoch": 0.7438205993914384, + "grad_norm": 122.0, + "learning_rate": 1.6245587971997507e-05, + "loss": 10.0627, + "step": 17845 + }, + { + "epoch": 0.7438622816889667, + "grad_norm": 454.0, + "learning_rate": 1.624060854278222e-05, + "loss": 15.2519, + "step": 17846 + }, + { + "epoch": 0.7439039639864949, + "grad_norm": 652.0, + "learning_rate": 1.6235629728834856e-05, + "loss": 19.7514, + "step": 17847 + }, + { + "epoch": 0.7439456462840232, + "grad_norm": 216.0, + "learning_rate": 1.6230651530246145e-05, + "loss": 10.9379, + "step": 17848 + }, + { + "epoch": 0.7439873285815514, + "grad_norm": 145.0, + "learning_rate": 1.6225673947106796e-05, + "loss": 10.0627, + "step": 17849 + }, + { + "epoch": 0.7440290108790797, + "grad_norm": 344.0, + "learning_rate": 1.6220696979507543e-05, + "loss": 12.3154, + "step": 17850 + }, + { + "epoch": 0.7440706931766079, + "grad_norm": 185.0, + "learning_rate": 1.621572062753909e-05, + "loss": 11.1251, + "step": 17851 + }, + { + "epoch": 0.7441123754741361, + "grad_norm": 528.0, + "learning_rate": 1.6210744891292117e-05, + "loss": 16.8752, + "step": 17852 + }, + { + "epoch": 0.7441540577716643, + "grad_norm": 210.0, + "learning_rate": 1.6205769770857328e-05, + "loss": 11.3752, + "step": 17853 + }, + { + "epoch": 0.7441957400691926, + "grad_norm": 204.0, + "learning_rate": 1.620079526632538e-05, + "loss": 10.6254, + "step": 17854 + }, + { + "epoch": 0.7442374223667209, + "grad_norm": 1544.0, + "learning_rate": 1.619582137778694e-05, + "loss": 29.005, + "step": 17855 + }, + { + "epoch": 0.7442791046642491, + "grad_norm": 388.0, + "learning_rate": 1.6190848105332656e-05, + "loss": 15.1879, + "step": 17856 + }, + { + "epoch": 0.7443207869617773, + "grad_norm": 228.0, + "learning_rate": 1.6185875449053167e-05, + "loss": 11.7502, + "step": 17857 + }, + { + "epoch": 0.7443624692593056, + "grad_norm": 131.0, + "learning_rate": 1.61809034090391e-05, + "loss": 10.9379, + "step": 17858 + }, + { + "epoch": 0.7444041515568338, + "grad_norm": 236.0, + "learning_rate": 1.617593198538107e-05, + "loss": 12.0637, + "step": 17859 + }, + { + "epoch": 0.744445833854362, + "grad_norm": 1136.0, + "learning_rate": 1.6170961178169686e-05, + "loss": 25.2526, + "step": 17860 + }, + { + "epoch": 0.7444875161518902, + "grad_norm": 328.0, + "learning_rate": 1.6165990987495533e-05, + "loss": 13.8755, + "step": 17861 + }, + { + "epoch": 0.7445291984494186, + "grad_norm": 2336.0, + "learning_rate": 1.6161021413449196e-05, + "loss": 53.002, + "step": 17862 + }, + { + "epoch": 0.7445708807469468, + "grad_norm": 206.0, + "learning_rate": 1.615605245612125e-05, + "loss": 7.9387, + "step": 17863 + }, + { + "epoch": 0.744612563044475, + "grad_norm": 308.0, + "learning_rate": 1.6151084115602238e-05, + "loss": 11.6251, + "step": 17864 + }, + { + "epoch": 0.7446542453420032, + "grad_norm": 76.5, + "learning_rate": 1.6146116391982757e-05, + "loss": 9.1252, + "step": 17865 + }, + { + "epoch": 0.7446959276395315, + "grad_norm": 256.0, + "learning_rate": 1.6141149285353275e-05, + "loss": 11.5632, + "step": 17866 + }, + { + "epoch": 0.7447376099370597, + "grad_norm": 254.0, + "learning_rate": 1.613618279580438e-05, + "loss": 12.8756, + "step": 17867 + }, + { + "epoch": 0.7447792922345879, + "grad_norm": 366.0, + "learning_rate": 1.6131216923426533e-05, + "loss": 15.7523, + "step": 17868 + }, + { + "epoch": 0.7448209745321163, + "grad_norm": 185.0, + "learning_rate": 1.61262516683103e-05, + "loss": 10.3131, + "step": 17869 + }, + { + "epoch": 0.7448626568296445, + "grad_norm": 225.0, + "learning_rate": 1.61212870305461e-05, + "loss": 12.5629, + "step": 17870 + }, + { + "epoch": 0.7449043391271727, + "grad_norm": 912.0, + "learning_rate": 1.6116323010224487e-05, + "loss": 23.8752, + "step": 17871 + }, + { + "epoch": 0.7449460214247009, + "grad_norm": 130.0, + "learning_rate": 1.6111359607435862e-05, + "loss": 9.5002, + "step": 17872 + }, + { + "epoch": 0.7449877037222292, + "grad_norm": 408.0, + "learning_rate": 1.6106396822270726e-05, + "loss": 15.5633, + "step": 17873 + }, + { + "epoch": 0.7450293860197574, + "grad_norm": 221.0, + "learning_rate": 1.6101434654819526e-05, + "loss": 11.1878, + "step": 17874 + }, + { + "epoch": 0.7450710683172856, + "grad_norm": 492.0, + "learning_rate": 1.6096473105172683e-05, + "loss": 17.0002, + "step": 17875 + }, + { + "epoch": 0.7451127506148139, + "grad_norm": 1456.0, + "learning_rate": 1.609151217342063e-05, + "loss": 29.5037, + "step": 17876 + }, + { + "epoch": 0.7451544329123422, + "grad_norm": 294.0, + "learning_rate": 1.6086551859653774e-05, + "loss": 10.9379, + "step": 17877 + }, + { + "epoch": 0.7451961152098704, + "grad_norm": 446.0, + "learning_rate": 1.608159216396253e-05, + "loss": 14.3779, + "step": 17878 + }, + { + "epoch": 0.7452377975073986, + "grad_norm": 716.0, + "learning_rate": 1.607663308643727e-05, + "loss": 21.7506, + "step": 17879 + }, + { + "epoch": 0.7452794798049268, + "grad_norm": 247.0, + "learning_rate": 1.6071674627168388e-05, + "loss": 13.7508, + "step": 17880 + }, + { + "epoch": 0.7453211621024551, + "grad_norm": 202.0, + "learning_rate": 1.6066716786246245e-05, + "loss": 12.3753, + "step": 17881 + }, + { + "epoch": 0.7453628443999833, + "grad_norm": 684.0, + "learning_rate": 1.6061759563761203e-05, + "loss": 20.7502, + "step": 17882 + }, + { + "epoch": 0.7454045266975116, + "grad_norm": 180.0, + "learning_rate": 1.6056802959803608e-05, + "loss": 11.5003, + "step": 17883 + }, + { + "epoch": 0.7454462089950398, + "grad_norm": 167.0, + "learning_rate": 1.6051846974463786e-05, + "loss": 9.8755, + "step": 17884 + }, + { + "epoch": 0.7454878912925681, + "grad_norm": 440.0, + "learning_rate": 1.6046891607832072e-05, + "loss": 17.0002, + "step": 17885 + }, + { + "epoch": 0.7455295735900963, + "grad_norm": 472.0, + "learning_rate": 1.604193685999877e-05, + "loss": 15.8757, + "step": 17886 + }, + { + "epoch": 0.7455712558876245, + "grad_norm": 490.0, + "learning_rate": 1.6036982731054184e-05, + "loss": 17.1252, + "step": 17887 + }, + { + "epoch": 0.7456129381851527, + "grad_norm": 147.0, + "learning_rate": 1.6032029221088584e-05, + "loss": 9.1888, + "step": 17888 + }, + { + "epoch": 0.745654620482681, + "grad_norm": 880.0, + "learning_rate": 1.6027076330192303e-05, + "loss": 24.2511, + "step": 17889 + }, + { + "epoch": 0.7456963027802093, + "grad_norm": 490.0, + "learning_rate": 1.602212405845554e-05, + "loss": 16.7502, + "step": 17890 + }, + { + "epoch": 0.7457379850777375, + "grad_norm": 1304.0, + "learning_rate": 1.601717240596861e-05, + "loss": 33.5003, + "step": 17891 + }, + { + "epoch": 0.7457796673752657, + "grad_norm": 260.0, + "learning_rate": 1.6012221372821707e-05, + "loss": 13.0629, + "step": 17892 + }, + { + "epoch": 0.745821349672794, + "grad_norm": 256.0, + "learning_rate": 1.600727095910511e-05, + "loss": 12.8128, + "step": 17893 + }, + { + "epoch": 0.7458630319703222, + "grad_norm": 392.0, + "learning_rate": 1.6002321164908985e-05, + "loss": 15.1256, + "step": 17894 + }, + { + "epoch": 0.7459047142678504, + "grad_norm": 119.0, + "learning_rate": 1.599737199032361e-05, + "loss": 4.6565, + "step": 17895 + }, + { + "epoch": 0.7459463965653786, + "grad_norm": 221.0, + "learning_rate": 1.599242343543912e-05, + "loss": 12.6252, + "step": 17896 + }, + { + "epoch": 0.745988078862907, + "grad_norm": 209.0, + "learning_rate": 1.5987475500345754e-05, + "loss": 11.8127, + "step": 17897 + }, + { + "epoch": 0.7460297611604352, + "grad_norm": 398.0, + "learning_rate": 1.5982528185133645e-05, + "loss": 13.5047, + "step": 17898 + }, + { + "epoch": 0.7460714434579634, + "grad_norm": 416.0, + "learning_rate": 1.597758148989299e-05, + "loss": 16.7516, + "step": 17899 + }, + { + "epoch": 0.7461131257554916, + "grad_norm": 222.0, + "learning_rate": 1.5972635414713934e-05, + "loss": 11.9378, + "step": 17900 + }, + { + "epoch": 0.7461548080530199, + "grad_norm": 292.0, + "learning_rate": 1.596768995968662e-05, + "loss": 14.1877, + "step": 17901 + }, + { + "epoch": 0.7461964903505481, + "grad_norm": 676.0, + "learning_rate": 1.5962745124901173e-05, + "loss": 22.5003, + "step": 17902 + }, + { + "epoch": 0.7462381726480763, + "grad_norm": 204.0, + "learning_rate": 1.5957800910447724e-05, + "loss": 12.8755, + "step": 17903 + }, + { + "epoch": 0.7462798549456046, + "grad_norm": 416.0, + "learning_rate": 1.595285731641637e-05, + "loss": 15.7503, + "step": 17904 + }, + { + "epoch": 0.7463215372431329, + "grad_norm": 174.0, + "learning_rate": 1.5947914342897215e-05, + "loss": 10.6887, + "step": 17905 + }, + { + "epoch": 0.7463632195406611, + "grad_norm": 330.0, + "learning_rate": 1.5942971989980342e-05, + "loss": 13.2501, + "step": 17906 + }, + { + "epoch": 0.7464049018381893, + "grad_norm": 174.0, + "learning_rate": 1.593803025775583e-05, + "loss": 11.9376, + "step": 17907 + }, + { + "epoch": 0.7464465841357175, + "grad_norm": 298.0, + "learning_rate": 1.5933089146313745e-05, + "loss": 14.0002, + "step": 17908 + }, + { + "epoch": 0.7464882664332458, + "grad_norm": 1176.0, + "learning_rate": 1.592814865574413e-05, + "loss": 30.0048, + "step": 17909 + }, + { + "epoch": 0.746529948730774, + "grad_norm": 340.0, + "learning_rate": 1.5923208786137027e-05, + "loss": 15.6254, + "step": 17910 + }, + { + "epoch": 0.7465716310283023, + "grad_norm": 352.0, + "learning_rate": 1.5918269537582475e-05, + "loss": 14.9392, + "step": 17911 + }, + { + "epoch": 0.7466133133258305, + "grad_norm": 179.0, + "learning_rate": 1.5913330910170486e-05, + "loss": 10.6878, + "step": 17912 + }, + { + "epoch": 0.7466549956233588, + "grad_norm": 284.0, + "learning_rate": 1.5908392903991064e-05, + "loss": 12.6253, + "step": 17913 + }, + { + "epoch": 0.746696677920887, + "grad_norm": 480.0, + "learning_rate": 1.5903455519134192e-05, + "loss": 16.6251, + "step": 17914 + }, + { + "epoch": 0.7467383602184152, + "grad_norm": 732.0, + "learning_rate": 1.5898518755689907e-05, + "loss": 22.1252, + "step": 17915 + }, + { + "epoch": 0.7467800425159434, + "grad_norm": 177.0, + "learning_rate": 1.5893582613748104e-05, + "loss": 11.3131, + "step": 17916 + }, + { + "epoch": 0.7468217248134718, + "grad_norm": 236.0, + "learning_rate": 1.5888647093398824e-05, + "loss": 12.2505, + "step": 17917 + }, + { + "epoch": 0.746863407111, + "grad_norm": 406.0, + "learning_rate": 1.5883712194731947e-05, + "loss": 16.0004, + "step": 17918 + }, + { + "epoch": 0.7469050894085282, + "grad_norm": 348.0, + "learning_rate": 1.5878777917837473e-05, + "loss": 15.3753, + "step": 17919 + }, + { + "epoch": 0.7469467717060564, + "grad_norm": 204.0, + "learning_rate": 1.587384426280527e-05, + "loss": 11.0003, + "step": 17920 + }, + { + "epoch": 0.7469884540035847, + "grad_norm": 484.0, + "learning_rate": 1.5868911229725313e-05, + "loss": 17.6261, + "step": 17921 + }, + { + "epoch": 0.7470301363011129, + "grad_norm": 568.0, + "learning_rate": 1.5863978818687453e-05, + "loss": 19.6251, + "step": 17922 + }, + { + "epoch": 0.7470718185986411, + "grad_norm": 102.0, + "learning_rate": 1.585904702978162e-05, + "loss": 10.5004, + "step": 17923 + }, + { + "epoch": 0.7471135008961693, + "grad_norm": 328.0, + "learning_rate": 1.5854115863097692e-05, + "loss": 14.8753, + "step": 17924 + }, + { + "epoch": 0.7471551831936977, + "grad_norm": 1448.0, + "learning_rate": 1.5849185318725528e-05, + "loss": 31.6261, + "step": 17925 + }, + { + "epoch": 0.7471968654912259, + "grad_norm": 128.0, + "learning_rate": 1.5844255396754993e-05, + "loss": 9.1879, + "step": 17926 + }, + { + "epoch": 0.7472385477887541, + "grad_norm": 524.0, + "learning_rate": 1.5839326097275937e-05, + "loss": 18.0021, + "step": 17927 + }, + { + "epoch": 0.7472802300862823, + "grad_norm": 314.0, + "learning_rate": 1.58343974203782e-05, + "loss": 13.5003, + "step": 17928 + }, + { + "epoch": 0.7473219123838106, + "grad_norm": 496.0, + "learning_rate": 1.5829469366151594e-05, + "loss": 16.7502, + "step": 17929 + }, + { + "epoch": 0.7473635946813388, + "grad_norm": 652.0, + "learning_rate": 1.582454193468595e-05, + "loss": 21.2503, + "step": 17930 + }, + { + "epoch": 0.747405276978867, + "grad_norm": 111.5, + "learning_rate": 1.5819615126071057e-05, + "loss": 6.6257, + "step": 17931 + }, + { + "epoch": 0.7474469592763953, + "grad_norm": 644.0, + "learning_rate": 1.5814688940396717e-05, + "loss": 18.5005, + "step": 17932 + }, + { + "epoch": 0.7474886415739236, + "grad_norm": 620.0, + "learning_rate": 1.5809763377752708e-05, + "loss": 19.1277, + "step": 17933 + }, + { + "epoch": 0.7475303238714518, + "grad_norm": 516.0, + "learning_rate": 1.580483843822879e-05, + "loss": 17.2509, + "step": 17934 + }, + { + "epoch": 0.74757200616898, + "grad_norm": 310.0, + "learning_rate": 1.5799914121914732e-05, + "loss": 14.3133, + "step": 17935 + }, + { + "epoch": 0.7476136884665082, + "grad_norm": 494.0, + "learning_rate": 1.579499042890027e-05, + "loss": 16.2503, + "step": 17936 + }, + { + "epoch": 0.7476553707640365, + "grad_norm": 302.0, + "learning_rate": 1.579006735927515e-05, + "loss": 14.3756, + "step": 17937 + }, + { + "epoch": 0.7476970530615648, + "grad_norm": 636.0, + "learning_rate": 1.578514491312907e-05, + "loss": 19.8757, + "step": 17938 + }, + { + "epoch": 0.747738735359093, + "grad_norm": 226.0, + "learning_rate": 1.5780223090551794e-05, + "loss": 12.0633, + "step": 17939 + }, + { + "epoch": 0.7477804176566213, + "grad_norm": 346.0, + "learning_rate": 1.5775301891632953e-05, + "loss": 15.1254, + "step": 17940 + }, + { + "epoch": 0.7478220999541495, + "grad_norm": 430.0, + "learning_rate": 1.5770381316462313e-05, + "loss": 15.7503, + "step": 17941 + }, + { + "epoch": 0.7478637822516777, + "grad_norm": 249.0, + "learning_rate": 1.576546136512948e-05, + "loss": 12.5003, + "step": 17942 + }, + { + "epoch": 0.7479054645492059, + "grad_norm": 238.0, + "learning_rate": 1.576054203772418e-05, + "loss": 13.0004, + "step": 17943 + }, + { + "epoch": 0.7479471468467342, + "grad_norm": 152.0, + "learning_rate": 1.5755623334336012e-05, + "loss": 10.0629, + "step": 17944 + }, + { + "epoch": 0.7479888291442625, + "grad_norm": 79.0, + "learning_rate": 1.5750705255054677e-05, + "loss": 8.1881, + "step": 17945 + }, + { + "epoch": 0.7480305114417907, + "grad_norm": 106.5, + "learning_rate": 1.5745787799969752e-05, + "loss": 8.5631, + "step": 17946 + }, + { + "epoch": 0.7480721937393189, + "grad_norm": 215.0, + "learning_rate": 1.5740870969170912e-05, + "loss": 11.3128, + "step": 17947 + }, + { + "epoch": 0.7481138760368472, + "grad_norm": 60.5, + "learning_rate": 1.573595476274771e-05, + "loss": 7.8129, + "step": 17948 + }, + { + "epoch": 0.7481555583343754, + "grad_norm": 262.0, + "learning_rate": 1.5731039180789793e-05, + "loss": 13.2504, + "step": 17949 + }, + { + "epoch": 0.7481972406319036, + "grad_norm": 524.0, + "learning_rate": 1.572612422338673e-05, + "loss": 15.9377, + "step": 17950 + }, + { + "epoch": 0.7482389229294318, + "grad_norm": 51.75, + "learning_rate": 1.5721209890628092e-05, + "loss": 7.8128, + "step": 17951 + }, + { + "epoch": 0.7482806052269602, + "grad_norm": 744.0, + "learning_rate": 1.5716296182603447e-05, + "loss": 21.1254, + "step": 17952 + }, + { + "epoch": 0.7483222875244884, + "grad_norm": 167.0, + "learning_rate": 1.5711383099402342e-05, + "loss": 10.8126, + "step": 17953 + }, + { + "epoch": 0.7483639698220166, + "grad_norm": 316.0, + "learning_rate": 1.5706470641114336e-05, + "loss": 13.1252, + "step": 17954 + }, + { + "epoch": 0.7484056521195448, + "grad_norm": 434.0, + "learning_rate": 1.5701558807828936e-05, + "loss": 16.1254, + "step": 17955 + }, + { + "epoch": 0.7484473344170731, + "grad_norm": 394.0, + "learning_rate": 1.5696647599635677e-05, + "loss": 15.3134, + "step": 17956 + }, + { + "epoch": 0.7484890167146013, + "grad_norm": 528.0, + "learning_rate": 1.5691737016624057e-05, + "loss": 18.5016, + "step": 17957 + }, + { + "epoch": 0.7485306990121295, + "grad_norm": 1004.0, + "learning_rate": 1.568682705888358e-05, + "loss": 27.1262, + "step": 17958 + }, + { + "epoch": 0.7485723813096578, + "grad_norm": 596.0, + "learning_rate": 1.5681917726503726e-05, + "loss": 15.6904, + "step": 17959 + }, + { + "epoch": 0.7486140636071861, + "grad_norm": 354.0, + "learning_rate": 1.5677009019573963e-05, + "loss": 14.1879, + "step": 17960 + }, + { + "epoch": 0.7486557459047143, + "grad_norm": 310.0, + "learning_rate": 1.5672100938183764e-05, + "loss": 13.4379, + "step": 17961 + }, + { + "epoch": 0.7486974282022425, + "grad_norm": 215.0, + "learning_rate": 1.5667193482422572e-05, + "loss": 11.3752, + "step": 17962 + }, + { + "epoch": 0.7487391104997707, + "grad_norm": 596.0, + "learning_rate": 1.566228665237982e-05, + "loss": 17.6267, + "step": 17963 + }, + { + "epoch": 0.748780792797299, + "grad_norm": 338.0, + "learning_rate": 1.565738044814493e-05, + "loss": 13.8774, + "step": 17964 + }, + { + "epoch": 0.7488224750948272, + "grad_norm": 540.0, + "learning_rate": 1.565247486980736e-05, + "loss": 17.6251, + "step": 17965 + }, + { + "epoch": 0.7488641573923555, + "grad_norm": 156.0, + "learning_rate": 1.5647569917456457e-05, + "loss": 10.3754, + "step": 17966 + }, + { + "epoch": 0.7489058396898837, + "grad_norm": 624.0, + "learning_rate": 1.564266559118167e-05, + "loss": 20.3763, + "step": 17967 + }, + { + "epoch": 0.748947521987412, + "grad_norm": 348.0, + "learning_rate": 1.5637761891072323e-05, + "loss": 13.5004, + "step": 17968 + }, + { + "epoch": 0.7489892042849402, + "grad_norm": 149.0, + "learning_rate": 1.563285881721785e-05, + "loss": 8.8131, + "step": 17969 + }, + { + "epoch": 0.7490308865824684, + "grad_norm": 249.0, + "learning_rate": 1.5627956369707537e-05, + "loss": 12.1881, + "step": 17970 + }, + { + "epoch": 0.7490725688799966, + "grad_norm": 620.0, + "learning_rate": 1.5623054548630806e-05, + "loss": 19.2503, + "step": 17971 + }, + { + "epoch": 0.749114251177525, + "grad_norm": 520.0, + "learning_rate": 1.5618153354076926e-05, + "loss": 16.2535, + "step": 17972 + }, + { + "epoch": 0.7491559334750532, + "grad_norm": 286.0, + "learning_rate": 1.561325278613527e-05, + "loss": 13.6251, + "step": 17973 + }, + { + "epoch": 0.7491976157725814, + "grad_norm": 221.0, + "learning_rate": 1.5608352844895134e-05, + "loss": 10.6256, + "step": 17974 + }, + { + "epoch": 0.7492392980701096, + "grad_norm": 418.0, + "learning_rate": 1.5603453530445823e-05, + "loss": 16.5002, + "step": 17975 + }, + { + "epoch": 0.7492809803676379, + "grad_norm": 334.0, + "learning_rate": 1.5598554842876622e-05, + "loss": 13.6252, + "step": 17976 + }, + { + "epoch": 0.7493226626651661, + "grad_norm": 478.0, + "learning_rate": 1.559365678227681e-05, + "loss": 17.6252, + "step": 17977 + }, + { + "epoch": 0.7493643449626943, + "grad_norm": 176.0, + "learning_rate": 1.558875934873566e-05, + "loss": 11.0003, + "step": 17978 + }, + { + "epoch": 0.7494060272602225, + "grad_norm": 736.0, + "learning_rate": 1.5583862542342424e-05, + "loss": 20.2504, + "step": 17979 + }, + { + "epoch": 0.7494477095577509, + "grad_norm": 247.0, + "learning_rate": 1.5578966363186353e-05, + "loss": 12.1882, + "step": 17980 + }, + { + "epoch": 0.7494893918552791, + "grad_norm": 272.0, + "learning_rate": 1.5574070811356673e-05, + "loss": 12.7503, + "step": 17981 + }, + { + "epoch": 0.7495310741528073, + "grad_norm": 304.0, + "learning_rate": 1.556917588694261e-05, + "loss": 13.1252, + "step": 17982 + }, + { + "epoch": 0.7495727564503355, + "grad_norm": 1752.0, + "learning_rate": 1.5564281590033374e-05, + "loss": 32.2556, + "step": 17983 + }, + { + "epoch": 0.7496144387478638, + "grad_norm": 270.0, + "learning_rate": 1.555938792071816e-05, + "loss": 10.5632, + "step": 17984 + }, + { + "epoch": 0.749656121045392, + "grad_norm": 1960.0, + "learning_rate": 1.5554494879086156e-05, + "loss": 35.0072, + "step": 17985 + }, + { + "epoch": 0.7496978033429202, + "grad_norm": 324.0, + "learning_rate": 1.5549602465226548e-05, + "loss": 14.6881, + "step": 17986 + }, + { + "epoch": 0.7497394856404485, + "grad_norm": 864.0, + "learning_rate": 1.5544710679228484e-05, + "loss": 24.3781, + "step": 17987 + }, + { + "epoch": 0.7497811679379768, + "grad_norm": 616.0, + "learning_rate": 1.5539819521181136e-05, + "loss": 17.8753, + "step": 17988 + }, + { + "epoch": 0.749822850235505, + "grad_norm": 81.5, + "learning_rate": 1.553492899117363e-05, + "loss": 8.5003, + "step": 17989 + }, + { + "epoch": 0.7498645325330332, + "grad_norm": 568.0, + "learning_rate": 1.553003908929509e-05, + "loss": 18.7502, + "step": 17990 + }, + { + "epoch": 0.7499062148305614, + "grad_norm": 548.0, + "learning_rate": 1.5525149815634675e-05, + "loss": 15.6879, + "step": 17991 + }, + { + "epoch": 0.7499478971280897, + "grad_norm": 266.0, + "learning_rate": 1.552026117028144e-05, + "loss": 12.8768, + "step": 17992 + }, + { + "epoch": 0.749989579425618, + "grad_norm": 199.0, + "learning_rate": 1.5515373153324537e-05, + "loss": 12.4391, + "step": 17993 + }, + { + "epoch": 0.7500312617231462, + "grad_norm": 296.0, + "learning_rate": 1.5510485764852983e-05, + "loss": 12.1876, + "step": 17994 + }, + { + "epoch": 0.7500729440206744, + "grad_norm": 462.0, + "learning_rate": 1.5505599004955922e-05, + "loss": 16.3758, + "step": 17995 + }, + { + "epoch": 0.7501146263182027, + "grad_norm": 442.0, + "learning_rate": 1.550071287372235e-05, + "loss": 15.5644, + "step": 17996 + }, + { + "epoch": 0.7501563086157309, + "grad_norm": 752.0, + "learning_rate": 1.5495827371241377e-05, + "loss": 20.1252, + "step": 17997 + }, + { + "epoch": 0.7501979909132591, + "grad_norm": 190.0, + "learning_rate": 1.5490942497601986e-05, + "loss": 11.4379, + "step": 17998 + }, + { + "epoch": 0.7502396732107873, + "grad_norm": 203.0, + "learning_rate": 1.5486058252893242e-05, + "loss": 9.6251, + "step": 17999 + }, + { + "epoch": 0.7502813555083157, + "grad_norm": 300.0, + "learning_rate": 1.5481174637204156e-05, + "loss": 12.8755, + "step": 18000 + }, + { + "epoch": 0.7503230378058439, + "grad_norm": 210.0, + "learning_rate": 1.547629165062372e-05, + "loss": 11.0627, + "step": 18001 + }, + { + "epoch": 0.7503647201033721, + "grad_norm": 720.0, + "learning_rate": 1.547140929324094e-05, + "loss": 19.3753, + "step": 18002 + }, + { + "epoch": 0.7504064024009003, + "grad_norm": 77.0, + "learning_rate": 1.546652756514479e-05, + "loss": 8.0627, + "step": 18003 + }, + { + "epoch": 0.7504480846984286, + "grad_norm": 366.0, + "learning_rate": 1.5461646466424236e-05, + "loss": 14.3129, + "step": 18004 + }, + { + "epoch": 0.7504897669959568, + "grad_norm": 300.0, + "learning_rate": 1.5456765997168245e-05, + "loss": 13.563, + "step": 18005 + }, + { + "epoch": 0.750531449293485, + "grad_norm": 290.0, + "learning_rate": 1.545188615746576e-05, + "loss": 13.7502, + "step": 18006 + }, + { + "epoch": 0.7505731315910132, + "grad_norm": 192.0, + "learning_rate": 1.5447006947405717e-05, + "loss": 11.5009, + "step": 18007 + }, + { + "epoch": 0.7506148138885416, + "grad_norm": 78.0, + "learning_rate": 1.5442128367077034e-05, + "loss": 8.4376, + "step": 18008 + }, + { + "epoch": 0.7506564961860698, + "grad_norm": 426.0, + "learning_rate": 1.5437250416568637e-05, + "loss": 15.7528, + "step": 18009 + }, + { + "epoch": 0.750698178483598, + "grad_norm": 304.0, + "learning_rate": 1.5432373095969417e-05, + "loss": 11.4385, + "step": 18010 + }, + { + "epoch": 0.7507398607811262, + "grad_norm": 172.0, + "learning_rate": 1.5427496405368264e-05, + "loss": 12.0628, + "step": 18011 + }, + { + "epoch": 0.7507815430786545, + "grad_norm": 208.0, + "learning_rate": 1.542262034485406e-05, + "loss": 10.0649, + "step": 18012 + }, + { + "epoch": 0.7508232253761827, + "grad_norm": 175.0, + "learning_rate": 1.541774491451567e-05, + "loss": 11.1882, + "step": 18013 + }, + { + "epoch": 0.750864907673711, + "grad_norm": 322.0, + "learning_rate": 1.541287011444193e-05, + "loss": 14.3126, + "step": 18014 + }, + { + "epoch": 0.7509065899712393, + "grad_norm": 628.0, + "learning_rate": 1.5407995944721737e-05, + "loss": 18.6301, + "step": 18015 + }, + { + "epoch": 0.7509482722687675, + "grad_norm": 376.0, + "learning_rate": 1.540312240544386e-05, + "loss": 15.188, + "step": 18016 + }, + { + "epoch": 0.7509899545662957, + "grad_norm": 434.0, + "learning_rate": 1.5398249496697174e-05, + "loss": 14.0003, + "step": 18017 + }, + { + "epoch": 0.7510316368638239, + "grad_norm": 290.0, + "learning_rate": 1.5393377218570433e-05, + "loss": 14.1252, + "step": 18018 + }, + { + "epoch": 0.7510733191613522, + "grad_norm": 458.0, + "learning_rate": 1.5388505571152494e-05, + "loss": 16.3753, + "step": 18019 + }, + { + "epoch": 0.7511150014588804, + "grad_norm": 572.0, + "learning_rate": 1.538363455453209e-05, + "loss": 18.7501, + "step": 18020 + }, + { + "epoch": 0.7511566837564086, + "grad_norm": 316.0, + "learning_rate": 1.5378764168798044e-05, + "loss": 14.6252, + "step": 18021 + }, + { + "epoch": 0.7511983660539369, + "grad_norm": 105.0, + "learning_rate": 1.5373894414039065e-05, + "loss": 9.1882, + "step": 18022 + }, + { + "epoch": 0.7512400483514652, + "grad_norm": 84.0, + "learning_rate": 1.5369025290343965e-05, + "loss": 8.4383, + "step": 18023 + }, + { + "epoch": 0.7512817306489934, + "grad_norm": 118.0, + "learning_rate": 1.536415679780142e-05, + "loss": 10.2505, + "step": 18024 + }, + { + "epoch": 0.7513234129465216, + "grad_norm": 308.0, + "learning_rate": 1.535928893650021e-05, + "loss": 14.6878, + "step": 18025 + }, + { + "epoch": 0.7513650952440498, + "grad_norm": 262.0, + "learning_rate": 1.5354421706529034e-05, + "loss": 11.4378, + "step": 18026 + }, + { + "epoch": 0.7514067775415781, + "grad_norm": 728.0, + "learning_rate": 1.53495551079766e-05, + "loss": 22.2507, + "step": 18027 + }, + { + "epoch": 0.7514484598391064, + "grad_norm": 306.0, + "learning_rate": 1.5344689140931594e-05, + "loss": 12.813, + "step": 18028 + }, + { + "epoch": 0.7514901421366346, + "grad_norm": 1024.0, + "learning_rate": 1.5339823805482713e-05, + "loss": 22.1286, + "step": 18029 + }, + { + "epoch": 0.7515318244341628, + "grad_norm": 484.0, + "learning_rate": 1.5334959101718617e-05, + "loss": 16.5002, + "step": 18030 + }, + { + "epoch": 0.7515735067316911, + "grad_norm": 240.0, + "learning_rate": 1.5330095029727963e-05, + "loss": 12.5628, + "step": 18031 + }, + { + "epoch": 0.7516151890292193, + "grad_norm": 276.0, + "learning_rate": 1.532523158959941e-05, + "loss": 12.7501, + "step": 18032 + }, + { + "epoch": 0.7516568713267475, + "grad_norm": 165.0, + "learning_rate": 1.532036878142159e-05, + "loss": 11.4383, + "step": 18033 + }, + { + "epoch": 0.7516985536242757, + "grad_norm": 182.0, + "learning_rate": 1.5315506605283126e-05, + "loss": 13.0004, + "step": 18034 + }, + { + "epoch": 0.7517402359218041, + "grad_norm": 336.0, + "learning_rate": 1.5310645061272634e-05, + "loss": 13.2502, + "step": 18035 + }, + { + "epoch": 0.7517819182193323, + "grad_norm": 394.0, + "learning_rate": 1.5305784149478714e-05, + "loss": 13.8752, + "step": 18036 + }, + { + "epoch": 0.7518236005168605, + "grad_norm": 185.0, + "learning_rate": 1.5300923869989957e-05, + "loss": 9.8128, + "step": 18037 + }, + { + "epoch": 0.7518652828143887, + "grad_norm": 772.0, + "learning_rate": 1.5296064222894947e-05, + "loss": 22.1288, + "step": 18038 + }, + { + "epoch": 0.751906965111917, + "grad_norm": 360.0, + "learning_rate": 1.5291205208282245e-05, + "loss": 15.2512, + "step": 18039 + }, + { + "epoch": 0.7519486474094452, + "grad_norm": 508.0, + "learning_rate": 1.5286346826240388e-05, + "loss": 18.5002, + "step": 18040 + }, + { + "epoch": 0.7519903297069734, + "grad_norm": 516.0, + "learning_rate": 1.528148907685798e-05, + "loss": 16.7504, + "step": 18041 + }, + { + "epoch": 0.7520320120045016, + "grad_norm": 82.0, + "learning_rate": 1.527663196022348e-05, + "loss": 9.3753, + "step": 18042 + }, + { + "epoch": 0.75207369430203, + "grad_norm": 215.0, + "learning_rate": 1.5271775476425482e-05, + "loss": 11.5005, + "step": 18043 + }, + { + "epoch": 0.7521153765995582, + "grad_norm": 229.0, + "learning_rate": 1.5266919625552422e-05, + "loss": 12.1256, + "step": 18044 + }, + { + "epoch": 0.7521570588970864, + "grad_norm": 434.0, + "learning_rate": 1.526206440769287e-05, + "loss": 17.2502, + "step": 18045 + }, + { + "epoch": 0.7521987411946146, + "grad_norm": 414.0, + "learning_rate": 1.5257209822935248e-05, + "loss": 15.938, + "step": 18046 + }, + { + "epoch": 0.7522404234921429, + "grad_norm": 149.0, + "learning_rate": 1.525235587136809e-05, + "loss": 11.4378, + "step": 18047 + }, + { + "epoch": 0.7522821057896711, + "grad_norm": 133.0, + "learning_rate": 1.52475025530798e-05, + "loss": 6.532, + "step": 18048 + }, + { + "epoch": 0.7523237880871994, + "grad_norm": 560.0, + "learning_rate": 1.5242649868158875e-05, + "loss": 20.6253, + "step": 18049 + }, + { + "epoch": 0.7523654703847276, + "grad_norm": 592.0, + "learning_rate": 1.5237797816693743e-05, + "loss": 20.6252, + "step": 18050 + }, + { + "epoch": 0.7524071526822559, + "grad_norm": 264.0, + "learning_rate": 1.5232946398772829e-05, + "loss": 12.5003, + "step": 18051 + }, + { + "epoch": 0.7524488349797841, + "grad_norm": 366.0, + "learning_rate": 1.5228095614484562e-05, + "loss": 15.0003, + "step": 18052 + }, + { + "epoch": 0.7524905172773123, + "grad_norm": 536.0, + "learning_rate": 1.522324546391733e-05, + "loss": 18.2503, + "step": 18053 + }, + { + "epoch": 0.7525321995748405, + "grad_norm": 628.0, + "learning_rate": 1.5218395947159541e-05, + "loss": 19.5006, + "step": 18054 + }, + { + "epoch": 0.7525738818723688, + "grad_norm": 1496.0, + "learning_rate": 1.5213547064299572e-05, + "loss": 28.1283, + "step": 18055 + }, + { + "epoch": 0.7526155641698971, + "grad_norm": 171.0, + "learning_rate": 1.5208698815425793e-05, + "loss": 10.4378, + "step": 18056 + }, + { + "epoch": 0.7526572464674253, + "grad_norm": 1232.0, + "learning_rate": 1.5203851200626573e-05, + "loss": 34.2513, + "step": 18057 + }, + { + "epoch": 0.7526989287649535, + "grad_norm": 187.0, + "learning_rate": 1.5199004219990249e-05, + "loss": 10.688, + "step": 18058 + }, + { + "epoch": 0.7527406110624818, + "grad_norm": 153.0, + "learning_rate": 1.5194157873605164e-05, + "loss": 10.7502, + "step": 18059 + }, + { + "epoch": 0.75278229336001, + "grad_norm": 227.0, + "learning_rate": 1.5189312161559644e-05, + "loss": 12.313, + "step": 18060 + }, + { + "epoch": 0.7528239756575382, + "grad_norm": 434.0, + "learning_rate": 1.5184467083941995e-05, + "loss": 15.5001, + "step": 18061 + }, + { + "epoch": 0.7528656579550664, + "grad_norm": 596.0, + "learning_rate": 1.517962264084053e-05, + "loss": 20.2502, + "step": 18062 + }, + { + "epoch": 0.7529073402525948, + "grad_norm": 1704.0, + "learning_rate": 1.5174778832343528e-05, + "loss": 34.7504, + "step": 18063 + }, + { + "epoch": 0.752949022550123, + "grad_norm": 368.0, + "learning_rate": 1.5169935658539259e-05, + "loss": 13.7503, + "step": 18064 + }, + { + "epoch": 0.7529907048476512, + "grad_norm": 428.0, + "learning_rate": 1.5165093119516038e-05, + "loss": 13.8757, + "step": 18065 + }, + { + "epoch": 0.7530323871451794, + "grad_norm": 153.0, + "learning_rate": 1.5160251215362054e-05, + "loss": 9.6258, + "step": 18066 + }, + { + "epoch": 0.7530740694427077, + "grad_norm": 454.0, + "learning_rate": 1.5155409946165616e-05, + "loss": 14.7504, + "step": 18067 + }, + { + "epoch": 0.7531157517402359, + "grad_norm": 912.0, + "learning_rate": 1.5150569312014895e-05, + "loss": 22.2535, + "step": 18068 + }, + { + "epoch": 0.7531574340377641, + "grad_norm": 216.0, + "learning_rate": 1.514572931299817e-05, + "loss": 12.2501, + "step": 18069 + }, + { + "epoch": 0.7531991163352924, + "grad_norm": 404.0, + "learning_rate": 1.5140889949203595e-05, + "loss": 15.6253, + "step": 18070 + }, + { + "epoch": 0.7532407986328207, + "grad_norm": 376.0, + "learning_rate": 1.5136051220719427e-05, + "loss": 14.7501, + "step": 18071 + }, + { + "epoch": 0.7532824809303489, + "grad_norm": 304.0, + "learning_rate": 1.5131213127633786e-05, + "loss": 11.5628, + "step": 18072 + }, + { + "epoch": 0.7533241632278771, + "grad_norm": 169.0, + "learning_rate": 1.5126375670034914e-05, + "loss": 7.594, + "step": 18073 + }, + { + "epoch": 0.7533658455254053, + "grad_norm": 424.0, + "learning_rate": 1.5121538848010908e-05, + "loss": 16.1253, + "step": 18074 + }, + { + "epoch": 0.7534075278229336, + "grad_norm": 664.0, + "learning_rate": 1.5116702661649967e-05, + "loss": 20.6253, + "step": 18075 + }, + { + "epoch": 0.7534492101204618, + "grad_norm": 344.0, + "learning_rate": 1.5111867111040224e-05, + "loss": 13.8129, + "step": 18076 + }, + { + "epoch": 0.75349089241799, + "grad_norm": 400.0, + "learning_rate": 1.5107032196269794e-05, + "loss": 14.5004, + "step": 18077 + }, + { + "epoch": 0.7535325747155183, + "grad_norm": 294.0, + "learning_rate": 1.5102197917426802e-05, + "loss": 14.0628, + "step": 18078 + }, + { + "epoch": 0.7535742570130466, + "grad_norm": 452.0, + "learning_rate": 1.5097364274599352e-05, + "loss": 17.1261, + "step": 18079 + }, + { + "epoch": 0.7536159393105748, + "grad_norm": 402.0, + "learning_rate": 1.5092531267875531e-05, + "loss": 14.2532, + "step": 18080 + }, + { + "epoch": 0.753657621608103, + "grad_norm": 193.0, + "learning_rate": 1.5087698897343432e-05, + "loss": 11.8759, + "step": 18081 + }, + { + "epoch": 0.7536993039056312, + "grad_norm": 548.0, + "learning_rate": 1.508286716309112e-05, + "loss": 18.6255, + "step": 18082 + }, + { + "epoch": 0.7537409862031595, + "grad_norm": 320.0, + "learning_rate": 1.5078036065206647e-05, + "loss": 14.6255, + "step": 18083 + }, + { + "epoch": 0.7537826685006878, + "grad_norm": 167.0, + "learning_rate": 1.5073205603778074e-05, + "loss": 10.7502, + "step": 18084 + }, + { + "epoch": 0.753824350798216, + "grad_norm": 330.0, + "learning_rate": 1.5068375778893429e-05, + "loss": 13.1261, + "step": 18085 + }, + { + "epoch": 0.7538660330957443, + "grad_norm": 312.0, + "learning_rate": 1.5063546590640731e-05, + "loss": 13.9377, + "step": 18086 + }, + { + "epoch": 0.7539077153932725, + "grad_norm": 648.0, + "learning_rate": 1.5058718039108e-05, + "loss": 20.7504, + "step": 18087 + }, + { + "epoch": 0.7539493976908007, + "grad_norm": 110.0, + "learning_rate": 1.5053890124383235e-05, + "loss": 9.0634, + "step": 18088 + }, + { + "epoch": 0.7539910799883289, + "grad_norm": 322.0, + "learning_rate": 1.5049062846554429e-05, + "loss": 14.563, + "step": 18089 + }, + { + "epoch": 0.7540327622858572, + "grad_norm": 426.0, + "learning_rate": 1.5044236205709533e-05, + "loss": 15.3134, + "step": 18090 + }, + { + "epoch": 0.7540744445833855, + "grad_norm": 342.0, + "learning_rate": 1.503941020193657e-05, + "loss": 15.1252, + "step": 18091 + }, + { + "epoch": 0.7541161268809137, + "grad_norm": 1064.0, + "learning_rate": 1.5034584835323428e-05, + "loss": 29.6252, + "step": 18092 + }, + { + "epoch": 0.7541578091784419, + "grad_norm": 132.0, + "learning_rate": 1.5029760105958113e-05, + "loss": 9.4377, + "step": 18093 + }, + { + "epoch": 0.7541994914759702, + "grad_norm": 1064.0, + "learning_rate": 1.5024936013928492e-05, + "loss": 20.505, + "step": 18094 + }, + { + "epoch": 0.7542411737734984, + "grad_norm": 404.0, + "learning_rate": 1.5020112559322552e-05, + "loss": 15.5003, + "step": 18095 + }, + { + "epoch": 0.7542828560710266, + "grad_norm": 410.0, + "learning_rate": 1.5015289742228134e-05, + "loss": 16.7507, + "step": 18096 + }, + { + "epoch": 0.7543245383685548, + "grad_norm": 352.0, + "learning_rate": 1.5010467562733194e-05, + "loss": 10.7502, + "step": 18097 + }, + { + "epoch": 0.7543662206660832, + "grad_norm": 360.0, + "learning_rate": 1.5005646020925556e-05, + "loss": 14.2502, + "step": 18098 + }, + { + "epoch": 0.7544079029636114, + "grad_norm": 474.0, + "learning_rate": 1.5000825116893148e-05, + "loss": 15.3753, + "step": 18099 + }, + { + "epoch": 0.7544495852611396, + "grad_norm": 145.0, + "learning_rate": 1.49960048507238e-05, + "loss": 10.5629, + "step": 18100 + }, + { + "epoch": 0.7544912675586678, + "grad_norm": 175.0, + "learning_rate": 1.4991185222505378e-05, + "loss": 10.0002, + "step": 18101 + }, + { + "epoch": 0.7545329498561961, + "grad_norm": 230.0, + "learning_rate": 1.4986366232325711e-05, + "loss": 11.0009, + "step": 18102 + }, + { + "epoch": 0.7545746321537243, + "grad_norm": 688.0, + "learning_rate": 1.4981547880272628e-05, + "loss": 19.1257, + "step": 18103 + }, + { + "epoch": 0.7546163144512525, + "grad_norm": 240.0, + "learning_rate": 1.4976730166433945e-05, + "loss": 10.1255, + "step": 18104 + }, + { + "epoch": 0.7546579967487808, + "grad_norm": 164.0, + "learning_rate": 1.497191309089746e-05, + "loss": 6.1879, + "step": 18105 + }, + { + "epoch": 0.7546996790463091, + "grad_norm": 568.0, + "learning_rate": 1.4967096653750972e-05, + "loss": 18.5002, + "step": 18106 + }, + { + "epoch": 0.7547413613438373, + "grad_norm": 119.5, + "learning_rate": 1.4962280855082255e-05, + "loss": 9.3129, + "step": 18107 + }, + { + "epoch": 0.7547830436413655, + "grad_norm": 202.0, + "learning_rate": 1.4957465694979084e-05, + "loss": 11.5004, + "step": 18108 + }, + { + "epoch": 0.7548247259388937, + "grad_norm": 69.0, + "learning_rate": 1.4952651173529208e-05, + "loss": 7.2817, + "step": 18109 + }, + { + "epoch": 0.754866408236422, + "grad_norm": 354.0, + "learning_rate": 1.4947837290820377e-05, + "loss": 13.4377, + "step": 18110 + }, + { + "epoch": 0.7549080905339502, + "grad_norm": 1312.0, + "learning_rate": 1.4943024046940324e-05, + "loss": 26.507, + "step": 18111 + }, + { + "epoch": 0.7549497728314785, + "grad_norm": 424.0, + "learning_rate": 1.4938211441976763e-05, + "loss": 16.1254, + "step": 18112 + }, + { + "epoch": 0.7549914551290067, + "grad_norm": 940.0, + "learning_rate": 1.4933399476017418e-05, + "loss": 22.8753, + "step": 18113 + }, + { + "epoch": 0.755033137426535, + "grad_norm": 450.0, + "learning_rate": 1.492858814914998e-05, + "loss": 14.9381, + "step": 18114 + }, + { + "epoch": 0.7550748197240632, + "grad_norm": 370.0, + "learning_rate": 1.4923777461462135e-05, + "loss": 14.7503, + "step": 18115 + }, + { + "epoch": 0.7551165020215914, + "grad_norm": 1080.0, + "learning_rate": 1.491896741304154e-05, + "loss": 27.2504, + "step": 18116 + }, + { + "epoch": 0.7551581843191196, + "grad_norm": 188.0, + "learning_rate": 1.4914158003975914e-05, + "loss": 8.1252, + "step": 18117 + }, + { + "epoch": 0.755199866616648, + "grad_norm": 540.0, + "learning_rate": 1.4909349234352843e-05, + "loss": 17.2503, + "step": 18118 + }, + { + "epoch": 0.7552415489141762, + "grad_norm": 86.5, + "learning_rate": 1.4904541104260028e-05, + "loss": 10.0629, + "step": 18119 + }, + { + "epoch": 0.7552832312117044, + "grad_norm": 494.0, + "learning_rate": 1.4899733613785033e-05, + "loss": 16.2504, + "step": 18120 + }, + { + "epoch": 0.7553249135092326, + "grad_norm": 294.0, + "learning_rate": 1.4894926763015542e-05, + "loss": 13.6257, + "step": 18121 + }, + { + "epoch": 0.7553665958067609, + "grad_norm": 588.0, + "learning_rate": 1.4890120552039095e-05, + "loss": 17.8758, + "step": 18122 + }, + { + "epoch": 0.7554082781042891, + "grad_norm": 167.0, + "learning_rate": 1.4885314980943348e-05, + "loss": 13.1257, + "step": 18123 + }, + { + "epoch": 0.7554499604018173, + "grad_norm": 584.0, + "learning_rate": 1.4880510049815822e-05, + "loss": 20.0017, + "step": 18124 + }, + { + "epoch": 0.7554916426993455, + "grad_norm": 229.0, + "learning_rate": 1.4875705758744136e-05, + "loss": 10.5626, + "step": 18125 + }, + { + "epoch": 0.7555333249968739, + "grad_norm": 80.0, + "learning_rate": 1.4870902107815831e-05, + "loss": 6.4692, + "step": 18126 + }, + { + "epoch": 0.7555750072944021, + "grad_norm": 524.0, + "learning_rate": 1.4866099097118452e-05, + "loss": 16.2545, + "step": 18127 + }, + { + "epoch": 0.7556166895919303, + "grad_norm": 124.5, + "learning_rate": 1.4861296726739532e-05, + "loss": 9.438, + "step": 18128 + }, + { + "epoch": 0.7556583718894585, + "grad_norm": 520.0, + "learning_rate": 1.4856494996766601e-05, + "loss": 13.1914, + "step": 18129 + }, + { + "epoch": 0.7557000541869868, + "grad_norm": 222.0, + "learning_rate": 1.4851693907287173e-05, + "loss": 11.6252, + "step": 18130 + }, + { + "epoch": 0.755741736484515, + "grad_norm": 1000.0, + "learning_rate": 1.4846893458388738e-05, + "loss": 28.6254, + "step": 18131 + }, + { + "epoch": 0.7557834187820432, + "grad_norm": 512.0, + "learning_rate": 1.4842093650158794e-05, + "loss": 15.939, + "step": 18132 + }, + { + "epoch": 0.7558251010795715, + "grad_norm": 1020.0, + "learning_rate": 1.483729448268481e-05, + "loss": 26.3793, + "step": 18133 + }, + { + "epoch": 0.7558667833770998, + "grad_norm": 508.0, + "learning_rate": 1.483249595605426e-05, + "loss": 16.7501, + "step": 18134 + }, + { + "epoch": 0.755908465674628, + "grad_norm": 1320.0, + "learning_rate": 1.482769807035459e-05, + "loss": 26.2538, + "step": 18135 + }, + { + "epoch": 0.7559501479721562, + "grad_norm": 442.0, + "learning_rate": 1.4822900825673248e-05, + "loss": 16.6254, + "step": 18136 + }, + { + "epoch": 0.7559918302696844, + "grad_norm": 160.0, + "learning_rate": 1.4818104222097662e-05, + "loss": 10.3752, + "step": 18137 + }, + { + "epoch": 0.7560335125672127, + "grad_norm": 141.0, + "learning_rate": 1.4813308259715248e-05, + "loss": 9.8752, + "step": 18138 + }, + { + "epoch": 0.756075194864741, + "grad_norm": 312.0, + "learning_rate": 1.4808512938613411e-05, + "loss": 14.2502, + "step": 18139 + }, + { + "epoch": 0.7561168771622692, + "grad_norm": 436.0, + "learning_rate": 1.4803718258879535e-05, + "loss": 16.1257, + "step": 18140 + }, + { + "epoch": 0.7561585594597974, + "grad_norm": 98.5, + "learning_rate": 1.4798924220601051e-05, + "loss": 9.2507, + "step": 18141 + }, + { + "epoch": 0.7562002417573257, + "grad_norm": 306.0, + "learning_rate": 1.4794130823865271e-05, + "loss": 15.0006, + "step": 18142 + }, + { + "epoch": 0.7562419240548539, + "grad_norm": 1192.0, + "learning_rate": 1.4789338068759607e-05, + "loss": 24.5037, + "step": 18143 + }, + { + "epoch": 0.7562836063523821, + "grad_norm": 209.0, + "learning_rate": 1.4784545955371353e-05, + "loss": 7.4065, + "step": 18144 + }, + { + "epoch": 0.7563252886499103, + "grad_norm": 139.0, + "learning_rate": 1.477975448378791e-05, + "loss": 7.0627, + "step": 18145 + }, + { + "epoch": 0.7563669709474387, + "grad_norm": 318.0, + "learning_rate": 1.4774963654096535e-05, + "loss": 13.2505, + "step": 18146 + }, + { + "epoch": 0.7564086532449669, + "grad_norm": 189.0, + "learning_rate": 1.4770173466384602e-05, + "loss": 10.0003, + "step": 18147 + }, + { + "epoch": 0.7564503355424951, + "grad_norm": 288.0, + "learning_rate": 1.4765383920739357e-05, + "loss": 13.9379, + "step": 18148 + }, + { + "epoch": 0.7564920178400233, + "grad_norm": 326.0, + "learning_rate": 1.4760595017248147e-05, + "loss": 13.5004, + "step": 18149 + }, + { + "epoch": 0.7565337001375516, + "grad_norm": 604.0, + "learning_rate": 1.4755806755998191e-05, + "loss": 20.0003, + "step": 18150 + }, + { + "epoch": 0.7565753824350798, + "grad_norm": 680.0, + "learning_rate": 1.4751019137076804e-05, + "loss": 18.7512, + "step": 18151 + }, + { + "epoch": 0.756617064732608, + "grad_norm": 66.5, + "learning_rate": 1.4746232160571221e-05, + "loss": 8.1878, + "step": 18152 + }, + { + "epoch": 0.7566587470301362, + "grad_norm": 396.0, + "learning_rate": 1.4741445826568684e-05, + "loss": 15.6252, + "step": 18153 + }, + { + "epoch": 0.7567004293276646, + "grad_norm": 294.0, + "learning_rate": 1.4736660135156427e-05, + "loss": 12.6907, + "step": 18154 + }, + { + "epoch": 0.7567421116251928, + "grad_norm": 448.0, + "learning_rate": 1.473187508642167e-05, + "loss": 16.1257, + "step": 18155 + }, + { + "epoch": 0.756783793922721, + "grad_norm": 224.0, + "learning_rate": 1.4727090680451622e-05, + "loss": 12.7506, + "step": 18156 + }, + { + "epoch": 0.7568254762202492, + "grad_norm": 292.0, + "learning_rate": 1.4722306917333478e-05, + "loss": 13.7505, + "step": 18157 + }, + { + "epoch": 0.7568671585177775, + "grad_norm": 70.5, + "learning_rate": 1.471752379715442e-05, + "loss": 8.5629, + "step": 18158 + }, + { + "epoch": 0.7569088408153057, + "grad_norm": 78.5, + "learning_rate": 1.4712741320001627e-05, + "loss": 9.0626, + "step": 18159 + }, + { + "epoch": 0.756950523112834, + "grad_norm": 215.0, + "learning_rate": 1.4707959485962253e-05, + "loss": 11.0001, + "step": 18160 + }, + { + "epoch": 0.7569922054103623, + "grad_norm": 216.0, + "learning_rate": 1.4703178295123448e-05, + "loss": 11.2503, + "step": 18161 + }, + { + "epoch": 0.7570338877078905, + "grad_norm": 211.0, + "learning_rate": 1.4698397747572351e-05, + "loss": 12.3752, + "step": 18162 + }, + { + "epoch": 0.7570755700054187, + "grad_norm": 400.0, + "learning_rate": 1.4693617843396095e-05, + "loss": 14.3126, + "step": 18163 + }, + { + "epoch": 0.7571172523029469, + "grad_norm": 460.0, + "learning_rate": 1.4688838582681786e-05, + "loss": 16.7503, + "step": 18164 + }, + { + "epoch": 0.7571589346004752, + "grad_norm": 396.0, + "learning_rate": 1.4684059965516528e-05, + "loss": 16.3753, + "step": 18165 + }, + { + "epoch": 0.7572006168980034, + "grad_norm": 108.0, + "learning_rate": 1.4679281991987393e-05, + "loss": 9.7504, + "step": 18166 + }, + { + "epoch": 0.7572422991955317, + "grad_norm": 344.0, + "learning_rate": 1.4674504662181516e-05, + "loss": 15.3762, + "step": 18167 + }, + { + "epoch": 0.7572839814930599, + "grad_norm": 1184.0, + "learning_rate": 1.4669727976185893e-05, + "loss": 30.5008, + "step": 18168 + }, + { + "epoch": 0.7573256637905882, + "grad_norm": 384.0, + "learning_rate": 1.4664951934087645e-05, + "loss": 15.5629, + "step": 18169 + }, + { + "epoch": 0.7573673460881164, + "grad_norm": 105.0, + "learning_rate": 1.4660176535973758e-05, + "loss": 7.844, + "step": 18170 + }, + { + "epoch": 0.7574090283856446, + "grad_norm": 508.0, + "learning_rate": 1.4655401781931316e-05, + "loss": 17.8756, + "step": 18171 + }, + { + "epoch": 0.7574507106831728, + "grad_norm": 149.0, + "learning_rate": 1.4650627672047284e-05, + "loss": 9.2503, + "step": 18172 + }, + { + "epoch": 0.7574923929807011, + "grad_norm": 520.0, + "learning_rate": 1.4645854206408731e-05, + "loss": 17.7503, + "step": 18173 + }, + { + "epoch": 0.7575340752782294, + "grad_norm": 103.5, + "learning_rate": 1.46410813851026e-05, + "loss": 9.1254, + "step": 18174 + }, + { + "epoch": 0.7575757575757576, + "grad_norm": 402.0, + "learning_rate": 1.463630920821591e-05, + "loss": 16.0005, + "step": 18175 + }, + { + "epoch": 0.7576174398732858, + "grad_norm": 134.0, + "learning_rate": 1.4631537675835622e-05, + "loss": 9.6877, + "step": 18176 + }, + { + "epoch": 0.7576591221708141, + "grad_norm": 159.0, + "learning_rate": 1.46267667880487e-05, + "loss": 9.9379, + "step": 18177 + }, + { + "epoch": 0.7577008044683423, + "grad_norm": 434.0, + "learning_rate": 1.4621996544942096e-05, + "loss": 15.1877, + "step": 18178 + }, + { + "epoch": 0.7577424867658705, + "grad_norm": 117.5, + "learning_rate": 1.4617226946602747e-05, + "loss": 8.7505, + "step": 18179 + }, + { + "epoch": 0.7577841690633987, + "grad_norm": 141.0, + "learning_rate": 1.4612457993117574e-05, + "loss": 10.5631, + "step": 18180 + }, + { + "epoch": 0.7578258513609271, + "grad_norm": 376.0, + "learning_rate": 1.46076896845735e-05, + "loss": 13.5002, + "step": 18181 + }, + { + "epoch": 0.7578675336584553, + "grad_norm": 404.0, + "learning_rate": 1.460292202105742e-05, + "loss": 15.4378, + "step": 18182 + }, + { + "epoch": 0.7579092159559835, + "grad_norm": 458.0, + "learning_rate": 1.4598155002656228e-05, + "loss": 17.5003, + "step": 18183 + }, + { + "epoch": 0.7579508982535117, + "grad_norm": 103.5, + "learning_rate": 1.4593388629456806e-05, + "loss": 9.4378, + "step": 18184 + }, + { + "epoch": 0.75799258055104, + "grad_norm": 162.0, + "learning_rate": 1.4588622901546017e-05, + "loss": 10.1257, + "step": 18185 + }, + { + "epoch": 0.7580342628485682, + "grad_norm": 430.0, + "learning_rate": 1.458385781901072e-05, + "loss": 16.0002, + "step": 18186 + }, + { + "epoch": 0.7580759451460964, + "grad_norm": 155.0, + "learning_rate": 1.4579093381937759e-05, + "loss": 7.719, + "step": 18187 + }, + { + "epoch": 0.7581176274436247, + "grad_norm": 306.0, + "learning_rate": 1.4574329590413965e-05, + "loss": 13.3163, + "step": 18188 + }, + { + "epoch": 0.758159309741153, + "grad_norm": 181.0, + "learning_rate": 1.456956644452616e-05, + "loss": 10.188, + "step": 18189 + }, + { + "epoch": 0.7582009920386812, + "grad_norm": 592.0, + "learning_rate": 1.4564803944361132e-05, + "loss": 18.3753, + "step": 18190 + }, + { + "epoch": 0.7582426743362094, + "grad_norm": 300.0, + "learning_rate": 1.4560042090005732e-05, + "loss": 14.1879, + "step": 18191 + }, + { + "epoch": 0.7582843566337376, + "grad_norm": 108.5, + "learning_rate": 1.4555280881546685e-05, + "loss": 9.1879, + "step": 18192 + }, + { + "epoch": 0.7583260389312659, + "grad_norm": 600.0, + "learning_rate": 1.4550520319070815e-05, + "loss": 19.8752, + "step": 18193 + }, + { + "epoch": 0.7583677212287941, + "grad_norm": 206.0, + "learning_rate": 1.4545760402664826e-05, + "loss": 10.2503, + "step": 18194 + }, + { + "epoch": 0.7584094035263224, + "grad_norm": 226.0, + "learning_rate": 1.4541001132415539e-05, + "loss": 12.0004, + "step": 18195 + }, + { + "epoch": 0.7584510858238506, + "grad_norm": 173.0, + "learning_rate": 1.453624250840962e-05, + "loss": 11.0002, + "step": 18196 + }, + { + "epoch": 0.7584927681213789, + "grad_norm": 125.0, + "learning_rate": 1.4531484530733863e-05, + "loss": 9.8753, + "step": 18197 + }, + { + "epoch": 0.7585344504189071, + "grad_norm": 474.0, + "learning_rate": 1.4526727199474916e-05, + "loss": 16.6252, + "step": 18198 + }, + { + "epoch": 0.7585761327164353, + "grad_norm": 482.0, + "learning_rate": 1.452197051471954e-05, + "loss": 18.1256, + "step": 18199 + }, + { + "epoch": 0.7586178150139635, + "grad_norm": 342.0, + "learning_rate": 1.4517214476554376e-05, + "loss": 12.7503, + "step": 18200 + }, + { + "epoch": 0.7586594973114918, + "grad_norm": 212.0, + "learning_rate": 1.4512459085066143e-05, + "loss": 11.5627, + "step": 18201 + }, + { + "epoch": 0.7587011796090201, + "grad_norm": 1048.0, + "learning_rate": 1.4507704340341493e-05, + "loss": 27.7536, + "step": 18202 + }, + { + "epoch": 0.7587428619065483, + "grad_norm": 254.0, + "learning_rate": 1.4502950242467084e-05, + "loss": 12.6252, + "step": 18203 + }, + { + "epoch": 0.7587845442040765, + "grad_norm": 374.0, + "learning_rate": 1.4498196791529555e-05, + "loss": 15.8129, + "step": 18204 + }, + { + "epoch": 0.7588262265016048, + "grad_norm": 213.0, + "learning_rate": 1.4493443987615546e-05, + "loss": 10.8126, + "step": 18205 + }, + { + "epoch": 0.758867908799133, + "grad_norm": 276.0, + "learning_rate": 1.4488691830811668e-05, + "loss": 13.5009, + "step": 18206 + }, + { + "epoch": 0.7589095910966612, + "grad_norm": 194.0, + "learning_rate": 1.4483940321204531e-05, + "loss": 9.3753, + "step": 18207 + }, + { + "epoch": 0.7589512733941894, + "grad_norm": 326.0, + "learning_rate": 1.4479189458880743e-05, + "loss": 13.8754, + "step": 18208 + }, + { + "epoch": 0.7589929556917178, + "grad_norm": 210.0, + "learning_rate": 1.4474439243926874e-05, + "loss": 11.8132, + "step": 18209 + }, + { + "epoch": 0.759034637989246, + "grad_norm": 236.0, + "learning_rate": 1.4469689676429505e-05, + "loss": 13.1878, + "step": 18210 + }, + { + "epoch": 0.7590763202867742, + "grad_norm": 88.5, + "learning_rate": 1.4464940756475193e-05, + "loss": 7.5349, + "step": 18211 + }, + { + "epoch": 0.7591180025843024, + "grad_norm": 342.0, + "learning_rate": 1.4460192484150498e-05, + "loss": 14.3128, + "step": 18212 + }, + { + "epoch": 0.7591596848818307, + "grad_norm": 207.0, + "learning_rate": 1.4455444859541944e-05, + "loss": 9.3753, + "step": 18213 + }, + { + "epoch": 0.7592013671793589, + "grad_norm": 224.0, + "learning_rate": 1.4450697882736064e-05, + "loss": 11.9383, + "step": 18214 + }, + { + "epoch": 0.7592430494768871, + "grad_norm": 696.0, + "learning_rate": 1.444595155381937e-05, + "loss": 20.7504, + "step": 18215 + }, + { + "epoch": 0.7592847317744154, + "grad_norm": 696.0, + "learning_rate": 1.4441205872878349e-05, + "loss": 21.7507, + "step": 18216 + }, + { + "epoch": 0.7593264140719437, + "grad_norm": 206.0, + "learning_rate": 1.4436460839999538e-05, + "loss": 11.8756, + "step": 18217 + }, + { + "epoch": 0.7593680963694719, + "grad_norm": 474.0, + "learning_rate": 1.4431716455269357e-05, + "loss": 15.5627, + "step": 18218 + }, + { + "epoch": 0.7594097786670001, + "grad_norm": 248.0, + "learning_rate": 1.4426972718774333e-05, + "loss": 12.1252, + "step": 18219 + }, + { + "epoch": 0.7594514609645283, + "grad_norm": 434.0, + "learning_rate": 1.4422229630600859e-05, + "loss": 15.8129, + "step": 18220 + }, + { + "epoch": 0.7594931432620566, + "grad_norm": 232.0, + "learning_rate": 1.4417487190835432e-05, + "loss": 12.0001, + "step": 18221 + }, + { + "epoch": 0.7595348255595848, + "grad_norm": 394.0, + "learning_rate": 1.4412745399564437e-05, + "loss": 15.251, + "step": 18222 + }, + { + "epoch": 0.7595765078571131, + "grad_norm": 356.0, + "learning_rate": 1.4408004256874341e-05, + "loss": 14.1254, + "step": 18223 + }, + { + "epoch": 0.7596181901546413, + "grad_norm": 496.0, + "learning_rate": 1.44032637628515e-05, + "loss": 16.1254, + "step": 18224 + }, + { + "epoch": 0.7596598724521696, + "grad_norm": 63.5, + "learning_rate": 1.4398523917582351e-05, + "loss": 8.6252, + "step": 18225 + }, + { + "epoch": 0.7597015547496978, + "grad_norm": 728.0, + "learning_rate": 1.4393784721153264e-05, + "loss": 18.2591, + "step": 18226 + }, + { + "epoch": 0.759743237047226, + "grad_norm": 201.0, + "learning_rate": 1.438904617365061e-05, + "loss": 10.8132, + "step": 18227 + }, + { + "epoch": 0.7597849193447542, + "grad_norm": 1144.0, + "learning_rate": 1.4384308275160751e-05, + "loss": 25.5003, + "step": 18228 + }, + { + "epoch": 0.7598266016422826, + "grad_norm": 768.0, + "learning_rate": 1.4379571025770038e-05, + "loss": 22.0003, + "step": 18229 + }, + { + "epoch": 0.7598682839398108, + "grad_norm": 234.0, + "learning_rate": 1.4374834425564804e-05, + "loss": 11.3127, + "step": 18230 + }, + { + "epoch": 0.759909966237339, + "grad_norm": 292.0, + "learning_rate": 1.4370098474631378e-05, + "loss": 11.5632, + "step": 18231 + }, + { + "epoch": 0.7599516485348673, + "grad_norm": 294.0, + "learning_rate": 1.4365363173056068e-05, + "loss": 13.9382, + "step": 18232 + }, + { + "epoch": 0.7599933308323955, + "grad_norm": 468.0, + "learning_rate": 1.4360628520925174e-05, + "loss": 16.2509, + "step": 18233 + }, + { + "epoch": 0.7600350131299237, + "grad_norm": 366.0, + "learning_rate": 1.4355894518324991e-05, + "loss": 13.2504, + "step": 18234 + }, + { + "epoch": 0.7600766954274519, + "grad_norm": 318.0, + "learning_rate": 1.4351161165341798e-05, + "loss": 13.3142, + "step": 18235 + }, + { + "epoch": 0.7601183777249803, + "grad_norm": 688.0, + "learning_rate": 1.434642846206185e-05, + "loss": 21.5031, + "step": 18236 + }, + { + "epoch": 0.7601600600225085, + "grad_norm": 688.0, + "learning_rate": 1.4341696408571415e-05, + "loss": 20.1251, + "step": 18237 + }, + { + "epoch": 0.7602017423200367, + "grad_norm": 380.0, + "learning_rate": 1.4336965004956726e-05, + "loss": 14.8753, + "step": 18238 + }, + { + "epoch": 0.7602434246175649, + "grad_norm": 57.0, + "learning_rate": 1.4332234251304016e-05, + "loss": 7.7503, + "step": 18239 + }, + { + "epoch": 0.7602851069150932, + "grad_norm": 298.0, + "learning_rate": 1.4327504147699506e-05, + "loss": 12.2504, + "step": 18240 + }, + { + "epoch": 0.7603267892126214, + "grad_norm": 370.0, + "learning_rate": 1.4322774694229396e-05, + "loss": 15.0629, + "step": 18241 + }, + { + "epoch": 0.7603684715101496, + "grad_norm": 696.0, + "learning_rate": 1.431804589097987e-05, + "loss": 25.0004, + "step": 18242 + }, + { + "epoch": 0.7604101538076778, + "grad_norm": 53.75, + "learning_rate": 1.4313317738037158e-05, + "loss": 7.8753, + "step": 18243 + }, + { + "epoch": 0.7604518361052062, + "grad_norm": 520.0, + "learning_rate": 1.4308590235487363e-05, + "loss": 17.8753, + "step": 18244 + }, + { + "epoch": 0.7604935184027344, + "grad_norm": 196.0, + "learning_rate": 1.4303863383416715e-05, + "loss": 11.0628, + "step": 18245 + }, + { + "epoch": 0.7605352007002626, + "grad_norm": 338.0, + "learning_rate": 1.4299137181911292e-05, + "loss": 14.3752, + "step": 18246 + }, + { + "epoch": 0.7605768829977908, + "grad_norm": 258.0, + "learning_rate": 1.4294411631057291e-05, + "loss": 12.3757, + "step": 18247 + }, + { + "epoch": 0.7606185652953191, + "grad_norm": 692.0, + "learning_rate": 1.4289686730940783e-05, + "loss": 21.3751, + "step": 18248 + }, + { + "epoch": 0.7606602475928473, + "grad_norm": 145.0, + "learning_rate": 1.4284962481647934e-05, + "loss": 9.8132, + "step": 18249 + }, + { + "epoch": 0.7607019298903755, + "grad_norm": 502.0, + "learning_rate": 1.428023888326478e-05, + "loss": 17.1253, + "step": 18250 + }, + { + "epoch": 0.7607436121879038, + "grad_norm": 57.5, + "learning_rate": 1.4275515935877459e-05, + "loss": 8.1878, + "step": 18251 + }, + { + "epoch": 0.7607852944854321, + "grad_norm": 560.0, + "learning_rate": 1.4270793639572034e-05, + "loss": 17.8755, + "step": 18252 + }, + { + "epoch": 0.7608269767829603, + "grad_norm": 340.0, + "learning_rate": 1.4266071994434566e-05, + "loss": 15.0002, + "step": 18253 + }, + { + "epoch": 0.7608686590804885, + "grad_norm": 234.0, + "learning_rate": 1.426135100055111e-05, + "loss": 8.9378, + "step": 18254 + }, + { + "epoch": 0.7609103413780167, + "grad_norm": 239.0, + "learning_rate": 1.4256630658007703e-05, + "loss": 12.7502, + "step": 18255 + }, + { + "epoch": 0.760952023675545, + "grad_norm": 220.0, + "learning_rate": 1.4251910966890375e-05, + "loss": 11.6879, + "step": 18256 + }, + { + "epoch": 0.7609937059730733, + "grad_norm": 143.0, + "learning_rate": 1.4247191927285142e-05, + "loss": 9.3752, + "step": 18257 + }, + { + "epoch": 0.7610353882706015, + "grad_norm": 93.5, + "learning_rate": 1.4242473539278013e-05, + "loss": 8.7503, + "step": 18258 + }, + { + "epoch": 0.7610770705681297, + "grad_norm": 376.0, + "learning_rate": 1.4237755802954972e-05, + "loss": 13.2504, + "step": 18259 + }, + { + "epoch": 0.761118752865658, + "grad_norm": 350.0, + "learning_rate": 1.4233038718402009e-05, + "loss": 13.6252, + "step": 18260 + }, + { + "epoch": 0.7611604351631862, + "grad_norm": 196.0, + "learning_rate": 1.4228322285705093e-05, + "loss": 10.8127, + "step": 18261 + }, + { + "epoch": 0.7612021174607144, + "grad_norm": 640.0, + "learning_rate": 1.4223606504950177e-05, + "loss": 21.0001, + "step": 18262 + }, + { + "epoch": 0.7612437997582426, + "grad_norm": 306.0, + "learning_rate": 1.421889137622321e-05, + "loss": 12.2503, + "step": 18263 + }, + { + "epoch": 0.761285482055771, + "grad_norm": 544.0, + "learning_rate": 1.4214176899610122e-05, + "loss": 17.1251, + "step": 18264 + }, + { + "epoch": 0.7613271643532992, + "grad_norm": 210.0, + "learning_rate": 1.4209463075196838e-05, + "loss": 11.0005, + "step": 18265 + }, + { + "epoch": 0.7613688466508274, + "grad_norm": 736.0, + "learning_rate": 1.4204749903069253e-05, + "loss": 20.5004, + "step": 18266 + }, + { + "epoch": 0.7614105289483556, + "grad_norm": 492.0, + "learning_rate": 1.4200037383313308e-05, + "loss": 16.8752, + "step": 18267 + }, + { + "epoch": 0.7614522112458839, + "grad_norm": 288.0, + "learning_rate": 1.4195325516014829e-05, + "loss": 12.0006, + "step": 18268 + }, + { + "epoch": 0.7614938935434121, + "grad_norm": 231.0, + "learning_rate": 1.4190614301259747e-05, + "loss": 12.7502, + "step": 18269 + }, + { + "epoch": 0.7615355758409403, + "grad_norm": 328.0, + "learning_rate": 1.4185903739133871e-05, + "loss": 13.8755, + "step": 18270 + }, + { + "epoch": 0.7615772581384685, + "grad_norm": 316.0, + "learning_rate": 1.418119382972311e-05, + "loss": 15.1885, + "step": 18271 + }, + { + "epoch": 0.7616189404359969, + "grad_norm": 211.0, + "learning_rate": 1.4176484573113241e-05, + "loss": 11.1876, + "step": 18272 + }, + { + "epoch": 0.7616606227335251, + "grad_norm": 600.0, + "learning_rate": 1.4171775969390155e-05, + "loss": 20.3796, + "step": 18273 + }, + { + "epoch": 0.7617023050310533, + "grad_norm": 454.0, + "learning_rate": 1.4167068018639595e-05, + "loss": 15.9377, + "step": 18274 + }, + { + "epoch": 0.7617439873285815, + "grad_norm": 462.0, + "learning_rate": 1.4162360720947437e-05, + "loss": 14.9385, + "step": 18275 + }, + { + "epoch": 0.7617856696261098, + "grad_norm": 217.0, + "learning_rate": 1.4157654076399401e-05, + "loss": 12.0006, + "step": 18276 + }, + { + "epoch": 0.761827351923638, + "grad_norm": 256.0, + "learning_rate": 1.415294808508132e-05, + "loss": 10.815, + "step": 18277 + }, + { + "epoch": 0.7618690342211663, + "grad_norm": 262.0, + "learning_rate": 1.4148242747078944e-05, + "loss": 11.1259, + "step": 18278 + }, + { + "epoch": 0.7619107165186945, + "grad_norm": 548.0, + "learning_rate": 1.4143538062478023e-05, + "loss": 18.3752, + "step": 18279 + }, + { + "epoch": 0.7619523988162228, + "grad_norm": 312.0, + "learning_rate": 1.4138834031364306e-05, + "loss": 14.063, + "step": 18280 + }, + { + "epoch": 0.761994081113751, + "grad_norm": 298.0, + "learning_rate": 1.4134130653823519e-05, + "loss": 13.3141, + "step": 18281 + }, + { + "epoch": 0.7620357634112792, + "grad_norm": 480.0, + "learning_rate": 1.4129427929941385e-05, + "loss": 17.7502, + "step": 18282 + }, + { + "epoch": 0.7620774457088074, + "grad_norm": 346.0, + "learning_rate": 1.4124725859803618e-05, + "loss": 13.8755, + "step": 18283 + }, + { + "epoch": 0.7621191280063357, + "grad_norm": 688.0, + "learning_rate": 1.41200244434959e-05, + "loss": 19.501, + "step": 18284 + }, + { + "epoch": 0.762160810303864, + "grad_norm": 224.0, + "learning_rate": 1.4115323681103926e-05, + "loss": 12.7503, + "step": 18285 + }, + { + "epoch": 0.7622024926013922, + "grad_norm": 268.0, + "learning_rate": 1.4110623572713361e-05, + "loss": 13.2504, + "step": 18286 + }, + { + "epoch": 0.7622441748989204, + "grad_norm": 488.0, + "learning_rate": 1.4105924118409864e-05, + "loss": 16.3775, + "step": 18287 + }, + { + "epoch": 0.7622858571964487, + "grad_norm": 157.0, + "learning_rate": 1.4101225318279093e-05, + "loss": 9.7503, + "step": 18288 + }, + { + "epoch": 0.7623275394939769, + "grad_norm": 136.0, + "learning_rate": 1.4096527172406675e-05, + "loss": 9.9377, + "step": 18289 + }, + { + "epoch": 0.7623692217915051, + "grad_norm": 338.0, + "learning_rate": 1.4091829680878233e-05, + "loss": 14.5003, + "step": 18290 + }, + { + "epoch": 0.7624109040890333, + "grad_norm": 177.0, + "learning_rate": 1.4087132843779389e-05, + "loss": 11.1252, + "step": 18291 + }, + { + "epoch": 0.7624525863865617, + "grad_norm": 416.0, + "learning_rate": 1.4082436661195719e-05, + "loss": 18.3764, + "step": 18292 + }, + { + "epoch": 0.7624942686840899, + "grad_norm": 290.0, + "learning_rate": 1.4077741133212858e-05, + "loss": 11.5627, + "step": 18293 + }, + { + "epoch": 0.7625359509816181, + "grad_norm": 346.0, + "learning_rate": 1.4073046259916322e-05, + "loss": 14.0007, + "step": 18294 + }, + { + "epoch": 0.7625776332791463, + "grad_norm": 221.0, + "learning_rate": 1.4068352041391742e-05, + "loss": 12.1259, + "step": 18295 + }, + { + "epoch": 0.7626193155766746, + "grad_norm": 398.0, + "learning_rate": 1.4063658477724606e-05, + "loss": 14.8752, + "step": 18296 + }, + { + "epoch": 0.7626609978742028, + "grad_norm": 101.0, + "learning_rate": 1.405896556900051e-05, + "loss": 10.1253, + "step": 18297 + }, + { + "epoch": 0.762702680171731, + "grad_norm": 636.0, + "learning_rate": 1.4054273315304928e-05, + "loss": 22.8752, + "step": 18298 + }, + { + "epoch": 0.7627443624692593, + "grad_norm": 188.0, + "learning_rate": 1.4049581716723437e-05, + "loss": 11.0627, + "step": 18299 + }, + { + "epoch": 0.7627860447667876, + "grad_norm": 308.0, + "learning_rate": 1.4044890773341485e-05, + "loss": 11.8127, + "step": 18300 + }, + { + "epoch": 0.7628277270643158, + "grad_norm": 496.0, + "learning_rate": 1.4040200485244598e-05, + "loss": 16.6252, + "step": 18301 + }, + { + "epoch": 0.762869409361844, + "grad_norm": 292.0, + "learning_rate": 1.4035510852518252e-05, + "loss": 12.1251, + "step": 18302 + }, + { + "epoch": 0.7629110916593722, + "grad_norm": 152.0, + "learning_rate": 1.4030821875247918e-05, + "loss": 10.2502, + "step": 18303 + }, + { + "epoch": 0.7629527739569005, + "grad_norm": 264.0, + "learning_rate": 1.4026133553519044e-05, + "loss": 7.6258, + "step": 18304 + }, + { + "epoch": 0.7629944562544287, + "grad_norm": 490.0, + "learning_rate": 1.4021445887417078e-05, + "loss": 15.8776, + "step": 18305 + }, + { + "epoch": 0.763036138551957, + "grad_norm": 428.0, + "learning_rate": 1.4016758877027457e-05, + "loss": 15.9379, + "step": 18306 + }, + { + "epoch": 0.7630778208494853, + "grad_norm": 592.0, + "learning_rate": 1.4012072522435598e-05, + "loss": 18.001, + "step": 18307 + }, + { + "epoch": 0.7631195031470135, + "grad_norm": 382.0, + "learning_rate": 1.4007386823726914e-05, + "loss": 14.9378, + "step": 18308 + }, + { + "epoch": 0.7631611854445417, + "grad_norm": 119.5, + "learning_rate": 1.4002701780986798e-05, + "loss": 6.6902, + "step": 18309 + }, + { + "epoch": 0.7632028677420699, + "grad_norm": 498.0, + "learning_rate": 1.3998017394300644e-05, + "loss": 18.5011, + "step": 18310 + }, + { + "epoch": 0.7632445500395982, + "grad_norm": 101.5, + "learning_rate": 1.399333366375381e-05, + "loss": 9.501, + "step": 18311 + }, + { + "epoch": 0.7632862323371264, + "grad_norm": 196.0, + "learning_rate": 1.3988650589431673e-05, + "loss": 10.4381, + "step": 18312 + }, + { + "epoch": 0.7633279146346547, + "grad_norm": 322.0, + "learning_rate": 1.3983968171419576e-05, + "loss": 13.1879, + "step": 18313 + }, + { + "epoch": 0.7633695969321829, + "grad_norm": 282.0, + "learning_rate": 1.397928640980285e-05, + "loss": 12.1252, + "step": 18314 + }, + { + "epoch": 0.7634112792297112, + "grad_norm": 217.0, + "learning_rate": 1.3974605304666833e-05, + "loss": 10.4376, + "step": 18315 + }, + { + "epoch": 0.7634529615272394, + "grad_norm": 68.5, + "learning_rate": 1.396992485609681e-05, + "loss": 8.7505, + "step": 18316 + }, + { + "epoch": 0.7634946438247676, + "grad_norm": 1400.0, + "learning_rate": 1.3965245064178146e-05, + "loss": 29.5042, + "step": 18317 + }, + { + "epoch": 0.7635363261222958, + "grad_norm": 516.0, + "learning_rate": 1.396056592899605e-05, + "loss": 16.6256, + "step": 18318 + }, + { + "epoch": 0.7635780084198242, + "grad_norm": 302.0, + "learning_rate": 1.3955887450635874e-05, + "loss": 14.8753, + "step": 18319 + }, + { + "epoch": 0.7636196907173524, + "grad_norm": 135.0, + "learning_rate": 1.3951209629182815e-05, + "loss": 9.7507, + "step": 18320 + }, + { + "epoch": 0.7636613730148806, + "grad_norm": 241.0, + "learning_rate": 1.3946532464722196e-05, + "loss": 8.1877, + "step": 18321 + }, + { + "epoch": 0.7637030553124088, + "grad_norm": 254.0, + "learning_rate": 1.394185595733919e-05, + "loss": 12.313, + "step": 18322 + }, + { + "epoch": 0.7637447376099371, + "grad_norm": 752.0, + "learning_rate": 1.3937180107119091e-05, + "loss": 22.7502, + "step": 18323 + }, + { + "epoch": 0.7637864199074653, + "grad_norm": 656.0, + "learning_rate": 1.393250491414706e-05, + "loss": 19.2503, + "step": 18324 + }, + { + "epoch": 0.7638281022049935, + "grad_norm": 193.0, + "learning_rate": 1.3927830378508356e-05, + "loss": 11.5008, + "step": 18325 + }, + { + "epoch": 0.7638697845025217, + "grad_norm": 138.0, + "learning_rate": 1.392315650028811e-05, + "loss": 8.4378, + "step": 18326 + }, + { + "epoch": 0.7639114668000501, + "grad_norm": 330.0, + "learning_rate": 1.391848327957156e-05, + "loss": 15.0006, + "step": 18327 + }, + { + "epoch": 0.7639531490975783, + "grad_norm": 181.0, + "learning_rate": 1.3913810716443853e-05, + "loss": 10.6881, + "step": 18328 + }, + { + "epoch": 0.7639948313951065, + "grad_norm": 67.0, + "learning_rate": 1.3909138810990151e-05, + "loss": 6.1565, + "step": 18329 + }, + { + "epoch": 0.7640365136926347, + "grad_norm": 340.0, + "learning_rate": 1.3904467563295597e-05, + "loss": 13.7501, + "step": 18330 + }, + { + "epoch": 0.764078195990163, + "grad_norm": 222.0, + "learning_rate": 1.3899796973445328e-05, + "loss": 12.1259, + "step": 18331 + }, + { + "epoch": 0.7641198782876912, + "grad_norm": 660.0, + "learning_rate": 1.3895127041524458e-05, + "loss": 20.2503, + "step": 18332 + }, + { + "epoch": 0.7641615605852194, + "grad_norm": 556.0, + "learning_rate": 1.3890457767618109e-05, + "loss": 17.5002, + "step": 18333 + }, + { + "epoch": 0.7642032428827477, + "grad_norm": 217.0, + "learning_rate": 1.3885789151811369e-05, + "loss": 12.8756, + "step": 18334 + }, + { + "epoch": 0.764244925180276, + "grad_norm": 544.0, + "learning_rate": 1.3881121194189328e-05, + "loss": 19.2503, + "step": 18335 + }, + { + "epoch": 0.7642866074778042, + "grad_norm": 828.0, + "learning_rate": 1.3876453894837066e-05, + "loss": 23.1252, + "step": 18336 + }, + { + "epoch": 0.7643282897753324, + "grad_norm": 388.0, + "learning_rate": 1.387178725383963e-05, + "loss": 16.5027, + "step": 18337 + }, + { + "epoch": 0.7643699720728606, + "grad_norm": 354.0, + "learning_rate": 1.3867121271282085e-05, + "loss": 14.5006, + "step": 18338 + }, + { + "epoch": 0.7644116543703889, + "grad_norm": 788.0, + "learning_rate": 1.3862455947249459e-05, + "loss": 19.2537, + "step": 18339 + }, + { + "epoch": 0.7644533366679171, + "grad_norm": 127.5, + "learning_rate": 1.3857791281826783e-05, + "loss": 9.1881, + "step": 18340 + }, + { + "epoch": 0.7644950189654454, + "grad_norm": 146.0, + "learning_rate": 1.3853127275099064e-05, + "loss": 10.1252, + "step": 18341 + }, + { + "epoch": 0.7645367012629736, + "grad_norm": 350.0, + "learning_rate": 1.38484639271513e-05, + "loss": 13.5628, + "step": 18342 + }, + { + "epoch": 0.7645783835605019, + "grad_norm": 260.0, + "learning_rate": 1.3843801238068516e-05, + "loss": 10.0005, + "step": 18343 + }, + { + "epoch": 0.7646200658580301, + "grad_norm": 132.0, + "learning_rate": 1.3839139207935636e-05, + "loss": 10.5627, + "step": 18344 + }, + { + "epoch": 0.7646617481555583, + "grad_norm": 239.0, + "learning_rate": 1.383447783683769e-05, + "loss": 12.8143, + "step": 18345 + }, + { + "epoch": 0.7647034304530865, + "grad_norm": 362.0, + "learning_rate": 1.3829817124859557e-05, + "loss": 14.5631, + "step": 18346 + }, + { + "epoch": 0.7647451127506149, + "grad_norm": 119.5, + "learning_rate": 1.3825157072086253e-05, + "loss": 9.8753, + "step": 18347 + }, + { + "epoch": 0.7647867950481431, + "grad_norm": 209.0, + "learning_rate": 1.3820497678602639e-05, + "loss": 9.6877, + "step": 18348 + }, + { + "epoch": 0.7648284773456713, + "grad_norm": 188.0, + "learning_rate": 1.3815838944493698e-05, + "loss": 10.2507, + "step": 18349 + }, + { + "epoch": 0.7648701596431995, + "grad_norm": 195.0, + "learning_rate": 1.3811180869844276e-05, + "loss": 11.0006, + "step": 18350 + }, + { + "epoch": 0.7649118419407278, + "grad_norm": 236.0, + "learning_rate": 1.3806523454739311e-05, + "loss": 11.0627, + "step": 18351 + }, + { + "epoch": 0.764953524238256, + "grad_norm": 964.0, + "learning_rate": 1.3801866699263666e-05, + "loss": 26.0002, + "step": 18352 + }, + { + "epoch": 0.7649952065357842, + "grad_norm": 498.0, + "learning_rate": 1.3797210603502215e-05, + "loss": 17.5009, + "step": 18353 + }, + { + "epoch": 0.7650368888333124, + "grad_norm": 204.0, + "learning_rate": 1.3792555167539816e-05, + "loss": 11.1253, + "step": 18354 + }, + { + "epoch": 0.7650785711308408, + "grad_norm": 252.0, + "learning_rate": 1.3787900391461312e-05, + "loss": 12.1882, + "step": 18355 + }, + { + "epoch": 0.765120253428369, + "grad_norm": 270.0, + "learning_rate": 1.3783246275351535e-05, + "loss": 13.6253, + "step": 18356 + }, + { + "epoch": 0.7651619357258972, + "grad_norm": 147.0, + "learning_rate": 1.3778592819295315e-05, + "loss": 9.9379, + "step": 18357 + }, + { + "epoch": 0.7652036180234254, + "grad_norm": 245.0, + "learning_rate": 1.3773940023377452e-05, + "loss": 12.1876, + "step": 18358 + }, + { + "epoch": 0.7652453003209537, + "grad_norm": 172.0, + "learning_rate": 1.3769287887682753e-05, + "loss": 11.7511, + "step": 18359 + }, + { + "epoch": 0.7652869826184819, + "grad_norm": 181.0, + "learning_rate": 1.3764636412295995e-05, + "loss": 10.9376, + "step": 18360 + }, + { + "epoch": 0.7653286649160101, + "grad_norm": 408.0, + "learning_rate": 1.3759985597301955e-05, + "loss": 15.3754, + "step": 18361 + }, + { + "epoch": 0.7653703472135384, + "grad_norm": 298.0, + "learning_rate": 1.3755335442785393e-05, + "loss": 13.2507, + "step": 18362 + }, + { + "epoch": 0.7654120295110667, + "grad_norm": 237.0, + "learning_rate": 1.3750685948831065e-05, + "loss": 11.9377, + "step": 18363 + }, + { + "epoch": 0.7654537118085949, + "grad_norm": 272.0, + "learning_rate": 1.3746037115523696e-05, + "loss": 12.938, + "step": 18364 + }, + { + "epoch": 0.7654953941061231, + "grad_norm": 378.0, + "learning_rate": 1.3741388942948025e-05, + "loss": 14.3127, + "step": 18365 + }, + { + "epoch": 0.7655370764036513, + "grad_norm": 616.0, + "learning_rate": 1.3736741431188743e-05, + "loss": 18.8769, + "step": 18366 + }, + { + "epoch": 0.7655787587011796, + "grad_norm": 422.0, + "learning_rate": 1.37320945803306e-05, + "loss": 14.7502, + "step": 18367 + }, + { + "epoch": 0.7656204409987079, + "grad_norm": 378.0, + "learning_rate": 1.3727448390458219e-05, + "loss": 16.2503, + "step": 18368 + }, + { + "epoch": 0.7656621232962361, + "grad_norm": 218.0, + "learning_rate": 1.3722802861656342e-05, + "loss": 10.938, + "step": 18369 + }, + { + "epoch": 0.7657038055937643, + "grad_norm": 306.0, + "learning_rate": 1.3718157994009573e-05, + "loss": 13.4377, + "step": 18370 + }, + { + "epoch": 0.7657454878912926, + "grad_norm": 286.0, + "learning_rate": 1.3713513787602628e-05, + "loss": 13.6878, + "step": 18371 + }, + { + "epoch": 0.7657871701888208, + "grad_norm": 472.0, + "learning_rate": 1.3708870242520083e-05, + "loss": 15.0007, + "step": 18372 + }, + { + "epoch": 0.765828852486349, + "grad_norm": 282.0, + "learning_rate": 1.3704227358846634e-05, + "loss": 10.2502, + "step": 18373 + }, + { + "epoch": 0.7658705347838772, + "grad_norm": 242.0, + "learning_rate": 1.3699585136666825e-05, + "loss": 13.1876, + "step": 18374 + }, + { + "epoch": 0.7659122170814056, + "grad_norm": 322.0, + "learning_rate": 1.3694943576065338e-05, + "loss": 14.1254, + "step": 18375 + }, + { + "epoch": 0.7659538993789338, + "grad_norm": 229.0, + "learning_rate": 1.3690302677126694e-05, + "loss": 11.3128, + "step": 18376 + }, + { + "epoch": 0.765995581676462, + "grad_norm": 131.0, + "learning_rate": 1.368566243993552e-05, + "loss": 9.5629, + "step": 18377 + }, + { + "epoch": 0.7660372639739903, + "grad_norm": 286.0, + "learning_rate": 1.368102286457637e-05, + "loss": 13.1251, + "step": 18378 + }, + { + "epoch": 0.7660789462715185, + "grad_norm": 444.0, + "learning_rate": 1.3676383951133808e-05, + "loss": 16.5002, + "step": 18379 + }, + { + "epoch": 0.7661206285690467, + "grad_norm": 376.0, + "learning_rate": 1.3671745699692367e-05, + "loss": 15.4378, + "step": 18380 + }, + { + "epoch": 0.7661623108665749, + "grad_norm": 235.0, + "learning_rate": 1.366710811033659e-05, + "loss": 10.5627, + "step": 18381 + }, + { + "epoch": 0.7662039931641033, + "grad_norm": 592.0, + "learning_rate": 1.3662471183150994e-05, + "loss": 18.8753, + "step": 18382 + }, + { + "epoch": 0.7662456754616315, + "grad_norm": 442.0, + "learning_rate": 1.3657834918220086e-05, + "loss": 14.1876, + "step": 18383 + }, + { + "epoch": 0.7662873577591597, + "grad_norm": 356.0, + "learning_rate": 1.3653199315628361e-05, + "loss": 13.3131, + "step": 18384 + }, + { + "epoch": 0.7663290400566879, + "grad_norm": 157.0, + "learning_rate": 1.3648564375460305e-05, + "loss": 8.3758, + "step": 18385 + }, + { + "epoch": 0.7663707223542162, + "grad_norm": 1032.0, + "learning_rate": 1.364393009780039e-05, + "loss": 24.7522, + "step": 18386 + }, + { + "epoch": 0.7664124046517444, + "grad_norm": 214.0, + "learning_rate": 1.3639296482733078e-05, + "loss": 11.6886, + "step": 18387 + }, + { + "epoch": 0.7664540869492726, + "grad_norm": 432.0, + "learning_rate": 1.3634663530342811e-05, + "loss": 15.5002, + "step": 18388 + }, + { + "epoch": 0.7664957692468009, + "grad_norm": 175.0, + "learning_rate": 1.3630031240714036e-05, + "loss": 11.251, + "step": 18389 + }, + { + "epoch": 0.7665374515443292, + "grad_norm": 366.0, + "learning_rate": 1.3625399613931166e-05, + "loss": 15.6256, + "step": 18390 + }, + { + "epoch": 0.7665791338418574, + "grad_norm": 424.0, + "learning_rate": 1.362076865007862e-05, + "loss": 15.7521, + "step": 18391 + }, + { + "epoch": 0.7666208161393856, + "grad_norm": 1256.0, + "learning_rate": 1.3616138349240775e-05, + "loss": 25.005, + "step": 18392 + }, + { + "epoch": 0.7666624984369138, + "grad_norm": 103.0, + "learning_rate": 1.3611508711502074e-05, + "loss": 9.6877, + "step": 18393 + }, + { + "epoch": 0.7667041807344421, + "grad_norm": 478.0, + "learning_rate": 1.3606879736946826e-05, + "loss": 17.3754, + "step": 18394 + }, + { + "epoch": 0.7667458630319703, + "grad_norm": 1608.0, + "learning_rate": 1.3602251425659451e-05, + "loss": 41.2502, + "step": 18395 + }, + { + "epoch": 0.7667875453294986, + "grad_norm": 544.0, + "learning_rate": 1.359762377772425e-05, + "loss": 18.3751, + "step": 18396 + }, + { + "epoch": 0.7668292276270268, + "grad_norm": 564.0, + "learning_rate": 1.3592996793225616e-05, + "loss": 21.1252, + "step": 18397 + }, + { + "epoch": 0.7668709099245551, + "grad_norm": 172.0, + "learning_rate": 1.3588370472247814e-05, + "loss": 11.8128, + "step": 18398 + }, + { + "epoch": 0.7669125922220833, + "grad_norm": 148.0, + "learning_rate": 1.3583744814875227e-05, + "loss": 9.5626, + "step": 18399 + }, + { + "epoch": 0.7669542745196115, + "grad_norm": 424.0, + "learning_rate": 1.3579119821192093e-05, + "loss": 14.4377, + "step": 18400 + }, + { + "epoch": 0.7669959568171397, + "grad_norm": 556.0, + "learning_rate": 1.3574495491282763e-05, + "loss": 19.7502, + "step": 18401 + }, + { + "epoch": 0.767037639114668, + "grad_norm": 528.0, + "learning_rate": 1.356987182523145e-05, + "loss": 17.7501, + "step": 18402 + }, + { + "epoch": 0.7670793214121963, + "grad_norm": 308.0, + "learning_rate": 1.356524882312248e-05, + "loss": 14.688, + "step": 18403 + }, + { + "epoch": 0.7671210037097245, + "grad_norm": 420.0, + "learning_rate": 1.3560626485040085e-05, + "loss": 15.7506, + "step": 18404 + }, + { + "epoch": 0.7671626860072527, + "grad_norm": 482.0, + "learning_rate": 1.35560048110685e-05, + "loss": 18.2513, + "step": 18405 + }, + { + "epoch": 0.767204368304781, + "grad_norm": 125.5, + "learning_rate": 1.3551383801291968e-05, + "loss": 9.9382, + "step": 18406 + }, + { + "epoch": 0.7672460506023092, + "grad_norm": 179.0, + "learning_rate": 1.3546763455794703e-05, + "loss": 11.0003, + "step": 18407 + }, + { + "epoch": 0.7672877328998374, + "grad_norm": 508.0, + "learning_rate": 1.3542143774660903e-05, + "loss": 17.8758, + "step": 18408 + }, + { + "epoch": 0.7673294151973656, + "grad_norm": 280.0, + "learning_rate": 1.353752475797478e-05, + "loss": 13.3128, + "step": 18409 + }, + { + "epoch": 0.767371097494894, + "grad_norm": 824.0, + "learning_rate": 1.3532906405820495e-05, + "loss": 22.1257, + "step": 18410 + }, + { + "epoch": 0.7674127797924222, + "grad_norm": 282.0, + "learning_rate": 1.3528288718282234e-05, + "loss": 13.5629, + "step": 18411 + }, + { + "epoch": 0.7674544620899504, + "grad_norm": 458.0, + "learning_rate": 1.3523671695444146e-05, + "loss": 15.6877, + "step": 18412 + }, + { + "epoch": 0.7674961443874786, + "grad_norm": 163.0, + "learning_rate": 1.351905533739038e-05, + "loss": 5.2193, + "step": 18413 + }, + { + "epoch": 0.7675378266850069, + "grad_norm": 784.0, + "learning_rate": 1.3514439644205067e-05, + "loss": 23.5002, + "step": 18414 + }, + { + "epoch": 0.7675795089825351, + "grad_norm": 276.0, + "learning_rate": 1.3509824615972334e-05, + "loss": 10.8133, + "step": 18415 + }, + { + "epoch": 0.7676211912800633, + "grad_norm": 219.0, + "learning_rate": 1.3505210252776285e-05, + "loss": 11.8127, + "step": 18416 + }, + { + "epoch": 0.7676628735775916, + "grad_norm": 458.0, + "learning_rate": 1.3500596554701018e-05, + "loss": 16.2502, + "step": 18417 + }, + { + "epoch": 0.7677045558751199, + "grad_norm": 312.0, + "learning_rate": 1.3495983521830608e-05, + "loss": 13.7502, + "step": 18418 + }, + { + "epoch": 0.7677462381726481, + "grad_norm": 480.0, + "learning_rate": 1.3491371154249167e-05, + "loss": 18.2502, + "step": 18419 + }, + { + "epoch": 0.7677879204701763, + "grad_norm": 290.0, + "learning_rate": 1.3486759452040693e-05, + "loss": 12.5628, + "step": 18420 + }, + { + "epoch": 0.7678296027677045, + "grad_norm": 181.0, + "learning_rate": 1.3482148415289302e-05, + "loss": 10.0629, + "step": 18421 + }, + { + "epoch": 0.7678712850652328, + "grad_norm": 402.0, + "learning_rate": 1.347753804407897e-05, + "loss": 15.7503, + "step": 18422 + }, + { + "epoch": 0.767912967362761, + "grad_norm": 600.0, + "learning_rate": 1.3472928338493779e-05, + "loss": 19.3751, + "step": 18423 + }, + { + "epoch": 0.7679546496602893, + "grad_norm": 704.0, + "learning_rate": 1.3468319298617682e-05, + "loss": 19.8771, + "step": 18424 + }, + { + "epoch": 0.7679963319578175, + "grad_norm": 88.0, + "learning_rate": 1.3463710924534734e-05, + "loss": 9.3754, + "step": 18425 + }, + { + "epoch": 0.7680380142553458, + "grad_norm": 446.0, + "learning_rate": 1.3459103216328872e-05, + "loss": 16.3754, + "step": 18426 + }, + { + "epoch": 0.768079696552874, + "grad_norm": 482.0, + "learning_rate": 1.3454496174084114e-05, + "loss": 16.5009, + "step": 18427 + }, + { + "epoch": 0.7681213788504022, + "grad_norm": 612.0, + "learning_rate": 1.3449889797884407e-05, + "loss": 17.6254, + "step": 18428 + }, + { + "epoch": 0.7681630611479304, + "grad_norm": 348.0, + "learning_rate": 1.3445284087813702e-05, + "loss": 15.0006, + "step": 18429 + }, + { + "epoch": 0.7682047434454587, + "grad_norm": 472.0, + "learning_rate": 1.3440679043955939e-05, + "loss": 17.0002, + "step": 18430 + }, + { + "epoch": 0.768246425742987, + "grad_norm": 420.0, + "learning_rate": 1.3436074666395044e-05, + "loss": 15.9377, + "step": 18431 + }, + { + "epoch": 0.7682881080405152, + "grad_norm": 420.0, + "learning_rate": 1.3431470955214936e-05, + "loss": 16.876, + "step": 18432 + }, + { + "epoch": 0.7683297903380434, + "grad_norm": 488.0, + "learning_rate": 1.3426867910499518e-05, + "loss": 14.628, + "step": 18433 + }, + { + "epoch": 0.7683714726355717, + "grad_norm": 253.0, + "learning_rate": 1.3422265532332678e-05, + "loss": 12.3752, + "step": 18434 + }, + { + "epoch": 0.7684131549330999, + "grad_norm": 948.0, + "learning_rate": 1.3417663820798293e-05, + "loss": 25.6252, + "step": 18435 + }, + { + "epoch": 0.7684548372306281, + "grad_norm": 640.0, + "learning_rate": 1.3413062775980234e-05, + "loss": 18.6253, + "step": 18436 + }, + { + "epoch": 0.7684965195281563, + "grad_norm": 262.0, + "learning_rate": 1.3408462397962357e-05, + "loss": 13.0628, + "step": 18437 + }, + { + "epoch": 0.7685382018256847, + "grad_norm": 328.0, + "learning_rate": 1.34038626868285e-05, + "loss": 13.8753, + "step": 18438 + }, + { + "epoch": 0.7685798841232129, + "grad_norm": 186.0, + "learning_rate": 1.339926364266249e-05, + "loss": 12.1879, + "step": 18439 + }, + { + "epoch": 0.7686215664207411, + "grad_norm": 116.5, + "learning_rate": 1.3394665265548151e-05, + "loss": 10.063, + "step": 18440 + }, + { + "epoch": 0.7686632487182693, + "grad_norm": 576.0, + "learning_rate": 1.3390067555569292e-05, + "loss": 19.1252, + "step": 18441 + }, + { + "epoch": 0.7687049310157976, + "grad_norm": 233.0, + "learning_rate": 1.3385470512809683e-05, + "loss": 12.3127, + "step": 18442 + }, + { + "epoch": 0.7687466133133258, + "grad_norm": 126.0, + "learning_rate": 1.338087413735315e-05, + "loss": 9.7503, + "step": 18443 + }, + { + "epoch": 0.768788295610854, + "grad_norm": 450.0, + "learning_rate": 1.3376278429283412e-05, + "loss": 15.5003, + "step": 18444 + }, + { + "epoch": 0.7688299779083823, + "grad_norm": 264.0, + "learning_rate": 1.3371683388684281e-05, + "loss": 12.0013, + "step": 18445 + }, + { + "epoch": 0.7688716602059106, + "grad_norm": 600.0, + "learning_rate": 1.336708901563944e-05, + "loss": 19.7504, + "step": 18446 + }, + { + "epoch": 0.7689133425034388, + "grad_norm": 330.0, + "learning_rate": 1.3362495310232687e-05, + "loss": 14.7506, + "step": 18447 + }, + { + "epoch": 0.768955024800967, + "grad_norm": 472.0, + "learning_rate": 1.3357902272547674e-05, + "loss": 14.0637, + "step": 18448 + }, + { + "epoch": 0.7689967070984952, + "grad_norm": 268.0, + "learning_rate": 1.3353309902668176e-05, + "loss": 12.5629, + "step": 18449 + }, + { + "epoch": 0.7690383893960235, + "grad_norm": 756.0, + "learning_rate": 1.3348718200677829e-05, + "loss": 21.1252, + "step": 18450 + }, + { + "epoch": 0.7690800716935517, + "grad_norm": 174.0, + "learning_rate": 1.334412716666038e-05, + "loss": 11.1254, + "step": 18451 + }, + { + "epoch": 0.76912175399108, + "grad_norm": 612.0, + "learning_rate": 1.3339536800699432e-05, + "loss": 18.0014, + "step": 18452 + }, + { + "epoch": 0.7691634362886083, + "grad_norm": 176.0, + "learning_rate": 1.3334947102878698e-05, + "loss": 10.3128, + "step": 18453 + }, + { + "epoch": 0.7692051185861365, + "grad_norm": 404.0, + "learning_rate": 1.3330358073281806e-05, + "loss": 16.2507, + "step": 18454 + }, + { + "epoch": 0.7692468008836647, + "grad_norm": 320.0, + "learning_rate": 1.3325769711992398e-05, + "loss": 11.0017, + "step": 18455 + }, + { + "epoch": 0.7692884831811929, + "grad_norm": 264.0, + "learning_rate": 1.3321182019094092e-05, + "loss": 12.0003, + "step": 18456 + }, + { + "epoch": 0.7693301654787212, + "grad_norm": 572.0, + "learning_rate": 1.3316594994670494e-05, + "loss": 17.8751, + "step": 18457 + }, + { + "epoch": 0.7693718477762495, + "grad_norm": 448.0, + "learning_rate": 1.3312008638805212e-05, + "loss": 16.6252, + "step": 18458 + }, + { + "epoch": 0.7694135300737777, + "grad_norm": 253.0, + "learning_rate": 1.3307422951581833e-05, + "loss": 11.8133, + "step": 18459 + }, + { + "epoch": 0.7694552123713059, + "grad_norm": 268.0, + "learning_rate": 1.3302837933083923e-05, + "loss": 13.501, + "step": 18460 + }, + { + "epoch": 0.7694968946688342, + "grad_norm": 185.0, + "learning_rate": 1.3298253583395048e-05, + "loss": 10.3758, + "step": 18461 + }, + { + "epoch": 0.7695385769663624, + "grad_norm": 544.0, + "learning_rate": 1.3293669902598766e-05, + "loss": 18.7504, + "step": 18462 + }, + { + "epoch": 0.7695802592638906, + "grad_norm": 178.0, + "learning_rate": 1.3289086890778607e-05, + "loss": 10.3755, + "step": 18463 + }, + { + "epoch": 0.7696219415614188, + "grad_norm": 150.0, + "learning_rate": 1.3284504548018095e-05, + "loss": 9.5007, + "step": 18464 + }, + { + "epoch": 0.7696636238589472, + "grad_norm": 181.0, + "learning_rate": 1.3279922874400746e-05, + "loss": 11.7506, + "step": 18465 + }, + { + "epoch": 0.7697053061564754, + "grad_norm": 458.0, + "learning_rate": 1.3275341870010066e-05, + "loss": 16.5003, + "step": 18466 + }, + { + "epoch": 0.7697469884540036, + "grad_norm": 450.0, + "learning_rate": 1.3270761534929538e-05, + "loss": 16.3772, + "step": 18467 + }, + { + "epoch": 0.7697886707515318, + "grad_norm": 1672.0, + "learning_rate": 1.3266181869242627e-05, + "loss": 35.7548, + "step": 18468 + }, + { + "epoch": 0.7698303530490601, + "grad_norm": 680.0, + "learning_rate": 1.3261602873032846e-05, + "loss": 20.2503, + "step": 18469 + }, + { + "epoch": 0.7698720353465883, + "grad_norm": 205.0, + "learning_rate": 1.3257024546383579e-05, + "loss": 11.6876, + "step": 18470 + }, + { + "epoch": 0.7699137176441165, + "grad_norm": 300.0, + "learning_rate": 1.3252446889378335e-05, + "loss": 12.1253, + "step": 18471 + }, + { + "epoch": 0.7699553999416447, + "grad_norm": 330.0, + "learning_rate": 1.324786990210048e-05, + "loss": 13.5637, + "step": 18472 + }, + { + "epoch": 0.7699970822391731, + "grad_norm": 296.0, + "learning_rate": 1.3243293584633482e-05, + "loss": 13.9382, + "step": 18473 + }, + { + "epoch": 0.7700387645367013, + "grad_norm": 804.0, + "learning_rate": 1.3238717937060697e-05, + "loss": 19.5046, + "step": 18474 + }, + { + "epoch": 0.7700804468342295, + "grad_norm": 154.0, + "learning_rate": 1.3234142959465567e-05, + "loss": 9.5626, + "step": 18475 + }, + { + "epoch": 0.7701221291317577, + "grad_norm": 274.0, + "learning_rate": 1.3229568651931417e-05, + "loss": 12.6877, + "step": 18476 + }, + { + "epoch": 0.770163811429286, + "grad_norm": 1064.0, + "learning_rate": 1.3224995014541663e-05, + "loss": 27.0005, + "step": 18477 + }, + { + "epoch": 0.7702054937268142, + "grad_norm": 656.0, + "learning_rate": 1.3220422047379633e-05, + "loss": 21.2505, + "step": 18478 + }, + { + "epoch": 0.7702471760243425, + "grad_norm": 192.0, + "learning_rate": 1.3215849750528675e-05, + "loss": 11.3155, + "step": 18479 + }, + { + "epoch": 0.7702888583218707, + "grad_norm": 768.0, + "learning_rate": 1.3211278124072124e-05, + "loss": 19.8752, + "step": 18480 + }, + { + "epoch": 0.770330540619399, + "grad_norm": 264.0, + "learning_rate": 1.3206707168093296e-05, + "loss": 12.3756, + "step": 18481 + }, + { + "epoch": 0.7703722229169272, + "grad_norm": 504.0, + "learning_rate": 1.3202136882675498e-05, + "loss": 17.1253, + "step": 18482 + }, + { + "epoch": 0.7704139052144554, + "grad_norm": 296.0, + "learning_rate": 1.3197567267902017e-05, + "loss": 11.5657, + "step": 18483 + }, + { + "epoch": 0.7704555875119836, + "grad_norm": 233.0, + "learning_rate": 1.3192998323856142e-05, + "loss": 13.0673, + "step": 18484 + }, + { + "epoch": 0.7704972698095119, + "grad_norm": 176.0, + "learning_rate": 1.3188430050621136e-05, + "loss": 9.5629, + "step": 18485 + }, + { + "epoch": 0.7705389521070402, + "grad_norm": 412.0, + "learning_rate": 1.3183862448280265e-05, + "loss": 14.6877, + "step": 18486 + }, + { + "epoch": 0.7705806344045684, + "grad_norm": 195.0, + "learning_rate": 1.3179295516916768e-05, + "loss": 12.1877, + "step": 18487 + }, + { + "epoch": 0.7706223167020966, + "grad_norm": 79.5, + "learning_rate": 1.3174729256613877e-05, + "loss": 8.8753, + "step": 18488 + }, + { + "epoch": 0.7706639989996249, + "grad_norm": 162.0, + "learning_rate": 1.3170163667454815e-05, + "loss": 11.1878, + "step": 18489 + }, + { + "epoch": 0.7707056812971531, + "grad_norm": 500.0, + "learning_rate": 1.3165598749522795e-05, + "loss": 16.6256, + "step": 18490 + }, + { + "epoch": 0.7707473635946813, + "grad_norm": 238.0, + "learning_rate": 1.3161034502900999e-05, + "loss": 10.8755, + "step": 18491 + }, + { + "epoch": 0.7707890458922095, + "grad_norm": 340.0, + "learning_rate": 1.315647092767261e-05, + "loss": 15.0004, + "step": 18492 + }, + { + "epoch": 0.7708307281897379, + "grad_norm": 410.0, + "learning_rate": 1.315190802392084e-05, + "loss": 14.9382, + "step": 18493 + }, + { + "epoch": 0.7708724104872661, + "grad_norm": 496.0, + "learning_rate": 1.3147345791728782e-05, + "loss": 14.1255, + "step": 18494 + }, + { + "epoch": 0.7709140927847943, + "grad_norm": 204.0, + "learning_rate": 1.3142784231179656e-05, + "loss": 12.0004, + "step": 18495 + }, + { + "epoch": 0.7709557750823225, + "grad_norm": 432.0, + "learning_rate": 1.313822334235652e-05, + "loss": 17.1253, + "step": 18496 + }, + { + "epoch": 0.7709974573798508, + "grad_norm": 548.0, + "learning_rate": 1.3133663125342572e-05, + "loss": 16.2505, + "step": 18497 + }, + { + "epoch": 0.771039139677379, + "grad_norm": 680.0, + "learning_rate": 1.3129103580220853e-05, + "loss": 21.0008, + "step": 18498 + }, + { + "epoch": 0.7710808219749072, + "grad_norm": 125.0, + "learning_rate": 1.3124544707074527e-05, + "loss": 10.813, + "step": 18499 + }, + { + "epoch": 0.7711225042724354, + "grad_norm": 512.0, + "learning_rate": 1.311998650598662e-05, + "loss": 17.251, + "step": 18500 + }, + { + "epoch": 0.7711641865699638, + "grad_norm": 334.0, + "learning_rate": 1.3115428977040256e-05, + "loss": 13.1878, + "step": 18501 + }, + { + "epoch": 0.771205868867492, + "grad_norm": 231.0, + "learning_rate": 1.3110872120318445e-05, + "loss": 11.9383, + "step": 18502 + }, + { + "epoch": 0.7712475511650202, + "grad_norm": 230.0, + "learning_rate": 1.3106315935904284e-05, + "loss": 11.3757, + "step": 18503 + }, + { + "epoch": 0.7712892334625484, + "grad_norm": 416.0, + "learning_rate": 1.3101760423880789e-05, + "loss": 16.6255, + "step": 18504 + }, + { + "epoch": 0.7713309157600767, + "grad_norm": 250.0, + "learning_rate": 1.3097205584330985e-05, + "loss": 12.2502, + "step": 18505 + }, + { + "epoch": 0.7713725980576049, + "grad_norm": 648.0, + "learning_rate": 1.3092651417337882e-05, + "loss": 18.38, + "step": 18506 + }, + { + "epoch": 0.7714142803551332, + "grad_norm": 242.0, + "learning_rate": 1.3088097922984489e-05, + "loss": 11.0627, + "step": 18507 + }, + { + "epoch": 0.7714559626526614, + "grad_norm": 95.5, + "learning_rate": 1.3083545101353783e-05, + "loss": 8.1252, + "step": 18508 + }, + { + "epoch": 0.7714976449501897, + "grad_norm": 1320.0, + "learning_rate": 1.3078992952528745e-05, + "loss": 30.0044, + "step": 18509 + }, + { + "epoch": 0.7715393272477179, + "grad_norm": 83.5, + "learning_rate": 1.3074441476592341e-05, + "loss": 8.0002, + "step": 18510 + }, + { + "epoch": 0.7715810095452461, + "grad_norm": 536.0, + "learning_rate": 1.3069890673627516e-05, + "loss": 17.7502, + "step": 18511 + }, + { + "epoch": 0.7716226918427743, + "grad_norm": 478.0, + "learning_rate": 1.3065340543717214e-05, + "loss": 15.5028, + "step": 18512 + }, + { + "epoch": 0.7716643741403026, + "grad_norm": 243.0, + "learning_rate": 1.3060791086944357e-05, + "loss": 13.5631, + "step": 18513 + }, + { + "epoch": 0.7717060564378309, + "grad_norm": 143.0, + "learning_rate": 1.3056242303391864e-05, + "loss": 9.9378, + "step": 18514 + }, + { + "epoch": 0.7717477387353591, + "grad_norm": 1176.0, + "learning_rate": 1.3051694193142632e-05, + "loss": 24.1297, + "step": 18515 + }, + { + "epoch": 0.7717894210328873, + "grad_norm": 46.75, + "learning_rate": 1.3047146756279555e-05, + "loss": 7.5635, + "step": 18516 + }, + { + "epoch": 0.7718311033304156, + "grad_norm": 604.0, + "learning_rate": 1.3042599992885512e-05, + "loss": 18.6252, + "step": 18517 + }, + { + "epoch": 0.7718727856279438, + "grad_norm": 103.0, + "learning_rate": 1.3038053903043346e-05, + "loss": 8.1878, + "step": 18518 + }, + { + "epoch": 0.771914467925472, + "grad_norm": 1064.0, + "learning_rate": 1.303350848683596e-05, + "loss": 22.8789, + "step": 18519 + }, + { + "epoch": 0.7719561502230002, + "grad_norm": 314.0, + "learning_rate": 1.3028963744346134e-05, + "loss": 13.7506, + "step": 18520 + }, + { + "epoch": 0.7719978325205286, + "grad_norm": 196.0, + "learning_rate": 1.3024419675656751e-05, + "loss": 10.5005, + "step": 18521 + }, + { + "epoch": 0.7720395148180568, + "grad_norm": 248.0, + "learning_rate": 1.3019876280850579e-05, + "loss": 12.4378, + "step": 18522 + }, + { + "epoch": 0.772081197115585, + "grad_norm": 536.0, + "learning_rate": 1.3015333560010474e-05, + "loss": 18.7502, + "step": 18523 + }, + { + "epoch": 0.7721228794131133, + "grad_norm": 1472.0, + "learning_rate": 1.3010791513219161e-05, + "loss": 35.5002, + "step": 18524 + }, + { + "epoch": 0.7721645617106415, + "grad_norm": 88.0, + "learning_rate": 1.3006250140559494e-05, + "loss": 9.1877, + "step": 18525 + }, + { + "epoch": 0.7722062440081697, + "grad_norm": 1240.0, + "learning_rate": 1.3001709442114169e-05, + "loss": 27.7542, + "step": 18526 + }, + { + "epoch": 0.7722479263056979, + "grad_norm": 440.0, + "learning_rate": 1.2997169417966004e-05, + "loss": 16.5006, + "step": 18527 + }, + { + "epoch": 0.7722896086032263, + "grad_norm": 139.0, + "learning_rate": 1.299263006819768e-05, + "loss": 10.1252, + "step": 18528 + }, + { + "epoch": 0.7723312909007545, + "grad_norm": 532.0, + "learning_rate": 1.2988091392891976e-05, + "loss": 18.3756, + "step": 18529 + }, + { + "epoch": 0.7723729731982827, + "grad_norm": 436.0, + "learning_rate": 1.2983553392131587e-05, + "loss": 16.6253, + "step": 18530 + }, + { + "epoch": 0.7724146554958109, + "grad_norm": 326.0, + "learning_rate": 1.2979016065999228e-05, + "loss": 14.2502, + "step": 18531 + }, + { + "epoch": 0.7724563377933392, + "grad_norm": 334.0, + "learning_rate": 1.297447941457759e-05, + "loss": 13.688, + "step": 18532 + }, + { + "epoch": 0.7724980200908674, + "grad_norm": 692.0, + "learning_rate": 1.2969943437949344e-05, + "loss": 21.0004, + "step": 18533 + }, + { + "epoch": 0.7725397023883956, + "grad_norm": 125.5, + "learning_rate": 1.2965408136197172e-05, + "loss": 10.3752, + "step": 18534 + }, + { + "epoch": 0.7725813846859239, + "grad_norm": 240.0, + "learning_rate": 1.296087350940372e-05, + "loss": 9.3129, + "step": 18535 + }, + { + "epoch": 0.7726230669834522, + "grad_norm": 140.0, + "learning_rate": 1.2956339557651642e-05, + "loss": 8.813, + "step": 18536 + }, + { + "epoch": 0.7726647492809804, + "grad_norm": 286.0, + "learning_rate": 1.295180628102356e-05, + "loss": 12.8752, + "step": 18537 + }, + { + "epoch": 0.7727064315785086, + "grad_norm": 356.0, + "learning_rate": 1.2947273679602096e-05, + "loss": 15.0629, + "step": 18538 + }, + { + "epoch": 0.7727481138760368, + "grad_norm": 146.0, + "learning_rate": 1.2942741753469862e-05, + "loss": 8.6877, + "step": 18539 + }, + { + "epoch": 0.7727897961735651, + "grad_norm": 386.0, + "learning_rate": 1.2938210502709446e-05, + "loss": 14.6881, + "step": 18540 + }, + { + "epoch": 0.7728314784710933, + "grad_norm": 1064.0, + "learning_rate": 1.2933679927403435e-05, + "loss": 27.1255, + "step": 18541 + }, + { + "epoch": 0.7728731607686216, + "grad_norm": 69.5, + "learning_rate": 1.29291500276344e-05, + "loss": 8.6878, + "step": 18542 + }, + { + "epoch": 0.7729148430661498, + "grad_norm": 532.0, + "learning_rate": 1.2924620803484894e-05, + "loss": 18.0007, + "step": 18543 + }, + { + "epoch": 0.7729565253636781, + "grad_norm": 940.0, + "learning_rate": 1.2920092255037453e-05, + "loss": 26.2502, + "step": 18544 + }, + { + "epoch": 0.7729982076612063, + "grad_norm": 1064.0, + "learning_rate": 1.291556438237465e-05, + "loss": 29.1252, + "step": 18545 + }, + { + "epoch": 0.7730398899587345, + "grad_norm": 215.0, + "learning_rate": 1.291103718557895e-05, + "loss": 11.3127, + "step": 18546 + }, + { + "epoch": 0.7730815722562627, + "grad_norm": 132.0, + "learning_rate": 1.2906510664732919e-05, + "loss": 7.9379, + "step": 18547 + }, + { + "epoch": 0.773123254553791, + "grad_norm": 528.0, + "learning_rate": 1.2901984819918995e-05, + "loss": 19.1252, + "step": 18548 + }, + { + "epoch": 0.7731649368513193, + "grad_norm": 414.0, + "learning_rate": 1.2897459651219717e-05, + "loss": 14.8127, + "step": 18549 + }, + { + "epoch": 0.7732066191488475, + "grad_norm": 111.5, + "learning_rate": 1.28929351587175e-05, + "loss": 6.9068, + "step": 18550 + }, + { + "epoch": 0.7732483014463757, + "grad_norm": 284.0, + "learning_rate": 1.2888411342494872e-05, + "loss": 13.4378, + "step": 18551 + }, + { + "epoch": 0.773289983743904, + "grad_norm": 139.0, + "learning_rate": 1.2883888202634204e-05, + "loss": 10.2507, + "step": 18552 + }, + { + "epoch": 0.7733316660414322, + "grad_norm": 520.0, + "learning_rate": 1.2879365739217986e-05, + "loss": 18.2504, + "step": 18553 + }, + { + "epoch": 0.7733733483389604, + "grad_norm": 253.0, + "learning_rate": 1.2874843952328624e-05, + "loss": 11.1877, + "step": 18554 + }, + { + "epoch": 0.7734150306364886, + "grad_norm": 203.0, + "learning_rate": 1.2870322842048527e-05, + "loss": 11.0002, + "step": 18555 + }, + { + "epoch": 0.773456712934017, + "grad_norm": 320.0, + "learning_rate": 1.2865802408460087e-05, + "loss": 15.1255, + "step": 18556 + }, + { + "epoch": 0.7734983952315452, + "grad_norm": 394.0, + "learning_rate": 1.2861282651645702e-05, + "loss": 13.0004, + "step": 18557 + }, + { + "epoch": 0.7735400775290734, + "grad_norm": 330.0, + "learning_rate": 1.285676357168773e-05, + "loss": 15.7508, + "step": 18558 + }, + { + "epoch": 0.7735817598266016, + "grad_norm": 572.0, + "learning_rate": 1.2852245168668543e-05, + "loss": 19.6251, + "step": 18559 + }, + { + "epoch": 0.7736234421241299, + "grad_norm": 252.0, + "learning_rate": 1.2847727442670487e-05, + "loss": 12.063, + "step": 18560 + }, + { + "epoch": 0.7736651244216581, + "grad_norm": 220.0, + "learning_rate": 1.2843210393775895e-05, + "loss": 11.4385, + "step": 18561 + }, + { + "epoch": 0.7737068067191863, + "grad_norm": 72.5, + "learning_rate": 1.2838694022067094e-05, + "loss": 8.8129, + "step": 18562 + }, + { + "epoch": 0.7737484890167146, + "grad_norm": 222.0, + "learning_rate": 1.2834178327626389e-05, + "loss": 12.3771, + "step": 18563 + }, + { + "epoch": 0.7737901713142429, + "grad_norm": 378.0, + "learning_rate": 1.2829663310536082e-05, + "loss": 15.2508, + "step": 18564 + }, + { + "epoch": 0.7738318536117711, + "grad_norm": 1072.0, + "learning_rate": 1.282514897087847e-05, + "loss": 27.1286, + "step": 18565 + }, + { + "epoch": 0.7738735359092993, + "grad_norm": 486.0, + "learning_rate": 1.2820635308735813e-05, + "loss": 18.0018, + "step": 18566 + }, + { + "epoch": 0.7739152182068275, + "grad_norm": 332.0, + "learning_rate": 1.2816122324190378e-05, + "loss": 14.3128, + "step": 18567 + }, + { + "epoch": 0.7739569005043558, + "grad_norm": 336.0, + "learning_rate": 1.2811610017324399e-05, + "loss": 13.5013, + "step": 18568 + }, + { + "epoch": 0.773998582801884, + "grad_norm": 153.0, + "learning_rate": 1.2807098388220156e-05, + "loss": 10.3131, + "step": 18569 + }, + { + "epoch": 0.7740402650994123, + "grad_norm": 229.0, + "learning_rate": 1.2802587436959823e-05, + "loss": 12.0627, + "step": 18570 + }, + { + "epoch": 0.7740819473969405, + "grad_norm": 780.0, + "learning_rate": 1.279807716362566e-05, + "loss": 22.5003, + "step": 18571 + }, + { + "epoch": 0.7741236296944688, + "grad_norm": 988.0, + "learning_rate": 1.2793567568299814e-05, + "loss": 22.0051, + "step": 18572 + }, + { + "epoch": 0.774165311991997, + "grad_norm": 245.0, + "learning_rate": 1.2789058651064534e-05, + "loss": 12.7502, + "step": 18573 + }, + { + "epoch": 0.7742069942895252, + "grad_norm": 210.0, + "learning_rate": 1.2784550412001933e-05, + "loss": 12.0628, + "step": 18574 + }, + { + "epoch": 0.7742486765870534, + "grad_norm": 608.0, + "learning_rate": 1.2780042851194235e-05, + "loss": 19.6292, + "step": 18575 + }, + { + "epoch": 0.7742903588845818, + "grad_norm": 314.0, + "learning_rate": 1.2775535968723534e-05, + "loss": 14.1265, + "step": 18576 + }, + { + "epoch": 0.77433204118211, + "grad_norm": 185.0, + "learning_rate": 1.2771029764672015e-05, + "loss": 11.6253, + "step": 18577 + }, + { + "epoch": 0.7743737234796382, + "grad_norm": 660.0, + "learning_rate": 1.2766524239121763e-05, + "loss": 20.2501, + "step": 18578 + }, + { + "epoch": 0.7744154057771664, + "grad_norm": 466.0, + "learning_rate": 1.2762019392154922e-05, + "loss": 17.6257, + "step": 18579 + }, + { + "epoch": 0.7744570880746947, + "grad_norm": 372.0, + "learning_rate": 1.2757515223853583e-05, + "loss": 15.1253, + "step": 18580 + }, + { + "epoch": 0.7744987703722229, + "grad_norm": 97.0, + "learning_rate": 1.2753011734299836e-05, + "loss": 9.4378, + "step": 18581 + }, + { + "epoch": 0.7745404526697511, + "grad_norm": 199.0, + "learning_rate": 1.2748508923575757e-05, + "loss": 11.7508, + "step": 18582 + }, + { + "epoch": 0.7745821349672793, + "grad_norm": 336.0, + "learning_rate": 1.2744006791763414e-05, + "loss": 13.1254, + "step": 18583 + }, + { + "epoch": 0.7746238172648077, + "grad_norm": 492.0, + "learning_rate": 1.2739505338944846e-05, + "loss": 19.3754, + "step": 18584 + }, + { + "epoch": 0.7746654995623359, + "grad_norm": 133.0, + "learning_rate": 1.2735004565202108e-05, + "loss": 7.9069, + "step": 18585 + }, + { + "epoch": 0.7747071818598641, + "grad_norm": 462.0, + "learning_rate": 1.2730504470617217e-05, + "loss": 15.9378, + "step": 18586 + }, + { + "epoch": 0.7747488641573923, + "grad_norm": 372.0, + "learning_rate": 1.2726005055272188e-05, + "loss": 14.5627, + "step": 18587 + }, + { + "epoch": 0.7747905464549206, + "grad_norm": 196.0, + "learning_rate": 1.2721506319249027e-05, + "loss": 11.5002, + "step": 18588 + }, + { + "epoch": 0.7748322287524488, + "grad_norm": 304.0, + "learning_rate": 1.2717008262629727e-05, + "loss": 13.938, + "step": 18589 + }, + { + "epoch": 0.774873911049977, + "grad_norm": 107.5, + "learning_rate": 1.2712510885496259e-05, + "loss": 8.5008, + "step": 18590 + }, + { + "epoch": 0.7749155933475053, + "grad_norm": 136.0, + "learning_rate": 1.270801418793059e-05, + "loss": 9.6878, + "step": 18591 + }, + { + "epoch": 0.7749572756450336, + "grad_norm": 258.0, + "learning_rate": 1.2703518170014672e-05, + "loss": 12.438, + "step": 18592 + }, + { + "epoch": 0.7749989579425618, + "grad_norm": 520.0, + "learning_rate": 1.2699022831830442e-05, + "loss": 18.2503, + "step": 18593 + }, + { + "epoch": 0.77504064024009, + "grad_norm": 175.0, + "learning_rate": 1.2694528173459823e-05, + "loss": 9.627, + "step": 18594 + }, + { + "epoch": 0.7750823225376183, + "grad_norm": 464.0, + "learning_rate": 1.2690034194984767e-05, + "loss": 16.7514, + "step": 18595 + }, + { + "epoch": 0.7751240048351465, + "grad_norm": 187.0, + "learning_rate": 1.2685540896487125e-05, + "loss": 10.9378, + "step": 18596 + }, + { + "epoch": 0.7751656871326748, + "grad_norm": 217.0, + "learning_rate": 1.268104827804884e-05, + "loss": 11.7503, + "step": 18597 + }, + { + "epoch": 0.775207369430203, + "grad_norm": 145.0, + "learning_rate": 1.2676556339751733e-05, + "loss": 5.6254, + "step": 18598 + }, + { + "epoch": 0.7752490517277313, + "grad_norm": 356.0, + "learning_rate": 1.2672065081677725e-05, + "loss": 16.0006, + "step": 18599 + }, + { + "epoch": 0.7752907340252595, + "grad_norm": 412.0, + "learning_rate": 1.2667574503908619e-05, + "loss": 16.876, + "step": 18600 + }, + { + "epoch": 0.7753324163227877, + "grad_norm": 418.0, + "learning_rate": 1.2663084606526316e-05, + "loss": 15.1254, + "step": 18601 + }, + { + "epoch": 0.7753740986203159, + "grad_norm": 1448.0, + "learning_rate": 1.2658595389612571e-05, + "loss": 25.8797, + "step": 18602 + }, + { + "epoch": 0.7754157809178442, + "grad_norm": 466.0, + "learning_rate": 1.2654106853249264e-05, + "loss": 14.1253, + "step": 18603 + }, + { + "epoch": 0.7754574632153725, + "grad_norm": 1664.0, + "learning_rate": 1.264961899751817e-05, + "loss": 31.631, + "step": 18604 + }, + { + "epoch": 0.7754991455129007, + "grad_norm": 238.0, + "learning_rate": 1.2645131822501083e-05, + "loss": 11.563, + "step": 18605 + }, + { + "epoch": 0.7755408278104289, + "grad_norm": 438.0, + "learning_rate": 1.2640645328279788e-05, + "loss": 15.5634, + "step": 18606 + }, + { + "epoch": 0.7755825101079572, + "grad_norm": 177.0, + "learning_rate": 1.2636159514936052e-05, + "loss": 10.5002, + "step": 18607 + }, + { + "epoch": 0.7756241924054854, + "grad_norm": 157.0, + "learning_rate": 1.2631674382551623e-05, + "loss": 11.0627, + "step": 18608 + }, + { + "epoch": 0.7756658747030136, + "grad_norm": 532.0, + "learning_rate": 1.2627189931208244e-05, + "loss": 18.5003, + "step": 18609 + }, + { + "epoch": 0.7757075570005418, + "grad_norm": 466.0, + "learning_rate": 1.2622706160987646e-05, + "loss": 14.813, + "step": 18610 + }, + { + "epoch": 0.7757492392980702, + "grad_norm": 348.0, + "learning_rate": 1.2618223071971546e-05, + "loss": 13.8131, + "step": 18611 + }, + { + "epoch": 0.7757909215955984, + "grad_norm": 580.0, + "learning_rate": 1.2613740664241652e-05, + "loss": 18.3754, + "step": 18612 + }, + { + "epoch": 0.7758326038931266, + "grad_norm": 165.0, + "learning_rate": 1.2609258937879648e-05, + "loss": 12.0012, + "step": 18613 + }, + { + "epoch": 0.7758742861906548, + "grad_norm": 215.0, + "learning_rate": 1.2604777892967223e-05, + "loss": 11.6877, + "step": 18614 + }, + { + "epoch": 0.7759159684881831, + "grad_norm": 217.0, + "learning_rate": 1.2600297529586041e-05, + "loss": 11.1257, + "step": 18615 + }, + { + "epoch": 0.7759576507857113, + "grad_norm": 112.5, + "learning_rate": 1.2595817847817754e-05, + "loss": 10.0002, + "step": 18616 + }, + { + "epoch": 0.7759993330832395, + "grad_norm": 268.0, + "learning_rate": 1.2591338847744011e-05, + "loss": 12.2502, + "step": 18617 + }, + { + "epoch": 0.7760410153807678, + "grad_norm": 238.0, + "learning_rate": 1.2586860529446421e-05, + "loss": 12.9379, + "step": 18618 + }, + { + "epoch": 0.7760826976782961, + "grad_norm": 142.0, + "learning_rate": 1.258238289300665e-05, + "loss": 8.6901, + "step": 18619 + }, + { + "epoch": 0.7761243799758243, + "grad_norm": 656.0, + "learning_rate": 1.257790593850624e-05, + "loss": 21.1255, + "step": 18620 + }, + { + "epoch": 0.7761660622733525, + "grad_norm": 498.0, + "learning_rate": 1.2573429666026836e-05, + "loss": 17.2506, + "step": 18621 + }, + { + "epoch": 0.7762077445708807, + "grad_norm": 510.0, + "learning_rate": 1.2568954075649997e-05, + "loss": 17.6253, + "step": 18622 + }, + { + "epoch": 0.776249426868409, + "grad_norm": 716.0, + "learning_rate": 1.2564479167457288e-05, + "loss": 20.0016, + "step": 18623 + }, + { + "epoch": 0.7762911091659372, + "grad_norm": 540.0, + "learning_rate": 1.2560004941530273e-05, + "loss": 17.7501, + "step": 18624 + }, + { + "epoch": 0.7763327914634655, + "grad_norm": 552.0, + "learning_rate": 1.2555531397950494e-05, + "loss": 17.0008, + "step": 18625 + }, + { + "epoch": 0.7763744737609937, + "grad_norm": 97.5, + "learning_rate": 1.2551058536799476e-05, + "loss": 10.6271, + "step": 18626 + }, + { + "epoch": 0.776416156058522, + "grad_norm": 193.0, + "learning_rate": 1.254658635815874e-05, + "loss": 12.7504, + "step": 18627 + }, + { + "epoch": 0.7764578383560502, + "grad_norm": 183.0, + "learning_rate": 1.2542114862109795e-05, + "loss": 9.8753, + "step": 18628 + }, + { + "epoch": 0.7764995206535784, + "grad_norm": 161.0, + "learning_rate": 1.2537644048734133e-05, + "loss": 9.3756, + "step": 18629 + }, + { + "epoch": 0.7765412029511066, + "grad_norm": 828.0, + "learning_rate": 1.2533173918113233e-05, + "loss": 19.2545, + "step": 18630 + }, + { + "epoch": 0.776582885248635, + "grad_norm": 262.0, + "learning_rate": 1.2528704470328561e-05, + "loss": 11.6263, + "step": 18631 + }, + { + "epoch": 0.7766245675461632, + "grad_norm": 135.0, + "learning_rate": 1.2524235705461578e-05, + "loss": 10.6255, + "step": 18632 + }, + { + "epoch": 0.7766662498436914, + "grad_norm": 784.0, + "learning_rate": 1.2519767623593726e-05, + "loss": 17.2548, + "step": 18633 + }, + { + "epoch": 0.7767079321412196, + "grad_norm": 378.0, + "learning_rate": 1.2515300224806437e-05, + "loss": 14.1255, + "step": 18634 + }, + { + "epoch": 0.7767496144387479, + "grad_norm": 528.0, + "learning_rate": 1.2510833509181114e-05, + "loss": 17.8751, + "step": 18635 + }, + { + "epoch": 0.7767912967362761, + "grad_norm": 356.0, + "learning_rate": 1.2506367476799207e-05, + "loss": 12.8751, + "step": 18636 + }, + { + "epoch": 0.7768329790338043, + "grad_norm": 304.0, + "learning_rate": 1.2501902127742054e-05, + "loss": 13.1252, + "step": 18637 + }, + { + "epoch": 0.7768746613313325, + "grad_norm": 290.0, + "learning_rate": 1.2497437462091094e-05, + "loss": 13.0627, + "step": 18638 + }, + { + "epoch": 0.7769163436288609, + "grad_norm": 1560.0, + "learning_rate": 1.2492973479927633e-05, + "loss": 36.7514, + "step": 18639 + }, + { + "epoch": 0.7769580259263891, + "grad_norm": 138.0, + "learning_rate": 1.2488510181333091e-05, + "loss": 11.126, + "step": 18640 + }, + { + "epoch": 0.7769997082239173, + "grad_norm": 374.0, + "learning_rate": 1.2484047566388751e-05, + "loss": 15.2502, + "step": 18641 + }, + { + "epoch": 0.7770413905214455, + "grad_norm": 378.0, + "learning_rate": 1.2479585635176e-05, + "loss": 14.1252, + "step": 18642 + }, + { + "epoch": 0.7770830728189738, + "grad_norm": 145.0, + "learning_rate": 1.247512438777611e-05, + "loss": 7.6565, + "step": 18643 + }, + { + "epoch": 0.777124755116502, + "grad_norm": 239.0, + "learning_rate": 1.2470663824270417e-05, + "loss": 12.3127, + "step": 18644 + }, + { + "epoch": 0.7771664374140302, + "grad_norm": 127.0, + "learning_rate": 1.2466203944740213e-05, + "loss": 9.6877, + "step": 18645 + }, + { + "epoch": 0.7772081197115585, + "grad_norm": 278.0, + "learning_rate": 1.2461744749266768e-05, + "loss": 14.8131, + "step": 18646 + }, + { + "epoch": 0.7772498020090868, + "grad_norm": 1376.0, + "learning_rate": 1.2457286237931359e-05, + "loss": 30.1253, + "step": 18647 + }, + { + "epoch": 0.777291484306615, + "grad_norm": 366.0, + "learning_rate": 1.2452828410815242e-05, + "loss": 14.3128, + "step": 18648 + }, + { + "epoch": 0.7773331666041432, + "grad_norm": 288.0, + "learning_rate": 1.2448371267999664e-05, + "loss": 13.6252, + "step": 18649 + }, + { + "epoch": 0.7773748489016714, + "grad_norm": 632.0, + "learning_rate": 1.2443914809565849e-05, + "loss": 19.127, + "step": 18650 + }, + { + "epoch": 0.7774165311991997, + "grad_norm": 340.0, + "learning_rate": 1.2439459035595024e-05, + "loss": 14.6257, + "step": 18651 + }, + { + "epoch": 0.777458213496728, + "grad_norm": 210.0, + "learning_rate": 1.243500394616839e-05, + "loss": 11.6257, + "step": 18652 + }, + { + "epoch": 0.7774998957942562, + "grad_norm": 660.0, + "learning_rate": 1.2430549541367142e-05, + "loss": 19.6254, + "step": 18653 + }, + { + "epoch": 0.7775415780917844, + "grad_norm": 432.0, + "learning_rate": 1.2426095821272465e-05, + "loss": 16.7502, + "step": 18654 + }, + { + "epoch": 0.7775832603893127, + "grad_norm": 462.0, + "learning_rate": 1.2421642785965532e-05, + "loss": 15.8129, + "step": 18655 + }, + { + "epoch": 0.7776249426868409, + "grad_norm": 176.0, + "learning_rate": 1.2417190435527487e-05, + "loss": 10.7502, + "step": 18656 + }, + { + "epoch": 0.7776666249843691, + "grad_norm": 173.0, + "learning_rate": 1.241273877003949e-05, + "loss": 11.0627, + "step": 18657 + }, + { + "epoch": 0.7777083072818973, + "grad_norm": 320.0, + "learning_rate": 1.2408287789582662e-05, + "loss": 15.0629, + "step": 18658 + }, + { + "epoch": 0.7777499895794256, + "grad_norm": 370.0, + "learning_rate": 1.2403837494238108e-05, + "loss": 15.3128, + "step": 18659 + }, + { + "epoch": 0.7777916718769539, + "grad_norm": 80.5, + "learning_rate": 1.2399387884086988e-05, + "loss": 7.5939, + "step": 18660 + }, + { + "epoch": 0.7778333541744821, + "grad_norm": 386.0, + "learning_rate": 1.2394938959210328e-05, + "loss": 14.3754, + "step": 18661 + }, + { + "epoch": 0.7778750364720103, + "grad_norm": 752.0, + "learning_rate": 1.2390490719689279e-05, + "loss": 20.5002, + "step": 18662 + }, + { + "epoch": 0.7779167187695386, + "grad_norm": 504.0, + "learning_rate": 1.2386043165604844e-05, + "loss": 17.7502, + "step": 18663 + }, + { + "epoch": 0.7779584010670668, + "grad_norm": 165.0, + "learning_rate": 1.238159629703814e-05, + "loss": 10.0627, + "step": 18664 + }, + { + "epoch": 0.778000083364595, + "grad_norm": 175.0, + "learning_rate": 1.2377150114070151e-05, + "loss": 11.0015, + "step": 18665 + }, + { + "epoch": 0.7780417656621232, + "grad_norm": 157.0, + "learning_rate": 1.2372704616781972e-05, + "loss": 9.5627, + "step": 18666 + }, + { + "epoch": 0.7780834479596516, + "grad_norm": 126.5, + "learning_rate": 1.2368259805254561e-05, + "loss": 9.5036, + "step": 18667 + }, + { + "epoch": 0.7781251302571798, + "grad_norm": 232.0, + "learning_rate": 1.236381567956899e-05, + "loss": 10.8128, + "step": 18668 + }, + { + "epoch": 0.778166812554708, + "grad_norm": 432.0, + "learning_rate": 1.2359372239806183e-05, + "loss": 15.5629, + "step": 18669 + }, + { + "epoch": 0.7782084948522363, + "grad_norm": 235.0, + "learning_rate": 1.2354929486047179e-05, + "loss": 11.9377, + "step": 18670 + }, + { + "epoch": 0.7782501771497645, + "grad_norm": 508.0, + "learning_rate": 1.2350487418372925e-05, + "loss": 17.1273, + "step": 18671 + }, + { + "epoch": 0.7782918594472927, + "grad_norm": 664.0, + "learning_rate": 1.2346046036864377e-05, + "loss": 17.8764, + "step": 18672 + }, + { + "epoch": 0.778333541744821, + "grad_norm": 183.0, + "learning_rate": 1.2341605341602486e-05, + "loss": 11.6878, + "step": 18673 + }, + { + "epoch": 0.7783752240423493, + "grad_norm": 78.5, + "learning_rate": 1.2337165332668182e-05, + "loss": 6.9065, + "step": 18674 + }, + { + "epoch": 0.7784169063398775, + "grad_norm": 200.0, + "learning_rate": 1.2332726010142382e-05, + "loss": 12.0657, + "step": 18675 + }, + { + "epoch": 0.7784585886374057, + "grad_norm": 564.0, + "learning_rate": 1.2328287374105996e-05, + "loss": 18.0033, + "step": 18676 + }, + { + "epoch": 0.7785002709349339, + "grad_norm": 204.0, + "learning_rate": 1.232384942463991e-05, + "loss": 12.4378, + "step": 18677 + }, + { + "epoch": 0.7785419532324622, + "grad_norm": 220.0, + "learning_rate": 1.2319412161825017e-05, + "loss": 7.5003, + "step": 18678 + }, + { + "epoch": 0.7785836355299904, + "grad_norm": 482.0, + "learning_rate": 1.2314975585742183e-05, + "loss": 16.7501, + "step": 18679 + }, + { + "epoch": 0.7786253178275186, + "grad_norm": 364.0, + "learning_rate": 1.2310539696472267e-05, + "loss": 11.4378, + "step": 18680 + }, + { + "epoch": 0.7786670001250469, + "grad_norm": 320.0, + "learning_rate": 1.2306104494096104e-05, + "loss": 13.19, + "step": 18681 + }, + { + "epoch": 0.7787086824225752, + "grad_norm": 568.0, + "learning_rate": 1.230166997869454e-05, + "loss": 19.2501, + "step": 18682 + }, + { + "epoch": 0.7787503647201034, + "grad_norm": 288.0, + "learning_rate": 1.2297236150348385e-05, + "loss": 12.3128, + "step": 18683 + }, + { + "epoch": 0.7787920470176316, + "grad_norm": 280.0, + "learning_rate": 1.2292803009138442e-05, + "loss": 12.9378, + "step": 18684 + }, + { + "epoch": 0.7788337293151598, + "grad_norm": 920.0, + "learning_rate": 1.2288370555145506e-05, + "loss": 25.6251, + "step": 18685 + }, + { + "epoch": 0.7788754116126881, + "grad_norm": 294.0, + "learning_rate": 1.2283938788450389e-05, + "loss": 8.9385, + "step": 18686 + }, + { + "epoch": 0.7789170939102164, + "grad_norm": 181.0, + "learning_rate": 1.2279507709133809e-05, + "loss": 12.0005, + "step": 18687 + }, + { + "epoch": 0.7789587762077446, + "grad_norm": 350.0, + "learning_rate": 1.2275077317276573e-05, + "loss": 12.6879, + "step": 18688 + }, + { + "epoch": 0.7790004585052728, + "grad_norm": 243.0, + "learning_rate": 1.227064761295938e-05, + "loss": 12.9378, + "step": 18689 + }, + { + "epoch": 0.7790421408028011, + "grad_norm": 114.0, + "learning_rate": 1.2266218596263008e-05, + "loss": 10.1882, + "step": 18690 + }, + { + "epoch": 0.7790838231003293, + "grad_norm": 72.0, + "learning_rate": 1.2261790267268125e-05, + "loss": 8.0007, + "step": 18691 + }, + { + "epoch": 0.7791255053978575, + "grad_norm": 290.0, + "learning_rate": 1.225736262605549e-05, + "loss": 14.1879, + "step": 18692 + }, + { + "epoch": 0.7791671876953857, + "grad_norm": 258.0, + "learning_rate": 1.2252935672705745e-05, + "loss": 12.0627, + "step": 18693 + }, + { + "epoch": 0.779208869992914, + "grad_norm": 212.0, + "learning_rate": 1.2248509407299614e-05, + "loss": 12.7514, + "step": 18694 + }, + { + "epoch": 0.7792505522904423, + "grad_norm": 114.0, + "learning_rate": 1.2244083829917746e-05, + "loss": 4.6565, + "step": 18695 + }, + { + "epoch": 0.7792922345879705, + "grad_norm": 109.5, + "learning_rate": 1.2239658940640809e-05, + "loss": 9.8126, + "step": 18696 + }, + { + "epoch": 0.7793339168854987, + "grad_norm": 255.0, + "learning_rate": 1.2235234739549434e-05, + "loss": 13.1259, + "step": 18697 + }, + { + "epoch": 0.779375599183027, + "grad_norm": 712.0, + "learning_rate": 1.2230811226724259e-05, + "loss": 23.0002, + "step": 18698 + }, + { + "epoch": 0.7794172814805552, + "grad_norm": 410.0, + "learning_rate": 1.2226388402245903e-05, + "loss": 15.1252, + "step": 18699 + }, + { + "epoch": 0.7794589637780834, + "grad_norm": 225.0, + "learning_rate": 1.2221966266194974e-05, + "loss": 11.8755, + "step": 18700 + }, + { + "epoch": 0.7795006460756116, + "grad_norm": 368.0, + "learning_rate": 1.221754481865206e-05, + "loss": 14.5627, + "step": 18701 + }, + { + "epoch": 0.77954232837314, + "grad_norm": 294.0, + "learning_rate": 1.221312405969775e-05, + "loss": 11.8752, + "step": 18702 + }, + { + "epoch": 0.7795840106706682, + "grad_norm": 668.0, + "learning_rate": 1.2208703989412606e-05, + "loss": 20.8752, + "step": 18703 + }, + { + "epoch": 0.7796256929681964, + "grad_norm": 1240.0, + "learning_rate": 1.2204284607877186e-05, + "loss": 24.3791, + "step": 18704 + }, + { + "epoch": 0.7796673752657246, + "grad_norm": 512.0, + "learning_rate": 1.2199865915172037e-05, + "loss": 17.1256, + "step": 18705 + }, + { + "epoch": 0.7797090575632529, + "grad_norm": 1152.0, + "learning_rate": 1.2195447911377684e-05, + "loss": 28.0013, + "step": 18706 + }, + { + "epoch": 0.7797507398607811, + "grad_norm": 166.0, + "learning_rate": 1.2191030596574649e-05, + "loss": 11.0003, + "step": 18707 + }, + { + "epoch": 0.7797924221583094, + "grad_norm": 410.0, + "learning_rate": 1.2186613970843436e-05, + "loss": 16.1262, + "step": 18708 + }, + { + "epoch": 0.7798341044558376, + "grad_norm": 346.0, + "learning_rate": 1.218219803426453e-05, + "loss": 14.0628, + "step": 18709 + }, + { + "epoch": 0.7798757867533659, + "grad_norm": 608.0, + "learning_rate": 1.2177782786918451e-05, + "loss": 18.7548, + "step": 18710 + }, + { + "epoch": 0.7799174690508941, + "grad_norm": 284.0, + "learning_rate": 1.2173368228885607e-05, + "loss": 13.5002, + "step": 18711 + }, + { + "epoch": 0.7799591513484223, + "grad_norm": 352.0, + "learning_rate": 1.2168954360246515e-05, + "loss": 14.1878, + "step": 18712 + }, + { + "epoch": 0.7800008336459505, + "grad_norm": 75.0, + "learning_rate": 1.2164541181081557e-05, + "loss": 7.8751, + "step": 18713 + }, + { + "epoch": 0.7800425159434788, + "grad_norm": 740.0, + "learning_rate": 1.2160128691471229e-05, + "loss": 21.5003, + "step": 18714 + }, + { + "epoch": 0.780084198241007, + "grad_norm": 600.0, + "learning_rate": 1.2155716891495877e-05, + "loss": 18.2503, + "step": 18715 + }, + { + "epoch": 0.7801258805385353, + "grad_norm": 187.0, + "learning_rate": 1.2151305781235984e-05, + "loss": 10.8755, + "step": 18716 + }, + { + "epoch": 0.7801675628360635, + "grad_norm": 96.0, + "learning_rate": 1.2146895360771865e-05, + "loss": 7.7508, + "step": 18717 + }, + { + "epoch": 0.7802092451335918, + "grad_norm": 342.0, + "learning_rate": 1.214248563018397e-05, + "loss": 15.6878, + "step": 18718 + }, + { + "epoch": 0.78025092743112, + "grad_norm": 414.0, + "learning_rate": 1.213807658955261e-05, + "loss": 15.8126, + "step": 18719 + }, + { + "epoch": 0.7802926097286482, + "grad_norm": 286.0, + "learning_rate": 1.2133668238958174e-05, + "loss": 13.2501, + "step": 18720 + }, + { + "epoch": 0.7803342920261764, + "grad_norm": 494.0, + "learning_rate": 1.2129260578481e-05, + "loss": 17.2504, + "step": 18721 + }, + { + "epoch": 0.7803759743237048, + "grad_norm": 276.0, + "learning_rate": 1.2124853608201414e-05, + "loss": 10.4379, + "step": 18722 + }, + { + "epoch": 0.780417656621233, + "grad_norm": 398.0, + "learning_rate": 1.212044732819973e-05, + "loss": 15.6252, + "step": 18723 + }, + { + "epoch": 0.7804593389187612, + "grad_norm": 552.0, + "learning_rate": 1.2116041738556256e-05, + "loss": 19.0002, + "step": 18724 + }, + { + "epoch": 0.7805010212162894, + "grad_norm": 226.0, + "learning_rate": 1.2111636839351286e-05, + "loss": 11.8753, + "step": 18725 + }, + { + "epoch": 0.7805427035138177, + "grad_norm": 201.0, + "learning_rate": 1.21072326306651e-05, + "loss": 10.8129, + "step": 18726 + }, + { + "epoch": 0.7805843858113459, + "grad_norm": 133.0, + "learning_rate": 1.2102829112577963e-05, + "loss": 10.5003, + "step": 18727 + }, + { + "epoch": 0.7806260681088741, + "grad_norm": 460.0, + "learning_rate": 1.2098426285170129e-05, + "loss": 14.9376, + "step": 18728 + }, + { + "epoch": 0.7806677504064023, + "grad_norm": 314.0, + "learning_rate": 1.2094024148521837e-05, + "loss": 15.3132, + "step": 18729 + }, + { + "epoch": 0.7807094327039307, + "grad_norm": 346.0, + "learning_rate": 1.2089622702713321e-05, + "loss": 14.3127, + "step": 18730 + }, + { + "epoch": 0.7807511150014589, + "grad_norm": 137.0, + "learning_rate": 1.20852219478248e-05, + "loss": 10.6255, + "step": 18731 + }, + { + "epoch": 0.7807927972989871, + "grad_norm": 378.0, + "learning_rate": 1.2080821883936472e-05, + "loss": 15.6255, + "step": 18732 + }, + { + "epoch": 0.7808344795965153, + "grad_norm": 476.0, + "learning_rate": 1.2076422511128532e-05, + "loss": 17.1252, + "step": 18733 + }, + { + "epoch": 0.7808761618940436, + "grad_norm": 712.0, + "learning_rate": 1.2072023829481161e-05, + "loss": 22.1271, + "step": 18734 + }, + { + "epoch": 0.7809178441915718, + "grad_norm": 458.0, + "learning_rate": 1.2067625839074503e-05, + "loss": 13.8128, + "step": 18735 + }, + { + "epoch": 0.7809595264891, + "grad_norm": 1424.0, + "learning_rate": 1.2063228539988764e-05, + "loss": 29.6253, + "step": 18736 + }, + { + "epoch": 0.7810012087866283, + "grad_norm": 125.0, + "learning_rate": 1.205883193230402e-05, + "loss": 6.6255, + "step": 18737 + }, + { + "epoch": 0.7810428910841566, + "grad_norm": 322.0, + "learning_rate": 1.2054436016100468e-05, + "loss": 14.1878, + "step": 18738 + }, + { + "epoch": 0.7810845733816848, + "grad_norm": 320.0, + "learning_rate": 1.2050040791458156e-05, + "loss": 14.5629, + "step": 18739 + }, + { + "epoch": 0.781126255679213, + "grad_norm": 290.0, + "learning_rate": 1.2045646258457243e-05, + "loss": 13.4382, + "step": 18740 + }, + { + "epoch": 0.7811679379767413, + "grad_norm": 57.25, + "learning_rate": 1.2041252417177773e-05, + "loss": 7.6883, + "step": 18741 + }, + { + "epoch": 0.7812096202742695, + "grad_norm": 744.0, + "learning_rate": 1.203685926769988e-05, + "loss": 20.3795, + "step": 18742 + }, + { + "epoch": 0.7812513025717978, + "grad_norm": 292.0, + "learning_rate": 1.203246681010356e-05, + "loss": 11.3762, + "step": 18743 + }, + { + "epoch": 0.781292984869326, + "grad_norm": 380.0, + "learning_rate": 1.2028075044468922e-05, + "loss": 14.1894, + "step": 18744 + }, + { + "epoch": 0.7813346671668543, + "grad_norm": 172.0, + "learning_rate": 1.2023683970875983e-05, + "loss": 11.5629, + "step": 18745 + }, + { + "epoch": 0.7813763494643825, + "grad_norm": 420.0, + "learning_rate": 1.2019293589404779e-05, + "loss": 15.3755, + "step": 18746 + }, + { + "epoch": 0.7814180317619107, + "grad_norm": 240.0, + "learning_rate": 1.2014903900135322e-05, + "loss": 13.1881, + "step": 18747 + }, + { + "epoch": 0.7814597140594389, + "grad_norm": 628.0, + "learning_rate": 1.2010514903147607e-05, + "loss": 18.0012, + "step": 18748 + }, + { + "epoch": 0.7815013963569672, + "grad_norm": 708.0, + "learning_rate": 1.2006126598521634e-05, + "loss": 21.2508, + "step": 18749 + }, + { + "epoch": 0.7815430786544955, + "grad_norm": 656.0, + "learning_rate": 1.200173898633738e-05, + "loss": 18.8752, + "step": 18750 + }, + { + "epoch": 0.7815847609520237, + "grad_norm": 444.0, + "learning_rate": 1.19973520666748e-05, + "loss": 16.501, + "step": 18751 + }, + { + "epoch": 0.7816264432495519, + "grad_norm": 512.0, + "learning_rate": 1.1992965839613856e-05, + "loss": 18.3753, + "step": 18752 + }, + { + "epoch": 0.7816681255470802, + "grad_norm": 114.5, + "learning_rate": 1.1988580305234481e-05, + "loss": 8.1878, + "step": 18753 + }, + { + "epoch": 0.7817098078446084, + "grad_norm": 508.0, + "learning_rate": 1.198419546361661e-05, + "loss": 15.0668, + "step": 18754 + }, + { + "epoch": 0.7817514901421366, + "grad_norm": 446.0, + "learning_rate": 1.1979811314840144e-05, + "loss": 16.2527, + "step": 18755 + }, + { + "epoch": 0.7817931724396648, + "grad_norm": 251.0, + "learning_rate": 1.1975427858985e-05, + "loss": 9.0006, + "step": 18756 + }, + { + "epoch": 0.7818348547371932, + "grad_norm": 392.0, + "learning_rate": 1.197104509613105e-05, + "loss": 14.8753, + "step": 18757 + }, + { + "epoch": 0.7818765370347214, + "grad_norm": 342.0, + "learning_rate": 1.1966663026358189e-05, + "loss": 14.6254, + "step": 18758 + }, + { + "epoch": 0.7819182193322496, + "grad_norm": 652.0, + "learning_rate": 1.196228164974626e-05, + "loss": 21.2534, + "step": 18759 + }, + { + "epoch": 0.7819599016297778, + "grad_norm": 229.0, + "learning_rate": 1.195790096637513e-05, + "loss": 11.6878, + "step": 18760 + }, + { + "epoch": 0.7820015839273061, + "grad_norm": 209.0, + "learning_rate": 1.1953520976324617e-05, + "loss": 12.0629, + "step": 18761 + }, + { + "epoch": 0.7820432662248343, + "grad_norm": 430.0, + "learning_rate": 1.1949141679674591e-05, + "loss": 15.6251, + "step": 18762 + }, + { + "epoch": 0.7820849485223625, + "grad_norm": 388.0, + "learning_rate": 1.1944763076504805e-05, + "loss": 14.9377, + "step": 18763 + }, + { + "epoch": 0.7821266308198908, + "grad_norm": 1004.0, + "learning_rate": 1.1940385166895124e-05, + "loss": 29.5004, + "step": 18764 + }, + { + "epoch": 0.7821683131174191, + "grad_norm": 352.0, + "learning_rate": 1.1936007950925272e-05, + "loss": 14.3774, + "step": 18765 + }, + { + "epoch": 0.7822099954149473, + "grad_norm": 660.0, + "learning_rate": 1.1931631428675078e-05, + "loss": 17.7543, + "step": 18766 + }, + { + "epoch": 0.7822516777124755, + "grad_norm": 868.0, + "learning_rate": 1.1927255600224257e-05, + "loss": 26.1253, + "step": 18767 + }, + { + "epoch": 0.7822933600100037, + "grad_norm": 125.5, + "learning_rate": 1.1922880465652608e-05, + "loss": 9.2512, + "step": 18768 + }, + { + "epoch": 0.782335042307532, + "grad_norm": 332.0, + "learning_rate": 1.1918506025039816e-05, + "loss": 12.1252, + "step": 18769 + }, + { + "epoch": 0.7823767246050602, + "grad_norm": 240.0, + "learning_rate": 1.1914132278465645e-05, + "loss": 11.9404, + "step": 18770 + }, + { + "epoch": 0.7824184069025885, + "grad_norm": 506.0, + "learning_rate": 1.1909759226009792e-05, + "loss": 15.4392, + "step": 18771 + }, + { + "epoch": 0.7824600892001167, + "grad_norm": 316.0, + "learning_rate": 1.190538686775196e-05, + "loss": 14.4377, + "step": 18772 + }, + { + "epoch": 0.782501771497645, + "grad_norm": 83.0, + "learning_rate": 1.1901015203771837e-05, + "loss": 9.1255, + "step": 18773 + }, + { + "epoch": 0.7825434537951732, + "grad_norm": 360.0, + "learning_rate": 1.1896644234149096e-05, + "loss": 13.8127, + "step": 18774 + }, + { + "epoch": 0.7825851360927014, + "grad_norm": 516.0, + "learning_rate": 1.189227395896339e-05, + "loss": 17.7503, + "step": 18775 + }, + { + "epoch": 0.7826268183902296, + "grad_norm": 113.0, + "learning_rate": 1.1887904378294378e-05, + "loss": 8.3126, + "step": 18776 + }, + { + "epoch": 0.782668500687758, + "grad_norm": 468.0, + "learning_rate": 1.1883535492221692e-05, + "loss": 16.3752, + "step": 18777 + }, + { + "epoch": 0.7827101829852862, + "grad_norm": 744.0, + "learning_rate": 1.187916730082495e-05, + "loss": 21.6251, + "step": 18778 + }, + { + "epoch": 0.7827518652828144, + "grad_norm": 366.0, + "learning_rate": 1.1874799804183772e-05, + "loss": 15.1252, + "step": 18779 + }, + { + "epoch": 0.7827935475803426, + "grad_norm": 121.0, + "learning_rate": 1.1870433002377756e-05, + "loss": 9.8754, + "step": 18780 + }, + { + "epoch": 0.7828352298778709, + "grad_norm": 354.0, + "learning_rate": 1.1866066895486478e-05, + "loss": 13.1254, + "step": 18781 + }, + { + "epoch": 0.7828769121753991, + "grad_norm": 600.0, + "learning_rate": 1.1861701483589515e-05, + "loss": 18.6253, + "step": 18782 + }, + { + "epoch": 0.7829185944729273, + "grad_norm": 334.0, + "learning_rate": 1.1857336766766436e-05, + "loss": 13.6883, + "step": 18783 + }, + { + "epoch": 0.7829602767704555, + "grad_norm": 314.0, + "learning_rate": 1.1852972745096774e-05, + "loss": 14.3752, + "step": 18784 + }, + { + "epoch": 0.7830019590679839, + "grad_norm": 532.0, + "learning_rate": 1.1848609418660056e-05, + "loss": 15.5051, + "step": 18785 + }, + { + "epoch": 0.7830436413655121, + "grad_norm": 187.0, + "learning_rate": 1.1844246787535845e-05, + "loss": 9.5632, + "step": 18786 + }, + { + "epoch": 0.7830853236630403, + "grad_norm": 792.0, + "learning_rate": 1.18398848518036e-05, + "loss": 20.0003, + "step": 18787 + }, + { + "epoch": 0.7831270059605685, + "grad_norm": 404.0, + "learning_rate": 1.183552361154287e-05, + "loss": 15.8751, + "step": 18788 + }, + { + "epoch": 0.7831686882580968, + "grad_norm": 356.0, + "learning_rate": 1.1831163066833078e-05, + "loss": 15.0629, + "step": 18789 + }, + { + "epoch": 0.783210370555625, + "grad_norm": 215.0, + "learning_rate": 1.1826803217753756e-05, + "loss": 11.9377, + "step": 18790 + }, + { + "epoch": 0.7832520528531532, + "grad_norm": 157.0, + "learning_rate": 1.1822444064384308e-05, + "loss": 9.8753, + "step": 18791 + }, + { + "epoch": 0.7832937351506815, + "grad_norm": 1256.0, + "learning_rate": 1.181808560680423e-05, + "loss": 27.0042, + "step": 18792 + }, + { + "epoch": 0.7833354174482098, + "grad_norm": 552.0, + "learning_rate": 1.1813727845092909e-05, + "loss": 19.5037, + "step": 18793 + }, + { + "epoch": 0.783377099745738, + "grad_norm": 268.0, + "learning_rate": 1.1809370779329814e-05, + "loss": 12.5002, + "step": 18794 + }, + { + "epoch": 0.7834187820432662, + "grad_norm": 235.0, + "learning_rate": 1.1805014409594294e-05, + "loss": 12.6256, + "step": 18795 + }, + { + "epoch": 0.7834604643407944, + "grad_norm": 278.0, + "learning_rate": 1.1800658735965797e-05, + "loss": 13.5627, + "step": 18796 + }, + { + "epoch": 0.7835021466383227, + "grad_norm": 158.0, + "learning_rate": 1.1796303758523685e-05, + "loss": 9.3134, + "step": 18797 + }, + { + "epoch": 0.783543828935851, + "grad_norm": 480.0, + "learning_rate": 1.1791949477347325e-05, + "loss": 17.1252, + "step": 18798 + }, + { + "epoch": 0.7835855112333792, + "grad_norm": 320.0, + "learning_rate": 1.1787595892516079e-05, + "loss": 13.7503, + "step": 18799 + }, + { + "epoch": 0.7836271935309074, + "grad_norm": 136.0, + "learning_rate": 1.178324300410929e-05, + "loss": 9.9377, + "step": 18800 + }, + { + "epoch": 0.7836688758284357, + "grad_norm": 368.0, + "learning_rate": 1.1778890812206294e-05, + "loss": 13.3127, + "step": 18801 + }, + { + "epoch": 0.7837105581259639, + "grad_norm": 286.0, + "learning_rate": 1.1774539316886402e-05, + "loss": 11.1897, + "step": 18802 + }, + { + "epoch": 0.7837522404234921, + "grad_norm": 209.0, + "learning_rate": 1.1770188518228925e-05, + "loss": 9.1253, + "step": 18803 + }, + { + "epoch": 0.7837939227210203, + "grad_norm": 520.0, + "learning_rate": 1.1765838416313157e-05, + "loss": 18.5007, + "step": 18804 + }, + { + "epoch": 0.7838356050185487, + "grad_norm": 360.0, + "learning_rate": 1.1761489011218374e-05, + "loss": 13.2503, + "step": 18805 + }, + { + "epoch": 0.7838772873160769, + "grad_norm": 560.0, + "learning_rate": 1.175714030302385e-05, + "loss": 17.7502, + "step": 18806 + }, + { + "epoch": 0.7839189696136051, + "grad_norm": 292.0, + "learning_rate": 1.1752792291808839e-05, + "loss": 13.0012, + "step": 18807 + }, + { + "epoch": 0.7839606519111333, + "grad_norm": 124.5, + "learning_rate": 1.174844497765259e-05, + "loss": 8.7505, + "step": 18808 + }, + { + "epoch": 0.7840023342086616, + "grad_norm": 432.0, + "learning_rate": 1.1744098360634321e-05, + "loss": 14.6892, + "step": 18809 + }, + { + "epoch": 0.7840440165061898, + "grad_norm": 328.0, + "learning_rate": 1.1739752440833257e-05, + "loss": 13.0628, + "step": 18810 + }, + { + "epoch": 0.784085698803718, + "grad_norm": 127.0, + "learning_rate": 1.1735407218328587e-05, + "loss": 9.1878, + "step": 18811 + }, + { + "epoch": 0.7841273811012462, + "grad_norm": 256.0, + "learning_rate": 1.1731062693199541e-05, + "loss": 12.1253, + "step": 18812 + }, + { + "epoch": 0.7841690633987746, + "grad_norm": 198.0, + "learning_rate": 1.1726718865525249e-05, + "loss": 11.0628, + "step": 18813 + }, + { + "epoch": 0.7842107456963028, + "grad_norm": 412.0, + "learning_rate": 1.1722375735384933e-05, + "loss": 15.9398, + "step": 18814 + }, + { + "epoch": 0.784252427993831, + "grad_norm": 222.0, + "learning_rate": 1.171803330285769e-05, + "loss": 12.0004, + "step": 18815 + }, + { + "epoch": 0.7842941102913593, + "grad_norm": 68.5, + "learning_rate": 1.1713691568022717e-05, + "loss": 8.626, + "step": 18816 + }, + { + "epoch": 0.7843357925888875, + "grad_norm": 404.0, + "learning_rate": 1.1709350530959084e-05, + "loss": 14.3753, + "step": 18817 + }, + { + "epoch": 0.7843774748864157, + "grad_norm": 272.0, + "learning_rate": 1.1705010191745963e-05, + "loss": 12.5633, + "step": 18818 + }, + { + "epoch": 0.784419157183944, + "grad_norm": 183.0, + "learning_rate": 1.1700670550462405e-05, + "loss": 10.5029, + "step": 18819 + }, + { + "epoch": 0.7844608394814723, + "grad_norm": 516.0, + "learning_rate": 1.1696331607187543e-05, + "loss": 17.6267, + "step": 18820 + }, + { + "epoch": 0.7845025217790005, + "grad_norm": 137.0, + "learning_rate": 1.1691993362000437e-05, + "loss": 10.0002, + "step": 18821 + }, + { + "epoch": 0.7845442040765287, + "grad_norm": 356.0, + "learning_rate": 1.1687655814980147e-05, + "loss": 14.6878, + "step": 18822 + }, + { + "epoch": 0.7845858863740569, + "grad_norm": 342.0, + "learning_rate": 1.1683318966205736e-05, + "loss": 14.6255, + "step": 18823 + }, + { + "epoch": 0.7846275686715852, + "grad_norm": 181.0, + "learning_rate": 1.1678982815756234e-05, + "loss": 10.6252, + "step": 18824 + }, + { + "epoch": 0.7846692509691134, + "grad_norm": 262.0, + "learning_rate": 1.1674647363710678e-05, + "loss": 12.1252, + "step": 18825 + }, + { + "epoch": 0.7847109332666417, + "grad_norm": 222.0, + "learning_rate": 1.1670312610148071e-05, + "loss": 12.0631, + "step": 18826 + }, + { + "epoch": 0.7847526155641699, + "grad_norm": 376.0, + "learning_rate": 1.1665978555147423e-05, + "loss": 16.3754, + "step": 18827 + }, + { + "epoch": 0.7847942978616982, + "grad_norm": 270.0, + "learning_rate": 1.1661645198787713e-05, + "loss": 13.8131, + "step": 18828 + }, + { + "epoch": 0.7848359801592264, + "grad_norm": 972.0, + "learning_rate": 1.165731254114793e-05, + "loss": 26.5036, + "step": 18829 + }, + { + "epoch": 0.7848776624567546, + "grad_norm": 97.5, + "learning_rate": 1.1652980582307027e-05, + "loss": 9.1881, + "step": 18830 + }, + { + "epoch": 0.7849193447542828, + "grad_norm": 215.0, + "learning_rate": 1.1648649322343953e-05, + "loss": 11.8751, + "step": 18831 + }, + { + "epoch": 0.7849610270518111, + "grad_norm": 408.0, + "learning_rate": 1.1644318761337652e-05, + "loss": 15.6254, + "step": 18832 + }, + { + "epoch": 0.7850027093493394, + "grad_norm": 183.0, + "learning_rate": 1.163998889936705e-05, + "loss": 9.5004, + "step": 18833 + }, + { + "epoch": 0.7850443916468676, + "grad_norm": 880.0, + "learning_rate": 1.163565973651105e-05, + "loss": 23.8752, + "step": 18834 + }, + { + "epoch": 0.7850860739443958, + "grad_norm": 170.0, + "learning_rate": 1.163133127284855e-05, + "loss": 10.1252, + "step": 18835 + }, + { + "epoch": 0.7851277562419241, + "grad_norm": 61.0, + "learning_rate": 1.1627003508458468e-05, + "loss": 7.9689, + "step": 18836 + }, + { + "epoch": 0.7851694385394523, + "grad_norm": 219.0, + "learning_rate": 1.1622676443419623e-05, + "loss": 12.7503, + "step": 18837 + }, + { + "epoch": 0.7852111208369805, + "grad_norm": 79.0, + "learning_rate": 1.1618350077810935e-05, + "loss": 9.1258, + "step": 18838 + }, + { + "epoch": 0.7852528031345087, + "grad_norm": 876.0, + "learning_rate": 1.16140244117112e-05, + "loss": 24.7501, + "step": 18839 + }, + { + "epoch": 0.7852944854320371, + "grad_norm": 278.0, + "learning_rate": 1.1609699445199306e-05, + "loss": 12.3775, + "step": 18840 + }, + { + "epoch": 0.7853361677295653, + "grad_norm": 524.0, + "learning_rate": 1.1605375178354016e-05, + "loss": 16.2504, + "step": 18841 + }, + { + "epoch": 0.7853778500270935, + "grad_norm": 188.0, + "learning_rate": 1.1601051611254199e-05, + "loss": 12.0632, + "step": 18842 + }, + { + "epoch": 0.7854195323246217, + "grad_norm": 324.0, + "learning_rate": 1.1596728743978603e-05, + "loss": 14.1878, + "step": 18843 + }, + { + "epoch": 0.78546121462215, + "grad_norm": 181.0, + "learning_rate": 1.1592406576606057e-05, + "loss": 11.1281, + "step": 18844 + }, + { + "epoch": 0.7855028969196782, + "grad_norm": 968.0, + "learning_rate": 1.1588085109215285e-05, + "loss": 24.2544, + "step": 18845 + }, + { + "epoch": 0.7855445792172064, + "grad_norm": 205.0, + "learning_rate": 1.1583764341885083e-05, + "loss": 12.6878, + "step": 18846 + }, + { + "epoch": 0.7855862615147347, + "grad_norm": 462.0, + "learning_rate": 1.1579444274694191e-05, + "loss": 16.0002, + "step": 18847 + }, + { + "epoch": 0.785627943812263, + "grad_norm": 258.0, + "learning_rate": 1.1575124907721336e-05, + "loss": 13.1252, + "step": 18848 + }, + { + "epoch": 0.7856696261097912, + "grad_norm": 186.0, + "learning_rate": 1.157080624104524e-05, + "loss": 11.7503, + "step": 18849 + }, + { + "epoch": 0.7857113084073194, + "grad_norm": 1432.0, + "learning_rate": 1.1566488274744614e-05, + "loss": 28.002, + "step": 18850 + }, + { + "epoch": 0.7857529907048476, + "grad_norm": 496.0, + "learning_rate": 1.1562171008898149e-05, + "loss": 20.1284, + "step": 18851 + }, + { + "epoch": 0.7857946730023759, + "grad_norm": 330.0, + "learning_rate": 1.1557854443584526e-05, + "loss": 14.3753, + "step": 18852 + }, + { + "epoch": 0.7858363552999041, + "grad_norm": 840.0, + "learning_rate": 1.1553538578882422e-05, + "loss": 22.2502, + "step": 18853 + }, + { + "epoch": 0.7858780375974324, + "grad_norm": 752.0, + "learning_rate": 1.154922341487049e-05, + "loss": 19.8803, + "step": 18854 + }, + { + "epoch": 0.7859197198949606, + "grad_norm": 294.0, + "learning_rate": 1.1544908951627376e-05, + "loss": 13.2502, + "step": 18855 + }, + { + "epoch": 0.7859614021924889, + "grad_norm": 126.5, + "learning_rate": 1.1540595189231713e-05, + "loss": 10.6257, + "step": 18856 + }, + { + "epoch": 0.7860030844900171, + "grad_norm": 396.0, + "learning_rate": 1.153628212776211e-05, + "loss": 13.5633, + "step": 18857 + }, + { + "epoch": 0.7860447667875453, + "grad_norm": 131.0, + "learning_rate": 1.1531969767297185e-05, + "loss": 9.0628, + "step": 18858 + }, + { + "epoch": 0.7860864490850735, + "grad_norm": 230.0, + "learning_rate": 1.1527658107915523e-05, + "loss": 13.313, + "step": 18859 + }, + { + "epoch": 0.7861281313826018, + "grad_norm": 2080.0, + "learning_rate": 1.1523347149695706e-05, + "loss": 47.255, + "step": 18860 + }, + { + "epoch": 0.7861698136801301, + "grad_norm": 528.0, + "learning_rate": 1.151903689271629e-05, + "loss": 17.5003, + "step": 18861 + }, + { + "epoch": 0.7862114959776583, + "grad_norm": 98.5, + "learning_rate": 1.1514727337055869e-05, + "loss": 8.3127, + "step": 18862 + }, + { + "epoch": 0.7862531782751865, + "grad_norm": 196.0, + "learning_rate": 1.1510418482792934e-05, + "loss": 11.438, + "step": 18863 + }, + { + "epoch": 0.7862948605727148, + "grad_norm": 106.0, + "learning_rate": 1.150611033000607e-05, + "loss": 9.7502, + "step": 18864 + }, + { + "epoch": 0.786336542870243, + "grad_norm": 85.5, + "learning_rate": 1.1501802878773726e-05, + "loss": 5.8149, + "step": 18865 + }, + { + "epoch": 0.7863782251677712, + "grad_norm": 172.0, + "learning_rate": 1.1497496129174478e-05, + "loss": 10.7504, + "step": 18866 + }, + { + "epoch": 0.7864199074652994, + "grad_norm": 237.0, + "learning_rate": 1.1493190081286748e-05, + "loss": 11.6252, + "step": 18867 + }, + { + "epoch": 0.7864615897628278, + "grad_norm": 1176.0, + "learning_rate": 1.1488884735189076e-05, + "loss": 27.7508, + "step": 18868 + }, + { + "epoch": 0.786503272060356, + "grad_norm": 536.0, + "learning_rate": 1.1484580090959874e-05, + "loss": 16.1277, + "step": 18869 + }, + { + "epoch": 0.7865449543578842, + "grad_norm": 302.0, + "learning_rate": 1.1480276148677627e-05, + "loss": 14.1252, + "step": 18870 + }, + { + "epoch": 0.7865866366554124, + "grad_norm": 396.0, + "learning_rate": 1.1475972908420774e-05, + "loss": 14.6877, + "step": 18871 + }, + { + "epoch": 0.7866283189529407, + "grad_norm": 71.0, + "learning_rate": 1.1471670370267729e-05, + "loss": 8.1253, + "step": 18872 + }, + { + "epoch": 0.7866700012504689, + "grad_norm": 664.0, + "learning_rate": 1.1467368534296913e-05, + "loss": 20.7502, + "step": 18873 + }, + { + "epoch": 0.7867116835479971, + "grad_norm": 354.0, + "learning_rate": 1.1463067400586724e-05, + "loss": 14.2503, + "step": 18874 + }, + { + "epoch": 0.7867533658455254, + "grad_norm": 440.0, + "learning_rate": 1.1458766969215556e-05, + "loss": 13.9378, + "step": 18875 + }, + { + "epoch": 0.7867950481430537, + "grad_norm": 600.0, + "learning_rate": 1.145446724026178e-05, + "loss": 20.0002, + "step": 18876 + }, + { + "epoch": 0.7868367304405819, + "grad_norm": 1040.0, + "learning_rate": 1.1450168213803764e-05, + "loss": 25.8756, + "step": 18877 + }, + { + "epoch": 0.7868784127381101, + "grad_norm": 202.0, + "learning_rate": 1.1445869889919847e-05, + "loss": 10.8128, + "step": 18878 + }, + { + "epoch": 0.7869200950356383, + "grad_norm": 186.0, + "learning_rate": 1.1441572268688377e-05, + "loss": 10.3127, + "step": 18879 + }, + { + "epoch": 0.7869617773331666, + "grad_norm": 342.0, + "learning_rate": 1.1437275350187682e-05, + "loss": 15.1879, + "step": 18880 + }, + { + "epoch": 0.7870034596306948, + "grad_norm": 344.0, + "learning_rate": 1.1432979134496058e-05, + "loss": 13.8128, + "step": 18881 + }, + { + "epoch": 0.7870451419282231, + "grad_norm": 450.0, + "learning_rate": 1.1428683621691815e-05, + "loss": 17.1257, + "step": 18882 + }, + { + "epoch": 0.7870868242257513, + "grad_norm": 524.0, + "learning_rate": 1.1424388811853242e-05, + "loss": 18.2504, + "step": 18883 + }, + { + "epoch": 0.7871285065232796, + "grad_norm": 158.0, + "learning_rate": 1.1420094705058604e-05, + "loss": 11.3757, + "step": 18884 + }, + { + "epoch": 0.7871701888208078, + "grad_norm": 217.0, + "learning_rate": 1.1415801301386164e-05, + "loss": 11.6255, + "step": 18885 + }, + { + "epoch": 0.787211871118336, + "grad_norm": 278.0, + "learning_rate": 1.1411508600914173e-05, + "loss": 11.0002, + "step": 18886 + }, + { + "epoch": 0.7872535534158643, + "grad_norm": 620.0, + "learning_rate": 1.1407216603720845e-05, + "loss": 19.3758, + "step": 18887 + }, + { + "epoch": 0.7872952357133925, + "grad_norm": 556.0, + "learning_rate": 1.140292530988445e-05, + "loss": 17.3752, + "step": 18888 + }, + { + "epoch": 0.7873369180109208, + "grad_norm": 173.0, + "learning_rate": 1.1398634719483142e-05, + "loss": 6.2506, + "step": 18889 + }, + { + "epoch": 0.787378600308449, + "grad_norm": 236.0, + "learning_rate": 1.1394344832595166e-05, + "loss": 11.6881, + "step": 18890 + }, + { + "epoch": 0.7874202826059773, + "grad_norm": 516.0, + "learning_rate": 1.1390055649298659e-05, + "loss": 15.6877, + "step": 18891 + }, + { + "epoch": 0.7874619649035055, + "grad_norm": 420.0, + "learning_rate": 1.1385767169671846e-05, + "loss": 11.0642, + "step": 18892 + }, + { + "epoch": 0.7875036472010337, + "grad_norm": 222.0, + "learning_rate": 1.138147939379282e-05, + "loss": 12.1252, + "step": 18893 + }, + { + "epoch": 0.7875453294985619, + "grad_norm": 238.0, + "learning_rate": 1.1377192321739788e-05, + "loss": 13.5005, + "step": 18894 + }, + { + "epoch": 0.7875870117960903, + "grad_norm": 520.0, + "learning_rate": 1.1372905953590835e-05, + "loss": 16.8775, + "step": 18895 + }, + { + "epoch": 0.7876286940936185, + "grad_norm": 540.0, + "learning_rate": 1.1368620289424104e-05, + "loss": 17.2506, + "step": 18896 + }, + { + "epoch": 0.7876703763911467, + "grad_norm": 412.0, + "learning_rate": 1.1364335329317705e-05, + "loss": 13.4384, + "step": 18897 + }, + { + "epoch": 0.7877120586886749, + "grad_norm": 156.0, + "learning_rate": 1.136005107334972e-05, + "loss": 12.063, + "step": 18898 + }, + { + "epoch": 0.7877537409862032, + "grad_norm": 304.0, + "learning_rate": 1.1355767521598237e-05, + "loss": 13.3754, + "step": 18899 + }, + { + "epoch": 0.7877954232837314, + "grad_norm": 494.0, + "learning_rate": 1.1351484674141316e-05, + "loss": 16.8756, + "step": 18900 + }, + { + "epoch": 0.7878371055812596, + "grad_norm": 189.0, + "learning_rate": 1.1347202531057028e-05, + "loss": 12.2505, + "step": 18901 + }, + { + "epoch": 0.7878787878787878, + "grad_norm": 1488.0, + "learning_rate": 1.1342921092423397e-05, + "loss": 33.757, + "step": 18902 + }, + { + "epoch": 0.7879204701763162, + "grad_norm": 484.0, + "learning_rate": 1.1338640358318463e-05, + "loss": 15.3753, + "step": 18903 + }, + { + "epoch": 0.7879621524738444, + "grad_norm": 220.0, + "learning_rate": 1.1334360328820237e-05, + "loss": 12.3752, + "step": 18904 + }, + { + "epoch": 0.7880038347713726, + "grad_norm": 258.0, + "learning_rate": 1.1330081004006732e-05, + "loss": 12.7505, + "step": 18905 + }, + { + "epoch": 0.7880455170689008, + "grad_norm": 300.0, + "learning_rate": 1.132580238395593e-05, + "loss": 13.9378, + "step": 18906 + }, + { + "epoch": 0.7880871993664291, + "grad_norm": 244.0, + "learning_rate": 1.1321524468745814e-05, + "loss": 11.5004, + "step": 18907 + }, + { + "epoch": 0.7881288816639573, + "grad_norm": 229.0, + "learning_rate": 1.131724725845435e-05, + "loss": 12.1881, + "step": 18908 + }, + { + "epoch": 0.7881705639614855, + "grad_norm": 236.0, + "learning_rate": 1.1312970753159485e-05, + "loss": 10.9399, + "step": 18909 + }, + { + "epoch": 0.7882122462590138, + "grad_norm": 209.0, + "learning_rate": 1.130869495293917e-05, + "loss": 11.2502, + "step": 18910 + }, + { + "epoch": 0.7882539285565421, + "grad_norm": 512.0, + "learning_rate": 1.1304419857871301e-05, + "loss": 17.5003, + "step": 18911 + }, + { + "epoch": 0.7882956108540703, + "grad_norm": 454.0, + "learning_rate": 1.1300145468033846e-05, + "loss": 15.8779, + "step": 18912 + }, + { + "epoch": 0.7883372931515985, + "grad_norm": 230.0, + "learning_rate": 1.1295871783504652e-05, + "loss": 12.1877, + "step": 18913 + }, + { + "epoch": 0.7883789754491267, + "grad_norm": 174.0, + "learning_rate": 1.1291598804361653e-05, + "loss": 11.5042, + "step": 18914 + }, + { + "epoch": 0.788420657746655, + "grad_norm": 136.0, + "learning_rate": 1.1287326530682674e-05, + "loss": 6.938, + "step": 18915 + }, + { + "epoch": 0.7884623400441833, + "grad_norm": 211.0, + "learning_rate": 1.1283054962545637e-05, + "loss": 12.0009, + "step": 18916 + }, + { + "epoch": 0.7885040223417115, + "grad_norm": 226.0, + "learning_rate": 1.1278784100028334e-05, + "loss": 13.0626, + "step": 18917 + }, + { + "epoch": 0.7885457046392397, + "grad_norm": 131.0, + "learning_rate": 1.1274513943208648e-05, + "loss": 10.4379, + "step": 18918 + }, + { + "epoch": 0.788587386936768, + "grad_norm": 235.0, + "learning_rate": 1.1270244492164362e-05, + "loss": 12.5001, + "step": 18919 + }, + { + "epoch": 0.7886290692342962, + "grad_norm": 87.5, + "learning_rate": 1.1265975746973333e-05, + "loss": 8.376, + "step": 18920 + }, + { + "epoch": 0.7886707515318244, + "grad_norm": 245.0, + "learning_rate": 1.1261707707713309e-05, + "loss": 12.5003, + "step": 18921 + }, + { + "epoch": 0.7887124338293526, + "grad_norm": 316.0, + "learning_rate": 1.1257440374462113e-05, + "loss": 13.3752, + "step": 18922 + }, + { + "epoch": 0.788754116126881, + "grad_norm": 146.0, + "learning_rate": 1.1253173747297507e-05, + "loss": 9.6255, + "step": 18923 + }, + { + "epoch": 0.7887957984244092, + "grad_norm": 390.0, + "learning_rate": 1.124890782629725e-05, + "loss": 13.7502, + "step": 18924 + }, + { + "epoch": 0.7888374807219374, + "grad_norm": 316.0, + "learning_rate": 1.1244642611539086e-05, + "loss": 14.5007, + "step": 18925 + }, + { + "epoch": 0.7888791630194656, + "grad_norm": 424.0, + "learning_rate": 1.1240378103100752e-05, + "loss": 14.563, + "step": 18926 + }, + { + "epoch": 0.7889208453169939, + "grad_norm": 148.0, + "learning_rate": 1.1236114301059964e-05, + "loss": 10.752, + "step": 18927 + }, + { + "epoch": 0.7889625276145221, + "grad_norm": 388.0, + "learning_rate": 1.1231851205494438e-05, + "loss": 16.5003, + "step": 18928 + }, + { + "epoch": 0.7890042099120503, + "grad_norm": 302.0, + "learning_rate": 1.1227588816481866e-05, + "loss": 10.5628, + "step": 18929 + }, + { + "epoch": 0.7890458922095785, + "grad_norm": 324.0, + "learning_rate": 1.1223327134099925e-05, + "loss": 14.1254, + "step": 18930 + }, + { + "epoch": 0.7890875745071069, + "grad_norm": 316.0, + "learning_rate": 1.1219066158426294e-05, + "loss": 14.7503, + "step": 18931 + }, + { + "epoch": 0.7891292568046351, + "grad_norm": 448.0, + "learning_rate": 1.1214805889538621e-05, + "loss": 15.6897, + "step": 18932 + }, + { + "epoch": 0.7891709391021633, + "grad_norm": 250.0, + "learning_rate": 1.1210546327514559e-05, + "loss": 13.813, + "step": 18933 + }, + { + "epoch": 0.7892126213996915, + "grad_norm": 528.0, + "learning_rate": 1.1206287472431731e-05, + "loss": 17.3751, + "step": 18934 + }, + { + "epoch": 0.7892543036972198, + "grad_norm": 370.0, + "learning_rate": 1.1202029324367757e-05, + "loss": 14.9378, + "step": 18935 + }, + { + "epoch": 0.789295985994748, + "grad_norm": 600.0, + "learning_rate": 1.1197771883400243e-05, + "loss": 19.7505, + "step": 18936 + }, + { + "epoch": 0.7893376682922763, + "grad_norm": 166.0, + "learning_rate": 1.1193515149606766e-05, + "loss": 10.5628, + "step": 18937 + }, + { + "epoch": 0.7893793505898045, + "grad_norm": 96.0, + "learning_rate": 1.1189259123064949e-05, + "loss": 8.7516, + "step": 18938 + }, + { + "epoch": 0.7894210328873328, + "grad_norm": 181.0, + "learning_rate": 1.11850038038523e-05, + "loss": 10.6881, + "step": 18939 + }, + { + "epoch": 0.789462715184861, + "grad_norm": 219.0, + "learning_rate": 1.1180749192046435e-05, + "loss": 12.1877, + "step": 18940 + }, + { + "epoch": 0.7895043974823892, + "grad_norm": 440.0, + "learning_rate": 1.117649528772483e-05, + "loss": 16.8752, + "step": 18941 + }, + { + "epoch": 0.7895460797799174, + "grad_norm": 440.0, + "learning_rate": 1.117224209096508e-05, + "loss": 15.8127, + "step": 18942 + }, + { + "epoch": 0.7895877620774457, + "grad_norm": 544.0, + "learning_rate": 1.1167989601844631e-05, + "loss": 17.6252, + "step": 18943 + }, + { + "epoch": 0.789629444374974, + "grad_norm": 418.0, + "learning_rate": 1.1163737820441045e-05, + "loss": 14.5627, + "step": 18944 + }, + { + "epoch": 0.7896711266725022, + "grad_norm": 736.0, + "learning_rate": 1.115948674683176e-05, + "loss": 19.5035, + "step": 18945 + }, + { + "epoch": 0.7897128089700304, + "grad_norm": 47.25, + "learning_rate": 1.1155236381094287e-05, + "loss": 6.4065, + "step": 18946 + }, + { + "epoch": 0.7897544912675587, + "grad_norm": 206.0, + "learning_rate": 1.1150986723306085e-05, + "loss": 12.5627, + "step": 18947 + }, + { + "epoch": 0.7897961735650869, + "grad_norm": 306.0, + "learning_rate": 1.1146737773544591e-05, + "loss": 13.4377, + "step": 18948 + }, + { + "epoch": 0.7898378558626151, + "grad_norm": 1328.0, + "learning_rate": 1.1142489531887257e-05, + "loss": 37.0021, + "step": 18949 + }, + { + "epoch": 0.7898795381601433, + "grad_norm": 290.0, + "learning_rate": 1.1138241998411498e-05, + "loss": 13.0002, + "step": 18950 + }, + { + "epoch": 0.7899212204576717, + "grad_norm": 640.0, + "learning_rate": 1.1133995173194733e-05, + "loss": 20.6253, + "step": 18951 + }, + { + "epoch": 0.7899629027551999, + "grad_norm": 364.0, + "learning_rate": 1.1129749056314349e-05, + "loss": 15.3764, + "step": 18952 + }, + { + "epoch": 0.7900045850527281, + "grad_norm": 286.0, + "learning_rate": 1.1125503647847746e-05, + "loss": 12.8128, + "step": 18953 + }, + { + "epoch": 0.7900462673502563, + "grad_norm": 113.5, + "learning_rate": 1.1121258947872282e-05, + "loss": 9.1876, + "step": 18954 + }, + { + "epoch": 0.7900879496477846, + "grad_norm": 820.0, + "learning_rate": 1.1117014956465332e-05, + "loss": 22.7508, + "step": 18955 + }, + { + "epoch": 0.7901296319453128, + "grad_norm": 1064.0, + "learning_rate": 1.1112771673704231e-05, + "loss": 25.0004, + "step": 18956 + }, + { + "epoch": 0.790171314242841, + "grad_norm": 350.0, + "learning_rate": 1.1108529099666326e-05, + "loss": 14.3756, + "step": 18957 + }, + { + "epoch": 0.7902129965403693, + "grad_norm": 496.0, + "learning_rate": 1.1104287234428922e-05, + "loss": 16.6255, + "step": 18958 + }, + { + "epoch": 0.7902546788378976, + "grad_norm": 588.0, + "learning_rate": 1.1100046078069342e-05, + "loss": 17.2537, + "step": 18959 + }, + { + "epoch": 0.7902963611354258, + "grad_norm": 628.0, + "learning_rate": 1.1095805630664874e-05, + "loss": 18.8782, + "step": 18960 + }, + { + "epoch": 0.790338043432954, + "grad_norm": 225.0, + "learning_rate": 1.1091565892292787e-05, + "loss": 12.3131, + "step": 18961 + }, + { + "epoch": 0.7903797257304823, + "grad_norm": 532.0, + "learning_rate": 1.108732686303039e-05, + "loss": 17.7506, + "step": 18962 + }, + { + "epoch": 0.7904214080280105, + "grad_norm": 600.0, + "learning_rate": 1.1083088542954895e-05, + "loss": 18.6253, + "step": 18963 + }, + { + "epoch": 0.7904630903255387, + "grad_norm": 194.0, + "learning_rate": 1.1078850932143587e-05, + "loss": 12.3765, + "step": 18964 + }, + { + "epoch": 0.790504772623067, + "grad_norm": 120.0, + "learning_rate": 1.1074614030673652e-05, + "loss": 10.7506, + "step": 18965 + }, + { + "epoch": 0.7905464549205953, + "grad_norm": 364.0, + "learning_rate": 1.107037783862236e-05, + "loss": 15.1892, + "step": 18966 + }, + { + "epoch": 0.7905881372181235, + "grad_norm": 262.0, + "learning_rate": 1.1066142356066856e-05, + "loss": 14.1884, + "step": 18967 + }, + { + "epoch": 0.7906298195156517, + "grad_norm": 848.0, + "learning_rate": 1.1061907583084397e-05, + "loss": 24.8768, + "step": 18968 + }, + { + "epoch": 0.7906715018131799, + "grad_norm": 164.0, + "learning_rate": 1.1057673519752099e-05, + "loss": 9.376, + "step": 18969 + }, + { + "epoch": 0.7907131841107082, + "grad_norm": 364.0, + "learning_rate": 1.1053440166147184e-05, + "loss": 13.5628, + "step": 18970 + }, + { + "epoch": 0.7907548664082364, + "grad_norm": 106.0, + "learning_rate": 1.1049207522346755e-05, + "loss": 9.6883, + "step": 18971 + }, + { + "epoch": 0.7907965487057647, + "grad_norm": 720.0, + "learning_rate": 1.1044975588427986e-05, + "loss": 20.6252, + "step": 18972 + }, + { + "epoch": 0.7908382310032929, + "grad_norm": 1424.0, + "learning_rate": 1.1040744364467997e-05, + "loss": 29.129, + "step": 18973 + }, + { + "epoch": 0.7908799133008212, + "grad_norm": 69.5, + "learning_rate": 1.10365138505439e-05, + "loss": 8.1881, + "step": 18974 + }, + { + "epoch": 0.7909215955983494, + "grad_norm": 442.0, + "learning_rate": 1.1032284046732799e-05, + "loss": 16.2502, + "step": 18975 + }, + { + "epoch": 0.7909632778958776, + "grad_norm": 476.0, + "learning_rate": 1.1028054953111782e-05, + "loss": 16.2514, + "step": 18976 + }, + { + "epoch": 0.7910049601934058, + "grad_norm": 211.0, + "learning_rate": 1.1023826569757922e-05, + "loss": 10.5006, + "step": 18977 + }, + { + "epoch": 0.7910466424909341, + "grad_norm": 231.0, + "learning_rate": 1.1019598896748279e-05, + "loss": 11.5629, + "step": 18978 + }, + { + "epoch": 0.7910883247884624, + "grad_norm": 132.0, + "learning_rate": 1.1015371934159907e-05, + "loss": 11.5003, + "step": 18979 + }, + { + "epoch": 0.7911300070859906, + "grad_norm": 408.0, + "learning_rate": 1.1011145682069845e-05, + "loss": 15.9379, + "step": 18980 + }, + { + "epoch": 0.7911716893835188, + "grad_norm": 121.5, + "learning_rate": 1.1006920140555115e-05, + "loss": 10.1878, + "step": 18981 + }, + { + "epoch": 0.7912133716810471, + "grad_norm": 344.0, + "learning_rate": 1.1002695309692723e-05, + "loss": 14.0633, + "step": 18982 + }, + { + "epoch": 0.7912550539785753, + "grad_norm": 160.0, + "learning_rate": 1.0998471189559672e-05, + "loss": 10.2501, + "step": 18983 + }, + { + "epoch": 0.7912967362761035, + "grad_norm": 588.0, + "learning_rate": 1.0994247780232952e-05, + "loss": 19.3754, + "step": 18984 + }, + { + "epoch": 0.7913384185736317, + "grad_norm": 828.0, + "learning_rate": 1.0990025081789523e-05, + "loss": 23.3752, + "step": 18985 + }, + { + "epoch": 0.7913801008711601, + "grad_norm": 392.0, + "learning_rate": 1.0985803094306352e-05, + "loss": 13.3756, + "step": 18986 + }, + { + "epoch": 0.7914217831686883, + "grad_norm": 628.0, + "learning_rate": 1.0981581817860365e-05, + "loss": 18.7502, + "step": 18987 + }, + { + "epoch": 0.7914634654662165, + "grad_norm": 64.0, + "learning_rate": 1.0977361252528546e-05, + "loss": 6.5939, + "step": 18988 + }, + { + "epoch": 0.7915051477637447, + "grad_norm": 516.0, + "learning_rate": 1.097314139838775e-05, + "loss": 17.5002, + "step": 18989 + }, + { + "epoch": 0.791546830061273, + "grad_norm": 214.0, + "learning_rate": 1.0968922255514947e-05, + "loss": 11.1882, + "step": 18990 + }, + { + "epoch": 0.7915885123588012, + "grad_norm": 292.0, + "learning_rate": 1.0964703823986971e-05, + "loss": 14.314, + "step": 18991 + }, + { + "epoch": 0.7916301946563294, + "grad_norm": 400.0, + "learning_rate": 1.0960486103880762e-05, + "loss": 15.8129, + "step": 18992 + }, + { + "epoch": 0.7916718769538577, + "grad_norm": 688.0, + "learning_rate": 1.0956269095273137e-05, + "loss": 20.2505, + "step": 18993 + }, + { + "epoch": 0.791713559251386, + "grad_norm": 139.0, + "learning_rate": 1.0952052798240992e-05, + "loss": 7.1253, + "step": 18994 + }, + { + "epoch": 0.7917552415489142, + "grad_norm": 588.0, + "learning_rate": 1.0947837212861128e-05, + "loss": 17.8803, + "step": 18995 + }, + { + "epoch": 0.7917969238464424, + "grad_norm": 776.0, + "learning_rate": 1.0943622339210414e-05, + "loss": 21.3764, + "step": 18996 + }, + { + "epoch": 0.7918386061439706, + "grad_norm": 184.0, + "learning_rate": 1.0939408177365646e-05, + "loss": 10.8128, + "step": 18997 + }, + { + "epoch": 0.7918802884414989, + "grad_norm": 181.0, + "learning_rate": 1.0935194727403637e-05, + "loss": 9.2504, + "step": 18998 + }, + { + "epoch": 0.7919219707390271, + "grad_norm": 604.0, + "learning_rate": 1.0930981989401163e-05, + "loss": 18.5027, + "step": 18999 + }, + { + "epoch": 0.7919636530365554, + "grad_norm": 163.0, + "learning_rate": 1.0926769963435019e-05, + "loss": 12.0011, + "step": 19000 + }, + { + "epoch": 0.7920053353340836, + "grad_norm": 94.0, + "learning_rate": 1.0922558649581954e-05, + "loss": 7.8754, + "step": 19001 + }, + { + "epoch": 0.7920470176316119, + "grad_norm": 736.0, + "learning_rate": 1.091834804791873e-05, + "loss": 24.8753, + "step": 19002 + }, + { + "epoch": 0.7920886999291401, + "grad_norm": 69.0, + "learning_rate": 1.0914138158522086e-05, + "loss": 7.5943, + "step": 19003 + }, + { + "epoch": 0.7921303822266683, + "grad_norm": 306.0, + "learning_rate": 1.0909928981468737e-05, + "loss": 14.5627, + "step": 19004 + }, + { + "epoch": 0.7921720645241965, + "grad_norm": 298.0, + "learning_rate": 1.0905720516835406e-05, + "loss": 12.9378, + "step": 19005 + }, + { + "epoch": 0.7922137468217249, + "grad_norm": 1072.0, + "learning_rate": 1.090151276469879e-05, + "loss": 23.2556, + "step": 19006 + }, + { + "epoch": 0.7922554291192531, + "grad_norm": 101.0, + "learning_rate": 1.089730572513557e-05, + "loss": 9.1259, + "step": 19007 + }, + { + "epoch": 0.7922971114167813, + "grad_norm": 520.0, + "learning_rate": 1.0893099398222428e-05, + "loss": 17.8752, + "step": 19008 + }, + { + "epoch": 0.7923387937143095, + "grad_norm": 628.0, + "learning_rate": 1.0888893784036025e-05, + "loss": 18.8757, + "step": 19009 + }, + { + "epoch": 0.7923804760118378, + "grad_norm": 576.0, + "learning_rate": 1.0884688882652998e-05, + "loss": 18.6297, + "step": 19010 + }, + { + "epoch": 0.792422158309366, + "grad_norm": 342.0, + "learning_rate": 1.0880484694149978e-05, + "loss": 15.5627, + "step": 19011 + }, + { + "epoch": 0.7924638406068942, + "grad_norm": 290.0, + "learning_rate": 1.0876281218603623e-05, + "loss": 12.1879, + "step": 19012 + }, + { + "epoch": 0.7925055229044224, + "grad_norm": 458.0, + "learning_rate": 1.087207845609049e-05, + "loss": 17.5011, + "step": 19013 + }, + { + "epoch": 0.7925472052019508, + "grad_norm": 350.0, + "learning_rate": 1.0867876406687227e-05, + "loss": 14.3128, + "step": 19014 + }, + { + "epoch": 0.792588887499479, + "grad_norm": 125.5, + "learning_rate": 1.0863675070470364e-05, + "loss": 9.8127, + "step": 19015 + }, + { + "epoch": 0.7926305697970072, + "grad_norm": 183.0, + "learning_rate": 1.0859474447516526e-05, + "loss": 10.7508, + "step": 19016 + }, + { + "epoch": 0.7926722520945354, + "grad_norm": 426.0, + "learning_rate": 1.085527453790221e-05, + "loss": 15.8754, + "step": 19017 + }, + { + "epoch": 0.7927139343920637, + "grad_norm": 440.0, + "learning_rate": 1.085107534170402e-05, + "loss": 17.5002, + "step": 19018 + }, + { + "epoch": 0.7927556166895919, + "grad_norm": 175.0, + "learning_rate": 1.0846876858998428e-05, + "loss": 9.8753, + "step": 19019 + }, + { + "epoch": 0.7927972989871201, + "grad_norm": 154.0, + "learning_rate": 1.0842679089862013e-05, + "loss": 9.9383, + "step": 19020 + }, + { + "epoch": 0.7928389812846484, + "grad_norm": 278.0, + "learning_rate": 1.0838482034371212e-05, + "loss": 13.8752, + "step": 19021 + }, + { + "epoch": 0.7928806635821767, + "grad_norm": 238.0, + "learning_rate": 1.0834285692602574e-05, + "loss": 12.7501, + "step": 19022 + }, + { + "epoch": 0.7929223458797049, + "grad_norm": 392.0, + "learning_rate": 1.0830090064632553e-05, + "loss": 14.6877, + "step": 19023 + }, + { + "epoch": 0.7929640281772331, + "grad_norm": 294.0, + "learning_rate": 1.0825895150537618e-05, + "loss": 13.3754, + "step": 19024 + }, + { + "epoch": 0.7930057104747613, + "grad_norm": 225.0, + "learning_rate": 1.082170095039422e-05, + "loss": 10.8752, + "step": 19025 + }, + { + "epoch": 0.7930473927722896, + "grad_norm": 490.0, + "learning_rate": 1.0817507464278803e-05, + "loss": 16.1252, + "step": 19026 + }, + { + "epoch": 0.7930890750698179, + "grad_norm": 134.0, + "learning_rate": 1.0813314692267783e-05, + "loss": 11.0629, + "step": 19027 + }, + { + "epoch": 0.7931307573673461, + "grad_norm": 604.0, + "learning_rate": 1.0809122634437591e-05, + "loss": 20.2506, + "step": 19028 + }, + { + "epoch": 0.7931724396648743, + "grad_norm": 312.0, + "learning_rate": 1.0804931290864618e-05, + "loss": 13.8757, + "step": 19029 + }, + { + "epoch": 0.7932141219624026, + "grad_norm": 180.0, + "learning_rate": 1.0800740661625247e-05, + "loss": 12.2511, + "step": 19030 + }, + { + "epoch": 0.7932558042599308, + "grad_norm": 374.0, + "learning_rate": 1.0796550746795859e-05, + "loss": 14.6253, + "step": 19031 + }, + { + "epoch": 0.793297486557459, + "grad_norm": 184.0, + "learning_rate": 1.0792361546452811e-05, + "loss": 10.1254, + "step": 19032 + }, + { + "epoch": 0.7933391688549873, + "grad_norm": 298.0, + "learning_rate": 1.0788173060672457e-05, + "loss": 12.0629, + "step": 19033 + }, + { + "epoch": 0.7933808511525156, + "grad_norm": 290.0, + "learning_rate": 1.078398528953113e-05, + "loss": 12.3753, + "step": 19034 + }, + { + "epoch": 0.7934225334500438, + "grad_norm": 154.0, + "learning_rate": 1.0779798233105154e-05, + "loss": 11.6883, + "step": 19035 + }, + { + "epoch": 0.793464215747572, + "grad_norm": 166.0, + "learning_rate": 1.0775611891470832e-05, + "loss": 9.7502, + "step": 19036 + }, + { + "epoch": 0.7935058980451003, + "grad_norm": 214.0, + "learning_rate": 1.0771426264704455e-05, + "loss": 11.6878, + "step": 19037 + }, + { + "epoch": 0.7935475803426285, + "grad_norm": 270.0, + "learning_rate": 1.0767241352882345e-05, + "loss": 13.4377, + "step": 19038 + }, + { + "epoch": 0.7935892626401567, + "grad_norm": 300.0, + "learning_rate": 1.0763057156080714e-05, + "loss": 13.9377, + "step": 19039 + }, + { + "epoch": 0.7936309449376849, + "grad_norm": 932.0, + "learning_rate": 1.075887367437588e-05, + "loss": 28.377, + "step": 19040 + }, + { + "epoch": 0.7936726272352133, + "grad_norm": 480.0, + "learning_rate": 1.0754690907844028e-05, + "loss": 16.3757, + "step": 19041 + }, + { + "epoch": 0.7937143095327415, + "grad_norm": 270.0, + "learning_rate": 1.0750508856561443e-05, + "loss": 13.3134, + "step": 19042 + }, + { + "epoch": 0.7937559918302697, + "grad_norm": 336.0, + "learning_rate": 1.0746327520604294e-05, + "loss": 14.0007, + "step": 19043 + }, + { + "epoch": 0.7937976741277979, + "grad_norm": 412.0, + "learning_rate": 1.0742146900048839e-05, + "loss": 15.9379, + "step": 19044 + }, + { + "epoch": 0.7938393564253262, + "grad_norm": 228.0, + "learning_rate": 1.073796699497121e-05, + "loss": 12.1254, + "step": 19045 + }, + { + "epoch": 0.7938810387228544, + "grad_norm": 244.0, + "learning_rate": 1.0733787805447648e-05, + "loss": 13.1254, + "step": 19046 + }, + { + "epoch": 0.7939227210203826, + "grad_norm": 724.0, + "learning_rate": 1.0729609331554263e-05, + "loss": 21.2504, + "step": 19047 + }, + { + "epoch": 0.7939644033179108, + "grad_norm": 117.0, + "learning_rate": 1.0725431573367245e-05, + "loss": 9.4376, + "step": 19048 + }, + { + "epoch": 0.7940060856154392, + "grad_norm": 194.0, + "learning_rate": 1.0721254530962726e-05, + "loss": 10.3753, + "step": 19049 + }, + { + "epoch": 0.7940477679129674, + "grad_norm": 324.0, + "learning_rate": 1.071707820441683e-05, + "loss": 14.313, + "step": 19050 + }, + { + "epoch": 0.7940894502104956, + "grad_norm": 920.0, + "learning_rate": 1.0712902593805669e-05, + "loss": 25.1259, + "step": 19051 + }, + { + "epoch": 0.7941311325080238, + "grad_norm": 620.0, + "learning_rate": 1.0708727699205346e-05, + "loss": 18.6254, + "step": 19052 + }, + { + "epoch": 0.7941728148055521, + "grad_norm": 322.0, + "learning_rate": 1.0704553520691946e-05, + "loss": 13.8131, + "step": 19053 + }, + { + "epoch": 0.7942144971030803, + "grad_norm": 316.0, + "learning_rate": 1.0700380058341553e-05, + "loss": 14.1254, + "step": 19054 + }, + { + "epoch": 0.7942561794006086, + "grad_norm": 212.0, + "learning_rate": 1.0696207312230217e-05, + "loss": 12.1253, + "step": 19055 + }, + { + "epoch": 0.7942978616981368, + "grad_norm": 384.0, + "learning_rate": 1.0692035282433993e-05, + "loss": 15.1877, + "step": 19056 + }, + { + "epoch": 0.7943395439956651, + "grad_norm": 712.0, + "learning_rate": 1.0687863969028916e-05, + "loss": 19.5035, + "step": 19057 + }, + { + "epoch": 0.7943812262931933, + "grad_norm": 372.0, + "learning_rate": 1.0683693372091008e-05, + "loss": 15.1878, + "step": 19058 + }, + { + "epoch": 0.7944229085907215, + "grad_norm": 71.5, + "learning_rate": 1.0679523491696281e-05, + "loss": 9.1257, + "step": 19059 + }, + { + "epoch": 0.7944645908882497, + "grad_norm": 384.0, + "learning_rate": 1.0675354327920722e-05, + "loss": 11.5628, + "step": 19060 + }, + { + "epoch": 0.794506273185778, + "grad_norm": 426.0, + "learning_rate": 1.0671185880840329e-05, + "loss": 16.1266, + "step": 19061 + }, + { + "epoch": 0.7945479554833063, + "grad_norm": 1328.0, + "learning_rate": 1.066701815053106e-05, + "loss": 29.1305, + "step": 19062 + }, + { + "epoch": 0.7945896377808345, + "grad_norm": 150.0, + "learning_rate": 1.0662851137068864e-05, + "loss": 9.8127, + "step": 19063 + }, + { + "epoch": 0.7946313200783627, + "grad_norm": 320.0, + "learning_rate": 1.0658684840529726e-05, + "loss": 14.7507, + "step": 19064 + }, + { + "epoch": 0.794673002375891, + "grad_norm": 728.0, + "learning_rate": 1.0654519260989521e-05, + "loss": 21.2509, + "step": 19065 + }, + { + "epoch": 0.7947146846734192, + "grad_norm": 185.0, + "learning_rate": 1.0650354398524226e-05, + "loss": 11.6879, + "step": 19066 + }, + { + "epoch": 0.7947563669709474, + "grad_norm": 152.0, + "learning_rate": 1.0646190253209687e-05, + "loss": 9.3753, + "step": 19067 + }, + { + "epoch": 0.7947980492684756, + "grad_norm": 484.0, + "learning_rate": 1.0642026825121848e-05, + "loss": 17.0003, + "step": 19068 + }, + { + "epoch": 0.794839731566004, + "grad_norm": 266.0, + "learning_rate": 1.0637864114336538e-05, + "loss": 12.7504, + "step": 19069 + }, + { + "epoch": 0.7948814138635322, + "grad_norm": 228.0, + "learning_rate": 1.0633702120929678e-05, + "loss": 11.0628, + "step": 19070 + }, + { + "epoch": 0.7949230961610604, + "grad_norm": 484.0, + "learning_rate": 1.062954084497706e-05, + "loss": 17.2501, + "step": 19071 + }, + { + "epoch": 0.7949647784585886, + "grad_norm": 516.0, + "learning_rate": 1.062538028655457e-05, + "loss": 18.0002, + "step": 19072 + }, + { + "epoch": 0.7950064607561169, + "grad_norm": 256.0, + "learning_rate": 1.0621220445738023e-05, + "loss": 13.6254, + "step": 19073 + }, + { + "epoch": 0.7950481430536451, + "grad_norm": 356.0, + "learning_rate": 1.0617061322603229e-05, + "loss": 14.9379, + "step": 19074 + }, + { + "epoch": 0.7950898253511733, + "grad_norm": 292.0, + "learning_rate": 1.0612902917225987e-05, + "loss": 11.3128, + "step": 19075 + }, + { + "epoch": 0.7951315076487016, + "grad_norm": 348.0, + "learning_rate": 1.0608745229682088e-05, + "loss": 14.5002, + "step": 19076 + }, + { + "epoch": 0.7951731899462299, + "grad_norm": 620.0, + "learning_rate": 1.06045882600473e-05, + "loss": 17.3774, + "step": 19077 + }, + { + "epoch": 0.7952148722437581, + "grad_norm": 199.0, + "learning_rate": 1.0600432008397387e-05, + "loss": 11.3128, + "step": 19078 + }, + { + "epoch": 0.7952565545412863, + "grad_norm": 896.0, + "learning_rate": 1.0596276474808103e-05, + "loss": 19.5044, + "step": 19079 + }, + { + "epoch": 0.7952982368388145, + "grad_norm": 528.0, + "learning_rate": 1.0592121659355175e-05, + "loss": 15.0002, + "step": 19080 + }, + { + "epoch": 0.7953399191363428, + "grad_norm": 249.0, + "learning_rate": 1.058796756211433e-05, + "loss": 12.0006, + "step": 19081 + }, + { + "epoch": 0.795381601433871, + "grad_norm": 1040.0, + "learning_rate": 1.0583814183161278e-05, + "loss": 22.5045, + "step": 19082 + }, + { + "epoch": 0.7954232837313993, + "grad_norm": 956.0, + "learning_rate": 1.0579661522571705e-05, + "loss": 23.5013, + "step": 19083 + }, + { + "epoch": 0.7954649660289275, + "grad_norm": 720.0, + "learning_rate": 1.0575509580421306e-05, + "loss": 20.3791, + "step": 19084 + }, + { + "epoch": 0.7955066483264558, + "grad_norm": 244.0, + "learning_rate": 1.0571358356785743e-05, + "loss": 12.563, + "step": 19085 + }, + { + "epoch": 0.795548330623984, + "grad_norm": 217.0, + "learning_rate": 1.0567207851740668e-05, + "loss": 9.8128, + "step": 19086 + }, + { + "epoch": 0.7955900129215122, + "grad_norm": 434.0, + "learning_rate": 1.0563058065361725e-05, + "loss": 16.3755, + "step": 19087 + }, + { + "epoch": 0.7956316952190404, + "grad_norm": 420.0, + "learning_rate": 1.0558908997724576e-05, + "loss": 15.9377, + "step": 19088 + }, + { + "epoch": 0.7956733775165687, + "grad_norm": 154.0, + "learning_rate": 1.0554760648904782e-05, + "loss": 11.0628, + "step": 19089 + }, + { + "epoch": 0.795715059814097, + "grad_norm": 374.0, + "learning_rate": 1.0550613018978001e-05, + "loss": 15.0002, + "step": 19090 + }, + { + "epoch": 0.7957567421116252, + "grad_norm": 592.0, + "learning_rate": 1.0546466108019776e-05, + "loss": 18.5002, + "step": 19091 + }, + { + "epoch": 0.7957984244091534, + "grad_norm": 187.0, + "learning_rate": 1.0542319916105736e-05, + "loss": 11.3753, + "step": 19092 + }, + { + "epoch": 0.7958401067066817, + "grad_norm": 316.0, + "learning_rate": 1.0538174443311394e-05, + "loss": 13.4381, + "step": 19093 + }, + { + "epoch": 0.7958817890042099, + "grad_norm": 348.0, + "learning_rate": 1.053402968971235e-05, + "loss": 12.8763, + "step": 19094 + }, + { + "epoch": 0.7959234713017381, + "grad_norm": 216.0, + "learning_rate": 1.0529885655384092e-05, + "loss": 13.0005, + "step": 19095 + }, + { + "epoch": 0.7959651535992663, + "grad_norm": 528.0, + "learning_rate": 1.0525742340402196e-05, + "loss": 18.251, + "step": 19096 + }, + { + "epoch": 0.7960068358967947, + "grad_norm": 768.0, + "learning_rate": 1.0521599744842125e-05, + "loss": 22.8752, + "step": 19097 + }, + { + "epoch": 0.7960485181943229, + "grad_norm": 692.0, + "learning_rate": 1.0517457868779418e-05, + "loss": 20.1258, + "step": 19098 + }, + { + "epoch": 0.7960902004918511, + "grad_norm": 190.0, + "learning_rate": 1.0513316712289545e-05, + "loss": 11.6252, + "step": 19099 + }, + { + "epoch": 0.7961318827893793, + "grad_norm": 1360.0, + "learning_rate": 1.050917627544798e-05, + "loss": 34.0008, + "step": 19100 + }, + { + "epoch": 0.7961735650869076, + "grad_norm": 163.0, + "learning_rate": 1.0505036558330184e-05, + "loss": 8.7503, + "step": 19101 + }, + { + "epoch": 0.7962152473844358, + "grad_norm": 179.0, + "learning_rate": 1.0500897561011601e-05, + "loss": 12.2505, + "step": 19102 + }, + { + "epoch": 0.796256929681964, + "grad_norm": 516.0, + "learning_rate": 1.049675928356767e-05, + "loss": 14.2506, + "step": 19103 + }, + { + "epoch": 0.7962986119794923, + "grad_norm": 412.0, + "learning_rate": 1.0492621726073804e-05, + "loss": 15.0632, + "step": 19104 + }, + { + "epoch": 0.7963402942770206, + "grad_norm": 90.5, + "learning_rate": 1.0488484888605415e-05, + "loss": 7.9696, + "step": 19105 + }, + { + "epoch": 0.7963819765745488, + "grad_norm": 244.0, + "learning_rate": 1.04843487712379e-05, + "loss": 11.5002, + "step": 19106 + }, + { + "epoch": 0.796423658872077, + "grad_norm": 144.0, + "learning_rate": 1.0480213374046633e-05, + "loss": 11.1253, + "step": 19107 + }, + { + "epoch": 0.7964653411696053, + "grad_norm": 424.0, + "learning_rate": 1.0476078697106983e-05, + "loss": 15.6255, + "step": 19108 + }, + { + "epoch": 0.7965070234671335, + "grad_norm": 336.0, + "learning_rate": 1.0471944740494311e-05, + "loss": 12.3777, + "step": 19109 + }, + { + "epoch": 0.7965487057646617, + "grad_norm": 187.0, + "learning_rate": 1.0467811504283959e-05, + "loss": 11.1252, + "step": 19110 + }, + { + "epoch": 0.79659038806219, + "grad_norm": 160.0, + "learning_rate": 1.0463678988551246e-05, + "loss": 8.2503, + "step": 19111 + }, + { + "epoch": 0.7966320703597183, + "grad_norm": 668.0, + "learning_rate": 1.0459547193371493e-05, + "loss": 21.0002, + "step": 19112 + }, + { + "epoch": 0.7966737526572465, + "grad_norm": 314.0, + "learning_rate": 1.0455416118819993e-05, + "loss": 13.8128, + "step": 19113 + }, + { + "epoch": 0.7967154349547747, + "grad_norm": 576.0, + "learning_rate": 1.0451285764972068e-05, + "loss": 16.8801, + "step": 19114 + }, + { + "epoch": 0.7967571172523029, + "grad_norm": 252.0, + "learning_rate": 1.0447156131902947e-05, + "loss": 12.8129, + "step": 19115 + }, + { + "epoch": 0.7967987995498312, + "grad_norm": 326.0, + "learning_rate": 1.044302721968794e-05, + "loss": 13.7504, + "step": 19116 + }, + { + "epoch": 0.7968404818473594, + "grad_norm": 129.0, + "learning_rate": 1.043889902840225e-05, + "loss": 10.1253, + "step": 19117 + }, + { + "epoch": 0.7968821641448877, + "grad_norm": 696.0, + "learning_rate": 1.0434771558121166e-05, + "loss": 20.6253, + "step": 19118 + }, + { + "epoch": 0.7969238464424159, + "grad_norm": 456.0, + "learning_rate": 1.0430644808919848e-05, + "loss": 15.8753, + "step": 19119 + }, + { + "epoch": 0.7969655287399442, + "grad_norm": 161.0, + "learning_rate": 1.0426518780873574e-05, + "loss": 11.5632, + "step": 19120 + }, + { + "epoch": 0.7970072110374724, + "grad_norm": 480.0, + "learning_rate": 1.0422393474057479e-05, + "loss": 17.2515, + "step": 19121 + }, + { + "epoch": 0.7970488933350006, + "grad_norm": 616.0, + "learning_rate": 1.041826888854679e-05, + "loss": 21.8752, + "step": 19122 + }, + { + "epoch": 0.7970905756325288, + "grad_norm": 868.0, + "learning_rate": 1.0414145024416665e-05, + "loss": 24.5014, + "step": 19123 + }, + { + "epoch": 0.7971322579300572, + "grad_norm": 252.0, + "learning_rate": 1.041002188174226e-05, + "loss": 12.6877, + "step": 19124 + }, + { + "epoch": 0.7971739402275854, + "grad_norm": 160.0, + "learning_rate": 1.0405899460598723e-05, + "loss": 9.5627, + "step": 19125 + }, + { + "epoch": 0.7972156225251136, + "grad_norm": 300.0, + "learning_rate": 1.0401777761061181e-05, + "loss": 13.6879, + "step": 19126 + }, + { + "epoch": 0.7972573048226418, + "grad_norm": 502.0, + "learning_rate": 1.0397656783204756e-05, + "loss": 12.8787, + "step": 19127 + }, + { + "epoch": 0.7972989871201701, + "grad_norm": 676.0, + "learning_rate": 1.0393536527104547e-05, + "loss": 20.2502, + "step": 19128 + }, + { + "epoch": 0.7973406694176983, + "grad_norm": 470.0, + "learning_rate": 1.038941699283566e-05, + "loss": 16.5002, + "step": 19129 + }, + { + "epoch": 0.7973823517152265, + "grad_norm": 318.0, + "learning_rate": 1.0385298180473158e-05, + "loss": 13.7502, + "step": 19130 + }, + { + "epoch": 0.7974240340127547, + "grad_norm": 264.0, + "learning_rate": 1.0381180090092113e-05, + "loss": 11.0004, + "step": 19131 + }, + { + "epoch": 0.7974657163102831, + "grad_norm": 296.0, + "learning_rate": 1.0377062721767578e-05, + "loss": 13.1255, + "step": 19132 + }, + { + "epoch": 0.7975073986078113, + "grad_norm": 310.0, + "learning_rate": 1.0372946075574596e-05, + "loss": 13.3751, + "step": 19133 + }, + { + "epoch": 0.7975490809053395, + "grad_norm": 139.0, + "learning_rate": 1.0368830151588188e-05, + "loss": 7.8447, + "step": 19134 + }, + { + "epoch": 0.7975907632028677, + "grad_norm": 580.0, + "learning_rate": 1.036471494988337e-05, + "loss": 17.7522, + "step": 19135 + }, + { + "epoch": 0.797632445500396, + "grad_norm": 123.5, + "learning_rate": 1.0360600470535137e-05, + "loss": 10.7504, + "step": 19136 + }, + { + "epoch": 0.7976741277979242, + "grad_norm": 736.0, + "learning_rate": 1.0356486713618468e-05, + "loss": 18.7548, + "step": 19137 + }, + { + "epoch": 0.7977158100954524, + "grad_norm": 149.0, + "learning_rate": 1.0352373679208372e-05, + "loss": 10.5627, + "step": 19138 + }, + { + "epoch": 0.7977574923929807, + "grad_norm": 480.0, + "learning_rate": 1.0348261367379764e-05, + "loss": 17.1258, + "step": 19139 + }, + { + "epoch": 0.797799174690509, + "grad_norm": 233.0, + "learning_rate": 1.0344149778207635e-05, + "loss": 12.1253, + "step": 19140 + }, + { + "epoch": 0.7978408569880372, + "grad_norm": 115.0, + "learning_rate": 1.034003891176687e-05, + "loss": 10.188, + "step": 19141 + }, + { + "epoch": 0.7978825392855654, + "grad_norm": 462.0, + "learning_rate": 1.0335928768132447e-05, + "loss": 16.379, + "step": 19142 + }, + { + "epoch": 0.7979242215830936, + "grad_norm": 420.0, + "learning_rate": 1.0331819347379213e-05, + "loss": 15.1878, + "step": 19143 + }, + { + "epoch": 0.7979659038806219, + "grad_norm": 358.0, + "learning_rate": 1.0327710649582117e-05, + "loss": 11.9377, + "step": 19144 + }, + { + "epoch": 0.7980075861781502, + "grad_norm": 344.0, + "learning_rate": 1.0323602674815997e-05, + "loss": 13.8128, + "step": 19145 + }, + { + "epoch": 0.7980492684756784, + "grad_norm": 92.0, + "learning_rate": 1.0319495423155761e-05, + "loss": 6.3443, + "step": 19146 + }, + { + "epoch": 0.7980909507732066, + "grad_norm": 215.0, + "learning_rate": 1.031538889467622e-05, + "loss": 11.5628, + "step": 19147 + }, + { + "epoch": 0.7981326330707349, + "grad_norm": 33.25, + "learning_rate": 1.031128308945225e-05, + "loss": 6.1566, + "step": 19148 + }, + { + "epoch": 0.7981743153682631, + "grad_norm": 500.0, + "learning_rate": 1.0307178007558671e-05, + "loss": 16.2504, + "step": 19149 + }, + { + "epoch": 0.7982159976657913, + "grad_norm": 460.0, + "learning_rate": 1.03030736490703e-05, + "loss": 15.7546, + "step": 19150 + }, + { + "epoch": 0.7982576799633195, + "grad_norm": 394.0, + "learning_rate": 1.0298970014061926e-05, + "loss": 15.5628, + "step": 19151 + }, + { + "epoch": 0.7982993622608479, + "grad_norm": 211.0, + "learning_rate": 1.0294867102608358e-05, + "loss": 11.8127, + "step": 19152 + }, + { + "epoch": 0.7983410445583761, + "grad_norm": 201.0, + "learning_rate": 1.0290764914784356e-05, + "loss": 10.2503, + "step": 19153 + }, + { + "epoch": 0.7983827268559043, + "grad_norm": 145.0, + "learning_rate": 1.028666345066469e-05, + "loss": 9.2502, + "step": 19154 + }, + { + "epoch": 0.7984244091534325, + "grad_norm": 92.5, + "learning_rate": 1.0282562710324106e-05, + "loss": 9.1257, + "step": 19155 + }, + { + "epoch": 0.7984660914509608, + "grad_norm": 232.0, + "learning_rate": 1.0278462693837348e-05, + "loss": 11.689, + "step": 19156 + }, + { + "epoch": 0.798507773748489, + "grad_norm": 356.0, + "learning_rate": 1.027436340127913e-05, + "loss": 14.6877, + "step": 19157 + }, + { + "epoch": 0.7985494560460172, + "grad_norm": 278.0, + "learning_rate": 1.0270264832724164e-05, + "loss": 12.5627, + "step": 19158 + }, + { + "epoch": 0.7985911383435454, + "grad_norm": 153.0, + "learning_rate": 1.026616698824715e-05, + "loss": 11.0004, + "step": 19159 + }, + { + "epoch": 0.7986328206410738, + "grad_norm": 180.0, + "learning_rate": 1.0262069867922768e-05, + "loss": 11.5007, + "step": 19160 + }, + { + "epoch": 0.798674502938602, + "grad_norm": 768.0, + "learning_rate": 1.0257973471825694e-05, + "loss": 19.0047, + "step": 19161 + }, + { + "epoch": 0.7987161852361302, + "grad_norm": 984.0, + "learning_rate": 1.0253877800030581e-05, + "loss": 23.3752, + "step": 19162 + }, + { + "epoch": 0.7987578675336584, + "grad_norm": 392.0, + "learning_rate": 1.0249782852612056e-05, + "loss": 14.9381, + "step": 19163 + }, + { + "epoch": 0.7987995498311867, + "grad_norm": 352.0, + "learning_rate": 1.0245688629644796e-05, + "loss": 14.0627, + "step": 19164 + }, + { + "epoch": 0.7988412321287149, + "grad_norm": 264.0, + "learning_rate": 1.0241595131203364e-05, + "loss": 10.7515, + "step": 19165 + }, + { + "epoch": 0.7988829144262432, + "grad_norm": 808.0, + "learning_rate": 1.0237502357362416e-05, + "loss": 19.2549, + "step": 19166 + }, + { + "epoch": 0.7989245967237714, + "grad_norm": 328.0, + "learning_rate": 1.0233410308196495e-05, + "loss": 14.5628, + "step": 19167 + }, + { + "epoch": 0.7989662790212997, + "grad_norm": 688.0, + "learning_rate": 1.0229318983780223e-05, + "loss": 22.0002, + "step": 19168 + }, + { + "epoch": 0.7990079613188279, + "grad_norm": 130.0, + "learning_rate": 1.0225228384188119e-05, + "loss": 5.7816, + "step": 19169 + }, + { + "epoch": 0.7990496436163561, + "grad_norm": 486.0, + "learning_rate": 1.0221138509494782e-05, + "loss": 15.4377, + "step": 19170 + }, + { + "epoch": 0.7990913259138843, + "grad_norm": 169.0, + "learning_rate": 1.0217049359774705e-05, + "loss": 10.1877, + "step": 19171 + }, + { + "epoch": 0.7991330082114126, + "grad_norm": 376.0, + "learning_rate": 1.0212960935102445e-05, + "loss": 14.8756, + "step": 19172 + }, + { + "epoch": 0.7991746905089409, + "grad_norm": 636.0, + "learning_rate": 1.0208873235552501e-05, + "loss": 17.7535, + "step": 19173 + }, + { + "epoch": 0.7992163728064691, + "grad_norm": 256.0, + "learning_rate": 1.0204786261199378e-05, + "loss": 11.0629, + "step": 19174 + }, + { + "epoch": 0.7992580551039973, + "grad_norm": 394.0, + "learning_rate": 1.0200700012117559e-05, + "loss": 14.8754, + "step": 19175 + }, + { + "epoch": 0.7992997374015256, + "grad_norm": 149.0, + "learning_rate": 1.0196614488381518e-05, + "loss": 9.6252, + "step": 19176 + }, + { + "epoch": 0.7993414196990538, + "grad_norm": 113.0, + "learning_rate": 1.0192529690065705e-05, + "loss": 8.9382, + "step": 19177 + }, + { + "epoch": 0.799383101996582, + "grad_norm": 249.0, + "learning_rate": 1.0188445617244574e-05, + "loss": 11.5629, + "step": 19178 + }, + { + "epoch": 0.7994247842941103, + "grad_norm": 452.0, + "learning_rate": 1.018436226999256e-05, + "loss": 17.2504, + "step": 19179 + }, + { + "epoch": 0.7994664665916386, + "grad_norm": 510.0, + "learning_rate": 1.0180279648384077e-05, + "loss": 16.2532, + "step": 19180 + }, + { + "epoch": 0.7995081488891668, + "grad_norm": 296.0, + "learning_rate": 1.0176197752493528e-05, + "loss": 12.3753, + "step": 19181 + }, + { + "epoch": 0.799549831186695, + "grad_norm": 410.0, + "learning_rate": 1.0172116582395313e-05, + "loss": 15.2511, + "step": 19182 + }, + { + "epoch": 0.7995915134842233, + "grad_norm": 364.0, + "learning_rate": 1.0168036138163811e-05, + "loss": 14.5628, + "step": 19183 + }, + { + "epoch": 0.7996331957817515, + "grad_norm": 596.0, + "learning_rate": 1.0163956419873382e-05, + "loss": 19.6253, + "step": 19184 + }, + { + "epoch": 0.7996748780792797, + "grad_norm": 492.0, + "learning_rate": 1.0159877427598386e-05, + "loss": 18.5001, + "step": 19185 + }, + { + "epoch": 0.7997165603768079, + "grad_norm": 338.0, + "learning_rate": 1.0155799161413159e-05, + "loss": 14.5003, + "step": 19186 + }, + { + "epoch": 0.7997582426743363, + "grad_norm": 201.0, + "learning_rate": 1.0151721621392036e-05, + "loss": 11.4381, + "step": 19187 + }, + { + "epoch": 0.7997999249718645, + "grad_norm": 170.0, + "learning_rate": 1.0147644807609313e-05, + "loss": 9.6877, + "step": 19188 + }, + { + "epoch": 0.7998416072693927, + "grad_norm": 255.0, + "learning_rate": 1.0143568720139295e-05, + "loss": 11.6259, + "step": 19189 + }, + { + "epoch": 0.7998832895669209, + "grad_norm": 412.0, + "learning_rate": 1.0139493359056302e-05, + "loss": 14.3752, + "step": 19190 + }, + { + "epoch": 0.7999249718644492, + "grad_norm": 119.5, + "learning_rate": 1.0135418724434553e-05, + "loss": 7.6564, + "step": 19191 + }, + { + "epoch": 0.7999666541619774, + "grad_norm": 406.0, + "learning_rate": 1.013134481634836e-05, + "loss": 14.8753, + "step": 19192 + }, + { + "epoch": 0.8000083364595056, + "grad_norm": 402.0, + "learning_rate": 1.0127271634871927e-05, + "loss": 16.2502, + "step": 19193 + }, + { + "epoch": 0.8000500187570339, + "grad_norm": 214.0, + "learning_rate": 1.0123199180079529e-05, + "loss": 11.1255, + "step": 19194 + }, + { + "epoch": 0.8000917010545622, + "grad_norm": 198.0, + "learning_rate": 1.0119127452045341e-05, + "loss": 11.0002, + "step": 19195 + }, + { + "epoch": 0.8001333833520904, + "grad_norm": 243.0, + "learning_rate": 1.0115056450843624e-05, + "loss": 12.3756, + "step": 19196 + }, + { + "epoch": 0.8001750656496186, + "grad_norm": 102.5, + "learning_rate": 1.0110986176548514e-05, + "loss": 10.3128, + "step": 19197 + }, + { + "epoch": 0.8002167479471468, + "grad_norm": 66.0, + "learning_rate": 1.0106916629234236e-05, + "loss": 8.1256, + "step": 19198 + }, + { + "epoch": 0.8002584302446751, + "grad_norm": 181.0, + "learning_rate": 1.0102847808974947e-05, + "loss": 11.5006, + "step": 19199 + }, + { + "epoch": 0.8003001125422033, + "grad_norm": 466.0, + "learning_rate": 1.0098779715844798e-05, + "loss": 16.7505, + "step": 19200 + } + ], + "logging_steps": 1.0, + "max_steps": 23991, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 4800, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}