| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 1440, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003472222222222222, |
| "grad_norm": 5.6689868084883654, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 0.876, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006944444444444444, |
| "grad_norm": 5.656438093524862, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 0.8718, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.010416666666666666, |
| "grad_norm": 5.547289268485405, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.8636, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.013888888888888888, |
| "grad_norm": 5.673415707884681, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 0.8824, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.017361111111111112, |
| "grad_norm": 5.240294345520099, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 0.8563, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.020833333333333332, |
| "grad_norm": 4.272810613406847, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.8199, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.024305555555555556, |
| "grad_norm": 2.2685991272193733, |
| "learning_rate": 3.88888888888889e-06, |
| "loss": 0.7568, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.027777777777777776, |
| "grad_norm": 2.028178792401538, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 0.7485, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03125, |
| "grad_norm": 2.1236911806845473, |
| "learning_rate": 5e-06, |
| "loss": 0.7176, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.034722222222222224, |
| "grad_norm": 3.3729211398567163, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.7357, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03819444444444445, |
| "grad_norm": 3.259391674591041, |
| "learning_rate": 6.111111111111112e-06, |
| "loss": 0.709, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.041666666666666664, |
| "grad_norm": 2.9926292324873796, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.704, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04513888888888889, |
| "grad_norm": 2.66382030319957, |
| "learning_rate": 7.222222222222223e-06, |
| "loss": 0.6683, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04861111111111111, |
| "grad_norm": 2.3345720474288725, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.6581, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.052083333333333336, |
| "grad_norm": 1.6384019103841372, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.6456, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05555555555555555, |
| "grad_norm": 1.3665817465424255, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.6188, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.059027777777777776, |
| "grad_norm": 1.8050485428474035, |
| "learning_rate": 9.444444444444445e-06, |
| "loss": 0.6289, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 1.8521082144961154, |
| "learning_rate": 1e-05, |
| "loss": 0.6167, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06597222222222222, |
| "grad_norm": 1.2977695822726278, |
| "learning_rate": 1.0555555555555557e-05, |
| "loss": 0.6083, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.06944444444444445, |
| "grad_norm": 0.9050793342553565, |
| "learning_rate": 1.1111111111111113e-05, |
| "loss": 0.5946, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07291666666666667, |
| "grad_norm": 1.1391127461928305, |
| "learning_rate": 1.1666666666666668e-05, |
| "loss": 0.5798, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0763888888888889, |
| "grad_norm": 0.9433031328354344, |
| "learning_rate": 1.2222222222222224e-05, |
| "loss": 0.5818, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0798611111111111, |
| "grad_norm": 0.6628303547284489, |
| "learning_rate": 1.2777777777777777e-05, |
| "loss": 0.5756, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.08333333333333333, |
| "grad_norm": 0.9347756103973528, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.5705, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08680555555555555, |
| "grad_norm": 0.8106549440748502, |
| "learning_rate": 1.388888888888889e-05, |
| "loss": 0.5543, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.09027777777777778, |
| "grad_norm": 0.567510927420338, |
| "learning_rate": 1.4444444444444446e-05, |
| "loss": 0.5543, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.09375, |
| "grad_norm": 0.6921706937343025, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.5549, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.09722222222222222, |
| "grad_norm": 0.7027125894979898, |
| "learning_rate": 1.555555555555556e-05, |
| "loss": 0.5552, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.10069444444444445, |
| "grad_norm": 0.4234236865726793, |
| "learning_rate": 1.6111111111111115e-05, |
| "loss": 0.5496, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.10416666666666667, |
| "grad_norm": 0.636600259227426, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.5476, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1076388888888889, |
| "grad_norm": 0.4880874196057493, |
| "learning_rate": 1.7222222222222224e-05, |
| "loss": 0.5438, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 0.4615043705286091, |
| "learning_rate": 1.7777777777777777e-05, |
| "loss": 0.5417, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.11458333333333333, |
| "grad_norm": 0.4909859309922507, |
| "learning_rate": 1.8333333333333333e-05, |
| "loss": 0.5294, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.11805555555555555, |
| "grad_norm": 0.4107390902800856, |
| "learning_rate": 1.888888888888889e-05, |
| "loss": 0.5259, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.12152777777777778, |
| "grad_norm": 0.4843241567748437, |
| "learning_rate": 1.9444444444444445e-05, |
| "loss": 0.5183, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.402362602696457, |
| "learning_rate": 2e-05, |
| "loss": 0.5233, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1284722222222222, |
| "grad_norm": 0.4849278121412402, |
| "learning_rate": 2.0555555555555555e-05, |
| "loss": 0.5166, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.13194444444444445, |
| "grad_norm": 0.40344157604040815, |
| "learning_rate": 2.1111111111111114e-05, |
| "loss": 0.5242, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.13541666666666666, |
| "grad_norm": 0.439230264488894, |
| "learning_rate": 2.1666666666666667e-05, |
| "loss": 0.5164, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.1388888888888889, |
| "grad_norm": 0.4220862849771054, |
| "learning_rate": 2.2222222222222227e-05, |
| "loss": 0.5136, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1423611111111111, |
| "grad_norm": 0.5279859076306369, |
| "learning_rate": 2.277777777777778e-05, |
| "loss": 0.5122, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.14583333333333334, |
| "grad_norm": 0.46244478908007053, |
| "learning_rate": 2.3333333333333336e-05, |
| "loss": 0.5118, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.14930555555555555, |
| "grad_norm": 0.4683599686272933, |
| "learning_rate": 2.388888888888889e-05, |
| "loss": 0.514, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1527777777777778, |
| "grad_norm": 0.5322715297704302, |
| "learning_rate": 2.444444444444445e-05, |
| "loss": 0.4971, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.15625, |
| "grad_norm": 0.6507970131103591, |
| "learning_rate": 2.5e-05, |
| "loss": 0.4963, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1597222222222222, |
| "grad_norm": 0.714750788035846, |
| "learning_rate": 2.5555555555555554e-05, |
| "loss": 0.5134, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.16319444444444445, |
| "grad_norm": 0.7687601059434547, |
| "learning_rate": 2.6111111111111114e-05, |
| "loss": 0.5064, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 0.8399521816080617, |
| "learning_rate": 2.6666666666666667e-05, |
| "loss": 0.4906, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1701388888888889, |
| "grad_norm": 0.889997192400381, |
| "learning_rate": 2.7222222222222226e-05, |
| "loss": 0.5032, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.1736111111111111, |
| "grad_norm": 1.0475698420911328, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 0.493, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.17708333333333334, |
| "grad_norm": 1.118833819111554, |
| "learning_rate": 2.833333333333334e-05, |
| "loss": 0.4974, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.18055555555555555, |
| "grad_norm": 0.8024919998359595, |
| "learning_rate": 2.888888888888889e-05, |
| "loss": 0.4865, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1840277777777778, |
| "grad_norm": 0.8606974505869477, |
| "learning_rate": 2.9444444444444448e-05, |
| "loss": 0.4926, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 0.7881779672296356, |
| "learning_rate": 3.0000000000000004e-05, |
| "loss": 0.4878, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1909722222222222, |
| "grad_norm": 0.6080495175198938, |
| "learning_rate": 3.0555555555555554e-05, |
| "loss": 0.4884, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.19444444444444445, |
| "grad_norm": 0.807309170648098, |
| "learning_rate": 3.111111111111112e-05, |
| "loss": 0.4826, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.19791666666666666, |
| "grad_norm": 0.9813713400574569, |
| "learning_rate": 3.1666666666666666e-05, |
| "loss": 0.4858, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2013888888888889, |
| "grad_norm": 1.0361441991825402, |
| "learning_rate": 3.222222222222223e-05, |
| "loss": 0.4919, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2048611111111111, |
| "grad_norm": 0.8868025871110543, |
| "learning_rate": 3.277777777777778e-05, |
| "loss": 0.4824, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.20833333333333334, |
| "grad_norm": 0.9288701717203051, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.4894, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.21180555555555555, |
| "grad_norm": 1.0162786242178787, |
| "learning_rate": 3.388888888888889e-05, |
| "loss": 0.4859, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2152777777777778, |
| "grad_norm": 1.1593588998766855, |
| "learning_rate": 3.444444444444445e-05, |
| "loss": 0.4801, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.21875, |
| "grad_norm": 1.0130142454064106, |
| "learning_rate": 3.5000000000000004e-05, |
| "loss": 0.4867, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 1.1339138891874543, |
| "learning_rate": 3.555555555555555e-05, |
| "loss": 0.4801, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.22569444444444445, |
| "grad_norm": 0.9167679815009071, |
| "learning_rate": 3.6111111111111116e-05, |
| "loss": 0.472, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.22916666666666666, |
| "grad_norm": 0.9957122622820357, |
| "learning_rate": 3.6666666666666666e-05, |
| "loss": 0.4782, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.2326388888888889, |
| "grad_norm": 1.2768722683777673, |
| "learning_rate": 3.722222222222223e-05, |
| "loss": 0.4794, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2361111111111111, |
| "grad_norm": 0.6981900383415166, |
| "learning_rate": 3.777777777777778e-05, |
| "loss": 0.4733, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.23958333333333334, |
| "grad_norm": 1.0133076333409807, |
| "learning_rate": 3.833333333333334e-05, |
| "loss": 0.4752, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.24305555555555555, |
| "grad_norm": 1.7404120248946109, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 0.4817, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2465277777777778, |
| "grad_norm": 0.6651429804201384, |
| "learning_rate": 3.944444444444445e-05, |
| "loss": 0.4797, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.3281889521219488, |
| "learning_rate": 4e-05, |
| "loss": 0.4813, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.2534722222222222, |
| "grad_norm": 1.3476271982813572, |
| "learning_rate": 4.055555555555556e-05, |
| "loss": 0.4782, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2569444444444444, |
| "grad_norm": 2.591174546004534, |
| "learning_rate": 4.111111111111111e-05, |
| "loss": 0.4863, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.2604166666666667, |
| "grad_norm": 2.755027938216314, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 0.4898, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2638888888888889, |
| "grad_norm": 1.0484653191558952, |
| "learning_rate": 4.222222222222223e-05, |
| "loss": 0.4714, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.2673611111111111, |
| "grad_norm": 1.8159914872532417, |
| "learning_rate": 4.277777777777778e-05, |
| "loss": 0.486, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2708333333333333, |
| "grad_norm": 1.575533113972724, |
| "learning_rate": 4.3333333333333334e-05, |
| "loss": 0.4998, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2743055555555556, |
| "grad_norm": 1.247189140013317, |
| "learning_rate": 4.38888888888889e-05, |
| "loss": 0.472, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 1.8283011598184224, |
| "learning_rate": 4.444444444444445e-05, |
| "loss": 0.4896, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.28125, |
| "grad_norm": 1.8109706505904477, |
| "learning_rate": 4.5e-05, |
| "loss": 0.4881, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.2847222222222222, |
| "grad_norm": 1.057346335127151, |
| "learning_rate": 4.555555555555556e-05, |
| "loss": 0.4832, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.2881944444444444, |
| "grad_norm": 1.7646694577951128, |
| "learning_rate": 4.611111111111111e-05, |
| "loss": 0.4832, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.2916666666666667, |
| "grad_norm": 1.4140355829700804, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.4811, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.2951388888888889, |
| "grad_norm": 1.2593733584850433, |
| "learning_rate": 4.722222222222223e-05, |
| "loss": 0.4663, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2986111111111111, |
| "grad_norm": 1.3968943399622709, |
| "learning_rate": 4.777777777777778e-05, |
| "loss": 0.4778, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.3020833333333333, |
| "grad_norm": 1.1507601425129197, |
| "learning_rate": 4.8333333333333334e-05, |
| "loss": 0.4725, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.3055555555555556, |
| "grad_norm": 1.5302822908979552, |
| "learning_rate": 4.88888888888889e-05, |
| "loss": 0.4824, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3090277777777778, |
| "grad_norm": 0.9723391769006969, |
| "learning_rate": 4.944444444444445e-05, |
| "loss": 0.4777, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 1.3992163636274015, |
| "learning_rate": 5e-05, |
| "loss": 0.4757, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3159722222222222, |
| "grad_norm": 0.799812937993386, |
| "learning_rate": 5.055555555555556e-05, |
| "loss": 0.4663, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.3194444444444444, |
| "grad_norm": 0.9859358600047391, |
| "learning_rate": 5.111111111111111e-05, |
| "loss": 0.4683, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.3229166666666667, |
| "grad_norm": 1.2225448020462069, |
| "learning_rate": 5.166666666666667e-05, |
| "loss": 0.4798, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3263888888888889, |
| "grad_norm": 1.1666553572392628, |
| "learning_rate": 5.222222222222223e-05, |
| "loss": 0.4738, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.3298611111111111, |
| "grad_norm": 1.65630200439605, |
| "learning_rate": 5.2777777777777784e-05, |
| "loss": 0.4815, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 1.0280119977292617, |
| "learning_rate": 5.333333333333333e-05, |
| "loss": 0.4631, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3368055555555556, |
| "grad_norm": 1.2759438689338, |
| "learning_rate": 5.3888888888888896e-05, |
| "loss": 0.4633, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.3402777777777778, |
| "grad_norm": 1.2013225394816978, |
| "learning_rate": 5.444444444444445e-05, |
| "loss": 0.4644, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.34375, |
| "grad_norm": 0.9467045454954154, |
| "learning_rate": 5.5e-05, |
| "loss": 0.4666, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.3472222222222222, |
| "grad_norm": 1.0593628732980047, |
| "learning_rate": 5.555555555555556e-05, |
| "loss": 0.4642, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3506944444444444, |
| "grad_norm": 1.304024409909431, |
| "learning_rate": 5.6111111111111114e-05, |
| "loss": 0.4741, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.3541666666666667, |
| "grad_norm": 0.9810723420807926, |
| "learning_rate": 5.666666666666668e-05, |
| "loss": 0.4622, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3576388888888889, |
| "grad_norm": 1.3123102030562221, |
| "learning_rate": 5.722222222222223e-05, |
| "loss": 0.4615, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.3611111111111111, |
| "grad_norm": 1.178298412260806, |
| "learning_rate": 5.777777777777778e-05, |
| "loss": 0.4599, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3645833333333333, |
| "grad_norm": 1.4401758010598742, |
| "learning_rate": 5.833333333333333e-05, |
| "loss": 0.4595, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3680555555555556, |
| "grad_norm": 0.9403895604831765, |
| "learning_rate": 5.8888888888888896e-05, |
| "loss": 0.4646, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3715277777777778, |
| "grad_norm": 1.3214554536185026, |
| "learning_rate": 5.944444444444445e-05, |
| "loss": 0.4692, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.7898763356241624, |
| "learning_rate": 6.000000000000001e-05, |
| "loss": 0.4745, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3784722222222222, |
| "grad_norm": 1.3785771659946036, |
| "learning_rate": 6.055555555555556e-05, |
| "loss": 0.4656, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.3819444444444444, |
| "grad_norm": 0.8438355733572759, |
| "learning_rate": 6.111111111111111e-05, |
| "loss": 0.4658, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3854166666666667, |
| "grad_norm": 0.9301929891119248, |
| "learning_rate": 6.166666666666667e-05, |
| "loss": 0.4676, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3888888888888889, |
| "grad_norm": 1.1359808326811387, |
| "learning_rate": 6.222222222222223e-05, |
| "loss": 0.4682, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3923611111111111, |
| "grad_norm": 0.6819036135597224, |
| "learning_rate": 6.277777777777778e-05, |
| "loss": 0.4638, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3958333333333333, |
| "grad_norm": 1.1784222230236077, |
| "learning_rate": 6.333333333333333e-05, |
| "loss": 0.4721, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3993055555555556, |
| "grad_norm": 1.0972045878518617, |
| "learning_rate": 6.38888888888889e-05, |
| "loss": 0.4564, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.4027777777777778, |
| "grad_norm": 1.1942570539893864, |
| "learning_rate": 6.444444444444446e-05, |
| "loss": 0.4606, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.40625, |
| "grad_norm": 1.3066111752440024, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 0.4574, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.4097222222222222, |
| "grad_norm": 1.2705685345556148, |
| "learning_rate": 6.555555555555556e-05, |
| "loss": 0.4608, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.4131944444444444, |
| "grad_norm": 1.0800640730680313, |
| "learning_rate": 6.611111111111111e-05, |
| "loss": 0.4689, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 1.48126351719224, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 0.4651, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4201388888888889, |
| "grad_norm": 1.1069560673762247, |
| "learning_rate": 6.722222222222223e-05, |
| "loss": 0.4617, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.4236111111111111, |
| "grad_norm": 1.5000892020623857, |
| "learning_rate": 6.777777777777778e-05, |
| "loss": 0.4659, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.4270833333333333, |
| "grad_norm": 1.18006794714587, |
| "learning_rate": 6.833333333333333e-05, |
| "loss": 0.4618, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.4305555555555556, |
| "grad_norm": 1.2462151808344257, |
| "learning_rate": 6.88888888888889e-05, |
| "loss": 0.4594, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.4340277777777778, |
| "grad_norm": 1.0493304940666723, |
| "learning_rate": 6.944444444444446e-05, |
| "loss": 0.4625, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4375, |
| "grad_norm": 1.6573194961926394, |
| "learning_rate": 7.000000000000001e-05, |
| "loss": 0.4604, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.4409722222222222, |
| "grad_norm": 0.8417297410097049, |
| "learning_rate": 7.055555555555556e-05, |
| "loss": 0.4522, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 1.5116369884276502, |
| "learning_rate": 7.11111111111111e-05, |
| "loss": 0.4664, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4479166666666667, |
| "grad_norm": 1.098767994124789, |
| "learning_rate": 7.166666666666667e-05, |
| "loss": 0.454, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.4513888888888889, |
| "grad_norm": 1.4427870933514884, |
| "learning_rate": 7.222222222222223e-05, |
| "loss": 0.4581, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4548611111111111, |
| "grad_norm": 1.131214917074712, |
| "learning_rate": 7.277777777777778e-05, |
| "loss": 0.463, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.4583333333333333, |
| "grad_norm": 1.1124160125629599, |
| "learning_rate": 7.333333333333333e-05, |
| "loss": 0.455, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.4618055555555556, |
| "grad_norm": 1.4234752545924882, |
| "learning_rate": 7.38888888888889e-05, |
| "loss": 0.4619, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.4652777777777778, |
| "grad_norm": 1.1724697852891888, |
| "learning_rate": 7.444444444444446e-05, |
| "loss": 0.4494, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.46875, |
| "grad_norm": 1.3419661610878133, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.4628, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4722222222222222, |
| "grad_norm": 1.057112319107508, |
| "learning_rate": 7.555555555555556e-05, |
| "loss": 0.4547, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.4756944444444444, |
| "grad_norm": 1.3297790190386298, |
| "learning_rate": 7.611111111111112e-05, |
| "loss": 0.4658, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.4791666666666667, |
| "grad_norm": 1.080019562308979, |
| "learning_rate": 7.666666666666668e-05, |
| "loss": 0.4519, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4826388888888889, |
| "grad_norm": 1.0209172735208736, |
| "learning_rate": 7.722222222222223e-05, |
| "loss": 0.4571, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.4861111111111111, |
| "grad_norm": 1.284571191682376, |
| "learning_rate": 7.777777777777778e-05, |
| "loss": 0.4632, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4895833333333333, |
| "grad_norm": 1.243779273272225, |
| "learning_rate": 7.833333333333333e-05, |
| "loss": 0.4556, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.4930555555555556, |
| "grad_norm": 1.5929814480067013, |
| "learning_rate": 7.88888888888889e-05, |
| "loss": 0.4628, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4965277777777778, |
| "grad_norm": 0.8996686117779537, |
| "learning_rate": 7.944444444444446e-05, |
| "loss": 0.4616, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.4114637381579962, |
| "learning_rate": 8e-05, |
| "loss": 0.4579, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.5034722222222222, |
| "grad_norm": 1.1413142228974857, |
| "learning_rate": 7.999988247790486e-05, |
| "loss": 0.4524, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5069444444444444, |
| "grad_norm": 1.2535207264099173, |
| "learning_rate": 7.999952991230999e-05, |
| "loss": 0.4547, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.5104166666666666, |
| "grad_norm": 0.944579007044323, |
| "learning_rate": 7.99989423052871e-05, |
| "loss": 0.449, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.5138888888888888, |
| "grad_norm": 1.1702494630139326, |
| "learning_rate": 7.999811966028904e-05, |
| "loss": 0.4542, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.5173611111111112, |
| "grad_norm": 1.2981796057689705, |
| "learning_rate": 7.999706198214977e-05, |
| "loss": 0.4499, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.5208333333333334, |
| "grad_norm": 0.9795884203661855, |
| "learning_rate": 7.99957692770843e-05, |
| "loss": 0.4427, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5243055555555556, |
| "grad_norm": 1.2125274010863194, |
| "learning_rate": 7.999424155268872e-05, |
| "loss": 0.4554, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.5277777777777778, |
| "grad_norm": 0.7608263775470029, |
| "learning_rate": 7.999247881794007e-05, |
| "loss": 0.4543, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.53125, |
| "grad_norm": 0.8988441693273684, |
| "learning_rate": 7.999048108319636e-05, |
| "loss": 0.454, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.5347222222222222, |
| "grad_norm": 0.9604446856356788, |
| "learning_rate": 7.998824836019654e-05, |
| "loss": 0.4518, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.5381944444444444, |
| "grad_norm": 1.465682474436967, |
| "learning_rate": 7.998578066206027e-05, |
| "loss": 0.4553, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5416666666666666, |
| "grad_norm": 0.9953984939217686, |
| "learning_rate": 7.998307800328803e-05, |
| "loss": 0.4487, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.5451388888888888, |
| "grad_norm": 1.14883284369783, |
| "learning_rate": 7.998014039976093e-05, |
| "loss": 0.4504, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5486111111111112, |
| "grad_norm": 1.1463500627507957, |
| "learning_rate": 7.99769678687406e-05, |
| "loss": 0.4458, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5520833333333334, |
| "grad_norm": 1.7406399039819629, |
| "learning_rate": 7.997356042886921e-05, |
| "loss": 0.4476, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.9677522133096546, |
| "learning_rate": 7.996991810016922e-05, |
| "loss": 0.454, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5590277777777778, |
| "grad_norm": 2.3399797692246738, |
| "learning_rate": 7.996604090404331e-05, |
| "loss": 0.46, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5625, |
| "grad_norm": 1.769583224289047, |
| "learning_rate": 7.996192886327432e-05, |
| "loss": 0.4635, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5659722222222222, |
| "grad_norm": 1.5805102265354327, |
| "learning_rate": 7.995758200202502e-05, |
| "loss": 0.4532, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5694444444444444, |
| "grad_norm": 1.3241297914657204, |
| "learning_rate": 7.995300034583802e-05, |
| "loss": 0.4514, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5729166666666666, |
| "grad_norm": 1.2125784678413019, |
| "learning_rate": 7.994818392163563e-05, |
| "loss": 0.4451, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5763888888888888, |
| "grad_norm": 1.2227272948654673, |
| "learning_rate": 7.994313275771963e-05, |
| "loss": 0.4479, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5798611111111112, |
| "grad_norm": 0.9281571542890602, |
| "learning_rate": 7.993784688377122e-05, |
| "loss": 0.4501, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5833333333333334, |
| "grad_norm": 0.8929936296876618, |
| "learning_rate": 7.993232633085074e-05, |
| "loss": 0.445, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5868055555555556, |
| "grad_norm": 1.238670375147291, |
| "learning_rate": 7.992657113139751e-05, |
| "loss": 0.455, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5902777777777778, |
| "grad_norm": 1.0415211643774225, |
| "learning_rate": 7.992058131922974e-05, |
| "loss": 0.4427, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.59375, |
| "grad_norm": 1.5710938827548118, |
| "learning_rate": 7.991435692954414e-05, |
| "loss": 0.4468, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5972222222222222, |
| "grad_norm": 0.8839063532349627, |
| "learning_rate": 7.990789799891592e-05, |
| "loss": 0.4445, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.6006944444444444, |
| "grad_norm": 1.697345060366342, |
| "learning_rate": 7.99012045652984e-05, |
| "loss": 0.4552, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.6041666666666666, |
| "grad_norm": 0.9164694087509572, |
| "learning_rate": 7.98942766680229e-05, |
| "loss": 0.4547, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.6076388888888888, |
| "grad_norm": 1.5276845461738495, |
| "learning_rate": 7.988711434779849e-05, |
| "loss": 0.4538, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6111111111111112, |
| "grad_norm": 0.8693062056237906, |
| "learning_rate": 7.987971764671168e-05, |
| "loss": 0.4468, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.6145833333333334, |
| "grad_norm": 1.3482324568848507, |
| "learning_rate": 7.987208660822631e-05, |
| "loss": 0.4393, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.6180555555555556, |
| "grad_norm": 1.0523927613249133, |
| "learning_rate": 7.986422127718312e-05, |
| "loss": 0.4468, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.6215277777777778, |
| "grad_norm": 1.2102039495362458, |
| "learning_rate": 7.985612169979964e-05, |
| "loss": 0.4473, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 1.041791299494975, |
| "learning_rate": 7.984778792366983e-05, |
| "loss": 0.4482, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6284722222222222, |
| "grad_norm": 0.9702144942742554, |
| "learning_rate": 7.983921999776381e-05, |
| "loss": 0.4456, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.6319444444444444, |
| "grad_norm": 0.9634257079158082, |
| "learning_rate": 7.983041797242766e-05, |
| "loss": 0.4464, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.6354166666666666, |
| "grad_norm": 1.2159853741470283, |
| "learning_rate": 7.982138189938296e-05, |
| "loss": 0.4495, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.6388888888888888, |
| "grad_norm": 0.9230250746367079, |
| "learning_rate": 7.981211183172663e-05, |
| "loss": 0.4473, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.6423611111111112, |
| "grad_norm": 0.8276373290152851, |
| "learning_rate": 7.980260782393058e-05, |
| "loss": 0.4439, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6458333333333334, |
| "grad_norm": 0.9210417068141048, |
| "learning_rate": 7.979286993184134e-05, |
| "loss": 0.4481, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.6493055555555556, |
| "grad_norm": 0.9826533521079801, |
| "learning_rate": 7.978289821267976e-05, |
| "loss": 0.4466, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6527777777777778, |
| "grad_norm": 1.116467479939209, |
| "learning_rate": 7.977269272504075e-05, |
| "loss": 0.4426, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.65625, |
| "grad_norm": 1.231932756904634, |
| "learning_rate": 7.976225352889278e-05, |
| "loss": 0.445, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6597222222222222, |
| "grad_norm": 0.5726339489471363, |
| "learning_rate": 7.975158068557771e-05, |
| "loss": 0.4398, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6631944444444444, |
| "grad_norm": 0.8001387375399556, |
| "learning_rate": 7.974067425781025e-05, |
| "loss": 0.4398, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.2250064920535346, |
| "learning_rate": 7.972953430967773e-05, |
| "loss": 0.4411, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.6701388888888888, |
| "grad_norm": 0.6534744526647841, |
| "learning_rate": 7.971816090663963e-05, |
| "loss": 0.4502, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.6736111111111112, |
| "grad_norm": 0.5725567793011336, |
| "learning_rate": 7.970655411552728e-05, |
| "loss": 0.4389, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.6770833333333334, |
| "grad_norm": 0.7150266154502478, |
| "learning_rate": 7.96947140045434e-05, |
| "loss": 0.4359, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6805555555555556, |
| "grad_norm": 0.6861133770491901, |
| "learning_rate": 7.96826406432617e-05, |
| "loss": 0.4293, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6840277777777778, |
| "grad_norm": 0.5267677299616069, |
| "learning_rate": 7.967033410262653e-05, |
| "loss": 0.4411, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6875, |
| "grad_norm": 0.7064713153906993, |
| "learning_rate": 7.965779445495243e-05, |
| "loss": 0.4409, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6909722222222222, |
| "grad_norm": 0.7898210534003849, |
| "learning_rate": 7.964502177392363e-05, |
| "loss": 0.4414, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.6944444444444444, |
| "grad_norm": 1.0600333307371903, |
| "learning_rate": 7.963201613459381e-05, |
| "loss": 0.4497, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6979166666666666, |
| "grad_norm": 1.3046328917134082, |
| "learning_rate": 7.961877761338545e-05, |
| "loss": 0.442, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.7013888888888888, |
| "grad_norm": 0.844190070594073, |
| "learning_rate": 7.960530628808944e-05, |
| "loss": 0.4377, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.7048611111111112, |
| "grad_norm": 0.9955414997984852, |
| "learning_rate": 7.959160223786475e-05, |
| "loss": 0.4377, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.7083333333333334, |
| "grad_norm": 1.0186927163629396, |
| "learning_rate": 7.957766554323778e-05, |
| "loss": 0.4407, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.7118055555555556, |
| "grad_norm": 1.6996204406204833, |
| "learning_rate": 7.956349628610204e-05, |
| "loss": 0.4465, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7152777777777778, |
| "grad_norm": 0.8404953299598265, |
| "learning_rate": 7.954909454971756e-05, |
| "loss": 0.4422, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.71875, |
| "grad_norm": 1.9618717386693068, |
| "learning_rate": 7.953446041871044e-05, |
| "loss": 0.4514, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.7222222222222222, |
| "grad_norm": 1.0737999034049264, |
| "learning_rate": 7.951959397907237e-05, |
| "loss": 0.442, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.7256944444444444, |
| "grad_norm": 2.2317162303577955, |
| "learning_rate": 7.950449531816011e-05, |
| "loss": 0.4645, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.7291666666666666, |
| "grad_norm": 2.073214361985945, |
| "learning_rate": 7.948916452469497e-05, |
| "loss": 0.4612, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7326388888888888, |
| "grad_norm": 1.0851082661931841, |
| "learning_rate": 7.947360168876231e-05, |
| "loss": 0.4396, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.7361111111111112, |
| "grad_norm": 1.3106986814089487, |
| "learning_rate": 7.945780690181096e-05, |
| "loss": 0.4517, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.7395833333333334, |
| "grad_norm": 0.8570698706313222, |
| "learning_rate": 7.944178025665277e-05, |
| "loss": 0.4538, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.7430555555555556, |
| "grad_norm": 1.3386762857789183, |
| "learning_rate": 7.942552184746196e-05, |
| "loss": 0.4416, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.7465277777777778, |
| "grad_norm": 1.0014581322428484, |
| "learning_rate": 7.940903176977469e-05, |
| "loss": 0.4523, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.0734898711169398, |
| "learning_rate": 7.939231012048833e-05, |
| "loss": 0.4447, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.7534722222222222, |
| "grad_norm": 0.993341523519617, |
| "learning_rate": 7.937535699786107e-05, |
| "loss": 0.45, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.7569444444444444, |
| "grad_norm": 0.6652791910828864, |
| "learning_rate": 7.935817250151124e-05, |
| "loss": 0.4324, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.7604166666666666, |
| "grad_norm": 0.6971295516673278, |
| "learning_rate": 7.934075673241672e-05, |
| "loss": 0.4426, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.7638888888888888, |
| "grad_norm": 0.673431333145143, |
| "learning_rate": 7.932310979291441e-05, |
| "loss": 0.4324, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7673611111111112, |
| "grad_norm": 0.7732137215225624, |
| "learning_rate": 7.930523178669956e-05, |
| "loss": 0.4454, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.7708333333333334, |
| "grad_norm": 0.4823440611357369, |
| "learning_rate": 7.928712281882523e-05, |
| "loss": 0.4291, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.7743055555555556, |
| "grad_norm": 0.7868778607572823, |
| "learning_rate": 7.92687829957016e-05, |
| "loss": 0.4369, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.6577701332320577, |
| "learning_rate": 7.925021242509539e-05, |
| "loss": 0.4424, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.78125, |
| "grad_norm": 0.5424251079261185, |
| "learning_rate": 7.923141121612922e-05, |
| "loss": 0.4403, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.7847222222222222, |
| "grad_norm": 0.5054224207988611, |
| "learning_rate": 7.921237947928097e-05, |
| "loss": 0.4392, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.7881944444444444, |
| "grad_norm": 0.605462887935492, |
| "learning_rate": 7.91931173263831e-05, |
| "loss": 0.4398, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.7916666666666666, |
| "grad_norm": 0.5551065001075223, |
| "learning_rate": 7.917362487062207e-05, |
| "loss": 0.4349, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7951388888888888, |
| "grad_norm": 0.6153818427714622, |
| "learning_rate": 7.915390222653756e-05, |
| "loss": 0.4298, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.7986111111111112, |
| "grad_norm": 0.7260586067886241, |
| "learning_rate": 7.913394951002191e-05, |
| "loss": 0.4391, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8020833333333334, |
| "grad_norm": 0.8350027035555211, |
| "learning_rate": 7.911376683831937e-05, |
| "loss": 0.4423, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.8055555555555556, |
| "grad_norm": 1.0219604029866298, |
| "learning_rate": 7.909335433002543e-05, |
| "loss": 0.4336, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.8090277777777778, |
| "grad_norm": 1.1709001073322873, |
| "learning_rate": 7.907271210508612e-05, |
| "loss": 0.4281, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.8125, |
| "grad_norm": 0.8473876791721466, |
| "learning_rate": 7.905184028479734e-05, |
| "loss": 0.4335, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.8159722222222222, |
| "grad_norm": 0.9483895312739647, |
| "learning_rate": 7.903073899180408e-05, |
| "loss": 0.4354, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.8194444444444444, |
| "grad_norm": 1.211636095809243, |
| "learning_rate": 7.900940835009974e-05, |
| "loss": 0.4403, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.8229166666666666, |
| "grad_norm": 0.754367132994784, |
| "learning_rate": 7.89878484850254e-05, |
| "loss": 0.431, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.8263888888888888, |
| "grad_norm": 0.850436115404034, |
| "learning_rate": 7.89660595232691e-05, |
| "loss": 0.4361, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.8298611111111112, |
| "grad_norm": 0.9631686880042966, |
| "learning_rate": 7.894404159286507e-05, |
| "loss": 0.4377, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.8809972957235778, |
| "learning_rate": 7.892179482319297e-05, |
| "loss": 0.4412, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8368055555555556, |
| "grad_norm": 0.7087009807332131, |
| "learning_rate": 7.889931934497713e-05, |
| "loss": 0.4384, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.8402777777777778, |
| "grad_norm": 0.6235179197671002, |
| "learning_rate": 7.887661529028583e-05, |
| "loss": 0.4396, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.84375, |
| "grad_norm": 0.5941580300026621, |
| "learning_rate": 7.885368279253045e-05, |
| "loss": 0.4312, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.8472222222222222, |
| "grad_norm": 0.7287526088534313, |
| "learning_rate": 7.883052198646481e-05, |
| "loss": 0.4319, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.8506944444444444, |
| "grad_norm": 0.8550617714802732, |
| "learning_rate": 7.880713300818417e-05, |
| "loss": 0.4265, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.8541666666666666, |
| "grad_norm": 1.070298714307543, |
| "learning_rate": 7.878351599512465e-05, |
| "loss": 0.4298, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.8576388888888888, |
| "grad_norm": 1.0005703836024833, |
| "learning_rate": 7.875967108606229e-05, |
| "loss": 0.4304, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.8611111111111112, |
| "grad_norm": 0.8656053628499113, |
| "learning_rate": 7.873559842111225e-05, |
| "loss": 0.4184, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.8645833333333334, |
| "grad_norm": 0.8664562224515696, |
| "learning_rate": 7.871129814172805e-05, |
| "loss": 0.4344, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.8680555555555556, |
| "grad_norm": 1.0238452521162664, |
| "learning_rate": 7.868677039070067e-05, |
| "loss": 0.4312, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8715277777777778, |
| "grad_norm": 1.0099355495434388, |
| "learning_rate": 7.866201531215776e-05, |
| "loss": 0.4302, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.8416275019579411, |
| "learning_rate": 7.863703305156273e-05, |
| "loss": 0.4284, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.8784722222222222, |
| "grad_norm": 0.8099259050315379, |
| "learning_rate": 7.8611823755714e-05, |
| "loss": 0.4344, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.8819444444444444, |
| "grad_norm": 0.6678679683627219, |
| "learning_rate": 7.858638757274398e-05, |
| "loss": 0.4231, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.8854166666666666, |
| "grad_norm": 0.47895420973208647, |
| "learning_rate": 7.856072465211839e-05, |
| "loss": 0.4206, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 1.009939643607457, |
| "learning_rate": 7.853483514463521e-05, |
| "loss": 0.4288, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.8923611111111112, |
| "grad_norm": 1.4189309407266693, |
| "learning_rate": 7.850871920242394e-05, |
| "loss": 0.4337, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.8958333333333334, |
| "grad_norm": 0.37058893592615144, |
| "learning_rate": 7.848237697894453e-05, |
| "loss": 0.4254, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.8993055555555556, |
| "grad_norm": 1.2115738610190847, |
| "learning_rate": 7.84558086289867e-05, |
| "loss": 0.424, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.9027777777777778, |
| "grad_norm": 0.8637909862046065, |
| "learning_rate": 7.842901430866882e-05, |
| "loss": 0.4224, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.90625, |
| "grad_norm": 0.8555605365080853, |
| "learning_rate": 7.840199417543716e-05, |
| "loss": 0.4215, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.9097222222222222, |
| "grad_norm": 0.8338852542700611, |
| "learning_rate": 7.837474838806481e-05, |
| "loss": 0.4253, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.9131944444444444, |
| "grad_norm": 0.7277509949557855, |
| "learning_rate": 7.834727710665091e-05, |
| "loss": 0.4237, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.9166666666666666, |
| "grad_norm": 1.0597249447053136, |
| "learning_rate": 7.831958049261956e-05, |
| "loss": 0.435, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.9201388888888888, |
| "grad_norm": 0.7628459806053108, |
| "learning_rate": 7.829165870871897e-05, |
| "loss": 0.4271, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.9236111111111112, |
| "grad_norm": 0.3800778344556053, |
| "learning_rate": 7.82635119190205e-05, |
| "loss": 0.4234, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.9270833333333334, |
| "grad_norm": 0.72949914163169, |
| "learning_rate": 7.823514028891758e-05, |
| "loss": 0.4254, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.9305555555555556, |
| "grad_norm": 0.8810789298670233, |
| "learning_rate": 7.820654398512492e-05, |
| "loss": 0.4202, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.9340277777777778, |
| "grad_norm": 0.9953625938611481, |
| "learning_rate": 7.817772317567739e-05, |
| "loss": 0.4263, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.9375, |
| "grad_norm": 1.0841275303637594, |
| "learning_rate": 7.814867802992907e-05, |
| "loss": 0.4271, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9409722222222222, |
| "grad_norm": 0.8243338663917711, |
| "learning_rate": 7.811940871855232e-05, |
| "loss": 0.429, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.9444444444444444, |
| "grad_norm": 0.7969445305450061, |
| "learning_rate": 7.808991541353662e-05, |
| "loss": 0.4293, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.9479166666666666, |
| "grad_norm": 0.8402063861721795, |
| "learning_rate": 7.806019828818776e-05, |
| "loss": 0.4305, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.9513888888888888, |
| "grad_norm": 0.7708535810728068, |
| "learning_rate": 7.803025751712667e-05, |
| "loss": 0.4308, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.9548611111111112, |
| "grad_norm": 0.6791688966743965, |
| "learning_rate": 7.800009327628845e-05, |
| "loss": 0.4299, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.9583333333333334, |
| "grad_norm": 0.9406991087495775, |
| "learning_rate": 7.796970574292136e-05, |
| "loss": 0.4248, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.9618055555555556, |
| "grad_norm": 1.2117325105562007, |
| "learning_rate": 7.793909509558572e-05, |
| "loss": 0.4202, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.9652777777777778, |
| "grad_norm": 0.6831708132582254, |
| "learning_rate": 7.790826151415289e-05, |
| "loss": 0.4257, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.96875, |
| "grad_norm": 0.859673440712125, |
| "learning_rate": 7.787720517980424e-05, |
| "loss": 0.4183, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.9722222222222222, |
| "grad_norm": 0.9039026634213447, |
| "learning_rate": 7.784592627503004e-05, |
| "loss": 0.4184, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9756944444444444, |
| "grad_norm": 0.7704704639747162, |
| "learning_rate": 7.781442498362838e-05, |
| "loss": 0.4245, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.9791666666666666, |
| "grad_norm": 1.021065945036308, |
| "learning_rate": 7.77827014907042e-05, |
| "loss": 0.4224, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.9826388888888888, |
| "grad_norm": 1.0035320025992345, |
| "learning_rate": 7.775075598266803e-05, |
| "loss": 0.4188, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.9861111111111112, |
| "grad_norm": 0.733225627159732, |
| "learning_rate": 7.771858864723504e-05, |
| "loss": 0.4139, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.9895833333333334, |
| "grad_norm": 0.5129363335278152, |
| "learning_rate": 7.768619967342386e-05, |
| "loss": 0.4295, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.9930555555555556, |
| "grad_norm": 0.44152260527622333, |
| "learning_rate": 7.76535892515555e-05, |
| "loss": 0.4329, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.9965277777777778, |
| "grad_norm": 0.5476141384850192, |
| "learning_rate": 7.76207575732522e-05, |
| "loss": 0.4225, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5830357412083533, |
| "learning_rate": 7.758770483143634e-05, |
| "loss": 0.4257, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.0034722222222223, |
| "grad_norm": 0.7063916284367442, |
| "learning_rate": 7.755443122032931e-05, |
| "loss": 0.4051, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.0069444444444444, |
| "grad_norm": 0.667202666738724, |
| "learning_rate": 7.752093693545032e-05, |
| "loss": 0.4003, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.0104166666666667, |
| "grad_norm": 0.6230156026991575, |
| "learning_rate": 7.74872221736153e-05, |
| "loss": 0.4062, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.0138888888888888, |
| "grad_norm": 0.6047415209235458, |
| "learning_rate": 7.745328713293573e-05, |
| "loss": 0.399, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.0173611111111112, |
| "grad_norm": 0.4623574863446841, |
| "learning_rate": 7.741913201281746e-05, |
| "loss": 0.4107, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.0208333333333333, |
| "grad_norm": 0.44829714098160994, |
| "learning_rate": 7.738475701395955e-05, |
| "loss": 0.402, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.0243055555555556, |
| "grad_norm": 0.5583386260853201, |
| "learning_rate": 7.735016233835308e-05, |
| "loss": 0.4037, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.0277777777777777, |
| "grad_norm": 0.5849993828315929, |
| "learning_rate": 7.731534818928004e-05, |
| "loss": 0.4038, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.03125, |
| "grad_norm": 0.5530322885658703, |
| "learning_rate": 7.728031477131195e-05, |
| "loss": 0.4057, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.0347222222222223, |
| "grad_norm": 0.5809657912163412, |
| "learning_rate": 7.724506229030888e-05, |
| "loss": 0.4008, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.0381944444444444, |
| "grad_norm": 0.7320117850928769, |
| "learning_rate": 7.72095909534181e-05, |
| "loss": 0.4069, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.0416666666666667, |
| "grad_norm": 0.8148876222477555, |
| "learning_rate": 7.71739009690729e-05, |
| "loss": 0.4148, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0451388888888888, |
| "grad_norm": 0.8604742887394918, |
| "learning_rate": 7.713799254699136e-05, |
| "loss": 0.4132, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.0486111111111112, |
| "grad_norm": 0.8494756814809938, |
| "learning_rate": 7.710186589817515e-05, |
| "loss": 0.4056, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.0520833333333333, |
| "grad_norm": 0.8006898767741991, |
| "learning_rate": 7.706552123490822e-05, |
| "loss": 0.4031, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.0555555555555556, |
| "grad_norm": 0.7547949909298812, |
| "learning_rate": 7.702895877075563e-05, |
| "loss": 0.4084, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.0590277777777777, |
| "grad_norm": 0.5910172696126633, |
| "learning_rate": 7.699217872056223e-05, |
| "loss": 0.4075, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.0625, |
| "grad_norm": 0.4769866947519716, |
| "learning_rate": 7.695518130045147e-05, |
| "loss": 0.4028, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.0659722222222223, |
| "grad_norm": 0.5949945873140698, |
| "learning_rate": 7.691796672782406e-05, |
| "loss": 0.398, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.0694444444444444, |
| "grad_norm": 0.5723602950104976, |
| "learning_rate": 7.688053522135675e-05, |
| "loss": 0.4053, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.0729166666666667, |
| "grad_norm": 0.4849220557566718, |
| "learning_rate": 7.684288700100095e-05, |
| "loss": 0.3934, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.0763888888888888, |
| "grad_norm": 0.5878092037060889, |
| "learning_rate": 7.680502228798157e-05, |
| "loss": 0.4059, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.0798611111111112, |
| "grad_norm": 0.7914196489636522, |
| "learning_rate": 7.676694130479563e-05, |
| "loss": 0.3971, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.0833333333333333, |
| "grad_norm": 0.9299321919759843, |
| "learning_rate": 7.672864427521097e-05, |
| "loss": 0.4123, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.0868055555555556, |
| "grad_norm": 0.8715987505249858, |
| "learning_rate": 7.669013142426496e-05, |
| "loss": 0.4055, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.0902777777777777, |
| "grad_norm": 0.8096045081793677, |
| "learning_rate": 7.665140297826313e-05, |
| "loss": 0.4021, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.09375, |
| "grad_norm": 0.8475178262681384, |
| "learning_rate": 7.66124591647779e-05, |
| "loss": 0.4023, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.0972222222222223, |
| "grad_norm": 0.7826778208724321, |
| "learning_rate": 7.657330021264718e-05, |
| "loss": 0.3982, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.1006944444444444, |
| "grad_norm": 0.6181458437106809, |
| "learning_rate": 7.65339263519731e-05, |
| "loss": 0.4038, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.1041666666666667, |
| "grad_norm": 0.5257606234932206, |
| "learning_rate": 7.649433781412058e-05, |
| "loss": 0.3975, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.1076388888888888, |
| "grad_norm": 0.5137603347420444, |
| "learning_rate": 7.645453483171601e-05, |
| "loss": 0.4054, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.6494379013037576, |
| "learning_rate": 7.641451763864587e-05, |
| "loss": 0.3967, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.1145833333333333, |
| "grad_norm": 0.7419787340823062, |
| "learning_rate": 7.637428647005541e-05, |
| "loss": 0.3956, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.1180555555555556, |
| "grad_norm": 0.6989839067475451, |
| "learning_rate": 7.633384156234718e-05, |
| "loss": 0.4003, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.1215277777777777, |
| "grad_norm": 0.6901694456258389, |
| "learning_rate": 7.629318315317968e-05, |
| "loss": 0.4026, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 0.7233257128268635, |
| "learning_rate": 7.625231148146601e-05, |
| "loss": 0.4087, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.1284722222222223, |
| "grad_norm": 0.7506785296869003, |
| "learning_rate": 7.621122678737236e-05, |
| "loss": 0.3997, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.1319444444444444, |
| "grad_norm": 0.7590348348849132, |
| "learning_rate": 7.616992931231671e-05, |
| "loss": 0.4021, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.1354166666666667, |
| "grad_norm": 0.6901940604570691, |
| "learning_rate": 7.612841929896737e-05, |
| "loss": 0.4065, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.1388888888888888, |
| "grad_norm": 0.580026833291539, |
| "learning_rate": 7.608669699124153e-05, |
| "loss": 0.3979, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.1423611111111112, |
| "grad_norm": 0.5236840254807037, |
| "learning_rate": 7.604476263430379e-05, |
| "loss": 0.3998, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.1458333333333333, |
| "grad_norm": 0.5415803185886238, |
| "learning_rate": 7.600261647456485e-05, |
| "loss": 0.4003, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.1493055555555556, |
| "grad_norm": 0.4862624810527434, |
| "learning_rate": 7.596025875967998e-05, |
| "loss": 0.4044, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.1527777777777777, |
| "grad_norm": 0.5339213319556734, |
| "learning_rate": 7.591768973854753e-05, |
| "loss": 0.4035, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.15625, |
| "grad_norm": 0.6310106513888443, |
| "learning_rate": 7.587490966130754e-05, |
| "loss": 0.3997, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.1597222222222223, |
| "grad_norm": 0.5550205488151554, |
| "learning_rate": 7.58319187793402e-05, |
| "loss": 0.3967, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.1631944444444444, |
| "grad_norm": 0.40985639820095615, |
| "learning_rate": 7.578871734526449e-05, |
| "loss": 0.3979, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.1666666666666667, |
| "grad_norm": 0.42924572007540923, |
| "learning_rate": 7.57453056129365e-05, |
| "loss": 0.4059, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.1701388888888888, |
| "grad_norm": 0.3770504743326086, |
| "learning_rate": 7.570168383744815e-05, |
| "loss": 0.3977, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.1736111111111112, |
| "grad_norm": 0.4453962885153323, |
| "learning_rate": 7.565785227512555e-05, |
| "loss": 0.3986, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.1770833333333333, |
| "grad_norm": 0.6401462585148607, |
| "learning_rate": 7.561381118352757e-05, |
| "loss": 0.4006, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.1805555555555556, |
| "grad_norm": 0.8576068224785208, |
| "learning_rate": 7.556956082144425e-05, |
| "loss": 0.4028, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.1840277777777777, |
| "grad_norm": 0.970046404986134, |
| "learning_rate": 7.552510144889538e-05, |
| "loss": 0.395, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.1875, |
| "grad_norm": 0.9237808454959463, |
| "learning_rate": 7.548043332712887e-05, |
| "loss": 0.3966, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.1909722222222223, |
| "grad_norm": 0.7963471169582702, |
| "learning_rate": 7.54355567186193e-05, |
| "loss": 0.4029, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.1944444444444444, |
| "grad_norm": 0.6884696150231001, |
| "learning_rate": 7.539047188706631e-05, |
| "loss": 0.4096, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.1979166666666667, |
| "grad_norm": 0.6376467029816646, |
| "learning_rate": 7.534517909739312e-05, |
| "loss": 0.3982, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.2013888888888888, |
| "grad_norm": 0.5931365038229, |
| "learning_rate": 7.529967861574487e-05, |
| "loss": 0.4077, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.2048611111111112, |
| "grad_norm": 0.6606373517604195, |
| "learning_rate": 7.525397070948716e-05, |
| "loss": 0.4088, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.2083333333333333, |
| "grad_norm": 0.8320458571146847, |
| "learning_rate": 7.520805564720444e-05, |
| "loss": 0.4018, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.2118055555555556, |
| "grad_norm": 0.8998502488634423, |
| "learning_rate": 7.516193369869846e-05, |
| "loss": 0.4091, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.2152777777777777, |
| "grad_norm": 0.8032832906987262, |
| "learning_rate": 7.511560513498658e-05, |
| "loss": 0.3993, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.21875, |
| "grad_norm": 0.6642271849354356, |
| "learning_rate": 7.506907022830032e-05, |
| "loss": 0.3994, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 0.564826521818374, |
| "learning_rate": 7.502232925208365e-05, |
| "loss": 0.399, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.2256944444444444, |
| "grad_norm": 0.47406836067731883, |
| "learning_rate": 7.497538248099144e-05, |
| "loss": 0.3943, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.2291666666666667, |
| "grad_norm": 0.46564678159712414, |
| "learning_rate": 7.492823019088785e-05, |
| "loss": 0.3979, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.2326388888888888, |
| "grad_norm": 0.5727503778599136, |
| "learning_rate": 7.488087265884466e-05, |
| "loss": 0.3972, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.2361111111111112, |
| "grad_norm": 0.5506165925693018, |
| "learning_rate": 7.483331016313969e-05, |
| "loss": 0.4008, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.2395833333333333, |
| "grad_norm": 0.5430765774399919, |
| "learning_rate": 7.478554298325517e-05, |
| "loss": 0.4005, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.2430555555555556, |
| "grad_norm": 0.5256534569365339, |
| "learning_rate": 7.473757139987602e-05, |
| "loss": 0.394, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.2465277777777777, |
| "grad_norm": 0.5452588129629314, |
| "learning_rate": 7.468939569488833e-05, |
| "loss": 0.4006, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.5375005795015402, |
| "learning_rate": 7.464101615137756e-05, |
| "loss": 0.4002, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.2534722222222223, |
| "grad_norm": 0.6640797930885677, |
| "learning_rate": 7.459243305362697e-05, |
| "loss": 0.3965, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.2569444444444444, |
| "grad_norm": 6.509958727869088e+28, |
| "learning_rate": 7.454364668711595e-05, |
| "loss": 0.3984, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.2604166666666667, |
| "grad_norm": 1.8590515577626585, |
| "learning_rate": 7.44946573385183e-05, |
| "loss": 0.416, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.2638888888888888, |
| "grad_norm": 0.7545962814502798, |
| "learning_rate": 7.444546529570055e-05, |
| "loss": 0.4087, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.2673611111111112, |
| "grad_norm": 0.9078399896381715, |
| "learning_rate": 7.439607084772032e-05, |
| "loss": 0.4021, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.2708333333333333, |
| "grad_norm": 0.9915786959692952, |
| "learning_rate": 7.434647428482453e-05, |
| "loss": 0.4076, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.2743055555555556, |
| "grad_norm": 1.1432908983480914, |
| "learning_rate": 7.42966758984478e-05, |
| "loss": 0.411, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.2777777777777777, |
| "grad_norm": 0.7908216713685846, |
| "learning_rate": 7.424667598121067e-05, |
| "loss": 0.4048, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.28125, |
| "grad_norm": 0.7840804475389772, |
| "learning_rate": 7.419647482691788e-05, |
| "loss": 0.3979, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.2847222222222223, |
| "grad_norm": 0.7927335911266363, |
| "learning_rate": 7.414607273055666e-05, |
| "loss": 0.4041, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.2881944444444444, |
| "grad_norm": 0.7111887896878814, |
| "learning_rate": 7.409546998829503e-05, |
| "loss": 0.3922, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.2916666666666667, |
| "grad_norm": 0.7947054781680923, |
| "learning_rate": 7.404466689747999e-05, |
| "loss": 0.4059, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.2951388888888888, |
| "grad_norm": 0.7019883001980964, |
| "learning_rate": 7.399366375663584e-05, |
| "loss": 0.4024, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.2986111111111112, |
| "grad_norm": 0.6218653316631517, |
| "learning_rate": 7.394246086546236e-05, |
| "loss": 0.4013, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.3020833333333333, |
| "grad_norm": 0.7190549947235102, |
| "learning_rate": 7.389105852483312e-05, |
| "loss": 0.4069, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.3055555555555556, |
| "grad_norm": 0.768859503130449, |
| "learning_rate": 7.383945703679365e-05, |
| "loss": 0.3958, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.3090277777777777, |
| "grad_norm": 0.534674762997295, |
| "learning_rate": 7.37876567045597e-05, |
| "loss": 0.3969, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.3125, |
| "grad_norm": 0.5445743842452613, |
| "learning_rate": 7.373565783251544e-05, |
| "loss": 0.4004, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.3159722222222223, |
| "grad_norm": 0.49103846814991575, |
| "learning_rate": 7.368346072621169e-05, |
| "loss": 0.3961, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.3194444444444444, |
| "grad_norm": 0.47339147058706754, |
| "learning_rate": 7.363106569236413e-05, |
| "loss": 0.4058, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.3229166666666667, |
| "grad_norm": 0.4912336379485776, |
| "learning_rate": 7.357847303885146e-05, |
| "loss": 0.3935, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.3263888888888888, |
| "grad_norm": 0.47424561892980627, |
| "learning_rate": 7.352568307471363e-05, |
| "loss": 0.3962, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.3298611111111112, |
| "grad_norm": 0.4530340325962746, |
| "learning_rate": 7.347269611014997e-05, |
| "loss": 0.4043, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.4950169140082056, |
| "learning_rate": 7.341951245651747e-05, |
| "loss": 0.4042, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.3368055555555556, |
| "grad_norm": 0.5020864609707268, |
| "learning_rate": 7.336613242632882e-05, |
| "loss": 0.3981, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.3402777777777777, |
| "grad_norm": 0.3617561837680056, |
| "learning_rate": 7.33125563332507e-05, |
| "loss": 0.3943, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.34375, |
| "grad_norm": 0.39426317679870326, |
| "learning_rate": 7.325878449210182e-05, |
| "loss": 0.4017, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.3472222222222223, |
| "grad_norm": 0.36781313949402294, |
| "learning_rate": 7.320481721885116e-05, |
| "loss": 0.4054, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.3506944444444444, |
| "grad_norm": 0.3743748114329641, |
| "learning_rate": 7.315065483061608e-05, |
| "loss": 0.3972, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.3541666666666667, |
| "grad_norm": 0.4147718236807753, |
| "learning_rate": 7.309629764566042e-05, |
| "loss": 0.3942, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.3576388888888888, |
| "grad_norm": 0.466497809382821, |
| "learning_rate": 7.304174598339274e-05, |
| "loss": 0.3948, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.3611111111111112, |
| "grad_norm": 0.4701553681056374, |
| "learning_rate": 7.298700016436427e-05, |
| "loss": 0.3993, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.3645833333333333, |
| "grad_norm": 0.5674005815206642, |
| "learning_rate": 7.293206051026722e-05, |
| "loss": 0.4068, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.3680555555555556, |
| "grad_norm": 0.7445442589940026, |
| "learning_rate": 7.287692734393273e-05, |
| "loss": 0.3935, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.3715277777777777, |
| "grad_norm": 0.986306413661404, |
| "learning_rate": 7.282160098932906e-05, |
| "loss": 0.3977, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 1.1477021999284092, |
| "learning_rate": 7.276608177155968e-05, |
| "loss": 0.4049, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.3784722222222223, |
| "grad_norm": 0.6486212053686043, |
| "learning_rate": 7.271037001686132e-05, |
| "loss": 0.3968, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.3819444444444444, |
| "grad_norm": 0.3729943544399879, |
| "learning_rate": 7.265446605260208e-05, |
| "loss": 0.3968, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.3854166666666667, |
| "grad_norm": 0.6010239194946239, |
| "learning_rate": 7.259837020727953e-05, |
| "loss": 0.3949, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 0.753191689196547, |
| "learning_rate": 7.254208281051871e-05, |
| "loss": 0.3985, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.3923611111111112, |
| "grad_norm": 0.6980861620307499, |
| "learning_rate": 7.248560419307028e-05, |
| "loss": 0.3949, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.3958333333333333, |
| "grad_norm": 0.5625805147809977, |
| "learning_rate": 7.242893468680849e-05, |
| "loss": 0.3965, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.3993055555555556, |
| "grad_norm": 0.5129296233655678, |
| "learning_rate": 7.237207462472933e-05, |
| "loss": 0.3999, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.4027777777777777, |
| "grad_norm": 0.5019690768893361, |
| "learning_rate": 7.231502434094845e-05, |
| "loss": 0.3967, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.40625, |
| "grad_norm": 0.5198379539446143, |
| "learning_rate": 7.225778417069932e-05, |
| "loss": 0.3932, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.4097222222222223, |
| "grad_norm": 0.5178789165907579, |
| "learning_rate": 7.220035445033114e-05, |
| "loss": 0.3943, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.4131944444444444, |
| "grad_norm": 0.37822059845389094, |
| "learning_rate": 7.2142735517307e-05, |
| "loss": 0.3906, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.4166666666666667, |
| "grad_norm": 0.44150796530328035, |
| "learning_rate": 7.208492771020176e-05, |
| "loss": 0.3944, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.4201388888888888, |
| "grad_norm": 0.5232356469810064, |
| "learning_rate": 7.202693136870016e-05, |
| "loss": 0.3865, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.4236111111111112, |
| "grad_norm": 0.46523603602882435, |
| "learning_rate": 7.196874683359479e-05, |
| "loss": 0.3989, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.4270833333333333, |
| "grad_norm": 0.30375565178239217, |
| "learning_rate": 7.191037444678407e-05, |
| "loss": 0.4039, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.4305555555555556, |
| "grad_norm": 0.4400233462298016, |
| "learning_rate": 7.185181455127023e-05, |
| "loss": 0.3908, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.4340277777777777, |
| "grad_norm": 0.5419376249064612, |
| "learning_rate": 7.179306749115739e-05, |
| "loss": 0.3961, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.4375, |
| "grad_norm": 0.5049517808866749, |
| "learning_rate": 7.173413361164941e-05, |
| "loss": 0.39, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.4409722222222223, |
| "grad_norm": 0.5615540577656827, |
| "learning_rate": 7.167501325904795e-05, |
| "loss": 0.3977, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 0.677673898490289, |
| "learning_rate": 7.161570678075038e-05, |
| "loss": 0.3941, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.4479166666666667, |
| "grad_norm": 0.7457041655072583, |
| "learning_rate": 7.155621452524779e-05, |
| "loss": 0.3982, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.4513888888888888, |
| "grad_norm": 0.7907637822261093, |
| "learning_rate": 7.14965368421229e-05, |
| "loss": 0.4062, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.4548611111111112, |
| "grad_norm": 0.8775535799179068, |
| "learning_rate": 7.143667408204803e-05, |
| "loss": 0.4041, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.4583333333333333, |
| "grad_norm": 0.8830905484351036, |
| "learning_rate": 7.137662659678303e-05, |
| "loss": 0.398, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.4618055555555556, |
| "grad_norm": 0.64842475071273, |
| "learning_rate": 7.131639473917321e-05, |
| "loss": 0.3998, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.4652777777777777, |
| "grad_norm": 0.44157655714059424, |
| "learning_rate": 7.12559788631473e-05, |
| "loss": 0.3943, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.46875, |
| "grad_norm": 0.40931880728742037, |
| "learning_rate": 7.119537932371527e-05, |
| "loss": 0.3975, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.4722222222222223, |
| "grad_norm": 0.4820032334826367, |
| "learning_rate": 7.113459647696641e-05, |
| "loss": 0.3935, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.4756944444444444, |
| "grad_norm": 0.588832851916906, |
| "learning_rate": 7.107363068006706e-05, |
| "loss": 0.3961, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.4791666666666667, |
| "grad_norm": 0.5425066819059217, |
| "learning_rate": 7.101248229125864e-05, |
| "loss": 0.398, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.4826388888888888, |
| "grad_norm": 0.3955679513420198, |
| "learning_rate": 7.09511516698555e-05, |
| "loss": 0.3954, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.4861111111111112, |
| "grad_norm": 0.3169987014319606, |
| "learning_rate": 7.088963917624277e-05, |
| "loss": 0.397, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.4895833333333333, |
| "grad_norm": 0.44741101040643333, |
| "learning_rate": 7.082794517187432e-05, |
| "loss": 0.3914, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.4930555555555556, |
| "grad_norm": 0.5227023640698025, |
| "learning_rate": 7.076607001927061e-05, |
| "loss": 0.3916, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.4965277777777777, |
| "grad_norm": 0.4360893533255743, |
| "learning_rate": 7.070401408201647e-05, |
| "loss": 0.3986, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.3479497802804189, |
| "learning_rate": 7.064177772475912e-05, |
| "loss": 0.3958, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.5034722222222223, |
| "grad_norm": 0.5897209351516681, |
| "learning_rate": 7.057936131320592e-05, |
| "loss": 0.4036, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.5069444444444444, |
| "grad_norm": 0.7047406344920322, |
| "learning_rate": 7.051676521412221e-05, |
| "loss": 0.3949, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.5104166666666665, |
| "grad_norm": 0.5517206030087352, |
| "learning_rate": 7.045398979532925e-05, |
| "loss": 0.4033, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.5138888888888888, |
| "grad_norm": 0.36054079949957824, |
| "learning_rate": 7.039103542570199e-05, |
| "loss": 0.3958, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.5173611111111112, |
| "grad_norm": 0.29130083311637406, |
| "learning_rate": 7.032790247516686e-05, |
| "loss": 0.3968, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.5208333333333335, |
| "grad_norm": 0.39405579534978225, |
| "learning_rate": 7.026459131469972e-05, |
| "loss": 0.4093, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.5243055555555556, |
| "grad_norm": 0.46575221665690475, |
| "learning_rate": 7.020110231632357e-05, |
| "loss": 0.4012, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.5277777777777777, |
| "grad_norm": 0.46461614609662905, |
| "learning_rate": 7.013743585310642e-05, |
| "loss": 0.3967, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.53125, |
| "grad_norm": 0.48150122610101587, |
| "learning_rate": 7.00735922991591e-05, |
| "loss": 0.3993, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.5347222222222223, |
| "grad_norm": 0.4642986163233927, |
| "learning_rate": 7.000957202963298e-05, |
| "loss": 0.3956, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.5381944444444444, |
| "grad_norm": 0.397653915493047, |
| "learning_rate": 6.99453754207179e-05, |
| "loss": 0.3986, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.5416666666666665, |
| "grad_norm": 0.4043417877836226, |
| "learning_rate": 6.988100284963985e-05, |
| "loss": 0.3941, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.5451388888888888, |
| "grad_norm": 0.34751919690711397, |
| "learning_rate": 6.981645469465878e-05, |
| "loss": 0.3957, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.5486111111111112, |
| "grad_norm": 0.45683297371671877, |
| "learning_rate": 6.975173133506646e-05, |
| "loss": 0.3937, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.5520833333333335, |
| "grad_norm": 0.5944612746389557, |
| "learning_rate": 6.968683315118407e-05, |
| "loss": 0.3961, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 0.7165358032510882, |
| "learning_rate": 6.96217605243602e-05, |
| "loss": 0.3976, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.5590277777777777, |
| "grad_norm": 0.8996970961809184, |
| "learning_rate": 6.955651383696836e-05, |
| "loss": 0.3982, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.5625, |
| "grad_norm": 1.228115875802595, |
| "learning_rate": 6.949109347240496e-05, |
| "loss": 0.3969, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.5659722222222223, |
| "grad_norm": 0.7021067773647806, |
| "learning_rate": 6.942549981508691e-05, |
| "loss": 0.3948, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.5694444444444444, |
| "grad_norm": 0.4195458938653693, |
| "learning_rate": 6.935973325044941e-05, |
| "loss": 0.4029, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.5729166666666665, |
| "grad_norm": 0.7064275771918259, |
| "learning_rate": 6.929379416494369e-05, |
| "loss": 0.391, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.5763888888888888, |
| "grad_norm": 1.017357717230314, |
| "learning_rate": 6.92276829460347e-05, |
| "loss": 0.4085, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.5798611111111112, |
| "grad_norm": 0.9563799327415947, |
| "learning_rate": 6.91613999821989e-05, |
| "loss": 0.3951, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.5833333333333335, |
| "grad_norm": 0.7032227970552669, |
| "learning_rate": 6.909494566292195e-05, |
| "loss": 0.3954, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.5868055555555556, |
| "grad_norm": 0.49934071955688775, |
| "learning_rate": 6.902832037869637e-05, |
| "loss": 0.3918, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.5902777777777777, |
| "grad_norm": 0.4664001240844466, |
| "learning_rate": 6.89615245210193e-05, |
| "loss": 0.3938, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.59375, |
| "grad_norm": 0.6434919721382892, |
| "learning_rate": 6.889455848239022e-05, |
| "loss": 0.4072, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.5972222222222223, |
| "grad_norm": 0.7719379494052273, |
| "learning_rate": 6.882742265630859e-05, |
| "loss": 0.3938, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.6006944444444444, |
| "grad_norm": 0.8059082567281859, |
| "learning_rate": 6.876011743727154e-05, |
| "loss": 0.3995, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.6041666666666665, |
| "grad_norm": 0.6320412549731026, |
| "learning_rate": 6.869264322077158e-05, |
| "loss": 0.3908, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.6076388888888888, |
| "grad_norm": 0.4222247764158233, |
| "learning_rate": 6.86250004032943e-05, |
| "loss": 0.3929, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.6111111111111112, |
| "grad_norm": 0.3699988949394749, |
| "learning_rate": 6.855718938231597e-05, |
| "loss": 0.389, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.6145833333333335, |
| "grad_norm": 0.4049406846507113, |
| "learning_rate": 6.848921055630125e-05, |
| "loss": 0.3853, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.6180555555555556, |
| "grad_norm": 0.4872135649150802, |
| "learning_rate": 6.842106432470084e-05, |
| "loss": 0.3966, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.6215277777777777, |
| "grad_norm": 0.5738461208479633, |
| "learning_rate": 6.835275108794915e-05, |
| "loss": 0.4036, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 0.475084958136699, |
| "learning_rate": 6.828427124746191e-05, |
| "loss": 0.3943, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.6284722222222223, |
| "grad_norm": 0.3344824698389433, |
| "learning_rate": 6.821562520563383e-05, |
| "loss": 0.3929, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.6319444444444444, |
| "grad_norm": 0.34048825094857005, |
| "learning_rate": 6.814681336583624e-05, |
| "loss": 0.3953, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.6354166666666665, |
| "grad_norm": 0.34332483302626543, |
| "learning_rate": 6.807783613241474e-05, |
| "loss": 0.3913, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.6388888888888888, |
| "grad_norm": 0.3722370480435148, |
| "learning_rate": 6.800869391068674e-05, |
| "loss": 0.3966, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.6423611111111112, |
| "grad_norm": 0.3786915178498432, |
| "learning_rate": 6.793938710693922e-05, |
| "loss": 0.3932, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.6458333333333335, |
| "grad_norm": 0.40117165891657924, |
| "learning_rate": 6.786991612842621e-05, |
| "loss": 0.3918, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.6493055555555556, |
| "grad_norm": 0.4047825538421503, |
| "learning_rate": 6.780028138336643e-05, |
| "loss": 0.3931, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.6527777777777777, |
| "grad_norm": 0.4073725341386445, |
| "learning_rate": 6.773048328094097e-05, |
| "loss": 0.3983, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.65625, |
| "grad_norm": 0.4310315011113548, |
| "learning_rate": 6.766052223129079e-05, |
| "loss": 0.392, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.6597222222222223, |
| "grad_norm": 0.38768773081292784, |
| "learning_rate": 6.759039864551431e-05, |
| "loss": 0.3876, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.6631944444444444, |
| "grad_norm": 0.40706945931775057, |
| "learning_rate": 6.752011293566511e-05, |
| "loss": 0.395, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.5687554325547544, |
| "learning_rate": 6.744966551474936e-05, |
| "loss": 0.3946, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.6701388888888888, |
| "grad_norm": 0.6755155309069832, |
| "learning_rate": 6.737905679672347e-05, |
| "loss": 0.3853, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.6736111111111112, |
| "grad_norm": 0.5807751535885235, |
| "learning_rate": 6.730828719649171e-05, |
| "loss": 0.3966, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.6770833333333335, |
| "grad_norm": 0.37771332961010123, |
| "learning_rate": 6.723735712990362e-05, |
| "loss": 0.3902, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.6805555555555556, |
| "grad_norm": 0.32353917470642674, |
| "learning_rate": 6.716626701375174e-05, |
| "loss": 0.3902, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.6840277777777777, |
| "grad_norm": 0.3992076255136683, |
| "learning_rate": 6.7095017265769e-05, |
| "loss": 0.3974, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.6875, |
| "grad_norm": 0.48039669200418916, |
| "learning_rate": 6.702360830462642e-05, |
| "loss": 0.3938, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.6909722222222223, |
| "grad_norm": 0.60640709998847, |
| "learning_rate": 6.695204054993051e-05, |
| "loss": 0.397, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.6944444444444444, |
| "grad_norm": 0.7118701736955534, |
| "learning_rate": 6.688031442222091e-05, |
| "loss": 0.3948, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.6979166666666665, |
| "grad_norm": 0.785511616790005, |
| "learning_rate": 6.680843034296785e-05, |
| "loss": 0.3958, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.7013888888888888, |
| "grad_norm": 0.8490039815668733, |
| "learning_rate": 6.67363887345697e-05, |
| "loss": 0.3946, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.7048611111111112, |
| "grad_norm": 0.799012212330246, |
| "learning_rate": 6.666419002035053e-05, |
| "loss": 0.4004, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.7083333333333335, |
| "grad_norm": 0.6451959607357418, |
| "learning_rate": 6.659183462455751e-05, |
| "loss": 0.3934, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.7118055555555556, |
| "grad_norm": 0.4397460593795287, |
| "learning_rate": 6.651932297235858e-05, |
| "loss": 0.3968, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.7152777777777777, |
| "grad_norm": 0.4984518575640306, |
| "learning_rate": 6.644665548983973e-05, |
| "loss": 0.3838, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.71875, |
| "grad_norm": 0.5624968662346395, |
| "learning_rate": 6.637383260400276e-05, |
| "loss": 0.3882, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.7222222222222223, |
| "grad_norm": 0.4976012577824521, |
| "learning_rate": 6.630085474276256e-05, |
| "loss": 0.3876, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.7256944444444444, |
| "grad_norm": 0.40732966615342625, |
| "learning_rate": 6.622772233494467e-05, |
| "loss": 0.3967, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.7291666666666665, |
| "grad_norm": 0.40592882952930137, |
| "learning_rate": 6.615443581028279e-05, |
| "loss": 0.396, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.7326388888888888, |
| "grad_norm": 0.47886476411037715, |
| "learning_rate": 6.608099559941623e-05, |
| "loss": 0.3892, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.7361111111111112, |
| "grad_norm": 0.41129655248344593, |
| "learning_rate": 6.600740213388735e-05, |
| "loss": 0.3837, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.7395833333333335, |
| "grad_norm": 0.3054387826354855, |
| "learning_rate": 6.593365584613906e-05, |
| "loss": 0.3946, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.7430555555555556, |
| "grad_norm": 0.43919149776524113, |
| "learning_rate": 6.585975716951226e-05, |
| "loss": 0.3931, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.7465277777777777, |
| "grad_norm": 0.44650735659448654, |
| "learning_rate": 6.578570653824335e-05, |
| "loss": 0.3967, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.3126097483009025, |
| "learning_rate": 6.571150438746157e-05, |
| "loss": 0.3874, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.7534722222222223, |
| "grad_norm": 0.34139547055278535, |
| "learning_rate": 6.563715115318655e-05, |
| "loss": 0.3958, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.7569444444444444, |
| "grad_norm": 0.4346890170698485, |
| "learning_rate": 6.556264727232567e-05, |
| "loss": 0.3913, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.7604166666666665, |
| "grad_norm": 0.32111684006814456, |
| "learning_rate": 6.548799318267154e-05, |
| "loss": 0.3914, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.7638888888888888, |
| "grad_norm": 0.24993037577302774, |
| "learning_rate": 6.54131893228994e-05, |
| "loss": 0.3903, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.7673611111111112, |
| "grad_norm": 0.4529309860194363, |
| "learning_rate": 6.533823613256461e-05, |
| "loss": 0.3902, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.7708333333333335, |
| "grad_norm": 0.3939925676268099, |
| "learning_rate": 6.526313405209991e-05, |
| "loss": 0.3932, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.7743055555555556, |
| "grad_norm": 0.2977509306937723, |
| "learning_rate": 6.518788352281303e-05, |
| "loss": 0.3883, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 0.3926989264025188, |
| "learning_rate": 6.511248498688396e-05, |
| "loss": 0.3993, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.78125, |
| "grad_norm": 0.4121738758470045, |
| "learning_rate": 6.503693888736238e-05, |
| "loss": 0.3897, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.7847222222222223, |
| "grad_norm": 0.3360981108450817, |
| "learning_rate": 6.49612456681651e-05, |
| "loss": 0.3882, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.7881944444444444, |
| "grad_norm": 0.3938069249933719, |
| "learning_rate": 6.488540577407337e-05, |
| "loss": 0.3901, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.7916666666666665, |
| "grad_norm": 0.46994454215492776, |
| "learning_rate": 6.480941965073041e-05, |
| "loss": 0.39, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.7951388888888888, |
| "grad_norm": 0.5388397889423108, |
| "learning_rate": 6.473328774463861e-05, |
| "loss": 0.3942, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.7986111111111112, |
| "grad_norm": 0.6472064801068331, |
| "learning_rate": 6.465701050315702e-05, |
| "loss": 0.3856, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.8020833333333335, |
| "grad_norm": 0.6669761089856858, |
| "learning_rate": 6.458058837449871e-05, |
| "loss": 0.3987, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.8055555555555556, |
| "grad_norm": 0.7121336419902075, |
| "learning_rate": 6.450402180772811e-05, |
| "loss": 0.3969, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.8090277777777777, |
| "grad_norm": 0.7825620371561279, |
| "learning_rate": 6.44273112527584e-05, |
| "loss": 0.3828, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.8125, |
| "grad_norm": 0.7356857985595523, |
| "learning_rate": 6.435045716034883e-05, |
| "loss": 0.3908, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.8159722222222223, |
| "grad_norm": 0.6187744972361597, |
| "learning_rate": 6.427345998210209e-05, |
| "loss": 0.389, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.8194444444444444, |
| "grad_norm": 0.48962757707999305, |
| "learning_rate": 6.419632017046167e-05, |
| "loss": 0.3879, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.8229166666666665, |
| "grad_norm": 0.38371647053249225, |
| "learning_rate": 6.411903817870919e-05, |
| "loss": 0.3921, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.8263888888888888, |
| "grad_norm": 0.3913261530262924, |
| "learning_rate": 6.404161446096172e-05, |
| "loss": 0.3836, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.8298611111111112, |
| "grad_norm": 0.6113089881845829, |
| "learning_rate": 6.396404947216915e-05, |
| "loss": 0.391, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.8333333333333335, |
| "grad_norm": 0.7838044752642598, |
| "learning_rate": 6.388634366811146e-05, |
| "loss": 0.3936, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.8368055555555556, |
| "grad_norm": 0.7608944646229419, |
| "learning_rate": 6.38084975053961e-05, |
| "loss": 0.3904, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.8402777777777777, |
| "grad_norm": 0.5793469586106695, |
| "learning_rate": 6.37305114414553e-05, |
| "loss": 0.3903, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.84375, |
| "grad_norm": 0.4597220685898171, |
| "learning_rate": 6.365238593454331e-05, |
| "loss": 0.3984, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.8472222222222223, |
| "grad_norm": 0.5211196641640181, |
| "learning_rate": 6.35741214437338e-05, |
| "loss": 0.3915, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.8506944444444444, |
| "grad_norm": 0.5511952598639375, |
| "learning_rate": 6.349571842891713e-05, |
| "loss": 0.4013, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.8541666666666665, |
| "grad_norm": 0.4890246265904626, |
| "learning_rate": 6.341717735079763e-05, |
| "loss": 0.3928, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.8576388888888888, |
| "grad_norm": 0.44583161362467083, |
| "learning_rate": 6.333849867089089e-05, |
| "loss": 0.395, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.8611111111111112, |
| "grad_norm": 0.411850148556461, |
| "learning_rate": 6.325968285152107e-05, |
| "loss": 0.3887, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.8645833333333335, |
| "grad_norm": 0.36467455928426995, |
| "learning_rate": 6.318073035581821e-05, |
| "loss": 0.3927, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.8680555555555556, |
| "grad_norm": 0.4235914855953697, |
| "learning_rate": 6.31016416477154e-05, |
| "loss": 0.3829, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.8715277777777777, |
| "grad_norm": 0.45603956391146694, |
| "learning_rate": 6.302241719194623e-05, |
| "loss": 0.387, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.4345935351579549, |
| "learning_rate": 6.294305745404185e-05, |
| "loss": 0.3921, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.8784722222222223, |
| "grad_norm": 0.43172199957177415, |
| "learning_rate": 6.286356290032842e-05, |
| "loss": 0.3865, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.8819444444444444, |
| "grad_norm": 0.37950386852749723, |
| "learning_rate": 6.278393399792426e-05, |
| "loss": 0.3924, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.8854166666666665, |
| "grad_norm": 0.2996805079524871, |
| "learning_rate": 6.270417121473716e-05, |
| "loss": 0.3868, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 0.3427611051054387, |
| "learning_rate": 6.262427501946155e-05, |
| "loss": 0.3955, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.8923611111111112, |
| "grad_norm": 0.4265987354144226, |
| "learning_rate": 6.254424588157587e-05, |
| "loss": 0.3922, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.8958333333333335, |
| "grad_norm": 0.42877721944052416, |
| "learning_rate": 6.246408427133972e-05, |
| "loss": 0.3952, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.8993055555555556, |
| "grad_norm": 0.36928582871837345, |
| "learning_rate": 6.238379065979111e-05, |
| "loss": 0.3921, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.9027777777777777, |
| "grad_norm": 0.2820191982443896, |
| "learning_rate": 6.230336551874372e-05, |
| "loss": 0.3858, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.90625, |
| "grad_norm": 0.3068943917467818, |
| "learning_rate": 6.22228093207841e-05, |
| "loss": 0.3908, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.9097222222222223, |
| "grad_norm": 0.36438451862287263, |
| "learning_rate": 6.214212253926891e-05, |
| "loss": 0.3903, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.9131944444444444, |
| "grad_norm": 0.3919389997262451, |
| "learning_rate": 6.206130564832211e-05, |
| "loss": 0.3911, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.9166666666666665, |
| "grad_norm": 0.37993753769113087, |
| "learning_rate": 6.198035912283225e-05, |
| "loss": 0.3888, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.9201388888888888, |
| "grad_norm": 0.3167766051429095, |
| "learning_rate": 6.189928343844958e-05, |
| "loss": 0.3888, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.9236111111111112, |
| "grad_norm": 0.3533856097778807, |
| "learning_rate": 6.18180790715833e-05, |
| "loss": 0.3868, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.9270833333333335, |
| "grad_norm": 0.3720825699174947, |
| "learning_rate": 6.17367464993988e-05, |
| "loss": 0.3926, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.9305555555555556, |
| "grad_norm": 0.390372909339937, |
| "learning_rate": 6.165528619981479e-05, |
| "loss": 0.3895, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.9340277777777777, |
| "grad_norm": 0.43884352775151003, |
| "learning_rate": 6.157369865150052e-05, |
| "loss": 0.3932, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.9375, |
| "grad_norm": 0.42288736407700567, |
| "learning_rate": 6.149198433387297e-05, |
| "loss": 0.3958, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.9409722222222223, |
| "grad_norm": 0.40451538645376955, |
| "learning_rate": 6.141014372709402e-05, |
| "loss": 0.3936, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.9444444444444444, |
| "grad_norm": 0.354175463043975, |
| "learning_rate": 6.132817731206766e-05, |
| "loss": 0.3904, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.9479166666666665, |
| "grad_norm": 0.3780580927853469, |
| "learning_rate": 6.124608557043713e-05, |
| "loss": 0.3936, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.9513888888888888, |
| "grad_norm": 0.4013091767144706, |
| "learning_rate": 6.116386898458211e-05, |
| "loss": 0.3908, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.9548611111111112, |
| "grad_norm": 0.3752656113478743, |
| "learning_rate": 6.108152803761585e-05, |
| "loss": 0.388, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.9583333333333335, |
| "grad_norm": 0.42849730360980076, |
| "learning_rate": 6.099906321338241e-05, |
| "loss": 0.3883, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.9618055555555556, |
| "grad_norm": 0.5136107162433327, |
| "learning_rate": 6.091647499645373e-05, |
| "loss": 0.3936, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.9652777777777777, |
| "grad_norm": 0.4401404773608974, |
| "learning_rate": 6.08337638721268e-05, |
| "loss": 0.387, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.96875, |
| "grad_norm": 0.3578780937355148, |
| "learning_rate": 6.075093032642087e-05, |
| "loss": 0.3798, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.9722222222222223, |
| "grad_norm": 0.34936357235449345, |
| "learning_rate": 6.0667974846074524e-05, |
| "loss": 0.3965, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.9756944444444444, |
| "grad_norm": 0.40689796724445154, |
| "learning_rate": 6.058489791854286e-05, |
| "loss": 0.3894, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.9791666666666665, |
| "grad_norm": 0.4956233528878605, |
| "learning_rate": 6.0501700031994613e-05, |
| "loss": 0.3937, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.9826388888888888, |
| "grad_norm": 0.5833995404348259, |
| "learning_rate": 6.041838167530927e-05, |
| "loss": 0.3871, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.9861111111111112, |
| "grad_norm": 0.6022857744397874, |
| "learning_rate": 6.033494333807422e-05, |
| "loss": 0.389, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.9895833333333335, |
| "grad_norm": 0.5608017817744333, |
| "learning_rate": 6.02513855105819e-05, |
| "loss": 0.3983, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.9930555555555556, |
| "grad_norm": 0.47618308572818047, |
| "learning_rate": 6.016770868382683e-05, |
| "loss": 0.3818, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.9965277777777777, |
| "grad_norm": 0.3934864503184335, |
| "learning_rate": 6.008391334950281e-05, |
| "loss": 0.385, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.25691954246846876, |
| "learning_rate": 6.000000000000001e-05, |
| "loss": 0.3698, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.0034722222222223, |
| "grad_norm": 0.30107102968416166, |
| "learning_rate": 5.991596912840207e-05, |
| "loss": 0.3627, |
| "step": 577 |
| }, |
| { |
| "epoch": 2.0069444444444446, |
| "grad_norm": 0.5647121908849111, |
| "learning_rate": 5.983182122848318e-05, |
| "loss": 0.3514, |
| "step": 578 |
| }, |
| { |
| "epoch": 2.0104166666666665, |
| "grad_norm": 0.8511792389980791, |
| "learning_rate": 5.9747556794705213e-05, |
| "loss": 0.3589, |
| "step": 579 |
| }, |
| { |
| "epoch": 2.013888888888889, |
| "grad_norm": 0.940817594887035, |
| "learning_rate": 5.9663176322214826e-05, |
| "loss": 0.3622, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.017361111111111, |
| "grad_norm": 0.7139112695072981, |
| "learning_rate": 5.957868030684051e-05, |
| "loss": 0.361, |
| "step": 581 |
| }, |
| { |
| "epoch": 2.0208333333333335, |
| "grad_norm": 0.4910633370182954, |
| "learning_rate": 5.94940692450897e-05, |
| "loss": 0.3565, |
| "step": 582 |
| }, |
| { |
| "epoch": 2.0243055555555554, |
| "grad_norm": 0.4269351784997279, |
| "learning_rate": 5.940934363414586e-05, |
| "loss": 0.3595, |
| "step": 583 |
| }, |
| { |
| "epoch": 2.0277777777777777, |
| "grad_norm": 0.4872235898220299, |
| "learning_rate": 5.9324503971865545e-05, |
| "loss": 0.3587, |
| "step": 584 |
| }, |
| { |
| "epoch": 2.03125, |
| "grad_norm": 0.5995095615790915, |
| "learning_rate": 5.923955075677551e-05, |
| "loss": 0.3554, |
| "step": 585 |
| }, |
| { |
| "epoch": 2.0347222222222223, |
| "grad_norm": 0.5648401977971076, |
| "learning_rate": 5.9154484488069736e-05, |
| "loss": 0.3581, |
| "step": 586 |
| }, |
| { |
| "epoch": 2.0381944444444446, |
| "grad_norm": 0.4158491724702212, |
| "learning_rate": 5.9069305665606554e-05, |
| "loss": 0.3553, |
| "step": 587 |
| }, |
| { |
| "epoch": 2.0416666666666665, |
| "grad_norm": 0.349690330012685, |
| "learning_rate": 5.8984014789905625e-05, |
| "loss": 0.3578, |
| "step": 588 |
| }, |
| { |
| "epoch": 2.045138888888889, |
| "grad_norm": 0.4802435080315265, |
| "learning_rate": 5.8898612362145066e-05, |
| "loss": 0.3584, |
| "step": 589 |
| }, |
| { |
| "epoch": 2.048611111111111, |
| "grad_norm": 0.48963145307942074, |
| "learning_rate": 5.8813098884158505e-05, |
| "loss": 0.3569, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.0520833333333335, |
| "grad_norm": 0.3526405542275553, |
| "learning_rate": 5.8727474858432085e-05, |
| "loss": 0.358, |
| "step": 591 |
| }, |
| { |
| "epoch": 2.0555555555555554, |
| "grad_norm": 0.33023603489278375, |
| "learning_rate": 5.8641740788101566e-05, |
| "loss": 0.3603, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.0590277777777777, |
| "grad_norm": 0.3840854831858298, |
| "learning_rate": 5.85558971769493e-05, |
| "loss": 0.3487, |
| "step": 593 |
| }, |
| { |
| "epoch": 2.0625, |
| "grad_norm": 0.4107313120312768, |
| "learning_rate": 5.846994452940137e-05, |
| "loss": 0.355, |
| "step": 594 |
| }, |
| { |
| "epoch": 2.0659722222222223, |
| "grad_norm": 0.3145742869476471, |
| "learning_rate": 5.83838833505245e-05, |
| "loss": 0.357, |
| "step": 595 |
| }, |
| { |
| "epoch": 2.0694444444444446, |
| "grad_norm": 0.27860334683052107, |
| "learning_rate": 5.8297714146023236e-05, |
| "loss": 0.351, |
| "step": 596 |
| }, |
| { |
| "epoch": 2.0729166666666665, |
| "grad_norm": 0.4136824169602067, |
| "learning_rate": 5.821143742223682e-05, |
| "loss": 0.3562, |
| "step": 597 |
| }, |
| { |
| "epoch": 2.076388888888889, |
| "grad_norm": 0.3673086857169161, |
| "learning_rate": 5.812505368613633e-05, |
| "loss": 0.3495, |
| "step": 598 |
| }, |
| { |
| "epoch": 2.079861111111111, |
| "grad_norm": 0.2862981526340435, |
| "learning_rate": 5.803856344532166e-05, |
| "loss": 0.3622, |
| "step": 599 |
| }, |
| { |
| "epoch": 2.0833333333333335, |
| "grad_norm": 0.2799086544794607, |
| "learning_rate": 5.79519672080185e-05, |
| "loss": 0.3585, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.0868055555555554, |
| "grad_norm": 0.24475419710964016, |
| "learning_rate": 5.786526548307541e-05, |
| "loss": 0.3514, |
| "step": 601 |
| }, |
| { |
| "epoch": 2.0902777777777777, |
| "grad_norm": 0.2834139430354975, |
| "learning_rate": 5.777845877996085e-05, |
| "loss": 0.3596, |
| "step": 602 |
| }, |
| { |
| "epoch": 2.09375, |
| "grad_norm": 0.32111723120156277, |
| "learning_rate": 5.7691547608760055e-05, |
| "loss": 0.3559, |
| "step": 603 |
| }, |
| { |
| "epoch": 2.0972222222222223, |
| "grad_norm": 0.3194256373082478, |
| "learning_rate": 5.76045324801722e-05, |
| "loss": 0.3523, |
| "step": 604 |
| }, |
| { |
| "epoch": 2.1006944444444446, |
| "grad_norm": 0.3300710025133727, |
| "learning_rate": 5.7517413905507286e-05, |
| "loss": 0.3568, |
| "step": 605 |
| }, |
| { |
| "epoch": 2.1041666666666665, |
| "grad_norm": 0.3503765239910186, |
| "learning_rate": 5.743019239668318e-05, |
| "loss": 0.3537, |
| "step": 606 |
| }, |
| { |
| "epoch": 2.107638888888889, |
| "grad_norm": 0.3676525989023615, |
| "learning_rate": 5.7342868466222616e-05, |
| "loss": 0.3623, |
| "step": 607 |
| }, |
| { |
| "epoch": 2.111111111111111, |
| "grad_norm": 0.6703499129502645, |
| "learning_rate": 5.7255442627250146e-05, |
| "loss": 0.3626, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.1145833333333335, |
| "grad_norm": 0.22185727620726894, |
| "learning_rate": 5.716791539348917e-05, |
| "loss": 0.354, |
| "step": 609 |
| }, |
| { |
| "epoch": 2.1180555555555554, |
| "grad_norm": 0.6429072892056448, |
| "learning_rate": 5.708028727925887e-05, |
| "loss": 0.3572, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.1215277777777777, |
| "grad_norm": 0.3846890307207904, |
| "learning_rate": 5.6992558799471226e-05, |
| "loss": 0.3587, |
| "step": 611 |
| }, |
| { |
| "epoch": 2.125, |
| "grad_norm": 2.143120529808764, |
| "learning_rate": 5.6904730469627985e-05, |
| "loss": 0.375, |
| "step": 612 |
| }, |
| { |
| "epoch": 2.1284722222222223, |
| "grad_norm": 0.414767281586357, |
| "learning_rate": 5.681680280581761e-05, |
| "loss": 0.3679, |
| "step": 613 |
| }, |
| { |
| "epoch": 2.1319444444444446, |
| "grad_norm": 0.7323559863602489, |
| "learning_rate": 5.672877632471226e-05, |
| "loss": 0.3651, |
| "step": 614 |
| }, |
| { |
| "epoch": 2.1354166666666665, |
| "grad_norm": 0.7643817367842332, |
| "learning_rate": 5.664065154356477e-05, |
| "loss": 0.3609, |
| "step": 615 |
| }, |
| { |
| "epoch": 2.138888888888889, |
| "grad_norm": 1.9345025495859447, |
| "learning_rate": 5.6552428980205575e-05, |
| "loss": 0.372, |
| "step": 616 |
| }, |
| { |
| "epoch": 2.142361111111111, |
| "grad_norm": 208.50573500089143, |
| "learning_rate": 5.6464109153039695e-05, |
| "loss": 5.8523, |
| "step": 617 |
| }, |
| { |
| "epoch": 2.1458333333333335, |
| "grad_norm": 24.860622309173138, |
| "learning_rate": 5.6375692581043705e-05, |
| "loss": 0.5587, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.1493055555555554, |
| "grad_norm": 24.26248109338951, |
| "learning_rate": 5.628717978376263e-05, |
| "loss": 0.7174, |
| "step": 619 |
| }, |
| { |
| "epoch": 2.1527777777777777, |
| "grad_norm": 6.523767538235247, |
| "learning_rate": 5.619857128130695e-05, |
| "loss": 0.4476, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.15625, |
| "grad_norm": 6.2813772578997416, |
| "learning_rate": 5.61098675943495e-05, |
| "loss": 0.3991, |
| "step": 621 |
| }, |
| { |
| "epoch": 2.1597222222222223, |
| "grad_norm": 0.7292368990201091, |
| "learning_rate": 5.602106924412243e-05, |
| "loss": 0.3903, |
| "step": 622 |
| }, |
| { |
| "epoch": 2.1631944444444446, |
| "grad_norm": 1.4353569142671059, |
| "learning_rate": 5.5932176752414163e-05, |
| "loss": 0.3951, |
| "step": 623 |
| }, |
| { |
| "epoch": 2.1666666666666665, |
| "grad_norm": 0.7524898691540998, |
| "learning_rate": 5.584319064156628e-05, |
| "loss": 0.3791, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.170138888888889, |
| "grad_norm": 0.7596150008455383, |
| "learning_rate": 5.57541114344705e-05, |
| "loss": 0.3742, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.173611111111111, |
| "grad_norm": 0.6870434643700057, |
| "learning_rate": 5.566493965456557e-05, |
| "loss": 0.3786, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.1770833333333335, |
| "grad_norm": 0.4825491333810975, |
| "learning_rate": 5.5575675825834215e-05, |
| "loss": 0.3746, |
| "step": 627 |
| }, |
| { |
| "epoch": 2.1805555555555554, |
| "grad_norm": 0.9393542952103341, |
| "learning_rate": 5.548632047280003e-05, |
| "loss": 0.3761, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.1840277777777777, |
| "grad_norm": 0.6786528943222451, |
| "learning_rate": 5.539687412052445e-05, |
| "loss": 0.3707, |
| "step": 629 |
| }, |
| { |
| "epoch": 2.1875, |
| "grad_norm": 4.3304441633601884, |
| "learning_rate": 5.5307337294603595e-05, |
| "loss": 0.3928, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.1909722222222223, |
| "grad_norm": 2.894806364255019, |
| "learning_rate": 5.521771052116524e-05, |
| "loss": 0.4186, |
| "step": 631 |
| }, |
| { |
| "epoch": 2.1944444444444446, |
| "grad_norm": 0.8736036582533201, |
| "learning_rate": 5.5127994326865706e-05, |
| "loss": 0.3829, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.1979166666666665, |
| "grad_norm": 1.0402302831246584, |
| "learning_rate": 5.5038189238886724e-05, |
| "loss": 0.3917, |
| "step": 633 |
| }, |
| { |
| "epoch": 2.201388888888889, |
| "grad_norm": 1.0251763725005574, |
| "learning_rate": 5.4948295784932425e-05, |
| "loss": 0.384, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.204861111111111, |
| "grad_norm": 0.8468595986592679, |
| "learning_rate": 5.485831449322614e-05, |
| "loss": 0.3717, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.2083333333333335, |
| "grad_norm": 0.9633419107531916, |
| "learning_rate": 5.476824589250738e-05, |
| "loss": 0.3841, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.2118055555555554, |
| "grad_norm": 0.6494993837379418, |
| "learning_rate": 5.467809051202867e-05, |
| "loss": 0.3765, |
| "step": 637 |
| }, |
| { |
| "epoch": 2.2152777777777777, |
| "grad_norm": 0.6328352776053527, |
| "learning_rate": 5.458784888155248e-05, |
| "loss": 0.3715, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.21875, |
| "grad_norm": 0.5569738869215616, |
| "learning_rate": 5.4497521531348066e-05, |
| "loss": 0.3727, |
| "step": 639 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.511170034380733, |
| "learning_rate": 5.440710899218842e-05, |
| "loss": 0.3705, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.2256944444444446, |
| "grad_norm": 0.5625035626615582, |
| "learning_rate": 5.431661179534708e-05, |
| "loss": 0.3672, |
| "step": 641 |
| }, |
| { |
| "epoch": 2.2291666666666665, |
| "grad_norm": 0.5466352203355245, |
| "learning_rate": 5.4226030472595075e-05, |
| "loss": 0.3777, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.232638888888889, |
| "grad_norm": 0.5202495848597224, |
| "learning_rate": 5.4135365556197715e-05, |
| "loss": 0.364, |
| "step": 643 |
| }, |
| { |
| "epoch": 2.236111111111111, |
| "grad_norm": 0.48315072083496347, |
| "learning_rate": 5.404461757891156e-05, |
| "loss": 0.3621, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.2395833333333335, |
| "grad_norm": 0.43131695889837246, |
| "learning_rate": 5.3953787073981236e-05, |
| "loss": 0.3691, |
| "step": 645 |
| }, |
| { |
| "epoch": 2.2430555555555554, |
| "grad_norm": 0.4189369002593043, |
| "learning_rate": 5.3862874575136304e-05, |
| "loss": 0.3623, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.2465277777777777, |
| "grad_norm": 0.3745410821868568, |
| "learning_rate": 5.377188061658814e-05, |
| "loss": 0.3619, |
| "step": 647 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.3409984577353413, |
| "learning_rate": 5.368080573302676e-05, |
| "loss": 0.3711, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.2534722222222223, |
| "grad_norm": 0.37503860120051213, |
| "learning_rate": 5.358965045961772e-05, |
| "loss": 0.3616, |
| "step": 649 |
| }, |
| { |
| "epoch": 2.2569444444444446, |
| "grad_norm": 0.3382453595399695, |
| "learning_rate": 5.3498415331998965e-05, |
| "loss": 0.3716, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.2604166666666665, |
| "grad_norm": 0.2988789618607428, |
| "learning_rate": 5.340710088627766e-05, |
| "loss": 0.3653, |
| "step": 651 |
| }, |
| { |
| "epoch": 2.263888888888889, |
| "grad_norm": 0.3876803657220898, |
| "learning_rate": 5.331570765902706e-05, |
| "loss": 0.3646, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.267361111111111, |
| "grad_norm": 0.2482002164430231, |
| "learning_rate": 5.3224236187283345e-05, |
| "loss": 0.3588, |
| "step": 653 |
| }, |
| { |
| "epoch": 2.2708333333333335, |
| "grad_norm": 0.3492377558634399, |
| "learning_rate": 5.3132687008542454e-05, |
| "loss": 0.3674, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.2743055555555554, |
| "grad_norm": 0.3057479810242644, |
| "learning_rate": 5.304106066075694e-05, |
| "loss": 0.3667, |
| "step": 655 |
| }, |
| { |
| "epoch": 2.2777777777777777, |
| "grad_norm": 0.28671680767187063, |
| "learning_rate": 5.294935768233285e-05, |
| "loss": 0.365, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.28125, |
| "grad_norm": 0.22327813987047312, |
| "learning_rate": 5.2857578612126466e-05, |
| "loss": 0.359, |
| "step": 657 |
| }, |
| { |
| "epoch": 2.2847222222222223, |
| "grad_norm": 0.26865980718906646, |
| "learning_rate": 5.276572398944124e-05, |
| "loss": 0.3556, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.2881944444444446, |
| "grad_norm": 0.2333779225620715, |
| "learning_rate": 5.267379435402455e-05, |
| "loss": 0.3574, |
| "step": 659 |
| }, |
| { |
| "epoch": 2.2916666666666665, |
| "grad_norm": 0.24382283579760292, |
| "learning_rate": 5.258179024606455e-05, |
| "loss": 0.3589, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.295138888888889, |
| "grad_norm": 0.261824698068253, |
| "learning_rate": 5.2489712206187036e-05, |
| "loss": 0.3642, |
| "step": 661 |
| }, |
| { |
| "epoch": 2.298611111111111, |
| "grad_norm": 0.24569982834386714, |
| "learning_rate": 5.239756077545221e-05, |
| "loss": 0.3588, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.3020833333333335, |
| "grad_norm": 0.29187895293715893, |
| "learning_rate": 5.2305336495351536e-05, |
| "loss": 0.3602, |
| "step": 663 |
| }, |
| { |
| "epoch": 2.3055555555555554, |
| "grad_norm": 0.2339347191042144, |
| "learning_rate": 5.2213039907804535e-05, |
| "loss": 0.3633, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.3090277777777777, |
| "grad_norm": 0.22979503433977172, |
| "learning_rate": 5.212067155515563e-05, |
| "loss": 0.3606, |
| "step": 665 |
| }, |
| { |
| "epoch": 2.3125, |
| "grad_norm": 0.2044651546517708, |
| "learning_rate": 5.202823198017092e-05, |
| "loss": 0.3642, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.3159722222222223, |
| "grad_norm": 0.21390953062575657, |
| "learning_rate": 5.1935721726035066e-05, |
| "loss": 0.3615, |
| "step": 667 |
| }, |
| { |
| "epoch": 2.3194444444444446, |
| "grad_norm": 0.21587882165366537, |
| "learning_rate": 5.1843141336348e-05, |
| "loss": 0.3563, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.3229166666666665, |
| "grad_norm": 0.23130846400906935, |
| "learning_rate": 5.1750491355121776e-05, |
| "loss": 0.3621, |
| "step": 669 |
| }, |
| { |
| "epoch": 2.326388888888889, |
| "grad_norm": 0.20361212130904563, |
| "learning_rate": 5.165777232677741e-05, |
| "loss": 0.3616, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.329861111111111, |
| "grad_norm": 0.21069360029668197, |
| "learning_rate": 5.15649847961416e-05, |
| "loss": 0.3593, |
| "step": 671 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.21641477786022795, |
| "learning_rate": 5.1472129308443616e-05, |
| "loss": 0.3577, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.3368055555555554, |
| "grad_norm": 0.20207920681809247, |
| "learning_rate": 5.137920640931203e-05, |
| "loss": 0.3624, |
| "step": 673 |
| }, |
| { |
| "epoch": 2.3402777777777777, |
| "grad_norm": 0.2176212869974964, |
| "learning_rate": 5.1286216644771516e-05, |
| "loss": 0.3615, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.34375, |
| "grad_norm": 0.22129145443500614, |
| "learning_rate": 5.1193160561239694e-05, |
| "loss": 0.3576, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.3472222222222223, |
| "grad_norm": 0.2466102629786633, |
| "learning_rate": 5.1100038705523834e-05, |
| "loss": 0.3574, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.3506944444444446, |
| "grad_norm": 0.21074133497030534, |
| "learning_rate": 5.100685162481774e-05, |
| "loss": 0.3587, |
| "step": 677 |
| }, |
| { |
| "epoch": 2.3541666666666665, |
| "grad_norm": 0.23866014599102006, |
| "learning_rate": 5.091359986669845e-05, |
| "loss": 0.3643, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.357638888888889, |
| "grad_norm": 0.3002294853456305, |
| "learning_rate": 5.082028397912305e-05, |
| "loss": 0.3558, |
| "step": 679 |
| }, |
| { |
| "epoch": 2.361111111111111, |
| "grad_norm": 0.30099280117716753, |
| "learning_rate": 5.07269045104255e-05, |
| "loss": 0.3547, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.3645833333333335, |
| "grad_norm": 0.2606514046962765, |
| "learning_rate": 5.0633462009313315e-05, |
| "loss": 0.3607, |
| "step": 681 |
| }, |
| { |
| "epoch": 2.3680555555555554, |
| "grad_norm": 0.2675783212789683, |
| "learning_rate": 5.053995702486443e-05, |
| "loss": 0.3639, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.3715277777777777, |
| "grad_norm": 0.24318875517158728, |
| "learning_rate": 5.044639010652393e-05, |
| "loss": 0.359, |
| "step": 683 |
| }, |
| { |
| "epoch": 2.375, |
| "grad_norm": 0.23002381127619823, |
| "learning_rate": 5.0352761804100835e-05, |
| "loss": 0.3617, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.3784722222222223, |
| "grad_norm": 0.2118164025124787, |
| "learning_rate": 5.025907266776484e-05, |
| "loss": 0.3556, |
| "step": 685 |
| }, |
| { |
| "epoch": 2.3819444444444446, |
| "grad_norm": 0.22285424218251762, |
| "learning_rate": 5.0165323248043145e-05, |
| "loss": 0.3538, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.3854166666666665, |
| "grad_norm": 0.2387153365397832, |
| "learning_rate": 5.007151409581715e-05, |
| "loss": 0.3592, |
| "step": 687 |
| }, |
| { |
| "epoch": 2.388888888888889, |
| "grad_norm": 0.2301897194015837, |
| "learning_rate": 4.9977645762319255e-05, |
| "loss": 0.3563, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.392361111111111, |
| "grad_norm": 0.28292987282319554, |
| "learning_rate": 4.988371879912964e-05, |
| "loss": 0.3686, |
| "step": 689 |
| }, |
| { |
| "epoch": 2.3958333333333335, |
| "grad_norm": 0.2924629331701138, |
| "learning_rate": 4.9789733758172956e-05, |
| "loss": 0.3659, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.3993055555555554, |
| "grad_norm": 0.22966813193968594, |
| "learning_rate": 4.9695691191715175e-05, |
| "loss": 0.3652, |
| "step": 691 |
| }, |
| { |
| "epoch": 2.4027777777777777, |
| "grad_norm": 0.1887146801357064, |
| "learning_rate": 4.9601591652360244e-05, |
| "loss": 0.3586, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.40625, |
| "grad_norm": 0.1857337748310565, |
| "learning_rate": 4.950743569304693e-05, |
| "loss": 0.3614, |
| "step": 693 |
| }, |
| { |
| "epoch": 2.4097222222222223, |
| "grad_norm": 0.2001722223098041, |
| "learning_rate": 4.941322386704551e-05, |
| "loss": 0.3551, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.4131944444444446, |
| "grad_norm": 0.21840341629402213, |
| "learning_rate": 4.931895672795454e-05, |
| "loss": 0.3535, |
| "step": 695 |
| }, |
| { |
| "epoch": 2.4166666666666665, |
| "grad_norm": 0.2069911607036155, |
| "learning_rate": 4.922463482969761e-05, |
| "loss": 0.3562, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.420138888888889, |
| "grad_norm": 0.16249095809217645, |
| "learning_rate": 4.913025872652007e-05, |
| "loss": 0.3632, |
| "step": 697 |
| }, |
| { |
| "epoch": 2.423611111111111, |
| "grad_norm": 0.19258216947616108, |
| "learning_rate": 4.903582897298579e-05, |
| "loss": 0.357, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.4270833333333335, |
| "grad_norm": 0.17488328139804288, |
| "learning_rate": 4.89413461239739e-05, |
| "loss": 0.3633, |
| "step": 699 |
| }, |
| { |
| "epoch": 2.4305555555555554, |
| "grad_norm": 0.21230424394190295, |
| "learning_rate": 4.884681073467551e-05, |
| "loss": 0.3622, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.4340277777777777, |
| "grad_norm": 0.2054127351550878, |
| "learning_rate": 4.8752223360590484e-05, |
| "loss": 0.3609, |
| "step": 701 |
| }, |
| { |
| "epoch": 2.4375, |
| "grad_norm": 0.16416468185173924, |
| "learning_rate": 4.8657584557524116e-05, |
| "loss": 0.3579, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.4409722222222223, |
| "grad_norm": 0.20905287225635077, |
| "learning_rate": 4.8562894881583956e-05, |
| "loss": 0.3629, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.4444444444444446, |
| "grad_norm": 0.18925844032734393, |
| "learning_rate": 4.846815488917644e-05, |
| "loss": 0.3626, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.4479166666666665, |
| "grad_norm": 0.23302638899554443, |
| "learning_rate": 4.837336513700369e-05, |
| "loss": 0.3603, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.451388888888889, |
| "grad_norm": 0.21386687962480064, |
| "learning_rate": 4.8278526182060225e-05, |
| "loss": 0.3573, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.454861111111111, |
| "grad_norm": 0.16096788495870992, |
| "learning_rate": 4.8183638581629676e-05, |
| "loss": 0.3597, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.4583333333333335, |
| "grad_norm": 0.17374476347622608, |
| "learning_rate": 4.808870289328153e-05, |
| "loss": 0.3616, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.4618055555555554, |
| "grad_norm": 0.21830910332147066, |
| "learning_rate": 4.7993719674867815e-05, |
| "loss": 0.3558, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.4652777777777777, |
| "grad_norm": 0.18240980841394056, |
| "learning_rate": 4.789868948451991e-05, |
| "loss": 0.3591, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.46875, |
| "grad_norm": 0.21816236250652732, |
| "learning_rate": 4.780361288064514e-05, |
| "loss": 0.3604, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.4722222222222223, |
| "grad_norm": 0.2728408320055425, |
| "learning_rate": 4.7708490421923596e-05, |
| "loss": 0.3586, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.4756944444444446, |
| "grad_norm": 0.24429645460919563, |
| "learning_rate": 4.761332266730481e-05, |
| "loss": 0.3523, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.4791666666666665, |
| "grad_norm": 0.19689609824801885, |
| "learning_rate": 4.751811017600448e-05, |
| "loss": 0.3606, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.482638888888889, |
| "grad_norm": 0.273566755423662, |
| "learning_rate": 4.742285350750118e-05, |
| "loss": 0.3554, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.486111111111111, |
| "grad_norm": 0.32623914313060043, |
| "learning_rate": 4.7327553221533074e-05, |
| "loss": 0.357, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.4895833333333335, |
| "grad_norm": 0.2830951878660179, |
| "learning_rate": 4.723220987809462e-05, |
| "loss": 0.3578, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.4930555555555554, |
| "grad_norm": 0.2565496381802557, |
| "learning_rate": 4.713682403743329e-05, |
| "loss": 0.3604, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.4965277777777777, |
| "grad_norm": 0.21894746366691853, |
| "learning_rate": 4.7041396260046286e-05, |
| "loss": 0.3641, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.19901998551130898, |
| "learning_rate": 4.694592710667723e-05, |
| "loss": 0.3582, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.5034722222222223, |
| "grad_norm": 0.24837568226290876, |
| "learning_rate": 4.6850417138312845e-05, |
| "loss": 0.3505, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.5069444444444446, |
| "grad_norm": 0.3313870249246507, |
| "learning_rate": 4.6754866916179725e-05, |
| "loss": 0.3582, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.5104166666666665, |
| "grad_norm": 0.2244873842332084, |
| "learning_rate": 4.6659277001740984e-05, |
| "loss": 0.3573, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.513888888888889, |
| "grad_norm": 0.19767791466423057, |
| "learning_rate": 4.656364795669297e-05, |
| "loss": 0.36, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.517361111111111, |
| "grad_norm": 0.28843808426003764, |
| "learning_rate": 4.646798034296197e-05, |
| "loss": 0.3604, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.5208333333333335, |
| "grad_norm": 0.2796222422579987, |
| "learning_rate": 4.637227472270091e-05, |
| "loss": 0.3605, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.5243055555555554, |
| "grad_norm": 0.2367371209993064, |
| "learning_rate": 4.6276531658286036e-05, |
| "loss": 0.3589, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.5277777777777777, |
| "grad_norm": 0.20008216456325678, |
| "learning_rate": 4.618075171231363e-05, |
| "loss": 0.3571, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.53125, |
| "grad_norm": 0.18250753943724574, |
| "learning_rate": 4.608493544759667e-05, |
| "loss": 0.3595, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.5347222222222223, |
| "grad_norm": 0.22848019667076963, |
| "learning_rate": 4.59890834271616e-05, |
| "loss": 0.3599, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.5381944444444446, |
| "grad_norm": 0.267718829441734, |
| "learning_rate": 4.589319621424489e-05, |
| "loss": 0.3612, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.5416666666666665, |
| "grad_norm": 0.33157306810932696, |
| "learning_rate": 4.579727437228987e-05, |
| "loss": 0.3597, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.545138888888889, |
| "grad_norm": 0.3143344523876356, |
| "learning_rate": 4.570131846494334e-05, |
| "loss": 0.3571, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.548611111111111, |
| "grad_norm": 0.20354881157325236, |
| "learning_rate": 4.560532905605225e-05, |
| "loss": 0.3589, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.5520833333333335, |
| "grad_norm": 0.22541333864731933, |
| "learning_rate": 4.550930670966043e-05, |
| "loss": 0.3579, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.5555555555555554, |
| "grad_norm": 0.3280950019921769, |
| "learning_rate": 4.541325199000525e-05, |
| "loss": 0.3516, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.5590277777777777, |
| "grad_norm": 0.24024319950783074, |
| "learning_rate": 4.5317165461514295e-05, |
| "loss": 0.3656, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.5625, |
| "grad_norm": 0.1660705941990726, |
| "learning_rate": 4.522104768880208e-05, |
| "loss": 0.3584, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.5659722222222223, |
| "grad_norm": 0.1888722193381791, |
| "learning_rate": 4.5124899236666694e-05, |
| "loss": 0.3646, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.5694444444444446, |
| "grad_norm": 0.22749799010625654, |
| "learning_rate": 4.502872067008652e-05, |
| "loss": 0.354, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.5729166666666665, |
| "grad_norm": 0.21243399731512363, |
| "learning_rate": 4.4932512554216886e-05, |
| "loss": 0.3602, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.576388888888889, |
| "grad_norm": 0.17510977970439304, |
| "learning_rate": 4.483627545438678e-05, |
| "loss": 0.3607, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.579861111111111, |
| "grad_norm": 0.20554150424391404, |
| "learning_rate": 4.4740009936095466e-05, |
| "loss": 0.3611, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.5833333333333335, |
| "grad_norm": 0.20942087652236643, |
| "learning_rate": 4.464371656500921e-05, |
| "loss": 0.362, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.5868055555555554, |
| "grad_norm": 0.22710097125473938, |
| "learning_rate": 4.4547395906957966e-05, |
| "loss": 0.3499, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.5902777777777777, |
| "grad_norm": 0.21288271383011229, |
| "learning_rate": 4.4451048527932e-05, |
| "loss": 0.3626, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.59375, |
| "grad_norm": 0.24824074926385184, |
| "learning_rate": 4.4354674994078585e-05, |
| "loss": 0.3646, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.5972222222222223, |
| "grad_norm": 0.2184701047156981, |
| "learning_rate": 4.425827587169873e-05, |
| "loss": 0.3589, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.6006944444444446, |
| "grad_norm": 0.22929717895377194, |
| "learning_rate": 4.4161851727243766e-05, |
| "loss": 0.3577, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.6041666666666665, |
| "grad_norm": 0.31278505610599755, |
| "learning_rate": 4.406540312731208e-05, |
| "loss": 0.3561, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.607638888888889, |
| "grad_norm": 0.25212944788531505, |
| "learning_rate": 4.396893063864573e-05, |
| "loss": 0.3561, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.611111111111111, |
| "grad_norm": 0.17873710074529314, |
| "learning_rate": 4.387243482812717e-05, |
| "loss": 0.357, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.6145833333333335, |
| "grad_norm": 0.29077615166300086, |
| "learning_rate": 4.37759162627759e-05, |
| "loss": 0.3561, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.6180555555555554, |
| "grad_norm": 0.3467330594972484, |
| "learning_rate": 4.3679375509745104e-05, |
| "loss": 0.3676, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.6215277777777777, |
| "grad_norm": 0.2993517680065959, |
| "learning_rate": 4.358281313631838e-05, |
| "loss": 0.3537, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.625, |
| "grad_norm": 0.15785110489395995, |
| "learning_rate": 4.348622970990634e-05, |
| "loss": 0.3601, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.6284722222222223, |
| "grad_norm": 0.22408309035303686, |
| "learning_rate": 4.338962579804331e-05, |
| "loss": 0.3541, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.6319444444444446, |
| "grad_norm": 0.3382351165187617, |
| "learning_rate": 4.3293001968384e-05, |
| "loss": 0.3584, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.6354166666666665, |
| "grad_norm": 0.279111362806744, |
| "learning_rate": 4.3196358788700164e-05, |
| "loss": 0.3614, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.638888888888889, |
| "grad_norm": 0.17240804345082109, |
| "learning_rate": 4.309969682687724e-05, |
| "loss": 0.3535, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.642361111111111, |
| "grad_norm": 0.20131161960623978, |
| "learning_rate": 4.300301665091105e-05, |
| "loss": 0.3562, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.6458333333333335, |
| "grad_norm": 0.24162909795940918, |
| "learning_rate": 4.290631882890443e-05, |
| "loss": 0.3594, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.6493055555555554, |
| "grad_norm": 0.21997131895223193, |
| "learning_rate": 4.2809603929063906e-05, |
| "loss": 0.3571, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.6527777777777777, |
| "grad_norm": 0.19702297458082826, |
| "learning_rate": 4.271287251969637e-05, |
| "loss": 0.3612, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.65625, |
| "grad_norm": 0.23837037232926317, |
| "learning_rate": 4.261612516920573e-05, |
| "loss": 0.3602, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.6597222222222223, |
| "grad_norm": 0.2312337983450589, |
| "learning_rate": 4.251936244608953e-05, |
| "loss": 0.3542, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.6631944444444446, |
| "grad_norm": 0.2538986261708629, |
| "learning_rate": 4.242258491893567e-05, |
| "loss": 0.3642, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.1919542427472609, |
| "learning_rate": 4.2325793156419035e-05, |
| "loss": 0.3553, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.670138888888889, |
| "grad_norm": 0.1959693615452782, |
| "learning_rate": 4.222898772729818e-05, |
| "loss": 0.3536, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.673611111111111, |
| "grad_norm": 0.21247229460127162, |
| "learning_rate": 4.213216920041194e-05, |
| "loss": 0.3563, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.6770833333333335, |
| "grad_norm": 0.22144773539883858, |
| "learning_rate": 4.203533814467611e-05, |
| "loss": 0.3636, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.6805555555555554, |
| "grad_norm": 0.22808065589921542, |
| "learning_rate": 4.193849512908013e-05, |
| "loss": 0.3584, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.6840277777777777, |
| "grad_norm": 0.20009577392696715, |
| "learning_rate": 4.1841640722683685e-05, |
| "loss": 0.3652, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.6875, |
| "grad_norm": 0.2298061729577738, |
| "learning_rate": 4.174477549461345e-05, |
| "loss": 0.3608, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.6909722222222223, |
| "grad_norm": 0.21869371124941922, |
| "learning_rate": 4.164790001405962e-05, |
| "loss": 0.3574, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.6944444444444446, |
| "grad_norm": 0.21370183101499696, |
| "learning_rate": 4.155101485027268e-05, |
| "loss": 0.3532, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.6979166666666665, |
| "grad_norm": 0.17168907041052586, |
| "learning_rate": 4.145412057256e-05, |
| "loss": 0.3554, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.701388888888889, |
| "grad_norm": 0.20485180075506743, |
| "learning_rate": 4.1357217750282504e-05, |
| "loss": 0.362, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.704861111111111, |
| "grad_norm": 0.22401006957798586, |
| "learning_rate": 4.1260306952851315e-05, |
| "loss": 0.3632, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.7083333333333335, |
| "grad_norm": 0.2520664541360099, |
| "learning_rate": 4.116338874972446e-05, |
| "loss": 0.3616, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.7118055555555554, |
| "grad_norm": 0.17674416781450486, |
| "learning_rate": 4.106646371040343e-05, |
| "loss": 0.3563, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.7152777777777777, |
| "grad_norm": 0.24149354151984379, |
| "learning_rate": 4.096953240442993e-05, |
| "loss": 0.3596, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.71875, |
| "grad_norm": 0.25292906375096735, |
| "learning_rate": 4.087259540138245e-05, |
| "loss": 0.3629, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.7222222222222223, |
| "grad_norm": 0.21108199686390794, |
| "learning_rate": 4.077565327087298e-05, |
| "loss": 0.3595, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.7256944444444446, |
| "grad_norm": 0.18238322067033086, |
| "learning_rate": 4.0678706582543634e-05, |
| "loss": 0.3576, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.7291666666666665, |
| "grad_norm": 0.15893305599604374, |
| "learning_rate": 4.058175590606332e-05, |
| "loss": 0.3548, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.732638888888889, |
| "grad_norm": 0.1572378850322172, |
| "learning_rate": 4.0484801811124346e-05, |
| "loss": 0.3513, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.736111111111111, |
| "grad_norm": 0.1648972137773385, |
| "learning_rate": 4.0387844867439143e-05, |
| "loss": 0.3559, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.7395833333333335, |
| "grad_norm": 0.1492622259544844, |
| "learning_rate": 4.029088564473688e-05, |
| "loss": 0.3558, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.7430555555555554, |
| "grad_norm": 0.1749873424220576, |
| "learning_rate": 4.019392471276008e-05, |
| "loss": 0.3616, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.7465277777777777, |
| "grad_norm": 0.16420183013763476, |
| "learning_rate": 4.0096962641261365e-05, |
| "loss": 0.3555, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.1782955356918841, |
| "learning_rate": 4e-05, |
| "loss": 0.3633, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.7534722222222223, |
| "grad_norm": 0.17880962347986656, |
| "learning_rate": 3.990303735873866e-05, |
| "loss": 0.3527, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.7569444444444446, |
| "grad_norm": 0.17598621623586508, |
| "learning_rate": 3.9806075287239935e-05, |
| "loss": 0.3664, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.7604166666666665, |
| "grad_norm": 0.18620845392745122, |
| "learning_rate": 3.970911435526314e-05, |
| "loss": 0.3584, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.763888888888889, |
| "grad_norm": 0.17465336973990567, |
| "learning_rate": 3.961215513256086e-05, |
| "loss": 0.357, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.767361111111111, |
| "grad_norm": 0.17917224180289917, |
| "learning_rate": 3.9515198188875674e-05, |
| "loss": 0.3589, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.7708333333333335, |
| "grad_norm": 0.1887633306567826, |
| "learning_rate": 3.9418244093936694e-05, |
| "loss": 0.3623, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.7743055555555554, |
| "grad_norm": 0.22561488109817832, |
| "learning_rate": 3.9321293417456387e-05, |
| "loss": 0.357, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 0.16907414954125385, |
| "learning_rate": 3.9224346729127034e-05, |
| "loss": 0.353, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.78125, |
| "grad_norm": 0.19666725754174408, |
| "learning_rate": 3.912740459861756e-05, |
| "loss": 0.3658, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.7847222222222223, |
| "grad_norm": 0.16730199368543427, |
| "learning_rate": 3.903046759557007e-05, |
| "loss": 0.3551, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.7881944444444446, |
| "grad_norm": 0.17896309699401097, |
| "learning_rate": 3.893353628959658e-05, |
| "loss": 0.3604, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.7916666666666665, |
| "grad_norm": 0.16510597022859788, |
| "learning_rate": 3.8836611250275546e-05, |
| "loss": 0.361, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.795138888888889, |
| "grad_norm": 0.19370398445124015, |
| "learning_rate": 3.87396930471487e-05, |
| "loss": 0.3553, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.798611111111111, |
| "grad_norm": 0.18393178304705537, |
| "learning_rate": 3.8642782249717516e-05, |
| "loss": 0.358, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.8020833333333335, |
| "grad_norm": 0.24948450843414427, |
| "learning_rate": 3.854587942744002e-05, |
| "loss": 0.3638, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.8055555555555554, |
| "grad_norm": 0.2830314635173867, |
| "learning_rate": 3.844898514972733e-05, |
| "loss": 0.3594, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.8090277777777777, |
| "grad_norm": 0.2002181438855024, |
| "learning_rate": 3.835209998594039e-05, |
| "loss": 0.3624, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.8125, |
| "grad_norm": 0.2511277931849178, |
| "learning_rate": 3.825522450538657e-05, |
| "loss": 0.3522, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.8159722222222223, |
| "grad_norm": 0.1967207093366818, |
| "learning_rate": 3.815835927731632e-05, |
| "loss": 0.3652, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.8194444444444446, |
| "grad_norm": 0.17960784639555785, |
| "learning_rate": 3.806150487091989e-05, |
| "loss": 0.3565, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.8229166666666665, |
| "grad_norm": 0.2233174388393411, |
| "learning_rate": 3.79646618553239e-05, |
| "loss": 0.3535, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.826388888888889, |
| "grad_norm": 0.19390324896728148, |
| "learning_rate": 3.786783079958808e-05, |
| "loss": 0.3514, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.829861111111111, |
| "grad_norm": 0.18204031624872857, |
| "learning_rate": 3.777101227270183e-05, |
| "loss": 0.3603, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.8333333333333335, |
| "grad_norm": 0.20344719272452805, |
| "learning_rate": 3.767420684358097e-05, |
| "loss": 0.3572, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.8368055555555554, |
| "grad_norm": 0.19184933570521928, |
| "learning_rate": 3.757741508106434e-05, |
| "loss": 0.3644, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.8402777777777777, |
| "grad_norm": 0.18424245484201168, |
| "learning_rate": 3.748063755391049e-05, |
| "loss": 0.3613, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.84375, |
| "grad_norm": 0.21747633916072062, |
| "learning_rate": 3.738387483079428e-05, |
| "loss": 0.3563, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.8472222222222223, |
| "grad_norm": 0.17447330084596435, |
| "learning_rate": 3.7287127480303634e-05, |
| "loss": 0.3536, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.8506944444444446, |
| "grad_norm": 0.1922881427816934, |
| "learning_rate": 3.7190396070936093e-05, |
| "loss": 0.3557, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.8541666666666665, |
| "grad_norm": 0.20972951109888854, |
| "learning_rate": 3.709368117109558e-05, |
| "loss": 0.3578, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.857638888888889, |
| "grad_norm": 0.22468999669900613, |
| "learning_rate": 3.699698334908895e-05, |
| "loss": 0.3598, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.861111111111111, |
| "grad_norm": 0.20049022903894825, |
| "learning_rate": 3.690030317312277e-05, |
| "loss": 0.3582, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.8645833333333335, |
| "grad_norm": 0.17875399966945452, |
| "learning_rate": 3.6803641211299856e-05, |
| "loss": 0.3564, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.8680555555555554, |
| "grad_norm": 0.2428253163358811, |
| "learning_rate": 3.670699803161601e-05, |
| "loss": 0.3557, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.8715277777777777, |
| "grad_norm": 0.24065758729640713, |
| "learning_rate": 3.661037420195671e-05, |
| "loss": 0.3608, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.875, |
| "grad_norm": 0.19617707994378045, |
| "learning_rate": 3.6513770290093674e-05, |
| "loss": 0.3544, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.8784722222222223, |
| "grad_norm": 0.26671779662664247, |
| "learning_rate": 3.641718686368164e-05, |
| "loss": 0.3557, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.8819444444444446, |
| "grad_norm": 0.1801343584986345, |
| "learning_rate": 3.63206244902549e-05, |
| "loss": 0.3543, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.8854166666666665, |
| "grad_norm": 0.19450147708793394, |
| "learning_rate": 3.622408373722412e-05, |
| "loss": 0.3584, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.888888888888889, |
| "grad_norm": 0.17598786584797108, |
| "learning_rate": 3.612756517187284e-05, |
| "loss": 0.3632, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.892361111111111, |
| "grad_norm": 0.18182338931383799, |
| "learning_rate": 3.603106936135429e-05, |
| "loss": 0.3535, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.8958333333333335, |
| "grad_norm": 0.22364386017448618, |
| "learning_rate": 3.5934596872687924e-05, |
| "loss": 0.3575, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.8993055555555554, |
| "grad_norm": 0.1668601423162201, |
| "learning_rate": 3.583814827275624e-05, |
| "loss": 0.3569, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.9027777777777777, |
| "grad_norm": 0.19801055806417453, |
| "learning_rate": 3.574172412830127e-05, |
| "loss": 0.3625, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.90625, |
| "grad_norm": 0.23583604825625834, |
| "learning_rate": 3.564532500592143e-05, |
| "loss": 0.3575, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.9097222222222223, |
| "grad_norm": 0.14520855199053195, |
| "learning_rate": 3.5548951472068017e-05, |
| "loss": 0.3497, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.9131944444444446, |
| "grad_norm": 0.19605998381766435, |
| "learning_rate": 3.545260409304205e-05, |
| "loss": 0.358, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.9166666666666665, |
| "grad_norm": 0.21453935925882056, |
| "learning_rate": 3.535628343499079e-05, |
| "loss": 0.3536, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.920138888888889, |
| "grad_norm": 0.19409210510884808, |
| "learning_rate": 3.525999006390455e-05, |
| "loss": 0.3706, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.923611111111111, |
| "grad_norm": 0.23198091133286045, |
| "learning_rate": 3.516372454561324e-05, |
| "loss": 0.363, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.9270833333333335, |
| "grad_norm": 0.17318947090543216, |
| "learning_rate": 3.506748744578312e-05, |
| "loss": 0.3564, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.9305555555555554, |
| "grad_norm": 0.17641845044561835, |
| "learning_rate": 3.49712793299135e-05, |
| "loss": 0.3593, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.9340277777777777, |
| "grad_norm": 0.1852604985802232, |
| "learning_rate": 3.487510076333332e-05, |
| "loss": 0.3584, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.9375, |
| "grad_norm": 0.17514830249343535, |
| "learning_rate": 3.477895231119795e-05, |
| "loss": 0.3634, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.9409722222222223, |
| "grad_norm": 0.16734143827463377, |
| "learning_rate": 3.468283453848572e-05, |
| "loss": 0.3499, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.9444444444444446, |
| "grad_norm": 0.18068561439735764, |
| "learning_rate": 3.458674800999477e-05, |
| "loss": 0.3603, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.9479166666666665, |
| "grad_norm": 0.15836687872645414, |
| "learning_rate": 3.4490693290339576e-05, |
| "loss": 0.3566, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.951388888888889, |
| "grad_norm": 0.18682847065633865, |
| "learning_rate": 3.4394670943947756e-05, |
| "loss": 0.3557, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.954861111111111, |
| "grad_norm": 0.182860315826738, |
| "learning_rate": 3.4298681535056664e-05, |
| "loss": 0.3547, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.9583333333333335, |
| "grad_norm": 0.1692085984740758, |
| "learning_rate": 3.4202725627710136e-05, |
| "loss": 0.3565, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.9618055555555554, |
| "grad_norm": 0.16291805438785126, |
| "learning_rate": 3.410680378575512e-05, |
| "loss": 0.3578, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.9652777777777777, |
| "grad_norm": 0.19636032650270333, |
| "learning_rate": 3.401091657283842e-05, |
| "loss": 0.3553, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.96875, |
| "grad_norm": 0.22258957309784064, |
| "learning_rate": 3.3915064552403336e-05, |
| "loss": 0.3518, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.9722222222222223, |
| "grad_norm": 0.16965579066125516, |
| "learning_rate": 3.3819248287686386e-05, |
| "loss": 0.3511, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.9756944444444446, |
| "grad_norm": 0.1930972448807475, |
| "learning_rate": 3.3723468341713985e-05, |
| "loss": 0.3515, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.9791666666666665, |
| "grad_norm": 0.1801137357638696, |
| "learning_rate": 3.3627725277299103e-05, |
| "loss": 0.3567, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.982638888888889, |
| "grad_norm": 0.1768882568613071, |
| "learning_rate": 3.3532019657038045e-05, |
| "loss": 0.3565, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.986111111111111, |
| "grad_norm": 0.18990156843143038, |
| "learning_rate": 3.343635204330704e-05, |
| "loss": 0.3505, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.9895833333333335, |
| "grad_norm": 0.18215961637350045, |
| "learning_rate": 3.3340722998259036e-05, |
| "loss": 0.3609, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.9930555555555554, |
| "grad_norm": 0.1850457048123024, |
| "learning_rate": 3.324513308382029e-05, |
| "loss": 0.3545, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.9965277777777777, |
| "grad_norm": 0.19097473138907864, |
| "learning_rate": 3.314958286168718e-05, |
| "loss": 0.359, |
| "step": 863 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.20390114258764727, |
| "learning_rate": 3.305407289332279e-05, |
| "loss": 0.3371, |
| "step": 864 |
| }, |
| { |
| "epoch": 3.0034722222222223, |
| "grad_norm": 0.23184923556068548, |
| "learning_rate": 3.295860373995373e-05, |
| "loss": 0.3338, |
| "step": 865 |
| }, |
| { |
| "epoch": 3.0069444444444446, |
| "grad_norm": 0.22531202044446838, |
| "learning_rate": 3.2863175962566716e-05, |
| "loss": 0.3278, |
| "step": 866 |
| }, |
| { |
| "epoch": 3.0104166666666665, |
| "grad_norm": 0.21138866062659648, |
| "learning_rate": 3.2767790121905396e-05, |
| "loss": 0.3313, |
| "step": 867 |
| }, |
| { |
| "epoch": 3.013888888888889, |
| "grad_norm": 0.19393318904652032, |
| "learning_rate": 3.267244677846693e-05, |
| "loss": 0.3287, |
| "step": 868 |
| }, |
| { |
| "epoch": 3.017361111111111, |
| "grad_norm": 0.20297535530392305, |
| "learning_rate": 3.257714649249883e-05, |
| "loss": 0.3212, |
| "step": 869 |
| }, |
| { |
| "epoch": 3.0208333333333335, |
| "grad_norm": 0.18715172201692282, |
| "learning_rate": 3.248188982399553e-05, |
| "loss": 0.3224, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.0243055555555554, |
| "grad_norm": 0.19690389505029438, |
| "learning_rate": 3.23866773326952e-05, |
| "loss": 0.3265, |
| "step": 871 |
| }, |
| { |
| "epoch": 3.0277777777777777, |
| "grad_norm": 0.18180988764739114, |
| "learning_rate": 3.229150957807641e-05, |
| "loss": 0.3221, |
| "step": 872 |
| }, |
| { |
| "epoch": 3.03125, |
| "grad_norm": 0.19869573153269798, |
| "learning_rate": 3.219638711935488e-05, |
| "loss": 0.3327, |
| "step": 873 |
| }, |
| { |
| "epoch": 3.0347222222222223, |
| "grad_norm": 0.17871850646547546, |
| "learning_rate": 3.210131051548011e-05, |
| "loss": 0.3281, |
| "step": 874 |
| }, |
| { |
| "epoch": 3.0381944444444446, |
| "grad_norm": 0.19745444724872563, |
| "learning_rate": 3.200628032513219e-05, |
| "loss": 0.3257, |
| "step": 875 |
| }, |
| { |
| "epoch": 3.0416666666666665, |
| "grad_norm": 0.16518241065195463, |
| "learning_rate": 3.191129710671849e-05, |
| "loss": 0.3231, |
| "step": 876 |
| }, |
| { |
| "epoch": 3.045138888888889, |
| "grad_norm": 0.15519829533874455, |
| "learning_rate": 3.181636141837033e-05, |
| "loss": 0.3325, |
| "step": 877 |
| }, |
| { |
| "epoch": 3.048611111111111, |
| "grad_norm": 0.15572856456527798, |
| "learning_rate": 3.1721473817939795e-05, |
| "loss": 0.3326, |
| "step": 878 |
| }, |
| { |
| "epoch": 3.0520833333333335, |
| "grad_norm": 0.16157608012366306, |
| "learning_rate": 3.162663486299632e-05, |
| "loss": 0.326, |
| "step": 879 |
| }, |
| { |
| "epoch": 3.0555555555555554, |
| "grad_norm": 0.15496294058585847, |
| "learning_rate": 3.153184511082359e-05, |
| "loss": 0.3244, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.0590277777777777, |
| "grad_norm": 0.15446224821419566, |
| "learning_rate": 3.143710511841606e-05, |
| "loss": 0.3304, |
| "step": 881 |
| }, |
| { |
| "epoch": 3.0625, |
| "grad_norm": 0.15700101028157268, |
| "learning_rate": 3.134241544247589e-05, |
| "loss": 0.3286, |
| "step": 882 |
| }, |
| { |
| "epoch": 3.0659722222222223, |
| "grad_norm": 0.15344774586241733, |
| "learning_rate": 3.124777663940952e-05, |
| "loss": 0.3251, |
| "step": 883 |
| }, |
| { |
| "epoch": 3.0694444444444446, |
| "grad_norm": 0.16649283778376192, |
| "learning_rate": 3.1153189265324494e-05, |
| "loss": 0.3277, |
| "step": 884 |
| }, |
| { |
| "epoch": 3.0729166666666665, |
| "grad_norm": 0.16996931994950168, |
| "learning_rate": 3.1058653876026105e-05, |
| "loss": 0.332, |
| "step": 885 |
| }, |
| { |
| "epoch": 3.076388888888889, |
| "grad_norm": 0.15660620934902456, |
| "learning_rate": 3.0964171027014217e-05, |
| "loss": 0.3259, |
| "step": 886 |
| }, |
| { |
| "epoch": 3.079861111111111, |
| "grad_norm": 0.17381747937524708, |
| "learning_rate": 3.0869741273479934e-05, |
| "loss": 0.3332, |
| "step": 887 |
| }, |
| { |
| "epoch": 3.0833333333333335, |
| "grad_norm": 0.15325114202704937, |
| "learning_rate": 3.07753651703024e-05, |
| "loss": 0.3328, |
| "step": 888 |
| }, |
| { |
| "epoch": 3.0868055555555554, |
| "grad_norm": 0.16410160873161936, |
| "learning_rate": 3.068104327204546e-05, |
| "loss": 0.3248, |
| "step": 889 |
| }, |
| { |
| "epoch": 3.0902777777777777, |
| "grad_norm": 0.15629114043992434, |
| "learning_rate": 3.0586776132954504e-05, |
| "loss": 0.3274, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.09375, |
| "grad_norm": 0.1287547525109488, |
| "learning_rate": 3.0492564306953083e-05, |
| "loss": 0.3313, |
| "step": 891 |
| }, |
| { |
| "epoch": 3.0972222222222223, |
| "grad_norm": 0.15918994404783066, |
| "learning_rate": 3.0398408347639773e-05, |
| "loss": 0.327, |
| "step": 892 |
| }, |
| { |
| "epoch": 3.1006944444444446, |
| "grad_norm": 0.15708787224166132, |
| "learning_rate": 3.0304308808284845e-05, |
| "loss": 0.3285, |
| "step": 893 |
| }, |
| { |
| "epoch": 3.1041666666666665, |
| "grad_norm": 0.14410613175392642, |
| "learning_rate": 3.0210266241827047e-05, |
| "loss": 0.3229, |
| "step": 894 |
| }, |
| { |
| "epoch": 3.107638888888889, |
| "grad_norm": 0.1454609051584777, |
| "learning_rate": 3.0116281200870383e-05, |
| "loss": 0.3283, |
| "step": 895 |
| }, |
| { |
| "epoch": 3.111111111111111, |
| "grad_norm": 0.15236322898098387, |
| "learning_rate": 3.0022354237680752e-05, |
| "loss": 0.3253, |
| "step": 896 |
| }, |
| { |
| "epoch": 3.1145833333333335, |
| "grad_norm": 0.13124398580702817, |
| "learning_rate": 2.9928485904182865e-05, |
| "loss": 0.3252, |
| "step": 897 |
| }, |
| { |
| "epoch": 3.1180555555555554, |
| "grad_norm": 0.15033157405355416, |
| "learning_rate": 2.9834676751956855e-05, |
| "loss": 0.3259, |
| "step": 898 |
| }, |
| { |
| "epoch": 3.1215277777777777, |
| "grad_norm": 0.12394460931334396, |
| "learning_rate": 2.9740927332235164e-05, |
| "loss": 0.326, |
| "step": 899 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 0.13351975000878838, |
| "learning_rate": 2.9647238195899168e-05, |
| "loss": 0.3367, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.1284722222222223, |
| "grad_norm": 0.1325052686323737, |
| "learning_rate": 2.9553609893476078e-05, |
| "loss": 0.3264, |
| "step": 901 |
| }, |
| { |
| "epoch": 3.1319444444444446, |
| "grad_norm": 0.13581375405721016, |
| "learning_rate": 2.9460042975135575e-05, |
| "loss": 0.3329, |
| "step": 902 |
| }, |
| { |
| "epoch": 3.1354166666666665, |
| "grad_norm": 0.1667562873637715, |
| "learning_rate": 2.936653799068669e-05, |
| "loss": 0.3283, |
| "step": 903 |
| }, |
| { |
| "epoch": 3.138888888888889, |
| "grad_norm": 0.15505693782068763, |
| "learning_rate": 2.9273095489574502e-05, |
| "loss": 0.3256, |
| "step": 904 |
| }, |
| { |
| "epoch": 3.142361111111111, |
| "grad_norm": 0.1487602254104706, |
| "learning_rate": 2.917971602087695e-05, |
| "loss": 0.3257, |
| "step": 905 |
| }, |
| { |
| "epoch": 3.1458333333333335, |
| "grad_norm": 0.1591921240522157, |
| "learning_rate": 2.9086400133301573e-05, |
| "loss": 0.3265, |
| "step": 906 |
| }, |
| { |
| "epoch": 3.1493055555555554, |
| "grad_norm": 0.13849832135798662, |
| "learning_rate": 2.8993148375182273e-05, |
| "loss": 0.3272, |
| "step": 907 |
| }, |
| { |
| "epoch": 3.1527777777777777, |
| "grad_norm": 0.1507894000952872, |
| "learning_rate": 2.889996129447618e-05, |
| "loss": 0.3272, |
| "step": 908 |
| }, |
| { |
| "epoch": 3.15625, |
| "grad_norm": 0.13584423605202275, |
| "learning_rate": 2.8806839438760322e-05, |
| "loss": 0.3269, |
| "step": 909 |
| }, |
| { |
| "epoch": 3.1597222222222223, |
| "grad_norm": 0.14817096095782104, |
| "learning_rate": 2.8713783355228497e-05, |
| "loss": 0.3257, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.1631944444444446, |
| "grad_norm": 0.134355250776596, |
| "learning_rate": 2.8620793590687987e-05, |
| "loss": 0.3251, |
| "step": 911 |
| }, |
| { |
| "epoch": 3.1666666666666665, |
| "grad_norm": 0.13707426963646546, |
| "learning_rate": 2.8527870691556404e-05, |
| "loss": 0.3272, |
| "step": 912 |
| }, |
| { |
| "epoch": 3.170138888888889, |
| "grad_norm": 0.13959856877548055, |
| "learning_rate": 2.843501520385841e-05, |
| "loss": 0.3255, |
| "step": 913 |
| }, |
| { |
| "epoch": 3.173611111111111, |
| "grad_norm": 0.13553678964547694, |
| "learning_rate": 2.8342227673222608e-05, |
| "loss": 0.3278, |
| "step": 914 |
| }, |
| { |
| "epoch": 3.1770833333333335, |
| "grad_norm": 0.13659725325340102, |
| "learning_rate": 2.8249508644878224e-05, |
| "loss": 0.3225, |
| "step": 915 |
| }, |
| { |
| "epoch": 3.1805555555555554, |
| "grad_norm": 0.14820750705537203, |
| "learning_rate": 2.8156858663652015e-05, |
| "loss": 0.3318, |
| "step": 916 |
| }, |
| { |
| "epoch": 3.1840277777777777, |
| "grad_norm": 0.13640897033741206, |
| "learning_rate": 2.806427827396493e-05, |
| "loss": 0.3351, |
| "step": 917 |
| }, |
| { |
| "epoch": 3.1875, |
| "grad_norm": 0.12546349430475254, |
| "learning_rate": 2.7971768019829083e-05, |
| "loss": 0.3317, |
| "step": 918 |
| }, |
| { |
| "epoch": 3.1909722222222223, |
| "grad_norm": 0.12506137585820623, |
| "learning_rate": 2.7879328444844386e-05, |
| "loss": 0.3229, |
| "step": 919 |
| }, |
| { |
| "epoch": 3.1944444444444446, |
| "grad_norm": 0.14489730864288738, |
| "learning_rate": 2.778696009219548e-05, |
| "loss": 0.3238, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.1979166666666665, |
| "grad_norm": 0.1314663587842031, |
| "learning_rate": 2.769466350464847e-05, |
| "loss": 0.3272, |
| "step": 921 |
| }, |
| { |
| "epoch": 3.201388888888889, |
| "grad_norm": 0.14506952594049383, |
| "learning_rate": 2.76024392245478e-05, |
| "loss": 0.3273, |
| "step": 922 |
| }, |
| { |
| "epoch": 3.204861111111111, |
| "grad_norm": 0.13448052337608013, |
| "learning_rate": 2.751028779381298e-05, |
| "loss": 0.3284, |
| "step": 923 |
| }, |
| { |
| "epoch": 3.2083333333333335, |
| "grad_norm": 0.14402460060846006, |
| "learning_rate": 2.7418209753935464e-05, |
| "loss": 0.3229, |
| "step": 924 |
| }, |
| { |
| "epoch": 3.2118055555555554, |
| "grad_norm": 0.1594688318321725, |
| "learning_rate": 2.732620564597547e-05, |
| "loss": 0.331, |
| "step": 925 |
| }, |
| { |
| "epoch": 3.2152777777777777, |
| "grad_norm": 0.16364319049182574, |
| "learning_rate": 2.7234276010558766e-05, |
| "loss": 0.3267, |
| "step": 926 |
| }, |
| { |
| "epoch": 3.21875, |
| "grad_norm": 0.15546709679880438, |
| "learning_rate": 2.7142421387873548e-05, |
| "loss": 0.3251, |
| "step": 927 |
| }, |
| { |
| "epoch": 3.2222222222222223, |
| "grad_norm": 0.15304291076882148, |
| "learning_rate": 2.7050642317667164e-05, |
| "loss": 0.3294, |
| "step": 928 |
| }, |
| { |
| "epoch": 3.2256944444444446, |
| "grad_norm": 0.16550996164467935, |
| "learning_rate": 2.695893933924308e-05, |
| "loss": 0.3219, |
| "step": 929 |
| }, |
| { |
| "epoch": 3.2291666666666665, |
| "grad_norm": 0.12571383158452695, |
| "learning_rate": 2.6867312991457563e-05, |
| "loss": 0.3301, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.232638888888889, |
| "grad_norm": 0.16561727085953437, |
| "learning_rate": 2.6775763812716665e-05, |
| "loss": 0.328, |
| "step": 931 |
| }, |
| { |
| "epoch": 3.236111111111111, |
| "grad_norm": 0.12461275003256311, |
| "learning_rate": 2.6684292340972936e-05, |
| "loss": 0.3204, |
| "step": 932 |
| }, |
| { |
| "epoch": 3.2395833333333335, |
| "grad_norm": 0.16175602688976579, |
| "learning_rate": 2.659289911372234e-05, |
| "loss": 0.3297, |
| "step": 933 |
| }, |
| { |
| "epoch": 3.2430555555555554, |
| "grad_norm": 0.12598154391016275, |
| "learning_rate": 2.6501584668001038e-05, |
| "loss": 0.3315, |
| "step": 934 |
| }, |
| { |
| "epoch": 3.2465277777777777, |
| "grad_norm": 0.1412961765348397, |
| "learning_rate": 2.6410349540382285e-05, |
| "loss": 0.3283, |
| "step": 935 |
| }, |
| { |
| "epoch": 3.25, |
| "grad_norm": 0.14185747055205886, |
| "learning_rate": 2.6319194266973256e-05, |
| "loss": 0.3269, |
| "step": 936 |
| }, |
| { |
| "epoch": 3.2534722222222223, |
| "grad_norm": 0.12878278517933486, |
| "learning_rate": 2.6228119383411875e-05, |
| "loss": 0.333, |
| "step": 937 |
| }, |
| { |
| "epoch": 3.2569444444444446, |
| "grad_norm": 0.13440642206471998, |
| "learning_rate": 2.6137125424863713e-05, |
| "loss": 0.3254, |
| "step": 938 |
| }, |
| { |
| "epoch": 3.2604166666666665, |
| "grad_norm": 0.14596329626360965, |
| "learning_rate": 2.6046212926018774e-05, |
| "loss": 0.3258, |
| "step": 939 |
| }, |
| { |
| "epoch": 3.263888888888889, |
| "grad_norm": 0.1364611951529621, |
| "learning_rate": 2.5955382421088457e-05, |
| "loss": 0.3265, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.267361111111111, |
| "grad_norm": 0.17050661806833226, |
| "learning_rate": 2.58646344438023e-05, |
| "loss": 0.3314, |
| "step": 941 |
| }, |
| { |
| "epoch": 3.2708333333333335, |
| "grad_norm": 0.1298103989698816, |
| "learning_rate": 2.577396952740495e-05, |
| "loss": 0.3323, |
| "step": 942 |
| }, |
| { |
| "epoch": 3.2743055555555554, |
| "grad_norm": 0.15642233563304042, |
| "learning_rate": 2.568338820465292e-05, |
| "loss": 0.3261, |
| "step": 943 |
| }, |
| { |
| "epoch": 3.2777777777777777, |
| "grad_norm": 0.12611133774369115, |
| "learning_rate": 2.5592891007811594e-05, |
| "loss": 0.3231, |
| "step": 944 |
| }, |
| { |
| "epoch": 3.28125, |
| "grad_norm": 0.14937341829368525, |
| "learning_rate": 2.550247846865194e-05, |
| "loss": 0.3283, |
| "step": 945 |
| }, |
| { |
| "epoch": 3.2847222222222223, |
| "grad_norm": 0.16357367165064074, |
| "learning_rate": 2.541215111844753e-05, |
| "loss": 0.3258, |
| "step": 946 |
| }, |
| { |
| "epoch": 3.2881944444444446, |
| "grad_norm": 0.1451872556880248, |
| "learning_rate": 2.5321909487971324e-05, |
| "loss": 0.3292, |
| "step": 947 |
| }, |
| { |
| "epoch": 3.2916666666666665, |
| "grad_norm": 0.155863544721583, |
| "learning_rate": 2.523175410749263e-05, |
| "loss": 0.3266, |
| "step": 948 |
| }, |
| { |
| "epoch": 3.295138888888889, |
| "grad_norm": 0.1437954314612826, |
| "learning_rate": 2.5141685506773862e-05, |
| "loss": 0.3249, |
| "step": 949 |
| }, |
| { |
| "epoch": 3.298611111111111, |
| "grad_norm": 0.14039277643177653, |
| "learning_rate": 2.505170421506759e-05, |
| "loss": 0.332, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.3020833333333335, |
| "grad_norm": 0.14077753058037845, |
| "learning_rate": 2.4961810761113282e-05, |
| "loss": 0.3254, |
| "step": 951 |
| }, |
| { |
| "epoch": 3.3055555555555554, |
| "grad_norm": 0.15498427144289387, |
| "learning_rate": 2.4872005673134307e-05, |
| "loss": 0.3262, |
| "step": 952 |
| }, |
| { |
| "epoch": 3.3090277777777777, |
| "grad_norm": 0.13209964705917596, |
| "learning_rate": 2.4782289478834757e-05, |
| "loss": 0.3359, |
| "step": 953 |
| }, |
| { |
| "epoch": 3.3125, |
| "grad_norm": 0.13621175969696817, |
| "learning_rate": 2.4692662705396412e-05, |
| "loss": 0.33, |
| "step": 954 |
| }, |
| { |
| "epoch": 3.3159722222222223, |
| "grad_norm": 0.12829692288727748, |
| "learning_rate": 2.460312587947557e-05, |
| "loss": 0.3199, |
| "step": 955 |
| }, |
| { |
| "epoch": 3.3194444444444446, |
| "grad_norm": 0.12514830015509698, |
| "learning_rate": 2.4513679527199986e-05, |
| "loss": 0.3277, |
| "step": 956 |
| }, |
| { |
| "epoch": 3.3229166666666665, |
| "grad_norm": 0.14513050874335826, |
| "learning_rate": 2.4424324174165808e-05, |
| "loss": 0.332, |
| "step": 957 |
| }, |
| { |
| "epoch": 3.326388888888889, |
| "grad_norm": 0.11014959493081117, |
| "learning_rate": 2.4335060345434443e-05, |
| "loss": 0.3254, |
| "step": 958 |
| }, |
| { |
| "epoch": 3.329861111111111, |
| "grad_norm": 0.13668804458246817, |
| "learning_rate": 2.4245888565529518e-05, |
| "loss": 0.3256, |
| "step": 959 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.11888817466530431, |
| "learning_rate": 2.4156809358433728e-05, |
| "loss": 0.3313, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.3368055555555554, |
| "grad_norm": 0.12990350418153296, |
| "learning_rate": 2.4067823247585857e-05, |
| "loss": 0.3266, |
| "step": 961 |
| }, |
| { |
| "epoch": 3.3402777777777777, |
| "grad_norm": 0.12637757796358817, |
| "learning_rate": 2.3978930755877583e-05, |
| "loss": 0.332, |
| "step": 962 |
| }, |
| { |
| "epoch": 3.34375, |
| "grad_norm": 0.1499424946562788, |
| "learning_rate": 2.389013240565052e-05, |
| "loss": 0.3257, |
| "step": 963 |
| }, |
| { |
| "epoch": 3.3472222222222223, |
| "grad_norm": 0.13046441675186193, |
| "learning_rate": 2.3801428718693055e-05, |
| "loss": 0.3352, |
| "step": 964 |
| }, |
| { |
| "epoch": 3.3506944444444446, |
| "grad_norm": 0.15641789451684035, |
| "learning_rate": 2.371282021623738e-05, |
| "loss": 0.3266, |
| "step": 965 |
| }, |
| { |
| "epoch": 3.3541666666666665, |
| "grad_norm": 0.15401041271475968, |
| "learning_rate": 2.3624307418956298e-05, |
| "loss": 0.3251, |
| "step": 966 |
| }, |
| { |
| "epoch": 3.357638888888889, |
| "grad_norm": 0.14610541183888143, |
| "learning_rate": 2.3535890846960318e-05, |
| "loss": 0.3274, |
| "step": 967 |
| }, |
| { |
| "epoch": 3.361111111111111, |
| "grad_norm": 0.1597491360564764, |
| "learning_rate": 2.3447571019794438e-05, |
| "loss": 0.3279, |
| "step": 968 |
| }, |
| { |
| "epoch": 3.3645833333333335, |
| "grad_norm": 0.1391254633674649, |
| "learning_rate": 2.3359348456435243e-05, |
| "loss": 0.3223, |
| "step": 969 |
| }, |
| { |
| "epoch": 3.3680555555555554, |
| "grad_norm": 0.16742166965814972, |
| "learning_rate": 2.327122367528775e-05, |
| "loss": 0.3213, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.3715277777777777, |
| "grad_norm": 0.13795694128294528, |
| "learning_rate": 2.3183197194182395e-05, |
| "loss": 0.3267, |
| "step": 971 |
| }, |
| { |
| "epoch": 3.375, |
| "grad_norm": 0.15651560419922309, |
| "learning_rate": 2.3095269530372032e-05, |
| "loss": 0.3277, |
| "step": 972 |
| }, |
| { |
| "epoch": 3.3784722222222223, |
| "grad_norm": 0.1383038389996478, |
| "learning_rate": 2.300744120052878e-05, |
| "loss": 0.3233, |
| "step": 973 |
| }, |
| { |
| "epoch": 3.3819444444444446, |
| "grad_norm": 0.13798159811928254, |
| "learning_rate": 2.291971272074115e-05, |
| "loss": 0.3308, |
| "step": 974 |
| }, |
| { |
| "epoch": 3.3854166666666665, |
| "grad_norm": 0.1349158326186098, |
| "learning_rate": 2.2832084606510848e-05, |
| "loss": 0.3286, |
| "step": 975 |
| }, |
| { |
| "epoch": 3.388888888888889, |
| "grad_norm": 0.13930366060353663, |
| "learning_rate": 2.274455737274987e-05, |
| "loss": 0.3329, |
| "step": 976 |
| }, |
| { |
| "epoch": 3.392361111111111, |
| "grad_norm": 0.1310441724409089, |
| "learning_rate": 2.26571315337774e-05, |
| "loss": 0.3311, |
| "step": 977 |
| }, |
| { |
| "epoch": 3.3958333333333335, |
| "grad_norm": 0.12728966187369123, |
| "learning_rate": 2.2569807603316836e-05, |
| "loss": 0.3229, |
| "step": 978 |
| }, |
| { |
| "epoch": 3.3993055555555554, |
| "grad_norm": 0.14277390088433503, |
| "learning_rate": 2.2482586094492724e-05, |
| "loss": 0.328, |
| "step": 979 |
| }, |
| { |
| "epoch": 3.4027777777777777, |
| "grad_norm": 0.14441166578011597, |
| "learning_rate": 2.239546751982782e-05, |
| "loss": 0.3398, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.40625, |
| "grad_norm": 0.15113050069038053, |
| "learning_rate": 2.2308452391239958e-05, |
| "loss": 0.3298, |
| "step": 981 |
| }, |
| { |
| "epoch": 3.4097222222222223, |
| "grad_norm": 0.1475369362828003, |
| "learning_rate": 2.2221541220039162e-05, |
| "loss": 0.327, |
| "step": 982 |
| }, |
| { |
| "epoch": 3.4131944444444446, |
| "grad_norm": 0.14568823290138413, |
| "learning_rate": 2.2134734516924583e-05, |
| "loss": 0.3301, |
| "step": 983 |
| }, |
| { |
| "epoch": 3.4166666666666665, |
| "grad_norm": 0.133394078745156, |
| "learning_rate": 2.2048032791981515e-05, |
| "loss": 0.3282, |
| "step": 984 |
| }, |
| { |
| "epoch": 3.420138888888889, |
| "grad_norm": 0.13571851430095155, |
| "learning_rate": 2.196143655467835e-05, |
| "loss": 0.3289, |
| "step": 985 |
| }, |
| { |
| "epoch": 3.423611111111111, |
| "grad_norm": 0.12212196306543147, |
| "learning_rate": 2.1874946313863673e-05, |
| "loss": 0.329, |
| "step": 986 |
| }, |
| { |
| "epoch": 3.4270833333333335, |
| "grad_norm": 0.1400616993150097, |
| "learning_rate": 2.1788562577763192e-05, |
| "loss": 0.3251, |
| "step": 987 |
| }, |
| { |
| "epoch": 3.4305555555555554, |
| "grad_norm": 0.12306894172185011, |
| "learning_rate": 2.1702285853976774e-05, |
| "loss": 0.3266, |
| "step": 988 |
| }, |
| { |
| "epoch": 3.4340277777777777, |
| "grad_norm": 0.1559157616814954, |
| "learning_rate": 2.161611664947551e-05, |
| "loss": 0.3258, |
| "step": 989 |
| }, |
| { |
| "epoch": 3.4375, |
| "grad_norm": 0.12516551486822852, |
| "learning_rate": 2.1530055470598654e-05, |
| "loss": 0.3265, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.4409722222222223, |
| "grad_norm": 0.13080425032211002, |
| "learning_rate": 2.1444102823050706e-05, |
| "loss": 0.3316, |
| "step": 991 |
| }, |
| { |
| "epoch": 3.4444444444444446, |
| "grad_norm": 0.1300922642459424, |
| "learning_rate": 2.135825921189846e-05, |
| "loss": 0.3288, |
| "step": 992 |
| }, |
| { |
| "epoch": 3.4479166666666665, |
| "grad_norm": 0.12922486035564673, |
| "learning_rate": 2.1272525141567925e-05, |
| "loss": 0.3266, |
| "step": 993 |
| }, |
| { |
| "epoch": 3.451388888888889, |
| "grad_norm": 0.12469798677762536, |
| "learning_rate": 2.11869011158415e-05, |
| "loss": 0.3306, |
| "step": 994 |
| }, |
| { |
| "epoch": 3.454861111111111, |
| "grad_norm": 0.13120696522264091, |
| "learning_rate": 2.1101387637854948e-05, |
| "loss": 0.3287, |
| "step": 995 |
| }, |
| { |
| "epoch": 3.4583333333333335, |
| "grad_norm": 0.11755454189452605, |
| "learning_rate": 2.1015985210094385e-05, |
| "loss": 0.3235, |
| "step": 996 |
| }, |
| { |
| "epoch": 3.4618055555555554, |
| "grad_norm": 0.11981150512361477, |
| "learning_rate": 2.093069433439346e-05, |
| "loss": 0.3241, |
| "step": 997 |
| }, |
| { |
| "epoch": 3.4652777777777777, |
| "grad_norm": 0.1163241592558362, |
| "learning_rate": 2.084551551193026e-05, |
| "loss": 0.3317, |
| "step": 998 |
| }, |
| { |
| "epoch": 3.46875, |
| "grad_norm": 0.11818942476471016, |
| "learning_rate": 2.0760449243224504e-05, |
| "loss": 0.3239, |
| "step": 999 |
| }, |
| { |
| "epoch": 3.4722222222222223, |
| "grad_norm": 0.13391398089188974, |
| "learning_rate": 2.067549602813446e-05, |
| "loss": 0.3276, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.4756944444444446, |
| "grad_norm": 0.11776828786444583, |
| "learning_rate": 2.059065636585416e-05, |
| "loss": 0.3284, |
| "step": 1001 |
| }, |
| { |
| "epoch": 3.4791666666666665, |
| "grad_norm": 0.11898978369329122, |
| "learning_rate": 2.050593075491031e-05, |
| "loss": 0.3222, |
| "step": 1002 |
| }, |
| { |
| "epoch": 3.482638888888889, |
| "grad_norm": 0.12050085117961935, |
| "learning_rate": 2.0421319693159488e-05, |
| "loss": 0.3246, |
| "step": 1003 |
| }, |
| { |
| "epoch": 3.486111111111111, |
| "grad_norm": 0.11420196300243668, |
| "learning_rate": 2.033682367778518e-05, |
| "loss": 0.323, |
| "step": 1004 |
| }, |
| { |
| "epoch": 3.4895833333333335, |
| "grad_norm": 0.10906983179786023, |
| "learning_rate": 2.025244320529479e-05, |
| "loss": 0.3258, |
| "step": 1005 |
| }, |
| { |
| "epoch": 3.4930555555555554, |
| "grad_norm": 0.11713331459082049, |
| "learning_rate": 2.0168178771516844e-05, |
| "loss": 0.3256, |
| "step": 1006 |
| }, |
| { |
| "epoch": 3.4965277777777777, |
| "grad_norm": 0.12452120908228087, |
| "learning_rate": 2.0084030871597944e-05, |
| "loss": 0.3292, |
| "step": 1007 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.11383590817143446, |
| "learning_rate": 2.0000000000000012e-05, |
| "loss": 0.3312, |
| "step": 1008 |
| }, |
| { |
| "epoch": 3.5034722222222223, |
| "grad_norm": 0.1361420180977826, |
| "learning_rate": 1.9916086650497206e-05, |
| "loss": 0.3316, |
| "step": 1009 |
| }, |
| { |
| "epoch": 3.5069444444444446, |
| "grad_norm": 0.1227570110480921, |
| "learning_rate": 1.9832291316173196e-05, |
| "loss": 0.3303, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.5104166666666665, |
| "grad_norm": 0.11318826375174597, |
| "learning_rate": 1.9748614489418118e-05, |
| "loss": 0.3233, |
| "step": 1011 |
| }, |
| { |
| "epoch": 3.513888888888889, |
| "grad_norm": 0.11329679043568246, |
| "learning_rate": 1.966505666192579e-05, |
| "loss": 0.3335, |
| "step": 1012 |
| }, |
| { |
| "epoch": 3.517361111111111, |
| "grad_norm": 0.12400647990567192, |
| "learning_rate": 1.9581618324690742e-05, |
| "loss": 0.3349, |
| "step": 1013 |
| }, |
| { |
| "epoch": 3.5208333333333335, |
| "grad_norm": 0.11126217524731555, |
| "learning_rate": 1.9498299968005393e-05, |
| "loss": 0.3226, |
| "step": 1014 |
| }, |
| { |
| "epoch": 3.5243055555555554, |
| "grad_norm": 0.11193919585531145, |
| "learning_rate": 1.9415102081457138e-05, |
| "loss": 0.3226, |
| "step": 1015 |
| }, |
| { |
| "epoch": 3.5277777777777777, |
| "grad_norm": 0.11071870031836675, |
| "learning_rate": 1.9332025153925486e-05, |
| "loss": 0.3268, |
| "step": 1016 |
| }, |
| { |
| "epoch": 3.53125, |
| "grad_norm": 0.11287024785977574, |
| "learning_rate": 1.9249069673579136e-05, |
| "loss": 0.3251, |
| "step": 1017 |
| }, |
| { |
| "epoch": 3.5347222222222223, |
| "grad_norm": 0.11110239502899291, |
| "learning_rate": 1.9166236127873215e-05, |
| "loss": 0.3233, |
| "step": 1018 |
| }, |
| { |
| "epoch": 3.5381944444444446, |
| "grad_norm": 0.11584789285191628, |
| "learning_rate": 1.9083525003546296e-05, |
| "loss": 0.3282, |
| "step": 1019 |
| }, |
| { |
| "epoch": 3.5416666666666665, |
| "grad_norm": 0.1253078621072049, |
| "learning_rate": 1.90009367866176e-05, |
| "loss": 0.332, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.545138888888889, |
| "grad_norm": 0.1198697407069051, |
| "learning_rate": 1.8918471962384163e-05, |
| "loss": 0.331, |
| "step": 1021 |
| }, |
| { |
| "epoch": 3.548611111111111, |
| "grad_norm": 0.12069680597925006, |
| "learning_rate": 1.8836131015417906e-05, |
| "loss": 0.3299, |
| "step": 1022 |
| }, |
| { |
| "epoch": 3.5520833333333335, |
| "grad_norm": 0.10755804286298509, |
| "learning_rate": 1.875391442956289e-05, |
| "loss": 0.3265, |
| "step": 1023 |
| }, |
| { |
| "epoch": 3.5555555555555554, |
| "grad_norm": 0.13204844212409522, |
| "learning_rate": 1.867182268793236e-05, |
| "loss": 0.3242, |
| "step": 1024 |
| }, |
| { |
| "epoch": 3.5590277777777777, |
| "grad_norm": 0.10964257930704686, |
| "learning_rate": 1.8589856272906e-05, |
| "loss": 0.329, |
| "step": 1025 |
| }, |
| { |
| "epoch": 3.5625, |
| "grad_norm": 0.12148237166024527, |
| "learning_rate": 1.8508015666127043e-05, |
| "loss": 0.3248, |
| "step": 1026 |
| }, |
| { |
| "epoch": 3.5659722222222223, |
| "grad_norm": 0.10930688998046321, |
| "learning_rate": 1.8426301348499495e-05, |
| "loss": 0.3249, |
| "step": 1027 |
| }, |
| { |
| "epoch": 3.5694444444444446, |
| "grad_norm": 0.12726809440921658, |
| "learning_rate": 1.8344713800185215e-05, |
| "loss": 0.3288, |
| "step": 1028 |
| }, |
| { |
| "epoch": 3.5729166666666665, |
| "grad_norm": 0.10648107999527251, |
| "learning_rate": 1.826325350060121e-05, |
| "loss": 0.3288, |
| "step": 1029 |
| }, |
| { |
| "epoch": 3.576388888888889, |
| "grad_norm": 0.11912716762210253, |
| "learning_rate": 1.8181920928416704e-05, |
| "loss": 0.3204, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.579861111111111, |
| "grad_norm": 0.12135009526529683, |
| "learning_rate": 1.810071656155044e-05, |
| "loss": 0.3247, |
| "step": 1031 |
| }, |
| { |
| "epoch": 3.5833333333333335, |
| "grad_norm": 0.11231174673001908, |
| "learning_rate": 1.8019640877167763e-05, |
| "loss": 0.3329, |
| "step": 1032 |
| }, |
| { |
| "epoch": 3.5868055555555554, |
| "grad_norm": 0.11687605127132657, |
| "learning_rate": 1.7938694351677907e-05, |
| "loss": 0.3255, |
| "step": 1033 |
| }, |
| { |
| "epoch": 3.5902777777777777, |
| "grad_norm": 0.11239743899073819, |
| "learning_rate": 1.785787746073111e-05, |
| "loss": 0.3256, |
| "step": 1034 |
| }, |
| { |
| "epoch": 3.59375, |
| "grad_norm": 0.12259189423086393, |
| "learning_rate": 1.7777190679215923e-05, |
| "loss": 0.3243, |
| "step": 1035 |
| }, |
| { |
| "epoch": 3.5972222222222223, |
| "grad_norm": 0.11193043928854648, |
| "learning_rate": 1.7696634481256293e-05, |
| "loss": 0.3266, |
| "step": 1036 |
| }, |
| { |
| "epoch": 3.6006944444444446, |
| "grad_norm": 0.12516968216797694, |
| "learning_rate": 1.761620934020889e-05, |
| "loss": 0.3269, |
| "step": 1037 |
| }, |
| { |
| "epoch": 3.6041666666666665, |
| "grad_norm": 0.10669706033996294, |
| "learning_rate": 1.753591572866029e-05, |
| "loss": 0.3254, |
| "step": 1038 |
| }, |
| { |
| "epoch": 3.607638888888889, |
| "grad_norm": 0.13136099916171398, |
| "learning_rate": 1.7455754118424134e-05, |
| "loss": 0.3328, |
| "step": 1039 |
| }, |
| { |
| "epoch": 3.611111111111111, |
| "grad_norm": 0.11840511583143465, |
| "learning_rate": 1.7375724980538465e-05, |
| "loss": 0.3324, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.6145833333333335, |
| "grad_norm": 0.13260287735222887, |
| "learning_rate": 1.7295828785262857e-05, |
| "loss": 0.3338, |
| "step": 1041 |
| }, |
| { |
| "epoch": 3.6180555555555554, |
| "grad_norm": 0.1092213984349806, |
| "learning_rate": 1.721606600207575e-05, |
| "loss": 0.3264, |
| "step": 1042 |
| }, |
| { |
| "epoch": 3.6215277777777777, |
| "grad_norm": 0.12107825432473537, |
| "learning_rate": 1.713643709967159e-05, |
| "loss": 0.3261, |
| "step": 1043 |
| }, |
| { |
| "epoch": 3.625, |
| "grad_norm": 0.11663128574485776, |
| "learning_rate": 1.7056942545958167e-05, |
| "loss": 0.3278, |
| "step": 1044 |
| }, |
| { |
| "epoch": 3.6284722222222223, |
| "grad_norm": 0.1162808823074308, |
| "learning_rate": 1.697758280805379e-05, |
| "loss": 0.328, |
| "step": 1045 |
| }, |
| { |
| "epoch": 3.6319444444444446, |
| "grad_norm": 0.11969320984733327, |
| "learning_rate": 1.68983583522846e-05, |
| "loss": 0.3286, |
| "step": 1046 |
| }, |
| { |
| "epoch": 3.6354166666666665, |
| "grad_norm": 0.12749537876295391, |
| "learning_rate": 1.68192696441818e-05, |
| "loss": 0.3282, |
| "step": 1047 |
| }, |
| { |
| "epoch": 3.638888888888889, |
| "grad_norm": 0.12432812932104631, |
| "learning_rate": 1.6740317148478932e-05, |
| "loss": 0.3298, |
| "step": 1048 |
| }, |
| { |
| "epoch": 3.642361111111111, |
| "grad_norm": 0.12352471143147438, |
| "learning_rate": 1.6661501329109118e-05, |
| "loss": 0.3261, |
| "step": 1049 |
| }, |
| { |
| "epoch": 3.6458333333333335, |
| "grad_norm": 0.12619465833114463, |
| "learning_rate": 1.6582822649202382e-05, |
| "loss": 0.3263, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.6493055555555554, |
| "grad_norm": 0.11466347815059254, |
| "learning_rate": 1.6504281571082873e-05, |
| "loss": 0.3194, |
| "step": 1051 |
| }, |
| { |
| "epoch": 3.6527777777777777, |
| "grad_norm": 0.13179675145606548, |
| "learning_rate": 1.642587855626621e-05, |
| "loss": 0.3319, |
| "step": 1052 |
| }, |
| { |
| "epoch": 3.65625, |
| "grad_norm": 0.11282192811037618, |
| "learning_rate": 1.6347614065456715e-05, |
| "loss": 0.3284, |
| "step": 1053 |
| }, |
| { |
| "epoch": 3.6597222222222223, |
| "grad_norm": 0.11886170256023952, |
| "learning_rate": 1.6269488558544724e-05, |
| "loss": 0.3293, |
| "step": 1054 |
| }, |
| { |
| "epoch": 3.6631944444444446, |
| "grad_norm": 0.11799256382181283, |
| "learning_rate": 1.6191502494603925e-05, |
| "loss": 0.3266, |
| "step": 1055 |
| }, |
| { |
| "epoch": 3.6666666666666665, |
| "grad_norm": 0.11390887631440014, |
| "learning_rate": 1.6113656331888563e-05, |
| "loss": 0.3272, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.670138888888889, |
| "grad_norm": 0.12377782950633084, |
| "learning_rate": 1.6035950527830868e-05, |
| "loss": 0.3299, |
| "step": 1057 |
| }, |
| { |
| "epoch": 3.673611111111111, |
| "grad_norm": 0.10228022446219863, |
| "learning_rate": 1.5958385539038285e-05, |
| "loss": 0.3311, |
| "step": 1058 |
| }, |
| { |
| "epoch": 3.6770833333333335, |
| "grad_norm": 0.12066991092121124, |
| "learning_rate": 1.588096182129082e-05, |
| "loss": 0.3286, |
| "step": 1059 |
| }, |
| { |
| "epoch": 3.6805555555555554, |
| "grad_norm": 0.10509918089098899, |
| "learning_rate": 1.580367982953833e-05, |
| "loss": 0.3292, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.6840277777777777, |
| "grad_norm": 0.10663352182573295, |
| "learning_rate": 1.572654001789792e-05, |
| "loss": 0.3334, |
| "step": 1061 |
| }, |
| { |
| "epoch": 3.6875, |
| "grad_norm": 0.11999418073196094, |
| "learning_rate": 1.5649542839651175e-05, |
| "loss": 0.3256, |
| "step": 1062 |
| }, |
| { |
| "epoch": 3.6909722222222223, |
| "grad_norm": 0.11145209248435085, |
| "learning_rate": 1.5572688747241605e-05, |
| "loss": 0.3269, |
| "step": 1063 |
| }, |
| { |
| "epoch": 3.6944444444444446, |
| "grad_norm": 0.11903495171170056, |
| "learning_rate": 1.5495978192271887e-05, |
| "loss": 0.32, |
| "step": 1064 |
| }, |
| { |
| "epoch": 3.6979166666666665, |
| "grad_norm": 0.11631140126765901, |
| "learning_rate": 1.5419411625501302e-05, |
| "loss": 0.3255, |
| "step": 1065 |
| }, |
| { |
| "epoch": 3.701388888888889, |
| "grad_norm": 0.11875582201597984, |
| "learning_rate": 1.534298949684299e-05, |
| "loss": 0.3273, |
| "step": 1066 |
| }, |
| { |
| "epoch": 3.704861111111111, |
| "grad_norm": 0.12463534502290068, |
| "learning_rate": 1.5266712255361413e-05, |
| "loss": 0.3282, |
| "step": 1067 |
| }, |
| { |
| "epoch": 3.7083333333333335, |
| "grad_norm": 0.11873227730380286, |
| "learning_rate": 1.5190580349269604e-05, |
| "loss": 0.3287, |
| "step": 1068 |
| }, |
| { |
| "epoch": 3.7118055555555554, |
| "grad_norm": 0.1253968455352852, |
| "learning_rate": 1.5114594225926631e-05, |
| "loss": 0.3373, |
| "step": 1069 |
| }, |
| { |
| "epoch": 3.7152777777777777, |
| "grad_norm": 0.11955105986890233, |
| "learning_rate": 1.503875433183493e-05, |
| "loss": 0.3309, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.71875, |
| "grad_norm": 0.11945767131489388, |
| "learning_rate": 1.4963061112637637e-05, |
| "loss": 0.3257, |
| "step": 1071 |
| }, |
| { |
| "epoch": 3.7222222222222223, |
| "grad_norm": 0.11320297989341972, |
| "learning_rate": 1.4887515013116067e-05, |
| "loss": 0.3324, |
| "step": 1072 |
| }, |
| { |
| "epoch": 3.7256944444444446, |
| "grad_norm": 0.10360639278268688, |
| "learning_rate": 1.481211647718698e-05, |
| "loss": 0.3214, |
| "step": 1073 |
| }, |
| { |
| "epoch": 3.7291666666666665, |
| "grad_norm": 0.10520008867884603, |
| "learning_rate": 1.4736865947900106e-05, |
| "loss": 0.3281, |
| "step": 1074 |
| }, |
| { |
| "epoch": 3.732638888888889, |
| "grad_norm": 0.10779658571104866, |
| "learning_rate": 1.4661763867435407e-05, |
| "loss": 0.3259, |
| "step": 1075 |
| }, |
| { |
| "epoch": 3.736111111111111, |
| "grad_norm": 0.10358861836884209, |
| "learning_rate": 1.4586810677100608e-05, |
| "loss": 0.3309, |
| "step": 1076 |
| }, |
| { |
| "epoch": 3.7395833333333335, |
| "grad_norm": 0.11055658533942075, |
| "learning_rate": 1.4512006817328472e-05, |
| "loss": 0.3268, |
| "step": 1077 |
| }, |
| { |
| "epoch": 3.7430555555555554, |
| "grad_norm": 0.11078061429552334, |
| "learning_rate": 1.4437352727674335e-05, |
| "loss": 0.3267, |
| "step": 1078 |
| }, |
| { |
| "epoch": 3.7465277777777777, |
| "grad_norm": 0.10435501091691729, |
| "learning_rate": 1.4362848846813461e-05, |
| "loss": 0.3245, |
| "step": 1079 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.11960459787061607, |
| "learning_rate": 1.4288495612538427e-05, |
| "loss": 0.3308, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.7534722222222223, |
| "grad_norm": 0.10183573591431129, |
| "learning_rate": 1.4214293461756645e-05, |
| "loss": 0.3228, |
| "step": 1081 |
| }, |
| { |
| "epoch": 3.7569444444444446, |
| "grad_norm": 0.10326319760910473, |
| "learning_rate": 1.4140242830487743e-05, |
| "loss": 0.3257, |
| "step": 1082 |
| }, |
| { |
| "epoch": 3.7604166666666665, |
| "grad_norm": 0.10288885616446265, |
| "learning_rate": 1.406634415386095e-05, |
| "loss": 0.3312, |
| "step": 1083 |
| }, |
| { |
| "epoch": 3.763888888888889, |
| "grad_norm": 0.10259782848930725, |
| "learning_rate": 1.3992597866112667e-05, |
| "loss": 0.3245, |
| "step": 1084 |
| }, |
| { |
| "epoch": 3.767361111111111, |
| "grad_norm": 0.10218365204918832, |
| "learning_rate": 1.391900440058379e-05, |
| "loss": 0.3272, |
| "step": 1085 |
| }, |
| { |
| "epoch": 3.7708333333333335, |
| "grad_norm": 0.10627712104738776, |
| "learning_rate": 1.3845564189717218e-05, |
| "loss": 0.3275, |
| "step": 1086 |
| }, |
| { |
| "epoch": 3.7743055555555554, |
| "grad_norm": 0.10701771275450184, |
| "learning_rate": 1.3772277665055351e-05, |
| "loss": 0.3317, |
| "step": 1087 |
| }, |
| { |
| "epoch": 3.7777777777777777, |
| "grad_norm": 0.1095852881244597, |
| "learning_rate": 1.369914525723746e-05, |
| "loss": 0.3282, |
| "step": 1088 |
| }, |
| { |
| "epoch": 3.78125, |
| "grad_norm": 0.11023426100788798, |
| "learning_rate": 1.3626167395997247e-05, |
| "loss": 0.3282, |
| "step": 1089 |
| }, |
| { |
| "epoch": 3.7847222222222223, |
| "grad_norm": 0.09945960251671251, |
| "learning_rate": 1.3553344510160268e-05, |
| "loss": 0.3265, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.7881944444444446, |
| "grad_norm": 0.12218443061362727, |
| "learning_rate": 1.3480677027641443e-05, |
| "loss": 0.33, |
| "step": 1091 |
| }, |
| { |
| "epoch": 3.7916666666666665, |
| "grad_norm": 0.11447703332102428, |
| "learning_rate": 1.3408165375442486e-05, |
| "loss": 0.3305, |
| "step": 1092 |
| }, |
| { |
| "epoch": 3.795138888888889, |
| "grad_norm": 0.09651158172556974, |
| "learning_rate": 1.3335809979649486e-05, |
| "loss": 0.3204, |
| "step": 1093 |
| }, |
| { |
| "epoch": 3.798611111111111, |
| "grad_norm": 0.11384440993096656, |
| "learning_rate": 1.3263611265430303e-05, |
| "loss": 0.3267, |
| "step": 1094 |
| }, |
| { |
| "epoch": 3.8020833333333335, |
| "grad_norm": 0.11304722402701793, |
| "learning_rate": 1.319156965703217e-05, |
| "loss": 0.3326, |
| "step": 1095 |
| }, |
| { |
| "epoch": 3.8055555555555554, |
| "grad_norm": 0.10486976163312414, |
| "learning_rate": 1.3119685577779105e-05, |
| "loss": 0.3293, |
| "step": 1096 |
| }, |
| { |
| "epoch": 3.8090277777777777, |
| "grad_norm": 0.1271060262298, |
| "learning_rate": 1.3047959450069505e-05, |
| "loss": 0.3272, |
| "step": 1097 |
| }, |
| { |
| "epoch": 3.8125, |
| "grad_norm": 0.11143745204437848, |
| "learning_rate": 1.297639169537359e-05, |
| "loss": 0.3297, |
| "step": 1098 |
| }, |
| { |
| "epoch": 3.8159722222222223, |
| "grad_norm": 0.10831300071287393, |
| "learning_rate": 1.290498273423101e-05, |
| "loss": 0.3254, |
| "step": 1099 |
| }, |
| { |
| "epoch": 3.8194444444444446, |
| "grad_norm": 0.09826593280022544, |
| "learning_rate": 1.2833732986248277e-05, |
| "loss": 0.3237, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.8229166666666665, |
| "grad_norm": 0.1080511751809344, |
| "learning_rate": 1.2762642870096377e-05, |
| "loss": 0.3281, |
| "step": 1101 |
| }, |
| { |
| "epoch": 3.826388888888889, |
| "grad_norm": 0.10191099405231756, |
| "learning_rate": 1.2691712803508307e-05, |
| "loss": 0.3246, |
| "step": 1102 |
| }, |
| { |
| "epoch": 3.829861111111111, |
| "grad_norm": 0.1182255215246636, |
| "learning_rate": 1.2620943203276527e-05, |
| "loss": 0.3297, |
| "step": 1103 |
| }, |
| { |
| "epoch": 3.8333333333333335, |
| "grad_norm": 0.11372702532966089, |
| "learning_rate": 1.2550334485250661e-05, |
| "loss": 0.321, |
| "step": 1104 |
| }, |
| { |
| "epoch": 3.8368055555555554, |
| "grad_norm": 0.11153225684328712, |
| "learning_rate": 1.2479887064334904e-05, |
| "loss": 0.3247, |
| "step": 1105 |
| }, |
| { |
| "epoch": 3.8402777777777777, |
| "grad_norm": 0.10934410198524311, |
| "learning_rate": 1.24096013544857e-05, |
| "loss": 0.3179, |
| "step": 1106 |
| }, |
| { |
| "epoch": 3.84375, |
| "grad_norm": 0.11047573559225156, |
| "learning_rate": 1.233947776870923e-05, |
| "loss": 0.3237, |
| "step": 1107 |
| }, |
| { |
| "epoch": 3.8472222222222223, |
| "grad_norm": 0.10334708781569232, |
| "learning_rate": 1.2269516719059041e-05, |
| "loss": 0.3286, |
| "step": 1108 |
| }, |
| { |
| "epoch": 3.8506944444444446, |
| "grad_norm": 0.0997713836270273, |
| "learning_rate": 1.2199718616633574e-05, |
| "loss": 0.3323, |
| "step": 1109 |
| }, |
| { |
| "epoch": 3.8541666666666665, |
| "grad_norm": 0.10598909183310787, |
| "learning_rate": 1.2130083871573812e-05, |
| "loss": 0.3294, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.857638888888889, |
| "grad_norm": 0.1019537717012242, |
| "learning_rate": 1.2060612893060788e-05, |
| "loss": 0.3309, |
| "step": 1111 |
| }, |
| { |
| "epoch": 3.861111111111111, |
| "grad_norm": 0.10394292731566264, |
| "learning_rate": 1.1991306089313261e-05, |
| "loss": 0.3286, |
| "step": 1112 |
| }, |
| { |
| "epoch": 3.8645833333333335, |
| "grad_norm": 0.10505701208953087, |
| "learning_rate": 1.1922163867585268e-05, |
| "loss": 0.3271, |
| "step": 1113 |
| }, |
| { |
| "epoch": 3.8680555555555554, |
| "grad_norm": 0.11019283466687356, |
| "learning_rate": 1.1853186634163766e-05, |
| "loss": 0.3203, |
| "step": 1114 |
| }, |
| { |
| "epoch": 3.8715277777777777, |
| "grad_norm": 0.11259235512564435, |
| "learning_rate": 1.1784374794366177e-05, |
| "loss": 0.3283, |
| "step": 1115 |
| }, |
| { |
| "epoch": 3.875, |
| "grad_norm": 0.10395515531494519, |
| "learning_rate": 1.1715728752538103e-05, |
| "loss": 0.3301, |
| "step": 1116 |
| }, |
| { |
| "epoch": 3.8784722222222223, |
| "grad_norm": 0.09840864009316534, |
| "learning_rate": 1.1647248912050863e-05, |
| "loss": 0.3293, |
| "step": 1117 |
| }, |
| { |
| "epoch": 3.8819444444444446, |
| "grad_norm": 0.11150628322480051, |
| "learning_rate": 1.1578935675299166e-05, |
| "loss": 0.3218, |
| "step": 1118 |
| }, |
| { |
| "epoch": 3.8854166666666665, |
| "grad_norm": 0.09778565099255097, |
| "learning_rate": 1.1510789443698772e-05, |
| "loss": 0.3248, |
| "step": 1119 |
| }, |
| { |
| "epoch": 3.888888888888889, |
| "grad_norm": 0.10078294206883251, |
| "learning_rate": 1.1442810617684046e-05, |
| "loss": 0.3232, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.892361111111111, |
| "grad_norm": 0.10514241635853229, |
| "learning_rate": 1.1374999596705707e-05, |
| "loss": 0.3251, |
| "step": 1121 |
| }, |
| { |
| "epoch": 3.8958333333333335, |
| "grad_norm": 0.09516414758527439, |
| "learning_rate": 1.130735677922842e-05, |
| "loss": 0.3244, |
| "step": 1122 |
| }, |
| { |
| "epoch": 3.8993055555555554, |
| "grad_norm": 0.09425427526229069, |
| "learning_rate": 1.1239882562728476e-05, |
| "loss": 0.3278, |
| "step": 1123 |
| }, |
| { |
| "epoch": 3.9027777777777777, |
| "grad_norm": 0.10027536579030943, |
| "learning_rate": 1.1172577343691415e-05, |
| "loss": 0.3229, |
| "step": 1124 |
| }, |
| { |
| "epoch": 3.90625, |
| "grad_norm": 0.09683190273273522, |
| "learning_rate": 1.110544151760978e-05, |
| "loss": 0.3298, |
| "step": 1125 |
| }, |
| { |
| "epoch": 3.9097222222222223, |
| "grad_norm": 0.0967610942668413, |
| "learning_rate": 1.1038475478980697e-05, |
| "loss": 0.3279, |
| "step": 1126 |
| }, |
| { |
| "epoch": 3.9131944444444446, |
| "grad_norm": 0.10330176910362741, |
| "learning_rate": 1.0971679621303642e-05, |
| "loss": 0.3317, |
| "step": 1127 |
| }, |
| { |
| "epoch": 3.9166666666666665, |
| "grad_norm": 0.09289793987187862, |
| "learning_rate": 1.0905054337078051e-05, |
| "loss": 0.3227, |
| "step": 1128 |
| }, |
| { |
| "epoch": 3.920138888888889, |
| "grad_norm": 0.09497389173764989, |
| "learning_rate": 1.08386000178011e-05, |
| "loss": 0.3226, |
| "step": 1129 |
| }, |
| { |
| "epoch": 3.923611111111111, |
| "grad_norm": 0.0895936439673318, |
| "learning_rate": 1.0772317053965304e-05, |
| "loss": 0.3262, |
| "step": 1130 |
| }, |
| { |
| "epoch": 3.9270833333333335, |
| "grad_norm": 0.11102321960186208, |
| "learning_rate": 1.0706205835056326e-05, |
| "loss": 0.3263, |
| "step": 1131 |
| }, |
| { |
| "epoch": 3.9305555555555554, |
| "grad_norm": 0.10321047657838925, |
| "learning_rate": 1.0640266749550593e-05, |
| "loss": 0.3291, |
| "step": 1132 |
| }, |
| { |
| "epoch": 3.9340277777777777, |
| "grad_norm": 0.09582704106508376, |
| "learning_rate": 1.0574500184913083e-05, |
| "loss": 0.3264, |
| "step": 1133 |
| }, |
| { |
| "epoch": 3.9375, |
| "grad_norm": 0.10607451556807133, |
| "learning_rate": 1.0508906527595042e-05, |
| "loss": 0.3249, |
| "step": 1134 |
| }, |
| { |
| "epoch": 3.9409722222222223, |
| "grad_norm": 0.09752377721853296, |
| "learning_rate": 1.0443486163031644e-05, |
| "loss": 0.322, |
| "step": 1135 |
| }, |
| { |
| "epoch": 3.9444444444444446, |
| "grad_norm": 0.08985167666388755, |
| "learning_rate": 1.0378239475639823e-05, |
| "loss": 0.3312, |
| "step": 1136 |
| }, |
| { |
| "epoch": 3.9479166666666665, |
| "grad_norm": 0.09861848573864489, |
| "learning_rate": 1.0313166848815931e-05, |
| "loss": 0.3283, |
| "step": 1137 |
| }, |
| { |
| "epoch": 3.951388888888889, |
| "grad_norm": 0.09568519398223461, |
| "learning_rate": 1.0248268664933563e-05, |
| "loss": 0.3235, |
| "step": 1138 |
| }, |
| { |
| "epoch": 3.954861111111111, |
| "grad_norm": 0.09293210325829616, |
| "learning_rate": 1.018354530534122e-05, |
| "loss": 0.3233, |
| "step": 1139 |
| }, |
| { |
| "epoch": 3.9583333333333335, |
| "grad_norm": 0.09919535966598288, |
| "learning_rate": 1.0118997150360169e-05, |
| "loss": 0.3248, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.9618055555555554, |
| "grad_norm": 0.10003396984693862, |
| "learning_rate": 1.0054624579282107e-05, |
| "loss": 0.3258, |
| "step": 1141 |
| }, |
| { |
| "epoch": 3.9652777777777777, |
| "grad_norm": 0.09547811645073016, |
| "learning_rate": 9.990427970367032e-06, |
| "loss": 0.3248, |
| "step": 1142 |
| }, |
| { |
| "epoch": 3.96875, |
| "grad_norm": 0.09343511294470988, |
| "learning_rate": 9.92640770084091e-06, |
| "loss": 0.3228, |
| "step": 1143 |
| }, |
| { |
| "epoch": 3.9722222222222223, |
| "grad_norm": 0.09824876865049713, |
| "learning_rate": 9.862564146893571e-06, |
| "loss": 0.3261, |
| "step": 1144 |
| }, |
| { |
| "epoch": 3.9756944444444446, |
| "grad_norm": 0.09020643426664245, |
| "learning_rate": 9.798897683676425e-06, |
| "loss": 0.3206, |
| "step": 1145 |
| }, |
| { |
| "epoch": 3.9791666666666665, |
| "grad_norm": 0.09196067134166887, |
| "learning_rate": 9.735408685300287e-06, |
| "loss": 0.3287, |
| "step": 1146 |
| }, |
| { |
| "epoch": 3.982638888888889, |
| "grad_norm": 0.09624041193059374, |
| "learning_rate": 9.672097524833144e-06, |
| "loss": 0.3234, |
| "step": 1147 |
| }, |
| { |
| "epoch": 3.986111111111111, |
| "grad_norm": 0.09445572196365963, |
| "learning_rate": 9.60896457429803e-06, |
| "loss": 0.3269, |
| "step": 1148 |
| }, |
| { |
| "epoch": 3.9895833333333335, |
| "grad_norm": 0.09413435203202192, |
| "learning_rate": 9.546010204670759e-06, |
| "loss": 0.3249, |
| "step": 1149 |
| }, |
| { |
| "epoch": 3.9930555555555554, |
| "grad_norm": 0.09849551726546966, |
| "learning_rate": 9.4832347858778e-06, |
| "loss": 0.3327, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.9965277777777777, |
| "grad_norm": 0.10716050657493227, |
| "learning_rate": 9.420638686794104e-06, |
| "loss": 0.3332, |
| "step": 1151 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.14340310851517993, |
| "learning_rate": 9.358222275240884e-06, |
| "loss": 0.3105, |
| "step": 1152 |
| }, |
| { |
| "epoch": 4.003472222222222, |
| "grad_norm": 0.13131364336545615, |
| "learning_rate": 9.29598591798353e-06, |
| "loss": 0.3078, |
| "step": 1153 |
| }, |
| { |
| "epoch": 4.006944444444445, |
| "grad_norm": 0.11508669248540353, |
| "learning_rate": 9.233929980729406e-06, |
| "loss": 0.3034, |
| "step": 1154 |
| }, |
| { |
| "epoch": 4.010416666666667, |
| "grad_norm": 0.10786149626912109, |
| "learning_rate": 9.172054828125678e-06, |
| "loss": 0.304, |
| "step": 1155 |
| }, |
| { |
| "epoch": 4.013888888888889, |
| "grad_norm": 0.13206545124072844, |
| "learning_rate": 9.110360823757235e-06, |
| "loss": 0.3073, |
| "step": 1156 |
| }, |
| { |
| "epoch": 4.017361111111111, |
| "grad_norm": 0.13000704335108745, |
| "learning_rate": 9.048848330144517e-06, |
| "loss": 0.2984, |
| "step": 1157 |
| }, |
| { |
| "epoch": 4.020833333333333, |
| "grad_norm": 0.12136424684494163, |
| "learning_rate": 8.987517708741364e-06, |
| "loss": 0.3033, |
| "step": 1158 |
| }, |
| { |
| "epoch": 4.024305555555555, |
| "grad_norm": 0.1131951055285726, |
| "learning_rate": 8.926369319932955e-06, |
| "loss": 0.3038, |
| "step": 1159 |
| }, |
| { |
| "epoch": 4.027777777777778, |
| "grad_norm": 0.12276551490332963, |
| "learning_rate": 8.8654035230336e-06, |
| "loss": 0.3063, |
| "step": 1160 |
| }, |
| { |
| "epoch": 4.03125, |
| "grad_norm": 0.11606442023390179, |
| "learning_rate": 8.804620676284736e-06, |
| "loss": 0.3045, |
| "step": 1161 |
| }, |
| { |
| "epoch": 4.034722222222222, |
| "grad_norm": 0.11139314403259774, |
| "learning_rate": 8.74402113685271e-06, |
| "loss": 0.3007, |
| "step": 1162 |
| }, |
| { |
| "epoch": 4.038194444444445, |
| "grad_norm": 0.1065337658272395, |
| "learning_rate": 8.683605260826792e-06, |
| "loss": 0.3072, |
| "step": 1163 |
| }, |
| { |
| "epoch": 4.041666666666667, |
| "grad_norm": 0.10443063897412685, |
| "learning_rate": 8.623373403216972e-06, |
| "loss": 0.3046, |
| "step": 1164 |
| }, |
| { |
| "epoch": 4.045138888888889, |
| "grad_norm": 0.11050778057057117, |
| "learning_rate": 8.56332591795197e-06, |
| "loss": 0.3108, |
| "step": 1165 |
| }, |
| { |
| "epoch": 4.048611111111111, |
| "grad_norm": 0.10107821431355542, |
| "learning_rate": 8.503463157877112e-06, |
| "loss": 0.3041, |
| "step": 1166 |
| }, |
| { |
| "epoch": 4.052083333333333, |
| "grad_norm": 0.11491987551631903, |
| "learning_rate": 8.44378547475222e-06, |
| "loss": 0.3076, |
| "step": 1167 |
| }, |
| { |
| "epoch": 4.055555555555555, |
| "grad_norm": 0.10884189411682854, |
| "learning_rate": 8.384293219249633e-06, |
| "loss": 0.3095, |
| "step": 1168 |
| }, |
| { |
| "epoch": 4.059027777777778, |
| "grad_norm": 0.09910051377670472, |
| "learning_rate": 8.324986740952061e-06, |
| "loss": 0.3068, |
| "step": 1169 |
| }, |
| { |
| "epoch": 4.0625, |
| "grad_norm": 0.09641277889877216, |
| "learning_rate": 8.265866388350598e-06, |
| "loss": 0.305, |
| "step": 1170 |
| }, |
| { |
| "epoch": 4.065972222222222, |
| "grad_norm": 0.09639148768059384, |
| "learning_rate": 8.206932508842617e-06, |
| "loss": 0.3078, |
| "step": 1171 |
| }, |
| { |
| "epoch": 4.069444444444445, |
| "grad_norm": 0.09590017094288145, |
| "learning_rate": 8.148185448729778e-06, |
| "loss": 0.3048, |
| "step": 1172 |
| }, |
| { |
| "epoch": 4.072916666666667, |
| "grad_norm": 0.1031626172943818, |
| "learning_rate": 8.089625553215947e-06, |
| "loss": 0.3072, |
| "step": 1173 |
| }, |
| { |
| "epoch": 4.076388888888889, |
| "grad_norm": 0.09505984395972193, |
| "learning_rate": 8.031253166405223e-06, |
| "loss": 0.3067, |
| "step": 1174 |
| }, |
| { |
| "epoch": 4.079861111111111, |
| "grad_norm": 0.09796967816252462, |
| "learning_rate": 7.973068631299848e-06, |
| "loss": 0.3049, |
| "step": 1175 |
| }, |
| { |
| "epoch": 4.083333333333333, |
| "grad_norm": 0.10038582605001176, |
| "learning_rate": 7.915072289798247e-06, |
| "loss": 0.31, |
| "step": 1176 |
| }, |
| { |
| "epoch": 4.086805555555555, |
| "grad_norm": 0.09456176428791851, |
| "learning_rate": 7.857264482693007e-06, |
| "loss": 0.301, |
| "step": 1177 |
| }, |
| { |
| "epoch": 4.090277777777778, |
| "grad_norm": 0.10194949961977534, |
| "learning_rate": 7.799645549668869e-06, |
| "loss": 0.3044, |
| "step": 1178 |
| }, |
| { |
| "epoch": 4.09375, |
| "grad_norm": 0.10057820263217915, |
| "learning_rate": 7.742215829300695e-06, |
| "loss": 0.306, |
| "step": 1179 |
| }, |
| { |
| "epoch": 4.097222222222222, |
| "grad_norm": 0.09003366079286232, |
| "learning_rate": 7.684975659051557e-06, |
| "loss": 0.3068, |
| "step": 1180 |
| }, |
| { |
| "epoch": 4.100694444444445, |
| "grad_norm": 0.10159173660529588, |
| "learning_rate": 7.627925375270684e-06, |
| "loss": 0.3079, |
| "step": 1181 |
| }, |
| { |
| "epoch": 4.104166666666667, |
| "grad_norm": 0.09248887797460335, |
| "learning_rate": 7.5710653131915125e-06, |
| "loss": 0.3056, |
| "step": 1182 |
| }, |
| { |
| "epoch": 4.107638888888889, |
| "grad_norm": 0.09784437202252298, |
| "learning_rate": 7.514395806929742e-06, |
| "loss": 0.3069, |
| "step": 1183 |
| }, |
| { |
| "epoch": 4.111111111111111, |
| "grad_norm": 0.10274066112669682, |
| "learning_rate": 7.457917189481301e-06, |
| "loss": 0.3053, |
| "step": 1184 |
| }, |
| { |
| "epoch": 4.114583333333333, |
| "grad_norm": 0.08905635702892338, |
| "learning_rate": 7.401629792720495e-06, |
| "loss": 0.3028, |
| "step": 1185 |
| }, |
| { |
| "epoch": 4.118055555555555, |
| "grad_norm": 0.0932267327763702, |
| "learning_rate": 7.345533947397933e-06, |
| "loss": 0.3053, |
| "step": 1186 |
| }, |
| { |
| "epoch": 4.121527777777778, |
| "grad_norm": 0.09571394607798979, |
| "learning_rate": 7.289629983138691e-06, |
| "loss": 0.305, |
| "step": 1187 |
| }, |
| { |
| "epoch": 4.125, |
| "grad_norm": 0.09484033625260786, |
| "learning_rate": 7.233918228440324e-06, |
| "loss": 0.3033, |
| "step": 1188 |
| }, |
| { |
| "epoch": 4.128472222222222, |
| "grad_norm": 0.09866883982672792, |
| "learning_rate": 7.1783990106709485e-06, |
| "loss": 0.3043, |
| "step": 1189 |
| }, |
| { |
| "epoch": 4.131944444444445, |
| "grad_norm": 0.09157610827195094, |
| "learning_rate": 7.123072656067278e-06, |
| "loss": 0.3022, |
| "step": 1190 |
| }, |
| { |
| "epoch": 4.135416666666667, |
| "grad_norm": 0.08992237518920917, |
| "learning_rate": 7.067939489732794e-06, |
| "loss": 0.3056, |
| "step": 1191 |
| }, |
| { |
| "epoch": 4.138888888888889, |
| "grad_norm": 0.08399036043953488, |
| "learning_rate": 7.0129998356357295e-06, |
| "loss": 0.2967, |
| "step": 1192 |
| }, |
| { |
| "epoch": 4.142361111111111, |
| "grad_norm": 0.09226206512891923, |
| "learning_rate": 6.958254016607275e-06, |
| "loss": 0.3004, |
| "step": 1193 |
| }, |
| { |
| "epoch": 4.145833333333333, |
| "grad_norm": 0.09669707681367447, |
| "learning_rate": 6.903702354339578e-06, |
| "loss": 0.3008, |
| "step": 1194 |
| }, |
| { |
| "epoch": 4.149305555555555, |
| "grad_norm": 0.08590239014832185, |
| "learning_rate": 6.849345169383941e-06, |
| "loss": 0.3076, |
| "step": 1195 |
| }, |
| { |
| "epoch": 4.152777777777778, |
| "grad_norm": 0.09036361233423511, |
| "learning_rate": 6.795182781148848e-06, |
| "loss": 0.3074, |
| "step": 1196 |
| }, |
| { |
| "epoch": 4.15625, |
| "grad_norm": 0.09500461151290321, |
| "learning_rate": 6.7412155078981865e-06, |
| "loss": 0.3017, |
| "step": 1197 |
| }, |
| { |
| "epoch": 4.159722222222222, |
| "grad_norm": 0.08791034639868246, |
| "learning_rate": 6.687443666749316e-06, |
| "loss": 0.3071, |
| "step": 1198 |
| }, |
| { |
| "epoch": 4.163194444444445, |
| "grad_norm": 0.08752933631983714, |
| "learning_rate": 6.633867573671185e-06, |
| "loss": 0.3015, |
| "step": 1199 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 0.08896289666476007, |
| "learning_rate": 6.58048754348255e-06, |
| "loss": 0.3036, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.170138888888889, |
| "grad_norm": 0.08717440264167338, |
| "learning_rate": 6.527303889850038e-06, |
| "loss": 0.3075, |
| "step": 1201 |
| }, |
| { |
| "epoch": 4.173611111111111, |
| "grad_norm": 0.09311114762510445, |
| "learning_rate": 6.474316925286391e-06, |
| "loss": 0.3064, |
| "step": 1202 |
| }, |
| { |
| "epoch": 4.177083333333333, |
| "grad_norm": 0.09054319998608429, |
| "learning_rate": 6.421526961148545e-06, |
| "loss": 0.307, |
| "step": 1203 |
| }, |
| { |
| "epoch": 4.180555555555555, |
| "grad_norm": 0.08473005230511284, |
| "learning_rate": 6.368934307635881e-06, |
| "loss": 0.3018, |
| "step": 1204 |
| }, |
| { |
| "epoch": 4.184027777777778, |
| "grad_norm": 0.0906747780665984, |
| "learning_rate": 6.316539273788316e-06, |
| "loss": 0.3049, |
| "step": 1205 |
| }, |
| { |
| "epoch": 4.1875, |
| "grad_norm": 0.09376500153252898, |
| "learning_rate": 6.26434216748458e-06, |
| "loss": 0.309, |
| "step": 1206 |
| }, |
| { |
| "epoch": 4.190972222222222, |
| "grad_norm": 0.09594364360393172, |
| "learning_rate": 6.2123432954403155e-06, |
| "loss": 0.3046, |
| "step": 1207 |
| }, |
| { |
| "epoch": 4.194444444444445, |
| "grad_norm": 0.0821759111515786, |
| "learning_rate": 6.160542963206357e-06, |
| "loss": 0.2996, |
| "step": 1208 |
| }, |
| { |
| "epoch": 4.197916666666667, |
| "grad_norm": 0.08749906272107089, |
| "learning_rate": 6.108941475166879e-06, |
| "loss": 0.3079, |
| "step": 1209 |
| }, |
| { |
| "epoch": 4.201388888888889, |
| "grad_norm": 0.09555774118608272, |
| "learning_rate": 6.057539134537642e-06, |
| "loss": 0.3087, |
| "step": 1210 |
| }, |
| { |
| "epoch": 4.204861111111111, |
| "grad_norm": 0.08402200455892112, |
| "learning_rate": 6.006336243364161e-06, |
| "loss": 0.3047, |
| "step": 1211 |
| }, |
| { |
| "epoch": 4.208333333333333, |
| "grad_norm": 0.08995895348715435, |
| "learning_rate": 5.955333102520011e-06, |
| "loss": 0.3058, |
| "step": 1212 |
| }, |
| { |
| "epoch": 4.211805555555555, |
| "grad_norm": 0.087339945882205, |
| "learning_rate": 5.904530011704977e-06, |
| "loss": 0.306, |
| "step": 1213 |
| }, |
| { |
| "epoch": 4.215277777777778, |
| "grad_norm": 0.08520619113415061, |
| "learning_rate": 5.853927269443351e-06, |
| "loss": 0.3036, |
| "step": 1214 |
| }, |
| { |
| "epoch": 4.21875, |
| "grad_norm": 0.09260547720974625, |
| "learning_rate": 5.803525173082145e-06, |
| "loss": 0.3122, |
| "step": 1215 |
| }, |
| { |
| "epoch": 4.222222222222222, |
| "grad_norm": 0.09653805351478427, |
| "learning_rate": 5.753324018789346e-06, |
| "loss": 0.3001, |
| "step": 1216 |
| }, |
| { |
| "epoch": 4.225694444444445, |
| "grad_norm": 0.09089979862793579, |
| "learning_rate": 5.703324101552215e-06, |
| "loss": 0.3081, |
| "step": 1217 |
| }, |
| { |
| "epoch": 4.229166666666667, |
| "grad_norm": 0.09407662119818534, |
| "learning_rate": 5.653525715175483e-06, |
| "loss": 0.305, |
| "step": 1218 |
| }, |
| { |
| "epoch": 4.232638888888889, |
| "grad_norm": 0.08882380776423075, |
| "learning_rate": 5.6039291522796925e-06, |
| "loss": 0.3094, |
| "step": 1219 |
| }, |
| { |
| "epoch": 4.236111111111111, |
| "grad_norm": 0.08654673046171545, |
| "learning_rate": 5.554534704299448e-06, |
| "loss": 0.3017, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.239583333333333, |
| "grad_norm": 0.09564054745602778, |
| "learning_rate": 5.5053426614817094e-06, |
| "loss": 0.3064, |
| "step": 1221 |
| }, |
| { |
| "epoch": 4.243055555555555, |
| "grad_norm": 0.08742594086106721, |
| "learning_rate": 5.456353312884051e-06, |
| "loss": 0.3054, |
| "step": 1222 |
| }, |
| { |
| "epoch": 4.246527777777778, |
| "grad_norm": 0.08777112302991236, |
| "learning_rate": 5.407566946373037e-06, |
| "loss": 0.3033, |
| "step": 1223 |
| }, |
| { |
| "epoch": 4.25, |
| "grad_norm": 0.08507735550646113, |
| "learning_rate": 5.358983848622452e-06, |
| "loss": 0.302, |
| "step": 1224 |
| }, |
| { |
| "epoch": 4.253472222222222, |
| "grad_norm": 0.09199965080958919, |
| "learning_rate": 5.310604305111686e-06, |
| "loss": 0.3093, |
| "step": 1225 |
| }, |
| { |
| "epoch": 4.256944444444445, |
| "grad_norm": 0.09239775881843512, |
| "learning_rate": 5.262428600123981e-06, |
| "loss": 0.3092, |
| "step": 1226 |
| }, |
| { |
| "epoch": 4.260416666666667, |
| "grad_norm": 0.08274177231578059, |
| "learning_rate": 5.2144570167448475e-06, |
| "loss": 0.307, |
| "step": 1227 |
| }, |
| { |
| "epoch": 4.263888888888889, |
| "grad_norm": 0.08396101555744864, |
| "learning_rate": 5.1666898368603195e-06, |
| "loss": 0.3032, |
| "step": 1228 |
| }, |
| { |
| "epoch": 4.267361111111111, |
| "grad_norm": 0.08953375546952966, |
| "learning_rate": 5.119127341155365e-06, |
| "loss": 0.3047, |
| "step": 1229 |
| }, |
| { |
| "epoch": 4.270833333333333, |
| "grad_norm": 0.09007507657663101, |
| "learning_rate": 5.07176980911217e-06, |
| "loss": 0.3047, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.274305555555555, |
| "grad_norm": 0.08417185133114392, |
| "learning_rate": 5.024617519008574e-06, |
| "loss": 0.3024, |
| "step": 1231 |
| }, |
| { |
| "epoch": 4.277777777777778, |
| "grad_norm": 0.08609345920977819, |
| "learning_rate": 4.97767074791637e-06, |
| "loss": 0.3064, |
| "step": 1232 |
| }, |
| { |
| "epoch": 4.28125, |
| "grad_norm": 0.08472912916639756, |
| "learning_rate": 4.930929771699693e-06, |
| "loss": 0.3092, |
| "step": 1233 |
| }, |
| { |
| "epoch": 4.284722222222222, |
| "grad_norm": 0.08804997352238675, |
| "learning_rate": 4.8843948650134285e-06, |
| "loss": 0.299, |
| "step": 1234 |
| }, |
| { |
| "epoch": 4.288194444444445, |
| "grad_norm": 0.08479705600440504, |
| "learning_rate": 4.838066301301547e-06, |
| "loss": 0.3062, |
| "step": 1235 |
| }, |
| { |
| "epoch": 4.291666666666667, |
| "grad_norm": 0.08081270894823607, |
| "learning_rate": 4.791944352795561e-06, |
| "loss": 0.3062, |
| "step": 1236 |
| }, |
| { |
| "epoch": 4.295138888888889, |
| "grad_norm": 0.08287377233415162, |
| "learning_rate": 4.746029290512852e-06, |
| "loss": 0.3031, |
| "step": 1237 |
| }, |
| { |
| "epoch": 4.298611111111111, |
| "grad_norm": 0.08775456916989416, |
| "learning_rate": 4.700321384255158e-06, |
| "loss": 0.3019, |
| "step": 1238 |
| }, |
| { |
| "epoch": 4.302083333333333, |
| "grad_norm": 0.0872987560615773, |
| "learning_rate": 4.654820902606898e-06, |
| "loss": 0.3051, |
| "step": 1239 |
| }, |
| { |
| "epoch": 4.305555555555555, |
| "grad_norm": 0.08121221496704681, |
| "learning_rate": 4.609528112933688e-06, |
| "loss": 0.3111, |
| "step": 1240 |
| }, |
| { |
| "epoch": 4.309027777777778, |
| "grad_norm": 0.08383067090092823, |
| "learning_rate": 4.564443281380708e-06, |
| "loss": 0.3079, |
| "step": 1241 |
| }, |
| { |
| "epoch": 4.3125, |
| "grad_norm": 0.08586819103118022, |
| "learning_rate": 4.519566672871132e-06, |
| "loss": 0.3072, |
| "step": 1242 |
| }, |
| { |
| "epoch": 4.315972222222222, |
| "grad_norm": 0.08230433839566585, |
| "learning_rate": 4.474898551104625e-06, |
| "loss": 0.3077, |
| "step": 1243 |
| }, |
| { |
| "epoch": 4.319444444444445, |
| "grad_norm": 0.08210657080061083, |
| "learning_rate": 4.430439178555759e-06, |
| "loss": 0.3033, |
| "step": 1244 |
| }, |
| { |
| "epoch": 4.322916666666667, |
| "grad_norm": 0.08729196012855645, |
| "learning_rate": 4.386188816472441e-06, |
| "loss": 0.3111, |
| "step": 1245 |
| }, |
| { |
| "epoch": 4.326388888888889, |
| "grad_norm": 0.08240213073346828, |
| "learning_rate": 4.342147724874459e-06, |
| "loss": 0.3088, |
| "step": 1246 |
| }, |
| { |
| "epoch": 4.329861111111111, |
| "grad_norm": 0.0794807984870581, |
| "learning_rate": 4.29831616255187e-06, |
| "loss": 0.3033, |
| "step": 1247 |
| }, |
| { |
| "epoch": 4.333333333333333, |
| "grad_norm": 0.08829701160660025, |
| "learning_rate": 4.254694387063514e-06, |
| "loss": 0.3075, |
| "step": 1248 |
| }, |
| { |
| "epoch": 4.336805555555555, |
| "grad_norm": 0.08044160076356655, |
| "learning_rate": 4.2112826547355335e-06, |
| "loss": 0.3064, |
| "step": 1249 |
| }, |
| { |
| "epoch": 4.340277777777778, |
| "grad_norm": 0.08636152108755218, |
| "learning_rate": 4.168081220659796e-06, |
| "loss": 0.305, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.34375, |
| "grad_norm": 0.08091856000413852, |
| "learning_rate": 4.12509033869247e-06, |
| "loss": 0.3038, |
| "step": 1251 |
| }, |
| { |
| "epoch": 4.347222222222222, |
| "grad_norm": 0.08354541659416939, |
| "learning_rate": 4.082310261452471e-06, |
| "loss": 0.3083, |
| "step": 1252 |
| }, |
| { |
| "epoch": 4.350694444444445, |
| "grad_norm": 0.08171670419800484, |
| "learning_rate": 4.039741240320028e-06, |
| "loss": 0.3015, |
| "step": 1253 |
| }, |
| { |
| "epoch": 4.354166666666667, |
| "grad_norm": 0.08148652161118189, |
| "learning_rate": 3.997383525435154e-06, |
| "loss": 0.3063, |
| "step": 1254 |
| }, |
| { |
| "epoch": 4.357638888888889, |
| "grad_norm": 0.08161676617579289, |
| "learning_rate": 3.9552373656962295e-06, |
| "loss": 0.3052, |
| "step": 1255 |
| }, |
| { |
| "epoch": 4.361111111111111, |
| "grad_norm": 0.07993172768627148, |
| "learning_rate": 3.913303008758491e-06, |
| "loss": 0.3058, |
| "step": 1256 |
| }, |
| { |
| "epoch": 4.364583333333333, |
| "grad_norm": 0.08226260042852836, |
| "learning_rate": 3.871580701032631e-06, |
| "loss": 0.3048, |
| "step": 1257 |
| }, |
| { |
| "epoch": 4.368055555555555, |
| "grad_norm": 0.07963967554779505, |
| "learning_rate": 3.830070687683285e-06, |
| "loss": 0.3063, |
| "step": 1258 |
| }, |
| { |
| "epoch": 4.371527777777778, |
| "grad_norm": 0.08031725225973586, |
| "learning_rate": 3.78877321262765e-06, |
| "loss": 0.3072, |
| "step": 1259 |
| }, |
| { |
| "epoch": 4.375, |
| "grad_norm": 0.07809304104102638, |
| "learning_rate": 3.747688518534003e-06, |
| "loss": 0.3023, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.378472222222222, |
| "grad_norm": 0.07830478547816339, |
| "learning_rate": 3.706816846820327e-06, |
| "loss": 0.3016, |
| "step": 1261 |
| }, |
| { |
| "epoch": 4.381944444444445, |
| "grad_norm": 0.08141185465094594, |
| "learning_rate": 3.666158437652829e-06, |
| "loss": 0.3072, |
| "step": 1262 |
| }, |
| { |
| "epoch": 4.385416666666667, |
| "grad_norm": 0.08225321441942754, |
| "learning_rate": 3.6257135299445943e-06, |
| "loss": 0.3141, |
| "step": 1263 |
| }, |
| { |
| "epoch": 4.388888888888889, |
| "grad_norm": 0.0816264618707245, |
| "learning_rate": 3.585482361354138e-06, |
| "loss": 0.3058, |
| "step": 1264 |
| }, |
| { |
| "epoch": 4.392361111111111, |
| "grad_norm": 0.08196549798085448, |
| "learning_rate": 3.545465168284006e-06, |
| "loss": 0.3055, |
| "step": 1265 |
| }, |
| { |
| "epoch": 4.395833333333333, |
| "grad_norm": 0.07849614984159622, |
| "learning_rate": 3.5056621858794393e-06, |
| "loss": 0.3051, |
| "step": 1266 |
| }, |
| { |
| "epoch": 4.399305555555555, |
| "grad_norm": 0.0819544023451366, |
| "learning_rate": 3.4660736480269084e-06, |
| "loss": 0.3079, |
| "step": 1267 |
| }, |
| { |
| "epoch": 4.402777777777778, |
| "grad_norm": 0.08201178776199941, |
| "learning_rate": 3.42669978735283e-06, |
| "loss": 0.3066, |
| "step": 1268 |
| }, |
| { |
| "epoch": 4.40625, |
| "grad_norm": 0.07898815368040525, |
| "learning_rate": 3.3875408352221164e-06, |
| "loss": 0.3015, |
| "step": 1269 |
| }, |
| { |
| "epoch": 4.409722222222222, |
| "grad_norm": 0.08124064277394925, |
| "learning_rate": 3.348597021736888e-06, |
| "loss": 0.3112, |
| "step": 1270 |
| }, |
| { |
| "epoch": 4.413194444444445, |
| "grad_norm": 0.0780944772652276, |
| "learning_rate": 3.309868575735058e-06, |
| "loss": 0.3081, |
| "step": 1271 |
| }, |
| { |
| "epoch": 4.416666666666667, |
| "grad_norm": 0.07799431800188858, |
| "learning_rate": 3.2713557247890447e-06, |
| "loss": 0.3084, |
| "step": 1272 |
| }, |
| { |
| "epoch": 4.420138888888889, |
| "grad_norm": 0.08246204656448664, |
| "learning_rate": 3.233058695204383e-06, |
| "loss": 0.3016, |
| "step": 1273 |
| }, |
| { |
| "epoch": 4.423611111111111, |
| "grad_norm": 0.0801454285645143, |
| "learning_rate": 3.194977712018439e-06, |
| "loss": 0.3105, |
| "step": 1274 |
| }, |
| { |
| "epoch": 4.427083333333333, |
| "grad_norm": 0.07819837320085002, |
| "learning_rate": 3.157112998999057e-06, |
| "loss": 0.3052, |
| "step": 1275 |
| }, |
| { |
| "epoch": 4.430555555555555, |
| "grad_norm": 0.08098405872621355, |
| "learning_rate": 3.1194647786432663e-06, |
| "loss": 0.303, |
| "step": 1276 |
| }, |
| { |
| "epoch": 4.434027777777778, |
| "grad_norm": 0.08486769449790087, |
| "learning_rate": 3.082033272175933e-06, |
| "loss": 0.3102, |
| "step": 1277 |
| }, |
| { |
| "epoch": 4.4375, |
| "grad_norm": 0.07846939710904592, |
| "learning_rate": 3.0448186995485307e-06, |
| "loss": 0.3073, |
| "step": 1278 |
| }, |
| { |
| "epoch": 4.440972222222222, |
| "grad_norm": 0.07724147069385455, |
| "learning_rate": 3.0078212794377814e-06, |
| "loss": 0.3071, |
| "step": 1279 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 0.07929202020664662, |
| "learning_rate": 2.9710412292443868e-06, |
| "loss": 0.3018, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.447916666666667, |
| "grad_norm": 0.07979841043601492, |
| "learning_rate": 2.934478765091795e-06, |
| "loss": 0.3055, |
| "step": 1281 |
| }, |
| { |
| "epoch": 4.451388888888889, |
| "grad_norm": 0.07774246164480418, |
| "learning_rate": 2.8981341018248587e-06, |
| "loss": 0.3046, |
| "step": 1282 |
| }, |
| { |
| "epoch": 4.454861111111111, |
| "grad_norm": 0.07712391520906098, |
| "learning_rate": 2.8620074530086373e-06, |
| "loss": 0.3064, |
| "step": 1283 |
| }, |
| { |
| "epoch": 4.458333333333333, |
| "grad_norm": 0.07713201817547355, |
| "learning_rate": 2.8260990309270987e-06, |
| "loss": 0.3077, |
| "step": 1284 |
| }, |
| { |
| "epoch": 4.461805555555555, |
| "grad_norm": 0.08201941158917679, |
| "learning_rate": 2.7904090465819036e-06, |
| "loss": 0.306, |
| "step": 1285 |
| }, |
| { |
| "epoch": 4.465277777777778, |
| "grad_norm": 0.07755953735549277, |
| "learning_rate": 2.7549377096911213e-06, |
| "loss": 0.3051, |
| "step": 1286 |
| }, |
| { |
| "epoch": 4.46875, |
| "grad_norm": 0.07859301016230703, |
| "learning_rate": 2.7196852286880624e-06, |
| "loss": 0.3009, |
| "step": 1287 |
| }, |
| { |
| "epoch": 4.472222222222222, |
| "grad_norm": 0.07681147137276423, |
| "learning_rate": 2.6846518107199782e-06, |
| "loss": 0.3014, |
| "step": 1288 |
| }, |
| { |
| "epoch": 4.475694444444445, |
| "grad_norm": 0.0788469179654231, |
| "learning_rate": 2.649837661646921e-06, |
| "loss": 0.3088, |
| "step": 1289 |
| }, |
| { |
| "epoch": 4.479166666666667, |
| "grad_norm": 0.0800162833065285, |
| "learning_rate": 2.6152429860404647e-06, |
| "loss": 0.3041, |
| "step": 1290 |
| }, |
| { |
| "epoch": 4.482638888888889, |
| "grad_norm": 0.07945724490526976, |
| "learning_rate": 2.580867987182556e-06, |
| "loss": 0.3026, |
| "step": 1291 |
| }, |
| { |
| "epoch": 4.486111111111111, |
| "grad_norm": 0.0793594687341575, |
| "learning_rate": 2.546712867064276e-06, |
| "loss": 0.3083, |
| "step": 1292 |
| }, |
| { |
| "epoch": 4.489583333333333, |
| "grad_norm": 0.07985397006865015, |
| "learning_rate": 2.512777826384709e-06, |
| "loss": 0.3007, |
| "step": 1293 |
| }, |
| { |
| "epoch": 4.493055555555555, |
| "grad_norm": 0.07759493413593757, |
| "learning_rate": 2.479063064549689e-06, |
| "loss": 0.3003, |
| "step": 1294 |
| }, |
| { |
| "epoch": 4.496527777777778, |
| "grad_norm": 0.07904821520349109, |
| "learning_rate": 2.4455687796706996e-06, |
| "loss": 0.3037, |
| "step": 1295 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.07959351475882469, |
| "learning_rate": 2.4122951685636674e-06, |
| "loss": 0.3105, |
| "step": 1296 |
| }, |
| { |
| "epoch": 4.503472222222222, |
| "grad_norm": 0.08438189556943551, |
| "learning_rate": 2.3792424267478077e-06, |
| "loss": 0.3128, |
| "step": 1297 |
| }, |
| { |
| "epoch": 4.506944444444445, |
| "grad_norm": 0.0805267903725845, |
| "learning_rate": 2.34641074844451e-06, |
| "loss": 0.3076, |
| "step": 1298 |
| }, |
| { |
| "epoch": 4.510416666666667, |
| "grad_norm": 0.07648828401004298, |
| "learning_rate": 2.313800326576141e-06, |
| "loss": 0.3054, |
| "step": 1299 |
| }, |
| { |
| "epoch": 4.513888888888889, |
| "grad_norm": 0.07847170167124491, |
| "learning_rate": 2.281411352764966e-06, |
| "loss": 0.3043, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.517361111111111, |
| "grad_norm": 0.07964758385848214, |
| "learning_rate": 2.249244017331975e-06, |
| "loss": 0.3052, |
| "step": 1301 |
| }, |
| { |
| "epoch": 4.520833333333333, |
| "grad_norm": 0.08137734757681728, |
| "learning_rate": 2.217298509295813e-06, |
| "loss": 0.3101, |
| "step": 1302 |
| }, |
| { |
| "epoch": 4.524305555555555, |
| "grad_norm": 0.08059904880275212, |
| "learning_rate": 2.185575016371626e-06, |
| "loss": 0.3067, |
| "step": 1303 |
| }, |
| { |
| "epoch": 4.527777777777778, |
| "grad_norm": 0.07642771619974303, |
| "learning_rate": 2.1540737249699893e-06, |
| "loss": 0.3006, |
| "step": 1304 |
| }, |
| { |
| "epoch": 4.53125, |
| "grad_norm": 0.07450068026649213, |
| "learning_rate": 2.122794820195777e-06, |
| "loss": 0.3029, |
| "step": 1305 |
| }, |
| { |
| "epoch": 4.534722222222222, |
| "grad_norm": 0.07699770912588345, |
| "learning_rate": 2.0917384858471168e-06, |
| "loss": 0.3073, |
| "step": 1306 |
| }, |
| { |
| "epoch": 4.538194444444445, |
| "grad_norm": 0.08118194206437729, |
| "learning_rate": 2.0609049044142894e-06, |
| "loss": 0.3086, |
| "step": 1307 |
| }, |
| { |
| "epoch": 4.541666666666667, |
| "grad_norm": 0.07918192426609688, |
| "learning_rate": 2.0302942570786446e-06, |
| "loss": 0.3033, |
| "step": 1308 |
| }, |
| { |
| "epoch": 4.545138888888889, |
| "grad_norm": 0.0818161903856754, |
| "learning_rate": 1.999906723711549e-06, |
| "loss": 0.3091, |
| "step": 1309 |
| }, |
| { |
| "epoch": 4.548611111111111, |
| "grad_norm": 0.07712654079943236, |
| "learning_rate": 1.9697424828733423e-06, |
| "loss": 0.301, |
| "step": 1310 |
| }, |
| { |
| "epoch": 4.552083333333333, |
| "grad_norm": 0.07937817181604188, |
| "learning_rate": 1.9398017118122546e-06, |
| "loss": 0.3008, |
| "step": 1311 |
| }, |
| { |
| "epoch": 4.555555555555555, |
| "grad_norm": 0.07761097719890697, |
| "learning_rate": 1.9100845864633875e-06, |
| "loss": 0.3035, |
| "step": 1312 |
| }, |
| { |
| "epoch": 4.559027777777778, |
| "grad_norm": 0.08249170241628408, |
| "learning_rate": 1.880591281447699e-06, |
| "loss": 0.3077, |
| "step": 1313 |
| }, |
| { |
| "epoch": 4.5625, |
| "grad_norm": 0.07671567257184514, |
| "learning_rate": 1.8513219700709272e-06, |
| "loss": 0.3012, |
| "step": 1314 |
| }, |
| { |
| "epoch": 4.565972222222222, |
| "grad_norm": 0.08240625489775082, |
| "learning_rate": 1.8222768243226108e-06, |
| "loss": 0.3051, |
| "step": 1315 |
| }, |
| { |
| "epoch": 4.569444444444445, |
| "grad_norm": 0.08106392892833238, |
| "learning_rate": 1.793456014875079e-06, |
| "loss": 0.3027, |
| "step": 1316 |
| }, |
| { |
| "epoch": 4.572916666666667, |
| "grad_norm": 0.080852573793533, |
| "learning_rate": 1.7648597110824183e-06, |
| "loss": 0.3075, |
| "step": 1317 |
| }, |
| { |
| "epoch": 4.576388888888889, |
| "grad_norm": 0.07502019845627791, |
| "learning_rate": 1.7364880809795082e-06, |
| "loss": 0.3015, |
| "step": 1318 |
| }, |
| { |
| "epoch": 4.579861111111111, |
| "grad_norm": 0.07400493685151668, |
| "learning_rate": 1.708341291281026e-06, |
| "loss": 0.3009, |
| "step": 1319 |
| }, |
| { |
| "epoch": 4.583333333333333, |
| "grad_norm": 0.07449432188464176, |
| "learning_rate": 1.6804195073804442e-06, |
| "loss": 0.3059, |
| "step": 1320 |
| }, |
| { |
| "epoch": 4.586805555555555, |
| "grad_norm": 0.08268528004577931, |
| "learning_rate": 1.6527228933491012e-06, |
| "loss": 0.3076, |
| "step": 1321 |
| }, |
| { |
| "epoch": 4.590277777777778, |
| "grad_norm": 0.07316983791427785, |
| "learning_rate": 1.6252516119351947e-06, |
| "loss": 0.3039, |
| "step": 1322 |
| }, |
| { |
| "epoch": 4.59375, |
| "grad_norm": 0.08087200910739684, |
| "learning_rate": 1.598005824562856e-06, |
| "loss": 0.3064, |
| "step": 1323 |
| }, |
| { |
| "epoch": 4.597222222222222, |
| "grad_norm": 0.07713339500755885, |
| "learning_rate": 1.5709856913311795e-06, |
| "loss": 0.3063, |
| "step": 1324 |
| }, |
| { |
| "epoch": 4.600694444444445, |
| "grad_norm": 0.07639629736437101, |
| "learning_rate": 1.5441913710133106e-06, |
| "loss": 0.3113, |
| "step": 1325 |
| }, |
| { |
| "epoch": 4.604166666666667, |
| "grad_norm": 0.07622410815157274, |
| "learning_rate": 1.5176230210554744e-06, |
| "loss": 0.3095, |
| "step": 1326 |
| }, |
| { |
| "epoch": 4.607638888888889, |
| "grad_norm": 0.07402239150583342, |
| "learning_rate": 1.4912807975760734e-06, |
| "loss": 0.3001, |
| "step": 1327 |
| }, |
| { |
| "epoch": 4.611111111111111, |
| "grad_norm": 0.07586041151318136, |
| "learning_rate": 1.4651648553647869e-06, |
| "loss": 0.3049, |
| "step": 1328 |
| }, |
| { |
| "epoch": 4.614583333333333, |
| "grad_norm": 0.07577501190460577, |
| "learning_rate": 1.4392753478816145e-06, |
| "loss": 0.3092, |
| "step": 1329 |
| }, |
| { |
| "epoch": 4.618055555555555, |
| "grad_norm": 0.07339670766161828, |
| "learning_rate": 1.4136124272560259e-06, |
| "loss": 0.3056, |
| "step": 1330 |
| }, |
| { |
| "epoch": 4.621527777777778, |
| "grad_norm": 0.07347997250949742, |
| "learning_rate": 1.3881762442860124e-06, |
| "loss": 0.3063, |
| "step": 1331 |
| }, |
| { |
| "epoch": 4.625, |
| "grad_norm": 0.07682233056778245, |
| "learning_rate": 1.3629669484372722e-06, |
| "loss": 0.3087, |
| "step": 1332 |
| }, |
| { |
| "epoch": 4.628472222222222, |
| "grad_norm": 0.07453451880293445, |
| "learning_rate": 1.3379846878422487e-06, |
| "loss": 0.3057, |
| "step": 1333 |
| }, |
| { |
| "epoch": 4.631944444444445, |
| "grad_norm": 0.07745996539898557, |
| "learning_rate": 1.313229609299338e-06, |
| "loss": 0.3044, |
| "step": 1334 |
| }, |
| { |
| "epoch": 4.635416666666667, |
| "grad_norm": 0.0780732404982061, |
| "learning_rate": 1.2887018582719634e-06, |
| "loss": 0.3037, |
| "step": 1335 |
| }, |
| { |
| "epoch": 4.638888888888889, |
| "grad_norm": 0.07389341458338661, |
| "learning_rate": 1.2644015788877684e-06, |
| "loss": 0.3011, |
| "step": 1336 |
| }, |
| { |
| "epoch": 4.642361111111111, |
| "grad_norm": 0.07456241935811461, |
| "learning_rate": 1.2403289139377317e-06, |
| "loss": 0.3035, |
| "step": 1337 |
| }, |
| { |
| "epoch": 4.645833333333333, |
| "grad_norm": 0.07562411943342495, |
| "learning_rate": 1.2164840048753602e-06, |
| "loss": 0.3069, |
| "step": 1338 |
| }, |
| { |
| "epoch": 4.649305555555555, |
| "grad_norm": 0.07804683083088992, |
| "learning_rate": 1.1928669918158309e-06, |
| "loss": 0.3061, |
| "step": 1339 |
| }, |
| { |
| "epoch": 4.652777777777778, |
| "grad_norm": 0.07338410128940559, |
| "learning_rate": 1.1694780135352013e-06, |
| "loss": 0.3019, |
| "step": 1340 |
| }, |
| { |
| "epoch": 4.65625, |
| "grad_norm": 0.07497239513325749, |
| "learning_rate": 1.1463172074695428e-06, |
| "loss": 0.3049, |
| "step": 1341 |
| }, |
| { |
| "epoch": 4.659722222222222, |
| "grad_norm": 0.07652107630464167, |
| "learning_rate": 1.1233847097141858e-06, |
| "loss": 0.3009, |
| "step": 1342 |
| }, |
| { |
| "epoch": 4.663194444444445, |
| "grad_norm": 0.07466572845911, |
| "learning_rate": 1.1006806550228855e-06, |
| "loss": 0.305, |
| "step": 1343 |
| }, |
| { |
| "epoch": 4.666666666666667, |
| "grad_norm": 0.07949689532916121, |
| "learning_rate": 1.0782051768070477e-06, |
| "loss": 0.3106, |
| "step": 1344 |
| }, |
| { |
| "epoch": 4.670138888888889, |
| "grad_norm": 0.07716711613529656, |
| "learning_rate": 1.0559584071349405e-06, |
| "loss": 0.3067, |
| "step": 1345 |
| }, |
| { |
| "epoch": 4.673611111111111, |
| "grad_norm": 0.07714967482230316, |
| "learning_rate": 1.0339404767309014e-06, |
| "loss": 0.3033, |
| "step": 1346 |
| }, |
| { |
| "epoch": 4.677083333333333, |
| "grad_norm": 0.0747240286372511, |
| "learning_rate": 1.0121515149746108e-06, |
| "loss": 0.302, |
| "step": 1347 |
| }, |
| { |
| "epoch": 4.680555555555555, |
| "grad_norm": 0.07699672596500706, |
| "learning_rate": 9.905916499002787e-07, |
| "loss": 0.3075, |
| "step": 1348 |
| }, |
| { |
| "epoch": 4.684027777777778, |
| "grad_norm": 0.07462981706073524, |
| "learning_rate": 9.692610081959342e-07, |
| "loss": 0.3071, |
| "step": 1349 |
| }, |
| { |
| "epoch": 4.6875, |
| "grad_norm": 0.07440562255699622, |
| "learning_rate": 9.481597152026656e-07, |
| "loss": 0.3035, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.690972222222222, |
| "grad_norm": 0.0747750169854033, |
| "learning_rate": 9.272878949138798e-07, |
| "loss": 0.3026, |
| "step": 1351 |
| }, |
| { |
| "epoch": 4.694444444444445, |
| "grad_norm": 0.07517286545410393, |
| "learning_rate": 9.066456699745774e-07, |
| "loss": 0.2988, |
| "step": 1352 |
| }, |
| { |
| "epoch": 4.697916666666667, |
| "grad_norm": 0.07777257948685168, |
| "learning_rate": 8.862331616806385e-07, |
| "loss": 0.3025, |
| "step": 1353 |
| }, |
| { |
| "epoch": 4.701388888888889, |
| "grad_norm": 0.07679937737402819, |
| "learning_rate": 8.660504899780986e-07, |
| "loss": 0.3066, |
| "step": 1354 |
| }, |
| { |
| "epoch": 4.704861111111111, |
| "grad_norm": 0.07607956492556339, |
| "learning_rate": 8.460977734624509e-07, |
| "loss": 0.3035, |
| "step": 1355 |
| }, |
| { |
| "epoch": 4.708333333333333, |
| "grad_norm": 0.07534506879573015, |
| "learning_rate": 8.263751293779409e-07, |
| "loss": 0.3094, |
| "step": 1356 |
| }, |
| { |
| "epoch": 4.711805555555555, |
| "grad_norm": 0.07663473789080033, |
| "learning_rate": 8.068826736169e-07, |
| "loss": 0.3053, |
| "step": 1357 |
| }, |
| { |
| "epoch": 4.715277777777778, |
| "grad_norm": 0.07510530680390314, |
| "learning_rate": 7.876205207190391e-07, |
| "loss": 0.3092, |
| "step": 1358 |
| }, |
| { |
| "epoch": 4.71875, |
| "grad_norm": 0.07377768224328009, |
| "learning_rate": 7.685887838707828e-07, |
| "loss": 0.3031, |
| "step": 1359 |
| }, |
| { |
| "epoch": 4.722222222222222, |
| "grad_norm": 0.07504818836415791, |
| "learning_rate": 7.497875749046124e-07, |
| "loss": 0.3069, |
| "step": 1360 |
| }, |
| { |
| "epoch": 4.725694444444445, |
| "grad_norm": 0.07299334086816671, |
| "learning_rate": 7.312170042984035e-07, |
| "loss": 0.3021, |
| "step": 1361 |
| }, |
| { |
| "epoch": 4.729166666666667, |
| "grad_norm": 0.0779726563674373, |
| "learning_rate": 7.128771811747737e-07, |
| "loss": 0.3079, |
| "step": 1362 |
| }, |
| { |
| "epoch": 4.732638888888889, |
| "grad_norm": 0.0744531952974455, |
| "learning_rate": 6.947682133004386e-07, |
| "loss": 0.3057, |
| "step": 1363 |
| }, |
| { |
| "epoch": 4.736111111111111, |
| "grad_norm": 0.07704018762229559, |
| "learning_rate": 6.768902070856031e-07, |
| "loss": 0.3067, |
| "step": 1364 |
| }, |
| { |
| "epoch": 4.739583333333333, |
| "grad_norm": 0.07639788284252715, |
| "learning_rate": 6.592432675832916e-07, |
| "loss": 0.3114, |
| "step": 1365 |
| }, |
| { |
| "epoch": 4.743055555555555, |
| "grad_norm": 0.07596548343087427, |
| "learning_rate": 6.418274984887741e-07, |
| "loss": 0.299, |
| "step": 1366 |
| }, |
| { |
| "epoch": 4.746527777777778, |
| "grad_norm": 0.07579121693711281, |
| "learning_rate": 6.24643002138936e-07, |
| "loss": 0.3096, |
| "step": 1367 |
| }, |
| { |
| "epoch": 4.75, |
| "grad_norm": 0.07585599254548926, |
| "learning_rate": 6.076898795116792e-07, |
| "loss": 0.306, |
| "step": 1368 |
| }, |
| { |
| "epoch": 4.753472222222222, |
| "grad_norm": 0.07564652114058751, |
| "learning_rate": 5.909682302253217e-07, |
| "loss": 0.3053, |
| "step": 1369 |
| }, |
| { |
| "epoch": 4.756944444444445, |
| "grad_norm": 0.07414673161087322, |
| "learning_rate": 5.744781525380339e-07, |
| "loss": 0.3077, |
| "step": 1370 |
| }, |
| { |
| "epoch": 4.760416666666667, |
| "grad_norm": 0.07612522562021062, |
| "learning_rate": 5.582197433472348e-07, |
| "loss": 0.3056, |
| "step": 1371 |
| }, |
| { |
| "epoch": 4.763888888888889, |
| "grad_norm": 0.07684508645065607, |
| "learning_rate": 5.421930981890455e-07, |
| "loss": 0.3037, |
| "step": 1372 |
| }, |
| { |
| "epoch": 4.767361111111111, |
| "grad_norm": 0.07299978275560645, |
| "learning_rate": 5.263983112377036e-07, |
| "loss": 0.3051, |
| "step": 1373 |
| }, |
| { |
| "epoch": 4.770833333333333, |
| "grad_norm": 0.07539354501312999, |
| "learning_rate": 5.108354753050381e-07, |
| "loss": 0.3066, |
| "step": 1374 |
| }, |
| { |
| "epoch": 4.774305555555555, |
| "grad_norm": 0.07144155759475616, |
| "learning_rate": 4.955046818398979e-07, |
| "loss": 0.3046, |
| "step": 1375 |
| }, |
| { |
| "epoch": 4.777777777777778, |
| "grad_norm": 0.0750194771698725, |
| "learning_rate": 4.804060209276396e-07, |
| "loss": 0.3051, |
| "step": 1376 |
| }, |
| { |
| "epoch": 4.78125, |
| "grad_norm": 0.07386907645068111, |
| "learning_rate": 4.6553958128957355e-07, |
| "loss": 0.3051, |
| "step": 1377 |
| }, |
| { |
| "epoch": 4.784722222222222, |
| "grad_norm": 0.07300681819406184, |
| "learning_rate": 4.509054502824528e-07, |
| "loss": 0.3053, |
| "step": 1378 |
| }, |
| { |
| "epoch": 4.788194444444445, |
| "grad_norm": 0.07209552136082469, |
| "learning_rate": 4.365037138979622e-07, |
| "loss": 0.301, |
| "step": 1379 |
| }, |
| { |
| "epoch": 4.791666666666667, |
| "grad_norm": 0.07361568021097385, |
| "learning_rate": 4.223344567622212e-07, |
| "loss": 0.3016, |
| "step": 1380 |
| }, |
| { |
| "epoch": 4.795138888888889, |
| "grad_norm": 0.07498679679992676, |
| "learning_rate": 4.083977621352642e-07, |
| "loss": 0.3109, |
| "step": 1381 |
| }, |
| { |
| "epoch": 4.798611111111111, |
| "grad_norm": 0.07594221300650468, |
| "learning_rate": 3.946937119105654e-07, |
| "loss": 0.2995, |
| "step": 1382 |
| }, |
| { |
| "epoch": 4.802083333333333, |
| "grad_norm": 0.07394072454121102, |
| "learning_rate": 3.8122238661456814e-07, |
| "loss": 0.3024, |
| "step": 1383 |
| }, |
| { |
| "epoch": 4.805555555555555, |
| "grad_norm": 0.07353465846849089, |
| "learning_rate": 3.679838654061874e-07, |
| "loss": 0.3008, |
| "step": 1384 |
| }, |
| { |
| "epoch": 4.809027777777778, |
| "grad_norm": 0.0720259043424231, |
| "learning_rate": 3.5497822607636123e-07, |
| "loss": 0.3048, |
| "step": 1385 |
| }, |
| { |
| "epoch": 4.8125, |
| "grad_norm": 0.07294963832385294, |
| "learning_rate": 3.4220554504758475e-07, |
| "loss": 0.3084, |
| "step": 1386 |
| }, |
| { |
| "epoch": 4.815972222222222, |
| "grad_norm": 0.0726666221581321, |
| "learning_rate": 3.2966589737347457e-07, |
| "loss": 0.3086, |
| "step": 1387 |
| }, |
| { |
| "epoch": 4.819444444444445, |
| "grad_norm": 0.07483350159915376, |
| "learning_rate": 3.173593567383071e-07, |
| "loss": 0.3003, |
| "step": 1388 |
| }, |
| { |
| "epoch": 4.822916666666667, |
| "grad_norm": 0.07322136882905836, |
| "learning_rate": 3.0528599545661453e-07, |
| "loss": 0.3039, |
| "step": 1389 |
| }, |
| { |
| "epoch": 4.826388888888889, |
| "grad_norm": 0.07073641232974301, |
| "learning_rate": 2.9344588447272726e-07, |
| "loss": 0.3033, |
| "step": 1390 |
| }, |
| { |
| "epoch": 4.829861111111111, |
| "grad_norm": 0.07470320302872045, |
| "learning_rate": 2.818390933603743e-07, |
| "loss": 0.3005, |
| "step": 1391 |
| }, |
| { |
| "epoch": 4.833333333333333, |
| "grad_norm": 0.07324094393948057, |
| "learning_rate": 2.704656903222791e-07, |
| "loss": 0.3056, |
| "step": 1392 |
| }, |
| { |
| "epoch": 4.836805555555555, |
| "grad_norm": 0.07274475401225772, |
| "learning_rate": 2.5932574218975104e-07, |
| "loss": 0.305, |
| "step": 1393 |
| }, |
| { |
| "epoch": 4.840277777777778, |
| "grad_norm": 0.07473214948879901, |
| "learning_rate": 2.484193144222946e-07, |
| "loss": 0.3076, |
| "step": 1394 |
| }, |
| { |
| "epoch": 4.84375, |
| "grad_norm": 0.07221235816804243, |
| "learning_rate": 2.3774647110721415e-07, |
| "loss": 0.3036, |
| "step": 1395 |
| }, |
| { |
| "epoch": 4.847222222222222, |
| "grad_norm": 0.07367780540336007, |
| "learning_rate": 2.273072749592631e-07, |
| "loss": 0.3045, |
| "step": 1396 |
| }, |
| { |
| "epoch": 4.850694444444445, |
| "grad_norm": 0.07325375453284966, |
| "learning_rate": 2.1710178732024413e-07, |
| "loss": 0.3049, |
| "step": 1397 |
| }, |
| { |
| "epoch": 4.854166666666667, |
| "grad_norm": 0.07659484091306909, |
| "learning_rate": 2.0713006815868075e-07, |
| "loss": 0.3117, |
| "step": 1398 |
| }, |
| { |
| "epoch": 4.857638888888889, |
| "grad_norm": 0.07535899223039774, |
| "learning_rate": 1.973921760694264e-07, |
| "loss": 0.3051, |
| "step": 1399 |
| }, |
| { |
| "epoch": 4.861111111111111, |
| "grad_norm": 0.07300398348215961, |
| "learning_rate": 1.8788816827336686e-07, |
| "loss": 0.3056, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.864583333333333, |
| "grad_norm": 0.07316979292661353, |
| "learning_rate": 1.7861810061704287e-07, |
| "loss": 0.3057, |
| "step": 1401 |
| }, |
| { |
| "epoch": 4.868055555555555, |
| "grad_norm": 0.07378740384961463, |
| "learning_rate": 1.6958202757234366e-07, |
| "loss": 0.3049, |
| "step": 1402 |
| }, |
| { |
| "epoch": 4.871527777777778, |
| "grad_norm": 0.07181291686698073, |
| "learning_rate": 1.6078000223618272e-07, |
| "loss": 0.3067, |
| "step": 1403 |
| }, |
| { |
| "epoch": 4.875, |
| "grad_norm": 0.07295819865239954, |
| "learning_rate": 1.522120763301782e-07, |
| "loss": 0.3074, |
| "step": 1404 |
| }, |
| { |
| "epoch": 4.878472222222222, |
| "grad_norm": 0.07232277152642255, |
| "learning_rate": 1.438783002003641e-07, |
| "loss": 0.3023, |
| "step": 1405 |
| }, |
| { |
| "epoch": 4.881944444444445, |
| "grad_norm": 0.07234458208768678, |
| "learning_rate": 1.3577872281688388e-07, |
| "loss": 0.3082, |
| "step": 1406 |
| }, |
| { |
| "epoch": 4.885416666666667, |
| "grad_norm": 0.07276557670020045, |
| "learning_rate": 1.2791339177369745e-07, |
| "loss": 0.2999, |
| "step": 1407 |
| }, |
| { |
| "epoch": 4.888888888888889, |
| "grad_norm": 0.0744224740118525, |
| "learning_rate": 1.2028235328831906e-07, |
| "loss": 0.3079, |
| "step": 1408 |
| }, |
| { |
| "epoch": 4.892361111111111, |
| "grad_norm": 0.0724196144673555, |
| "learning_rate": 1.1288565220152426e-07, |
| "loss": 0.3043, |
| "step": 1409 |
| }, |
| { |
| "epoch": 4.895833333333333, |
| "grad_norm": 0.07355374246045054, |
| "learning_rate": 1.0572333197711005e-07, |
| "loss": 0.3028, |
| "step": 1410 |
| }, |
| { |
| "epoch": 4.899305555555555, |
| "grad_norm": 0.07251943734879686, |
| "learning_rate": 9.879543470161512e-08, |
| "loss": 0.3015, |
| "step": 1411 |
| }, |
| { |
| "epoch": 4.902777777777778, |
| "grad_norm": 0.0739982426020259, |
| "learning_rate": 9.21020010840934e-08, |
| "loss": 0.3049, |
| "step": 1412 |
| }, |
| { |
| "epoch": 4.90625, |
| "grad_norm": 0.07307140402702485, |
| "learning_rate": 8.564307045586085e-08, |
| "loss": 0.308, |
| "step": 1413 |
| }, |
| { |
| "epoch": 4.909722222222222, |
| "grad_norm": 0.07273314483178361, |
| "learning_rate": 7.941868077026905e-08, |
| "loss": 0.3013, |
| "step": 1414 |
| }, |
| { |
| "epoch": 4.913194444444445, |
| "grad_norm": 0.07179264299655526, |
| "learning_rate": 7.34288686024831e-08, |
| "loss": 0.3041, |
| "step": 1415 |
| }, |
| { |
| "epoch": 4.916666666666667, |
| "grad_norm": 0.07399654735570108, |
| "learning_rate": 6.767366914927298e-08, |
| "loss": 0.3065, |
| "step": 1416 |
| }, |
| { |
| "epoch": 4.920138888888889, |
| "grad_norm": 0.0728246842339243, |
| "learning_rate": 6.215311622878695e-08, |
| "loss": 0.307, |
| "step": 1417 |
| }, |
| { |
| "epoch": 4.923611111111111, |
| "grad_norm": 0.07402560027995006, |
| "learning_rate": 5.6867242280373994e-08, |
| "loss": 0.3103, |
| "step": 1418 |
| }, |
| { |
| "epoch": 4.927083333333333, |
| "grad_norm": 0.07383259587315397, |
| "learning_rate": 5.1816078364383956e-08, |
| "loss": 0.306, |
| "step": 1419 |
| }, |
| { |
| "epoch": 4.930555555555555, |
| "grad_norm": 0.07298007618541999, |
| "learning_rate": 4.699965416198549e-08, |
| "loss": 0.3054, |
| "step": 1420 |
| }, |
| { |
| "epoch": 4.934027777777778, |
| "grad_norm": 0.07188192284714194, |
| "learning_rate": 4.241799797498836e-08, |
| "loss": 0.3008, |
| "step": 1421 |
| }, |
| { |
| "epoch": 4.9375, |
| "grad_norm": 0.07255291024636266, |
| "learning_rate": 3.8071136725688074e-08, |
| "loss": 0.3054, |
| "step": 1422 |
| }, |
| { |
| "epoch": 4.940972222222222, |
| "grad_norm": 0.07306219408514232, |
| "learning_rate": 3.3959095956697106e-08, |
| "loss": 0.3033, |
| "step": 1423 |
| }, |
| { |
| "epoch": 4.944444444444445, |
| "grad_norm": 0.07293410022631318, |
| "learning_rate": 3.0081899830798345e-08, |
| "loss": 0.3042, |
| "step": 1424 |
| }, |
| { |
| "epoch": 4.947916666666667, |
| "grad_norm": 0.07153901090491563, |
| "learning_rate": 2.6439571130798536e-08, |
| "loss": 0.3071, |
| "step": 1425 |
| }, |
| { |
| "epoch": 4.951388888888889, |
| "grad_norm": 0.07296799735265355, |
| "learning_rate": 2.3032131259403955e-08, |
| "loss": 0.2973, |
| "step": 1426 |
| }, |
| { |
| "epoch": 4.954861111111111, |
| "grad_norm": 0.07029056717086786, |
| "learning_rate": 1.9859600239087175e-08, |
| "loss": 0.3035, |
| "step": 1427 |
| }, |
| { |
| "epoch": 4.958333333333333, |
| "grad_norm": 0.07458737702442902, |
| "learning_rate": 1.6921996711976028e-08, |
| "loss": 0.3096, |
| "step": 1428 |
| }, |
| { |
| "epoch": 4.961805555555555, |
| "grad_norm": 0.07206677082554877, |
| "learning_rate": 1.4219337939738175e-08, |
| "loss": 0.306, |
| "step": 1429 |
| }, |
| { |
| "epoch": 4.965277777777778, |
| "grad_norm": 0.07170156509697462, |
| "learning_rate": 1.175163980347005e-08, |
| "loss": 0.3027, |
| "step": 1430 |
| }, |
| { |
| "epoch": 4.96875, |
| "grad_norm": 0.07216683994988987, |
| "learning_rate": 9.518916803634703e-09, |
| "loss": 0.2986, |
| "step": 1431 |
| }, |
| { |
| "epoch": 4.972222222222222, |
| "grad_norm": 0.07279226964622872, |
| "learning_rate": 7.521182059946342e-09, |
| "loss": 0.3057, |
| "step": 1432 |
| }, |
| { |
| "epoch": 4.975694444444445, |
| "grad_norm": 0.07392442432994105, |
| "learning_rate": 5.758447311294823e-09, |
| "loss": 0.3071, |
| "step": 1433 |
| }, |
| { |
| "epoch": 4.979166666666667, |
| "grad_norm": 0.07187539149864376, |
| "learning_rate": 4.230722915701257e-09, |
| "loss": 0.3029, |
| "step": 1434 |
| }, |
| { |
| "epoch": 4.982638888888889, |
| "grad_norm": 0.07059128687591827, |
| "learning_rate": 2.93801785022918e-09, |
| "loss": 0.305, |
| "step": 1435 |
| }, |
| { |
| "epoch": 4.986111111111111, |
| "grad_norm": 0.07343485822486388, |
| "learning_rate": 1.8803397109534715e-09, |
| "loss": 0.3038, |
| "step": 1436 |
| }, |
| { |
| "epoch": 4.989583333333333, |
| "grad_norm": 0.07147607345386381, |
| "learning_rate": 1.057694712902624e-09, |
| "loss": 0.3063, |
| "step": 1437 |
| }, |
| { |
| "epoch": 4.993055555555555, |
| "grad_norm": 0.07245299095155071, |
| "learning_rate": 4.700876900187723e-10, |
| "loss": 0.3022, |
| "step": 1438 |
| }, |
| { |
| "epoch": 4.996527777777778, |
| "grad_norm": 0.07367615840163165, |
| "learning_rate": 1.175220951488143e-10, |
| "loss": 0.3066, |
| "step": 1439 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.08989122624390881, |
| "learning_rate": 0.0, |
| "loss": 0.298, |
| "step": 1440 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 1440, |
| "total_flos": 2.415764485177344e+16, |
| "train_loss": 0.3780418654489848, |
| "train_runtime": 19797.8662, |
| "train_samples_per_second": 37.137, |
| "train_steps_per_second": 0.073 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1440, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.415764485177344e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|