| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 353, |
| "global_step": 353, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0028328611898017, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1e-05, |
| "loss": 2.1755, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0056657223796034, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.971671388101982e-06, |
| "loss": 2.2091, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0084985835694051, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.943342776203968e-06, |
| "loss": 2.225, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0113314447592068, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.91501416430595e-06, |
| "loss": 2.2321, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.014164305949008499, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.886685552407933e-06, |
| "loss": 2.1532, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0169971671388102, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.858356940509916e-06, |
| "loss": 2.1496, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.019830028328611898, |
| "grad_norm": 0.52734375, |
| "learning_rate": 9.8300283286119e-06, |
| "loss": 2.1559, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0226628895184136, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.801699716713881e-06, |
| "loss": 2.1314, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.025495750708215296, |
| "grad_norm": 0.48828125, |
| "learning_rate": 9.773371104815866e-06, |
| "loss": 2.0267, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.028328611898016998, |
| "grad_norm": 0.46875, |
| "learning_rate": 9.745042492917848e-06, |
| "loss": 2.056, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.031161473087818695, |
| "grad_norm": 0.4609375, |
| "learning_rate": 9.716713881019831e-06, |
| "loss": 2.0301, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0339943342776204, |
| "grad_norm": 0.4453125, |
| "learning_rate": 9.688385269121814e-06, |
| "loss": 1.9753, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.036827195467422094, |
| "grad_norm": 0.439453125, |
| "learning_rate": 9.660056657223796e-06, |
| "loss": 1.9751, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.039660056657223795, |
| "grad_norm": 0.42578125, |
| "learning_rate": 9.63172804532578e-06, |
| "loss": 1.9012, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.042492917847025496, |
| "grad_norm": 0.421875, |
| "learning_rate": 9.603399433427763e-06, |
| "loss": 1.8181, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0453257790368272, |
| "grad_norm": 0.451171875, |
| "learning_rate": 9.575070821529746e-06, |
| "loss": 1.8512, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04815864022662889, |
| "grad_norm": 0.43359375, |
| "learning_rate": 9.546742209631728e-06, |
| "loss": 1.806, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05099150141643059, |
| "grad_norm": 0.4296875, |
| "learning_rate": 9.518413597733713e-06, |
| "loss": 1.8077, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.053824362606232294, |
| "grad_norm": 0.4296875, |
| "learning_rate": 9.490084985835695e-06, |
| "loss": 1.8114, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.056657223796033995, |
| "grad_norm": 0.416015625, |
| "learning_rate": 9.461756373937678e-06, |
| "loss": 1.7811, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.059490084985835696, |
| "grad_norm": 0.396484375, |
| "learning_rate": 9.433427762039661e-06, |
| "loss": 1.7397, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06232294617563739, |
| "grad_norm": 0.373046875, |
| "learning_rate": 9.405099150141643e-06, |
| "loss": 1.7354, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.06515580736543909, |
| "grad_norm": 0.373046875, |
| "learning_rate": 9.376770538243626e-06, |
| "loss": 1.7333, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0679886685552408, |
| "grad_norm": 0.345703125, |
| "learning_rate": 9.34844192634561e-06, |
| "loss": 1.635, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0708215297450425, |
| "grad_norm": 0.341796875, |
| "learning_rate": 9.320113314447593e-06, |
| "loss": 1.6462, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07365439093484419, |
| "grad_norm": 0.349609375, |
| "learning_rate": 9.291784702549575e-06, |
| "loss": 1.6374, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0764872521246459, |
| "grad_norm": 0.3359375, |
| "learning_rate": 9.26345609065156e-06, |
| "loss": 1.6747, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07932011331444759, |
| "grad_norm": 0.32421875, |
| "learning_rate": 9.235127478753542e-06, |
| "loss": 1.5885, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0821529745042493, |
| "grad_norm": 0.326171875, |
| "learning_rate": 9.206798866855525e-06, |
| "loss": 1.5845, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.08498583569405099, |
| "grad_norm": 0.3515625, |
| "learning_rate": 9.178470254957508e-06, |
| "loss": 1.5462, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08781869688385269, |
| "grad_norm": 0.3203125, |
| "learning_rate": 9.150141643059492e-06, |
| "loss": 1.5114, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0906515580736544, |
| "grad_norm": 0.306640625, |
| "learning_rate": 9.121813031161473e-06, |
| "loss": 1.4994, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.09348441926345609, |
| "grad_norm": 0.3125, |
| "learning_rate": 9.093484419263457e-06, |
| "loss": 1.475, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.09631728045325778, |
| "grad_norm": 0.3125, |
| "learning_rate": 9.06515580736544e-06, |
| "loss": 1.5322, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.09915014164305949, |
| "grad_norm": 0.3125, |
| "learning_rate": 9.036827195467422e-06, |
| "loss": 1.4353, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.10198300283286119, |
| "grad_norm": 0.29296875, |
| "learning_rate": 9.008498583569407e-06, |
| "loss": 1.4779, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1048158640226629, |
| "grad_norm": 0.2890625, |
| "learning_rate": 8.980169971671388e-06, |
| "loss": 1.4647, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.10764872521246459, |
| "grad_norm": 0.267578125, |
| "learning_rate": 8.951841359773372e-06, |
| "loss": 1.46, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.11048158640226628, |
| "grad_norm": 0.26171875, |
| "learning_rate": 8.923512747875355e-06, |
| "loss": 1.3724, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.11331444759206799, |
| "grad_norm": 0.259765625, |
| "learning_rate": 8.895184135977339e-06, |
| "loss": 1.4102, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11614730878186968, |
| "grad_norm": 0.26171875, |
| "learning_rate": 8.86685552407932e-06, |
| "loss": 1.3912, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.11898016997167139, |
| "grad_norm": 0.267578125, |
| "learning_rate": 8.838526912181304e-06, |
| "loss": 1.3747, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.12181303116147309, |
| "grad_norm": 0.2578125, |
| "learning_rate": 8.810198300283287e-06, |
| "loss": 1.3842, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.12464589235127478, |
| "grad_norm": 0.2578125, |
| "learning_rate": 8.78186968838527e-06, |
| "loss": 1.3656, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1274787535410765, |
| "grad_norm": 0.25, |
| "learning_rate": 8.753541076487254e-06, |
| "loss": 1.3078, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.13031161473087818, |
| "grad_norm": 0.2421875, |
| "learning_rate": 8.725212464589235e-06, |
| "loss": 1.3541, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.13314447592067988, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 8.696883852691219e-06, |
| "loss": 1.3789, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1359773371104816, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 8.668555240793202e-06, |
| "loss": 1.349, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1388101983002833, |
| "grad_norm": 0.23046875, |
| "learning_rate": 8.640226628895185e-06, |
| "loss": 1.2902, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.141643059490085, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 8.611898016997167e-06, |
| "loss": 1.2897, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14447592067988668, |
| "grad_norm": 0.279296875, |
| "learning_rate": 8.583569405099152e-06, |
| "loss": 1.2663, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.14730878186968838, |
| "grad_norm": 0.212890625, |
| "learning_rate": 8.555240793201134e-06, |
| "loss": 1.2528, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1501416430594901, |
| "grad_norm": 0.228515625, |
| "learning_rate": 8.526912181303117e-06, |
| "loss": 1.2968, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1529745042492918, |
| "grad_norm": 0.232421875, |
| "learning_rate": 8.4985835694051e-06, |
| "loss": 1.2783, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1558073654390935, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 8.470254957507082e-06, |
| "loss": 1.2798, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.15864022662889518, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 8.441926345609066e-06, |
| "loss": 1.2971, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.16147308781869688, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 8.413597733711049e-06, |
| "loss": 1.2609, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1643059490084986, |
| "grad_norm": 0.2109375, |
| "learning_rate": 8.385269121813032e-06, |
| "loss": 1.2514, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1671388101983003, |
| "grad_norm": 0.212890625, |
| "learning_rate": 8.356940509915014e-06, |
| "loss": 1.2499, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.16997167138810199, |
| "grad_norm": 0.201171875, |
| "learning_rate": 8.328611898016999e-06, |
| "loss": 1.2098, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17280453257790368, |
| "grad_norm": 0.203125, |
| "learning_rate": 8.30028328611898e-06, |
| "loss": 1.1736, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.17563739376770537, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 8.271954674220964e-06, |
| "loss": 1.2286, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.17847025495750707, |
| "grad_norm": 0.189453125, |
| "learning_rate": 8.243626062322947e-06, |
| "loss": 1.244, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1813031161473088, |
| "grad_norm": 0.259765625, |
| "learning_rate": 8.215297450424929e-06, |
| "loss": 1.2047, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.18413597733711048, |
| "grad_norm": 0.19140625, |
| "learning_rate": 8.186968838526912e-06, |
| "loss": 1.1574, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.18696883852691218, |
| "grad_norm": 0.201171875, |
| "learning_rate": 8.158640226628896e-06, |
| "loss": 1.2277, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.18980169971671387, |
| "grad_norm": 0.1953125, |
| "learning_rate": 8.13031161473088e-06, |
| "loss": 1.2256, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.19263456090651557, |
| "grad_norm": 0.19140625, |
| "learning_rate": 8.101983002832861e-06, |
| "loss": 1.1519, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1954674220963173, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 8.073654390934846e-06, |
| "loss": 1.1983, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.19830028328611898, |
| "grad_norm": 0.203125, |
| "learning_rate": 8.045325779036828e-06, |
| "loss": 1.1948, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.20113314447592068, |
| "grad_norm": 0.17578125, |
| "learning_rate": 8.016997167138811e-06, |
| "loss": 1.1572, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.20396600566572237, |
| "grad_norm": 0.3671875, |
| "learning_rate": 7.988668555240794e-06, |
| "loss": 1.1669, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.20679886685552407, |
| "grad_norm": 0.173828125, |
| "learning_rate": 7.960339943342776e-06, |
| "loss": 1.1866, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2096317280453258, |
| "grad_norm": 0.185546875, |
| "learning_rate": 7.93201133144476e-06, |
| "loss": 1.1576, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.21246458923512748, |
| "grad_norm": 0.189453125, |
| "learning_rate": 7.903682719546743e-06, |
| "loss": 1.1815, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.21529745042492918, |
| "grad_norm": 0.19140625, |
| "learning_rate": 7.875354107648726e-06, |
| "loss": 1.1867, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.21813031161473087, |
| "grad_norm": 0.2177734375, |
| "learning_rate": 7.847025495750708e-06, |
| "loss": 1.1909, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.22096317280453256, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 7.818696883852693e-06, |
| "loss": 1.1545, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2237960339943343, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 7.790368271954675e-06, |
| "loss": 1.1305, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.22662889518413598, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 7.762039660056658e-06, |
| "loss": 1.1386, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.22946175637393768, |
| "grad_norm": 0.189453125, |
| "learning_rate": 7.733711048158641e-06, |
| "loss": 1.1119, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.23229461756373937, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 7.705382436260623e-06, |
| "loss": 1.0934, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.23512747875354106, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 7.677053824362606e-06, |
| "loss": 1.1482, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.23796033994334279, |
| "grad_norm": 0.21484375, |
| "learning_rate": 7.64872521246459e-06, |
| "loss": 1.1272, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.24079320113314448, |
| "grad_norm": 0.185546875, |
| "learning_rate": 7.620396600566573e-06, |
| "loss": 1.141, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.24362606232294617, |
| "grad_norm": 0.17578125, |
| "learning_rate": 7.5920679886685555e-06, |
| "loss": 1.0635, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.24645892351274787, |
| "grad_norm": 0.1875, |
| "learning_rate": 7.563739376770539e-06, |
| "loss": 1.1217, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.24929178470254956, |
| "grad_norm": 0.1875, |
| "learning_rate": 7.535410764872521e-06, |
| "loss": 1.1251, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2521246458923513, |
| "grad_norm": 0.1796875, |
| "learning_rate": 7.507082152974506e-06, |
| "loss": 1.1115, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.254957507082153, |
| "grad_norm": 0.177734375, |
| "learning_rate": 7.478753541076488e-06, |
| "loss": 1.1327, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2577903682719547, |
| "grad_norm": 0.181640625, |
| "learning_rate": 7.4504249291784715e-06, |
| "loss": 1.0844, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.26062322946175637, |
| "grad_norm": 0.185546875, |
| "learning_rate": 7.422096317280454e-06, |
| "loss": 1.1698, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.26345609065155806, |
| "grad_norm": 0.205078125, |
| "learning_rate": 7.3937677053824365e-06, |
| "loss": 1.0853, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.26628895184135976, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 7.36543909348442e-06, |
| "loss": 1.0659, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.26912181303116145, |
| "grad_norm": 0.1875, |
| "learning_rate": 7.337110481586402e-06, |
| "loss": 1.1106, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2719546742209632, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 7.308781869688386e-06, |
| "loss": 1.078, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2747875354107649, |
| "grad_norm": 0.166015625, |
| "learning_rate": 7.280453257790368e-06, |
| "loss": 1.0965, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2776203966005666, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 7.2521246458923525e-06, |
| "loss": 1.0903, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2804532577903683, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 7.223796033994335e-06, |
| "loss": 1.0846, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.28328611898017, |
| "grad_norm": 0.1796875, |
| "learning_rate": 7.195467422096318e-06, |
| "loss": 1.0697, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.28611898016997167, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 7.167138810198301e-06, |
| "loss": 1.057, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.28895184135977336, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 7.1388101983002834e-06, |
| "loss": 1.099, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.29178470254957506, |
| "grad_norm": 0.177734375, |
| "learning_rate": 7.110481586402267e-06, |
| "loss": 1.084, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.29461756373937675, |
| "grad_norm": 0.171875, |
| "learning_rate": 7.082152974504249e-06, |
| "loss": 1.087, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.29745042492917845, |
| "grad_norm": 0.185546875, |
| "learning_rate": 7.053824362606233e-06, |
| "loss": 1.0623, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3002832861189802, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 7.025495750708215e-06, |
| "loss": 1.0897, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3031161473087819, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 6.997167138810199e-06, |
| "loss": 1.0815, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3059490084985836, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 6.968838526912182e-06, |
| "loss": 1.0606, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3087818696883853, |
| "grad_norm": 0.18359375, |
| "learning_rate": 6.940509915014165e-06, |
| "loss": 1.0666, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.311614730878187, |
| "grad_norm": 0.181640625, |
| "learning_rate": 6.912181303116148e-06, |
| "loss": 1.0455, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.31444759206798867, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 6.883852691218131e-06, |
| "loss": 1.0297, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.31728045325779036, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 6.855524079320114e-06, |
| "loss": 1.0525, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.32011331444759206, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 6.827195467422096e-06, |
| "loss": 1.0928, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.32294617563739375, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 6.79886685552408e-06, |
| "loss": 1.0403, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.32577903682719545, |
| "grad_norm": 0.171875, |
| "learning_rate": 6.770538243626062e-06, |
| "loss": 1.0516, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3286118980169972, |
| "grad_norm": 0.21875, |
| "learning_rate": 6.742209631728046e-06, |
| "loss": 1.1099, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3314447592067989, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 6.713881019830029e-06, |
| "loss": 1.0589, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3342776203966006, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 6.685552407932012e-06, |
| "loss": 1.0825, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3371104815864023, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 6.657223796033995e-06, |
| "loss": 1.0532, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.33994334277620397, |
| "grad_norm": 0.279296875, |
| "learning_rate": 6.628895184135978e-06, |
| "loss": 1.0116, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.34277620396600567, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 6.600566572237961e-06, |
| "loss": 1.0315, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.34560906515580736, |
| "grad_norm": 0.189453125, |
| "learning_rate": 6.572237960339945e-06, |
| "loss": 1.0635, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.34844192634560905, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 6.543909348441927e-06, |
| "loss": 1.0109, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.35127478753541075, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 6.51558073654391e-06, |
| "loss": 1.0757, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.35410764872521244, |
| "grad_norm": 0.17578125, |
| "learning_rate": 6.487252124645893e-06, |
| "loss": 1.0243, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.35694050991501414, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 6.458923512747876e-06, |
| "loss": 1.0556, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.3597733711048159, |
| "grad_norm": 0.18359375, |
| "learning_rate": 6.430594900849859e-06, |
| "loss": 1.0156, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3626062322946176, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 6.402266288951842e-06, |
| "loss": 1.0416, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.3654390934844193, |
| "grad_norm": 0.255859375, |
| "learning_rate": 6.373937677053825e-06, |
| "loss": 0.9877, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.36827195467422097, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 6.3456090651558075e-06, |
| "loss": 1.0046, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.37110481586402266, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 6.317280453257792e-06, |
| "loss": 1.0296, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.37393767705382436, |
| "grad_norm": 0.1796875, |
| "learning_rate": 6.288951841359774e-06, |
| "loss": 1.0826, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.37677053824362605, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 6.260623229461757e-06, |
| "loss": 1.0053, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.37960339943342775, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 6.23229461756374e-06, |
| "loss": 1.0212, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.38243626062322944, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 6.203966005665723e-06, |
| "loss": 1.0028, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.38526912181303113, |
| "grad_norm": 0.189453125, |
| "learning_rate": 6.175637393767706e-06, |
| "loss": 1.0219, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3881019830028329, |
| "grad_norm": 0.1796875, |
| "learning_rate": 6.1473087818696885e-06, |
| "loss": 0.9871, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3909348441926346, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 6.118980169971672e-06, |
| "loss": 0.9813, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3937677053824363, |
| "grad_norm": 0.193359375, |
| "learning_rate": 6.090651558073654e-06, |
| "loss": 1.0145, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.39660056657223797, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 6.062322946175639e-06, |
| "loss": 1.0154, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.39943342776203966, |
| "grad_norm": 0.1953125, |
| "learning_rate": 6.033994334277621e-06, |
| "loss": 1.0139, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.40226628895184136, |
| "grad_norm": 0.296875, |
| "learning_rate": 6.0056657223796045e-06, |
| "loss": 1.0364, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.40509915014164305, |
| "grad_norm": 0.20703125, |
| "learning_rate": 5.977337110481587e-06, |
| "loss": 1.0514, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.40793201133144474, |
| "grad_norm": 0.486328125, |
| "learning_rate": 5.9490084985835695e-06, |
| "loss": 1.1056, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.41076487252124644, |
| "grad_norm": 0.193359375, |
| "learning_rate": 5.920679886685553e-06, |
| "loss": 1.007, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.41359773371104813, |
| "grad_norm": 0.21484375, |
| "learning_rate": 5.892351274787535e-06, |
| "loss": 0.9819, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.4164305949008499, |
| "grad_norm": 0.2109375, |
| "learning_rate": 5.864022662889519e-06, |
| "loss": 1.0075, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4192634560906516, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 5.835694050991501e-06, |
| "loss": 1.0239, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.42209631728045327, |
| "grad_norm": 0.185546875, |
| "learning_rate": 5.8073654390934855e-06, |
| "loss": 0.9913, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.42492917847025496, |
| "grad_norm": 0.1875, |
| "learning_rate": 5.779036827195468e-06, |
| "loss": 0.9825, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.42776203966005666, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 5.750708215297451e-06, |
| "loss": 1.0331, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.43059490084985835, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 5.722379603399434e-06, |
| "loss": 1.0101, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.43342776203966005, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 5.6940509915014164e-06, |
| "loss": 1.027, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.43626062322946174, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 5.6657223796034e-06, |
| "loss": 0.9743, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.43909348441926344, |
| "grad_norm": 0.201171875, |
| "learning_rate": 5.637393767705382e-06, |
| "loss": 1.0431, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.44192634560906513, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 5.609065155807366e-06, |
| "loss": 1.0434, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.4447592067988669, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 5.580736543909348e-06, |
| "loss": 1.0371, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.4475920679886686, |
| "grad_norm": 0.185546875, |
| "learning_rate": 5.552407932011332e-06, |
| "loss": 0.98, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.45042492917847027, |
| "grad_norm": 0.189453125, |
| "learning_rate": 5.524079320113315e-06, |
| "loss": 1.0277, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.45325779036827196, |
| "grad_norm": 0.201171875, |
| "learning_rate": 5.495750708215298e-06, |
| "loss": 0.9847, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.45609065155807366, |
| "grad_norm": 0.1875, |
| "learning_rate": 5.467422096317281e-06, |
| "loss": 1.0478, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.45892351274787535, |
| "grad_norm": 0.1875, |
| "learning_rate": 5.439093484419264e-06, |
| "loss": 1.0335, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.46175637393767704, |
| "grad_norm": 0.3359375, |
| "learning_rate": 5.410764872521247e-06, |
| "loss": 1.0122, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.46458923512747874, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 5.382436260623229e-06, |
| "loss": 1.0004, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.46742209631728043, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 5.3541076487252134e-06, |
| "loss": 1.0127, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.4702549575070821, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 5.325779036827196e-06, |
| "loss": 0.9798, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.4730878186968839, |
| "grad_norm": 0.185546875, |
| "learning_rate": 5.297450424929179e-06, |
| "loss": 0.9583, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.47592067988668557, |
| "grad_norm": 0.193359375, |
| "learning_rate": 5.269121813031162e-06, |
| "loss": 1.0096, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.47875354107648727, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 5.240793201133145e-06, |
| "loss": 0.9659, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.48158640226628896, |
| "grad_norm": 0.1875, |
| "learning_rate": 5.212464589235128e-06, |
| "loss": 0.9984, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.48441926345609065, |
| "grad_norm": 0.1796875, |
| "learning_rate": 5.184135977337111e-06, |
| "loss": 0.9813, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.48725212464589235, |
| "grad_norm": 0.18359375, |
| "learning_rate": 5.155807365439094e-06, |
| "loss": 1.0177, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.49008498583569404, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.127478753541076e-06, |
| "loss": 0.9552, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.49291784702549574, |
| "grad_norm": 0.2890625, |
| "learning_rate": 5.09915014164306e-06, |
| "loss": 0.9856, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.49575070821529743, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 5.070821529745043e-06, |
| "loss": 1.0161, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4985835694050991, |
| "grad_norm": 0.193359375, |
| "learning_rate": 5.042492917847026e-06, |
| "loss": 1.0018, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5014164305949008, |
| "grad_norm": 0.189453125, |
| "learning_rate": 5.014164305949009e-06, |
| "loss": 0.9836, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5042492917847026, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 4.985835694050991e-06, |
| "loss": 0.992, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5070821529745042, |
| "grad_norm": 0.181640625, |
| "learning_rate": 4.957507082152975e-06, |
| "loss": 0.9899, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.509915014164306, |
| "grad_norm": 0.189453125, |
| "learning_rate": 4.929178470254958e-06, |
| "loss": 0.9491, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5127478753541076, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 4.9008498583569405e-06, |
| "loss": 1.0256, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5155807365439093, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 4.872521246458924e-06, |
| "loss": 0.9842, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5184135977337111, |
| "grad_norm": 0.197265625, |
| "learning_rate": 4.844192634560907e-06, |
| "loss": 0.9541, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5212464589235127, |
| "grad_norm": 0.19140625, |
| "learning_rate": 4.81586402266289e-06, |
| "loss": 1.0063, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5240793201133145, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 4.787535410764873e-06, |
| "loss": 0.9557, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5269121813031161, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 4.7592067988668565e-06, |
| "loss": 0.969, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5297450424929179, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 4.730878186968839e-06, |
| "loss": 0.9744, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5325779036827195, |
| "grad_norm": 0.201171875, |
| "learning_rate": 4.7025495750708215e-06, |
| "loss": 1.01, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5354107648725213, |
| "grad_norm": 0.19140625, |
| "learning_rate": 4.674220963172805e-06, |
| "loss": 0.9906, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.5382436260623229, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 4.645892351274787e-06, |
| "loss": 0.9201, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5410764872521246, |
| "grad_norm": 0.19140625, |
| "learning_rate": 4.617563739376771e-06, |
| "loss": 1.0246, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5439093484419264, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 4.589235127478754e-06, |
| "loss": 1.0046, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.546742209631728, |
| "grad_norm": 0.271484375, |
| "learning_rate": 4.560906515580737e-06, |
| "loss": 1.0012, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5495750708215298, |
| "grad_norm": 0.189453125, |
| "learning_rate": 4.53257790368272e-06, |
| "loss": 0.98, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5524079320113314, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 4.504249291784703e-06, |
| "loss": 0.9673, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5552407932011332, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 4.475920679886686e-06, |
| "loss": 0.9799, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5580736543909348, |
| "grad_norm": 0.17578125, |
| "learning_rate": 4.447592067988669e-06, |
| "loss": 0.9836, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.5609065155807366, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 4.419263456090652e-06, |
| "loss": 0.9722, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5637393767705382, |
| "grad_norm": 0.193359375, |
| "learning_rate": 4.390934844192635e-06, |
| "loss": 0.9721, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.56657223796034, |
| "grad_norm": 0.19140625, |
| "learning_rate": 4.362606232294618e-06, |
| "loss": 1.0037, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5694050991501416, |
| "grad_norm": 0.181640625, |
| "learning_rate": 4.334277620396601e-06, |
| "loss": 0.9169, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5722379603399433, |
| "grad_norm": 0.1875, |
| "learning_rate": 4.3059490084985835e-06, |
| "loss": 0.9355, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5750708215297451, |
| "grad_norm": 0.19921875, |
| "learning_rate": 4.277620396600567e-06, |
| "loss": 0.9022, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5779036827195467, |
| "grad_norm": 0.1796875, |
| "learning_rate": 4.24929178470255e-06, |
| "loss": 0.9479, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5807365439093485, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 4.220963172804533e-06, |
| "loss": 0.9451, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5835694050991501, |
| "grad_norm": 0.1953125, |
| "learning_rate": 4.192634560906516e-06, |
| "loss": 0.9563, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5864022662889519, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 4.1643059490084995e-06, |
| "loss": 0.9982, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.5892351274787535, |
| "grad_norm": 0.2578125, |
| "learning_rate": 4.135977337110482e-06, |
| "loss": 0.9793, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5920679886685553, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 4.1076487252124646e-06, |
| "loss": 0.9449, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.5949008498583569, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 4.079320113314448e-06, |
| "loss": 0.9728, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5977337110481586, |
| "grad_norm": 0.283203125, |
| "learning_rate": 4.0509915014164304e-06, |
| "loss": 0.9411, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6005665722379604, |
| "grad_norm": 0.19140625, |
| "learning_rate": 4.022662889518414e-06, |
| "loss": 0.955, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.603399433427762, |
| "grad_norm": 0.216796875, |
| "learning_rate": 3.994334277620397e-06, |
| "loss": 1.0079, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6062322946175638, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 3.96600566572238e-06, |
| "loss": 1.0317, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6090651558073654, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 3.937677053824363e-06, |
| "loss": 0.955, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6118980169971672, |
| "grad_norm": 0.201171875, |
| "learning_rate": 3.909348441926346e-06, |
| "loss": 0.9851, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6147308781869688, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 3.881019830028329e-06, |
| "loss": 0.9755, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6175637393767706, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 3.8526912181303115e-06, |
| "loss": 0.9555, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.6203966005665722, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 3.824362606232295e-06, |
| "loss": 0.9393, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.623229461756374, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.7960339943342778e-06, |
| "loss": 0.9663, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6260623229461756, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 3.7677053824362607e-06, |
| "loss": 0.9679, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.6288951841359773, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 3.739376770538244e-06, |
| "loss": 0.9999, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6317280453257791, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 3.711048158640227e-06, |
| "loss": 0.9794, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.6345609065155807, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.68271954674221e-06, |
| "loss": 0.9569, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6373937677053825, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 3.654390934844193e-06, |
| "loss": 0.9209, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6402266288951841, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 3.6260623229461763e-06, |
| "loss": 0.9086, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6430594900849859, |
| "grad_norm": 0.17578125, |
| "learning_rate": 3.597733711048159e-06, |
| "loss": 0.9626, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.6458923512747875, |
| "grad_norm": 0.302734375, |
| "learning_rate": 3.5694050991501417e-06, |
| "loss": 0.9974, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.6487252124645893, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 3.5410764872521247e-06, |
| "loss": 0.9935, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.6515580736543909, |
| "grad_norm": 0.20703125, |
| "learning_rate": 3.5127478753541076e-06, |
| "loss": 0.9876, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6543909348441926, |
| "grad_norm": 0.18359375, |
| "learning_rate": 3.484419263456091e-06, |
| "loss": 0.9723, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.6572237960339944, |
| "grad_norm": 0.1953125, |
| "learning_rate": 3.456090651558074e-06, |
| "loss": 0.9463, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.660056657223796, |
| "grad_norm": 0.193359375, |
| "learning_rate": 3.427762039660057e-06, |
| "loss": 0.9352, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.6628895184135978, |
| "grad_norm": 0.1953125, |
| "learning_rate": 3.39943342776204e-06, |
| "loss": 0.9747, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.6657223796033994, |
| "grad_norm": 0.185546875, |
| "learning_rate": 3.371104815864023e-06, |
| "loss": 0.9989, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6685552407932012, |
| "grad_norm": 0.203125, |
| "learning_rate": 3.342776203966006e-06, |
| "loss": 0.9537, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6713881019830028, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.314447592067989e-06, |
| "loss": 0.9934, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.6742209631728046, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 3.2861189801699724e-06, |
| "loss": 0.9563, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.6770538243626062, |
| "grad_norm": 0.203125, |
| "learning_rate": 3.257790368271955e-06, |
| "loss": 0.9485, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6798866855524079, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 3.229461756373938e-06, |
| "loss": 0.9965, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6827195467422096, |
| "grad_norm": 0.18359375, |
| "learning_rate": 3.201133144475921e-06, |
| "loss": 0.9483, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.6855524079320113, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 3.1728045325779038e-06, |
| "loss": 0.9968, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.6883852691218131, |
| "grad_norm": 0.26171875, |
| "learning_rate": 3.144475920679887e-06, |
| "loss": 1.045, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.6912181303116147, |
| "grad_norm": 0.177734375, |
| "learning_rate": 3.11614730878187e-06, |
| "loss": 0.9608, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.6940509915014165, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 3.087818696883853e-06, |
| "loss": 0.9612, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.6968838526912181, |
| "grad_norm": 0.18359375, |
| "learning_rate": 3.059490084985836e-06, |
| "loss": 0.9463, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.6997167138810199, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 3.0311614730878193e-06, |
| "loss": 0.9678, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7025495750708215, |
| "grad_norm": 0.197265625, |
| "learning_rate": 3.0028328611898022e-06, |
| "loss": 0.976, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7053824362606232, |
| "grad_norm": 0.19140625, |
| "learning_rate": 2.9745042492917848e-06, |
| "loss": 0.9383, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7082152974504249, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 2.9461756373937677e-06, |
| "loss": 0.9564, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7110481586402266, |
| "grad_norm": 0.189453125, |
| "learning_rate": 2.9178470254957506e-06, |
| "loss": 0.9827, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.7138810198300283, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 2.889518413597734e-06, |
| "loss": 0.9901, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.71671388101983, |
| "grad_norm": 0.173828125, |
| "learning_rate": 2.861189801699717e-06, |
| "loss": 0.9924, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.7195467422096318, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 2.8328611898017e-06, |
| "loss": 0.9146, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.7223796033994334, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 2.804532577903683e-06, |
| "loss": 0.9433, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7252124645892352, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 2.776203966005666e-06, |
| "loss": 1.002, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7280453257790368, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 2.747875354107649e-06, |
| "loss": 0.9499, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.7308781869688386, |
| "grad_norm": 0.177734375, |
| "learning_rate": 2.719546742209632e-06, |
| "loss": 0.961, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7337110481586402, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 2.6912181303116146e-06, |
| "loss": 0.9718, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.7365439093484419, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.662889518413598e-06, |
| "loss": 0.9463, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7393767705382436, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 2.634560906515581e-06, |
| "loss": 0.9902, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.7422096317280453, |
| "grad_norm": 0.18359375, |
| "learning_rate": 2.606232294617564e-06, |
| "loss": 0.978, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.7450424929178471, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.577903682719547e-06, |
| "loss": 0.9666, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.7478753541076487, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 2.54957507082153e-06, |
| "loss": 0.9661, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.7507082152974505, |
| "grad_norm": 0.1875, |
| "learning_rate": 2.521246458923513e-06, |
| "loss": 0.9864, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7535410764872521, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 2.4929178470254956e-06, |
| "loss": 0.9696, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.7563739376770539, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 2.464589235127479e-06, |
| "loss": 0.9856, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.7592067988668555, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 2.436260623229462e-06, |
| "loss": 0.9611, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7620396600566572, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 2.407932011331445e-06, |
| "loss": 1.0221, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.7648725212464589, |
| "grad_norm": 0.189453125, |
| "learning_rate": 2.3796033994334282e-06, |
| "loss": 0.927, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7677053824362606, |
| "grad_norm": 0.2265625, |
| "learning_rate": 2.3512747875354108e-06, |
| "loss": 0.9316, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.7705382436260623, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 2.3229461756373937e-06, |
| "loss": 0.9322, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.773371104815864, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 2.294617563739377e-06, |
| "loss": 0.9601, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.7762039660056658, |
| "grad_norm": 0.17578125, |
| "learning_rate": 2.26628895184136e-06, |
| "loss": 0.9732, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.7790368271954674, |
| "grad_norm": 0.197265625, |
| "learning_rate": 2.237960339943343e-06, |
| "loss": 0.9748, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7818696883852692, |
| "grad_norm": 0.220703125, |
| "learning_rate": 2.209631728045326e-06, |
| "loss": 1.0213, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.7847025495750708, |
| "grad_norm": 0.181640625, |
| "learning_rate": 2.181303116147309e-06, |
| "loss": 0.9537, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.7875354107648725, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 2.1529745042492918e-06, |
| "loss": 0.9502, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.7903682719546742, |
| "grad_norm": 0.19921875, |
| "learning_rate": 2.124645892351275e-06, |
| "loss": 0.9992, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.7932011331444759, |
| "grad_norm": 0.189453125, |
| "learning_rate": 2.096317280453258e-06, |
| "loss": 0.9134, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7960339943342776, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 2.067988668555241e-06, |
| "loss": 0.967, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.7988668555240793, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 2.039660056657224e-06, |
| "loss": 0.9572, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8016997167138811, |
| "grad_norm": 0.181640625, |
| "learning_rate": 2.011331444759207e-06, |
| "loss": 0.9702, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.8045325779036827, |
| "grad_norm": 0.185546875, |
| "learning_rate": 1.98300283286119e-06, |
| "loss": 0.9377, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8073654390934845, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 1.954674220963173e-06, |
| "loss": 0.9457, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8101983002832861, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 1.9263456090651557e-06, |
| "loss": 0.9599, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8130311614730878, |
| "grad_norm": 0.1875, |
| "learning_rate": 1.8980169971671389e-06, |
| "loss": 0.9141, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.8158640226628895, |
| "grad_norm": 0.240234375, |
| "learning_rate": 1.869688385269122e-06, |
| "loss": 0.9284, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8186968838526912, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 1.841359773371105e-06, |
| "loss": 0.9514, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.8215297450424929, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 1.8130311614730881e-06, |
| "loss": 0.9254, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8243626062322946, |
| "grad_norm": 0.185546875, |
| "learning_rate": 1.7847025495750709e-06, |
| "loss": 0.9482, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.8271954674220963, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 1.7563739376770538e-06, |
| "loss": 1.0053, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.830028328611898, |
| "grad_norm": 0.18359375, |
| "learning_rate": 1.728045325779037e-06, |
| "loss": 0.9205, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.8328611898016998, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 1.69971671388102e-06, |
| "loss": 0.9878, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.8356940509915014, |
| "grad_norm": 0.181640625, |
| "learning_rate": 1.671388101983003e-06, |
| "loss": 0.9305, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.8385269121813032, |
| "grad_norm": 0.1796875, |
| "learning_rate": 1.6430594900849862e-06, |
| "loss": 0.9422, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.8413597733711048, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 1.614730878186969e-06, |
| "loss": 0.9603, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.8441926345609065, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 1.5864022662889519e-06, |
| "loss": 0.9395, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.8470254957507082, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 1.558073654390935e-06, |
| "loss": 0.9388, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.8498583569405099, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 1.529745042492918e-06, |
| "loss": 0.9586, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8526912181303116, |
| "grad_norm": 0.21484375, |
| "learning_rate": 1.5014164305949011e-06, |
| "loss": 0.9981, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.8555240793201133, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.4730878186968839e-06, |
| "loss": 0.9962, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.8583569405099151, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 1.444759206798867e-06, |
| "loss": 0.9145, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.8611898016997167, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 1.41643059490085e-06, |
| "loss": 0.8994, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.8640226628895185, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 1.388101983002833e-06, |
| "loss": 1.0032, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.8668555240793201, |
| "grad_norm": 0.17578125, |
| "learning_rate": 1.359773371104816e-06, |
| "loss": 0.9649, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.8696883852691218, |
| "grad_norm": 0.1796875, |
| "learning_rate": 1.331444759206799e-06, |
| "loss": 0.9337, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.8725212464589235, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 1.303116147308782e-06, |
| "loss": 0.9582, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.8753541076487252, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 1.274787535410765e-06, |
| "loss": 0.9472, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.8781869688385269, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 1.2464589235127478e-06, |
| "loss": 0.9748, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8810198300283286, |
| "grad_norm": 0.189453125, |
| "learning_rate": 1.218130311614731e-06, |
| "loss": 0.9634, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.8838526912181303, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.1898016997167141e-06, |
| "loss": 0.9364, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.886685552407932, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 1.1614730878186968e-06, |
| "loss": 0.9471, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.8895184135977338, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 1.13314447592068e-06, |
| "loss": 0.9459, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.8923512747875354, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.104815864022663e-06, |
| "loss": 0.9185, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.8951841359773371, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.0764872521246459e-06, |
| "loss": 0.9783, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.8980169971671388, |
| "grad_norm": 0.185546875, |
| "learning_rate": 1.048158640226629e-06, |
| "loss": 0.9274, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.9008498583569405, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.019830028328612e-06, |
| "loss": 0.9623, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.9036827195467422, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 9.91501416430595e-07, |
| "loss": 0.921, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.9065155807365439, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 9.631728045325779e-07, |
| "loss": 0.8889, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9093484419263456, |
| "grad_norm": 0.193359375, |
| "learning_rate": 9.34844192634561e-07, |
| "loss": 0.9488, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.9121813031161473, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 9.065155807365441e-07, |
| "loss": 0.8908, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9150141643059491, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 8.781869688385269e-07, |
| "loss": 0.9786, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.9178470254957507, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 8.4985835694051e-07, |
| "loss": 0.9221, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9206798866855525, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 8.215297450424931e-07, |
| "loss": 0.9032, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9235127478753541, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 7.932011331444759e-07, |
| "loss": 0.9524, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.9263456090651558, |
| "grad_norm": 0.275390625, |
| "learning_rate": 7.64872521246459e-07, |
| "loss": 0.9702, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.9291784702549575, |
| "grad_norm": 0.185546875, |
| "learning_rate": 7.365439093484419e-07, |
| "loss": 0.9442, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.9320113314447592, |
| "grad_norm": 0.1953125, |
| "learning_rate": 7.08215297450425e-07, |
| "loss": 1.0047, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.9348441926345609, |
| "grad_norm": 0.203125, |
| "learning_rate": 6.79886685552408e-07, |
| "loss": 0.9214, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.9376770538243626, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 6.51558073654391e-07, |
| "loss": 0.9284, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.9405099150141643, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 6.232294617563739e-07, |
| "loss": 0.9649, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.943342776203966, |
| "grad_norm": 0.197265625, |
| "learning_rate": 5.949008498583571e-07, |
| "loss": 0.911, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.9461756373937678, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 5.6657223796034e-07, |
| "loss": 0.9113, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.9490084985835694, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 5.382436260623229e-07, |
| "loss": 0.9448, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.9518413597733711, |
| "grad_norm": 0.19140625, |
| "learning_rate": 5.09915014164306e-07, |
| "loss": 1.0154, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.9546742209631728, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 4.815864022662889e-07, |
| "loss": 0.9408, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.9575070821529745, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 4.5325779036827203e-07, |
| "loss": 0.9479, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.9603399433427762, |
| "grad_norm": 0.193359375, |
| "learning_rate": 4.24929178470255e-07, |
| "loss": 0.9675, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.9631728045325779, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 3.9660056657223797e-07, |
| "loss": 0.9578, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9660056657223796, |
| "grad_norm": 0.18359375, |
| "learning_rate": 3.6827195467422096e-07, |
| "loss": 0.9381, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.9688385269121813, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 3.39943342776204e-07, |
| "loss": 0.952, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.9716713881019831, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 3.1161473087818695e-07, |
| "loss": 0.9378, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.9745042492917847, |
| "grad_norm": 0.189453125, |
| "learning_rate": 2.8328611898017e-07, |
| "loss": 0.9659, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.9773371104815864, |
| "grad_norm": 0.19921875, |
| "learning_rate": 2.54957507082153e-07, |
| "loss": 1.0183, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.9801699716713881, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 2.2662889518413602e-07, |
| "loss": 0.9739, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.9830028328611898, |
| "grad_norm": 0.18359375, |
| "learning_rate": 1.9830028328611898e-07, |
| "loss": 0.9404, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.9858356940509915, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 1.69971671388102e-07, |
| "loss": 0.9621, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.9886685552407932, |
| "grad_norm": 0.1875, |
| "learning_rate": 1.41643059490085e-07, |
| "loss": 0.9278, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.9915014164305949, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.1331444759206801e-07, |
| "loss": 0.9648, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9943342776203966, |
| "grad_norm": 0.181640625, |
| "learning_rate": 8.4985835694051e-08, |
| "loss": 0.9472, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.9971671388101983, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 5.6657223796034004e-08, |
| "loss": 0.9625, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 2.8328611898017002e-08, |
| "loss": 0.9569, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.9534078240394592, |
| "eval_runtime": 12.4147, |
| "eval_samples_per_second": 2.9, |
| "eval_steps_per_second": 0.403, |
| "step": 353 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 353, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2025098545607475e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|