| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 31.380753138075313, |
| "eval_steps": 500, |
| "global_step": 15000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02092050209205021, |
| "grad_norm": 2.293994665145874, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.4456, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04184100418410042, |
| "grad_norm": 1.97152578830719, |
| "learning_rate": 2.5333333333333334e-06, |
| "loss": 1.502, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06276150627615062, |
| "grad_norm": 1.8830021619796753, |
| "learning_rate": 3.866666666666667e-06, |
| "loss": 1.5108, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08368200836820083, |
| "grad_norm": 1.9566497802734375, |
| "learning_rate": 5.2e-06, |
| "loss": 1.4048, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10460251046025104, |
| "grad_norm": 1.8011934757232666, |
| "learning_rate": 6.533333333333333e-06, |
| "loss": 1.2728, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.12552301255230125, |
| "grad_norm": 1.3720723390579224, |
| "learning_rate": 7.866666666666667e-06, |
| "loss": 1.1924, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.14644351464435146, |
| "grad_norm": 1.3044836521148682, |
| "learning_rate": 9.2e-06, |
| "loss": 0.9494, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16736401673640167, |
| "grad_norm": 0.8721851706504822, |
| "learning_rate": 1.0533333333333335e-05, |
| "loss": 0.8668, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18828451882845187, |
| "grad_norm": 0.5817344188690186, |
| "learning_rate": 1.1866666666666668e-05, |
| "loss": 0.7112, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.20920502092050208, |
| "grad_norm": 0.8804004192352295, |
| "learning_rate": 1.32e-05, |
| "loss": 0.6325, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2301255230125523, |
| "grad_norm": 0.4721251428127289, |
| "learning_rate": 1.4533333333333335e-05, |
| "loss": 0.5486, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2510460251046025, |
| "grad_norm": 0.5858187675476074, |
| "learning_rate": 1.586666666666667e-05, |
| "loss": 0.4759, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2719665271966527, |
| "grad_norm": 0.8786276578903198, |
| "learning_rate": 1.7199999999999998e-05, |
| "loss": 0.4455, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2928870292887029, |
| "grad_norm": 0.581843912601471, |
| "learning_rate": 1.8533333333333334e-05, |
| "loss": 0.4113, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3138075313807531, |
| "grad_norm": 0.6839861273765564, |
| "learning_rate": 1.9866666666666667e-05, |
| "loss": 0.3713, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.33472803347280333, |
| "grad_norm": 0.49527251720428467, |
| "learning_rate": 2.12e-05, |
| "loss": 0.3411, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.35564853556485354, |
| "grad_norm": 0.3660479784011841, |
| "learning_rate": 2.2533333333333333e-05, |
| "loss": 0.3304, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.37656903765690375, |
| "grad_norm": 0.3848160207271576, |
| "learning_rate": 2.3866666666666666e-05, |
| "loss": 0.2835, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.39748953974895396, |
| "grad_norm": 0.4325522482395172, |
| "learning_rate": 2.5200000000000003e-05, |
| "loss": 0.2554, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.41841004184100417, |
| "grad_norm": 0.6360965371131897, |
| "learning_rate": 2.6533333333333332e-05, |
| "loss": 0.2746, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4393305439330544, |
| "grad_norm": 0.5752055644989014, |
| "learning_rate": 2.786666666666667e-05, |
| "loss": 0.2489, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4602510460251046, |
| "grad_norm": 0.902524471282959, |
| "learning_rate": 2.9199999999999998e-05, |
| "loss": 0.2425, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4811715481171548, |
| "grad_norm": 0.39021795988082886, |
| "learning_rate": 3.0533333333333335e-05, |
| "loss": 0.2184, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.502092050209205, |
| "grad_norm": 0.8243975639343262, |
| "learning_rate": 3.1866666666666664e-05, |
| "loss": 0.215, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5230125523012552, |
| "grad_norm": 0.6084154844284058, |
| "learning_rate": 3.32e-05, |
| "loss": 0.2175, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5439330543933054, |
| "grad_norm": 0.5785391330718994, |
| "learning_rate": 3.453333333333334e-05, |
| "loss": 0.2032, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5648535564853556, |
| "grad_norm": 0.9847169518470764, |
| "learning_rate": 3.586666666666667e-05, |
| "loss": 0.2173, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5857740585774058, |
| "grad_norm": 0.3699074983596802, |
| "learning_rate": 3.72e-05, |
| "loss": 0.1937, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.606694560669456, |
| "grad_norm": 0.4798552095890045, |
| "learning_rate": 3.853333333333334e-05, |
| "loss": 0.1952, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6276150627615062, |
| "grad_norm": 0.5542566776275635, |
| "learning_rate": 3.986666666666667e-05, |
| "loss": 0.1954, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6485355648535565, |
| "grad_norm": 0.5909680724143982, |
| "learning_rate": 4.12e-05, |
| "loss": 0.1852, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6694560669456067, |
| "grad_norm": 0.6381629109382629, |
| "learning_rate": 4.2533333333333335e-05, |
| "loss": 0.1828, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6903765690376569, |
| "grad_norm": 0.4231165647506714, |
| "learning_rate": 4.3866666666666665e-05, |
| "loss": 0.1728, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7112970711297071, |
| "grad_norm": 0.5259320735931396, |
| "learning_rate": 4.52e-05, |
| "loss": 0.1665, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7322175732217573, |
| "grad_norm": 0.49583449959754944, |
| "learning_rate": 4.653333333333334e-05, |
| "loss": 0.1569, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7531380753138075, |
| "grad_norm": 0.7853049039840698, |
| "learning_rate": 4.7866666666666674e-05, |
| "loss": 0.1647, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7740585774058577, |
| "grad_norm": 0.49986371397972107, |
| "learning_rate": 4.92e-05, |
| "loss": 0.1587, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7949790794979079, |
| "grad_norm": 0.8034011125564575, |
| "learning_rate": 5.053333333333333e-05, |
| "loss": 0.1586, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8158995815899581, |
| "grad_norm": 0.48629796504974365, |
| "learning_rate": 5.1866666666666676e-05, |
| "loss": 0.1534, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8368200836820083, |
| "grad_norm": 0.4972072243690491, |
| "learning_rate": 5.3200000000000006e-05, |
| "loss": 0.1568, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8577405857740585, |
| "grad_norm": 0.8214737176895142, |
| "learning_rate": 5.4533333333333335e-05, |
| "loss": 0.1591, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8786610878661087, |
| "grad_norm": 0.6962748169898987, |
| "learning_rate": 5.5866666666666665e-05, |
| "loss": 0.1534, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.899581589958159, |
| "grad_norm": 0.4874151647090912, |
| "learning_rate": 5.72e-05, |
| "loss": 0.1607, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9205020920502092, |
| "grad_norm": 0.8123170137405396, |
| "learning_rate": 5.853333333333334e-05, |
| "loss": 0.1557, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9414225941422594, |
| "grad_norm": 0.6196077466011047, |
| "learning_rate": 5.9866666666666674e-05, |
| "loss": 0.1487, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9623430962343096, |
| "grad_norm": 0.5131699442863464, |
| "learning_rate": 6.12e-05, |
| "loss": 0.1706, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9832635983263598, |
| "grad_norm": 0.4041496217250824, |
| "learning_rate": 6.253333333333333e-05, |
| "loss": 0.157, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.00418410041841, |
| "grad_norm": 0.4113733172416687, |
| "learning_rate": 6.386666666666667e-05, |
| "loss": 0.143, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0251046025104602, |
| "grad_norm": 0.5570617914199829, |
| "learning_rate": 6.52e-05, |
| "loss": 0.1518, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.0460251046025104, |
| "grad_norm": 0.47739163041114807, |
| "learning_rate": 6.653333333333334e-05, |
| "loss": 0.1356, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0669456066945606, |
| "grad_norm": 0.6506455540657043, |
| "learning_rate": 6.786666666666667e-05, |
| "loss": 0.1391, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.0878661087866108, |
| "grad_norm": 0.597764790058136, |
| "learning_rate": 6.92e-05, |
| "loss": 0.1471, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.108786610878661, |
| "grad_norm": 0.4553702771663666, |
| "learning_rate": 7.053333333333334e-05, |
| "loss": 0.1454, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.1297071129707112, |
| "grad_norm": 0.47173255681991577, |
| "learning_rate": 7.186666666666667e-05, |
| "loss": 0.1429, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.1506276150627615, |
| "grad_norm": 0.49343475699424744, |
| "learning_rate": 7.32e-05, |
| "loss": 0.1314, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.1715481171548117, |
| "grad_norm": 0.39795058965682983, |
| "learning_rate": 7.453333333333333e-05, |
| "loss": 0.1262, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.1924686192468619, |
| "grad_norm": 0.40215185284614563, |
| "learning_rate": 7.586666666666668e-05, |
| "loss": 0.1221, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.213389121338912, |
| "grad_norm": 0.5300595760345459, |
| "learning_rate": 7.72e-05, |
| "loss": 0.139, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.2343096234309623, |
| "grad_norm": 1.0076900720596313, |
| "learning_rate": 7.853333333333334e-05, |
| "loss": 0.1345, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.2552301255230125, |
| "grad_norm": 0.5394683480262756, |
| "learning_rate": 7.986666666666667e-05, |
| "loss": 0.138, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.2761506276150627, |
| "grad_norm": 0.5423910617828369, |
| "learning_rate": 8.120000000000001e-05, |
| "loss": 0.1392, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.297071129707113, |
| "grad_norm": 0.35370203852653503, |
| "learning_rate": 8.253333333333334e-05, |
| "loss": 0.1206, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.3179916317991631, |
| "grad_norm": 0.35600051283836365, |
| "learning_rate": 8.386666666666667e-05, |
| "loss": 0.1235, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.3389121338912133, |
| "grad_norm": 0.43719682097435, |
| "learning_rate": 8.52e-05, |
| "loss": 0.1179, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.3598326359832635, |
| "grad_norm": 0.4204005300998688, |
| "learning_rate": 8.653333333333333e-05, |
| "loss": 0.1307, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.3807531380753137, |
| "grad_norm": 0.4343251883983612, |
| "learning_rate": 8.786666666666667e-05, |
| "loss": 0.1217, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.401673640167364, |
| "grad_norm": 0.580028772354126, |
| "learning_rate": 8.92e-05, |
| "loss": 0.1355, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.4225941422594142, |
| "grad_norm": 0.41393300890922546, |
| "learning_rate": 9.053333333333334e-05, |
| "loss": 0.1247, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.4435146443514644, |
| "grad_norm": 0.40060940384864807, |
| "learning_rate": 9.186666666666667e-05, |
| "loss": 0.1258, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.4644351464435146, |
| "grad_norm": 0.4414738714694977, |
| "learning_rate": 9.320000000000002e-05, |
| "loss": 0.1262, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.4853556485355648, |
| "grad_norm": 0.444823682308197, |
| "learning_rate": 9.453333333333335e-05, |
| "loss": 0.1342, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.506276150627615, |
| "grad_norm": 0.7578327655792236, |
| "learning_rate": 9.586666666666667e-05, |
| "loss": 0.1236, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.5271966527196654, |
| "grad_norm": 0.3242191970348358, |
| "learning_rate": 9.72e-05, |
| "loss": 0.1155, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.5481171548117154, |
| "grad_norm": 0.34916719794273376, |
| "learning_rate": 9.853333333333333e-05, |
| "loss": 0.1088, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.5690376569037658, |
| "grad_norm": 0.34131109714508057, |
| "learning_rate": 9.986666666666668e-05, |
| "loss": 0.1152, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.5899581589958158, |
| "grad_norm": 0.54949551820755, |
| "learning_rate": 9.999990157738453e-05, |
| "loss": 0.1084, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.6108786610878663, |
| "grad_norm": 0.912358820438385, |
| "learning_rate": 9.999956135155687e-05, |
| "loss": 0.1198, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.6317991631799162, |
| "grad_norm": 0.45504382252693176, |
| "learning_rate": 9.99989781090763e-05, |
| "loss": 0.125, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.6527196652719667, |
| "grad_norm": 0.5051805377006531, |
| "learning_rate": 9.999815185277755e-05, |
| "loss": 0.1144, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.6736401673640167, |
| "grad_norm": 0.36151519417762756, |
| "learning_rate": 9.999708258667652e-05, |
| "loss": 0.1073, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.694560669456067, |
| "grad_norm": 0.4040098190307617, |
| "learning_rate": 9.999577031597029e-05, |
| "loss": 0.124, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.715481171548117, |
| "grad_norm": 0.4677584767341614, |
| "learning_rate": 9.999421504703696e-05, |
| "loss": 0.1061, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.7364016736401675, |
| "grad_norm": 0.4312772750854492, |
| "learning_rate": 9.999241678743574e-05, |
| "loss": 0.1088, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.7573221757322175, |
| "grad_norm": 0.44205185770988464, |
| "learning_rate": 9.999037554590683e-05, |
| "loss": 0.107, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.778242677824268, |
| "grad_norm": 0.34728363156318665, |
| "learning_rate": 9.998809133237143e-05, |
| "loss": 0.1246, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.799163179916318, |
| "grad_norm": 0.3395802676677704, |
| "learning_rate": 9.998556415793169e-05, |
| "loss": 0.1001, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.8200836820083683, |
| "grad_norm": 0.3991422951221466, |
| "learning_rate": 9.998279403487062e-05, |
| "loss": 0.109, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.8410041841004183, |
| "grad_norm": 0.2969718277454376, |
| "learning_rate": 9.997978097665205e-05, |
| "loss": 0.1016, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.8619246861924688, |
| "grad_norm": 0.3824094533920288, |
| "learning_rate": 9.99765249979206e-05, |
| "loss": 0.111, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.8828451882845187, |
| "grad_norm": 0.32365161180496216, |
| "learning_rate": 9.997302611450154e-05, |
| "loss": 0.1102, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.9037656903765692, |
| "grad_norm": 0.34358587861061096, |
| "learning_rate": 9.996928434340073e-05, |
| "loss": 0.1023, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.9246861924686192, |
| "grad_norm": 0.36740976572036743, |
| "learning_rate": 9.996529970280462e-05, |
| "loss": 0.1098, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.9456066945606696, |
| "grad_norm": 0.29451724886894226, |
| "learning_rate": 9.996107221208004e-05, |
| "loss": 0.1028, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.9665271966527196, |
| "grad_norm": 0.37186339497566223, |
| "learning_rate": 9.995660189177419e-05, |
| "loss": 0.0943, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.98744769874477, |
| "grad_norm": 0.3423303961753845, |
| "learning_rate": 9.995188876361451e-05, |
| "loss": 0.1076, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.00836820083682, |
| "grad_norm": 0.3213839530944824, |
| "learning_rate": 9.994693285050857e-05, |
| "loss": 0.1159, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.0292887029288704, |
| "grad_norm": 0.3634227216243744, |
| "learning_rate": 9.994173417654395e-05, |
| "loss": 0.1028, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.0502092050209204, |
| "grad_norm": 0.5577953457832336, |
| "learning_rate": 9.993629276698821e-05, |
| "loss": 0.1018, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.071129707112971, |
| "grad_norm": 0.5105161070823669, |
| "learning_rate": 9.993060864828858e-05, |
| "loss": 0.1103, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.092050209205021, |
| "grad_norm": 0.458475798368454, |
| "learning_rate": 9.992468184807206e-05, |
| "loss": 0.1064, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.1129707112970713, |
| "grad_norm": 0.5843384265899658, |
| "learning_rate": 9.991851239514511e-05, |
| "loss": 0.1124, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.1338912133891212, |
| "grad_norm": 0.5279590487480164, |
| "learning_rate": 9.991210031949359e-05, |
| "loss": 0.1015, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.1548117154811717, |
| "grad_norm": 0.39147329330444336, |
| "learning_rate": 9.990544565228259e-05, |
| "loss": 0.1089, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.1757322175732217, |
| "grad_norm": 0.37064120173454285, |
| "learning_rate": 9.989854842585631e-05, |
| "loss": 0.1114, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.196652719665272, |
| "grad_norm": 0.3259464502334595, |
| "learning_rate": 9.989140867373783e-05, |
| "loss": 0.0913, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.217573221757322, |
| "grad_norm": 0.3360336124897003, |
| "learning_rate": 9.988402643062907e-05, |
| "loss": 0.1062, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.2384937238493725, |
| "grad_norm": 0.29747986793518066, |
| "learning_rate": 9.987640173241046e-05, |
| "loss": 0.111, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.2594142259414225, |
| "grad_norm": 0.4818101227283478, |
| "learning_rate": 9.986853461614093e-05, |
| "loss": 0.1023, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.280334728033473, |
| "grad_norm": 0.42483437061309814, |
| "learning_rate": 9.986042512005763e-05, |
| "loss": 0.0998, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.301255230125523, |
| "grad_norm": 0.3709806501865387, |
| "learning_rate": 9.985207328357573e-05, |
| "loss": 0.1011, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.3221757322175733, |
| "grad_norm": 0.2622065246105194, |
| "learning_rate": 9.984347914728829e-05, |
| "loss": 0.0996, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.3430962343096233, |
| "grad_norm": 0.37095901370048523, |
| "learning_rate": 9.983464275296605e-05, |
| "loss": 0.1018, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.3640167364016738, |
| "grad_norm": 0.28311190009117126, |
| "learning_rate": 9.982556414355724e-05, |
| "loss": 0.1063, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.3849372384937237, |
| "grad_norm": 0.372185081243515, |
| "learning_rate": 9.981624336318726e-05, |
| "loss": 0.1042, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.405857740585774, |
| "grad_norm": 0.2405327707529068, |
| "learning_rate": 9.980668045715864e-05, |
| "loss": 0.0931, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.426778242677824, |
| "grad_norm": 0.34624001383781433, |
| "learning_rate": 9.979687547195066e-05, |
| "loss": 0.0981, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.4476987447698746, |
| "grad_norm": 0.4329279363155365, |
| "learning_rate": 9.978682845521927e-05, |
| "loss": 0.108, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.4686192468619246, |
| "grad_norm": 0.5653864145278931, |
| "learning_rate": 9.977653945579673e-05, |
| "loss": 0.1026, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.489539748953975, |
| "grad_norm": 0.27248165011405945, |
| "learning_rate": 9.976600852369144e-05, |
| "loss": 0.0928, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.510460251046025, |
| "grad_norm": 0.3201051354408264, |
| "learning_rate": 9.975523571008769e-05, |
| "loss": 0.1024, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.5313807531380754, |
| "grad_norm": 0.3279741704463959, |
| "learning_rate": 9.97442210673454e-05, |
| "loss": 0.1023, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.5523012552301254, |
| "grad_norm": 0.2990843653678894, |
| "learning_rate": 9.973296464899988e-05, |
| "loss": 0.0938, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.573221757322176, |
| "grad_norm": 0.31866946816444397, |
| "learning_rate": 9.972146650976154e-05, |
| "loss": 0.1018, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.594142259414226, |
| "grad_norm": 0.2663305401802063, |
| "learning_rate": 9.970972670551566e-05, |
| "loss": 0.0984, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.6150627615062763, |
| "grad_norm": 0.33940839767456055, |
| "learning_rate": 9.969774529332212e-05, |
| "loss": 0.0988, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.6359832635983262, |
| "grad_norm": 0.2570817172527313, |
| "learning_rate": 9.968552233141504e-05, |
| "loss": 0.0978, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.6569037656903767, |
| "grad_norm": 0.3066076636314392, |
| "learning_rate": 9.967305787920264e-05, |
| "loss": 0.0952, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.6778242677824267, |
| "grad_norm": 0.2956596910953522, |
| "learning_rate": 9.966035199726684e-05, |
| "loss": 0.0925, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.698744769874477, |
| "grad_norm": 0.4384233057498932, |
| "learning_rate": 9.9647404747363e-05, |
| "loss": 0.1044, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.719665271966527, |
| "grad_norm": 0.3082908093929291, |
| "learning_rate": 9.96342161924196e-05, |
| "loss": 0.099, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.7405857740585775, |
| "grad_norm": 0.5076243877410889, |
| "learning_rate": 9.962078639653797e-05, |
| "loss": 0.0925, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.7615062761506275, |
| "grad_norm": 0.3102075159549713, |
| "learning_rate": 9.960711542499202e-05, |
| "loss": 0.0996, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.782426778242678, |
| "grad_norm": 0.31955546140670776, |
| "learning_rate": 9.959320334422772e-05, |
| "loss": 0.0889, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.803347280334728, |
| "grad_norm": 0.27515116333961487, |
| "learning_rate": 9.957905022186309e-05, |
| "loss": 0.0902, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.8242677824267783, |
| "grad_norm": 0.31273916363716125, |
| "learning_rate": 9.956465612668757e-05, |
| "loss": 0.0889, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.8451882845188283, |
| "grad_norm": 0.36738961935043335, |
| "learning_rate": 9.95500211286619e-05, |
| "loss": 0.0961, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.8661087866108788, |
| "grad_norm": 0.3078056275844574, |
| "learning_rate": 9.953514529891763e-05, |
| "loss": 0.0804, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.8870292887029287, |
| "grad_norm": 0.23031267523765564, |
| "learning_rate": 9.952002870975693e-05, |
| "loss": 0.0906, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.907949790794979, |
| "grad_norm": 0.2518852651119232, |
| "learning_rate": 9.950467143465207e-05, |
| "loss": 0.084, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.928870292887029, |
| "grad_norm": 0.4326651394367218, |
| "learning_rate": 9.94890735482452e-05, |
| "loss": 0.0828, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.9497907949790796, |
| "grad_norm": 0.26489028334617615, |
| "learning_rate": 9.947323512634788e-05, |
| "loss": 0.088, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.9707112970711296, |
| "grad_norm": 0.286268025636673, |
| "learning_rate": 9.945715624594081e-05, |
| "loss": 0.0901, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.99163179916318, |
| "grad_norm": 0.40001794695854187, |
| "learning_rate": 9.944083698517339e-05, |
| "loss": 0.0913, |
| "step": 1430 |
| }, |
| { |
| "epoch": 3.01255230125523, |
| "grad_norm": 0.26771172881126404, |
| "learning_rate": 9.942427742336334e-05, |
| "loss": 0.0883, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.0334728033472804, |
| "grad_norm": 0.3441821038722992, |
| "learning_rate": 9.940747764099638e-05, |
| "loss": 0.0867, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.0543933054393304, |
| "grad_norm": 0.23330751061439514, |
| "learning_rate": 9.939043771972574e-05, |
| "loss": 0.0899, |
| "step": 1460 |
| }, |
| { |
| "epoch": 3.075313807531381, |
| "grad_norm": 0.3174346685409546, |
| "learning_rate": 9.937315774237186e-05, |
| "loss": 0.0854, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.096234309623431, |
| "grad_norm": 0.2819070518016815, |
| "learning_rate": 9.93556377929219e-05, |
| "loss": 0.0945, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.1171548117154813, |
| "grad_norm": 0.2613016664981842, |
| "learning_rate": 9.933787795652942e-05, |
| "loss": 0.0963, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.1380753138075312, |
| "grad_norm": 0.32942673563957214, |
| "learning_rate": 9.931987831951386e-05, |
| "loss": 0.096, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.1589958158995817, |
| "grad_norm": 0.36867812275886536, |
| "learning_rate": 9.930163896936027e-05, |
| "loss": 0.1027, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.1799163179916317, |
| "grad_norm": 0.31028738617897034, |
| "learning_rate": 9.92831599947187e-05, |
| "loss": 0.0957, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.200836820083682, |
| "grad_norm": 0.23857052624225616, |
| "learning_rate": 9.926444148540393e-05, |
| "loss": 0.0869, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.221757322175732, |
| "grad_norm": 0.30246081948280334, |
| "learning_rate": 9.924548353239495e-05, |
| "loss": 0.0872, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.2426778242677825, |
| "grad_norm": 0.2901047170162201, |
| "learning_rate": 9.922628622783451e-05, |
| "loss": 0.1011, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.2635983263598325, |
| "grad_norm": 0.40179872512817383, |
| "learning_rate": 9.920684966502878e-05, |
| "loss": 0.0882, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.284518828451883, |
| "grad_norm": 0.29192686080932617, |
| "learning_rate": 9.918717393844669e-05, |
| "loss": 0.0835, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.305439330543933, |
| "grad_norm": 0.38771605491638184, |
| "learning_rate": 9.916725914371969e-05, |
| "loss": 0.0852, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.3263598326359833, |
| "grad_norm": 0.219853475689888, |
| "learning_rate": 9.914710537764117e-05, |
| "loss": 0.089, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.3472803347280333, |
| "grad_norm": 0.33001509308815, |
| "learning_rate": 9.912671273816601e-05, |
| "loss": 0.0893, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.3682008368200838, |
| "grad_norm": 0.23087120056152344, |
| "learning_rate": 9.910608132441008e-05, |
| "loss": 0.0847, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.3891213389121337, |
| "grad_norm": 0.2661932706832886, |
| "learning_rate": 9.908521123664981e-05, |
| "loss": 0.084, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.410041841004184, |
| "grad_norm": 0.2942771017551422, |
| "learning_rate": 9.906410257632168e-05, |
| "loss": 0.0785, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.430962343096234, |
| "grad_norm": 0.22297202050685883, |
| "learning_rate": 9.904275544602169e-05, |
| "loss": 0.0827, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.4518828451882846, |
| "grad_norm": 0.31877461075782776, |
| "learning_rate": 9.902116994950493e-05, |
| "loss": 0.0987, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.4728033472803346, |
| "grad_norm": 0.403152734041214, |
| "learning_rate": 9.899934619168501e-05, |
| "loss": 0.0859, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.493723849372385, |
| "grad_norm": 0.29642191529273987, |
| "learning_rate": 9.89772842786336e-05, |
| "loss": 0.0838, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.514644351464435, |
| "grad_norm": 0.3551464080810547, |
| "learning_rate": 9.895498431757989e-05, |
| "loss": 0.0903, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.5355648535564854, |
| "grad_norm": 0.3941759169101715, |
| "learning_rate": 9.893244641691006e-05, |
| "loss": 0.0916, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.5564853556485354, |
| "grad_norm": 0.2608686685562134, |
| "learning_rate": 9.890967068616677e-05, |
| "loss": 0.0832, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.577405857740586, |
| "grad_norm": 0.25500214099884033, |
| "learning_rate": 9.888665723604864e-05, |
| "loss": 0.0846, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.598326359832636, |
| "grad_norm": 0.2380712330341339, |
| "learning_rate": 9.886340617840968e-05, |
| "loss": 0.0902, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.6192468619246863, |
| "grad_norm": 0.2662540376186371, |
| "learning_rate": 9.883991762625876e-05, |
| "loss": 0.0843, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.6401673640167362, |
| "grad_norm": 0.33657851815223694, |
| "learning_rate": 9.881619169375908e-05, |
| "loss": 0.0852, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.6610878661087867, |
| "grad_norm": 0.25955089926719666, |
| "learning_rate": 9.879222849622758e-05, |
| "loss": 0.0827, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.6820083682008367, |
| "grad_norm": 0.2592358887195587, |
| "learning_rate": 9.876802815013439e-05, |
| "loss": 0.0943, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.702928870292887, |
| "grad_norm": 0.32307639718055725, |
| "learning_rate": 9.87435907731023e-05, |
| "loss": 0.0734, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.723849372384937, |
| "grad_norm": 0.22112824022769928, |
| "learning_rate": 9.871891648390614e-05, |
| "loss": 0.0886, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.7447698744769875, |
| "grad_norm": 0.32087430357933044, |
| "learning_rate": 9.869400540247223e-05, |
| "loss": 0.0882, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.7656903765690375, |
| "grad_norm": 0.30099472403526306, |
| "learning_rate": 9.866885764987776e-05, |
| "loss": 0.0871, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.786610878661088, |
| "grad_norm": 0.2685706913471222, |
| "learning_rate": 9.86434733483503e-05, |
| "loss": 0.0805, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.8075313807531384, |
| "grad_norm": 0.27751418948173523, |
| "learning_rate": 9.861785262126705e-05, |
| "loss": 0.0841, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.8284518828451883, |
| "grad_norm": 0.28366604447364807, |
| "learning_rate": 9.85919955931544e-05, |
| "loss": 0.0846, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.8493723849372383, |
| "grad_norm": 0.24972891807556152, |
| "learning_rate": 9.856590238968721e-05, |
| "loss": 0.0792, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.8702928870292888, |
| "grad_norm": 0.22181373834609985, |
| "learning_rate": 9.853957313768824e-05, |
| "loss": 0.0808, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.891213389121339, |
| "grad_norm": 0.36809661984443665, |
| "learning_rate": 9.851300796512755e-05, |
| "loss": 0.0895, |
| "step": 1860 |
| }, |
| { |
| "epoch": 3.912133891213389, |
| "grad_norm": 0.37876737117767334, |
| "learning_rate": 9.848620700112188e-05, |
| "loss": 0.0873, |
| "step": 1870 |
| }, |
| { |
| "epoch": 3.933054393305439, |
| "grad_norm": 0.2749776840209961, |
| "learning_rate": 9.845917037593396e-05, |
| "loss": 0.0798, |
| "step": 1880 |
| }, |
| { |
| "epoch": 3.9539748953974896, |
| "grad_norm": 0.28249871730804443, |
| "learning_rate": 9.843189822097196e-05, |
| "loss": 0.0772, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.97489539748954, |
| "grad_norm": 0.26489949226379395, |
| "learning_rate": 9.84043906687888e-05, |
| "loss": 0.0871, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.99581589958159, |
| "grad_norm": 0.2860475778579712, |
| "learning_rate": 9.837664785308149e-05, |
| "loss": 0.0934, |
| "step": 1910 |
| }, |
| { |
| "epoch": 4.01673640167364, |
| "grad_norm": 0.33550700545310974, |
| "learning_rate": 9.834866990869059e-05, |
| "loss": 0.0844, |
| "step": 1920 |
| }, |
| { |
| "epoch": 4.03765690376569, |
| "grad_norm": 0.2818957567214966, |
| "learning_rate": 9.832045697159938e-05, |
| "loss": 0.0852, |
| "step": 1930 |
| }, |
| { |
| "epoch": 4.058577405857741, |
| "grad_norm": 0.36980465054512024, |
| "learning_rate": 9.829200917893334e-05, |
| "loss": 0.0922, |
| "step": 1940 |
| }, |
| { |
| "epoch": 4.079497907949791, |
| "grad_norm": 0.3046676516532898, |
| "learning_rate": 9.826332666895944e-05, |
| "loss": 0.0867, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.100418410041841, |
| "grad_norm": 0.3276398777961731, |
| "learning_rate": 9.823440958108545e-05, |
| "loss": 0.0764, |
| "step": 1960 |
| }, |
| { |
| "epoch": 4.121338912133891, |
| "grad_norm": 0.2901808023452759, |
| "learning_rate": 9.820525805585927e-05, |
| "loss": 0.0796, |
| "step": 1970 |
| }, |
| { |
| "epoch": 4.142259414225942, |
| "grad_norm": 0.27789413928985596, |
| "learning_rate": 9.81758722349683e-05, |
| "loss": 0.0897, |
| "step": 1980 |
| }, |
| { |
| "epoch": 4.163179916317992, |
| "grad_norm": 0.23803479969501495, |
| "learning_rate": 9.814625226123862e-05, |
| "loss": 0.0905, |
| "step": 1990 |
| }, |
| { |
| "epoch": 4.184100418410042, |
| "grad_norm": 0.27403533458709717, |
| "learning_rate": 9.811639827863449e-05, |
| "loss": 0.0832, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.205020920502092, |
| "grad_norm": 0.2629660964012146, |
| "learning_rate": 9.808631043225741e-05, |
| "loss": 0.0755, |
| "step": 2010 |
| }, |
| { |
| "epoch": 4.2259414225941425, |
| "grad_norm": 0.34758999943733215, |
| "learning_rate": 9.805598886834567e-05, |
| "loss": 0.0803, |
| "step": 2020 |
| }, |
| { |
| "epoch": 4.2468619246861925, |
| "grad_norm": 0.23933303356170654, |
| "learning_rate": 9.802543373427344e-05, |
| "loss": 0.0889, |
| "step": 2030 |
| }, |
| { |
| "epoch": 4.2677824267782425, |
| "grad_norm": 0.24562481045722961, |
| "learning_rate": 9.799464517855018e-05, |
| "loss": 0.0824, |
| "step": 2040 |
| }, |
| { |
| "epoch": 4.2887029288702925, |
| "grad_norm": 0.31750568747520447, |
| "learning_rate": 9.79636233508198e-05, |
| "loss": 0.079, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.309623430962343, |
| "grad_norm": 0.2367181032896042, |
| "learning_rate": 9.793236840186005e-05, |
| "loss": 0.0757, |
| "step": 2060 |
| }, |
| { |
| "epoch": 4.330543933054393, |
| "grad_norm": 0.22896745800971985, |
| "learning_rate": 9.790088048358175e-05, |
| "loss": 0.0712, |
| "step": 2070 |
| }, |
| { |
| "epoch": 4.351464435146443, |
| "grad_norm": 0.26197147369384766, |
| "learning_rate": 9.786915974902798e-05, |
| "loss": 0.0812, |
| "step": 2080 |
| }, |
| { |
| "epoch": 4.372384937238493, |
| "grad_norm": 0.2804318964481354, |
| "learning_rate": 9.783720635237343e-05, |
| "loss": 0.067, |
| "step": 2090 |
| }, |
| { |
| "epoch": 4.393305439330544, |
| "grad_norm": 0.2822119891643524, |
| "learning_rate": 9.780502044892362e-05, |
| "loss": 0.0803, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.414225941422594, |
| "grad_norm": 0.2411976307630539, |
| "learning_rate": 9.777260219511415e-05, |
| "loss": 0.0721, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.435146443514644, |
| "grad_norm": 0.3058140277862549, |
| "learning_rate": 9.773995174850989e-05, |
| "loss": 0.0763, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.456066945606695, |
| "grad_norm": 0.34904828667640686, |
| "learning_rate": 9.770706926780428e-05, |
| "loss": 0.084, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.476987447698745, |
| "grad_norm": 0.21338161826133728, |
| "learning_rate": 9.767395491281855e-05, |
| "loss": 0.0714, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.497907949790795, |
| "grad_norm": 0.24260395765304565, |
| "learning_rate": 9.764060884450086e-05, |
| "loss": 0.0924, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.518828451882845, |
| "grad_norm": 0.3183063864707947, |
| "learning_rate": 9.76070312249257e-05, |
| "loss": 0.0752, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.539748953974895, |
| "grad_norm": 0.27661818265914917, |
| "learning_rate": 9.757322221729283e-05, |
| "loss": 0.0783, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.560669456066946, |
| "grad_norm": 0.284467488527298, |
| "learning_rate": 9.753918198592682e-05, |
| "loss": 0.072, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.581589958158996, |
| "grad_norm": 0.24314570426940918, |
| "learning_rate": 9.750491069627593e-05, |
| "loss": 0.0799, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.602510460251046, |
| "grad_norm": 0.23835724592208862, |
| "learning_rate": 9.747040851491149e-05, |
| "loss": 0.0711, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.623430962343097, |
| "grad_norm": 0.2222239226102829, |
| "learning_rate": 9.743567560952711e-05, |
| "loss": 0.071, |
| "step": 2210 |
| }, |
| { |
| "epoch": 4.644351464435147, |
| "grad_norm": 0.19406676292419434, |
| "learning_rate": 9.740071214893773e-05, |
| "loss": 0.0648, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.665271966527197, |
| "grad_norm": 0.3223171830177307, |
| "learning_rate": 9.736551830307892e-05, |
| "loss": 0.0707, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.686192468619247, |
| "grad_norm": 0.26071134209632874, |
| "learning_rate": 9.733009424300597e-05, |
| "loss": 0.0799, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.707112970711297, |
| "grad_norm": 0.28553199768066406, |
| "learning_rate": 9.729444014089314e-05, |
| "loss": 0.0743, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.7280334728033475, |
| "grad_norm": 0.3291798532009125, |
| "learning_rate": 9.725855617003275e-05, |
| "loss": 0.0789, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.7489539748953975, |
| "grad_norm": 0.20497122406959534, |
| "learning_rate": 9.72224425048344e-05, |
| "loss": 0.0833, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.7698744769874475, |
| "grad_norm": 0.2501441538333893, |
| "learning_rate": 9.718609932082405e-05, |
| "loss": 0.0778, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.790794979079498, |
| "grad_norm": 0.25415176153182983, |
| "learning_rate": 9.714952679464323e-05, |
| "loss": 0.0771, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.811715481171548, |
| "grad_norm": 0.22852188348770142, |
| "learning_rate": 9.711272510404816e-05, |
| "loss": 0.0677, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.832635983263598, |
| "grad_norm": 0.24375076591968536, |
| "learning_rate": 9.70756944279089e-05, |
| "loss": 0.0769, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.853556485355648, |
| "grad_norm": 0.2883508503437042, |
| "learning_rate": 9.70384349462084e-05, |
| "loss": 0.0756, |
| "step": 2320 |
| }, |
| { |
| "epoch": 4.874476987447698, |
| "grad_norm": 0.29686424136161804, |
| "learning_rate": 9.700094684004182e-05, |
| "loss": 0.0782, |
| "step": 2330 |
| }, |
| { |
| "epoch": 4.895397489539749, |
| "grad_norm": 0.22778484225273132, |
| "learning_rate": 9.696323029161535e-05, |
| "loss": 0.0822, |
| "step": 2340 |
| }, |
| { |
| "epoch": 4.916317991631799, |
| "grad_norm": 0.25161683559417725, |
| "learning_rate": 9.692528548424567e-05, |
| "loss": 0.0755, |
| "step": 2350 |
| }, |
| { |
| "epoch": 4.937238493723849, |
| "grad_norm": 0.20022211968898773, |
| "learning_rate": 9.688711260235872e-05, |
| "loss": 0.0795, |
| "step": 2360 |
| }, |
| { |
| "epoch": 4.9581589958159, |
| "grad_norm": 0.4135867953300476, |
| "learning_rate": 9.684871183148912e-05, |
| "loss": 0.0781, |
| "step": 2370 |
| }, |
| { |
| "epoch": 4.97907949790795, |
| "grad_norm": 0.19854891300201416, |
| "learning_rate": 9.681008335827898e-05, |
| "loss": 0.0729, |
| "step": 2380 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.996017336845398, |
| "learning_rate": 9.677122737047724e-05, |
| "loss": 0.0775, |
| "step": 2390 |
| }, |
| { |
| "epoch": 5.02092050209205, |
| "grad_norm": 0.34404969215393066, |
| "learning_rate": 9.673214405693857e-05, |
| "loss": 0.0845, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.0418410041841, |
| "grad_norm": 0.27230435609817505, |
| "learning_rate": 9.669283360762258e-05, |
| "loss": 0.0745, |
| "step": 2410 |
| }, |
| { |
| "epoch": 5.062761506276151, |
| "grad_norm": 0.3299899995326996, |
| "learning_rate": 9.66532962135928e-05, |
| "loss": 0.0708, |
| "step": 2420 |
| }, |
| { |
| "epoch": 5.083682008368201, |
| "grad_norm": 0.23986473679542542, |
| "learning_rate": 9.661353206701582e-05, |
| "loss": 0.0707, |
| "step": 2430 |
| }, |
| { |
| "epoch": 5.104602510460251, |
| "grad_norm": 0.18780270218849182, |
| "learning_rate": 9.657354136116035e-05, |
| "loss": 0.0747, |
| "step": 2440 |
| }, |
| { |
| "epoch": 5.125523012552302, |
| "grad_norm": 0.24139155447483063, |
| "learning_rate": 9.653332429039625e-05, |
| "loss": 0.0766, |
| "step": 2450 |
| }, |
| { |
| "epoch": 5.146443514644352, |
| "grad_norm": 0.3180379867553711, |
| "learning_rate": 9.649288105019356e-05, |
| "loss": 0.0799, |
| "step": 2460 |
| }, |
| { |
| "epoch": 5.167364016736402, |
| "grad_norm": 0.3000459372997284, |
| "learning_rate": 9.645221183712165e-05, |
| "loss": 0.0802, |
| "step": 2470 |
| }, |
| { |
| "epoch": 5.188284518828452, |
| "grad_norm": 0.2193576991558075, |
| "learning_rate": 9.641131684884817e-05, |
| "loss": 0.0805, |
| "step": 2480 |
| }, |
| { |
| "epoch": 5.209205020920502, |
| "grad_norm": 0.2673724591732025, |
| "learning_rate": 9.637019628413813e-05, |
| "loss": 0.0816, |
| "step": 2490 |
| }, |
| { |
| "epoch": 5.2301255230125525, |
| "grad_norm": 0.2733951807022095, |
| "learning_rate": 9.632885034285291e-05, |
| "loss": 0.0731, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.2510460251046025, |
| "grad_norm": 0.3638957738876343, |
| "learning_rate": 9.628727922594931e-05, |
| "loss": 0.0817, |
| "step": 2510 |
| }, |
| { |
| "epoch": 5.2719665271966525, |
| "grad_norm": 0.26588529348373413, |
| "learning_rate": 9.624548313547862e-05, |
| "loss": 0.073, |
| "step": 2520 |
| }, |
| { |
| "epoch": 5.292887029288703, |
| "grad_norm": 0.27198341488838196, |
| "learning_rate": 9.620346227458547e-05, |
| "loss": 0.0691, |
| "step": 2530 |
| }, |
| { |
| "epoch": 5.313807531380753, |
| "grad_norm": 0.2196781486272812, |
| "learning_rate": 9.616121684750712e-05, |
| "loss": 0.0727, |
| "step": 2540 |
| }, |
| { |
| "epoch": 5.334728033472803, |
| "grad_norm": 0.24275390803813934, |
| "learning_rate": 9.611874705957215e-05, |
| "loss": 0.082, |
| "step": 2550 |
| }, |
| { |
| "epoch": 5.355648535564853, |
| "grad_norm": 0.18836568295955658, |
| "learning_rate": 9.607605311719972e-05, |
| "loss": 0.0833, |
| "step": 2560 |
| }, |
| { |
| "epoch": 5.376569037656903, |
| "grad_norm": 0.20861050486564636, |
| "learning_rate": 9.603313522789841e-05, |
| "loss": 0.075, |
| "step": 2570 |
| }, |
| { |
| "epoch": 5.397489539748954, |
| "grad_norm": 0.22330905497074127, |
| "learning_rate": 9.598999360026529e-05, |
| "loss": 0.0773, |
| "step": 2580 |
| }, |
| { |
| "epoch": 5.418410041841004, |
| "grad_norm": 0.2990499436855316, |
| "learning_rate": 9.59466284439849e-05, |
| "loss": 0.0759, |
| "step": 2590 |
| }, |
| { |
| "epoch": 5.439330543933054, |
| "grad_norm": 0.22060814499855042, |
| "learning_rate": 9.590303996982815e-05, |
| "loss": 0.0695, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.460251046025105, |
| "grad_norm": 0.26148760318756104, |
| "learning_rate": 9.585922838965145e-05, |
| "loss": 0.0722, |
| "step": 2610 |
| }, |
| { |
| "epoch": 5.481171548117155, |
| "grad_norm": 0.2634856402873993, |
| "learning_rate": 9.581519391639549e-05, |
| "loss": 0.0719, |
| "step": 2620 |
| }, |
| { |
| "epoch": 5.502092050209205, |
| "grad_norm": 0.26813215017318726, |
| "learning_rate": 9.577093676408439e-05, |
| "loss": 0.0709, |
| "step": 2630 |
| }, |
| { |
| "epoch": 5.523012552301255, |
| "grad_norm": 0.27922987937927246, |
| "learning_rate": 9.572645714782453e-05, |
| "loss": 0.0748, |
| "step": 2640 |
| }, |
| { |
| "epoch": 5.543933054393305, |
| "grad_norm": 0.29743626713752747, |
| "learning_rate": 9.568175528380354e-05, |
| "loss": 0.0702, |
| "step": 2650 |
| }, |
| { |
| "epoch": 5.564853556485356, |
| "grad_norm": 0.27912113070487976, |
| "learning_rate": 9.56368313892893e-05, |
| "loss": 0.088, |
| "step": 2660 |
| }, |
| { |
| "epoch": 5.585774058577406, |
| "grad_norm": 0.23611173033714294, |
| "learning_rate": 9.55916856826288e-05, |
| "loss": 0.0751, |
| "step": 2670 |
| }, |
| { |
| "epoch": 5.606694560669456, |
| "grad_norm": 0.19908355176448822, |
| "learning_rate": 9.554631838324713e-05, |
| "loss": 0.0765, |
| "step": 2680 |
| }, |
| { |
| "epoch": 5.627615062761507, |
| "grad_norm": 0.181325763463974, |
| "learning_rate": 9.55007297116464e-05, |
| "loss": 0.0723, |
| "step": 2690 |
| }, |
| { |
| "epoch": 5.648535564853557, |
| "grad_norm": 0.22809170186519623, |
| "learning_rate": 9.545491988940472e-05, |
| "loss": 0.0817, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.669456066945607, |
| "grad_norm": 0.24609525501728058, |
| "learning_rate": 9.540888913917501e-05, |
| "loss": 0.0729, |
| "step": 2710 |
| }, |
| { |
| "epoch": 5.690376569037657, |
| "grad_norm": 0.3130713105201721, |
| "learning_rate": 9.536263768468401e-05, |
| "loss": 0.0743, |
| "step": 2720 |
| }, |
| { |
| "epoch": 5.711297071129707, |
| "grad_norm": 0.27948838472366333, |
| "learning_rate": 9.531616575073117e-05, |
| "loss": 0.0759, |
| "step": 2730 |
| }, |
| { |
| "epoch": 5.7322175732217575, |
| "grad_norm": 0.20584532618522644, |
| "learning_rate": 9.526947356318754e-05, |
| "loss": 0.0747, |
| "step": 2740 |
| }, |
| { |
| "epoch": 5.7531380753138075, |
| "grad_norm": 0.27664265036582947, |
| "learning_rate": 9.52225613489947e-05, |
| "loss": 0.0679, |
| "step": 2750 |
| }, |
| { |
| "epoch": 5.7740585774058575, |
| "grad_norm": 0.24720332026481628, |
| "learning_rate": 9.517542933616365e-05, |
| "loss": 0.0736, |
| "step": 2760 |
| }, |
| { |
| "epoch": 5.794979079497908, |
| "grad_norm": 0.23605448007583618, |
| "learning_rate": 9.512807775377366e-05, |
| "loss": 0.0719, |
| "step": 2770 |
| }, |
| { |
| "epoch": 5.815899581589958, |
| "grad_norm": 0.25352632999420166, |
| "learning_rate": 9.508050683197121e-05, |
| "loss": 0.0706, |
| "step": 2780 |
| }, |
| { |
| "epoch": 5.836820083682008, |
| "grad_norm": 0.2646510899066925, |
| "learning_rate": 9.503271680196888e-05, |
| "loss": 0.079, |
| "step": 2790 |
| }, |
| { |
| "epoch": 5.857740585774058, |
| "grad_norm": 0.2195678949356079, |
| "learning_rate": 9.498470789604413e-05, |
| "loss": 0.0652, |
| "step": 2800 |
| }, |
| { |
| "epoch": 5.878661087866108, |
| "grad_norm": 0.23384016752243042, |
| "learning_rate": 9.49364803475383e-05, |
| "loss": 0.0813, |
| "step": 2810 |
| }, |
| { |
| "epoch": 5.899581589958159, |
| "grad_norm": 0.2790519595146179, |
| "learning_rate": 9.48880343908554e-05, |
| "loss": 0.0689, |
| "step": 2820 |
| }, |
| { |
| "epoch": 5.920502092050209, |
| "grad_norm": 0.2614765465259552, |
| "learning_rate": 9.4839370261461e-05, |
| "loss": 0.0739, |
| "step": 2830 |
| }, |
| { |
| "epoch": 5.941422594142259, |
| "grad_norm": 0.20076067745685577, |
| "learning_rate": 9.479048819588098e-05, |
| "loss": 0.064, |
| "step": 2840 |
| }, |
| { |
| "epoch": 5.96234309623431, |
| "grad_norm": 0.19265033304691315, |
| "learning_rate": 9.474138843170063e-05, |
| "loss": 0.0703, |
| "step": 2850 |
| }, |
| { |
| "epoch": 5.98326359832636, |
| "grad_norm": 0.2400965690612793, |
| "learning_rate": 9.46920712075632e-05, |
| "loss": 0.0774, |
| "step": 2860 |
| }, |
| { |
| "epoch": 6.00418410041841, |
| "grad_norm": 0.2949677109718323, |
| "learning_rate": 9.464253676316893e-05, |
| "loss": 0.0701, |
| "step": 2870 |
| }, |
| { |
| "epoch": 6.02510460251046, |
| "grad_norm": 0.1922580450773239, |
| "learning_rate": 9.459278533927384e-05, |
| "loss": 0.0676, |
| "step": 2880 |
| }, |
| { |
| "epoch": 6.046025104602511, |
| "grad_norm": 0.21086367964744568, |
| "learning_rate": 9.454281717768854e-05, |
| "loss": 0.0719, |
| "step": 2890 |
| }, |
| { |
| "epoch": 6.066945606694561, |
| "grad_norm": 0.2067558616399765, |
| "learning_rate": 9.449263252127708e-05, |
| "loss": 0.0748, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.087866108786611, |
| "grad_norm": 0.19907134771347046, |
| "learning_rate": 9.444223161395573e-05, |
| "loss": 0.0753, |
| "step": 2910 |
| }, |
| { |
| "epoch": 6.108786610878661, |
| "grad_norm": 0.2764277756214142, |
| "learning_rate": 9.439161470069184e-05, |
| "loss": 0.0814, |
| "step": 2920 |
| }, |
| { |
| "epoch": 6.129707112970712, |
| "grad_norm": 0.25734201073646545, |
| "learning_rate": 9.43407820275026e-05, |
| "loss": 0.0748, |
| "step": 2930 |
| }, |
| { |
| "epoch": 6.150627615062762, |
| "grad_norm": 0.26663076877593994, |
| "learning_rate": 9.428973384145396e-05, |
| "loss": 0.0719, |
| "step": 2940 |
| }, |
| { |
| "epoch": 6.171548117154812, |
| "grad_norm": 0.22700175642967224, |
| "learning_rate": 9.423847039065922e-05, |
| "loss": 0.0685, |
| "step": 2950 |
| }, |
| { |
| "epoch": 6.192468619246862, |
| "grad_norm": 0.28385862708091736, |
| "learning_rate": 9.418699192427805e-05, |
| "loss": 0.0745, |
| "step": 2960 |
| }, |
| { |
| "epoch": 6.2133891213389125, |
| "grad_norm": 0.28692567348480225, |
| "learning_rate": 9.41352986925151e-05, |
| "loss": 0.0772, |
| "step": 2970 |
| }, |
| { |
| "epoch": 6.2343096234309625, |
| "grad_norm": 0.2288256287574768, |
| "learning_rate": 9.408339094661895e-05, |
| "loss": 0.0706, |
| "step": 2980 |
| }, |
| { |
| "epoch": 6.2552301255230125, |
| "grad_norm": 0.38740843534469604, |
| "learning_rate": 9.40312689388807e-05, |
| "loss": 0.0696, |
| "step": 2990 |
| }, |
| { |
| "epoch": 6.2761506276150625, |
| "grad_norm": 0.23974934220314026, |
| "learning_rate": 9.397893292263292e-05, |
| "loss": 0.0675, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.297071129707113, |
| "grad_norm": 0.26130393147468567, |
| "learning_rate": 9.392638315224829e-05, |
| "loss": 0.0711, |
| "step": 3010 |
| }, |
| { |
| "epoch": 6.317991631799163, |
| "grad_norm": 0.17046813666820526, |
| "learning_rate": 9.387361988313846e-05, |
| "loss": 0.0676, |
| "step": 3020 |
| }, |
| { |
| "epoch": 6.338912133891213, |
| "grad_norm": 0.31989800930023193, |
| "learning_rate": 9.38206433717527e-05, |
| "loss": 0.0762, |
| "step": 3030 |
| }, |
| { |
| "epoch": 6.359832635983263, |
| "grad_norm": 0.18395087122917175, |
| "learning_rate": 9.376745387557681e-05, |
| "loss": 0.0645, |
| "step": 3040 |
| }, |
| { |
| "epoch": 6.380753138075314, |
| "grad_norm": 0.24593815207481384, |
| "learning_rate": 9.371405165313169e-05, |
| "loss": 0.0679, |
| "step": 3050 |
| }, |
| { |
| "epoch": 6.401673640167364, |
| "grad_norm": 0.26608437299728394, |
| "learning_rate": 9.366043696397222e-05, |
| "loss": 0.0795, |
| "step": 3060 |
| }, |
| { |
| "epoch": 6.422594142259414, |
| "grad_norm": 0.2634703516960144, |
| "learning_rate": 9.360661006868592e-05, |
| "loss": 0.0779, |
| "step": 3070 |
| }, |
| { |
| "epoch": 6.443514644351464, |
| "grad_norm": 0.22274665534496307, |
| "learning_rate": 9.355257122889173e-05, |
| "loss": 0.0784, |
| "step": 3080 |
| }, |
| { |
| "epoch": 6.464435146443515, |
| "grad_norm": 0.21002769470214844, |
| "learning_rate": 9.349832070723871e-05, |
| "loss": 0.0718, |
| "step": 3090 |
| }, |
| { |
| "epoch": 6.485355648535565, |
| "grad_norm": 0.20783476531505585, |
| "learning_rate": 9.34438587674048e-05, |
| "loss": 0.0749, |
| "step": 3100 |
| }, |
| { |
| "epoch": 6.506276150627615, |
| "grad_norm": 0.1763143390417099, |
| "learning_rate": 9.338918567409545e-05, |
| "loss": 0.0699, |
| "step": 3110 |
| }, |
| { |
| "epoch": 6.527196652719665, |
| "grad_norm": 0.25238746404647827, |
| "learning_rate": 9.333430169304247e-05, |
| "loss": 0.0735, |
| "step": 3120 |
| }, |
| { |
| "epoch": 6.548117154811716, |
| "grad_norm": 0.21795807778835297, |
| "learning_rate": 9.327920709100259e-05, |
| "loss": 0.0715, |
| "step": 3130 |
| }, |
| { |
| "epoch": 6.569037656903766, |
| "grad_norm": 0.26992955803871155, |
| "learning_rate": 9.322390213575631e-05, |
| "loss": 0.0792, |
| "step": 3140 |
| }, |
| { |
| "epoch": 6.589958158995816, |
| "grad_norm": 0.18660517036914825, |
| "learning_rate": 9.316838709610648e-05, |
| "loss": 0.0668, |
| "step": 3150 |
| }, |
| { |
| "epoch": 6.610878661087866, |
| "grad_norm": 0.30259087681770325, |
| "learning_rate": 9.311266224187706e-05, |
| "loss": 0.0686, |
| "step": 3160 |
| }, |
| { |
| "epoch": 6.631799163179917, |
| "grad_norm": 0.28926482796669006, |
| "learning_rate": 9.305672784391175e-05, |
| "loss": 0.069, |
| "step": 3170 |
| }, |
| { |
| "epoch": 6.652719665271967, |
| "grad_norm": 0.22975818812847137, |
| "learning_rate": 9.300058417407276e-05, |
| "loss": 0.0744, |
| "step": 3180 |
| }, |
| { |
| "epoch": 6.673640167364017, |
| "grad_norm": 0.248214989900589, |
| "learning_rate": 9.29442315052394e-05, |
| "loss": 0.0674, |
| "step": 3190 |
| }, |
| { |
| "epoch": 6.694560669456067, |
| "grad_norm": 0.23017367720603943, |
| "learning_rate": 9.288767011130684e-05, |
| "loss": 0.0767, |
| "step": 3200 |
| }, |
| { |
| "epoch": 6.7154811715481175, |
| "grad_norm": 0.30157166719436646, |
| "learning_rate": 9.283090026718466e-05, |
| "loss": 0.075, |
| "step": 3210 |
| }, |
| { |
| "epoch": 6.7364016736401675, |
| "grad_norm": 0.24175700545310974, |
| "learning_rate": 9.277392224879568e-05, |
| "loss": 0.0726, |
| "step": 3220 |
| }, |
| { |
| "epoch": 6.7573221757322175, |
| "grad_norm": 0.32189878821372986, |
| "learning_rate": 9.271673633307445e-05, |
| "loss": 0.0672, |
| "step": 3230 |
| }, |
| { |
| "epoch": 6.7782426778242675, |
| "grad_norm": 0.2515658736228943, |
| "learning_rate": 9.265934279796602e-05, |
| "loss": 0.057, |
| "step": 3240 |
| }, |
| { |
| "epoch": 6.799163179916318, |
| "grad_norm": 0.27900680899620056, |
| "learning_rate": 9.260174192242453e-05, |
| "loss": 0.0643, |
| "step": 3250 |
| }, |
| { |
| "epoch": 6.820083682008368, |
| "grad_norm": 0.20859810709953308, |
| "learning_rate": 9.254393398641185e-05, |
| "loss": 0.0694, |
| "step": 3260 |
| }, |
| { |
| "epoch": 6.841004184100418, |
| "grad_norm": 0.33800724148750305, |
| "learning_rate": 9.248591927089628e-05, |
| "loss": 0.0698, |
| "step": 3270 |
| }, |
| { |
| "epoch": 6.861924686192468, |
| "grad_norm": 0.27680280804634094, |
| "learning_rate": 9.242769805785115e-05, |
| "loss": 0.0683, |
| "step": 3280 |
| }, |
| { |
| "epoch": 6.882845188284519, |
| "grad_norm": 0.24083566665649414, |
| "learning_rate": 9.236927063025342e-05, |
| "loss": 0.0653, |
| "step": 3290 |
| }, |
| { |
| "epoch": 6.903765690376569, |
| "grad_norm": 0.2222203016281128, |
| "learning_rate": 9.231063727208234e-05, |
| "loss": 0.065, |
| "step": 3300 |
| }, |
| { |
| "epoch": 6.924686192468619, |
| "grad_norm": 0.18106094002723694, |
| "learning_rate": 9.225179826831807e-05, |
| "loss": 0.0591, |
| "step": 3310 |
| }, |
| { |
| "epoch": 6.945606694560669, |
| "grad_norm": 0.24536311626434326, |
| "learning_rate": 9.219275390494024e-05, |
| "loss": 0.0667, |
| "step": 3320 |
| }, |
| { |
| "epoch": 6.96652719665272, |
| "grad_norm": 0.23865914344787598, |
| "learning_rate": 9.213350446892668e-05, |
| "loss": 0.0623, |
| "step": 3330 |
| }, |
| { |
| "epoch": 6.98744769874477, |
| "grad_norm": 0.1846132129430771, |
| "learning_rate": 9.207405024825186e-05, |
| "loss": 0.0771, |
| "step": 3340 |
| }, |
| { |
| "epoch": 7.00836820083682, |
| "grad_norm": 0.2307969182729721, |
| "learning_rate": 9.201439153188569e-05, |
| "loss": 0.0994, |
| "step": 3350 |
| }, |
| { |
| "epoch": 7.02928870292887, |
| "grad_norm": 0.3087303936481476, |
| "learning_rate": 9.19545286097919e-05, |
| "loss": 0.0697, |
| "step": 3360 |
| }, |
| { |
| "epoch": 7.050209205020921, |
| "grad_norm": 0.21722181141376495, |
| "learning_rate": 9.189446177292679e-05, |
| "loss": 0.0752, |
| "step": 3370 |
| }, |
| { |
| "epoch": 7.071129707112971, |
| "grad_norm": 0.2604837715625763, |
| "learning_rate": 9.183419131323778e-05, |
| "loss": 0.0778, |
| "step": 3380 |
| }, |
| { |
| "epoch": 7.092050209205021, |
| "grad_norm": 0.2235862761735916, |
| "learning_rate": 9.177371752366191e-05, |
| "loss": 0.0753, |
| "step": 3390 |
| }, |
| { |
| "epoch": 7.112970711297071, |
| "grad_norm": 0.20492425560951233, |
| "learning_rate": 9.171304069812454e-05, |
| "loss": 0.0623, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.133891213389122, |
| "grad_norm": 0.26874053478240967, |
| "learning_rate": 9.165216113153782e-05, |
| "loss": 0.0762, |
| "step": 3410 |
| }, |
| { |
| "epoch": 7.154811715481172, |
| "grad_norm": 0.1912374496459961, |
| "learning_rate": 9.159107911979936e-05, |
| "loss": 0.0655, |
| "step": 3420 |
| }, |
| { |
| "epoch": 7.175732217573222, |
| "grad_norm": 0.208548441529274, |
| "learning_rate": 9.152979495979063e-05, |
| "loss": 0.0675, |
| "step": 3430 |
| }, |
| { |
| "epoch": 7.196652719665272, |
| "grad_norm": 0.26155465841293335, |
| "learning_rate": 9.146830894937571e-05, |
| "loss": 0.0692, |
| "step": 3440 |
| }, |
| { |
| "epoch": 7.2175732217573225, |
| "grad_norm": 0.3482346534729004, |
| "learning_rate": 9.140662138739969e-05, |
| "loss": 0.0723, |
| "step": 3450 |
| }, |
| { |
| "epoch": 7.2384937238493725, |
| "grad_norm": 0.2451719492673874, |
| "learning_rate": 9.134473257368732e-05, |
| "loss": 0.0691, |
| "step": 3460 |
| }, |
| { |
| "epoch": 7.2594142259414225, |
| "grad_norm": 0.2660260498523712, |
| "learning_rate": 9.128264280904145e-05, |
| "loss": 0.073, |
| "step": 3470 |
| }, |
| { |
| "epoch": 7.2803347280334725, |
| "grad_norm": 0.32091331481933594, |
| "learning_rate": 9.122035239524169e-05, |
| "loss": 0.071, |
| "step": 3480 |
| }, |
| { |
| "epoch": 7.301255230125523, |
| "grad_norm": 0.23416461050510406, |
| "learning_rate": 9.115786163504285e-05, |
| "loss": 0.0634, |
| "step": 3490 |
| }, |
| { |
| "epoch": 7.322175732217573, |
| "grad_norm": 0.19586139917373657, |
| "learning_rate": 9.10951708321735e-05, |
| "loss": 0.0646, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.343096234309623, |
| "grad_norm": 0.19783517718315125, |
| "learning_rate": 9.10322802913345e-05, |
| "loss": 0.0683, |
| "step": 3510 |
| }, |
| { |
| "epoch": 7.364016736401673, |
| "grad_norm": 0.24476327002048492, |
| "learning_rate": 9.096919031819751e-05, |
| "loss": 0.0642, |
| "step": 3520 |
| }, |
| { |
| "epoch": 7.384937238493724, |
| "grad_norm": 0.25556132197380066, |
| "learning_rate": 9.090590121940348e-05, |
| "loss": 0.0655, |
| "step": 3530 |
| }, |
| { |
| "epoch": 7.405857740585774, |
| "grad_norm": 0.26956650614738464, |
| "learning_rate": 9.084241330256121e-05, |
| "loss": 0.0743, |
| "step": 3540 |
| }, |
| { |
| "epoch": 7.426778242677824, |
| "grad_norm": 0.22216159105300903, |
| "learning_rate": 9.077872687624586e-05, |
| "loss": 0.0715, |
| "step": 3550 |
| }, |
| { |
| "epoch": 7.447698744769874, |
| "grad_norm": 0.27579641342163086, |
| "learning_rate": 9.071484224999735e-05, |
| "loss": 0.0731, |
| "step": 3560 |
| }, |
| { |
| "epoch": 7.468619246861925, |
| "grad_norm": 0.26512736082077026, |
| "learning_rate": 9.0650759734319e-05, |
| "loss": 0.0741, |
| "step": 3570 |
| }, |
| { |
| "epoch": 7.489539748953975, |
| "grad_norm": 0.2138041853904724, |
| "learning_rate": 9.05864796406759e-05, |
| "loss": 0.0698, |
| "step": 3580 |
| }, |
| { |
| "epoch": 7.510460251046025, |
| "grad_norm": 0.2592664062976837, |
| "learning_rate": 9.052200228149343e-05, |
| "loss": 0.0711, |
| "step": 3590 |
| }, |
| { |
| "epoch": 7.531380753138075, |
| "grad_norm": 0.27044913172721863, |
| "learning_rate": 9.04573279701558e-05, |
| "loss": 0.0715, |
| "step": 3600 |
| }, |
| { |
| "epoch": 7.552301255230126, |
| "grad_norm": 0.28558292984962463, |
| "learning_rate": 9.039245702100448e-05, |
| "loss": 0.0652, |
| "step": 3610 |
| }, |
| { |
| "epoch": 7.573221757322176, |
| "grad_norm": 0.37628281116485596, |
| "learning_rate": 9.032738974933664e-05, |
| "loss": 0.0685, |
| "step": 3620 |
| }, |
| { |
| "epoch": 7.594142259414226, |
| "grad_norm": 0.3403959572315216, |
| "learning_rate": 9.026212647140365e-05, |
| "loss": 0.0719, |
| "step": 3630 |
| }, |
| { |
| "epoch": 7.615062761506276, |
| "grad_norm": 0.24478821456432343, |
| "learning_rate": 9.019666750440956e-05, |
| "loss": 0.0683, |
| "step": 3640 |
| }, |
| { |
| "epoch": 7.635983263598327, |
| "grad_norm": 0.18002082407474518, |
| "learning_rate": 9.013101316650956e-05, |
| "loss": 0.0689, |
| "step": 3650 |
| }, |
| { |
| "epoch": 7.656903765690377, |
| "grad_norm": 0.22669140994548798, |
| "learning_rate": 9.00651637768084e-05, |
| "loss": 0.0668, |
| "step": 3660 |
| }, |
| { |
| "epoch": 7.677824267782427, |
| "grad_norm": 0.33720284700393677, |
| "learning_rate": 8.999911965535885e-05, |
| "loss": 0.0673, |
| "step": 3670 |
| }, |
| { |
| "epoch": 7.698744769874477, |
| "grad_norm": 0.2454947829246521, |
| "learning_rate": 8.993288112316014e-05, |
| "loss": 0.0651, |
| "step": 3680 |
| }, |
| { |
| "epoch": 7.7196652719665275, |
| "grad_norm": 0.24713526666164398, |
| "learning_rate": 8.986644850215644e-05, |
| "loss": 0.0731, |
| "step": 3690 |
| }, |
| { |
| "epoch": 7.7405857740585775, |
| "grad_norm": 0.16733984649181366, |
| "learning_rate": 8.979982211523523e-05, |
| "loss": 0.0693, |
| "step": 3700 |
| }, |
| { |
| "epoch": 7.7615062761506275, |
| "grad_norm": 0.19022060930728912, |
| "learning_rate": 8.97330022862258e-05, |
| "loss": 0.0626, |
| "step": 3710 |
| }, |
| { |
| "epoch": 7.7824267782426775, |
| "grad_norm": 0.18342478573322296, |
| "learning_rate": 8.96659893398976e-05, |
| "loss": 0.0762, |
| "step": 3720 |
| }, |
| { |
| "epoch": 7.803347280334728, |
| "grad_norm": 0.24058032035827637, |
| "learning_rate": 8.959878360195876e-05, |
| "loss": 0.0662, |
| "step": 3730 |
| }, |
| { |
| "epoch": 7.824267782426778, |
| "grad_norm": 0.22845084965229034, |
| "learning_rate": 8.953138539905438e-05, |
| "loss": 0.0641, |
| "step": 3740 |
| }, |
| { |
| "epoch": 7.845188284518828, |
| "grad_norm": 0.21459327638149261, |
| "learning_rate": 8.946379505876506e-05, |
| "loss": 0.0711, |
| "step": 3750 |
| }, |
| { |
| "epoch": 7.866108786610878, |
| "grad_norm": 0.19498105347156525, |
| "learning_rate": 8.939601290960527e-05, |
| "loss": 0.0649, |
| "step": 3760 |
| }, |
| { |
| "epoch": 7.887029288702929, |
| "grad_norm": 0.23208363354206085, |
| "learning_rate": 8.932803928102167e-05, |
| "loss": 0.0676, |
| "step": 3770 |
| }, |
| { |
| "epoch": 7.907949790794979, |
| "grad_norm": 0.2670055627822876, |
| "learning_rate": 8.925987450339168e-05, |
| "loss": 0.0719, |
| "step": 3780 |
| }, |
| { |
| "epoch": 7.928870292887029, |
| "grad_norm": 0.15933391451835632, |
| "learning_rate": 8.919151890802172e-05, |
| "loss": 0.057, |
| "step": 3790 |
| }, |
| { |
| "epoch": 7.949790794979079, |
| "grad_norm": 0.19486863911151886, |
| "learning_rate": 8.912297282714564e-05, |
| "loss": 0.0592, |
| "step": 3800 |
| }, |
| { |
| "epoch": 7.97071129707113, |
| "grad_norm": 0.20606879889965057, |
| "learning_rate": 8.905423659392316e-05, |
| "loss": 0.0658, |
| "step": 3810 |
| }, |
| { |
| "epoch": 7.99163179916318, |
| "grad_norm": 0.31158018112182617, |
| "learning_rate": 8.898531054243822e-05, |
| "loss": 0.0707, |
| "step": 3820 |
| }, |
| { |
| "epoch": 8.01255230125523, |
| "grad_norm": 0.2499541938304901, |
| "learning_rate": 8.891619500769729e-05, |
| "loss": 0.0704, |
| "step": 3830 |
| }, |
| { |
| "epoch": 8.03347280334728, |
| "grad_norm": 0.2598934769630432, |
| "learning_rate": 8.884689032562785e-05, |
| "loss": 0.0604, |
| "step": 3840 |
| }, |
| { |
| "epoch": 8.05439330543933, |
| "grad_norm": 0.16147422790527344, |
| "learning_rate": 8.87773968330767e-05, |
| "loss": 0.0667, |
| "step": 3850 |
| }, |
| { |
| "epoch": 8.07531380753138, |
| "grad_norm": 0.43483439087867737, |
| "learning_rate": 8.870771486780832e-05, |
| "loss": 0.0589, |
| "step": 3860 |
| }, |
| { |
| "epoch": 8.096234309623432, |
| "grad_norm": 0.20415087044239044, |
| "learning_rate": 8.863784476850322e-05, |
| "loss": 0.0801, |
| "step": 3870 |
| }, |
| { |
| "epoch": 8.117154811715482, |
| "grad_norm": 0.22121910750865936, |
| "learning_rate": 8.856778687475635e-05, |
| "loss": 0.0633, |
| "step": 3880 |
| }, |
| { |
| "epoch": 8.138075313807532, |
| "grad_norm": 0.19645985960960388, |
| "learning_rate": 8.849754152707541e-05, |
| "loss": 0.0695, |
| "step": 3890 |
| }, |
| { |
| "epoch": 8.158995815899582, |
| "grad_norm": 0.21949157118797302, |
| "learning_rate": 8.842710906687916e-05, |
| "loss": 0.0639, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.179916317991632, |
| "grad_norm": 0.288669615983963, |
| "learning_rate": 8.83564898364958e-05, |
| "loss": 0.0691, |
| "step": 3910 |
| }, |
| { |
| "epoch": 8.200836820083682, |
| "grad_norm": 0.2480822205543518, |
| "learning_rate": 8.828568417916136e-05, |
| "loss": 0.068, |
| "step": 3920 |
| }, |
| { |
| "epoch": 8.221757322175732, |
| "grad_norm": 0.2346440553665161, |
| "learning_rate": 8.821469243901794e-05, |
| "loss": 0.063, |
| "step": 3930 |
| }, |
| { |
| "epoch": 8.242677824267782, |
| "grad_norm": 0.1865086555480957, |
| "learning_rate": 8.814351496111201e-05, |
| "loss": 0.068, |
| "step": 3940 |
| }, |
| { |
| "epoch": 8.263598326359833, |
| "grad_norm": 0.21872031688690186, |
| "learning_rate": 8.807215209139293e-05, |
| "loss": 0.0697, |
| "step": 3950 |
| }, |
| { |
| "epoch": 8.284518828451883, |
| "grad_norm": 0.2085472047328949, |
| "learning_rate": 8.8000604176711e-05, |
| "loss": 0.0729, |
| "step": 3960 |
| }, |
| { |
| "epoch": 8.305439330543933, |
| "grad_norm": 0.18910904228687286, |
| "learning_rate": 8.792887156481598e-05, |
| "loss": 0.0678, |
| "step": 3970 |
| }, |
| { |
| "epoch": 8.326359832635983, |
| "grad_norm": 0.24901749193668365, |
| "learning_rate": 8.785695460435534e-05, |
| "loss": 0.0708, |
| "step": 3980 |
| }, |
| { |
| "epoch": 8.347280334728033, |
| "grad_norm": 0.2784505784511566, |
| "learning_rate": 8.778485364487248e-05, |
| "loss": 0.0592, |
| "step": 3990 |
| }, |
| { |
| "epoch": 8.368200836820083, |
| "grad_norm": 0.2604342997074127, |
| "learning_rate": 8.771256903680519e-05, |
| "loss": 0.0638, |
| "step": 4000 |
| }, |
| { |
| "epoch": 8.389121338912133, |
| "grad_norm": 0.16785018146038055, |
| "learning_rate": 8.764010113148382e-05, |
| "loss": 0.067, |
| "step": 4010 |
| }, |
| { |
| "epoch": 8.410041841004183, |
| "grad_norm": 0.18678627908229828, |
| "learning_rate": 8.756745028112959e-05, |
| "loss": 0.0595, |
| "step": 4020 |
| }, |
| { |
| "epoch": 8.430962343096235, |
| "grad_norm": 0.24131298065185547, |
| "learning_rate": 8.749461683885296e-05, |
| "loss": 0.0648, |
| "step": 4030 |
| }, |
| { |
| "epoch": 8.451882845188285, |
| "grad_norm": 0.2394871860742569, |
| "learning_rate": 8.742160115865179e-05, |
| "loss": 0.0648, |
| "step": 4040 |
| }, |
| { |
| "epoch": 8.472803347280335, |
| "grad_norm": 0.25061601400375366, |
| "learning_rate": 8.734840359540974e-05, |
| "loss": 0.071, |
| "step": 4050 |
| }, |
| { |
| "epoch": 8.493723849372385, |
| "grad_norm": 0.2007174789905548, |
| "learning_rate": 8.727502450489446e-05, |
| "loss": 0.0652, |
| "step": 4060 |
| }, |
| { |
| "epoch": 8.514644351464435, |
| "grad_norm": 0.27728864550590515, |
| "learning_rate": 8.720146424375591e-05, |
| "loss": 0.0708, |
| "step": 4070 |
| }, |
| { |
| "epoch": 8.535564853556485, |
| "grad_norm": 0.23519816994667053, |
| "learning_rate": 8.712772316952458e-05, |
| "loss": 0.0642, |
| "step": 4080 |
| }, |
| { |
| "epoch": 8.556485355648535, |
| "grad_norm": 0.2769893705844879, |
| "learning_rate": 8.705380164060982e-05, |
| "loss": 0.0643, |
| "step": 4090 |
| }, |
| { |
| "epoch": 8.577405857740585, |
| "grad_norm": 0.23661431670188904, |
| "learning_rate": 8.697970001629799e-05, |
| "loss": 0.0624, |
| "step": 4100 |
| }, |
| { |
| "epoch": 8.598326359832637, |
| "grad_norm": 0.30667105317115784, |
| "learning_rate": 8.690541865675084e-05, |
| "loss": 0.0739, |
| "step": 4110 |
| }, |
| { |
| "epoch": 8.619246861924687, |
| "grad_norm": 0.21583613753318787, |
| "learning_rate": 8.68309579230037e-05, |
| "loss": 0.0675, |
| "step": 4120 |
| }, |
| { |
| "epoch": 8.640167364016737, |
| "grad_norm": 0.3050605356693268, |
| "learning_rate": 8.675631817696372e-05, |
| "loss": 0.0675, |
| "step": 4130 |
| }, |
| { |
| "epoch": 8.661087866108787, |
| "grad_norm": 0.19947989284992218, |
| "learning_rate": 8.668149978140808e-05, |
| "loss": 0.0646, |
| "step": 4140 |
| }, |
| { |
| "epoch": 8.682008368200837, |
| "grad_norm": 0.25193777680397034, |
| "learning_rate": 8.66065030999823e-05, |
| "loss": 0.0655, |
| "step": 4150 |
| }, |
| { |
| "epoch": 8.702928870292887, |
| "grad_norm": 0.1913091540336609, |
| "learning_rate": 8.653132849719845e-05, |
| "loss": 0.0591, |
| "step": 4160 |
| }, |
| { |
| "epoch": 8.723849372384937, |
| "grad_norm": 0.23112277686595917, |
| "learning_rate": 8.64559763384333e-05, |
| "loss": 0.0597, |
| "step": 4170 |
| }, |
| { |
| "epoch": 8.744769874476987, |
| "grad_norm": 0.2377346009016037, |
| "learning_rate": 8.638044698992669e-05, |
| "loss": 0.0654, |
| "step": 4180 |
| }, |
| { |
| "epoch": 8.765690376569038, |
| "grad_norm": 0.2642386853694916, |
| "learning_rate": 8.630474081877959e-05, |
| "loss": 0.0664, |
| "step": 4190 |
| }, |
| { |
| "epoch": 8.786610878661088, |
| "grad_norm": 0.21720066666603088, |
| "learning_rate": 8.62288581929525e-05, |
| "loss": 0.0574, |
| "step": 4200 |
| }, |
| { |
| "epoch": 8.807531380753138, |
| "grad_norm": 0.1970827728509903, |
| "learning_rate": 8.615279948126343e-05, |
| "loss": 0.0601, |
| "step": 4210 |
| }, |
| { |
| "epoch": 8.828451882845188, |
| "grad_norm": 0.2270515263080597, |
| "learning_rate": 8.60765650533863e-05, |
| "loss": 0.0629, |
| "step": 4220 |
| }, |
| { |
| "epoch": 8.849372384937238, |
| "grad_norm": 0.31851762533187866, |
| "learning_rate": 8.60001552798491e-05, |
| "loss": 0.0605, |
| "step": 4230 |
| }, |
| { |
| "epoch": 8.870292887029288, |
| "grad_norm": 0.18598251044750214, |
| "learning_rate": 8.592357053203202e-05, |
| "loss": 0.0604, |
| "step": 4240 |
| }, |
| { |
| "epoch": 8.891213389121338, |
| "grad_norm": 0.2765622138977051, |
| "learning_rate": 8.58468111821657e-05, |
| "loss": 0.0719, |
| "step": 4250 |
| }, |
| { |
| "epoch": 8.91213389121339, |
| "grad_norm": 0.2147555947303772, |
| "learning_rate": 8.576987760332943e-05, |
| "loss": 0.0703, |
| "step": 4260 |
| }, |
| { |
| "epoch": 8.93305439330544, |
| "grad_norm": 0.21149249374866486, |
| "learning_rate": 8.56927701694493e-05, |
| "loss": 0.0743, |
| "step": 4270 |
| }, |
| { |
| "epoch": 8.95397489539749, |
| "grad_norm": 0.2396778017282486, |
| "learning_rate": 8.561548925529643e-05, |
| "loss": 0.0644, |
| "step": 4280 |
| }, |
| { |
| "epoch": 8.97489539748954, |
| "grad_norm": 0.21674704551696777, |
| "learning_rate": 8.553803523648506e-05, |
| "loss": 0.0596, |
| "step": 4290 |
| }, |
| { |
| "epoch": 8.99581589958159, |
| "grad_norm": 0.3351154923439026, |
| "learning_rate": 8.546040848947086e-05, |
| "loss": 0.0668, |
| "step": 4300 |
| }, |
| { |
| "epoch": 9.01673640167364, |
| "grad_norm": 0.2393759787082672, |
| "learning_rate": 8.538260939154894e-05, |
| "loss": 0.0682, |
| "step": 4310 |
| }, |
| { |
| "epoch": 9.03765690376569, |
| "grad_norm": 0.2055775374174118, |
| "learning_rate": 8.530463832085218e-05, |
| "loss": 0.0683, |
| "step": 4320 |
| }, |
| { |
| "epoch": 9.05857740585774, |
| "grad_norm": 0.23119060695171356, |
| "learning_rate": 8.522649565634927e-05, |
| "loss": 0.0596, |
| "step": 4330 |
| }, |
| { |
| "epoch": 9.07949790794979, |
| "grad_norm": 0.2209976464509964, |
| "learning_rate": 8.51481817778429e-05, |
| "loss": 0.0651, |
| "step": 4340 |
| }, |
| { |
| "epoch": 9.100418410041842, |
| "grad_norm": 0.2024475336074829, |
| "learning_rate": 8.506969706596797e-05, |
| "loss": 0.062, |
| "step": 4350 |
| }, |
| { |
| "epoch": 9.121338912133892, |
| "grad_norm": 0.18801653385162354, |
| "learning_rate": 8.499104190218964e-05, |
| "loss": 0.0673, |
| "step": 4360 |
| }, |
| { |
| "epoch": 9.142259414225942, |
| "grad_norm": 0.1942189484834671, |
| "learning_rate": 8.49122166688016e-05, |
| "loss": 0.0647, |
| "step": 4370 |
| }, |
| { |
| "epoch": 9.163179916317992, |
| "grad_norm": 0.2365776151418686, |
| "learning_rate": 8.483322174892404e-05, |
| "loss": 0.0631, |
| "step": 4380 |
| }, |
| { |
| "epoch": 9.184100418410042, |
| "grad_norm": 0.22303998470306396, |
| "learning_rate": 8.475405752650199e-05, |
| "loss": 0.0629, |
| "step": 4390 |
| }, |
| { |
| "epoch": 9.205020920502092, |
| "grad_norm": 0.2030114233493805, |
| "learning_rate": 8.467472438630328e-05, |
| "loss": 0.063, |
| "step": 4400 |
| }, |
| { |
| "epoch": 9.225941422594142, |
| "grad_norm": 0.20828013122081757, |
| "learning_rate": 8.459522271391682e-05, |
| "loss": 0.0733, |
| "step": 4410 |
| }, |
| { |
| "epoch": 9.246861924686192, |
| "grad_norm": 0.22515518963336945, |
| "learning_rate": 8.451555289575057e-05, |
| "loss": 0.0606, |
| "step": 4420 |
| }, |
| { |
| "epoch": 9.267782426778243, |
| "grad_norm": 0.20329605042934418, |
| "learning_rate": 8.443571531902981e-05, |
| "loss": 0.0634, |
| "step": 4430 |
| }, |
| { |
| "epoch": 9.288702928870293, |
| "grad_norm": 0.21866239607334137, |
| "learning_rate": 8.435571037179512e-05, |
| "loss": 0.0587, |
| "step": 4440 |
| }, |
| { |
| "epoch": 9.309623430962343, |
| "grad_norm": 0.2187184989452362, |
| "learning_rate": 8.427553844290062e-05, |
| "loss": 0.0731, |
| "step": 4450 |
| }, |
| { |
| "epoch": 9.330543933054393, |
| "grad_norm": 0.2076680064201355, |
| "learning_rate": 8.419519992201201e-05, |
| "loss": 0.0593, |
| "step": 4460 |
| }, |
| { |
| "epoch": 9.351464435146443, |
| "grad_norm": 0.29090818762779236, |
| "learning_rate": 8.411469519960469e-05, |
| "loss": 0.0691, |
| "step": 4470 |
| }, |
| { |
| "epoch": 9.372384937238493, |
| "grad_norm": 0.20809774100780487, |
| "learning_rate": 8.403402466696182e-05, |
| "loss": 0.0615, |
| "step": 4480 |
| }, |
| { |
| "epoch": 9.393305439330543, |
| "grad_norm": 0.23946796357631683, |
| "learning_rate": 8.395318871617255e-05, |
| "loss": 0.0616, |
| "step": 4490 |
| }, |
| { |
| "epoch": 9.414225941422593, |
| "grad_norm": 0.21378174424171448, |
| "learning_rate": 8.387218774012992e-05, |
| "loss": 0.0585, |
| "step": 4500 |
| }, |
| { |
| "epoch": 9.435146443514645, |
| "grad_norm": 0.21545960009098053, |
| "learning_rate": 8.379102213252915e-05, |
| "loss": 0.0641, |
| "step": 4510 |
| }, |
| { |
| "epoch": 9.456066945606695, |
| "grad_norm": 0.19115141034126282, |
| "learning_rate": 8.370969228786556e-05, |
| "loss": 0.0729, |
| "step": 4520 |
| }, |
| { |
| "epoch": 9.476987447698745, |
| "grad_norm": 0.21977156400680542, |
| "learning_rate": 8.362819860143275e-05, |
| "loss": 0.0664, |
| "step": 4530 |
| }, |
| { |
| "epoch": 9.497907949790795, |
| "grad_norm": 0.18935155868530273, |
| "learning_rate": 8.354654146932066e-05, |
| "loss": 0.0613, |
| "step": 4540 |
| }, |
| { |
| "epoch": 9.518828451882845, |
| "grad_norm": 0.20135313272476196, |
| "learning_rate": 8.346472128841364e-05, |
| "loss": 0.0619, |
| "step": 4550 |
| }, |
| { |
| "epoch": 9.539748953974895, |
| "grad_norm": 0.1766224056482315, |
| "learning_rate": 8.338273845638848e-05, |
| "loss": 0.0565, |
| "step": 4560 |
| }, |
| { |
| "epoch": 9.560669456066945, |
| "grad_norm": 0.18868570029735565, |
| "learning_rate": 8.330059337171258e-05, |
| "loss": 0.0633, |
| "step": 4570 |
| }, |
| { |
| "epoch": 9.581589958158997, |
| "grad_norm": 0.2175145000219345, |
| "learning_rate": 8.32182864336419e-05, |
| "loss": 0.0641, |
| "step": 4580 |
| }, |
| { |
| "epoch": 9.602510460251047, |
| "grad_norm": 0.3119242787361145, |
| "learning_rate": 8.313581804221908e-05, |
| "loss": 0.0681, |
| "step": 4590 |
| }, |
| { |
| "epoch": 9.623430962343097, |
| "grad_norm": 0.149024099111557, |
| "learning_rate": 8.305318859827147e-05, |
| "loss": 0.0601, |
| "step": 4600 |
| }, |
| { |
| "epoch": 9.644351464435147, |
| "grad_norm": 0.23879316449165344, |
| "learning_rate": 8.297039850340923e-05, |
| "loss": 0.0595, |
| "step": 4610 |
| }, |
| { |
| "epoch": 9.665271966527197, |
| "grad_norm": 0.21861664950847626, |
| "learning_rate": 8.288744816002331e-05, |
| "loss": 0.0653, |
| "step": 4620 |
| }, |
| { |
| "epoch": 9.686192468619247, |
| "grad_norm": 0.1617562174797058, |
| "learning_rate": 8.280433797128357e-05, |
| "loss": 0.0651, |
| "step": 4630 |
| }, |
| { |
| "epoch": 9.707112970711297, |
| "grad_norm": 0.23158331215381622, |
| "learning_rate": 8.272106834113674e-05, |
| "loss": 0.0617, |
| "step": 4640 |
| }, |
| { |
| "epoch": 9.728033472803347, |
| "grad_norm": 0.21476268768310547, |
| "learning_rate": 8.26376396743045e-05, |
| "loss": 0.0625, |
| "step": 4650 |
| }, |
| { |
| "epoch": 9.748953974895397, |
| "grad_norm": 0.21035324037075043, |
| "learning_rate": 8.25540523762815e-05, |
| "loss": 0.0626, |
| "step": 4660 |
| }, |
| { |
| "epoch": 9.769874476987448, |
| "grad_norm": 0.23255029320716858, |
| "learning_rate": 8.247030685333346e-05, |
| "loss": 0.0713, |
| "step": 4670 |
| }, |
| { |
| "epoch": 9.790794979079498, |
| "grad_norm": 0.16255882382392883, |
| "learning_rate": 8.238640351249503e-05, |
| "loss": 0.0643, |
| "step": 4680 |
| }, |
| { |
| "epoch": 9.811715481171548, |
| "grad_norm": 0.2551625669002533, |
| "learning_rate": 8.2302342761568e-05, |
| "loss": 0.0598, |
| "step": 4690 |
| }, |
| { |
| "epoch": 9.832635983263598, |
| "grad_norm": 0.2389603704214096, |
| "learning_rate": 8.221812500911919e-05, |
| "loss": 0.063, |
| "step": 4700 |
| }, |
| { |
| "epoch": 9.853556485355648, |
| "grad_norm": 0.22120417654514313, |
| "learning_rate": 8.213375066447853e-05, |
| "loss": 0.0684, |
| "step": 4710 |
| }, |
| { |
| "epoch": 9.874476987447698, |
| "grad_norm": 0.2859916090965271, |
| "learning_rate": 8.204922013773702e-05, |
| "loss": 0.0602, |
| "step": 4720 |
| }, |
| { |
| "epoch": 9.895397489539748, |
| "grad_norm": 0.23960047960281372, |
| "learning_rate": 8.196453383974478e-05, |
| "loss": 0.0684, |
| "step": 4730 |
| }, |
| { |
| "epoch": 9.9163179916318, |
| "grad_norm": 0.26067450642585754, |
| "learning_rate": 8.187969218210904e-05, |
| "loss": 0.0618, |
| "step": 4740 |
| }, |
| { |
| "epoch": 9.93723849372385, |
| "grad_norm": 0.30207592248916626, |
| "learning_rate": 8.179469557719213e-05, |
| "loss": 0.0587, |
| "step": 4750 |
| }, |
| { |
| "epoch": 9.9581589958159, |
| "grad_norm": 0.18259580433368683, |
| "learning_rate": 8.170954443810948e-05, |
| "loss": 0.0623, |
| "step": 4760 |
| }, |
| { |
| "epoch": 9.97907949790795, |
| "grad_norm": 0.18433575332164764, |
| "learning_rate": 8.162423917872764e-05, |
| "loss": 0.0649, |
| "step": 4770 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.33734753727912903, |
| "learning_rate": 8.153878021366217e-05, |
| "loss": 0.0584, |
| "step": 4780 |
| }, |
| { |
| "epoch": 10.02092050209205, |
| "grad_norm": 0.21706482768058777, |
| "learning_rate": 8.14531679582758e-05, |
| "loss": 0.066, |
| "step": 4790 |
| }, |
| { |
| "epoch": 10.0418410041841, |
| "grad_norm": 0.24494396150112152, |
| "learning_rate": 8.136740282867621e-05, |
| "loss": 0.0621, |
| "step": 4800 |
| }, |
| { |
| "epoch": 10.06276150627615, |
| "grad_norm": 0.20361743867397308, |
| "learning_rate": 8.128148524171418e-05, |
| "loss": 0.0639, |
| "step": 4810 |
| }, |
| { |
| "epoch": 10.0836820083682, |
| "grad_norm": 0.2107476443052292, |
| "learning_rate": 8.119541561498146e-05, |
| "loss": 0.0621, |
| "step": 4820 |
| }, |
| { |
| "epoch": 10.104602510460252, |
| "grad_norm": 0.14518998563289642, |
| "learning_rate": 8.110919436680877e-05, |
| "loss": 0.0536, |
| "step": 4830 |
| }, |
| { |
| "epoch": 10.125523012552302, |
| "grad_norm": 0.2824230194091797, |
| "learning_rate": 8.102282191626378e-05, |
| "loss": 0.062, |
| "step": 4840 |
| }, |
| { |
| "epoch": 10.146443514644352, |
| "grad_norm": 0.21971572935581207, |
| "learning_rate": 8.0936298683149e-05, |
| "loss": 0.0766, |
| "step": 4850 |
| }, |
| { |
| "epoch": 10.167364016736402, |
| "grad_norm": 0.22919374704360962, |
| "learning_rate": 8.084962508799991e-05, |
| "loss": 0.0628, |
| "step": 4860 |
| }, |
| { |
| "epoch": 10.188284518828452, |
| "grad_norm": 0.29920902848243713, |
| "learning_rate": 8.076280155208273e-05, |
| "loss": 0.063, |
| "step": 4870 |
| }, |
| { |
| "epoch": 10.209205020920502, |
| "grad_norm": 0.20193646848201752, |
| "learning_rate": 8.067582849739245e-05, |
| "loss": 0.0556, |
| "step": 4880 |
| }, |
| { |
| "epoch": 10.230125523012552, |
| "grad_norm": 0.18968577682971954, |
| "learning_rate": 8.058870634665079e-05, |
| "loss": 0.0644, |
| "step": 4890 |
| }, |
| { |
| "epoch": 10.251046025104603, |
| "grad_norm": 0.18380005657672882, |
| "learning_rate": 8.050143552330414e-05, |
| "loss": 0.0629, |
| "step": 4900 |
| }, |
| { |
| "epoch": 10.271966527196653, |
| "grad_norm": 0.20432725548744202, |
| "learning_rate": 8.041401645152151e-05, |
| "loss": 0.0615, |
| "step": 4910 |
| }, |
| { |
| "epoch": 10.292887029288703, |
| "grad_norm": 0.20045466721057892, |
| "learning_rate": 8.032644955619239e-05, |
| "loss": 0.0566, |
| "step": 4920 |
| }, |
| { |
| "epoch": 10.313807531380753, |
| "grad_norm": 0.23264667391777039, |
| "learning_rate": 8.023873526292483e-05, |
| "loss": 0.0618, |
| "step": 4930 |
| }, |
| { |
| "epoch": 10.334728033472803, |
| "grad_norm": 0.1525534689426422, |
| "learning_rate": 8.015087399804322e-05, |
| "loss": 0.0619, |
| "step": 4940 |
| }, |
| { |
| "epoch": 10.355648535564853, |
| "grad_norm": 0.24940307438373566, |
| "learning_rate": 8.006286618858635e-05, |
| "loss": 0.057, |
| "step": 4950 |
| }, |
| { |
| "epoch": 10.376569037656903, |
| "grad_norm": 0.23391863703727722, |
| "learning_rate": 7.99747122623052e-05, |
| "loss": 0.063, |
| "step": 4960 |
| }, |
| { |
| "epoch": 10.397489539748953, |
| "grad_norm": 0.18223020434379578, |
| "learning_rate": 7.988641264766097e-05, |
| "loss": 0.0538, |
| "step": 4970 |
| }, |
| { |
| "epoch": 10.418410041841003, |
| "grad_norm": 0.19934974610805511, |
| "learning_rate": 7.9797967773823e-05, |
| "loss": 0.0677, |
| "step": 4980 |
| }, |
| { |
| "epoch": 10.439330543933055, |
| "grad_norm": 0.14090116322040558, |
| "learning_rate": 7.970937807066659e-05, |
| "loss": 0.0647, |
| "step": 4990 |
| }, |
| { |
| "epoch": 10.460251046025105, |
| "grad_norm": 0.18634556233882904, |
| "learning_rate": 7.962064396877098e-05, |
| "loss": 0.061, |
| "step": 5000 |
| }, |
| { |
| "epoch": 10.481171548117155, |
| "grad_norm": 0.2548336982727051, |
| "learning_rate": 7.953176589941722e-05, |
| "loss": 0.0643, |
| "step": 5010 |
| }, |
| { |
| "epoch": 10.502092050209205, |
| "grad_norm": 0.18743976950645447, |
| "learning_rate": 7.944274429458614e-05, |
| "loss": 0.063, |
| "step": 5020 |
| }, |
| { |
| "epoch": 10.523012552301255, |
| "grad_norm": 0.20314089953899384, |
| "learning_rate": 7.93535795869562e-05, |
| "loss": 0.0599, |
| "step": 5030 |
| }, |
| { |
| "epoch": 10.543933054393305, |
| "grad_norm": 0.21856661140918732, |
| "learning_rate": 7.926427220990134e-05, |
| "loss": 0.0594, |
| "step": 5040 |
| }, |
| { |
| "epoch": 10.564853556485355, |
| "grad_norm": 0.20998577773571014, |
| "learning_rate": 7.9174822597489e-05, |
| "loss": 0.0581, |
| "step": 5050 |
| }, |
| { |
| "epoch": 10.585774058577407, |
| "grad_norm": 0.19951078295707703, |
| "learning_rate": 7.908523118447789e-05, |
| "loss": 0.06, |
| "step": 5060 |
| }, |
| { |
| "epoch": 10.606694560669457, |
| "grad_norm": 0.19410060346126556, |
| "learning_rate": 7.89954984063159e-05, |
| "loss": 0.0634, |
| "step": 5070 |
| }, |
| { |
| "epoch": 10.627615062761507, |
| "grad_norm": 0.19956843554973602, |
| "learning_rate": 7.890562469913811e-05, |
| "loss": 0.0519, |
| "step": 5080 |
| }, |
| { |
| "epoch": 10.648535564853557, |
| "grad_norm": 0.17840741574764252, |
| "learning_rate": 7.881561049976447e-05, |
| "loss": 0.0625, |
| "step": 5090 |
| }, |
| { |
| "epoch": 10.669456066945607, |
| "grad_norm": 0.1961093693971634, |
| "learning_rate": 7.872545624569779e-05, |
| "loss": 0.0607, |
| "step": 5100 |
| }, |
| { |
| "epoch": 10.690376569037657, |
| "grad_norm": 0.2057664543390274, |
| "learning_rate": 7.863516237512164e-05, |
| "loss": 0.0654, |
| "step": 5110 |
| }, |
| { |
| "epoch": 10.711297071129707, |
| "grad_norm": 0.18237608671188354, |
| "learning_rate": 7.854472932689815e-05, |
| "loss": 0.0629, |
| "step": 5120 |
| }, |
| { |
| "epoch": 10.732217573221757, |
| "grad_norm": 0.166738823056221, |
| "learning_rate": 7.845415754056591e-05, |
| "loss": 0.0673, |
| "step": 5130 |
| }, |
| { |
| "epoch": 10.753138075313807, |
| "grad_norm": 0.21595996618270874, |
| "learning_rate": 7.836344745633783e-05, |
| "loss": 0.0549, |
| "step": 5140 |
| }, |
| { |
| "epoch": 10.774058577405858, |
| "grad_norm": 0.25722071528434753, |
| "learning_rate": 7.8272599515099e-05, |
| "loss": 0.0576, |
| "step": 5150 |
| }, |
| { |
| "epoch": 10.794979079497908, |
| "grad_norm": 0.19984610378742218, |
| "learning_rate": 7.818161415840453e-05, |
| "loss": 0.0617, |
| "step": 5160 |
| }, |
| { |
| "epoch": 10.815899581589958, |
| "grad_norm": 0.2560693323612213, |
| "learning_rate": 7.809049182847745e-05, |
| "loss": 0.0607, |
| "step": 5170 |
| }, |
| { |
| "epoch": 10.836820083682008, |
| "grad_norm": 0.18944580852985382, |
| "learning_rate": 7.799923296820653e-05, |
| "loss": 0.0489, |
| "step": 5180 |
| }, |
| { |
| "epoch": 10.857740585774058, |
| "grad_norm": 0.2388056218624115, |
| "learning_rate": 7.790783802114408e-05, |
| "loss": 0.0581, |
| "step": 5190 |
| }, |
| { |
| "epoch": 10.878661087866108, |
| "grad_norm": 0.18988242745399475, |
| "learning_rate": 7.781630743150392e-05, |
| "loss": 0.0619, |
| "step": 5200 |
| }, |
| { |
| "epoch": 10.899581589958158, |
| "grad_norm": 0.19641171395778656, |
| "learning_rate": 7.772464164415907e-05, |
| "loss": 0.0673, |
| "step": 5210 |
| }, |
| { |
| "epoch": 10.92050209205021, |
| "grad_norm": 0.24006357789039612, |
| "learning_rate": 7.763284110463973e-05, |
| "loss": 0.0651, |
| "step": 5220 |
| }, |
| { |
| "epoch": 10.94142259414226, |
| "grad_norm": 0.1970333606004715, |
| "learning_rate": 7.754090625913099e-05, |
| "loss": 0.0585, |
| "step": 5230 |
| }, |
| { |
| "epoch": 10.96234309623431, |
| "grad_norm": 0.26474833488464355, |
| "learning_rate": 7.744883755447075e-05, |
| "loss": 0.0637, |
| "step": 5240 |
| }, |
| { |
| "epoch": 10.98326359832636, |
| "grad_norm": 0.23458439111709595, |
| "learning_rate": 7.735663543814749e-05, |
| "loss": 0.0567, |
| "step": 5250 |
| }, |
| { |
| "epoch": 11.00418410041841, |
| "grad_norm": 0.17837220430374146, |
| "learning_rate": 7.726430035829813e-05, |
| "loss": 0.0551, |
| "step": 5260 |
| }, |
| { |
| "epoch": 11.02510460251046, |
| "grad_norm": 0.22072383761405945, |
| "learning_rate": 7.717183276370586e-05, |
| "loss": 0.0656, |
| "step": 5270 |
| }, |
| { |
| "epoch": 11.04602510460251, |
| "grad_norm": 0.1936493068933487, |
| "learning_rate": 7.707923310379794e-05, |
| "loss": 0.0636, |
| "step": 5280 |
| }, |
| { |
| "epoch": 11.06694560669456, |
| "grad_norm": 0.2801334261894226, |
| "learning_rate": 7.698650182864351e-05, |
| "loss": 0.0662, |
| "step": 5290 |
| }, |
| { |
| "epoch": 11.087866108786612, |
| "grad_norm": 0.21320787072181702, |
| "learning_rate": 7.689363938895138e-05, |
| "loss": 0.0628, |
| "step": 5300 |
| }, |
| { |
| "epoch": 11.108786610878662, |
| "grad_norm": 0.2542664110660553, |
| "learning_rate": 7.680064623606791e-05, |
| "loss": 0.0564, |
| "step": 5310 |
| }, |
| { |
| "epoch": 11.129707112970712, |
| "grad_norm": 0.19938422739505768, |
| "learning_rate": 7.670752282197476e-05, |
| "loss": 0.0603, |
| "step": 5320 |
| }, |
| { |
| "epoch": 11.150627615062762, |
| "grad_norm": 0.2444109469652176, |
| "learning_rate": 7.66142695992867e-05, |
| "loss": 0.0617, |
| "step": 5330 |
| }, |
| { |
| "epoch": 11.171548117154812, |
| "grad_norm": 0.17026054859161377, |
| "learning_rate": 7.652088702124944e-05, |
| "loss": 0.0552, |
| "step": 5340 |
| }, |
| { |
| "epoch": 11.192468619246862, |
| "grad_norm": 0.20425626635551453, |
| "learning_rate": 7.64273755417374e-05, |
| "loss": 0.0567, |
| "step": 5350 |
| }, |
| { |
| "epoch": 11.213389121338912, |
| "grad_norm": 0.2587556838989258, |
| "learning_rate": 7.633373561525148e-05, |
| "loss": 0.0628, |
| "step": 5360 |
| }, |
| { |
| "epoch": 11.234309623430962, |
| "grad_norm": 0.19820363819599152, |
| "learning_rate": 7.623996769691691e-05, |
| "loss": 0.0652, |
| "step": 5370 |
| }, |
| { |
| "epoch": 11.255230125523013, |
| "grad_norm": 0.216740220785141, |
| "learning_rate": 7.614607224248103e-05, |
| "loss": 0.0674, |
| "step": 5380 |
| }, |
| { |
| "epoch": 11.276150627615063, |
| "grad_norm": 0.22586363554000854, |
| "learning_rate": 7.605204970831096e-05, |
| "loss": 0.0602, |
| "step": 5390 |
| }, |
| { |
| "epoch": 11.297071129707113, |
| "grad_norm": 0.29689839482307434, |
| "learning_rate": 7.595790055139163e-05, |
| "loss": 0.0618, |
| "step": 5400 |
| }, |
| { |
| "epoch": 11.317991631799163, |
| "grad_norm": 0.3049317002296448, |
| "learning_rate": 7.586362522932323e-05, |
| "loss": 0.0603, |
| "step": 5410 |
| }, |
| { |
| "epoch": 11.338912133891213, |
| "grad_norm": 0.20710164308547974, |
| "learning_rate": 7.576922420031929e-05, |
| "loss": 0.0635, |
| "step": 5420 |
| }, |
| { |
| "epoch": 11.359832635983263, |
| "grad_norm": 0.23157654702663422, |
| "learning_rate": 7.567469792320428e-05, |
| "loss": 0.0598, |
| "step": 5430 |
| }, |
| { |
| "epoch": 11.380753138075313, |
| "grad_norm": 0.20519325137138367, |
| "learning_rate": 7.558004685741137e-05, |
| "loss": 0.0582, |
| "step": 5440 |
| }, |
| { |
| "epoch": 11.401673640167363, |
| "grad_norm": 0.29819655418395996, |
| "learning_rate": 7.548527146298036e-05, |
| "loss": 0.065, |
| "step": 5450 |
| }, |
| { |
| "epoch": 11.422594142259415, |
| "grad_norm": 0.16741812229156494, |
| "learning_rate": 7.539037220055527e-05, |
| "loss": 0.0594, |
| "step": 5460 |
| }, |
| { |
| "epoch": 11.443514644351465, |
| "grad_norm": 0.19318008422851562, |
| "learning_rate": 7.529534953138213e-05, |
| "loss": 0.061, |
| "step": 5470 |
| }, |
| { |
| "epoch": 11.464435146443515, |
| "grad_norm": 0.27596399188041687, |
| "learning_rate": 7.520020391730684e-05, |
| "loss": 0.0546, |
| "step": 5480 |
| }, |
| { |
| "epoch": 11.485355648535565, |
| "grad_norm": 0.1646844744682312, |
| "learning_rate": 7.510493582077281e-05, |
| "loss": 0.0554, |
| "step": 5490 |
| }, |
| { |
| "epoch": 11.506276150627615, |
| "grad_norm": 0.19541478157043457, |
| "learning_rate": 7.500954570481882e-05, |
| "loss": 0.0586, |
| "step": 5500 |
| }, |
| { |
| "epoch": 11.527196652719665, |
| "grad_norm": 0.23444367945194244, |
| "learning_rate": 7.491403403307662e-05, |
| "loss": 0.0538, |
| "step": 5510 |
| }, |
| { |
| "epoch": 11.548117154811715, |
| "grad_norm": 0.19553513824939728, |
| "learning_rate": 7.481840126976885e-05, |
| "loss": 0.0531, |
| "step": 5520 |
| }, |
| { |
| "epoch": 11.569037656903765, |
| "grad_norm": 0.22565273940563202, |
| "learning_rate": 7.472264787970666e-05, |
| "loss": 0.053, |
| "step": 5530 |
| }, |
| { |
| "epoch": 11.589958158995817, |
| "grad_norm": 0.20270612835884094, |
| "learning_rate": 7.462677432828751e-05, |
| "loss": 0.0698, |
| "step": 5540 |
| }, |
| { |
| "epoch": 11.610878661087867, |
| "grad_norm": 0.19266432523727417, |
| "learning_rate": 7.453078108149287e-05, |
| "loss": 0.0562, |
| "step": 5550 |
| }, |
| { |
| "epoch": 11.631799163179917, |
| "grad_norm": 0.22119610011577606, |
| "learning_rate": 7.443466860588599e-05, |
| "loss": 0.0561, |
| "step": 5560 |
| }, |
| { |
| "epoch": 11.652719665271967, |
| "grad_norm": 0.15727278590202332, |
| "learning_rate": 7.43384373686096e-05, |
| "loss": 0.0602, |
| "step": 5570 |
| }, |
| { |
| "epoch": 11.673640167364017, |
| "grad_norm": 0.26173195242881775, |
| "learning_rate": 7.424208783738367e-05, |
| "loss": 0.0555, |
| "step": 5580 |
| }, |
| { |
| "epoch": 11.694560669456067, |
| "grad_norm": 0.1976771056652069, |
| "learning_rate": 7.414562048050315e-05, |
| "loss": 0.0585, |
| "step": 5590 |
| }, |
| { |
| "epoch": 11.715481171548117, |
| "grad_norm": 0.16339626908302307, |
| "learning_rate": 7.404903576683559e-05, |
| "loss": 0.0628, |
| "step": 5600 |
| }, |
| { |
| "epoch": 11.736401673640167, |
| "grad_norm": 0.2986162602901459, |
| "learning_rate": 7.3952334165819e-05, |
| "loss": 0.0607, |
| "step": 5610 |
| }, |
| { |
| "epoch": 11.757322175732218, |
| "grad_norm": 0.1576758623123169, |
| "learning_rate": 7.385551614745952e-05, |
| "loss": 0.0641, |
| "step": 5620 |
| }, |
| { |
| "epoch": 11.778242677824268, |
| "grad_norm": 0.22546252608299255, |
| "learning_rate": 7.375858218232905e-05, |
| "loss": 0.0587, |
| "step": 5630 |
| }, |
| { |
| "epoch": 11.799163179916318, |
| "grad_norm": 0.22091388702392578, |
| "learning_rate": 7.366153274156312e-05, |
| "loss": 0.0519, |
| "step": 5640 |
| }, |
| { |
| "epoch": 11.820083682008368, |
| "grad_norm": 0.17188245058059692, |
| "learning_rate": 7.356436829685844e-05, |
| "loss": 0.0562, |
| "step": 5650 |
| }, |
| { |
| "epoch": 11.841004184100418, |
| "grad_norm": 0.1593337208032608, |
| "learning_rate": 7.346708932047074e-05, |
| "loss": 0.055, |
| "step": 5660 |
| }, |
| { |
| "epoch": 11.861924686192468, |
| "grad_norm": 0.30180731415748596, |
| "learning_rate": 7.336969628521237e-05, |
| "loss": 0.0586, |
| "step": 5670 |
| }, |
| { |
| "epoch": 11.882845188284518, |
| "grad_norm": 0.18105119466781616, |
| "learning_rate": 7.32721896644501e-05, |
| "loss": 0.0551, |
| "step": 5680 |
| }, |
| { |
| "epoch": 11.903765690376568, |
| "grad_norm": 0.14182309806346893, |
| "learning_rate": 7.317456993210272e-05, |
| "loss": 0.0523, |
| "step": 5690 |
| }, |
| { |
| "epoch": 11.92468619246862, |
| "grad_norm": 0.20939673483371735, |
| "learning_rate": 7.307683756263881e-05, |
| "loss": 0.067, |
| "step": 5700 |
| }, |
| { |
| "epoch": 11.94560669456067, |
| "grad_norm": 0.2384624034166336, |
| "learning_rate": 7.297899303107441e-05, |
| "loss": 0.0542, |
| "step": 5710 |
| }, |
| { |
| "epoch": 11.96652719665272, |
| "grad_norm": 0.21782812476158142, |
| "learning_rate": 7.288103681297068e-05, |
| "loss": 0.0597, |
| "step": 5720 |
| }, |
| { |
| "epoch": 11.98744769874477, |
| "grad_norm": 0.27633702754974365, |
| "learning_rate": 7.278296938443166e-05, |
| "loss": 0.0661, |
| "step": 5730 |
| }, |
| { |
| "epoch": 12.00836820083682, |
| "grad_norm": 0.21131427586078644, |
| "learning_rate": 7.26847912221019e-05, |
| "loss": 0.0601, |
| "step": 5740 |
| }, |
| { |
| "epoch": 12.02928870292887, |
| "grad_norm": 0.15685714781284332, |
| "learning_rate": 7.258650280316415e-05, |
| "loss": 0.0566, |
| "step": 5750 |
| }, |
| { |
| "epoch": 12.05020920502092, |
| "grad_norm": 0.20454737544059753, |
| "learning_rate": 7.248810460533706e-05, |
| "loss": 0.0563, |
| "step": 5760 |
| }, |
| { |
| "epoch": 12.07112970711297, |
| "grad_norm": 0.23846374452114105, |
| "learning_rate": 7.238959710687282e-05, |
| "loss": 0.058, |
| "step": 5770 |
| }, |
| { |
| "epoch": 12.092050209205022, |
| "grad_norm": 0.1912502944469452, |
| "learning_rate": 7.229098078655489e-05, |
| "loss": 0.0568, |
| "step": 5780 |
| }, |
| { |
| "epoch": 12.112970711297072, |
| "grad_norm": 0.16738390922546387, |
| "learning_rate": 7.219225612369565e-05, |
| "loss": 0.0621, |
| "step": 5790 |
| }, |
| { |
| "epoch": 12.133891213389122, |
| "grad_norm": 0.21919557452201843, |
| "learning_rate": 7.209342359813404e-05, |
| "loss": 0.0589, |
| "step": 5800 |
| }, |
| { |
| "epoch": 12.154811715481172, |
| "grad_norm": 0.21319399774074554, |
| "learning_rate": 7.199448369023327e-05, |
| "loss": 0.0539, |
| "step": 5810 |
| }, |
| { |
| "epoch": 12.175732217573222, |
| "grad_norm": 0.14287406206130981, |
| "learning_rate": 7.189543688087845e-05, |
| "loss": 0.0541, |
| "step": 5820 |
| }, |
| { |
| "epoch": 12.196652719665272, |
| "grad_norm": 0.15173019468784332, |
| "learning_rate": 7.17962836514743e-05, |
| "loss": 0.0537, |
| "step": 5830 |
| }, |
| { |
| "epoch": 12.217573221757322, |
| "grad_norm": 0.18111112713813782, |
| "learning_rate": 7.169702448394279e-05, |
| "loss": 0.0619, |
| "step": 5840 |
| }, |
| { |
| "epoch": 12.238493723849372, |
| "grad_norm": 0.21038256585597992, |
| "learning_rate": 7.159765986072071e-05, |
| "loss": 0.0606, |
| "step": 5850 |
| }, |
| { |
| "epoch": 12.259414225941423, |
| "grad_norm": 0.20237678289413452, |
| "learning_rate": 7.149819026475751e-05, |
| "loss": 0.0645, |
| "step": 5860 |
| }, |
| { |
| "epoch": 12.280334728033473, |
| "grad_norm": 0.23611639440059662, |
| "learning_rate": 7.139861617951275e-05, |
| "loss": 0.0623, |
| "step": 5870 |
| }, |
| { |
| "epoch": 12.301255230125523, |
| "grad_norm": 0.22483764588832855, |
| "learning_rate": 7.129893808895395e-05, |
| "loss": 0.0583, |
| "step": 5880 |
| }, |
| { |
| "epoch": 12.322175732217573, |
| "grad_norm": 0.25868797302246094, |
| "learning_rate": 7.119915647755404e-05, |
| "loss": 0.0678, |
| "step": 5890 |
| }, |
| { |
| "epoch": 12.343096234309623, |
| "grad_norm": 0.23106709122657776, |
| "learning_rate": 7.109927183028914e-05, |
| "loss": 0.0614, |
| "step": 5900 |
| }, |
| { |
| "epoch": 12.364016736401673, |
| "grad_norm": 0.23294997215270996, |
| "learning_rate": 7.099928463263619e-05, |
| "loss": 0.0569, |
| "step": 5910 |
| }, |
| { |
| "epoch": 12.384937238493723, |
| "grad_norm": 0.1758103370666504, |
| "learning_rate": 7.08991953705705e-05, |
| "loss": 0.052, |
| "step": 5920 |
| }, |
| { |
| "epoch": 12.405857740585773, |
| "grad_norm": 0.22264644503593445, |
| "learning_rate": 7.07990045305635e-05, |
| "loss": 0.0599, |
| "step": 5930 |
| }, |
| { |
| "epoch": 12.426778242677825, |
| "grad_norm": 0.21731536090373993, |
| "learning_rate": 7.069871259958034e-05, |
| "loss": 0.055, |
| "step": 5940 |
| }, |
| { |
| "epoch": 12.447698744769875, |
| "grad_norm": 0.15914727747440338, |
| "learning_rate": 7.059832006507745e-05, |
| "loss": 0.0544, |
| "step": 5950 |
| }, |
| { |
| "epoch": 12.468619246861925, |
| "grad_norm": 0.2970428764820099, |
| "learning_rate": 7.049782741500028e-05, |
| "loss": 0.0585, |
| "step": 5960 |
| }, |
| { |
| "epoch": 12.489539748953975, |
| "grad_norm": 0.24060587584972382, |
| "learning_rate": 7.039723513778087e-05, |
| "loss": 0.057, |
| "step": 5970 |
| }, |
| { |
| "epoch": 12.510460251046025, |
| "grad_norm": 0.27191680669784546, |
| "learning_rate": 7.029654372233544e-05, |
| "loss": 0.062, |
| "step": 5980 |
| }, |
| { |
| "epoch": 12.531380753138075, |
| "grad_norm": 0.1836891770362854, |
| "learning_rate": 7.019575365806215e-05, |
| "loss": 0.0581, |
| "step": 5990 |
| }, |
| { |
| "epoch": 12.552301255230125, |
| "grad_norm": 0.22609245777130127, |
| "learning_rate": 7.009486543483858e-05, |
| "loss": 0.0571, |
| "step": 6000 |
| }, |
| { |
| "epoch": 12.573221757322175, |
| "grad_norm": 0.23014889657497406, |
| "learning_rate": 6.999387954301934e-05, |
| "loss": 0.0641, |
| "step": 6010 |
| }, |
| { |
| "epoch": 12.594142259414227, |
| "grad_norm": 0.19445940852165222, |
| "learning_rate": 6.989279647343388e-05, |
| "loss": 0.0525, |
| "step": 6020 |
| }, |
| { |
| "epoch": 12.615062761506277, |
| "grad_norm": 0.17028450965881348, |
| "learning_rate": 6.979161671738382e-05, |
| "loss": 0.0571, |
| "step": 6030 |
| }, |
| { |
| "epoch": 12.635983263598327, |
| "grad_norm": 0.18240980803966522, |
| "learning_rate": 6.969034076664085e-05, |
| "loss": 0.053, |
| "step": 6040 |
| }, |
| { |
| "epoch": 12.656903765690377, |
| "grad_norm": 0.19363874197006226, |
| "learning_rate": 6.958896911344411e-05, |
| "loss": 0.0569, |
| "step": 6050 |
| }, |
| { |
| "epoch": 12.677824267782427, |
| "grad_norm": 0.19600875675678253, |
| "learning_rate": 6.948750225049791e-05, |
| "loss": 0.0483, |
| "step": 6060 |
| }, |
| { |
| "epoch": 12.698744769874477, |
| "grad_norm": 0.17009440064430237, |
| "learning_rate": 6.938594067096936e-05, |
| "loss": 0.0503, |
| "step": 6070 |
| }, |
| { |
| "epoch": 12.719665271966527, |
| "grad_norm": 0.23083089292049408, |
| "learning_rate": 6.928428486848587e-05, |
| "loss": 0.0562, |
| "step": 6080 |
| }, |
| { |
| "epoch": 12.740585774058577, |
| "grad_norm": 0.18276119232177734, |
| "learning_rate": 6.918253533713282e-05, |
| "loss": 0.0569, |
| "step": 6090 |
| }, |
| { |
| "epoch": 12.761506276150628, |
| "grad_norm": 0.19873914122581482, |
| "learning_rate": 6.908069257145118e-05, |
| "loss": 0.057, |
| "step": 6100 |
| }, |
| { |
| "epoch": 12.782426778242678, |
| "grad_norm": 0.21409256756305695, |
| "learning_rate": 6.897875706643506e-05, |
| "loss": 0.0528, |
| "step": 6110 |
| }, |
| { |
| "epoch": 12.803347280334728, |
| "grad_norm": 0.1738913506269455, |
| "learning_rate": 6.887672931752927e-05, |
| "loss": 0.0529, |
| "step": 6120 |
| }, |
| { |
| "epoch": 12.824267782426778, |
| "grad_norm": 0.20664235949516296, |
| "learning_rate": 6.877460982062706e-05, |
| "loss": 0.0551, |
| "step": 6130 |
| }, |
| { |
| "epoch": 12.845188284518828, |
| "grad_norm": 0.2231292873620987, |
| "learning_rate": 6.86723990720675e-05, |
| "loss": 0.06, |
| "step": 6140 |
| }, |
| { |
| "epoch": 12.866108786610878, |
| "grad_norm": 0.2546100914478302, |
| "learning_rate": 6.857009756863326e-05, |
| "loss": 0.0544, |
| "step": 6150 |
| }, |
| { |
| "epoch": 12.887029288702928, |
| "grad_norm": 0.18180951476097107, |
| "learning_rate": 6.846770580754807e-05, |
| "loss": 0.0524, |
| "step": 6160 |
| }, |
| { |
| "epoch": 12.907949790794978, |
| "grad_norm": 0.2514965534210205, |
| "learning_rate": 6.836522428647438e-05, |
| "loss": 0.0591, |
| "step": 6170 |
| }, |
| { |
| "epoch": 12.92887029288703, |
| "grad_norm": 0.21483202278614044, |
| "learning_rate": 6.826265350351083e-05, |
| "loss": 0.0542, |
| "step": 6180 |
| }, |
| { |
| "epoch": 12.94979079497908, |
| "grad_norm": 0.21657870709896088, |
| "learning_rate": 6.815999395719e-05, |
| "loss": 0.0586, |
| "step": 6190 |
| }, |
| { |
| "epoch": 12.97071129707113, |
| "grad_norm": 0.20378071069717407, |
| "learning_rate": 6.805724614647586e-05, |
| "loss": 0.0574, |
| "step": 6200 |
| }, |
| { |
| "epoch": 12.99163179916318, |
| "grad_norm": 0.11441192775964737, |
| "learning_rate": 6.795441057076136e-05, |
| "loss": 0.0634, |
| "step": 6210 |
| }, |
| { |
| "epoch": 13.01255230125523, |
| "grad_norm": 0.15164940059185028, |
| "learning_rate": 6.785148772986603e-05, |
| "loss": 0.0487, |
| "step": 6220 |
| }, |
| { |
| "epoch": 13.03347280334728, |
| "grad_norm": 0.23580867052078247, |
| "learning_rate": 6.774847812403355e-05, |
| "loss": 0.0535, |
| "step": 6230 |
| }, |
| { |
| "epoch": 13.05439330543933, |
| "grad_norm": 0.16759879887104034, |
| "learning_rate": 6.76453822539293e-05, |
| "loss": 0.0593, |
| "step": 6240 |
| }, |
| { |
| "epoch": 13.07531380753138, |
| "grad_norm": 0.37763532996177673, |
| "learning_rate": 6.754220062063793e-05, |
| "loss": 0.0569, |
| "step": 6250 |
| }, |
| { |
| "epoch": 13.096234309623432, |
| "grad_norm": 0.23050107061862946, |
| "learning_rate": 6.743893372566099e-05, |
| "loss": 0.0551, |
| "step": 6260 |
| }, |
| { |
| "epoch": 13.117154811715482, |
| "grad_norm": 0.18879632651805878, |
| "learning_rate": 6.733558207091434e-05, |
| "loss": 0.0571, |
| "step": 6270 |
| }, |
| { |
| "epoch": 13.138075313807532, |
| "grad_norm": 0.23209887742996216, |
| "learning_rate": 6.723214615872585e-05, |
| "loss": 0.0626, |
| "step": 6280 |
| }, |
| { |
| "epoch": 13.158995815899582, |
| "grad_norm": 0.16621044278144836, |
| "learning_rate": 6.712862649183295e-05, |
| "loss": 0.0537, |
| "step": 6290 |
| }, |
| { |
| "epoch": 13.179916317991632, |
| "grad_norm": 0.2773350477218628, |
| "learning_rate": 6.70250235733801e-05, |
| "loss": 0.0624, |
| "step": 6300 |
| }, |
| { |
| "epoch": 13.200836820083682, |
| "grad_norm": 0.17005692422389984, |
| "learning_rate": 6.692133790691639e-05, |
| "loss": 0.0564, |
| "step": 6310 |
| }, |
| { |
| "epoch": 13.221757322175732, |
| "grad_norm": 0.2646332085132599, |
| "learning_rate": 6.681756999639311e-05, |
| "loss": 0.0557, |
| "step": 6320 |
| }, |
| { |
| "epoch": 13.242677824267782, |
| "grad_norm": 0.25880181789398193, |
| "learning_rate": 6.671372034616132e-05, |
| "loss": 0.0492, |
| "step": 6330 |
| }, |
| { |
| "epoch": 13.263598326359833, |
| "grad_norm": 0.17128583788871765, |
| "learning_rate": 6.660978946096933e-05, |
| "loss": 0.0545, |
| "step": 6340 |
| }, |
| { |
| "epoch": 13.284518828451883, |
| "grad_norm": 0.2767598330974579, |
| "learning_rate": 6.650577784596026e-05, |
| "loss": 0.0612, |
| "step": 6350 |
| }, |
| { |
| "epoch": 13.305439330543933, |
| "grad_norm": 0.17852754890918732, |
| "learning_rate": 6.640168600666967e-05, |
| "loss": 0.0475, |
| "step": 6360 |
| }, |
| { |
| "epoch": 13.326359832635983, |
| "grad_norm": 0.2585950195789337, |
| "learning_rate": 6.629751444902299e-05, |
| "loss": 0.0569, |
| "step": 6370 |
| }, |
| { |
| "epoch": 13.347280334728033, |
| "grad_norm": 0.27310749888420105, |
| "learning_rate": 6.619326367933312e-05, |
| "loss": 0.0578, |
| "step": 6380 |
| }, |
| { |
| "epoch": 13.368200836820083, |
| "grad_norm": 0.1731962114572525, |
| "learning_rate": 6.608893420429798e-05, |
| "loss": 0.0568, |
| "step": 6390 |
| }, |
| { |
| "epoch": 13.389121338912133, |
| "grad_norm": 0.196332648396492, |
| "learning_rate": 6.598452653099803e-05, |
| "loss": 0.0524, |
| "step": 6400 |
| }, |
| { |
| "epoch": 13.410041841004183, |
| "grad_norm": 0.21636244654655457, |
| "learning_rate": 6.588004116689375e-05, |
| "loss": 0.0596, |
| "step": 6410 |
| }, |
| { |
| "epoch": 13.430962343096235, |
| "grad_norm": 0.24683816730976105, |
| "learning_rate": 6.57754786198233e-05, |
| "loss": 0.0622, |
| "step": 6420 |
| }, |
| { |
| "epoch": 13.451882845188285, |
| "grad_norm": 0.19918185472488403, |
| "learning_rate": 6.567083939799992e-05, |
| "loss": 0.0574, |
| "step": 6430 |
| }, |
| { |
| "epoch": 13.472803347280335, |
| "grad_norm": 0.18632373213768005, |
| "learning_rate": 6.556612401000954e-05, |
| "loss": 0.0493, |
| "step": 6440 |
| }, |
| { |
| "epoch": 13.493723849372385, |
| "grad_norm": 0.19650404155254364, |
| "learning_rate": 6.54613329648083e-05, |
| "loss": 0.0529, |
| "step": 6450 |
| }, |
| { |
| "epoch": 13.514644351464435, |
| "grad_norm": 0.2306489199399948, |
| "learning_rate": 6.535646677172005e-05, |
| "loss": 0.0507, |
| "step": 6460 |
| }, |
| { |
| "epoch": 13.535564853556485, |
| "grad_norm": 0.1978807896375656, |
| "learning_rate": 6.52515259404339e-05, |
| "loss": 0.048, |
| "step": 6470 |
| }, |
| { |
| "epoch": 13.556485355648535, |
| "grad_norm": 0.2691389322280884, |
| "learning_rate": 6.514651098100167e-05, |
| "loss": 0.0549, |
| "step": 6480 |
| }, |
| { |
| "epoch": 13.577405857740585, |
| "grad_norm": 0.18920831382274628, |
| "learning_rate": 6.504142240383555e-05, |
| "loss": 0.0586, |
| "step": 6490 |
| }, |
| { |
| "epoch": 13.598326359832637, |
| "grad_norm": 0.19168440997600555, |
| "learning_rate": 6.493626071970549e-05, |
| "loss": 0.0603, |
| "step": 6500 |
| }, |
| { |
| "epoch": 13.619246861924687, |
| "grad_norm": 0.22604981064796448, |
| "learning_rate": 6.483102643973682e-05, |
| "loss": 0.0622, |
| "step": 6510 |
| }, |
| { |
| "epoch": 13.640167364016737, |
| "grad_norm": 0.21754129230976105, |
| "learning_rate": 6.472572007540764e-05, |
| "loss": 0.0566, |
| "step": 6520 |
| }, |
| { |
| "epoch": 13.661087866108787, |
| "grad_norm": 0.17625564336776733, |
| "learning_rate": 6.462034213854645e-05, |
| "loss": 0.0575, |
| "step": 6530 |
| }, |
| { |
| "epoch": 13.682008368200837, |
| "grad_norm": 0.19063302874565125, |
| "learning_rate": 6.451489314132962e-05, |
| "loss": 0.0604, |
| "step": 6540 |
| }, |
| { |
| "epoch": 13.702928870292887, |
| "grad_norm": 0.2231472134590149, |
| "learning_rate": 6.440937359627893e-05, |
| "loss": 0.0515, |
| "step": 6550 |
| }, |
| { |
| "epoch": 13.723849372384937, |
| "grad_norm": 0.18337441980838776, |
| "learning_rate": 6.430378401625894e-05, |
| "loss": 0.054, |
| "step": 6560 |
| }, |
| { |
| "epoch": 13.744769874476987, |
| "grad_norm": 0.19114044308662415, |
| "learning_rate": 6.419812491447472e-05, |
| "loss": 0.0581, |
| "step": 6570 |
| }, |
| { |
| "epoch": 13.765690376569038, |
| "grad_norm": 0.22809480130672455, |
| "learning_rate": 6.409239680446919e-05, |
| "loss": 0.0529, |
| "step": 6580 |
| }, |
| { |
| "epoch": 13.786610878661088, |
| "grad_norm": 0.20257806777954102, |
| "learning_rate": 6.398660020012072e-05, |
| "loss": 0.066, |
| "step": 6590 |
| }, |
| { |
| "epoch": 13.807531380753138, |
| "grad_norm": 0.1861211657524109, |
| "learning_rate": 6.38807356156405e-05, |
| "loss": 0.0557, |
| "step": 6600 |
| }, |
| { |
| "epoch": 13.828451882845188, |
| "grad_norm": 0.2235741913318634, |
| "learning_rate": 6.377480356557022e-05, |
| "loss": 0.0541, |
| "step": 6610 |
| }, |
| { |
| "epoch": 13.849372384937238, |
| "grad_norm": 0.20858436822891235, |
| "learning_rate": 6.366880456477942e-05, |
| "loss": 0.0592, |
| "step": 6620 |
| }, |
| { |
| "epoch": 13.870292887029288, |
| "grad_norm": 0.19966097176074982, |
| "learning_rate": 6.356273912846312e-05, |
| "loss": 0.054, |
| "step": 6630 |
| }, |
| { |
| "epoch": 13.891213389121338, |
| "grad_norm": 0.24596375226974487, |
| "learning_rate": 6.34566077721391e-05, |
| "loss": 0.0649, |
| "step": 6640 |
| }, |
| { |
| "epoch": 13.91213389121339, |
| "grad_norm": 0.1986909806728363, |
| "learning_rate": 6.335041101164569e-05, |
| "loss": 0.0597, |
| "step": 6650 |
| }, |
| { |
| "epoch": 13.93305439330544, |
| "grad_norm": 0.21726685762405396, |
| "learning_rate": 6.324414936313904e-05, |
| "loss": 0.0525, |
| "step": 6660 |
| }, |
| { |
| "epoch": 13.95397489539749, |
| "grad_norm": 0.17825187742710114, |
| "learning_rate": 6.313782334309066e-05, |
| "loss": 0.0602, |
| "step": 6670 |
| }, |
| { |
| "epoch": 13.97489539748954, |
| "grad_norm": 0.16731053590774536, |
| "learning_rate": 6.303143346828499e-05, |
| "loss": 0.0541, |
| "step": 6680 |
| }, |
| { |
| "epoch": 13.99581589958159, |
| "grad_norm": 0.20779544115066528, |
| "learning_rate": 6.292498025581674e-05, |
| "loss": 0.0623, |
| "step": 6690 |
| }, |
| { |
| "epoch": 14.01673640167364, |
| "grad_norm": 0.1886224001646042, |
| "learning_rate": 6.281846422308857e-05, |
| "loss": 0.0496, |
| "step": 6700 |
| }, |
| { |
| "epoch": 14.03765690376569, |
| "grad_norm": 0.17492704093456268, |
| "learning_rate": 6.271188588780839e-05, |
| "loss": 0.0552, |
| "step": 6710 |
| }, |
| { |
| "epoch": 14.05857740585774, |
| "grad_norm": 0.1669052243232727, |
| "learning_rate": 6.260524576798694e-05, |
| "loss": 0.0471, |
| "step": 6720 |
| }, |
| { |
| "epoch": 14.07949790794979, |
| "grad_norm": 0.2435608208179474, |
| "learning_rate": 6.249854438193528e-05, |
| "loss": 0.0575, |
| "step": 6730 |
| }, |
| { |
| "epoch": 14.100418410041842, |
| "grad_norm": 0.19360196590423584, |
| "learning_rate": 6.239178224826224e-05, |
| "loss": 0.0492, |
| "step": 6740 |
| }, |
| { |
| "epoch": 14.121338912133892, |
| "grad_norm": 0.18208938837051392, |
| "learning_rate": 6.228495988587188e-05, |
| "loss": 0.0627, |
| "step": 6750 |
| }, |
| { |
| "epoch": 14.142259414225942, |
| "grad_norm": 0.16620376706123352, |
| "learning_rate": 6.217807781396106e-05, |
| "loss": 0.0515, |
| "step": 6760 |
| }, |
| { |
| "epoch": 14.163179916317992, |
| "grad_norm": 0.17440137267112732, |
| "learning_rate": 6.207113655201676e-05, |
| "loss": 0.0449, |
| "step": 6770 |
| }, |
| { |
| "epoch": 14.184100418410042, |
| "grad_norm": 0.20068810880184174, |
| "learning_rate": 6.196413661981368e-05, |
| "loss": 0.0513, |
| "step": 6780 |
| }, |
| { |
| "epoch": 14.205020920502092, |
| "grad_norm": 0.30458423495292664, |
| "learning_rate": 6.185707853741175e-05, |
| "loss": 0.0526, |
| "step": 6790 |
| }, |
| { |
| "epoch": 14.225941422594142, |
| "grad_norm": 0.20665733516216278, |
| "learning_rate": 6.174996282515344e-05, |
| "loss": 0.0581, |
| "step": 6800 |
| }, |
| { |
| "epoch": 14.246861924686192, |
| "grad_norm": 0.19424675405025482, |
| "learning_rate": 6.164279000366131e-05, |
| "loss": 0.058, |
| "step": 6810 |
| }, |
| { |
| "epoch": 14.267782426778243, |
| "grad_norm": 0.2010163515806198, |
| "learning_rate": 6.153556059383561e-05, |
| "loss": 0.0549, |
| "step": 6820 |
| }, |
| { |
| "epoch": 14.288702928870293, |
| "grad_norm": 0.18270374834537506, |
| "learning_rate": 6.142827511685152e-05, |
| "loss": 0.0517, |
| "step": 6830 |
| }, |
| { |
| "epoch": 14.309623430962343, |
| "grad_norm": 0.16748015582561493, |
| "learning_rate": 6.132093409415678e-05, |
| "loss": 0.0457, |
| "step": 6840 |
| }, |
| { |
| "epoch": 14.330543933054393, |
| "grad_norm": 0.20862819254398346, |
| "learning_rate": 6.121353804746907e-05, |
| "loss": 0.0528, |
| "step": 6850 |
| }, |
| { |
| "epoch": 14.351464435146443, |
| "grad_norm": 0.22936491668224335, |
| "learning_rate": 6.110608749877352e-05, |
| "loss": 0.0473, |
| "step": 6860 |
| }, |
| { |
| "epoch": 14.372384937238493, |
| "grad_norm": 0.21870329976081848, |
| "learning_rate": 6.0998582970320205e-05, |
| "loss": 0.0545, |
| "step": 6870 |
| }, |
| { |
| "epoch": 14.393305439330543, |
| "grad_norm": 0.22260119020938873, |
| "learning_rate": 6.0891024984621506e-05, |
| "loss": 0.0604, |
| "step": 6880 |
| }, |
| { |
| "epoch": 14.414225941422593, |
| "grad_norm": 0.19479791820049286, |
| "learning_rate": 6.078341406444961e-05, |
| "loss": 0.0588, |
| "step": 6890 |
| }, |
| { |
| "epoch": 14.435146443514645, |
| "grad_norm": 0.29949843883514404, |
| "learning_rate": 6.067575073283405e-05, |
| "loss": 0.0584, |
| "step": 6900 |
| }, |
| { |
| "epoch": 14.456066945606695, |
| "grad_norm": 0.19155101478099823, |
| "learning_rate": 6.0568035513059073e-05, |
| "loss": 0.0517, |
| "step": 6910 |
| }, |
| { |
| "epoch": 14.476987447698745, |
| "grad_norm": 0.24398870766162872, |
| "learning_rate": 6.046026892866109e-05, |
| "loss": 0.0557, |
| "step": 6920 |
| }, |
| { |
| "epoch": 14.497907949790795, |
| "grad_norm": 0.2503119707107544, |
| "learning_rate": 6.0352451503426214e-05, |
| "loss": 0.0623, |
| "step": 6930 |
| }, |
| { |
| "epoch": 14.518828451882845, |
| "grad_norm": 0.1765798181295395, |
| "learning_rate": 6.024458376138762e-05, |
| "loss": 0.0506, |
| "step": 6940 |
| }, |
| { |
| "epoch": 14.539748953974895, |
| "grad_norm": 0.1602436751127243, |
| "learning_rate": 6.013666622682306e-05, |
| "loss": 0.0493, |
| "step": 6950 |
| }, |
| { |
| "epoch": 14.560669456066945, |
| "grad_norm": 0.22358030080795288, |
| "learning_rate": 6.002869942425231e-05, |
| "loss": 0.0582, |
| "step": 6960 |
| }, |
| { |
| "epoch": 14.581589958158997, |
| "grad_norm": 0.23106183111667633, |
| "learning_rate": 5.992068387843459e-05, |
| "loss": 0.0544, |
| "step": 6970 |
| }, |
| { |
| "epoch": 14.602510460251047, |
| "grad_norm": 0.19091829657554626, |
| "learning_rate": 5.981262011436603e-05, |
| "loss": 0.0479, |
| "step": 6980 |
| }, |
| { |
| "epoch": 14.623430962343097, |
| "grad_norm": 0.16238416731357574, |
| "learning_rate": 5.970450865727712e-05, |
| "loss": 0.0513, |
| "step": 6990 |
| }, |
| { |
| "epoch": 14.644351464435147, |
| "grad_norm": 0.22124448418617249, |
| "learning_rate": 5.9596350032630156e-05, |
| "loss": 0.0516, |
| "step": 7000 |
| }, |
| { |
| "epoch": 14.665271966527197, |
| "grad_norm": 0.18922406435012817, |
| "learning_rate": 5.9488144766116714e-05, |
| "loss": 0.0504, |
| "step": 7010 |
| }, |
| { |
| "epoch": 14.686192468619247, |
| "grad_norm": 0.19591659307479858, |
| "learning_rate": 5.9379893383655006e-05, |
| "loss": 0.0571, |
| "step": 7020 |
| }, |
| { |
| "epoch": 14.707112970711297, |
| "grad_norm": 0.2549462914466858, |
| "learning_rate": 5.927159641138744e-05, |
| "loss": 0.0511, |
| "step": 7030 |
| }, |
| { |
| "epoch": 14.728033472803347, |
| "grad_norm": 0.24339456856250763, |
| "learning_rate": 5.916325437567799e-05, |
| "loss": 0.053, |
| "step": 7040 |
| }, |
| { |
| "epoch": 14.748953974895397, |
| "grad_norm": 0.2489190697669983, |
| "learning_rate": 5.905486780310966e-05, |
| "loss": 0.0517, |
| "step": 7050 |
| }, |
| { |
| "epoch": 14.769874476987448, |
| "grad_norm": 0.2775043249130249, |
| "learning_rate": 5.8946437220481887e-05, |
| "loss": 0.0662, |
| "step": 7060 |
| }, |
| { |
| "epoch": 14.790794979079498, |
| "grad_norm": 0.21394887566566467, |
| "learning_rate": 5.883796315480805e-05, |
| "loss": 0.0603, |
| "step": 7070 |
| }, |
| { |
| "epoch": 14.811715481171548, |
| "grad_norm": 0.17485828697681427, |
| "learning_rate": 5.872944613331288e-05, |
| "loss": 0.0559, |
| "step": 7080 |
| }, |
| { |
| "epoch": 14.832635983263598, |
| "grad_norm": 0.24739952385425568, |
| "learning_rate": 5.862088668342986e-05, |
| "loss": 0.052, |
| "step": 7090 |
| }, |
| { |
| "epoch": 14.853556485355648, |
| "grad_norm": 0.15297842025756836, |
| "learning_rate": 5.8512285332798714e-05, |
| "loss": 0.057, |
| "step": 7100 |
| }, |
| { |
| "epoch": 14.874476987447698, |
| "grad_norm": 0.18535470962524414, |
| "learning_rate": 5.840364260926277e-05, |
| "loss": 0.0526, |
| "step": 7110 |
| }, |
| { |
| "epoch": 14.895397489539748, |
| "grad_norm": 0.19784200191497803, |
| "learning_rate": 5.8294959040866505e-05, |
| "loss": 0.0558, |
| "step": 7120 |
| }, |
| { |
| "epoch": 14.9163179916318, |
| "grad_norm": 0.1543385535478592, |
| "learning_rate": 5.818623515585292e-05, |
| "loss": 0.049, |
| "step": 7130 |
| }, |
| { |
| "epoch": 14.93723849372385, |
| "grad_norm": 0.17051587998867035, |
| "learning_rate": 5.8077471482660896e-05, |
| "loss": 0.0537, |
| "step": 7140 |
| }, |
| { |
| "epoch": 14.9581589958159, |
| "grad_norm": 0.19550904631614685, |
| "learning_rate": 5.796866854992276e-05, |
| "loss": 0.0595, |
| "step": 7150 |
| }, |
| { |
| "epoch": 14.97907949790795, |
| "grad_norm": 0.2906009256839752, |
| "learning_rate": 5.7859826886461676e-05, |
| "loss": 0.058, |
| "step": 7160 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 1.7434508800506592, |
| "learning_rate": 5.775094702128899e-05, |
| "loss": 0.0554, |
| "step": 7170 |
| }, |
| { |
| "epoch": 15.02092050209205, |
| "grad_norm": 0.23117059469223022, |
| "learning_rate": 5.7642029483601746e-05, |
| "loss": 0.056, |
| "step": 7180 |
| }, |
| { |
| "epoch": 15.0418410041841, |
| "grad_norm": 0.24025872349739075, |
| "learning_rate": 5.753307480278012e-05, |
| "loss": 0.0631, |
| "step": 7190 |
| }, |
| { |
| "epoch": 15.06276150627615, |
| "grad_norm": 0.31165027618408203, |
| "learning_rate": 5.742408350838478e-05, |
| "loss": 0.0571, |
| "step": 7200 |
| }, |
| { |
| "epoch": 15.0836820083682, |
| "grad_norm": 0.2065475434064865, |
| "learning_rate": 5.7315056130154374e-05, |
| "loss": 0.0567, |
| "step": 7210 |
| }, |
| { |
| "epoch": 15.104602510460252, |
| "grad_norm": 0.23966439068317413, |
| "learning_rate": 5.720599319800292e-05, |
| "loss": 0.0504, |
| "step": 7220 |
| }, |
| { |
| "epoch": 15.125523012552302, |
| "grad_norm": 0.16557417809963226, |
| "learning_rate": 5.709689524201722e-05, |
| "loss": 0.0554, |
| "step": 7230 |
| }, |
| { |
| "epoch": 15.146443514644352, |
| "grad_norm": 0.24131985008716583, |
| "learning_rate": 5.698776279245437e-05, |
| "loss": 0.0564, |
| "step": 7240 |
| }, |
| { |
| "epoch": 15.167364016736402, |
| "grad_norm": 0.22979691624641418, |
| "learning_rate": 5.6878596379739036e-05, |
| "loss": 0.0493, |
| "step": 7250 |
| }, |
| { |
| "epoch": 15.188284518828452, |
| "grad_norm": 0.1828659474849701, |
| "learning_rate": 5.676939653446103e-05, |
| "loss": 0.0509, |
| "step": 7260 |
| }, |
| { |
| "epoch": 15.209205020920502, |
| "grad_norm": 0.1639285683631897, |
| "learning_rate": 5.666016378737261e-05, |
| "loss": 0.0582, |
| "step": 7270 |
| }, |
| { |
| "epoch": 15.230125523012552, |
| "grad_norm": 0.22438766062259674, |
| "learning_rate": 5.655089866938596e-05, |
| "loss": 0.0541, |
| "step": 7280 |
| }, |
| { |
| "epoch": 15.251046025104603, |
| "grad_norm": 0.22549496591091156, |
| "learning_rate": 5.6441601711570615e-05, |
| "loss": 0.0532, |
| "step": 7290 |
| }, |
| { |
| "epoch": 15.271966527196653, |
| "grad_norm": 0.22666563093662262, |
| "learning_rate": 5.633227344515085e-05, |
| "loss": 0.0462, |
| "step": 7300 |
| }, |
| { |
| "epoch": 15.292887029288703, |
| "grad_norm": 0.1765870302915573, |
| "learning_rate": 5.6222914401503116e-05, |
| "loss": 0.0539, |
| "step": 7310 |
| }, |
| { |
| "epoch": 15.313807531380753, |
| "grad_norm": 0.20818251371383667, |
| "learning_rate": 5.611352511215343e-05, |
| "loss": 0.0544, |
| "step": 7320 |
| }, |
| { |
| "epoch": 15.334728033472803, |
| "grad_norm": 0.19694848358631134, |
| "learning_rate": 5.600410610877488e-05, |
| "loss": 0.0495, |
| "step": 7330 |
| }, |
| { |
| "epoch": 15.355648535564853, |
| "grad_norm": 0.17688891291618347, |
| "learning_rate": 5.58946579231849e-05, |
| "loss": 0.0547, |
| "step": 7340 |
| }, |
| { |
| "epoch": 15.376569037656903, |
| "grad_norm": 0.21549320220947266, |
| "learning_rate": 5.578518108734279e-05, |
| "loss": 0.0517, |
| "step": 7350 |
| }, |
| { |
| "epoch": 15.397489539748953, |
| "grad_norm": 0.21116769313812256, |
| "learning_rate": 5.5675676133347096e-05, |
| "loss": 0.0526, |
| "step": 7360 |
| }, |
| { |
| "epoch": 15.418410041841003, |
| "grad_norm": 0.17821955680847168, |
| "learning_rate": 5.556614359343307e-05, |
| "loss": 0.0483, |
| "step": 7370 |
| }, |
| { |
| "epoch": 15.439330543933055, |
| "grad_norm": 0.2199217528104782, |
| "learning_rate": 5.545658399996999e-05, |
| "loss": 0.0478, |
| "step": 7380 |
| }, |
| { |
| "epoch": 15.460251046025105, |
| "grad_norm": 0.21454869210720062, |
| "learning_rate": 5.534699788545862e-05, |
| "loss": 0.053, |
| "step": 7390 |
| }, |
| { |
| "epoch": 15.481171548117155, |
| "grad_norm": 0.2748502492904663, |
| "learning_rate": 5.523738578252867e-05, |
| "loss": 0.0524, |
| "step": 7400 |
| }, |
| { |
| "epoch": 15.502092050209205, |
| "grad_norm": 0.2580304443836212, |
| "learning_rate": 5.512774822393614e-05, |
| "loss": 0.054, |
| "step": 7410 |
| }, |
| { |
| "epoch": 15.523012552301255, |
| "grad_norm": 0.22264724969863892, |
| "learning_rate": 5.5018085742560744e-05, |
| "loss": 0.0442, |
| "step": 7420 |
| }, |
| { |
| "epoch": 15.543933054393305, |
| "grad_norm": 0.20303569734096527, |
| "learning_rate": 5.4908398871403365e-05, |
| "loss": 0.0557, |
| "step": 7430 |
| }, |
| { |
| "epoch": 15.564853556485355, |
| "grad_norm": 0.44104623794555664, |
| "learning_rate": 5.4798688143583375e-05, |
| "loss": 0.0549, |
| "step": 7440 |
| }, |
| { |
| "epoch": 15.585774058577407, |
| "grad_norm": 0.1938384473323822, |
| "learning_rate": 5.468895409233615e-05, |
| "loss": 0.0549, |
| "step": 7450 |
| }, |
| { |
| "epoch": 15.606694560669457, |
| "grad_norm": 0.15259292721748352, |
| "learning_rate": 5.4579197251010414e-05, |
| "loss": 0.0513, |
| "step": 7460 |
| }, |
| { |
| "epoch": 15.627615062761507, |
| "grad_norm": 0.15911541879177094, |
| "learning_rate": 5.446941815306563e-05, |
| "loss": 0.0434, |
| "step": 7470 |
| }, |
| { |
| "epoch": 15.648535564853557, |
| "grad_norm": 0.1852342039346695, |
| "learning_rate": 5.435961733206947e-05, |
| "loss": 0.0609, |
| "step": 7480 |
| }, |
| { |
| "epoch": 15.669456066945607, |
| "grad_norm": 0.25824370980262756, |
| "learning_rate": 5.424979532169516e-05, |
| "loss": 0.0506, |
| "step": 7490 |
| }, |
| { |
| "epoch": 15.690376569037657, |
| "grad_norm": 0.19190651178359985, |
| "learning_rate": 5.413995265571895e-05, |
| "loss": 0.054, |
| "step": 7500 |
| }, |
| { |
| "epoch": 15.711297071129707, |
| "grad_norm": 0.24761340022087097, |
| "learning_rate": 5.403008986801746e-05, |
| "loss": 0.0558, |
| "step": 7510 |
| }, |
| { |
| "epoch": 15.732217573221757, |
| "grad_norm": 0.21222639083862305, |
| "learning_rate": 5.3920207492565114e-05, |
| "loss": 0.0506, |
| "step": 7520 |
| }, |
| { |
| "epoch": 15.753138075313807, |
| "grad_norm": 0.18009130656719208, |
| "learning_rate": 5.381030606343154e-05, |
| "loss": 0.0531, |
| "step": 7530 |
| }, |
| { |
| "epoch": 15.774058577405858, |
| "grad_norm": 0.2761033773422241, |
| "learning_rate": 5.370038611477894e-05, |
| "loss": 0.0516, |
| "step": 7540 |
| }, |
| { |
| "epoch": 15.794979079497908, |
| "grad_norm": 0.194721981883049, |
| "learning_rate": 5.359044818085963e-05, |
| "loss": 0.057, |
| "step": 7550 |
| }, |
| { |
| "epoch": 15.815899581589958, |
| "grad_norm": 0.20458896458148956, |
| "learning_rate": 5.3480492796013214e-05, |
| "loss": 0.0477, |
| "step": 7560 |
| }, |
| { |
| "epoch": 15.836820083682008, |
| "grad_norm": 0.13182362914085388, |
| "learning_rate": 5.33705204946642e-05, |
| "loss": 0.0475, |
| "step": 7570 |
| }, |
| { |
| "epoch": 15.857740585774058, |
| "grad_norm": 0.14037500321865082, |
| "learning_rate": 5.326053181131927e-05, |
| "loss": 0.0521, |
| "step": 7580 |
| }, |
| { |
| "epoch": 15.878661087866108, |
| "grad_norm": 0.237847238779068, |
| "learning_rate": 5.3150527280564776e-05, |
| "loss": 0.0578, |
| "step": 7590 |
| }, |
| { |
| "epoch": 15.899581589958158, |
| "grad_norm": 0.24887144565582275, |
| "learning_rate": 5.3040507437064034e-05, |
| "loss": 0.0544, |
| "step": 7600 |
| }, |
| { |
| "epoch": 15.92050209205021, |
| "grad_norm": 0.22051165997982025, |
| "learning_rate": 5.293047281555482e-05, |
| "loss": 0.0545, |
| "step": 7610 |
| }, |
| { |
| "epoch": 15.94142259414226, |
| "grad_norm": 0.167110413312912, |
| "learning_rate": 5.2820423950846765e-05, |
| "loss": 0.047, |
| "step": 7620 |
| }, |
| { |
| "epoch": 15.96234309623431, |
| "grad_norm": 0.16554315388202667, |
| "learning_rate": 5.2710361377818696e-05, |
| "loss": 0.0513, |
| "step": 7630 |
| }, |
| { |
| "epoch": 15.98326359832636, |
| "grad_norm": 0.14443959295749664, |
| "learning_rate": 5.2600285631416026e-05, |
| "loss": 0.0498, |
| "step": 7640 |
| }, |
| { |
| "epoch": 16.00418410041841, |
| "grad_norm": 0.2642640471458435, |
| "learning_rate": 5.249019724664826e-05, |
| "loss": 0.048, |
| "step": 7650 |
| }, |
| { |
| "epoch": 16.02510460251046, |
| "grad_norm": 0.20373564958572388, |
| "learning_rate": 5.2380096758586315e-05, |
| "loss": 0.0602, |
| "step": 7660 |
| }, |
| { |
| "epoch": 16.04602510460251, |
| "grad_norm": 0.22573313117027283, |
| "learning_rate": 5.226998470235993e-05, |
| "loss": 0.0491, |
| "step": 7670 |
| }, |
| { |
| "epoch": 16.06694560669456, |
| "grad_norm": 0.2332305908203125, |
| "learning_rate": 5.215986161315507e-05, |
| "loss": 0.0527, |
| "step": 7680 |
| }, |
| { |
| "epoch": 16.08786610878661, |
| "grad_norm": 0.191607266664505, |
| "learning_rate": 5.20497280262113e-05, |
| "loss": 0.0467, |
| "step": 7690 |
| }, |
| { |
| "epoch": 16.10878661087866, |
| "grad_norm": 0.22203144431114197, |
| "learning_rate": 5.193958447681924e-05, |
| "loss": 0.051, |
| "step": 7700 |
| }, |
| { |
| "epoch": 16.12970711297071, |
| "grad_norm": 0.20125925540924072, |
| "learning_rate": 5.182943150031793e-05, |
| "loss": 0.0459, |
| "step": 7710 |
| }, |
| { |
| "epoch": 16.15062761506276, |
| "grad_norm": 0.2417091727256775, |
| "learning_rate": 5.1719269632092204e-05, |
| "loss": 0.0524, |
| "step": 7720 |
| }, |
| { |
| "epoch": 16.171548117154813, |
| "grad_norm": 0.20296020805835724, |
| "learning_rate": 5.160909940757015e-05, |
| "loss": 0.0496, |
| "step": 7730 |
| }, |
| { |
| "epoch": 16.192468619246863, |
| "grad_norm": 0.16926750540733337, |
| "learning_rate": 5.149892136222043e-05, |
| "loss": 0.0544, |
| "step": 7740 |
| }, |
| { |
| "epoch": 16.213389121338913, |
| "grad_norm": 0.1914985626935959, |
| "learning_rate": 5.1388736031549744e-05, |
| "loss": 0.0507, |
| "step": 7750 |
| }, |
| { |
| "epoch": 16.234309623430963, |
| "grad_norm": 0.2034175992012024, |
| "learning_rate": 5.127854395110021e-05, |
| "loss": 0.0459, |
| "step": 7760 |
| }, |
| { |
| "epoch": 16.255230125523013, |
| "grad_norm": 0.1641988456249237, |
| "learning_rate": 5.116834565644671e-05, |
| "loss": 0.0503, |
| "step": 7770 |
| }, |
| { |
| "epoch": 16.276150627615063, |
| "grad_norm": 0.17824122309684753, |
| "learning_rate": 5.10581416831944e-05, |
| "loss": 0.048, |
| "step": 7780 |
| }, |
| { |
| "epoch": 16.297071129707113, |
| "grad_norm": 0.15559978783130646, |
| "learning_rate": 5.094793256697593e-05, |
| "loss": 0.0543, |
| "step": 7790 |
| }, |
| { |
| "epoch": 16.317991631799163, |
| "grad_norm": 0.18097707629203796, |
| "learning_rate": 5.0837718843449075e-05, |
| "loss": 0.0522, |
| "step": 7800 |
| }, |
| { |
| "epoch": 16.338912133891213, |
| "grad_norm": 0.20327609777450562, |
| "learning_rate": 5.07275010482939e-05, |
| "loss": 0.0508, |
| "step": 7810 |
| }, |
| { |
| "epoch": 16.359832635983263, |
| "grad_norm": 0.15760394930839539, |
| "learning_rate": 5.061727971721032e-05, |
| "loss": 0.0426, |
| "step": 7820 |
| }, |
| { |
| "epoch": 16.380753138075313, |
| "grad_norm": 0.2155444324016571, |
| "learning_rate": 5.050705538591538e-05, |
| "loss": 0.0556, |
| "step": 7830 |
| }, |
| { |
| "epoch": 16.401673640167363, |
| "grad_norm": 0.1983986645936966, |
| "learning_rate": 5.0396828590140785e-05, |
| "loss": 0.0567, |
| "step": 7840 |
| }, |
| { |
| "epoch": 16.422594142259413, |
| "grad_norm": 0.19888253509998322, |
| "learning_rate": 5.0286599865630157e-05, |
| "loss": 0.0583, |
| "step": 7850 |
| }, |
| { |
| "epoch": 16.443514644351463, |
| "grad_norm": 0.14076486229896545, |
| "learning_rate": 5.017636974813649e-05, |
| "loss": 0.051, |
| "step": 7860 |
| }, |
| { |
| "epoch": 16.464435146443513, |
| "grad_norm": 0.22674958407878876, |
| "learning_rate": 5.006613877341959e-05, |
| "loss": 0.0479, |
| "step": 7870 |
| }, |
| { |
| "epoch": 16.485355648535563, |
| "grad_norm": 0.23650920391082764, |
| "learning_rate": 4.99559074772434e-05, |
| "loss": 0.0521, |
| "step": 7880 |
| }, |
| { |
| "epoch": 16.506276150627617, |
| "grad_norm": 0.2411746233701706, |
| "learning_rate": 4.9845676395373455e-05, |
| "loss": 0.0456, |
| "step": 7890 |
| }, |
| { |
| "epoch": 16.527196652719667, |
| "grad_norm": 0.2268081158399582, |
| "learning_rate": 4.9735446063574184e-05, |
| "loss": 0.0498, |
| "step": 7900 |
| }, |
| { |
| "epoch": 16.548117154811717, |
| "grad_norm": 0.1393486112356186, |
| "learning_rate": 4.962521701760645e-05, |
| "loss": 0.05, |
| "step": 7910 |
| }, |
| { |
| "epoch": 16.569037656903767, |
| "grad_norm": 0.16085295379161835, |
| "learning_rate": 4.951498979322482e-05, |
| "loss": 0.0528, |
| "step": 7920 |
| }, |
| { |
| "epoch": 16.589958158995817, |
| "grad_norm": 0.17427772283554077, |
| "learning_rate": 4.9404764926174996e-05, |
| "loss": 0.0558, |
| "step": 7930 |
| }, |
| { |
| "epoch": 16.610878661087867, |
| "grad_norm": 0.12604235112667084, |
| "learning_rate": 4.929454295219127e-05, |
| "loss": 0.0515, |
| "step": 7940 |
| }, |
| { |
| "epoch": 16.631799163179917, |
| "grad_norm": 0.19448940455913544, |
| "learning_rate": 4.9184324406993844e-05, |
| "loss": 0.0567, |
| "step": 7950 |
| }, |
| { |
| "epoch": 16.652719665271967, |
| "grad_norm": 0.24758629500865936, |
| "learning_rate": 4.907410982628623e-05, |
| "loss": 0.0572, |
| "step": 7960 |
| }, |
| { |
| "epoch": 16.673640167364017, |
| "grad_norm": 0.15390925109386444, |
| "learning_rate": 4.896389974575273e-05, |
| "loss": 0.0576, |
| "step": 7970 |
| }, |
| { |
| "epoch": 16.694560669456067, |
| "grad_norm": 0.18204046785831451, |
| "learning_rate": 4.885369470105571e-05, |
| "loss": 0.0569, |
| "step": 7980 |
| }, |
| { |
| "epoch": 16.715481171548117, |
| "grad_norm": 0.2037144899368286, |
| "learning_rate": 4.874349522783313e-05, |
| "loss": 0.0535, |
| "step": 7990 |
| }, |
| { |
| "epoch": 16.736401673640167, |
| "grad_norm": 0.20136842131614685, |
| "learning_rate": 4.863330186169581e-05, |
| "loss": 0.0605, |
| "step": 8000 |
| }, |
| { |
| "epoch": 16.757322175732217, |
| "grad_norm": 0.21489471197128296, |
| "learning_rate": 4.8523115138224885e-05, |
| "loss": 0.0516, |
| "step": 8010 |
| }, |
| { |
| "epoch": 16.778242677824267, |
| "grad_norm": 0.23964069783687592, |
| "learning_rate": 4.841293559296928e-05, |
| "loss": 0.0555, |
| "step": 8020 |
| }, |
| { |
| "epoch": 16.799163179916317, |
| "grad_norm": 0.21049214899539948, |
| "learning_rate": 4.830276376144295e-05, |
| "loss": 0.0567, |
| "step": 8030 |
| }, |
| { |
| "epoch": 16.820083682008367, |
| "grad_norm": 0.23198960721492767, |
| "learning_rate": 4.819260017912237e-05, |
| "loss": 0.0513, |
| "step": 8040 |
| }, |
| { |
| "epoch": 16.84100418410042, |
| "grad_norm": 0.2600829601287842, |
| "learning_rate": 4.808244538144396e-05, |
| "loss": 0.0511, |
| "step": 8050 |
| }, |
| { |
| "epoch": 16.86192468619247, |
| "grad_norm": 0.1669810265302658, |
| "learning_rate": 4.797229990380142e-05, |
| "loss": 0.0464, |
| "step": 8060 |
| }, |
| { |
| "epoch": 16.88284518828452, |
| "grad_norm": 0.1619091033935547, |
| "learning_rate": 4.786216428154317e-05, |
| "loss": 0.0509, |
| "step": 8070 |
| }, |
| { |
| "epoch": 16.90376569037657, |
| "grad_norm": 0.22189967334270477, |
| "learning_rate": 4.7752039049969685e-05, |
| "loss": 0.0536, |
| "step": 8080 |
| }, |
| { |
| "epoch": 16.92468619246862, |
| "grad_norm": 0.1866067796945572, |
| "learning_rate": 4.7641924744330956e-05, |
| "loss": 0.0437, |
| "step": 8090 |
| }, |
| { |
| "epoch": 16.94560669456067, |
| "grad_norm": 0.24444331228733063, |
| "learning_rate": 4.7531821899823925e-05, |
| "loss": 0.0506, |
| "step": 8100 |
| }, |
| { |
| "epoch": 16.96652719665272, |
| "grad_norm": 0.2442953884601593, |
| "learning_rate": 4.742173105158973e-05, |
| "loss": 0.0512, |
| "step": 8110 |
| }, |
| { |
| "epoch": 16.98744769874477, |
| "grad_norm": 0.2353048324584961, |
| "learning_rate": 4.731165273471129e-05, |
| "loss": 0.0481, |
| "step": 8120 |
| }, |
| { |
| "epoch": 17.00836820083682, |
| "grad_norm": 0.21988806128501892, |
| "learning_rate": 4.720158748421057e-05, |
| "loss": 0.0495, |
| "step": 8130 |
| }, |
| { |
| "epoch": 17.02928870292887, |
| "grad_norm": 0.26627087593078613, |
| "learning_rate": 4.709153583504602e-05, |
| "loss": 0.0517, |
| "step": 8140 |
| }, |
| { |
| "epoch": 17.05020920502092, |
| "grad_norm": 0.1413852423429489, |
| "learning_rate": 4.6981498322110027e-05, |
| "loss": 0.0395, |
| "step": 8150 |
| }, |
| { |
| "epoch": 17.07112970711297, |
| "grad_norm": 0.2996743619441986, |
| "learning_rate": 4.6871475480226256e-05, |
| "loss": 0.0568, |
| "step": 8160 |
| }, |
| { |
| "epoch": 17.09205020920502, |
| "grad_norm": 0.2565760016441345, |
| "learning_rate": 4.6761467844147004e-05, |
| "loss": 0.055, |
| "step": 8170 |
| }, |
| { |
| "epoch": 17.11297071129707, |
| "grad_norm": 0.23399613797664642, |
| "learning_rate": 4.665147594855076e-05, |
| "loss": 0.0467, |
| "step": 8180 |
| }, |
| { |
| "epoch": 17.13389121338912, |
| "grad_norm": 0.22994205355644226, |
| "learning_rate": 4.654150032803943e-05, |
| "loss": 0.0548, |
| "step": 8190 |
| }, |
| { |
| "epoch": 17.15481171548117, |
| "grad_norm": 0.24214763939380646, |
| "learning_rate": 4.643154151713588e-05, |
| "loss": 0.0531, |
| "step": 8200 |
| }, |
| { |
| "epoch": 17.175732217573223, |
| "grad_norm": 0.2184325009584427, |
| "learning_rate": 4.6321600050281225e-05, |
| "loss": 0.0543, |
| "step": 8210 |
| }, |
| { |
| "epoch": 17.196652719665273, |
| "grad_norm": 0.20963142812252045, |
| "learning_rate": 4.6211676461832264e-05, |
| "loss": 0.0468, |
| "step": 8220 |
| }, |
| { |
| "epoch": 17.217573221757323, |
| "grad_norm": 0.25760477781295776, |
| "learning_rate": 4.610177128605899e-05, |
| "loss": 0.056, |
| "step": 8230 |
| }, |
| { |
| "epoch": 17.238493723849373, |
| "grad_norm": 0.22882136702537537, |
| "learning_rate": 4.599188505714184e-05, |
| "loss": 0.0531, |
| "step": 8240 |
| }, |
| { |
| "epoch": 17.259414225941423, |
| "grad_norm": 0.16614781320095062, |
| "learning_rate": 4.588201830916912e-05, |
| "loss": 0.0514, |
| "step": 8250 |
| }, |
| { |
| "epoch": 17.280334728033473, |
| "grad_norm": 0.15788774192333221, |
| "learning_rate": 4.577217157613456e-05, |
| "loss": 0.0499, |
| "step": 8260 |
| }, |
| { |
| "epoch": 17.301255230125523, |
| "grad_norm": 0.17240086197853088, |
| "learning_rate": 4.566234539193452e-05, |
| "loss": 0.0485, |
| "step": 8270 |
| }, |
| { |
| "epoch": 17.322175732217573, |
| "grad_norm": 0.18178971111774445, |
| "learning_rate": 4.555254029036555e-05, |
| "loss": 0.0554, |
| "step": 8280 |
| }, |
| { |
| "epoch": 17.343096234309623, |
| "grad_norm": 0.22576147317886353, |
| "learning_rate": 4.544275680512165e-05, |
| "loss": 0.0546, |
| "step": 8290 |
| }, |
| { |
| "epoch": 17.364016736401673, |
| "grad_norm": 0.2796344459056854, |
| "learning_rate": 4.5332995469791836e-05, |
| "loss": 0.0481, |
| "step": 8300 |
| }, |
| { |
| "epoch": 17.384937238493723, |
| "grad_norm": 0.2606862485408783, |
| "learning_rate": 4.522325681785744e-05, |
| "loss": 0.0483, |
| "step": 8310 |
| }, |
| { |
| "epoch": 17.405857740585773, |
| "grad_norm": 0.18557748198509216, |
| "learning_rate": 4.511354138268952e-05, |
| "loss": 0.0472, |
| "step": 8320 |
| }, |
| { |
| "epoch": 17.426778242677823, |
| "grad_norm": 0.17844267189502716, |
| "learning_rate": 4.50038496975463e-05, |
| "loss": 0.0542, |
| "step": 8330 |
| }, |
| { |
| "epoch": 17.447698744769873, |
| "grad_norm": 0.22394883632659912, |
| "learning_rate": 4.489418229557063e-05, |
| "loss": 0.0492, |
| "step": 8340 |
| }, |
| { |
| "epoch": 17.468619246861923, |
| "grad_norm": 0.20772208273410797, |
| "learning_rate": 4.478453970978722e-05, |
| "loss": 0.0517, |
| "step": 8350 |
| }, |
| { |
| "epoch": 17.489539748953973, |
| "grad_norm": 0.21835976839065552, |
| "learning_rate": 4.4674922473100286e-05, |
| "loss": 0.0493, |
| "step": 8360 |
| }, |
| { |
| "epoch": 17.510460251046027, |
| "grad_norm": 0.20974472165107727, |
| "learning_rate": 4.4565331118290756e-05, |
| "loss": 0.0472, |
| "step": 8370 |
| }, |
| { |
| "epoch": 17.531380753138077, |
| "grad_norm": 0.24277760088443756, |
| "learning_rate": 4.4455766178013775e-05, |
| "loss": 0.0454, |
| "step": 8380 |
| }, |
| { |
| "epoch": 17.552301255230127, |
| "grad_norm": 0.23588554561138153, |
| "learning_rate": 4.434622818479615e-05, |
| "loss": 0.0456, |
| "step": 8390 |
| }, |
| { |
| "epoch": 17.573221757322177, |
| "grad_norm": 0.21761219203472137, |
| "learning_rate": 4.4236717671033646e-05, |
| "loss": 0.0524, |
| "step": 8400 |
| }, |
| { |
| "epoch": 17.594142259414227, |
| "grad_norm": 0.25820693373680115, |
| "learning_rate": 4.412723516898853e-05, |
| "loss": 0.0547, |
| "step": 8410 |
| }, |
| { |
| "epoch": 17.615062761506277, |
| "grad_norm": 0.21615050733089447, |
| "learning_rate": 4.40177812107869e-05, |
| "loss": 0.0465, |
| "step": 8420 |
| }, |
| { |
| "epoch": 17.635983263598327, |
| "grad_norm": 0.16582241654396057, |
| "learning_rate": 4.390835632841606e-05, |
| "loss": 0.0492, |
| "step": 8430 |
| }, |
| { |
| "epoch": 17.656903765690377, |
| "grad_norm": 0.17844611406326294, |
| "learning_rate": 4.3798961053722115e-05, |
| "loss": 0.0456, |
| "step": 8440 |
| }, |
| { |
| "epoch": 17.677824267782427, |
| "grad_norm": 0.1292414367198944, |
| "learning_rate": 4.368959591840718e-05, |
| "loss": 0.0434, |
| "step": 8450 |
| }, |
| { |
| "epoch": 17.698744769874477, |
| "grad_norm": 0.21596643328666687, |
| "learning_rate": 4.3580261454026865e-05, |
| "loss": 0.0507, |
| "step": 8460 |
| }, |
| { |
| "epoch": 17.719665271966527, |
| "grad_norm": 0.27433592081069946, |
| "learning_rate": 4.3470958191987786e-05, |
| "loss": 0.0484, |
| "step": 8470 |
| }, |
| { |
| "epoch": 17.740585774058577, |
| "grad_norm": 0.1953336000442505, |
| "learning_rate": 4.336168666354484e-05, |
| "loss": 0.0484, |
| "step": 8480 |
| }, |
| { |
| "epoch": 17.761506276150627, |
| "grad_norm": 0.16921213269233704, |
| "learning_rate": 4.325244739979873e-05, |
| "loss": 0.045, |
| "step": 8490 |
| }, |
| { |
| "epoch": 17.782426778242677, |
| "grad_norm": 0.14465534687042236, |
| "learning_rate": 4.314324093169332e-05, |
| "loss": 0.0534, |
| "step": 8500 |
| }, |
| { |
| "epoch": 17.803347280334727, |
| "grad_norm": 0.16474668681621552, |
| "learning_rate": 4.303406779001302e-05, |
| "loss": 0.0514, |
| "step": 8510 |
| }, |
| { |
| "epoch": 17.824267782426777, |
| "grad_norm": 0.1937585026025772, |
| "learning_rate": 4.292492850538038e-05, |
| "loss": 0.048, |
| "step": 8520 |
| }, |
| { |
| "epoch": 17.84518828451883, |
| "grad_norm": 0.1891181617975235, |
| "learning_rate": 4.28158236082533e-05, |
| "loss": 0.0527, |
| "step": 8530 |
| }, |
| { |
| "epoch": 17.86610878661088, |
| "grad_norm": 0.19222994148731232, |
| "learning_rate": 4.270675362892256e-05, |
| "loss": 0.0514, |
| "step": 8540 |
| }, |
| { |
| "epoch": 17.88702928870293, |
| "grad_norm": 0.17729121446609497, |
| "learning_rate": 4.2597719097509246e-05, |
| "loss": 0.0473, |
| "step": 8550 |
| }, |
| { |
| "epoch": 17.90794979079498, |
| "grad_norm": 0.18024882674217224, |
| "learning_rate": 4.2488720543962146e-05, |
| "loss": 0.0504, |
| "step": 8560 |
| }, |
| { |
| "epoch": 17.92887029288703, |
| "grad_norm": 0.19688746333122253, |
| "learning_rate": 4.23797584980552e-05, |
| "loss": 0.0478, |
| "step": 8570 |
| }, |
| { |
| "epoch": 17.94979079497908, |
| "grad_norm": 0.2507992088794708, |
| "learning_rate": 4.227083348938486e-05, |
| "loss": 0.0454, |
| "step": 8580 |
| }, |
| { |
| "epoch": 17.97071129707113, |
| "grad_norm": 0.19047075510025024, |
| "learning_rate": 4.2161946047367586e-05, |
| "loss": 0.0489, |
| "step": 8590 |
| }, |
| { |
| "epoch": 17.99163179916318, |
| "grad_norm": 0.17170806229114532, |
| "learning_rate": 4.2053096701237294e-05, |
| "loss": 0.0455, |
| "step": 8600 |
| }, |
| { |
| "epoch": 18.01255230125523, |
| "grad_norm": 0.21080999076366425, |
| "learning_rate": 4.1944285980042656e-05, |
| "loss": 0.0735, |
| "step": 8610 |
| }, |
| { |
| "epoch": 18.03347280334728, |
| "grad_norm": 0.22191490232944489, |
| "learning_rate": 4.183551441264469e-05, |
| "loss": 0.0493, |
| "step": 8620 |
| }, |
| { |
| "epoch": 18.05439330543933, |
| "grad_norm": 0.1833556592464447, |
| "learning_rate": 4.172678252771408e-05, |
| "loss": 0.0518, |
| "step": 8630 |
| }, |
| { |
| "epoch": 18.07531380753138, |
| "grad_norm": 0.1750289648771286, |
| "learning_rate": 4.16180908537286e-05, |
| "loss": 0.0508, |
| "step": 8640 |
| }, |
| { |
| "epoch": 18.09623430962343, |
| "grad_norm": 0.24376438558101654, |
| "learning_rate": 4.150943991897065e-05, |
| "loss": 0.0492, |
| "step": 8650 |
| }, |
| { |
| "epoch": 18.11715481171548, |
| "grad_norm": 0.19261358678340912, |
| "learning_rate": 4.1400830251524605e-05, |
| "loss": 0.0443, |
| "step": 8660 |
| }, |
| { |
| "epoch": 18.13807531380753, |
| "grad_norm": 0.17163701355457306, |
| "learning_rate": 4.1292262379274215e-05, |
| "loss": 0.0498, |
| "step": 8670 |
| }, |
| { |
| "epoch": 18.15899581589958, |
| "grad_norm": 0.20435048639774323, |
| "learning_rate": 4.118373682990016e-05, |
| "loss": 0.0469, |
| "step": 8680 |
| }, |
| { |
| "epoch": 18.179916317991633, |
| "grad_norm": 0.21016691625118256, |
| "learning_rate": 4.107525413087737e-05, |
| "loss": 0.0462, |
| "step": 8690 |
| }, |
| { |
| "epoch": 18.200836820083683, |
| "grad_norm": 0.21283622086048126, |
| "learning_rate": 4.096681480947252e-05, |
| "loss": 0.0533, |
| "step": 8700 |
| }, |
| { |
| "epoch": 18.221757322175733, |
| "grad_norm": 0.17881537973880768, |
| "learning_rate": 4.085841939274146e-05, |
| "loss": 0.0456, |
| "step": 8710 |
| }, |
| { |
| "epoch": 18.242677824267783, |
| "grad_norm": 0.1933610439300537, |
| "learning_rate": 4.075006840752662e-05, |
| "loss": 0.0488, |
| "step": 8720 |
| }, |
| { |
| "epoch": 18.263598326359833, |
| "grad_norm": 0.18333800137043, |
| "learning_rate": 4.0641762380454515e-05, |
| "loss": 0.043, |
| "step": 8730 |
| }, |
| { |
| "epoch": 18.284518828451883, |
| "grad_norm": 0.20655131340026855, |
| "learning_rate": 4.0533501837933134e-05, |
| "loss": 0.0533, |
| "step": 8740 |
| }, |
| { |
| "epoch": 18.305439330543933, |
| "grad_norm": 0.17780126631259918, |
| "learning_rate": 4.042528730614936e-05, |
| "loss": 0.0419, |
| "step": 8750 |
| }, |
| { |
| "epoch": 18.326359832635983, |
| "grad_norm": 0.3027488589286804, |
| "learning_rate": 4.0317119311066486e-05, |
| "loss": 0.0538, |
| "step": 8760 |
| }, |
| { |
| "epoch": 18.347280334728033, |
| "grad_norm": 0.19167320430278778, |
| "learning_rate": 4.02089983784216e-05, |
| "loss": 0.0499, |
| "step": 8770 |
| }, |
| { |
| "epoch": 18.368200836820083, |
| "grad_norm": 0.22469434142112732, |
| "learning_rate": 4.010092503372309e-05, |
| "loss": 0.0507, |
| "step": 8780 |
| }, |
| { |
| "epoch": 18.389121338912133, |
| "grad_norm": 0.19235819578170776, |
| "learning_rate": 3.999289980224797e-05, |
| "loss": 0.0463, |
| "step": 8790 |
| }, |
| { |
| "epoch": 18.410041841004183, |
| "grad_norm": 0.2260487973690033, |
| "learning_rate": 3.9884923209039455e-05, |
| "loss": 0.0528, |
| "step": 8800 |
| }, |
| { |
| "epoch": 18.430962343096233, |
| "grad_norm": 0.18136419355869293, |
| "learning_rate": 3.977699577890439e-05, |
| "loss": 0.0533, |
| "step": 8810 |
| }, |
| { |
| "epoch": 18.451882845188283, |
| "grad_norm": 0.17911958694458008, |
| "learning_rate": 3.96691180364106e-05, |
| "loss": 0.0545, |
| "step": 8820 |
| }, |
| { |
| "epoch": 18.472803347280333, |
| "grad_norm": 0.15562835335731506, |
| "learning_rate": 3.956129050588446e-05, |
| "loss": 0.0462, |
| "step": 8830 |
| }, |
| { |
| "epoch": 18.493723849372383, |
| "grad_norm": 0.18491879105567932, |
| "learning_rate": 3.9453513711408275e-05, |
| "loss": 0.0524, |
| "step": 8840 |
| }, |
| { |
| "epoch": 18.514644351464437, |
| "grad_norm": 0.20958520472049713, |
| "learning_rate": 3.934578817681774e-05, |
| "loss": 0.0524, |
| "step": 8850 |
| }, |
| { |
| "epoch": 18.535564853556487, |
| "grad_norm": 0.16132481396198273, |
| "learning_rate": 3.9238114425699465e-05, |
| "loss": 0.0457, |
| "step": 8860 |
| }, |
| { |
| "epoch": 18.556485355648537, |
| "grad_norm": 0.18827597796916962, |
| "learning_rate": 3.91304929813883e-05, |
| "loss": 0.0545, |
| "step": 8870 |
| }, |
| { |
| "epoch": 18.577405857740587, |
| "grad_norm": 0.24619817733764648, |
| "learning_rate": 3.902292436696489e-05, |
| "loss": 0.0514, |
| "step": 8880 |
| }, |
| { |
| "epoch": 18.598326359832637, |
| "grad_norm": 0.17363670468330383, |
| "learning_rate": 3.891540910525316e-05, |
| "loss": 0.0491, |
| "step": 8890 |
| }, |
| { |
| "epoch": 18.619246861924687, |
| "grad_norm": 0.1485545039176941, |
| "learning_rate": 3.8807947718817624e-05, |
| "loss": 0.0446, |
| "step": 8900 |
| }, |
| { |
| "epoch": 18.640167364016737, |
| "grad_norm": 0.19468659162521362, |
| "learning_rate": 3.870054072996103e-05, |
| "loss": 0.0496, |
| "step": 8910 |
| }, |
| { |
| "epoch": 18.661087866108787, |
| "grad_norm": 0.14619556069374084, |
| "learning_rate": 3.859318866072168e-05, |
| "loss": 0.0539, |
| "step": 8920 |
| }, |
| { |
| "epoch": 18.682008368200837, |
| "grad_norm": 0.16320869326591492, |
| "learning_rate": 3.8485892032870965e-05, |
| "loss": 0.0436, |
| "step": 8930 |
| }, |
| { |
| "epoch": 18.702928870292887, |
| "grad_norm": 0.19678333401679993, |
| "learning_rate": 3.83786513679108e-05, |
| "loss": 0.0452, |
| "step": 8940 |
| }, |
| { |
| "epoch": 18.723849372384937, |
| "grad_norm": 0.20504184067249298, |
| "learning_rate": 3.8271467187071134e-05, |
| "loss": 0.0458, |
| "step": 8950 |
| }, |
| { |
| "epoch": 18.744769874476987, |
| "grad_norm": 0.1663840264081955, |
| "learning_rate": 3.816434001130732e-05, |
| "loss": 0.0468, |
| "step": 8960 |
| }, |
| { |
| "epoch": 18.765690376569037, |
| "grad_norm": 0.1819252073764801, |
| "learning_rate": 3.8057270361297706e-05, |
| "loss": 0.0527, |
| "step": 8970 |
| }, |
| { |
| "epoch": 18.786610878661087, |
| "grad_norm": 0.2027643322944641, |
| "learning_rate": 3.7950258757440985e-05, |
| "loss": 0.0511, |
| "step": 8980 |
| }, |
| { |
| "epoch": 18.807531380753137, |
| "grad_norm": 0.1975032091140747, |
| "learning_rate": 3.78433057198538e-05, |
| "loss": 0.041, |
| "step": 8990 |
| }, |
| { |
| "epoch": 18.828451882845187, |
| "grad_norm": 0.2793610990047455, |
| "learning_rate": 3.773641176836807e-05, |
| "loss": 0.0514, |
| "step": 9000 |
| }, |
| { |
| "epoch": 18.84937238493724, |
| "grad_norm": 0.19008958339691162, |
| "learning_rate": 3.7629577422528555e-05, |
| "loss": 0.0518, |
| "step": 9010 |
| }, |
| { |
| "epoch": 18.87029288702929, |
| "grad_norm": 0.23253273963928223, |
| "learning_rate": 3.7522803201590325e-05, |
| "loss": 0.048, |
| "step": 9020 |
| }, |
| { |
| "epoch": 18.89121338912134, |
| "grad_norm": 0.1523078978061676, |
| "learning_rate": 3.741608962451621e-05, |
| "loss": 0.0458, |
| "step": 9030 |
| }, |
| { |
| "epoch": 18.91213389121339, |
| "grad_norm": 0.24275828897953033, |
| "learning_rate": 3.730943720997427e-05, |
| "loss": 0.0466, |
| "step": 9040 |
| }, |
| { |
| "epoch": 18.93305439330544, |
| "grad_norm": 0.19571851193904877, |
| "learning_rate": 3.720284647633532e-05, |
| "loss": 0.0446, |
| "step": 9050 |
| }, |
| { |
| "epoch": 18.95397489539749, |
| "grad_norm": 0.22840335965156555, |
| "learning_rate": 3.7096317941670365e-05, |
| "loss": 0.0438, |
| "step": 9060 |
| }, |
| { |
| "epoch": 18.97489539748954, |
| "grad_norm": 0.17244522273540497, |
| "learning_rate": 3.698985212374814e-05, |
| "loss": 0.0454, |
| "step": 9070 |
| }, |
| { |
| "epoch": 18.99581589958159, |
| "grad_norm": 0.1521451771259308, |
| "learning_rate": 3.6883449540032477e-05, |
| "loss": 0.0436, |
| "step": 9080 |
| }, |
| { |
| "epoch": 19.01673640167364, |
| "grad_norm": 0.2062705010175705, |
| "learning_rate": 3.6777110707679905e-05, |
| "loss": 0.0511, |
| "step": 9090 |
| }, |
| { |
| "epoch": 19.03765690376569, |
| "grad_norm": 0.20016764104366302, |
| "learning_rate": 3.667083614353715e-05, |
| "loss": 0.0438, |
| "step": 9100 |
| }, |
| { |
| "epoch": 19.05857740585774, |
| "grad_norm": 0.15178647637367249, |
| "learning_rate": 3.6564626364138465e-05, |
| "loss": 0.045, |
| "step": 9110 |
| }, |
| { |
| "epoch": 19.07949790794979, |
| "grad_norm": 0.19940511882305145, |
| "learning_rate": 3.645848188570331e-05, |
| "loss": 0.0492, |
| "step": 9120 |
| }, |
| { |
| "epoch": 19.10041841004184, |
| "grad_norm": 0.3522074818611145, |
| "learning_rate": 3.635240322413374e-05, |
| "loss": 0.0538, |
| "step": 9130 |
| }, |
| { |
| "epoch": 19.12133891213389, |
| "grad_norm": 0.28608760237693787, |
| "learning_rate": 3.624639089501187e-05, |
| "loss": 0.0447, |
| "step": 9140 |
| }, |
| { |
| "epoch": 19.14225941422594, |
| "grad_norm": 0.2104145586490631, |
| "learning_rate": 3.614044541359749e-05, |
| "loss": 0.0428, |
| "step": 9150 |
| }, |
| { |
| "epoch": 19.16317991631799, |
| "grad_norm": 0.25791677832603455, |
| "learning_rate": 3.603456729482541e-05, |
| "loss": 0.0462, |
| "step": 9160 |
| }, |
| { |
| "epoch": 19.184100418410043, |
| "grad_norm": 0.1877574324607849, |
| "learning_rate": 3.5928757053303055e-05, |
| "loss": 0.0548, |
| "step": 9170 |
| }, |
| { |
| "epoch": 19.205020920502093, |
| "grad_norm": 0.1788347214460373, |
| "learning_rate": 3.5823015203308e-05, |
| "loss": 0.0487, |
| "step": 9180 |
| }, |
| { |
| "epoch": 19.225941422594143, |
| "grad_norm": 0.21949069201946259, |
| "learning_rate": 3.57173422587853e-05, |
| "loss": 0.0529, |
| "step": 9190 |
| }, |
| { |
| "epoch": 19.246861924686193, |
| "grad_norm": 0.15112219750881195, |
| "learning_rate": 3.561173873334522e-05, |
| "loss": 0.0469, |
| "step": 9200 |
| }, |
| { |
| "epoch": 19.267782426778243, |
| "grad_norm": 0.18177688121795654, |
| "learning_rate": 3.550620514026056e-05, |
| "loss": 0.0446, |
| "step": 9210 |
| }, |
| { |
| "epoch": 19.288702928870293, |
| "grad_norm": 0.14711672067642212, |
| "learning_rate": 3.54007419924642e-05, |
| "loss": 0.0482, |
| "step": 9220 |
| }, |
| { |
| "epoch": 19.309623430962343, |
| "grad_norm": 0.20293840765953064, |
| "learning_rate": 3.52953498025467e-05, |
| "loss": 0.0491, |
| "step": 9230 |
| }, |
| { |
| "epoch": 19.330543933054393, |
| "grad_norm": 0.1724160760641098, |
| "learning_rate": 3.519002908275368e-05, |
| "loss": 0.0495, |
| "step": 9240 |
| }, |
| { |
| "epoch": 19.351464435146443, |
| "grad_norm": 0.1835453063249588, |
| "learning_rate": 3.508478034498339e-05, |
| "loss": 0.0471, |
| "step": 9250 |
| }, |
| { |
| "epoch": 19.372384937238493, |
| "grad_norm": 0.16504423320293427, |
| "learning_rate": 3.497960410078427e-05, |
| "loss": 0.0489, |
| "step": 9260 |
| }, |
| { |
| "epoch": 19.393305439330543, |
| "grad_norm": 0.19601042568683624, |
| "learning_rate": 3.487450086135236e-05, |
| "loss": 0.0494, |
| "step": 9270 |
| }, |
| { |
| "epoch": 19.414225941422593, |
| "grad_norm": 0.19500787556171417, |
| "learning_rate": 3.476947113752891e-05, |
| "loss": 0.0475, |
| "step": 9280 |
| }, |
| { |
| "epoch": 19.435146443514643, |
| "grad_norm": 0.15155597031116486, |
| "learning_rate": 3.4664515439797823e-05, |
| "loss": 0.0417, |
| "step": 9290 |
| }, |
| { |
| "epoch": 19.456066945606693, |
| "grad_norm": 0.23204268515110016, |
| "learning_rate": 3.45596342782832e-05, |
| "loss": 0.0466, |
| "step": 9300 |
| }, |
| { |
| "epoch": 19.476987447698743, |
| "grad_norm": 0.14878489077091217, |
| "learning_rate": 3.4454828162746936e-05, |
| "loss": 0.0441, |
| "step": 9310 |
| }, |
| { |
| "epoch": 19.497907949790793, |
| "grad_norm": 0.19891710579395294, |
| "learning_rate": 3.435009760258608e-05, |
| "loss": 0.0531, |
| "step": 9320 |
| }, |
| { |
| "epoch": 19.518828451882847, |
| "grad_norm": 0.21309182047843933, |
| "learning_rate": 3.424544310683057e-05, |
| "loss": 0.053, |
| "step": 9330 |
| }, |
| { |
| "epoch": 19.539748953974897, |
| "grad_norm": 0.15616843104362488, |
| "learning_rate": 3.41408651841405e-05, |
| "loss": 0.0463, |
| "step": 9340 |
| }, |
| { |
| "epoch": 19.560669456066947, |
| "grad_norm": 0.19337613880634308, |
| "learning_rate": 3.403636434280388e-05, |
| "loss": 0.05, |
| "step": 9350 |
| }, |
| { |
| "epoch": 19.581589958158997, |
| "grad_norm": 0.19758610427379608, |
| "learning_rate": 3.393194109073411e-05, |
| "loss": 0.0533, |
| "step": 9360 |
| }, |
| { |
| "epoch": 19.602510460251047, |
| "grad_norm": 0.21965035796165466, |
| "learning_rate": 3.3827595935467376e-05, |
| "loss": 0.0431, |
| "step": 9370 |
| }, |
| { |
| "epoch": 19.623430962343097, |
| "grad_norm": 0.19240938127040863, |
| "learning_rate": 3.3723329384160344e-05, |
| "loss": 0.0509, |
| "step": 9380 |
| }, |
| { |
| "epoch": 19.644351464435147, |
| "grad_norm": 0.2612786889076233, |
| "learning_rate": 3.3619141943587646e-05, |
| "loss": 0.0507, |
| "step": 9390 |
| }, |
| { |
| "epoch": 19.665271966527197, |
| "grad_norm": 0.1438675969839096, |
| "learning_rate": 3.351503412013935e-05, |
| "loss": 0.049, |
| "step": 9400 |
| }, |
| { |
| "epoch": 19.686192468619247, |
| "grad_norm": 0.17203199863433838, |
| "learning_rate": 3.341100641981863e-05, |
| "loss": 0.0504, |
| "step": 9410 |
| }, |
| { |
| "epoch": 19.707112970711297, |
| "grad_norm": 0.20994871854782104, |
| "learning_rate": 3.330705934823919e-05, |
| "loss": 0.0441, |
| "step": 9420 |
| }, |
| { |
| "epoch": 19.728033472803347, |
| "grad_norm": 0.18428127467632294, |
| "learning_rate": 3.3203193410622804e-05, |
| "loss": 0.0455, |
| "step": 9430 |
| }, |
| { |
| "epoch": 19.748953974895397, |
| "grad_norm": 0.22371669113636017, |
| "learning_rate": 3.309940911179701e-05, |
| "loss": 0.0475, |
| "step": 9440 |
| }, |
| { |
| "epoch": 19.769874476987447, |
| "grad_norm": 0.1059647798538208, |
| "learning_rate": 3.2995706956192465e-05, |
| "loss": 0.047, |
| "step": 9450 |
| }, |
| { |
| "epoch": 19.790794979079497, |
| "grad_norm": 0.18892386555671692, |
| "learning_rate": 3.289208744784059e-05, |
| "loss": 0.0565, |
| "step": 9460 |
| }, |
| { |
| "epoch": 19.811715481171547, |
| "grad_norm": 0.2226765900850296, |
| "learning_rate": 3.2788551090371164e-05, |
| "loss": 0.0428, |
| "step": 9470 |
| }, |
| { |
| "epoch": 19.8326359832636, |
| "grad_norm": 0.23749548196792603, |
| "learning_rate": 3.268509838700974e-05, |
| "loss": 0.0531, |
| "step": 9480 |
| }, |
| { |
| "epoch": 19.85355648535565, |
| "grad_norm": 0.17291848361492157, |
| "learning_rate": 3.258172984057535e-05, |
| "loss": 0.0521, |
| "step": 9490 |
| }, |
| { |
| "epoch": 19.8744769874477, |
| "grad_norm": 0.23719993233680725, |
| "learning_rate": 3.247844595347798e-05, |
| "loss": 0.0465, |
| "step": 9500 |
| }, |
| { |
| "epoch": 19.89539748953975, |
| "grad_norm": 0.19163990020751953, |
| "learning_rate": 3.2375247227716077e-05, |
| "loss": 0.0527, |
| "step": 9510 |
| }, |
| { |
| "epoch": 19.9163179916318, |
| "grad_norm": 0.20488514006137848, |
| "learning_rate": 3.2272134164874264e-05, |
| "loss": 0.053, |
| "step": 9520 |
| }, |
| { |
| "epoch": 19.93723849372385, |
| "grad_norm": 0.1729532778263092, |
| "learning_rate": 3.216910726612073e-05, |
| "loss": 0.0451, |
| "step": 9530 |
| }, |
| { |
| "epoch": 19.9581589958159, |
| "grad_norm": 0.20507602393627167, |
| "learning_rate": 3.2066167032204956e-05, |
| "loss": 0.0494, |
| "step": 9540 |
| }, |
| { |
| "epoch": 19.97907949790795, |
| "grad_norm": 0.20738042891025543, |
| "learning_rate": 3.196331396345512e-05, |
| "loss": 0.0478, |
| "step": 9550 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.16895107924938202, |
| "learning_rate": 3.186054855977577e-05, |
| "loss": 0.0479, |
| "step": 9560 |
| }, |
| { |
| "epoch": 20.02092050209205, |
| "grad_norm": 0.1991293877363205, |
| "learning_rate": 3.175787132064542e-05, |
| "loss": 0.0488, |
| "step": 9570 |
| }, |
| { |
| "epoch": 20.0418410041841, |
| "grad_norm": 0.21318255364894867, |
| "learning_rate": 3.165528274511397e-05, |
| "loss": 0.0533, |
| "step": 9580 |
| }, |
| { |
| "epoch": 20.06276150627615, |
| "grad_norm": 0.2068648636341095, |
| "learning_rate": 3.155278333180047e-05, |
| "loss": 0.0509, |
| "step": 9590 |
| }, |
| { |
| "epoch": 20.0836820083682, |
| "grad_norm": 0.18824225664138794, |
| "learning_rate": 3.14503735788906e-05, |
| "loss": 0.0499, |
| "step": 9600 |
| }, |
| { |
| "epoch": 20.10460251046025, |
| "grad_norm": 0.2116539180278778, |
| "learning_rate": 3.134805398413419e-05, |
| "loss": 0.0426, |
| "step": 9610 |
| }, |
| { |
| "epoch": 20.1255230125523, |
| "grad_norm": 0.21344134211540222, |
| "learning_rate": 3.1245825044842954e-05, |
| "loss": 0.045, |
| "step": 9620 |
| }, |
| { |
| "epoch": 20.14644351464435, |
| "grad_norm": 0.1412409096956253, |
| "learning_rate": 3.114368725788791e-05, |
| "loss": 0.0534, |
| "step": 9630 |
| }, |
| { |
| "epoch": 20.1673640167364, |
| "grad_norm": 0.25958654284477234, |
| "learning_rate": 3.1041641119697075e-05, |
| "loss": 0.0496, |
| "step": 9640 |
| }, |
| { |
| "epoch": 20.188284518828453, |
| "grad_norm": 0.2157263457775116, |
| "learning_rate": 3.093968712625306e-05, |
| "loss": 0.054, |
| "step": 9650 |
| }, |
| { |
| "epoch": 20.209205020920503, |
| "grad_norm": 0.15186910331249237, |
| "learning_rate": 3.0837825773090535e-05, |
| "loss": 0.0419, |
| "step": 9660 |
| }, |
| { |
| "epoch": 20.230125523012553, |
| "grad_norm": 0.18805916607379913, |
| "learning_rate": 3.073605755529395e-05, |
| "loss": 0.0478, |
| "step": 9670 |
| }, |
| { |
| "epoch": 20.251046025104603, |
| "grad_norm": 0.2179500162601471, |
| "learning_rate": 3.063438296749511e-05, |
| "loss": 0.0468, |
| "step": 9680 |
| }, |
| { |
| "epoch": 20.271966527196653, |
| "grad_norm": 0.18823978304862976, |
| "learning_rate": 3.053280250387067e-05, |
| "loss": 0.0465, |
| "step": 9690 |
| }, |
| { |
| "epoch": 20.292887029288703, |
| "grad_norm": 0.20382045209407806, |
| "learning_rate": 3.043131665813988e-05, |
| "loss": 0.0499, |
| "step": 9700 |
| }, |
| { |
| "epoch": 20.313807531380753, |
| "grad_norm": 0.1649629771709442, |
| "learning_rate": 3.0329925923562073e-05, |
| "loss": 0.0428, |
| "step": 9710 |
| }, |
| { |
| "epoch": 20.334728033472803, |
| "grad_norm": 0.19102589786052704, |
| "learning_rate": 3.0228630792934277e-05, |
| "loss": 0.0444, |
| "step": 9720 |
| }, |
| { |
| "epoch": 20.355648535564853, |
| "grad_norm": 0.21547861397266388, |
| "learning_rate": 3.0127431758588918e-05, |
| "loss": 0.0547, |
| "step": 9730 |
| }, |
| { |
| "epoch": 20.376569037656903, |
| "grad_norm": 0.2640725374221802, |
| "learning_rate": 3.002632931239133e-05, |
| "loss": 0.0421, |
| "step": 9740 |
| }, |
| { |
| "epoch": 20.397489539748953, |
| "grad_norm": 0.19886314868927002, |
| "learning_rate": 2.992532394573735e-05, |
| "loss": 0.0492, |
| "step": 9750 |
| }, |
| { |
| "epoch": 20.418410041841003, |
| "grad_norm": 0.17059046030044556, |
| "learning_rate": 2.982441614955105e-05, |
| "loss": 0.0444, |
| "step": 9760 |
| }, |
| { |
| "epoch": 20.439330543933053, |
| "grad_norm": 0.20661625266075134, |
| "learning_rate": 2.972360641428218e-05, |
| "loss": 0.0457, |
| "step": 9770 |
| }, |
| { |
| "epoch": 20.460251046025103, |
| "grad_norm": 0.19825486838817596, |
| "learning_rate": 2.9622895229903973e-05, |
| "loss": 0.0438, |
| "step": 9780 |
| }, |
| { |
| "epoch": 20.481171548117153, |
| "grad_norm": 0.25336378812789917, |
| "learning_rate": 2.9522283085910612e-05, |
| "loss": 0.0461, |
| "step": 9790 |
| }, |
| { |
| "epoch": 20.502092050209207, |
| "grad_norm": 0.22274994850158691, |
| "learning_rate": 2.942177047131489e-05, |
| "loss": 0.0426, |
| "step": 9800 |
| }, |
| { |
| "epoch": 20.523012552301257, |
| "grad_norm": 0.1447426825761795, |
| "learning_rate": 2.9321357874645905e-05, |
| "loss": 0.0518, |
| "step": 9810 |
| }, |
| { |
| "epoch": 20.543933054393307, |
| "grad_norm": 0.23118385672569275, |
| "learning_rate": 2.9221045783946577e-05, |
| "loss": 0.049, |
| "step": 9820 |
| }, |
| { |
| "epoch": 20.564853556485357, |
| "grad_norm": 0.17135083675384521, |
| "learning_rate": 2.9120834686771394e-05, |
| "loss": 0.0423, |
| "step": 9830 |
| }, |
| { |
| "epoch": 20.585774058577407, |
| "grad_norm": 0.19719673693180084, |
| "learning_rate": 2.902072507018392e-05, |
| "loss": 0.0426, |
| "step": 9840 |
| }, |
| { |
| "epoch": 20.606694560669457, |
| "grad_norm": 0.1972011774778366, |
| "learning_rate": 2.892071742075446e-05, |
| "loss": 0.0558, |
| "step": 9850 |
| }, |
| { |
| "epoch": 20.627615062761507, |
| "grad_norm": 0.1997687965631485, |
| "learning_rate": 2.8820812224557812e-05, |
| "loss": 0.0511, |
| "step": 9860 |
| }, |
| { |
| "epoch": 20.648535564853557, |
| "grad_norm": 0.18193860352039337, |
| "learning_rate": 2.8721009967170764e-05, |
| "loss": 0.0521, |
| "step": 9870 |
| }, |
| { |
| "epoch": 20.669456066945607, |
| "grad_norm": 0.13904160261154175, |
| "learning_rate": 2.8621311133669748e-05, |
| "loss": 0.0476, |
| "step": 9880 |
| }, |
| { |
| "epoch": 20.690376569037657, |
| "grad_norm": 0.19542591273784637, |
| "learning_rate": 2.8521716208628595e-05, |
| "loss": 0.0433, |
| "step": 9890 |
| }, |
| { |
| "epoch": 20.711297071129707, |
| "grad_norm": 0.25673067569732666, |
| "learning_rate": 2.8422225676116015e-05, |
| "loss": 0.0482, |
| "step": 9900 |
| }, |
| { |
| "epoch": 20.732217573221757, |
| "grad_norm": 0.193390354514122, |
| "learning_rate": 2.832284001969342e-05, |
| "loss": 0.0483, |
| "step": 9910 |
| }, |
| { |
| "epoch": 20.753138075313807, |
| "grad_norm": 0.2549495995044708, |
| "learning_rate": 2.8223559722412408e-05, |
| "loss": 0.0438, |
| "step": 9920 |
| }, |
| { |
| "epoch": 20.774058577405857, |
| "grad_norm": 0.20687030255794525, |
| "learning_rate": 2.8124385266812516e-05, |
| "loss": 0.0497, |
| "step": 9930 |
| }, |
| { |
| "epoch": 20.794979079497907, |
| "grad_norm": 0.20353694260120392, |
| "learning_rate": 2.802531713491886e-05, |
| "loss": 0.0508, |
| "step": 9940 |
| }, |
| { |
| "epoch": 20.815899581589957, |
| "grad_norm": 0.1879122108221054, |
| "learning_rate": 2.7926355808239822e-05, |
| "loss": 0.0421, |
| "step": 9950 |
| }, |
| { |
| "epoch": 20.836820083682007, |
| "grad_norm": 0.2186966836452484, |
| "learning_rate": 2.782750176776458e-05, |
| "loss": 0.0539, |
| "step": 9960 |
| }, |
| { |
| "epoch": 20.85774058577406, |
| "grad_norm": 0.22850190103054047, |
| "learning_rate": 2.7728755493960946e-05, |
| "loss": 0.0525, |
| "step": 9970 |
| }, |
| { |
| "epoch": 20.87866108786611, |
| "grad_norm": 0.1749819964170456, |
| "learning_rate": 2.7630117466772876e-05, |
| "loss": 0.0436, |
| "step": 9980 |
| }, |
| { |
| "epoch": 20.89958158995816, |
| "grad_norm": 0.1613214612007141, |
| "learning_rate": 2.7531588165618278e-05, |
| "loss": 0.0492, |
| "step": 9990 |
| }, |
| { |
| "epoch": 20.92050209205021, |
| "grad_norm": 0.18235832452774048, |
| "learning_rate": 2.7433168069386533e-05, |
| "loss": 0.0448, |
| "step": 10000 |
| }, |
| { |
| "epoch": 20.94142259414226, |
| "grad_norm": 0.18606573343276978, |
| "learning_rate": 2.7334857656436308e-05, |
| "loss": 0.0483, |
| "step": 10010 |
| }, |
| { |
| "epoch": 20.96234309623431, |
| "grad_norm": 0.15410400927066803, |
| "learning_rate": 2.7236657404593157e-05, |
| "loss": 0.0471, |
| "step": 10020 |
| }, |
| { |
| "epoch": 20.98326359832636, |
| "grad_norm": 0.18072716891765594, |
| "learning_rate": 2.713856779114716e-05, |
| "loss": 0.0477, |
| "step": 10030 |
| }, |
| { |
| "epoch": 21.00418410041841, |
| "grad_norm": 0.21399426460266113, |
| "learning_rate": 2.704058929285074e-05, |
| "loss": 0.0455, |
| "step": 10040 |
| }, |
| { |
| "epoch": 21.02510460251046, |
| "grad_norm": 0.20552700757980347, |
| "learning_rate": 2.6942722385916175e-05, |
| "loss": 0.0476, |
| "step": 10050 |
| }, |
| { |
| "epoch": 21.04602510460251, |
| "grad_norm": 0.23852410912513733, |
| "learning_rate": 2.6844967546013394e-05, |
| "loss": 0.0514, |
| "step": 10060 |
| }, |
| { |
| "epoch": 21.06694560669456, |
| "grad_norm": 0.15994329750537872, |
| "learning_rate": 2.6747325248267673e-05, |
| "loss": 0.042, |
| "step": 10070 |
| }, |
| { |
| "epoch": 21.08786610878661, |
| "grad_norm": 0.1782161295413971, |
| "learning_rate": 2.664979596725724e-05, |
| "loss": 0.0485, |
| "step": 10080 |
| }, |
| { |
| "epoch": 21.10878661087866, |
| "grad_norm": 0.1394461840391159, |
| "learning_rate": 2.655238017701105e-05, |
| "loss": 0.0459, |
| "step": 10090 |
| }, |
| { |
| "epoch": 21.12970711297071, |
| "grad_norm": 0.20787139236927032, |
| "learning_rate": 2.6455078351006455e-05, |
| "loss": 0.0503, |
| "step": 10100 |
| }, |
| { |
| "epoch": 21.15062761506276, |
| "grad_norm": 0.15584444999694824, |
| "learning_rate": 2.6357890962166866e-05, |
| "loss": 0.0388, |
| "step": 10110 |
| }, |
| { |
| "epoch": 21.171548117154813, |
| "grad_norm": 0.24930870532989502, |
| "learning_rate": 2.6260818482859534e-05, |
| "loss": 0.0497, |
| "step": 10120 |
| }, |
| { |
| "epoch": 21.192468619246863, |
| "grad_norm": 0.19874218106269836, |
| "learning_rate": 2.6163861384893156e-05, |
| "loss": 0.0484, |
| "step": 10130 |
| }, |
| { |
| "epoch": 21.213389121338913, |
| "grad_norm": 0.2356872707605362, |
| "learning_rate": 2.606702013951564e-05, |
| "loss": 0.0449, |
| "step": 10140 |
| }, |
| { |
| "epoch": 21.234309623430963, |
| "grad_norm": 0.22813639044761658, |
| "learning_rate": 2.5970295217411844e-05, |
| "loss": 0.0461, |
| "step": 10150 |
| }, |
| { |
| "epoch": 21.255230125523013, |
| "grad_norm": 0.25937241315841675, |
| "learning_rate": 2.5873687088701236e-05, |
| "loss": 0.0437, |
| "step": 10160 |
| }, |
| { |
| "epoch": 21.276150627615063, |
| "grad_norm": 0.23558540642261505, |
| "learning_rate": 2.5777196222935596e-05, |
| "loss": 0.0445, |
| "step": 10170 |
| }, |
| { |
| "epoch": 21.297071129707113, |
| "grad_norm": 0.18446467816829681, |
| "learning_rate": 2.5680823089096807e-05, |
| "loss": 0.0441, |
| "step": 10180 |
| }, |
| { |
| "epoch": 21.317991631799163, |
| "grad_norm": 0.1600533127784729, |
| "learning_rate": 2.558456815559448e-05, |
| "loss": 0.0459, |
| "step": 10190 |
| }, |
| { |
| "epoch": 21.338912133891213, |
| "grad_norm": 0.18886277079582214, |
| "learning_rate": 2.548843189026378e-05, |
| "loss": 0.0468, |
| "step": 10200 |
| }, |
| { |
| "epoch": 21.359832635983263, |
| "grad_norm": 0.2278919667005539, |
| "learning_rate": 2.5392414760363048e-05, |
| "loss": 0.047, |
| "step": 10210 |
| }, |
| { |
| "epoch": 21.380753138075313, |
| "grad_norm": 0.23357948660850525, |
| "learning_rate": 2.529651723257162e-05, |
| "loss": 0.0468, |
| "step": 10220 |
| }, |
| { |
| "epoch": 21.401673640167363, |
| "grad_norm": 0.20221158862113953, |
| "learning_rate": 2.5200739772987537e-05, |
| "loss": 0.049, |
| "step": 10230 |
| }, |
| { |
| "epoch": 21.422594142259413, |
| "grad_norm": 0.18963316082954407, |
| "learning_rate": 2.5105082847125184e-05, |
| "loss": 0.0413, |
| "step": 10240 |
| }, |
| { |
| "epoch": 21.443514644351463, |
| "grad_norm": 0.17294231057167053, |
| "learning_rate": 2.5009546919913218e-05, |
| "loss": 0.0472, |
| "step": 10250 |
| }, |
| { |
| "epoch": 21.464435146443513, |
| "grad_norm": 0.21104329824447632, |
| "learning_rate": 2.4914132455692098e-05, |
| "loss": 0.0418, |
| "step": 10260 |
| }, |
| { |
| "epoch": 21.485355648535563, |
| "grad_norm": 0.17008601129055023, |
| "learning_rate": 2.4818839918211962e-05, |
| "loss": 0.0385, |
| "step": 10270 |
| }, |
| { |
| "epoch": 21.506276150627617, |
| "grad_norm": 0.1992831528186798, |
| "learning_rate": 2.4723669770630376e-05, |
| "loss": 0.0496, |
| "step": 10280 |
| }, |
| { |
| "epoch": 21.527196652719667, |
| "grad_norm": 0.2100939303636551, |
| "learning_rate": 2.4628622475509972e-05, |
| "loss": 0.0482, |
| "step": 10290 |
| }, |
| { |
| "epoch": 21.548117154811717, |
| "grad_norm": 0.21112582087516785, |
| "learning_rate": 2.4533698494816342e-05, |
| "loss": 0.0559, |
| "step": 10300 |
| }, |
| { |
| "epoch": 21.569037656903767, |
| "grad_norm": 0.21962668001651764, |
| "learning_rate": 2.44388982899157e-05, |
| "loss": 0.0542, |
| "step": 10310 |
| }, |
| { |
| "epoch": 21.589958158995817, |
| "grad_norm": 0.16883806884288788, |
| "learning_rate": 2.4344222321572636e-05, |
| "loss": 0.0461, |
| "step": 10320 |
| }, |
| { |
| "epoch": 21.610878661087867, |
| "grad_norm": 0.21127848327159882, |
| "learning_rate": 2.4249671049947954e-05, |
| "loss": 0.0448, |
| "step": 10330 |
| }, |
| { |
| "epoch": 21.631799163179917, |
| "grad_norm": 0.15680480003356934, |
| "learning_rate": 2.4155244934596333e-05, |
| "loss": 0.0465, |
| "step": 10340 |
| }, |
| { |
| "epoch": 21.652719665271967, |
| "grad_norm": 0.17642885446548462, |
| "learning_rate": 2.406094443446416e-05, |
| "loss": 0.0422, |
| "step": 10350 |
| }, |
| { |
| "epoch": 21.673640167364017, |
| "grad_norm": 0.1920289397239685, |
| "learning_rate": 2.3966770007887317e-05, |
| "loss": 0.0438, |
| "step": 10360 |
| }, |
| { |
| "epoch": 21.694560669456067, |
| "grad_norm": 0.15706102550029755, |
| "learning_rate": 2.3872722112588903e-05, |
| "loss": 0.0404, |
| "step": 10370 |
| }, |
| { |
| "epoch": 21.715481171548117, |
| "grad_norm": 0.2093636691570282, |
| "learning_rate": 2.3778801205676997e-05, |
| "loss": 0.0381, |
| "step": 10380 |
| }, |
| { |
| "epoch": 21.736401673640167, |
| "grad_norm": 0.2329166829586029, |
| "learning_rate": 2.3685007743642524e-05, |
| "loss": 0.0518, |
| "step": 10390 |
| }, |
| { |
| "epoch": 21.757322175732217, |
| "grad_norm": 0.1390232890844345, |
| "learning_rate": 2.3591342182356914e-05, |
| "loss": 0.0468, |
| "step": 10400 |
| }, |
| { |
| "epoch": 21.778242677824267, |
| "grad_norm": 0.32355770468711853, |
| "learning_rate": 2.3497804977070016e-05, |
| "loss": 0.0446, |
| "step": 10410 |
| }, |
| { |
| "epoch": 21.799163179916317, |
| "grad_norm": 0.16483452916145325, |
| "learning_rate": 2.3404396582407777e-05, |
| "loss": 0.0502, |
| "step": 10420 |
| }, |
| { |
| "epoch": 21.820083682008367, |
| "grad_norm": 0.17312510311603546, |
| "learning_rate": 2.331111745237007e-05, |
| "loss": 0.0416, |
| "step": 10430 |
| }, |
| { |
| "epoch": 21.84100418410042, |
| "grad_norm": 0.17626938223838806, |
| "learning_rate": 2.3217968040328526e-05, |
| "loss": 0.0499, |
| "step": 10440 |
| }, |
| { |
| "epoch": 21.86192468619247, |
| "grad_norm": 0.2215689867734909, |
| "learning_rate": 2.3124948799024286e-05, |
| "loss": 0.0413, |
| "step": 10450 |
| }, |
| { |
| "epoch": 21.88284518828452, |
| "grad_norm": 0.28537923097610474, |
| "learning_rate": 2.3032060180565828e-05, |
| "loss": 0.0505, |
| "step": 10460 |
| }, |
| { |
| "epoch": 21.90376569037657, |
| "grad_norm": 0.21920333802700043, |
| "learning_rate": 2.2939302636426724e-05, |
| "loss": 0.0424, |
| "step": 10470 |
| }, |
| { |
| "epoch": 21.92468619246862, |
| "grad_norm": 0.2144719809293747, |
| "learning_rate": 2.2846676617443458e-05, |
| "loss": 0.0403, |
| "step": 10480 |
| }, |
| { |
| "epoch": 21.94560669456067, |
| "grad_norm": 0.16767054796218872, |
| "learning_rate": 2.275418257381332e-05, |
| "loss": 0.0435, |
| "step": 10490 |
| }, |
| { |
| "epoch": 21.96652719665272, |
| "grad_norm": 0.1883849948644638, |
| "learning_rate": 2.2661820955092083e-05, |
| "loss": 0.0483, |
| "step": 10500 |
| }, |
| { |
| "epoch": 21.98744769874477, |
| "grad_norm": 0.19924281537532806, |
| "learning_rate": 2.256959221019193e-05, |
| "loss": 0.0445, |
| "step": 10510 |
| }, |
| { |
| "epoch": 22.00836820083682, |
| "grad_norm": 0.22506235539913177, |
| "learning_rate": 2.2477496787379227e-05, |
| "loss": 0.0548, |
| "step": 10520 |
| }, |
| { |
| "epoch": 22.02928870292887, |
| "grad_norm": 0.24542684853076935, |
| "learning_rate": 2.238553513427229e-05, |
| "loss": 0.0482, |
| "step": 10530 |
| }, |
| { |
| "epoch": 22.05020920502092, |
| "grad_norm": 0.22342945635318756, |
| "learning_rate": 2.2293707697839344e-05, |
| "loss": 0.0516, |
| "step": 10540 |
| }, |
| { |
| "epoch": 22.07112970711297, |
| "grad_norm": 0.2504042685031891, |
| "learning_rate": 2.2202014924396214e-05, |
| "loss": 0.0506, |
| "step": 10550 |
| }, |
| { |
| "epoch": 22.09205020920502, |
| "grad_norm": 0.1865241527557373, |
| "learning_rate": 2.21104572596042e-05, |
| "loss": 0.0399, |
| "step": 10560 |
| }, |
| { |
| "epoch": 22.11297071129707, |
| "grad_norm": 0.16631072759628296, |
| "learning_rate": 2.2019035148468e-05, |
| "loss": 0.0415, |
| "step": 10570 |
| }, |
| { |
| "epoch": 22.13389121338912, |
| "grad_norm": 0.21772721409797668, |
| "learning_rate": 2.1927749035333374e-05, |
| "loss": 0.0453, |
| "step": 10580 |
| }, |
| { |
| "epoch": 22.15481171548117, |
| "grad_norm": 0.18731866776943207, |
| "learning_rate": 2.1836599363885152e-05, |
| "loss": 0.0435, |
| "step": 10590 |
| }, |
| { |
| "epoch": 22.175732217573223, |
| "grad_norm": 0.21080322563648224, |
| "learning_rate": 2.1745586577144993e-05, |
| "loss": 0.0437, |
| "step": 10600 |
| }, |
| { |
| "epoch": 22.196652719665273, |
| "grad_norm": 0.18895407021045685, |
| "learning_rate": 2.1654711117469207e-05, |
| "loss": 0.0455, |
| "step": 10610 |
| }, |
| { |
| "epoch": 22.217573221757323, |
| "grad_norm": 0.2682255506515503, |
| "learning_rate": 2.1563973426546702e-05, |
| "loss": 0.0471, |
| "step": 10620 |
| }, |
| { |
| "epoch": 22.238493723849373, |
| "grad_norm": 0.2073366940021515, |
| "learning_rate": 2.1473373945396728e-05, |
| "loss": 0.0432, |
| "step": 10630 |
| }, |
| { |
| "epoch": 22.259414225941423, |
| "grad_norm": 0.1627686470746994, |
| "learning_rate": 2.138291311436679e-05, |
| "loss": 0.0451, |
| "step": 10640 |
| }, |
| { |
| "epoch": 22.280334728033473, |
| "grad_norm": 0.14305227994918823, |
| "learning_rate": 2.1292591373130518e-05, |
| "loss": 0.045, |
| "step": 10650 |
| }, |
| { |
| "epoch": 22.301255230125523, |
| "grad_norm": 0.18689580261707306, |
| "learning_rate": 2.1202409160685528e-05, |
| "loss": 0.0426, |
| "step": 10660 |
| }, |
| { |
| "epoch": 22.322175732217573, |
| "grad_norm": 0.16318003833293915, |
| "learning_rate": 2.1112366915351228e-05, |
| "loss": 0.0403, |
| "step": 10670 |
| }, |
| { |
| "epoch": 22.343096234309623, |
| "grad_norm": 0.1883779615163803, |
| "learning_rate": 2.102246507476679e-05, |
| "loss": 0.0459, |
| "step": 10680 |
| }, |
| { |
| "epoch": 22.364016736401673, |
| "grad_norm": 0.20092038810253143, |
| "learning_rate": 2.09327040758889e-05, |
| "loss": 0.0448, |
| "step": 10690 |
| }, |
| { |
| "epoch": 22.384937238493723, |
| "grad_norm": 0.19910837709903717, |
| "learning_rate": 2.0843084354989767e-05, |
| "loss": 0.0456, |
| "step": 10700 |
| }, |
| { |
| "epoch": 22.405857740585773, |
| "grad_norm": 0.1743541806936264, |
| "learning_rate": 2.0753606347654892e-05, |
| "loss": 0.0469, |
| "step": 10710 |
| }, |
| { |
| "epoch": 22.426778242677823, |
| "grad_norm": 0.21876223385334015, |
| "learning_rate": 2.0664270488780985e-05, |
| "loss": 0.0439, |
| "step": 10720 |
| }, |
| { |
| "epoch": 22.447698744769873, |
| "grad_norm": 0.2005428522825241, |
| "learning_rate": 2.0575077212573905e-05, |
| "loss": 0.0445, |
| "step": 10730 |
| }, |
| { |
| "epoch": 22.468619246861923, |
| "grad_norm": 0.158347949385643, |
| "learning_rate": 2.0486026952546484e-05, |
| "loss": 0.0465, |
| "step": 10740 |
| }, |
| { |
| "epoch": 22.489539748953973, |
| "grad_norm": 0.16677257418632507, |
| "learning_rate": 2.0397120141516457e-05, |
| "loss": 0.0402, |
| "step": 10750 |
| }, |
| { |
| "epoch": 22.510460251046027, |
| "grad_norm": 0.1984304040670395, |
| "learning_rate": 2.0308357211604313e-05, |
| "loss": 0.0451, |
| "step": 10760 |
| }, |
| { |
| "epoch": 22.531380753138077, |
| "grad_norm": 0.20393149554729462, |
| "learning_rate": 2.0219738594231224e-05, |
| "loss": 0.0436, |
| "step": 10770 |
| }, |
| { |
| "epoch": 22.552301255230127, |
| "grad_norm": 0.1970740109682083, |
| "learning_rate": 2.0131264720116993e-05, |
| "loss": 0.0432, |
| "step": 10780 |
| }, |
| { |
| "epoch": 22.573221757322177, |
| "grad_norm": 0.2895578444004059, |
| "learning_rate": 2.0042936019277853e-05, |
| "loss": 0.0475, |
| "step": 10790 |
| }, |
| { |
| "epoch": 22.594142259414227, |
| "grad_norm": 0.20197248458862305, |
| "learning_rate": 1.99547529210245e-05, |
| "loss": 0.0458, |
| "step": 10800 |
| }, |
| { |
| "epoch": 22.615062761506277, |
| "grad_norm": 0.22795496881008148, |
| "learning_rate": 1.9866715853959934e-05, |
| "loss": 0.0444, |
| "step": 10810 |
| }, |
| { |
| "epoch": 22.635983263598327, |
| "grad_norm": 0.18115581572055817, |
| "learning_rate": 1.977882524597734e-05, |
| "loss": 0.0434, |
| "step": 10820 |
| }, |
| { |
| "epoch": 22.656903765690377, |
| "grad_norm": 0.22426635026931763, |
| "learning_rate": 1.969108152425813e-05, |
| "loss": 0.0477, |
| "step": 10830 |
| }, |
| { |
| "epoch": 22.677824267782427, |
| "grad_norm": 0.20142389833927155, |
| "learning_rate": 1.9603485115269744e-05, |
| "loss": 0.0497, |
| "step": 10840 |
| }, |
| { |
| "epoch": 22.698744769874477, |
| "grad_norm": 0.19367477297782898, |
| "learning_rate": 1.9516036444763613e-05, |
| "loss": 0.0465, |
| "step": 10850 |
| }, |
| { |
| "epoch": 22.719665271966527, |
| "grad_norm": 0.29349711537361145, |
| "learning_rate": 1.9428735937773173e-05, |
| "loss": 0.0481, |
| "step": 10860 |
| }, |
| { |
| "epoch": 22.740585774058577, |
| "grad_norm": 0.11351630091667175, |
| "learning_rate": 1.9341584018611646e-05, |
| "loss": 0.0425, |
| "step": 10870 |
| }, |
| { |
| "epoch": 22.761506276150627, |
| "grad_norm": 0.18608035147190094, |
| "learning_rate": 1.9254581110870123e-05, |
| "loss": 0.0402, |
| "step": 10880 |
| }, |
| { |
| "epoch": 22.782426778242677, |
| "grad_norm": 0.14883318543434143, |
| "learning_rate": 1.916772763741544e-05, |
| "loss": 0.0396, |
| "step": 10890 |
| }, |
| { |
| "epoch": 22.803347280334727, |
| "grad_norm": 0.21729174256324768, |
| "learning_rate": 1.908102402038807e-05, |
| "loss": 0.041, |
| "step": 10900 |
| }, |
| { |
| "epoch": 22.824267782426777, |
| "grad_norm": 0.18119406700134277, |
| "learning_rate": 1.8994470681200204e-05, |
| "loss": 0.0388, |
| "step": 10910 |
| }, |
| { |
| "epoch": 22.84518828451883, |
| "grad_norm": 0.2152002453804016, |
| "learning_rate": 1.8908068040533578e-05, |
| "loss": 0.049, |
| "step": 10920 |
| }, |
| { |
| "epoch": 22.86610878661088, |
| "grad_norm": 0.25034478306770325, |
| "learning_rate": 1.8821816518337455e-05, |
| "loss": 0.048, |
| "step": 10930 |
| }, |
| { |
| "epoch": 22.88702928870293, |
| "grad_norm": 0.17609989643096924, |
| "learning_rate": 1.8735716533826663e-05, |
| "loss": 0.0415, |
| "step": 10940 |
| }, |
| { |
| "epoch": 22.90794979079498, |
| "grad_norm": 0.2574017643928528, |
| "learning_rate": 1.8649768505479476e-05, |
| "loss": 0.0494, |
| "step": 10950 |
| }, |
| { |
| "epoch": 22.92887029288703, |
| "grad_norm": 0.24520331621170044, |
| "learning_rate": 1.8563972851035616e-05, |
| "loss": 0.0454, |
| "step": 10960 |
| }, |
| { |
| "epoch": 22.94979079497908, |
| "grad_norm": 0.22302265465259552, |
| "learning_rate": 1.847832998749418e-05, |
| "loss": 0.0454, |
| "step": 10970 |
| }, |
| { |
| "epoch": 22.97071129707113, |
| "grad_norm": 0.2167111337184906, |
| "learning_rate": 1.8392840331111644e-05, |
| "loss": 0.0419, |
| "step": 10980 |
| }, |
| { |
| "epoch": 22.99163179916318, |
| "grad_norm": 0.1871531903743744, |
| "learning_rate": 1.830750429739989e-05, |
| "loss": 0.0439, |
| "step": 10990 |
| }, |
| { |
| "epoch": 23.01255230125523, |
| "grad_norm": 0.18190360069274902, |
| "learning_rate": 1.822232230112409e-05, |
| "loss": 0.0559, |
| "step": 11000 |
| }, |
| { |
| "epoch": 23.03347280334728, |
| "grad_norm": 0.16407927870750427, |
| "learning_rate": 1.813729475630071e-05, |
| "loss": 0.0465, |
| "step": 11010 |
| }, |
| { |
| "epoch": 23.05439330543933, |
| "grad_norm": 0.20566906034946442, |
| "learning_rate": 1.8052422076195635e-05, |
| "loss": 0.0553, |
| "step": 11020 |
| }, |
| { |
| "epoch": 23.07531380753138, |
| "grad_norm": 0.2527928054332733, |
| "learning_rate": 1.7967704673321918e-05, |
| "loss": 0.0456, |
| "step": 11030 |
| }, |
| { |
| "epoch": 23.09623430962343, |
| "grad_norm": 0.28300896286964417, |
| "learning_rate": 1.7883142959438004e-05, |
| "loss": 0.0472, |
| "step": 11040 |
| }, |
| { |
| "epoch": 23.11715481171548, |
| "grad_norm": 0.21645322442054749, |
| "learning_rate": 1.779873734554558e-05, |
| "loss": 0.0527, |
| "step": 11050 |
| }, |
| { |
| "epoch": 23.13807531380753, |
| "grad_norm": 0.2419598549604416, |
| "learning_rate": 1.771448824188761e-05, |
| "loss": 0.0416, |
| "step": 11060 |
| }, |
| { |
| "epoch": 23.15899581589958, |
| "grad_norm": 0.1742696762084961, |
| "learning_rate": 1.763039605794644e-05, |
| "loss": 0.042, |
| "step": 11070 |
| }, |
| { |
| "epoch": 23.179916317991633, |
| "grad_norm": 0.1963113397359848, |
| "learning_rate": 1.754646120244164e-05, |
| "loss": 0.0472, |
| "step": 11080 |
| }, |
| { |
| "epoch": 23.200836820083683, |
| "grad_norm": 0.14322315156459808, |
| "learning_rate": 1.7462684083328144e-05, |
| "loss": 0.0459, |
| "step": 11090 |
| }, |
| { |
| "epoch": 23.221757322175733, |
| "grad_norm": 0.1473892629146576, |
| "learning_rate": 1.7379065107794262e-05, |
| "loss": 0.0484, |
| "step": 11100 |
| }, |
| { |
| "epoch": 23.242677824267783, |
| "grad_norm": 0.20052359998226166, |
| "learning_rate": 1.7295604682259586e-05, |
| "loss": 0.0504, |
| "step": 11110 |
| }, |
| { |
| "epoch": 23.263598326359833, |
| "grad_norm": 0.20940597355365753, |
| "learning_rate": 1.7212303212373175e-05, |
| "loss": 0.0444, |
| "step": 11120 |
| }, |
| { |
| "epoch": 23.284518828451883, |
| "grad_norm": 0.30127179622650146, |
| "learning_rate": 1.712916110301146e-05, |
| "loss": 0.0535, |
| "step": 11130 |
| }, |
| { |
| "epoch": 23.305439330543933, |
| "grad_norm": 0.1848253309726715, |
| "learning_rate": 1.7046178758276298e-05, |
| "loss": 0.0456, |
| "step": 11140 |
| }, |
| { |
| "epoch": 23.326359832635983, |
| "grad_norm": 0.175484761595726, |
| "learning_rate": 1.696335658149309e-05, |
| "loss": 0.0427, |
| "step": 11150 |
| }, |
| { |
| "epoch": 23.347280334728033, |
| "grad_norm": 0.23676013946533203, |
| "learning_rate": 1.6880694975208727e-05, |
| "loss": 0.0479, |
| "step": 11160 |
| }, |
| { |
| "epoch": 23.368200836820083, |
| "grad_norm": 0.21369418501853943, |
| "learning_rate": 1.6798194341189687e-05, |
| "loss": 0.0547, |
| "step": 11170 |
| }, |
| { |
| "epoch": 23.389121338912133, |
| "grad_norm": 0.21497584879398346, |
| "learning_rate": 1.671585508042003e-05, |
| "loss": 0.0414, |
| "step": 11180 |
| }, |
| { |
| "epoch": 23.410041841004183, |
| "grad_norm": 0.1943253129720688, |
| "learning_rate": 1.6633677593099483e-05, |
| "loss": 0.0438, |
| "step": 11190 |
| }, |
| { |
| "epoch": 23.430962343096233, |
| "grad_norm": 0.12377497553825378, |
| "learning_rate": 1.655166227864154e-05, |
| "loss": 0.0447, |
| "step": 11200 |
| }, |
| { |
| "epoch": 23.451882845188283, |
| "grad_norm": 0.23389364778995514, |
| "learning_rate": 1.6469809535671426e-05, |
| "loss": 0.0461, |
| "step": 11210 |
| }, |
| { |
| "epoch": 23.472803347280333, |
| "grad_norm": 0.19530461728572845, |
| "learning_rate": 1.638811976202421e-05, |
| "loss": 0.053, |
| "step": 11220 |
| }, |
| { |
| "epoch": 23.493723849372383, |
| "grad_norm": 0.2097453773021698, |
| "learning_rate": 1.6306593354742895e-05, |
| "loss": 0.0517, |
| "step": 11230 |
| }, |
| { |
| "epoch": 23.514644351464437, |
| "grad_norm": 0.20635390281677246, |
| "learning_rate": 1.6225230710076455e-05, |
| "loss": 0.0462, |
| "step": 11240 |
| }, |
| { |
| "epoch": 23.535564853556487, |
| "grad_norm": 0.25905895233154297, |
| "learning_rate": 1.6144032223477924e-05, |
| "loss": 0.0495, |
| "step": 11250 |
| }, |
| { |
| "epoch": 23.556485355648537, |
| "grad_norm": 0.18913106620311737, |
| "learning_rate": 1.606299828960243e-05, |
| "loss": 0.0462, |
| "step": 11260 |
| }, |
| { |
| "epoch": 23.577405857740587, |
| "grad_norm": 0.1762065887451172, |
| "learning_rate": 1.5982129302305337e-05, |
| "loss": 0.0446, |
| "step": 11270 |
| }, |
| { |
| "epoch": 23.598326359832637, |
| "grad_norm": 0.20014171302318573, |
| "learning_rate": 1.590142565464032e-05, |
| "loss": 0.0425, |
| "step": 11280 |
| }, |
| { |
| "epoch": 23.619246861924687, |
| "grad_norm": 0.16639460623264313, |
| "learning_rate": 1.5820887738857408e-05, |
| "loss": 0.0454, |
| "step": 11290 |
| }, |
| { |
| "epoch": 23.640167364016737, |
| "grad_norm": 0.15281502902507782, |
| "learning_rate": 1.5740515946401134e-05, |
| "loss": 0.0423, |
| "step": 11300 |
| }, |
| { |
| "epoch": 23.661087866108787, |
| "grad_norm": 0.2168046087026596, |
| "learning_rate": 1.5660310667908634e-05, |
| "loss": 0.0448, |
| "step": 11310 |
| }, |
| { |
| "epoch": 23.682008368200837, |
| "grad_norm": 0.19726291298866272, |
| "learning_rate": 1.5580272293207655e-05, |
| "loss": 0.0497, |
| "step": 11320 |
| }, |
| { |
| "epoch": 23.702928870292887, |
| "grad_norm": 0.16284000873565674, |
| "learning_rate": 1.5500401211314796e-05, |
| "loss": 0.0424, |
| "step": 11330 |
| }, |
| { |
| "epoch": 23.723849372384937, |
| "grad_norm": 0.24472852051258087, |
| "learning_rate": 1.542069781043351e-05, |
| "loss": 0.0427, |
| "step": 11340 |
| }, |
| { |
| "epoch": 23.744769874476987, |
| "grad_norm": 0.23519930243492126, |
| "learning_rate": 1.534116247795226e-05, |
| "loss": 0.0475, |
| "step": 11350 |
| }, |
| { |
| "epoch": 23.765690376569037, |
| "grad_norm": 0.16218726336956024, |
| "learning_rate": 1.526179560044267e-05, |
| "loss": 0.0442, |
| "step": 11360 |
| }, |
| { |
| "epoch": 23.786610878661087, |
| "grad_norm": 0.21716220676898956, |
| "learning_rate": 1.5182597563657552e-05, |
| "loss": 0.0456, |
| "step": 11370 |
| }, |
| { |
| "epoch": 23.807531380753137, |
| "grad_norm": 0.2158024162054062, |
| "learning_rate": 1.5103568752529135e-05, |
| "loss": 0.0396, |
| "step": 11380 |
| }, |
| { |
| "epoch": 23.828451882845187, |
| "grad_norm": 0.1851579248905182, |
| "learning_rate": 1.5024709551167142e-05, |
| "loss": 0.0467, |
| "step": 11390 |
| }, |
| { |
| "epoch": 23.84937238493724, |
| "grad_norm": 0.27738890051841736, |
| "learning_rate": 1.4946020342856898e-05, |
| "loss": 0.0429, |
| "step": 11400 |
| }, |
| { |
| "epoch": 23.87029288702929, |
| "grad_norm": 0.22943255305290222, |
| "learning_rate": 1.4867501510057546e-05, |
| "loss": 0.0396, |
| "step": 11410 |
| }, |
| { |
| "epoch": 23.89121338912134, |
| "grad_norm": 0.19552598893642426, |
| "learning_rate": 1.4789153434400094e-05, |
| "loss": 0.0393, |
| "step": 11420 |
| }, |
| { |
| "epoch": 23.91213389121339, |
| "grad_norm": 0.28375789523124695, |
| "learning_rate": 1.4710976496685614e-05, |
| "loss": 0.0458, |
| "step": 11430 |
| }, |
| { |
| "epoch": 23.93305439330544, |
| "grad_norm": 0.23132510483264923, |
| "learning_rate": 1.4632971076883406e-05, |
| "loss": 0.0447, |
| "step": 11440 |
| }, |
| { |
| "epoch": 23.95397489539749, |
| "grad_norm": 0.16944925487041473, |
| "learning_rate": 1.4555137554129117e-05, |
| "loss": 0.0493, |
| "step": 11450 |
| }, |
| { |
| "epoch": 23.97489539748954, |
| "grad_norm": 0.17942452430725098, |
| "learning_rate": 1.4477476306722925e-05, |
| "loss": 0.0426, |
| "step": 11460 |
| }, |
| { |
| "epoch": 23.99581589958159, |
| "grad_norm": 0.18083634972572327, |
| "learning_rate": 1.439998771212766e-05, |
| "loss": 0.0475, |
| "step": 11470 |
| }, |
| { |
| "epoch": 24.01673640167364, |
| "grad_norm": 0.19945523142814636, |
| "learning_rate": 1.4322672146966982e-05, |
| "loss": 0.0463, |
| "step": 11480 |
| }, |
| { |
| "epoch": 24.03765690376569, |
| "grad_norm": 0.15182597935199738, |
| "learning_rate": 1.4245529987023621e-05, |
| "loss": 0.051, |
| "step": 11490 |
| }, |
| { |
| "epoch": 24.05857740585774, |
| "grad_norm": 0.23692505061626434, |
| "learning_rate": 1.4168561607237436e-05, |
| "loss": 0.0475, |
| "step": 11500 |
| }, |
| { |
| "epoch": 24.07949790794979, |
| "grad_norm": 0.25118017196655273, |
| "learning_rate": 1.4091767381703657e-05, |
| "loss": 0.0381, |
| "step": 11510 |
| }, |
| { |
| "epoch": 24.10041841004184, |
| "grad_norm": 0.19620022177696228, |
| "learning_rate": 1.4015147683671087e-05, |
| "loss": 0.0438, |
| "step": 11520 |
| }, |
| { |
| "epoch": 24.12133891213389, |
| "grad_norm": 0.20508503913879395, |
| "learning_rate": 1.3938702885540239e-05, |
| "loss": 0.0407, |
| "step": 11530 |
| }, |
| { |
| "epoch": 24.14225941422594, |
| "grad_norm": 0.16729703545570374, |
| "learning_rate": 1.3862433358861576e-05, |
| "loss": 0.0379, |
| "step": 11540 |
| }, |
| { |
| "epoch": 24.16317991631799, |
| "grad_norm": 0.18478308618068695, |
| "learning_rate": 1.3786339474333636e-05, |
| "loss": 0.0469, |
| "step": 11550 |
| }, |
| { |
| "epoch": 24.184100418410043, |
| "grad_norm": 0.1481492966413498, |
| "learning_rate": 1.3710421601801265e-05, |
| "loss": 0.0434, |
| "step": 11560 |
| }, |
| { |
| "epoch": 24.205020920502093, |
| "grad_norm": 0.21265815198421478, |
| "learning_rate": 1.3634680110253883e-05, |
| "loss": 0.0493, |
| "step": 11570 |
| }, |
| { |
| "epoch": 24.225941422594143, |
| "grad_norm": 0.25307002663612366, |
| "learning_rate": 1.3559115367823556e-05, |
| "loss": 0.0485, |
| "step": 11580 |
| }, |
| { |
| "epoch": 24.246861924686193, |
| "grad_norm": 0.1825999766588211, |
| "learning_rate": 1.3483727741783342e-05, |
| "loss": 0.0499, |
| "step": 11590 |
| }, |
| { |
| "epoch": 24.267782426778243, |
| "grad_norm": 0.1724889725446701, |
| "learning_rate": 1.3408517598545444e-05, |
| "loss": 0.0397, |
| "step": 11600 |
| }, |
| { |
| "epoch": 24.288702928870293, |
| "grad_norm": 0.18095286190509796, |
| "learning_rate": 1.3333485303659381e-05, |
| "loss": 0.0427, |
| "step": 11610 |
| }, |
| { |
| "epoch": 24.309623430962343, |
| "grad_norm": 0.17512592673301697, |
| "learning_rate": 1.3258631221810331e-05, |
| "loss": 0.0429, |
| "step": 11620 |
| }, |
| { |
| "epoch": 24.330543933054393, |
| "grad_norm": 0.18383602797985077, |
| "learning_rate": 1.3183955716817232e-05, |
| "loss": 0.0433, |
| "step": 11630 |
| }, |
| { |
| "epoch": 24.351464435146443, |
| "grad_norm": 0.19692164659500122, |
| "learning_rate": 1.3109459151631076e-05, |
| "loss": 0.0437, |
| "step": 11640 |
| }, |
| { |
| "epoch": 24.372384937238493, |
| "grad_norm": 0.15425680577754974, |
| "learning_rate": 1.3035141888333202e-05, |
| "loss": 0.0427, |
| "step": 11650 |
| }, |
| { |
| "epoch": 24.393305439330543, |
| "grad_norm": 0.12884384393692017, |
| "learning_rate": 1.2961004288133388e-05, |
| "loss": 0.0422, |
| "step": 11660 |
| }, |
| { |
| "epoch": 24.414225941422593, |
| "grad_norm": 0.1541462391614914, |
| "learning_rate": 1.2887046711368245e-05, |
| "loss": 0.0418, |
| "step": 11670 |
| }, |
| { |
| "epoch": 24.435146443514643, |
| "grad_norm": 0.20649564266204834, |
| "learning_rate": 1.2813269517499399e-05, |
| "loss": 0.044, |
| "step": 11680 |
| }, |
| { |
| "epoch": 24.456066945606693, |
| "grad_norm": 0.19635066390037537, |
| "learning_rate": 1.273967306511169e-05, |
| "loss": 0.0409, |
| "step": 11690 |
| }, |
| { |
| "epoch": 24.476987447698743, |
| "grad_norm": 0.2694770097732544, |
| "learning_rate": 1.2666257711911566e-05, |
| "loss": 0.044, |
| "step": 11700 |
| }, |
| { |
| "epoch": 24.497907949790793, |
| "grad_norm": 0.16663697361946106, |
| "learning_rate": 1.2593023814725214e-05, |
| "loss": 0.04, |
| "step": 11710 |
| }, |
| { |
| "epoch": 24.518828451882847, |
| "grad_norm": 0.24324466288089752, |
| "learning_rate": 1.251997172949686e-05, |
| "loss": 0.0476, |
| "step": 11720 |
| }, |
| { |
| "epoch": 24.539748953974897, |
| "grad_norm": 0.20587030053138733, |
| "learning_rate": 1.2447101811287109e-05, |
| "loss": 0.0533, |
| "step": 11730 |
| }, |
| { |
| "epoch": 24.560669456066947, |
| "grad_norm": 0.17844031751155853, |
| "learning_rate": 1.237441441427114e-05, |
| "loss": 0.0373, |
| "step": 11740 |
| }, |
| { |
| "epoch": 24.581589958158997, |
| "grad_norm": 0.22329042851924896, |
| "learning_rate": 1.2301909891737018e-05, |
| "loss": 0.0386, |
| "step": 11750 |
| }, |
| { |
| "epoch": 24.602510460251047, |
| "grad_norm": 0.20262449979782104, |
| "learning_rate": 1.2229588596083957e-05, |
| "loss": 0.0459, |
| "step": 11760 |
| }, |
| { |
| "epoch": 24.623430962343097, |
| "grad_norm": 0.14917254447937012, |
| "learning_rate": 1.2157450878820608e-05, |
| "loss": 0.0395, |
| "step": 11770 |
| }, |
| { |
| "epoch": 24.644351464435147, |
| "grad_norm": 0.23771631717681885, |
| "learning_rate": 1.2085497090563407e-05, |
| "loss": 0.0519, |
| "step": 11780 |
| }, |
| { |
| "epoch": 24.665271966527197, |
| "grad_norm": 0.20867544412612915, |
| "learning_rate": 1.2013727581034783e-05, |
| "loss": 0.0411, |
| "step": 11790 |
| }, |
| { |
| "epoch": 24.686192468619247, |
| "grad_norm": 0.23529614508152008, |
| "learning_rate": 1.1942142699061498e-05, |
| "loss": 0.0444, |
| "step": 11800 |
| }, |
| { |
| "epoch": 24.707112970711297, |
| "grad_norm": 0.22313570976257324, |
| "learning_rate": 1.1870742792572992e-05, |
| "loss": 0.0439, |
| "step": 11810 |
| }, |
| { |
| "epoch": 24.728033472803347, |
| "grad_norm": 0.16360808908939362, |
| "learning_rate": 1.1799528208599637e-05, |
| "loss": 0.0467, |
| "step": 11820 |
| }, |
| { |
| "epoch": 24.748953974895397, |
| "grad_norm": 0.14838069677352905, |
| "learning_rate": 1.1728499293271079e-05, |
| "loss": 0.0429, |
| "step": 11830 |
| }, |
| { |
| "epoch": 24.769874476987447, |
| "grad_norm": 0.2394830286502838, |
| "learning_rate": 1.1657656391814509e-05, |
| "loss": 0.0481, |
| "step": 11840 |
| }, |
| { |
| "epoch": 24.790794979079497, |
| "grad_norm": 0.27211302518844604, |
| "learning_rate": 1.1586999848553043e-05, |
| "loss": 0.0455, |
| "step": 11850 |
| }, |
| { |
| "epoch": 24.811715481171547, |
| "grad_norm": 0.18247251212596893, |
| "learning_rate": 1.1516530006904053e-05, |
| "loss": 0.0397, |
| "step": 11860 |
| }, |
| { |
| "epoch": 24.8326359832636, |
| "grad_norm": 0.1879725158214569, |
| "learning_rate": 1.1446247209377403e-05, |
| "loss": 0.045, |
| "step": 11870 |
| }, |
| { |
| "epoch": 24.85355648535565, |
| "grad_norm": 0.21421553194522858, |
| "learning_rate": 1.1376151797573925e-05, |
| "loss": 0.0434, |
| "step": 11880 |
| }, |
| { |
| "epoch": 24.8744769874477, |
| "grad_norm": 0.18666699528694153, |
| "learning_rate": 1.1306244112183662e-05, |
| "loss": 0.0449, |
| "step": 11890 |
| }, |
| { |
| "epoch": 24.89539748953975, |
| "grad_norm": 0.2565382122993469, |
| "learning_rate": 1.1236524492984203e-05, |
| "loss": 0.0457, |
| "step": 11900 |
| }, |
| { |
| "epoch": 24.9163179916318, |
| "grad_norm": 0.16528093814849854, |
| "learning_rate": 1.116699327883911e-05, |
| "loss": 0.0391, |
| "step": 11910 |
| }, |
| { |
| "epoch": 24.93723849372385, |
| "grad_norm": 0.13788525760173798, |
| "learning_rate": 1.1097650807696209e-05, |
| "loss": 0.0465, |
| "step": 11920 |
| }, |
| { |
| "epoch": 24.9581589958159, |
| "grad_norm": 0.20516015589237213, |
| "learning_rate": 1.1028497416585931e-05, |
| "loss": 0.0486, |
| "step": 11930 |
| }, |
| { |
| "epoch": 24.97907949790795, |
| "grad_norm": 0.20356875658035278, |
| "learning_rate": 1.0959533441619762e-05, |
| "loss": 0.0414, |
| "step": 11940 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.44614553451538086, |
| "learning_rate": 1.0890759217988527e-05, |
| "loss": 0.0362, |
| "step": 11950 |
| }, |
| { |
| "epoch": 25.02092050209205, |
| "grad_norm": 0.19991981983184814, |
| "learning_rate": 1.0822175079960806e-05, |
| "loss": 0.0464, |
| "step": 11960 |
| }, |
| { |
| "epoch": 25.0418410041841, |
| "grad_norm": 0.1874738484621048, |
| "learning_rate": 1.0753781360881265e-05, |
| "loss": 0.0416, |
| "step": 11970 |
| }, |
| { |
| "epoch": 25.06276150627615, |
| "grad_norm": 0.21726657450199127, |
| "learning_rate": 1.0685578393169055e-05, |
| "loss": 0.0384, |
| "step": 11980 |
| }, |
| { |
| "epoch": 25.0836820083682, |
| "grad_norm": 0.18857204914093018, |
| "learning_rate": 1.061756650831625e-05, |
| "loss": 0.0466, |
| "step": 11990 |
| }, |
| { |
| "epoch": 25.10460251046025, |
| "grad_norm": 0.22070184350013733, |
| "learning_rate": 1.054974603688616e-05, |
| "loss": 0.0431, |
| "step": 12000 |
| }, |
| { |
| "epoch": 25.1255230125523, |
| "grad_norm": 0.24246034026145935, |
| "learning_rate": 1.048211730851173e-05, |
| "loss": 0.0428, |
| "step": 12010 |
| }, |
| { |
| "epoch": 25.14644351464435, |
| "grad_norm": 0.1892884373664856, |
| "learning_rate": 1.0414680651894004e-05, |
| "loss": 0.0421, |
| "step": 12020 |
| }, |
| { |
| "epoch": 25.1673640167364, |
| "grad_norm": 0.18135856091976166, |
| "learning_rate": 1.034743639480047e-05, |
| "loss": 0.0438, |
| "step": 12030 |
| }, |
| { |
| "epoch": 25.188284518828453, |
| "grad_norm": 0.2031504511833191, |
| "learning_rate": 1.0280384864063497e-05, |
| "loss": 0.0454, |
| "step": 12040 |
| }, |
| { |
| "epoch": 25.209205020920503, |
| "grad_norm": 0.24415917694568634, |
| "learning_rate": 1.0213526385578704e-05, |
| "loss": 0.0446, |
| "step": 12050 |
| }, |
| { |
| "epoch": 25.230125523012553, |
| "grad_norm": 0.2209267020225525, |
| "learning_rate": 1.0146861284303394e-05, |
| "loss": 0.0426, |
| "step": 12060 |
| }, |
| { |
| "epoch": 25.251046025104603, |
| "grad_norm": 0.1683529168367386, |
| "learning_rate": 1.0080389884255037e-05, |
| "loss": 0.0437, |
| "step": 12070 |
| }, |
| { |
| "epoch": 25.271966527196653, |
| "grad_norm": 0.23323380947113037, |
| "learning_rate": 1.0014112508509588e-05, |
| "loss": 0.0429, |
| "step": 12080 |
| }, |
| { |
| "epoch": 25.292887029288703, |
| "grad_norm": 0.1716199666261673, |
| "learning_rate": 9.948029479199994e-06, |
| "loss": 0.0536, |
| "step": 12090 |
| }, |
| { |
| "epoch": 25.313807531380753, |
| "grad_norm": 0.21185162663459778, |
| "learning_rate": 9.882141117514632e-06, |
| "loss": 0.0396, |
| "step": 12100 |
| }, |
| { |
| "epoch": 25.334728033472803, |
| "grad_norm": 0.17677056789398193, |
| "learning_rate": 9.816447743695656e-06, |
| "loss": 0.0416, |
| "step": 12110 |
| }, |
| { |
| "epoch": 25.355648535564853, |
| "grad_norm": 0.18352273106575012, |
| "learning_rate": 9.75094967703758e-06, |
| "loss": 0.0475, |
| "step": 12120 |
| }, |
| { |
| "epoch": 25.376569037656903, |
| "grad_norm": 0.17828936874866486, |
| "learning_rate": 9.685647235885597e-06, |
| "loss": 0.0417, |
| "step": 12130 |
| }, |
| { |
| "epoch": 25.397489539748953, |
| "grad_norm": 0.19843615591526031, |
| "learning_rate": 9.620540737634087e-06, |
| "loss": 0.043, |
| "step": 12140 |
| }, |
| { |
| "epoch": 25.418410041841003, |
| "grad_norm": 0.17798744142055511, |
| "learning_rate": 9.555630498725133e-06, |
| "loss": 0.0492, |
| "step": 12150 |
| }, |
| { |
| "epoch": 25.439330543933053, |
| "grad_norm": 0.19702628254890442, |
| "learning_rate": 9.49091683464684e-06, |
| "loss": 0.041, |
| "step": 12160 |
| }, |
| { |
| "epoch": 25.460251046025103, |
| "grad_norm": 0.16698186099529266, |
| "learning_rate": 9.426400059931955e-06, |
| "loss": 0.0475, |
| "step": 12170 |
| }, |
| { |
| "epoch": 25.481171548117153, |
| "grad_norm": 0.2713240087032318, |
| "learning_rate": 9.362080488156245e-06, |
| "loss": 0.0494, |
| "step": 12180 |
| }, |
| { |
| "epoch": 25.502092050209207, |
| "grad_norm": 0.1628587692975998, |
| "learning_rate": 9.29795843193697e-06, |
| "loss": 0.0414, |
| "step": 12190 |
| }, |
| { |
| "epoch": 25.523012552301257, |
| "grad_norm": 0.19244007766246796, |
| "learning_rate": 9.234034202931447e-06, |
| "loss": 0.0468, |
| "step": 12200 |
| }, |
| { |
| "epoch": 25.543933054393307, |
| "grad_norm": 0.27872234582901, |
| "learning_rate": 9.170308111835418e-06, |
| "loss": 0.047, |
| "step": 12210 |
| }, |
| { |
| "epoch": 25.564853556485357, |
| "grad_norm": 0.23407423496246338, |
| "learning_rate": 9.106780468381631e-06, |
| "loss": 0.0376, |
| "step": 12220 |
| }, |
| { |
| "epoch": 25.585774058577407, |
| "grad_norm": 0.1899377554655075, |
| "learning_rate": 9.043451581338302e-06, |
| "loss": 0.043, |
| "step": 12230 |
| }, |
| { |
| "epoch": 25.606694560669457, |
| "grad_norm": 0.3231353461742401, |
| "learning_rate": 8.980321758507615e-06, |
| "loss": 0.0488, |
| "step": 12240 |
| }, |
| { |
| "epoch": 25.627615062761507, |
| "grad_norm": 0.19835972785949707, |
| "learning_rate": 8.91739130672425e-06, |
| "loss": 0.0468, |
| "step": 12250 |
| }, |
| { |
| "epoch": 25.648535564853557, |
| "grad_norm": 0.12836074829101562, |
| "learning_rate": 8.85466053185382e-06, |
| "loss": 0.049, |
| "step": 12260 |
| }, |
| { |
| "epoch": 25.669456066945607, |
| "grad_norm": 0.18373306095600128, |
| "learning_rate": 8.792129738791455e-06, |
| "loss": 0.0403, |
| "step": 12270 |
| }, |
| { |
| "epoch": 25.690376569037657, |
| "grad_norm": 0.2383619099855423, |
| "learning_rate": 8.729799231460318e-06, |
| "loss": 0.0416, |
| "step": 12280 |
| }, |
| { |
| "epoch": 25.711297071129707, |
| "grad_norm": 0.17554903030395508, |
| "learning_rate": 8.66766931281009e-06, |
| "loss": 0.0471, |
| "step": 12290 |
| }, |
| { |
| "epoch": 25.732217573221757, |
| "grad_norm": 0.17318107187747955, |
| "learning_rate": 8.6057402848155e-06, |
| "loss": 0.0449, |
| "step": 12300 |
| }, |
| { |
| "epoch": 25.753138075313807, |
| "grad_norm": 0.18916559219360352, |
| "learning_rate": 8.544012448474904e-06, |
| "loss": 0.0448, |
| "step": 12310 |
| }, |
| { |
| "epoch": 25.774058577405857, |
| "grad_norm": 0.1694333255290985, |
| "learning_rate": 8.482486103808779e-06, |
| "loss": 0.0423, |
| "step": 12320 |
| }, |
| { |
| "epoch": 25.794979079497907, |
| "grad_norm": 0.22283673286437988, |
| "learning_rate": 8.42116154985828e-06, |
| "loss": 0.0395, |
| "step": 12330 |
| }, |
| { |
| "epoch": 25.815899581589957, |
| "grad_norm": 0.24180816113948822, |
| "learning_rate": 8.360039084683779e-06, |
| "loss": 0.0405, |
| "step": 12340 |
| }, |
| { |
| "epoch": 25.836820083682007, |
| "grad_norm": 0.11373143643140793, |
| "learning_rate": 8.299119005363404e-06, |
| "loss": 0.0381, |
| "step": 12350 |
| }, |
| { |
| "epoch": 25.85774058577406, |
| "grad_norm": 0.21663306653499603, |
| "learning_rate": 8.238401607991647e-06, |
| "loss": 0.0464, |
| "step": 12360 |
| }, |
| { |
| "epoch": 25.87866108786611, |
| "grad_norm": 0.16208253800868988, |
| "learning_rate": 8.177887187677847e-06, |
| "loss": 0.0382, |
| "step": 12370 |
| }, |
| { |
| "epoch": 25.89958158995816, |
| "grad_norm": 0.2553822696208954, |
| "learning_rate": 8.117576038544838e-06, |
| "loss": 0.0439, |
| "step": 12380 |
| }, |
| { |
| "epoch": 25.92050209205021, |
| "grad_norm": 0.1499353051185608, |
| "learning_rate": 8.057468453727479e-06, |
| "loss": 0.0344, |
| "step": 12390 |
| }, |
| { |
| "epoch": 25.94142259414226, |
| "grad_norm": 0.22214387357234955, |
| "learning_rate": 7.997564725371182e-06, |
| "loss": 0.0437, |
| "step": 12400 |
| }, |
| { |
| "epoch": 25.96234309623431, |
| "grad_norm": 0.20151209831237793, |
| "learning_rate": 7.937865144630601e-06, |
| "loss": 0.0455, |
| "step": 12410 |
| }, |
| { |
| "epoch": 25.98326359832636, |
| "grad_norm": 0.2003437876701355, |
| "learning_rate": 7.878370001668116e-06, |
| "loss": 0.0466, |
| "step": 12420 |
| }, |
| { |
| "epoch": 26.00418410041841, |
| "grad_norm": 0.17744222283363342, |
| "learning_rate": 7.819079585652461e-06, |
| "loss": 0.0438, |
| "step": 12430 |
| }, |
| { |
| "epoch": 26.02510460251046, |
| "grad_norm": 0.280471533536911, |
| "learning_rate": 7.759994184757358e-06, |
| "loss": 0.047, |
| "step": 12440 |
| }, |
| { |
| "epoch": 26.04602510460251, |
| "grad_norm": 0.19019319117069244, |
| "learning_rate": 7.701114086160027e-06, |
| "loss": 0.0452, |
| "step": 12450 |
| }, |
| { |
| "epoch": 26.06694560669456, |
| "grad_norm": 0.1652364730834961, |
| "learning_rate": 7.642439576039884e-06, |
| "loss": 0.0462, |
| "step": 12460 |
| }, |
| { |
| "epoch": 26.08786610878661, |
| "grad_norm": 0.2389875054359436, |
| "learning_rate": 7.583970939577101e-06, |
| "loss": 0.0437, |
| "step": 12470 |
| }, |
| { |
| "epoch": 26.10878661087866, |
| "grad_norm": 0.1723814159631729, |
| "learning_rate": 7.525708460951197e-06, |
| "loss": 0.0481, |
| "step": 12480 |
| }, |
| { |
| "epoch": 26.12970711297071, |
| "grad_norm": 0.21704067289829254, |
| "learning_rate": 7.467652423339733e-06, |
| "loss": 0.0464, |
| "step": 12490 |
| }, |
| { |
| "epoch": 26.15062761506276, |
| "grad_norm": 0.18124651908874512, |
| "learning_rate": 7.409803108916841e-06, |
| "loss": 0.0426, |
| "step": 12500 |
| }, |
| { |
| "epoch": 26.171548117154813, |
| "grad_norm": 0.1521020233631134, |
| "learning_rate": 7.35216079885192e-06, |
| "loss": 0.0414, |
| "step": 12510 |
| }, |
| { |
| "epoch": 26.192468619246863, |
| "grad_norm": 0.22082534432411194, |
| "learning_rate": 7.29472577330827e-06, |
| "loss": 0.0408, |
| "step": 12520 |
| }, |
| { |
| "epoch": 26.213389121338913, |
| "grad_norm": 0.19320394098758698, |
| "learning_rate": 7.237498311441676e-06, |
| "loss": 0.0411, |
| "step": 12530 |
| }, |
| { |
| "epoch": 26.234309623430963, |
| "grad_norm": 0.2235615849494934, |
| "learning_rate": 7.180478691399134e-06, |
| "loss": 0.0443, |
| "step": 12540 |
| }, |
| { |
| "epoch": 26.255230125523013, |
| "grad_norm": 0.15242235362529755, |
| "learning_rate": 7.123667190317396e-06, |
| "loss": 0.0377, |
| "step": 12550 |
| }, |
| { |
| "epoch": 26.276150627615063, |
| "grad_norm": 0.22409453988075256, |
| "learning_rate": 7.06706408432169e-06, |
| "loss": 0.043, |
| "step": 12560 |
| }, |
| { |
| "epoch": 26.297071129707113, |
| "grad_norm": 0.20269347727298737, |
| "learning_rate": 7.010669648524404e-06, |
| "loss": 0.0382, |
| "step": 12570 |
| }, |
| { |
| "epoch": 26.317991631799163, |
| "grad_norm": 0.25661835074424744, |
| "learning_rate": 6.954484157023661e-06, |
| "loss": 0.0475, |
| "step": 12580 |
| }, |
| { |
| "epoch": 26.338912133891213, |
| "grad_norm": 0.25946345925331116, |
| "learning_rate": 6.898507882902078e-06, |
| "loss": 0.0416, |
| "step": 12590 |
| }, |
| { |
| "epoch": 26.359832635983263, |
| "grad_norm": 0.15361930429935455, |
| "learning_rate": 6.842741098225358e-06, |
| "loss": 0.0477, |
| "step": 12600 |
| }, |
| { |
| "epoch": 26.380753138075313, |
| "grad_norm": 0.1839352697134018, |
| "learning_rate": 6.787184074041031e-06, |
| "loss": 0.0415, |
| "step": 12610 |
| }, |
| { |
| "epoch": 26.401673640167363, |
| "grad_norm": 0.19836997985839844, |
| "learning_rate": 6.731837080377129e-06, |
| "loss": 0.0391, |
| "step": 12620 |
| }, |
| { |
| "epoch": 26.422594142259413, |
| "grad_norm": 0.15911194682121277, |
| "learning_rate": 6.676700386240814e-06, |
| "loss": 0.0423, |
| "step": 12630 |
| }, |
| { |
| "epoch": 26.443514644351463, |
| "grad_norm": 0.18121299147605896, |
| "learning_rate": 6.621774259617125e-06, |
| "loss": 0.041, |
| "step": 12640 |
| }, |
| { |
| "epoch": 26.464435146443513, |
| "grad_norm": 0.21054553985595703, |
| "learning_rate": 6.567058967467704e-06, |
| "loss": 0.0467, |
| "step": 12650 |
| }, |
| { |
| "epoch": 26.485355648535563, |
| "grad_norm": 0.22585222125053406, |
| "learning_rate": 6.51255477572939e-06, |
| "loss": 0.0458, |
| "step": 12660 |
| }, |
| { |
| "epoch": 26.506276150627617, |
| "grad_norm": 0.23863200843334198, |
| "learning_rate": 6.45826194931306e-06, |
| "loss": 0.0387, |
| "step": 12670 |
| }, |
| { |
| "epoch": 26.527196652719667, |
| "grad_norm": 0.18971945345401764, |
| "learning_rate": 6.4041807521022454e-06, |
| "loss": 0.0426, |
| "step": 12680 |
| }, |
| { |
| "epoch": 26.548117154811717, |
| "grad_norm": 0.252937376499176, |
| "learning_rate": 6.350311446951868e-06, |
| "loss": 0.0456, |
| "step": 12690 |
| }, |
| { |
| "epoch": 26.569037656903767, |
| "grad_norm": 0.20583398640155792, |
| "learning_rate": 6.29665429568701e-06, |
| "loss": 0.041, |
| "step": 12700 |
| }, |
| { |
| "epoch": 26.589958158995817, |
| "grad_norm": 0.1962657868862152, |
| "learning_rate": 6.2432095591015705e-06, |
| "loss": 0.042, |
| "step": 12710 |
| }, |
| { |
| "epoch": 26.610878661087867, |
| "grad_norm": 0.15278510749340057, |
| "learning_rate": 6.1899774969570444e-06, |
| "loss": 0.048, |
| "step": 12720 |
| }, |
| { |
| "epoch": 26.631799163179917, |
| "grad_norm": 0.20337019860744476, |
| "learning_rate": 6.136958367981272e-06, |
| "loss": 0.0432, |
| "step": 12730 |
| }, |
| { |
| "epoch": 26.652719665271967, |
| "grad_norm": 0.19672217965126038, |
| "learning_rate": 6.084152429867113e-06, |
| "loss": 0.0404, |
| "step": 12740 |
| }, |
| { |
| "epoch": 26.673640167364017, |
| "grad_norm": 0.19690439105033875, |
| "learning_rate": 6.0315599392712865e-06, |
| "loss": 0.0424, |
| "step": 12750 |
| }, |
| { |
| "epoch": 26.694560669456067, |
| "grad_norm": 0.1715863049030304, |
| "learning_rate": 5.979181151813057e-06, |
| "loss": 0.0383, |
| "step": 12760 |
| }, |
| { |
| "epoch": 26.715481171548117, |
| "grad_norm": 0.17519311606884003, |
| "learning_rate": 5.927016322072992e-06, |
| "loss": 0.0422, |
| "step": 12770 |
| }, |
| { |
| "epoch": 26.736401673640167, |
| "grad_norm": 0.1893201619386673, |
| "learning_rate": 5.875065703591787e-06, |
| "loss": 0.0437, |
| "step": 12780 |
| }, |
| { |
| "epoch": 26.757322175732217, |
| "grad_norm": 0.26540607213974, |
| "learning_rate": 5.823329548868939e-06, |
| "loss": 0.0433, |
| "step": 12790 |
| }, |
| { |
| "epoch": 26.778242677824267, |
| "grad_norm": 0.23649485409259796, |
| "learning_rate": 5.77180810936162e-06, |
| "loss": 0.044, |
| "step": 12800 |
| }, |
| { |
| "epoch": 26.799163179916317, |
| "grad_norm": 0.20049796998500824, |
| "learning_rate": 5.720501635483366e-06, |
| "loss": 0.0444, |
| "step": 12810 |
| }, |
| { |
| "epoch": 26.820083682008367, |
| "grad_norm": 0.22236889600753784, |
| "learning_rate": 5.669410376602918e-06, |
| "loss": 0.0526, |
| "step": 12820 |
| }, |
| { |
| "epoch": 26.84100418410042, |
| "grad_norm": 0.2502869665622711, |
| "learning_rate": 5.618534581043011e-06, |
| "loss": 0.0497, |
| "step": 12830 |
| }, |
| { |
| "epoch": 26.86192468619247, |
| "grad_norm": 0.16508817672729492, |
| "learning_rate": 5.5678744960791005e-06, |
| "loss": 0.0423, |
| "step": 12840 |
| }, |
| { |
| "epoch": 26.88284518828452, |
| "grad_norm": 0.22233647108078003, |
| "learning_rate": 5.517430367938237e-06, |
| "loss": 0.0464, |
| "step": 12850 |
| }, |
| { |
| "epoch": 26.90376569037657, |
| "grad_norm": 0.20295816659927368, |
| "learning_rate": 5.467202441797842e-06, |
| "loss": 0.0446, |
| "step": 12860 |
| }, |
| { |
| "epoch": 26.92468619246862, |
| "grad_norm": 0.22901266813278198, |
| "learning_rate": 5.417190961784497e-06, |
| "loss": 0.0388, |
| "step": 12870 |
| }, |
| { |
| "epoch": 26.94560669456067, |
| "grad_norm": 0.22929273545742035, |
| "learning_rate": 5.3673961709727885e-06, |
| "loss": 0.0435, |
| "step": 12880 |
| }, |
| { |
| "epoch": 26.96652719665272, |
| "grad_norm": 0.30230170488357544, |
| "learning_rate": 5.317818311384115e-06, |
| "loss": 0.0434, |
| "step": 12890 |
| }, |
| { |
| "epoch": 26.98744769874477, |
| "grad_norm": 0.20971617102622986, |
| "learning_rate": 5.2684576239854895e-06, |
| "loss": 0.0476, |
| "step": 12900 |
| }, |
| { |
| "epoch": 27.00836820083682, |
| "grad_norm": 0.16513416171073914, |
| "learning_rate": 5.219314348688414e-06, |
| "loss": 0.0406, |
| "step": 12910 |
| }, |
| { |
| "epoch": 27.02928870292887, |
| "grad_norm": 0.16726276278495789, |
| "learning_rate": 5.170388724347658e-06, |
| "loss": 0.0427, |
| "step": 12920 |
| }, |
| { |
| "epoch": 27.05020920502092, |
| "grad_norm": 0.20750859379768372, |
| "learning_rate": 5.1216809887601245e-06, |
| "loss": 0.0412, |
| "step": 12930 |
| }, |
| { |
| "epoch": 27.07112970711297, |
| "grad_norm": 0.22323675453662872, |
| "learning_rate": 5.073191378663733e-06, |
| "loss": 0.0421, |
| "step": 12940 |
| }, |
| { |
| "epoch": 27.09205020920502, |
| "grad_norm": 0.24951502680778503, |
| "learning_rate": 5.024920129736188e-06, |
| "loss": 0.0439, |
| "step": 12950 |
| }, |
| { |
| "epoch": 27.11297071129707, |
| "grad_norm": 0.18910029530525208, |
| "learning_rate": 4.976867476593894e-06, |
| "loss": 0.0411, |
| "step": 12960 |
| }, |
| { |
| "epoch": 27.13389121338912, |
| "grad_norm": 0.17495794594287872, |
| "learning_rate": 4.929033652790821e-06, |
| "loss": 0.037, |
| "step": 12970 |
| }, |
| { |
| "epoch": 27.15481171548117, |
| "grad_norm": 0.15714290738105774, |
| "learning_rate": 4.881418890817296e-06, |
| "loss": 0.0385, |
| "step": 12980 |
| }, |
| { |
| "epoch": 27.175732217573223, |
| "grad_norm": 0.151872456073761, |
| "learning_rate": 4.834023422098971e-06, |
| "loss": 0.0409, |
| "step": 12990 |
| }, |
| { |
| "epoch": 27.196652719665273, |
| "grad_norm": 0.2499384582042694, |
| "learning_rate": 4.7868474769956266e-06, |
| "loss": 0.0466, |
| "step": 13000 |
| }, |
| { |
| "epoch": 27.217573221757323, |
| "grad_norm": 0.19462943077087402, |
| "learning_rate": 4.7398912848000636e-06, |
| "loss": 0.0421, |
| "step": 13010 |
| }, |
| { |
| "epoch": 27.238493723849373, |
| "grad_norm": 0.24162252247333527, |
| "learning_rate": 4.6931550737370264e-06, |
| "loss": 0.0457, |
| "step": 13020 |
| }, |
| { |
| "epoch": 27.259414225941423, |
| "grad_norm": 0.14873123168945312, |
| "learning_rate": 4.646639070962067e-06, |
| "loss": 0.0481, |
| "step": 13030 |
| }, |
| { |
| "epoch": 27.280334728033473, |
| "grad_norm": 0.278059184551239, |
| "learning_rate": 4.600343502560439e-06, |
| "loss": 0.0462, |
| "step": 13040 |
| }, |
| { |
| "epoch": 27.301255230125523, |
| "grad_norm": 0.24148941040039062, |
| "learning_rate": 4.55426859354599e-06, |
| "loss": 0.0485, |
| "step": 13050 |
| }, |
| { |
| "epoch": 27.322175732217573, |
| "grad_norm": 0.1880323588848114, |
| "learning_rate": 4.5084145678600805e-06, |
| "loss": 0.047, |
| "step": 13060 |
| }, |
| { |
| "epoch": 27.343096234309623, |
| "grad_norm": 0.20042458176612854, |
| "learning_rate": 4.462781648370518e-06, |
| "loss": 0.0354, |
| "step": 13070 |
| }, |
| { |
| "epoch": 27.364016736401673, |
| "grad_norm": 0.23684915900230408, |
| "learning_rate": 4.417370056870418e-06, |
| "loss": 0.0419, |
| "step": 13080 |
| }, |
| { |
| "epoch": 27.384937238493723, |
| "grad_norm": 0.18872502446174622, |
| "learning_rate": 4.372180014077193e-06, |
| "loss": 0.0452, |
| "step": 13090 |
| }, |
| { |
| "epoch": 27.405857740585773, |
| "grad_norm": 0.1701870709657669, |
| "learning_rate": 4.327211739631415e-06, |
| "loss": 0.0372, |
| "step": 13100 |
| }, |
| { |
| "epoch": 27.426778242677823, |
| "grad_norm": 0.21626059710979462, |
| "learning_rate": 4.282465452095802e-06, |
| "loss": 0.0432, |
| "step": 13110 |
| }, |
| { |
| "epoch": 27.447698744769873, |
| "grad_norm": 0.18621067702770233, |
| "learning_rate": 4.237941368954124e-06, |
| "loss": 0.0425, |
| "step": 13120 |
| }, |
| { |
| "epoch": 27.468619246861923, |
| "grad_norm": 0.17577169835567474, |
| "learning_rate": 4.193639706610147e-06, |
| "loss": 0.0449, |
| "step": 13130 |
| }, |
| { |
| "epoch": 27.489539748953973, |
| "grad_norm": 0.20410019159317017, |
| "learning_rate": 4.149560680386588e-06, |
| "loss": 0.0411, |
| "step": 13140 |
| }, |
| { |
| "epoch": 27.510460251046027, |
| "grad_norm": 0.13330447673797607, |
| "learning_rate": 4.105704504524094e-06, |
| "loss": 0.0478, |
| "step": 13150 |
| }, |
| { |
| "epoch": 27.531380753138077, |
| "grad_norm": 0.228925421833992, |
| "learning_rate": 4.0620713921801334e-06, |
| "loss": 0.0424, |
| "step": 13160 |
| }, |
| { |
| "epoch": 27.552301255230127, |
| "grad_norm": 0.2237783670425415, |
| "learning_rate": 4.0186615554280385e-06, |
| "loss": 0.0412, |
| "step": 13170 |
| }, |
| { |
| "epoch": 27.573221757322177, |
| "grad_norm": 0.17737041413784027, |
| "learning_rate": 3.975475205255929e-06, |
| "loss": 0.0386, |
| "step": 13180 |
| }, |
| { |
| "epoch": 27.594142259414227, |
| "grad_norm": 0.36772456765174866, |
| "learning_rate": 3.932512551565676e-06, |
| "loss": 0.0397, |
| "step": 13190 |
| }, |
| { |
| "epoch": 27.615062761506277, |
| "grad_norm": 0.22337375581264496, |
| "learning_rate": 3.889773803171936e-06, |
| "loss": 0.0453, |
| "step": 13200 |
| }, |
| { |
| "epoch": 27.635983263598327, |
| "grad_norm": 0.18996427953243256, |
| "learning_rate": 3.847259167801076e-06, |
| "loss": 0.037, |
| "step": 13210 |
| }, |
| { |
| "epoch": 27.656903765690377, |
| "grad_norm": 0.19474196434020996, |
| "learning_rate": 3.804968852090185e-06, |
| "loss": 0.0422, |
| "step": 13220 |
| }, |
| { |
| "epoch": 27.677824267782427, |
| "grad_norm": 0.19374209642410278, |
| "learning_rate": 3.762903061586104e-06, |
| "loss": 0.0537, |
| "step": 13230 |
| }, |
| { |
| "epoch": 27.698744769874477, |
| "grad_norm": 0.31516656279563904, |
| "learning_rate": 3.721062000744363e-06, |
| "loss": 0.0462, |
| "step": 13240 |
| }, |
| { |
| "epoch": 27.719665271966527, |
| "grad_norm": 0.18470466136932373, |
| "learning_rate": 3.679445872928244e-06, |
| "loss": 0.0417, |
| "step": 13250 |
| }, |
| { |
| "epoch": 27.740585774058577, |
| "grad_norm": 0.1719268262386322, |
| "learning_rate": 3.6380548804077707e-06, |
| "loss": 0.0478, |
| "step": 13260 |
| }, |
| { |
| "epoch": 27.761506276150627, |
| "grad_norm": 0.16592474281787872, |
| "learning_rate": 3.5968892243587016e-06, |
| "loss": 0.0448, |
| "step": 13270 |
| }, |
| { |
| "epoch": 27.782426778242677, |
| "grad_norm": 0.18621480464935303, |
| "learning_rate": 3.555949104861611e-06, |
| "loss": 0.0393, |
| "step": 13280 |
| }, |
| { |
| "epoch": 27.803347280334727, |
| "grad_norm": 0.17843583226203918, |
| "learning_rate": 3.5152347209008394e-06, |
| "loss": 0.0534, |
| "step": 13290 |
| }, |
| { |
| "epoch": 27.824267782426777, |
| "grad_norm": 0.1969691812992096, |
| "learning_rate": 3.4747462703636104e-06, |
| "loss": 0.0494, |
| "step": 13300 |
| }, |
| { |
| "epoch": 27.84518828451883, |
| "grad_norm": 0.16685451567173004, |
| "learning_rate": 3.434483950038986e-06, |
| "loss": 0.0403, |
| "step": 13310 |
| }, |
| { |
| "epoch": 27.86610878661088, |
| "grad_norm": 0.18303453922271729, |
| "learning_rate": 3.3944479556169806e-06, |
| "loss": 0.0503, |
| "step": 13320 |
| }, |
| { |
| "epoch": 27.88702928870293, |
| "grad_norm": 0.2683485448360443, |
| "learning_rate": 3.3546384816875665e-06, |
| "loss": 0.0422, |
| "step": 13330 |
| }, |
| { |
| "epoch": 27.90794979079498, |
| "grad_norm": 0.11434085667133331, |
| "learning_rate": 3.315055721739746e-06, |
| "loss": 0.0405, |
| "step": 13340 |
| }, |
| { |
| "epoch": 27.92887029288703, |
| "grad_norm": 0.1836562305688858, |
| "learning_rate": 3.275699868160592e-06, |
| "loss": 0.0416, |
| "step": 13350 |
| }, |
| { |
| "epoch": 27.94979079497908, |
| "grad_norm": 0.20131796598434448, |
| "learning_rate": 3.23657111223436e-06, |
| "loss": 0.0465, |
| "step": 13360 |
| }, |
| { |
| "epoch": 27.97071129707113, |
| "grad_norm": 0.20107074081897736, |
| "learning_rate": 3.1976696441414764e-06, |
| "loss": 0.0418, |
| "step": 13370 |
| }, |
| { |
| "epoch": 27.99163179916318, |
| "grad_norm": 0.19358371198177338, |
| "learning_rate": 3.158995652957719e-06, |
| "loss": 0.0394, |
| "step": 13380 |
| }, |
| { |
| "epoch": 28.01255230125523, |
| "grad_norm": 0.18561676144599915, |
| "learning_rate": 3.1205493266531937e-06, |
| "loss": 0.0425, |
| "step": 13390 |
| }, |
| { |
| "epoch": 28.03347280334728, |
| "grad_norm": 0.18126420676708221, |
| "learning_rate": 3.082330852091497e-06, |
| "loss": 0.0389, |
| "step": 13400 |
| }, |
| { |
| "epoch": 28.05439330543933, |
| "grad_norm": 0.2034623920917511, |
| "learning_rate": 3.0443404150287847e-06, |
| "loss": 0.0429, |
| "step": 13410 |
| }, |
| { |
| "epoch": 28.07531380753138, |
| "grad_norm": 0.16916005313396454, |
| "learning_rate": 3.0065782001128475e-06, |
| "loss": 0.037, |
| "step": 13420 |
| }, |
| { |
| "epoch": 28.09623430962343, |
| "grad_norm": 0.1833481341600418, |
| "learning_rate": 2.9690443908822252e-06, |
| "loss": 0.0399, |
| "step": 13430 |
| }, |
| { |
| "epoch": 28.11715481171548, |
| "grad_norm": 0.21004605293273926, |
| "learning_rate": 2.9317391697653518e-06, |
| "loss": 0.0473, |
| "step": 13440 |
| }, |
| { |
| "epoch": 28.13807531380753, |
| "grad_norm": 0.14262895286083221, |
| "learning_rate": 2.8946627180795936e-06, |
| "loss": 0.0443, |
| "step": 13450 |
| }, |
| { |
| "epoch": 28.15899581589958, |
| "grad_norm": 0.22318962216377258, |
| "learning_rate": 2.8578152160304573e-06, |
| "loss": 0.0491, |
| "step": 13460 |
| }, |
| { |
| "epoch": 28.179916317991633, |
| "grad_norm": 0.22127728164196014, |
| "learning_rate": 2.821196842710638e-06, |
| "loss": 0.0482, |
| "step": 13470 |
| }, |
| { |
| "epoch": 28.200836820083683, |
| "grad_norm": 0.18599243462085724, |
| "learning_rate": 2.7848077760991853e-06, |
| "loss": 0.0456, |
| "step": 13480 |
| }, |
| { |
| "epoch": 28.221757322175733, |
| "grad_norm": 0.2014002501964569, |
| "learning_rate": 2.7486481930606434e-06, |
| "loss": 0.0427, |
| "step": 13490 |
| }, |
| { |
| "epoch": 28.242677824267783, |
| "grad_norm": 0.15174347162246704, |
| "learning_rate": 2.712718269344161e-06, |
| "loss": 0.0421, |
| "step": 13500 |
| }, |
| { |
| "epoch": 28.263598326359833, |
| "grad_norm": 0.2831948697566986, |
| "learning_rate": 2.677018179582669e-06, |
| "loss": 0.0458, |
| "step": 13510 |
| }, |
| { |
| "epoch": 28.284518828451883, |
| "grad_norm": 0.16338075697422028, |
| "learning_rate": 2.641548097292024e-06, |
| "loss": 0.0468, |
| "step": 13520 |
| }, |
| { |
| "epoch": 28.305439330543933, |
| "grad_norm": 0.16803938150405884, |
| "learning_rate": 2.606308194870133e-06, |
| "loss": 0.0459, |
| "step": 13530 |
| }, |
| { |
| "epoch": 28.326359832635983, |
| "grad_norm": 0.17965620756149292, |
| "learning_rate": 2.5712986435961707e-06, |
| "loss": 0.0466, |
| "step": 13540 |
| }, |
| { |
| "epoch": 28.347280334728033, |
| "grad_norm": 0.19231706857681274, |
| "learning_rate": 2.536519613629723e-06, |
| "loss": 0.043, |
| "step": 13550 |
| }, |
| { |
| "epoch": 28.368200836820083, |
| "grad_norm": 0.22725863754749298, |
| "learning_rate": 2.501971274009923e-06, |
| "loss": 0.0432, |
| "step": 13560 |
| }, |
| { |
| "epoch": 28.389121338912133, |
| "grad_norm": 0.18293611705303192, |
| "learning_rate": 2.467653792654695e-06, |
| "loss": 0.0487, |
| "step": 13570 |
| }, |
| { |
| "epoch": 28.410041841004183, |
| "grad_norm": 0.22322218120098114, |
| "learning_rate": 2.4335673363598822e-06, |
| "loss": 0.0416, |
| "step": 13580 |
| }, |
| { |
| "epoch": 28.430962343096233, |
| "grad_norm": 0.19227294623851776, |
| "learning_rate": 2.399712070798471e-06, |
| "loss": 0.042, |
| "step": 13590 |
| }, |
| { |
| "epoch": 28.451882845188283, |
| "grad_norm": 0.23683269321918488, |
| "learning_rate": 2.3660881605197694e-06, |
| "loss": 0.0437, |
| "step": 13600 |
| }, |
| { |
| "epoch": 28.472803347280333, |
| "grad_norm": 0.1566876918077469, |
| "learning_rate": 2.332695768948617e-06, |
| "loss": 0.0448, |
| "step": 13610 |
| }, |
| { |
| "epoch": 28.493723849372383, |
| "grad_norm": 0.23689772188663483, |
| "learning_rate": 2.299535058384583e-06, |
| "loss": 0.0439, |
| "step": 13620 |
| }, |
| { |
| "epoch": 28.514644351464437, |
| "grad_norm": 0.20031625032424927, |
| "learning_rate": 2.266606190001186e-06, |
| "loss": 0.0439, |
| "step": 13630 |
| }, |
| { |
| "epoch": 28.535564853556487, |
| "grad_norm": 0.19360850751399994, |
| "learning_rate": 2.2339093238450737e-06, |
| "loss": 0.0414, |
| "step": 13640 |
| }, |
| { |
| "epoch": 28.556485355648537, |
| "grad_norm": 0.21083463728427887, |
| "learning_rate": 2.20144461883533e-06, |
| "loss": 0.0436, |
| "step": 13650 |
| }, |
| { |
| "epoch": 28.577405857740587, |
| "grad_norm": 0.19224782288074493, |
| "learning_rate": 2.1692122327625908e-06, |
| "loss": 0.0409, |
| "step": 13660 |
| }, |
| { |
| "epoch": 28.598326359832637, |
| "grad_norm": 0.20601852238178253, |
| "learning_rate": 2.137212322288379e-06, |
| "loss": 0.0462, |
| "step": 13670 |
| }, |
| { |
| "epoch": 28.619246861924687, |
| "grad_norm": 0.19003936648368835, |
| "learning_rate": 2.105445042944282e-06, |
| "loss": 0.0407, |
| "step": 13680 |
| }, |
| { |
| "epoch": 28.640167364016737, |
| "grad_norm": 0.15093214809894562, |
| "learning_rate": 2.0739105491312027e-06, |
| "loss": 0.0427, |
| "step": 13690 |
| }, |
| { |
| "epoch": 28.661087866108787, |
| "grad_norm": 0.2095935046672821, |
| "learning_rate": 2.0426089941186443e-06, |
| "loss": 0.042, |
| "step": 13700 |
| }, |
| { |
| "epoch": 28.682008368200837, |
| "grad_norm": 0.30906569957733154, |
| "learning_rate": 2.0115405300439093e-06, |
| "loss": 0.0479, |
| "step": 13710 |
| }, |
| { |
| "epoch": 28.702928870292887, |
| "grad_norm": 0.2684055268764496, |
| "learning_rate": 1.9807053079114013e-06, |
| "loss": 0.0445, |
| "step": 13720 |
| }, |
| { |
| "epoch": 28.723849372384937, |
| "grad_norm": 0.19294482469558716, |
| "learning_rate": 1.9501034775919024e-06, |
| "loss": 0.0411, |
| "step": 13730 |
| }, |
| { |
| "epoch": 28.744769874476987, |
| "grad_norm": 0.18607813119888306, |
| "learning_rate": 1.9197351878217917e-06, |
| "loss": 0.0422, |
| "step": 13740 |
| }, |
| { |
| "epoch": 28.765690376569037, |
| "grad_norm": 0.16477899253368378, |
| "learning_rate": 1.8896005862023669e-06, |
| "loss": 0.0398, |
| "step": 13750 |
| }, |
| { |
| "epoch": 28.786610878661087, |
| "grad_norm": 0.16216787695884705, |
| "learning_rate": 1.8596998191991288e-06, |
| "loss": 0.0425, |
| "step": 13760 |
| }, |
| { |
| "epoch": 28.807531380753137, |
| "grad_norm": 0.2556508779525757, |
| "learning_rate": 1.8300330321410208e-06, |
| "loss": 0.0521, |
| "step": 13770 |
| }, |
| { |
| "epoch": 28.828451882845187, |
| "grad_norm": 0.15799850225448608, |
| "learning_rate": 1.8006003692197794e-06, |
| "loss": 0.0432, |
| "step": 13780 |
| }, |
| { |
| "epoch": 28.84937238493724, |
| "grad_norm": 0.28834351897239685, |
| "learning_rate": 1.7714019734892062e-06, |
| "loss": 0.0423, |
| "step": 13790 |
| }, |
| { |
| "epoch": 28.87029288702929, |
| "grad_norm": 0.21049931645393372, |
| "learning_rate": 1.7424379868644759e-06, |
| "loss": 0.0412, |
| "step": 13800 |
| }, |
| { |
| "epoch": 28.89121338912134, |
| "grad_norm": 0.1965758353471756, |
| "learning_rate": 1.71370855012144e-06, |
| "loss": 0.0412, |
| "step": 13810 |
| }, |
| { |
| "epoch": 28.91213389121339, |
| "grad_norm": 0.15672869980335236, |
| "learning_rate": 1.6852138028959574e-06, |
| "loss": 0.0396, |
| "step": 13820 |
| }, |
| { |
| "epoch": 28.93305439330544, |
| "grad_norm": 0.21490143239498138, |
| "learning_rate": 1.6569538836832044e-06, |
| "loss": 0.0489, |
| "step": 13830 |
| }, |
| { |
| "epoch": 28.95397489539749, |
| "grad_norm": 0.2109925001859665, |
| "learning_rate": 1.6289289298370147e-06, |
| "loss": 0.0458, |
| "step": 13840 |
| }, |
| { |
| "epoch": 28.97489539748954, |
| "grad_norm": 0.1507532149553299, |
| "learning_rate": 1.6011390775691748e-06, |
| "loss": 0.0462, |
| "step": 13850 |
| }, |
| { |
| "epoch": 28.99581589958159, |
| "grad_norm": 0.17817844450473785, |
| "learning_rate": 1.5735844619488238e-06, |
| "loss": 0.0432, |
| "step": 13860 |
| }, |
| { |
| "epoch": 29.01673640167364, |
| "grad_norm": 0.22161857783794403, |
| "learning_rate": 1.5462652169017322e-06, |
| "loss": 0.0359, |
| "step": 13870 |
| }, |
| { |
| "epoch": 29.03765690376569, |
| "grad_norm": 0.2035890519618988, |
| "learning_rate": 1.5191814752097023e-06, |
| "loss": 0.0454, |
| "step": 13880 |
| }, |
| { |
| "epoch": 29.05857740585774, |
| "grad_norm": 0.1549598127603531, |
| "learning_rate": 1.492333368509896e-06, |
| "loss": 0.0434, |
| "step": 13890 |
| }, |
| { |
| "epoch": 29.07949790794979, |
| "grad_norm": 0.17815828323364258, |
| "learning_rate": 1.4657210272941923e-06, |
| "loss": 0.0404, |
| "step": 13900 |
| }, |
| { |
| "epoch": 29.10041841004184, |
| "grad_norm": 0.19060222804546356, |
| "learning_rate": 1.4393445809085748e-06, |
| "loss": 0.0414, |
| "step": 13910 |
| }, |
| { |
| "epoch": 29.12133891213389, |
| "grad_norm": 0.1861022710800171, |
| "learning_rate": 1.4132041575524834e-06, |
| "loss": 0.0447, |
| "step": 13920 |
| }, |
| { |
| "epoch": 29.14225941422594, |
| "grad_norm": 0.20010648667812347, |
| "learning_rate": 1.387299884278187e-06, |
| "loss": 0.0444, |
| "step": 13930 |
| }, |
| { |
| "epoch": 29.16317991631799, |
| "grad_norm": 0.3000384569168091, |
| "learning_rate": 1.3616318869901945e-06, |
| "loss": 0.0428, |
| "step": 13940 |
| }, |
| { |
| "epoch": 29.184100418410043, |
| "grad_norm": 0.17813272774219513, |
| "learning_rate": 1.336200290444606e-06, |
| "loss": 0.0478, |
| "step": 13950 |
| }, |
| { |
| "epoch": 29.205020920502093, |
| "grad_norm": 0.15890157222747803, |
| "learning_rate": 1.3110052182485454e-06, |
| "loss": 0.0389, |
| "step": 13960 |
| }, |
| { |
| "epoch": 29.225941422594143, |
| "grad_norm": 0.1963435560464859, |
| "learning_rate": 1.2860467928595298e-06, |
| "loss": 0.0456, |
| "step": 13970 |
| }, |
| { |
| "epoch": 29.246861924686193, |
| "grad_norm": 0.21045024693012238, |
| "learning_rate": 1.2613251355848732e-06, |
| "loss": 0.0433, |
| "step": 13980 |
| }, |
| { |
| "epoch": 29.267782426778243, |
| "grad_norm": 0.19739368557929993, |
| "learning_rate": 1.2368403665811324e-06, |
| "loss": 0.0414, |
| "step": 13990 |
| }, |
| { |
| "epoch": 29.288702928870293, |
| "grad_norm": 0.23470686376094818, |
| "learning_rate": 1.2125926048534686e-06, |
| "loss": 0.0424, |
| "step": 14000 |
| }, |
| { |
| "epoch": 29.309623430962343, |
| "grad_norm": 0.11903766542673111, |
| "learning_rate": 1.1885819682551259e-06, |
| "loss": 0.0374, |
| "step": 14010 |
| }, |
| { |
| "epoch": 29.330543933054393, |
| "grad_norm": 0.16985724866390228, |
| "learning_rate": 1.164808573486814e-06, |
| "loss": 0.036, |
| "step": 14020 |
| }, |
| { |
| "epoch": 29.351464435146443, |
| "grad_norm": 0.19907903671264648, |
| "learning_rate": 1.1412725360961608e-06, |
| "loss": 0.0404, |
| "step": 14030 |
| }, |
| { |
| "epoch": 29.372384937238493, |
| "grad_norm": 0.15983760356903076, |
| "learning_rate": 1.1179739704771486e-06, |
| "loss": 0.0447, |
| "step": 14040 |
| }, |
| { |
| "epoch": 29.393305439330543, |
| "grad_norm": 0.21767009794712067, |
| "learning_rate": 1.0949129898695675e-06, |
| "loss": 0.0498, |
| "step": 14050 |
| }, |
| { |
| "epoch": 29.414225941422593, |
| "grad_norm": 0.23430395126342773, |
| "learning_rate": 1.0720897063584423e-06, |
| "loss": 0.0426, |
| "step": 14060 |
| }, |
| { |
| "epoch": 29.435146443514643, |
| "grad_norm": 0.14188767969608307, |
| "learning_rate": 1.0495042308735103e-06, |
| "loss": 0.0408, |
| "step": 14070 |
| }, |
| { |
| "epoch": 29.456066945606693, |
| "grad_norm": 0.18734532594680786, |
| "learning_rate": 1.0271566731886617e-06, |
| "loss": 0.0402, |
| "step": 14080 |
| }, |
| { |
| "epoch": 29.476987447698743, |
| "grad_norm": 0.16820771992206573, |
| "learning_rate": 1.005047141921428e-06, |
| "loss": 0.0429, |
| "step": 14090 |
| }, |
| { |
| "epoch": 29.497907949790793, |
| "grad_norm": 0.1820981651544571, |
| "learning_rate": 9.831757445324274e-07, |
| "loss": 0.0488, |
| "step": 14100 |
| }, |
| { |
| "epoch": 29.518828451882847, |
| "grad_norm": 0.19326896965503693, |
| "learning_rate": 9.615425873248761e-07, |
| "loss": 0.039, |
| "step": 14110 |
| }, |
| { |
| "epoch": 29.539748953974897, |
| "grad_norm": 0.16431112587451935, |
| "learning_rate": 9.401477754440502e-07, |
| "loss": 0.0416, |
| "step": 14120 |
| }, |
| { |
| "epoch": 29.560669456066947, |
| "grad_norm": 0.2652297914028168, |
| "learning_rate": 9.189914128767684e-07, |
| "loss": 0.0412, |
| "step": 14130 |
| }, |
| { |
| "epoch": 29.581589958158997, |
| "grad_norm": 0.19096574187278748, |
| "learning_rate": 8.980736024508996e-07, |
| "loss": 0.0362, |
| "step": 14140 |
| }, |
| { |
| "epoch": 29.602510460251047, |
| "grad_norm": 0.31353315711021423, |
| "learning_rate": 8.77394445834867e-07, |
| "loss": 0.0421, |
| "step": 14150 |
| }, |
| { |
| "epoch": 29.623430962343097, |
| "grad_norm": 0.27862218022346497, |
| "learning_rate": 8.569540435371281e-07, |
| "loss": 0.0445, |
| "step": 14160 |
| }, |
| { |
| "epoch": 29.644351464435147, |
| "grad_norm": 0.21319982409477234, |
| "learning_rate": 8.367524949057348e-07, |
| "loss": 0.0451, |
| "step": 14170 |
| }, |
| { |
| "epoch": 29.665271966527197, |
| "grad_norm": 0.1328536719083786, |
| "learning_rate": 8.167898981277844e-07, |
| "loss": 0.0401, |
| "step": 14180 |
| }, |
| { |
| "epoch": 29.686192468619247, |
| "grad_norm": 0.21670937538146973, |
| "learning_rate": 7.970663502290143e-07, |
| "loss": 0.0375, |
| "step": 14190 |
| }, |
| { |
| "epoch": 29.707112970711297, |
| "grad_norm": 0.21872220933437347, |
| "learning_rate": 7.775819470732692e-07, |
| "loss": 0.0407, |
| "step": 14200 |
| }, |
| { |
| "epoch": 29.728033472803347, |
| "grad_norm": 0.16718120872974396, |
| "learning_rate": 7.583367833620681e-07, |
| "loss": 0.04, |
| "step": 14210 |
| }, |
| { |
| "epoch": 29.748953974895397, |
| "grad_norm": 0.1932561844587326, |
| "learning_rate": 7.39330952634143e-07, |
| "loss": 0.0482, |
| "step": 14220 |
| }, |
| { |
| "epoch": 29.769874476987447, |
| "grad_norm": 0.17794445157051086, |
| "learning_rate": 7.205645472649681e-07, |
| "loss": 0.0407, |
| "step": 14230 |
| }, |
| { |
| "epoch": 29.790794979079497, |
| "grad_norm": 0.19033575057983398, |
| "learning_rate": 7.020376584663202e-07, |
| "loss": 0.0446, |
| "step": 14240 |
| }, |
| { |
| "epoch": 29.811715481171547, |
| "grad_norm": 0.20763246715068817, |
| "learning_rate": 6.83750376285841e-07, |
| "loss": 0.0467, |
| "step": 14250 |
| }, |
| { |
| "epoch": 29.8326359832636, |
| "grad_norm": 0.15791551768779755, |
| "learning_rate": 6.657027896065982e-07, |
| "loss": 0.0462, |
| "step": 14260 |
| }, |
| { |
| "epoch": 29.85355648535565, |
| "grad_norm": 0.1774657666683197, |
| "learning_rate": 6.478949861466355e-07, |
| "loss": 0.0404, |
| "step": 14270 |
| }, |
| { |
| "epoch": 29.8744769874477, |
| "grad_norm": 0.17342764139175415, |
| "learning_rate": 6.303270524585736e-07, |
| "loss": 0.0456, |
| "step": 14280 |
| }, |
| { |
| "epoch": 29.89539748953975, |
| "grad_norm": 0.1711912751197815, |
| "learning_rate": 6.129990739291713e-07, |
| "loss": 0.0445, |
| "step": 14290 |
| }, |
| { |
| "epoch": 29.9163179916318, |
| "grad_norm": 0.19799375534057617, |
| "learning_rate": 5.959111347789093e-07, |
| "loss": 0.0364, |
| "step": 14300 |
| }, |
| { |
| "epoch": 29.93723849372385, |
| "grad_norm": 0.21139439940452576, |
| "learning_rate": 5.790633180615956e-07, |
| "loss": 0.0404, |
| "step": 14310 |
| }, |
| { |
| "epoch": 29.9581589958159, |
| "grad_norm": 0.2200784832239151, |
| "learning_rate": 5.624557056639446e-07, |
| "loss": 0.0377, |
| "step": 14320 |
| }, |
| { |
| "epoch": 29.97907949790795, |
| "grad_norm": 0.16954976320266724, |
| "learning_rate": 5.460883783051984e-07, |
| "loss": 0.0399, |
| "step": 14330 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.5953750014305115, |
| "learning_rate": 5.299614155367171e-07, |
| "loss": 0.0394, |
| "step": 14340 |
| }, |
| { |
| "epoch": 30.02092050209205, |
| "grad_norm": 0.1778673529624939, |
| "learning_rate": 5.140748957415897e-07, |
| "loss": 0.0447, |
| "step": 14350 |
| }, |
| { |
| "epoch": 30.0418410041841, |
| "grad_norm": 0.2141769528388977, |
| "learning_rate": 4.984288961342787e-07, |
| "loss": 0.049, |
| "step": 14360 |
| }, |
| { |
| "epoch": 30.06276150627615, |
| "grad_norm": 0.1671600043773651, |
| "learning_rate": 4.830234927602206e-07, |
| "loss": 0.0447, |
| "step": 14370 |
| }, |
| { |
| "epoch": 30.0836820083682, |
| "grad_norm": 0.19291211664676666, |
| "learning_rate": 4.6785876049545986e-07, |
| "loss": 0.039, |
| "step": 14380 |
| }, |
| { |
| "epoch": 30.10460251046025, |
| "grad_norm": 0.20223639905452728, |
| "learning_rate": 4.5293477304629297e-07, |
| "loss": 0.0461, |
| "step": 14390 |
| }, |
| { |
| "epoch": 30.1255230125523, |
| "grad_norm": 0.22967307269573212, |
| "learning_rate": 4.382516029489081e-07, |
| "loss": 0.0443, |
| "step": 14400 |
| }, |
| { |
| "epoch": 30.14644351464435, |
| "grad_norm": 0.22878475487232208, |
| "learning_rate": 4.2380932156902975e-07, |
| "loss": 0.0441, |
| "step": 14410 |
| }, |
| { |
| "epoch": 30.1673640167364, |
| "grad_norm": 0.1665542870759964, |
| "learning_rate": 4.0960799910156335e-07, |
| "loss": 0.0394, |
| "step": 14420 |
| }, |
| { |
| "epoch": 30.188284518828453, |
| "grad_norm": 0.216718852519989, |
| "learning_rate": 3.956477045702844e-07, |
| "loss": 0.043, |
| "step": 14430 |
| }, |
| { |
| "epoch": 30.209205020920503, |
| "grad_norm": 0.20483459532260895, |
| "learning_rate": 3.819285058274613e-07, |
| "loss": 0.0477, |
| "step": 14440 |
| }, |
| { |
| "epoch": 30.230125523012553, |
| "grad_norm": 0.20638415217399597, |
| "learning_rate": 3.684504695535496e-07, |
| "loss": 0.036, |
| "step": 14450 |
| }, |
| { |
| "epoch": 30.251046025104603, |
| "grad_norm": 0.11912164092063904, |
| "learning_rate": 3.552136612568813e-07, |
| "loss": 0.0351, |
| "step": 14460 |
| }, |
| { |
| "epoch": 30.271966527196653, |
| "grad_norm": 0.20847609639167786, |
| "learning_rate": 3.422181452733042e-07, |
| "loss": 0.0406, |
| "step": 14470 |
| }, |
| { |
| "epoch": 30.292887029288703, |
| "grad_norm": 0.15011854469776154, |
| "learning_rate": 3.294639847659209e-07, |
| "loss": 0.0491, |
| "step": 14480 |
| }, |
| { |
| "epoch": 30.313807531380753, |
| "grad_norm": 0.21889813244342804, |
| "learning_rate": 3.169512417247389e-07, |
| "loss": 0.0359, |
| "step": 14490 |
| }, |
| { |
| "epoch": 30.334728033472803, |
| "grad_norm": 0.21624627709388733, |
| "learning_rate": 3.046799769663822e-07, |
| "loss": 0.0499, |
| "step": 14500 |
| }, |
| { |
| "epoch": 30.355648535564853, |
| "grad_norm": 0.209271639585495, |
| "learning_rate": 2.926502501338191e-07, |
| "loss": 0.046, |
| "step": 14510 |
| }, |
| { |
| "epoch": 30.376569037656903, |
| "grad_norm": 0.260744571685791, |
| "learning_rate": 2.808621196960404e-07, |
| "loss": 0.0428, |
| "step": 14520 |
| }, |
| { |
| "epoch": 30.397489539748953, |
| "grad_norm": 0.22880905866622925, |
| "learning_rate": 2.6931564294778164e-07, |
| "loss": 0.0351, |
| "step": 14530 |
| }, |
| { |
| "epoch": 30.418410041841003, |
| "grad_norm": 0.1641969531774521, |
| "learning_rate": 2.58010876009257e-07, |
| "loss": 0.0414, |
| "step": 14540 |
| }, |
| { |
| "epoch": 30.439330543933053, |
| "grad_norm": 0.15474116802215576, |
| "learning_rate": 2.4694787382589237e-07, |
| "loss": 0.0362, |
| "step": 14550 |
| }, |
| { |
| "epoch": 30.460251046025103, |
| "grad_norm": 0.23926879465579987, |
| "learning_rate": 2.3612669016802592e-07, |
| "loss": 0.0376, |
| "step": 14560 |
| }, |
| { |
| "epoch": 30.481171548117153, |
| "grad_norm": 0.19552695751190186, |
| "learning_rate": 2.2554737763068045e-07, |
| "loss": 0.0478, |
| "step": 14570 |
| }, |
| { |
| "epoch": 30.502092050209207, |
| "grad_norm": 0.17018336057662964, |
| "learning_rate": 2.152099876332858e-07, |
| "loss": 0.0455, |
| "step": 14580 |
| }, |
| { |
| "epoch": 30.523012552301257, |
| "grad_norm": 0.1531943380832672, |
| "learning_rate": 2.051145704194457e-07, |
| "loss": 0.0386, |
| "step": 14590 |
| }, |
| { |
| "epoch": 30.543933054393307, |
| "grad_norm": 0.17155568301677704, |
| "learning_rate": 1.9526117505667129e-07, |
| "loss": 0.0424, |
| "step": 14600 |
| }, |
| { |
| "epoch": 30.564853556485357, |
| "grad_norm": 0.1632601022720337, |
| "learning_rate": 1.856498494361758e-07, |
| "loss": 0.0376, |
| "step": 14610 |
| }, |
| { |
| "epoch": 30.585774058577407, |
| "grad_norm": 0.2161942720413208, |
| "learning_rate": 1.7628064027260803e-07, |
| "loss": 0.0402, |
| "step": 14620 |
| }, |
| { |
| "epoch": 30.606694560669457, |
| "grad_norm": 0.22608569264411926, |
| "learning_rate": 1.671535931038415e-07, |
| "loss": 0.0443, |
| "step": 14630 |
| }, |
| { |
| "epoch": 30.627615062761507, |
| "grad_norm": 0.15184949338436127, |
| "learning_rate": 1.5826875229076333e-07, |
| "loss": 0.0417, |
| "step": 14640 |
| }, |
| { |
| "epoch": 30.648535564853557, |
| "grad_norm": 0.1841401606798172, |
| "learning_rate": 1.496261610170302e-07, |
| "loss": 0.0397, |
| "step": 14650 |
| }, |
| { |
| "epoch": 30.669456066945607, |
| "grad_norm": 0.17780251801013947, |
| "learning_rate": 1.4122586128888503e-07, |
| "loss": 0.0545, |
| "step": 14660 |
| }, |
| { |
| "epoch": 30.690376569037657, |
| "grad_norm": 0.32660233974456787, |
| "learning_rate": 1.3306789393494612e-07, |
| "loss": 0.0437, |
| "step": 14670 |
| }, |
| { |
| "epoch": 30.711297071129707, |
| "grad_norm": 0.20620398223400116, |
| "learning_rate": 1.2515229860599054e-07, |
| "loss": 0.0414, |
| "step": 14680 |
| }, |
| { |
| "epoch": 30.732217573221757, |
| "grad_norm": 0.3603353798389435, |
| "learning_rate": 1.1747911377478771e-07, |
| "loss": 0.0465, |
| "step": 14690 |
| }, |
| { |
| "epoch": 30.753138075313807, |
| "grad_norm": 0.18610107898712158, |
| "learning_rate": 1.1004837673589952e-07, |
| "loss": 0.0385, |
| "step": 14700 |
| }, |
| { |
| "epoch": 30.774058577405857, |
| "grad_norm": 0.24775725603103638, |
| "learning_rate": 1.0286012360550267e-07, |
| "loss": 0.0507, |
| "step": 14710 |
| }, |
| { |
| "epoch": 30.794979079497907, |
| "grad_norm": 0.19726818799972534, |
| "learning_rate": 9.591438932121111e-08, |
| "loss": 0.0421, |
| "step": 14720 |
| }, |
| { |
| "epoch": 30.815899581589957, |
| "grad_norm": 0.2514331340789795, |
| "learning_rate": 8.921120764189272e-08, |
| "loss": 0.039, |
| "step": 14730 |
| }, |
| { |
| "epoch": 30.836820083682007, |
| "grad_norm": 0.21896202862262726, |
| "learning_rate": 8.275061114753068e-08, |
| "loss": 0.0551, |
| "step": 14740 |
| }, |
| { |
| "epoch": 30.85774058577406, |
| "grad_norm": 0.21271194517612457, |
| "learning_rate": 7.65326312390624e-08, |
| "loss": 0.0478, |
| "step": 14750 |
| }, |
| { |
| "epoch": 30.87866108786611, |
| "grad_norm": 0.20337381958961487, |
| "learning_rate": 7.055729813819079e-08, |
| "loss": 0.0449, |
| "step": 14760 |
| }, |
| { |
| "epoch": 30.89958158995816, |
| "grad_norm": 0.18087029457092285, |
| "learning_rate": 6.48246408872899e-08, |
| "loss": 0.044, |
| "step": 14770 |
| }, |
| { |
| "epoch": 30.92050209205021, |
| "grad_norm": 0.15678484737873077, |
| "learning_rate": 5.9334687349227314e-08, |
| "loss": 0.0434, |
| "step": 14780 |
| }, |
| { |
| "epoch": 30.94142259414226, |
| "grad_norm": 0.18289516866207123, |
| "learning_rate": 5.4087464207236426e-08, |
| "loss": 0.0419, |
| "step": 14790 |
| }, |
| { |
| "epoch": 30.96234309623431, |
| "grad_norm": 0.21055713295936584, |
| "learning_rate": 4.9082996964794345e-08, |
| "loss": 0.0502, |
| "step": 14800 |
| }, |
| { |
| "epoch": 30.98326359832636, |
| "grad_norm": 0.16997161507606506, |
| "learning_rate": 4.432130994548866e-08, |
| "loss": 0.045, |
| "step": 14810 |
| }, |
| { |
| "epoch": 31.00418410041841, |
| "grad_norm": 0.15064077079296112, |
| "learning_rate": 3.980242629291198e-08, |
| "loss": 0.0556, |
| "step": 14820 |
| }, |
| { |
| "epoch": 31.02510460251046, |
| "grad_norm": 0.23443807661533356, |
| "learning_rate": 3.5526367970539765e-08, |
| "loss": 0.0377, |
| "step": 14830 |
| }, |
| { |
| "epoch": 31.04602510460251, |
| "grad_norm": 0.24141825735569, |
| "learning_rate": 3.1493155761613826e-08, |
| "loss": 0.043, |
| "step": 14840 |
| }, |
| { |
| "epoch": 31.06694560669456, |
| "grad_norm": 0.1744871884584427, |
| "learning_rate": 2.7702809269058992e-08, |
| "loss": 0.0407, |
| "step": 14850 |
| }, |
| { |
| "epoch": 31.08786610878661, |
| "grad_norm": 0.18967264890670776, |
| "learning_rate": 2.4155346915394337e-08, |
| "loss": 0.0447, |
| "step": 14860 |
| }, |
| { |
| "epoch": 31.10878661087866, |
| "grad_norm": 0.19735538959503174, |
| "learning_rate": 2.085078594261103e-08, |
| "loss": 0.0426, |
| "step": 14870 |
| }, |
| { |
| "epoch": 31.12970711297071, |
| "grad_norm": 0.13576459884643555, |
| "learning_rate": 1.7789142412122372e-08, |
| "loss": 0.0398, |
| "step": 14880 |
| }, |
| { |
| "epoch": 31.15062761506276, |
| "grad_norm": 0.17847837507724762, |
| "learning_rate": 1.4970431204663905e-08, |
| "loss": 0.0403, |
| "step": 14890 |
| }, |
| { |
| "epoch": 31.171548117154813, |
| "grad_norm": 0.27608200907707214, |
| "learning_rate": 1.2394666020226764e-08, |
| "loss": 0.045, |
| "step": 14900 |
| }, |
| { |
| "epoch": 31.192468619246863, |
| "grad_norm": 0.246599480509758, |
| "learning_rate": 1.0061859378007743e-08, |
| "loss": 0.0404, |
| "step": 14910 |
| }, |
| { |
| "epoch": 31.213389121338913, |
| "grad_norm": 0.19810283184051514, |
| "learning_rate": 7.97202261630936e-09, |
| "loss": 0.0424, |
| "step": 14920 |
| }, |
| { |
| "epoch": 31.234309623430963, |
| "grad_norm": 0.14922457933425903, |
| "learning_rate": 6.125165892539863e-09, |
| "loss": 0.035, |
| "step": 14930 |
| }, |
| { |
| "epoch": 31.255230125523013, |
| "grad_norm": 0.1568155139684677, |
| "learning_rate": 4.5212981831022076e-09, |
| "loss": 0.0384, |
| "step": 14940 |
| }, |
| { |
| "epoch": 31.276150627615063, |
| "grad_norm": 0.20648132264614105, |
| "learning_rate": 3.1604272834051542e-09, |
| "loss": 0.0546, |
| "step": 14950 |
| }, |
| { |
| "epoch": 31.297071129707113, |
| "grad_norm": 0.14501909911632538, |
| "learning_rate": 2.04255980778556e-09, |
| "loss": 0.0369, |
| "step": 14960 |
| }, |
| { |
| "epoch": 31.317991631799163, |
| "grad_norm": 0.20130616426467896, |
| "learning_rate": 1.1677011895028234e-09, |
| "loss": 0.0464, |
| "step": 14970 |
| }, |
| { |
| "epoch": 31.338912133891213, |
| "grad_norm": 0.22078537940979004, |
| "learning_rate": 5.358556807000259e-10, |
| "loss": 0.0453, |
| "step": 14980 |
| }, |
| { |
| "epoch": 31.359832635983263, |
| "grad_norm": 0.20974136888980865, |
| "learning_rate": 1.4702635238728058e-10, |
| "loss": 0.0401, |
| "step": 14990 |
| }, |
| { |
| "epoch": 31.380753138075313, |
| "grad_norm": 0.31738701462745667, |
| "learning_rate": 1.2150944139754927e-12, |
| "loss": 0.0377, |
| "step": 15000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 15000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 32, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |