| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.25, |
| "eval_steps": 500, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "grad_norm": 1.4432214498519897, |
| "learning_rate": 2.25e-07, |
| "loss": 1.2176, |
| "step": 10 |
| }, |
| { |
| "grad_norm": 1.5728261470794678, |
| "learning_rate": 4.75e-07, |
| "loss": 1.2201, |
| "step": 20 |
| }, |
| { |
| "grad_norm": 1.7649030685424805, |
| "learning_rate": 7.25e-07, |
| "loss": 1.2206, |
| "step": 30 |
| }, |
| { |
| "grad_norm": 1.3215194940567017, |
| "learning_rate": 9.75e-07, |
| "loss": 1.2173, |
| "step": 40 |
| }, |
| { |
| "grad_norm": 1.1600717306137085, |
| "learning_rate": 1.2250000000000001e-06, |
| "loss": 1.2025, |
| "step": 50 |
| }, |
| { |
| "grad_norm": 0.8358485698699951, |
| "learning_rate": 1.475e-06, |
| "loss": 1.1694, |
| "step": 60 |
| }, |
| { |
| "grad_norm": 0.7083495855331421, |
| "learning_rate": 1.7250000000000002e-06, |
| "loss": 1.1466, |
| "step": 70 |
| }, |
| { |
| "grad_norm": 0.4341243505477905, |
| "learning_rate": 1.975e-06, |
| "loss": 1.1362, |
| "step": 80 |
| }, |
| { |
| "grad_norm": 0.5044889450073242, |
| "learning_rate": 2.225e-06, |
| "loss": 1.1188, |
| "step": 90 |
| }, |
| { |
| "grad_norm": 0.47925513982772827, |
| "learning_rate": 2.4750000000000004e-06, |
| "loss": 1.1153, |
| "step": 100 |
| }, |
| { |
| "grad_norm": 0.38645315170288086, |
| "learning_rate": 2.725e-06, |
| "loss": 1.1092, |
| "step": 110 |
| }, |
| { |
| "grad_norm": 0.3141016662120819, |
| "learning_rate": 2.975e-06, |
| "loss": 1.1073, |
| "step": 120 |
| }, |
| { |
| "grad_norm": 0.24226568639278412, |
| "learning_rate": 3.225e-06, |
| "loss": 1.1137, |
| "step": 130 |
| }, |
| { |
| "grad_norm": 0.28608769178390503, |
| "learning_rate": 3.4750000000000006e-06, |
| "loss": 1.1113, |
| "step": 140 |
| }, |
| { |
| "grad_norm": 0.23423372209072113, |
| "learning_rate": 3.725e-06, |
| "loss": 1.109, |
| "step": 150 |
| }, |
| { |
| "grad_norm": 0.3078203499317169, |
| "learning_rate": 3.975e-06, |
| "loss": 1.0943, |
| "step": 160 |
| }, |
| { |
| "grad_norm": 0.28202998638153076, |
| "learning_rate": 4.225e-06, |
| "loss": 1.086, |
| "step": 170 |
| }, |
| { |
| "grad_norm": 0.2905002534389496, |
| "learning_rate": 4.475e-06, |
| "loss": 1.0802, |
| "step": 180 |
| }, |
| { |
| "grad_norm": 0.4020580053329468, |
| "learning_rate": 4.7250000000000005e-06, |
| "loss": 1.0717, |
| "step": 190 |
| }, |
| { |
| "grad_norm": 0.32478901743888855, |
| "learning_rate": 4.975000000000001e-06, |
| "loss": 1.057, |
| "step": 200 |
| }, |
| { |
| "grad_norm": 0.3583512306213379, |
| "learning_rate": 5.225e-06, |
| "loss": 1.0553, |
| "step": 210 |
| }, |
| { |
| "grad_norm": 0.45024362206459045, |
| "learning_rate": 5.475e-06, |
| "loss": 1.0609, |
| "step": 220 |
| }, |
| { |
| "grad_norm": 0.37869516015052795, |
| "learning_rate": 5.725e-06, |
| "loss": 1.0491, |
| "step": 230 |
| }, |
| { |
| "grad_norm": 0.41009441018104553, |
| "learning_rate": 5.975e-06, |
| "loss": 1.0478, |
| "step": 240 |
| }, |
| { |
| "grad_norm": 0.36011573672294617, |
| "learning_rate": 6.2250000000000005e-06, |
| "loss": 1.0444, |
| "step": 250 |
| }, |
| { |
| "grad_norm": 0.4219399094581604, |
| "learning_rate": 6.475000000000001e-06, |
| "loss": 1.0465, |
| "step": 260 |
| }, |
| { |
| "grad_norm": 0.38675457239151, |
| "learning_rate": 6.725000000000001e-06, |
| "loss": 1.0392, |
| "step": 270 |
| }, |
| { |
| "grad_norm": 0.548558235168457, |
| "learning_rate": 6.975000000000001e-06, |
| "loss": 1.0355, |
| "step": 280 |
| }, |
| { |
| "grad_norm": 0.6899825930595398, |
| "learning_rate": 7.2249999999999994e-06, |
| "loss": 1.0218, |
| "step": 290 |
| }, |
| { |
| "grad_norm": 1.5976816415786743, |
| "learning_rate": 7.4750000000000004e-06, |
| "loss": 1.0082, |
| "step": 300 |
| }, |
| { |
| "grad_norm": 1.2727433443069458, |
| "learning_rate": 7.725e-06, |
| "loss": 0.9888, |
| "step": 310 |
| }, |
| { |
| "grad_norm": 1.0883558988571167, |
| "learning_rate": 7.975e-06, |
| "loss": 0.9611, |
| "step": 320 |
| }, |
| { |
| "grad_norm": 1.0092294216156006, |
| "learning_rate": 8.225e-06, |
| "loss": 0.9325, |
| "step": 330 |
| }, |
| { |
| "grad_norm": 1.1305841207504272, |
| "learning_rate": 8.475000000000001e-06, |
| "loss": 0.9152, |
| "step": 340 |
| }, |
| { |
| "grad_norm": 0.8867524862289429, |
| "learning_rate": 8.725e-06, |
| "loss": 0.908, |
| "step": 350 |
| }, |
| { |
| "grad_norm": 1.259811520576477, |
| "learning_rate": 8.975e-06, |
| "loss": 0.8933, |
| "step": 360 |
| }, |
| { |
| "grad_norm": 1.06889009475708, |
| "learning_rate": 9.225e-06, |
| "loss": 0.8738, |
| "step": 370 |
| }, |
| { |
| "grad_norm": 1.2798677682876587, |
| "learning_rate": 9.475e-06, |
| "loss": 0.8585, |
| "step": 380 |
| }, |
| { |
| "grad_norm": 1.078529953956604, |
| "learning_rate": 9.725000000000001e-06, |
| "loss": 0.8468, |
| "step": 390 |
| }, |
| { |
| "grad_norm": 1.662562608718872, |
| "learning_rate": 9.975e-06, |
| "loss": 0.808, |
| "step": 400 |
| }, |
| { |
| "grad_norm": 1.4320045709609985, |
| "learning_rate": 1.0225e-05, |
| "loss": 0.7797, |
| "step": 410 |
| }, |
| { |
| "grad_norm": 1.7881171703338623, |
| "learning_rate": 1.0475e-05, |
| "loss": 0.7256, |
| "step": 420 |
| }, |
| { |
| "grad_norm": 1.8525526523590088, |
| "learning_rate": 1.0725e-05, |
| "loss": 0.7013, |
| "step": 430 |
| }, |
| { |
| "grad_norm": 2.307291269302368, |
| "learning_rate": 1.0975e-05, |
| "loss": 0.6795, |
| "step": 440 |
| }, |
| { |
| "grad_norm": 1.3419337272644043, |
| "learning_rate": 1.1225e-05, |
| "loss": 0.6455, |
| "step": 450 |
| }, |
| { |
| "grad_norm": 1.9353570938110352, |
| "learning_rate": 1.1475000000000001e-05, |
| "loss": 0.6027, |
| "step": 460 |
| }, |
| { |
| "grad_norm": 1.9074933528900146, |
| "learning_rate": 1.1725e-05, |
| "loss": 0.5679, |
| "step": 470 |
| }, |
| { |
| "grad_norm": 2.188081741333008, |
| "learning_rate": 1.1975e-05, |
| "loss": 0.5357, |
| "step": 480 |
| }, |
| { |
| "grad_norm": 2.6344354152679443, |
| "learning_rate": 1.2225e-05, |
| "loss": 0.4859, |
| "step": 490 |
| }, |
| { |
| "grad_norm": 2.0352654457092285, |
| "learning_rate": 1.2475e-05, |
| "loss": 0.4646, |
| "step": 500 |
| }, |
| { |
| "grad_norm": 2.3691606521606445, |
| "learning_rate": 1.2725000000000001e-05, |
| "loss": 0.4303, |
| "step": 510 |
| }, |
| { |
| "grad_norm": 1.9416195154190063, |
| "learning_rate": 1.2975e-05, |
| "loss": 0.4039, |
| "step": 520 |
| }, |
| { |
| "grad_norm": 2.360607862472534, |
| "learning_rate": 1.3225000000000001e-05, |
| "loss": 0.3815, |
| "step": 530 |
| }, |
| { |
| "grad_norm": 2.2228074073791504, |
| "learning_rate": 1.3475000000000002e-05, |
| "loss": 0.3611, |
| "step": 540 |
| }, |
| { |
| "grad_norm": 2.24345326423645, |
| "learning_rate": 1.3725000000000002e-05, |
| "loss": 0.326, |
| "step": 550 |
| }, |
| { |
| "grad_norm": 1.9834187030792236, |
| "learning_rate": 1.3975000000000003e-05, |
| "loss": 0.3165, |
| "step": 560 |
| }, |
| { |
| "grad_norm": 2.3031227588653564, |
| "learning_rate": 1.4225e-05, |
| "loss": 0.2947, |
| "step": 570 |
| }, |
| { |
| "grad_norm": 2.648381471633911, |
| "learning_rate": 1.4475e-05, |
| "loss": 0.2736, |
| "step": 580 |
| }, |
| { |
| "grad_norm": 2.803637981414795, |
| "learning_rate": 1.4725e-05, |
| "loss": 0.2526, |
| "step": 590 |
| }, |
| { |
| "grad_norm": 2.120967388153076, |
| "learning_rate": 1.4975e-05, |
| "loss": 0.2509, |
| "step": 600 |
| }, |
| { |
| "grad_norm": 2.4047417640686035, |
| "learning_rate": 1.5225e-05, |
| "loss": 0.2239, |
| "step": 610 |
| }, |
| { |
| "grad_norm": 1.69230055809021, |
| "learning_rate": 1.5475e-05, |
| "loss": 0.2176, |
| "step": 620 |
| }, |
| { |
| "grad_norm": 2.378695011138916, |
| "learning_rate": 1.5725e-05, |
| "loss": 0.2005, |
| "step": 630 |
| }, |
| { |
| "grad_norm": 1.6571719646453857, |
| "learning_rate": 1.5975000000000002e-05, |
| "loss": 0.1789, |
| "step": 640 |
| }, |
| { |
| "grad_norm": 2.107168197631836, |
| "learning_rate": 1.6225e-05, |
| "loss": 0.1756, |
| "step": 650 |
| }, |
| { |
| "grad_norm": 1.9598829746246338, |
| "learning_rate": 1.6475e-05, |
| "loss": 0.1759, |
| "step": 660 |
| }, |
| { |
| "grad_norm": 1.8880751132965088, |
| "learning_rate": 1.6725000000000003e-05, |
| "loss": 0.1627, |
| "step": 670 |
| }, |
| { |
| "grad_norm": 2.0237114429473877, |
| "learning_rate": 1.6975000000000003e-05, |
| "loss": 0.1573, |
| "step": 680 |
| }, |
| { |
| "grad_norm": 2.319857597351074, |
| "learning_rate": 1.7225e-05, |
| "loss": 0.1573, |
| "step": 690 |
| }, |
| { |
| "grad_norm": 2.4036319255828857, |
| "learning_rate": 1.7475e-05, |
| "loss": 0.1535, |
| "step": 700 |
| }, |
| { |
| "grad_norm": 2.049755334854126, |
| "learning_rate": 1.7725e-05, |
| "loss": 0.1671, |
| "step": 710 |
| }, |
| { |
| "grad_norm": 2.00295090675354, |
| "learning_rate": 1.7975e-05, |
| "loss": 0.1514, |
| "step": 720 |
| }, |
| { |
| "grad_norm": 2.2543041706085205, |
| "learning_rate": 1.8225e-05, |
| "loss": 0.1315, |
| "step": 730 |
| }, |
| { |
| "grad_norm": 2.125260591506958, |
| "learning_rate": 1.8475000000000002e-05, |
| "loss": 0.1482, |
| "step": 740 |
| }, |
| { |
| "grad_norm": 2.1975486278533936, |
| "learning_rate": 1.8725e-05, |
| "loss": 0.1441, |
| "step": 750 |
| }, |
| { |
| "grad_norm": 2.1511855125427246, |
| "learning_rate": 1.8975e-05, |
| "loss": 0.1447, |
| "step": 760 |
| }, |
| { |
| "grad_norm": 1.8105329275131226, |
| "learning_rate": 1.9225e-05, |
| "loss": 0.1388, |
| "step": 770 |
| }, |
| { |
| "grad_norm": 2.322596549987793, |
| "learning_rate": 1.9475000000000002e-05, |
| "loss": 0.1394, |
| "step": 780 |
| }, |
| { |
| "grad_norm": 1.4760518074035645, |
| "learning_rate": 1.9725000000000002e-05, |
| "loss": 0.1326, |
| "step": 790 |
| }, |
| { |
| "grad_norm": 1.71709144115448, |
| "learning_rate": 1.9975e-05, |
| "loss": 0.1343, |
| "step": 800 |
| }, |
| { |
| "grad_norm": 1.6595476865768433, |
| "learning_rate": 2.0225000000000004e-05, |
| "loss": 0.1289, |
| "step": 810 |
| }, |
| { |
| "grad_norm": 1.7283124923706055, |
| "learning_rate": 2.0475e-05, |
| "loss": 0.1229, |
| "step": 820 |
| }, |
| { |
| "grad_norm": 1.847590446472168, |
| "learning_rate": 2.0725e-05, |
| "loss": 0.1253, |
| "step": 830 |
| }, |
| { |
| "grad_norm": 1.5455881357192993, |
| "learning_rate": 2.0975e-05, |
| "loss": 0.1213, |
| "step": 840 |
| }, |
| { |
| "grad_norm": 1.900499701499939, |
| "learning_rate": 2.1225e-05, |
| "loss": 0.1255, |
| "step": 850 |
| }, |
| { |
| "grad_norm": 2.0430359840393066, |
| "learning_rate": 2.1475e-05, |
| "loss": 0.1285, |
| "step": 860 |
| }, |
| { |
| "grad_norm": 1.9488919973373413, |
| "learning_rate": 2.1725e-05, |
| "loss": 0.1202, |
| "step": 870 |
| }, |
| { |
| "grad_norm": 2.0237948894500732, |
| "learning_rate": 2.1975000000000002e-05, |
| "loss": 0.1242, |
| "step": 880 |
| }, |
| { |
| "grad_norm": 1.9511979818344116, |
| "learning_rate": 2.2225e-05, |
| "loss": 0.1149, |
| "step": 890 |
| }, |
| { |
| "grad_norm": 2.1467931270599365, |
| "learning_rate": 2.2475e-05, |
| "loss": 0.1124, |
| "step": 900 |
| }, |
| { |
| "grad_norm": 1.8478883504867554, |
| "learning_rate": 2.2725000000000003e-05, |
| "loss": 0.1128, |
| "step": 910 |
| }, |
| { |
| "grad_norm": 1.758474588394165, |
| "learning_rate": 2.2975000000000003e-05, |
| "loss": 0.1079, |
| "step": 920 |
| }, |
| { |
| "grad_norm": 1.6769682168960571, |
| "learning_rate": 2.3225000000000002e-05, |
| "loss": 0.1068, |
| "step": 930 |
| }, |
| { |
| "grad_norm": 1.5823380947113037, |
| "learning_rate": 2.3475e-05, |
| "loss": 0.1005, |
| "step": 940 |
| }, |
| { |
| "grad_norm": 1.388976812362671, |
| "learning_rate": 2.3725e-05, |
| "loss": 0.1069, |
| "step": 950 |
| }, |
| { |
| "grad_norm": 1.6260298490524292, |
| "learning_rate": 2.3975e-05, |
| "loss": 0.1084, |
| "step": 960 |
| }, |
| { |
| "grad_norm": 1.9050042629241943, |
| "learning_rate": 2.4225e-05, |
| "loss": 0.1075, |
| "step": 970 |
| }, |
| { |
| "grad_norm": 2.01832914352417, |
| "learning_rate": 2.4475000000000002e-05, |
| "loss": 0.1097, |
| "step": 980 |
| }, |
| { |
| "grad_norm": 1.7089976072311401, |
| "learning_rate": 2.4725e-05, |
| "loss": 0.106, |
| "step": 990 |
| }, |
| { |
| "grad_norm": 1.495911717414856, |
| "learning_rate": 2.4975e-05, |
| "loss": 0.1145, |
| "step": 1000 |
| }, |
| { |
| "grad_norm": 1.6462352275848389, |
| "learning_rate": 2.5225e-05, |
| "loss": 0.103, |
| "step": 1010 |
| }, |
| { |
| "grad_norm": 1.2390409708023071, |
| "learning_rate": 2.5475e-05, |
| "loss": 0.1083, |
| "step": 1020 |
| }, |
| { |
| "grad_norm": 1.2533819675445557, |
| "learning_rate": 2.5725e-05, |
| "loss": 0.1114, |
| "step": 1030 |
| }, |
| { |
| "grad_norm": 1.3296815156936646, |
| "learning_rate": 2.5974999999999998e-05, |
| "loss": 0.1002, |
| "step": 1040 |
| }, |
| { |
| "grad_norm": 1.5312912464141846, |
| "learning_rate": 2.6225e-05, |
| "loss": 0.0978, |
| "step": 1050 |
| }, |
| { |
| "grad_norm": 1.5894063711166382, |
| "learning_rate": 2.6475e-05, |
| "loss": 0.0967, |
| "step": 1060 |
| }, |
| { |
| "grad_norm": 1.6358612775802612, |
| "learning_rate": 2.6725e-05, |
| "loss": 0.0999, |
| "step": 1070 |
| }, |
| { |
| "grad_norm": 1.8921128511428833, |
| "learning_rate": 2.6975000000000002e-05, |
| "loss": 0.0922, |
| "step": 1080 |
| }, |
| { |
| "grad_norm": 1.783052682876587, |
| "learning_rate": 2.7225e-05, |
| "loss": 0.0923, |
| "step": 1090 |
| }, |
| { |
| "grad_norm": 1.5674539804458618, |
| "learning_rate": 2.7475e-05, |
| "loss": 0.1028, |
| "step": 1100 |
| }, |
| { |
| "grad_norm": 1.3623286485671997, |
| "learning_rate": 2.7725e-05, |
| "loss": 0.0875, |
| "step": 1110 |
| }, |
| { |
| "grad_norm": 1.5205024480819702, |
| "learning_rate": 2.7975000000000002e-05, |
| "loss": 0.0987, |
| "step": 1120 |
| }, |
| { |
| "grad_norm": 1.4222640991210938, |
| "learning_rate": 2.8225e-05, |
| "loss": 0.0977, |
| "step": 1130 |
| }, |
| { |
| "grad_norm": 1.4713680744171143, |
| "learning_rate": 2.8475e-05, |
| "loss": 0.0894, |
| "step": 1140 |
| }, |
| { |
| "grad_norm": 1.4934260845184326, |
| "learning_rate": 2.8725e-05, |
| "loss": 0.1008, |
| "step": 1150 |
| }, |
| { |
| "grad_norm": 1.3262085914611816, |
| "learning_rate": 2.8975000000000003e-05, |
| "loss": 0.0888, |
| "step": 1160 |
| }, |
| { |
| "grad_norm": 1.3066847324371338, |
| "learning_rate": 2.9225000000000002e-05, |
| "loss": 0.0931, |
| "step": 1170 |
| }, |
| { |
| "grad_norm": 1.570601224899292, |
| "learning_rate": 2.9475e-05, |
| "loss": 0.0938, |
| "step": 1180 |
| }, |
| { |
| "grad_norm": 1.1952320337295532, |
| "learning_rate": 2.9725000000000004e-05, |
| "loss": 0.0886, |
| "step": 1190 |
| }, |
| { |
| "grad_norm": 1.5063790082931519, |
| "learning_rate": 2.9975000000000004e-05, |
| "loss": 0.0947, |
| "step": 1200 |
| }, |
| { |
| "grad_norm": 1.5905829668045044, |
| "learning_rate": 3.0225000000000003e-05, |
| "loss": 0.0893, |
| "step": 1210 |
| }, |
| { |
| "grad_norm": 1.1922318935394287, |
| "learning_rate": 3.0475000000000002e-05, |
| "loss": 0.0784, |
| "step": 1220 |
| }, |
| { |
| "grad_norm": 0.9978857636451721, |
| "learning_rate": 3.0725e-05, |
| "loss": 0.0813, |
| "step": 1230 |
| }, |
| { |
| "grad_norm": 1.6586356163024902, |
| "learning_rate": 3.0975e-05, |
| "loss": 0.0908, |
| "step": 1240 |
| }, |
| { |
| "grad_norm": 1.605808973312378, |
| "learning_rate": 3.122500000000001e-05, |
| "loss": 0.0899, |
| "step": 1250 |
| }, |
| { |
| "grad_norm": 1.4405441284179688, |
| "learning_rate": 3.1475e-05, |
| "loss": 0.0785, |
| "step": 1260 |
| }, |
| { |
| "grad_norm": 1.1686655282974243, |
| "learning_rate": 3.1725e-05, |
| "loss": 0.0931, |
| "step": 1270 |
| }, |
| { |
| "grad_norm": 1.3049031496047974, |
| "learning_rate": 3.1975e-05, |
| "loss": 0.0878, |
| "step": 1280 |
| }, |
| { |
| "grad_norm": 1.106573462486267, |
| "learning_rate": 3.2225e-05, |
| "loss": 0.0873, |
| "step": 1290 |
| }, |
| { |
| "grad_norm": 1.315805435180664, |
| "learning_rate": 3.2474999999999997e-05, |
| "loss": 0.084, |
| "step": 1300 |
| }, |
| { |
| "grad_norm": 1.4285238981246948, |
| "learning_rate": 3.2725e-05, |
| "loss": 0.0799, |
| "step": 1310 |
| }, |
| { |
| "grad_norm": 1.3433620929718018, |
| "learning_rate": 3.2975e-05, |
| "loss": 0.0929, |
| "step": 1320 |
| }, |
| { |
| "grad_norm": 1.210469126701355, |
| "learning_rate": 3.3225e-05, |
| "loss": 0.0787, |
| "step": 1330 |
| }, |
| { |
| "grad_norm": 1.4694474935531616, |
| "learning_rate": 3.3475e-05, |
| "loss": 0.0779, |
| "step": 1340 |
| }, |
| { |
| "grad_norm": 1.232349157333374, |
| "learning_rate": 3.3725e-05, |
| "loss": 0.0889, |
| "step": 1350 |
| }, |
| { |
| "grad_norm": 1.6629468202590942, |
| "learning_rate": 3.3975e-05, |
| "loss": 0.0903, |
| "step": 1360 |
| }, |
| { |
| "grad_norm": 1.3241008520126343, |
| "learning_rate": 3.4225e-05, |
| "loss": 0.0797, |
| "step": 1370 |
| }, |
| { |
| "grad_norm": 1.1734715700149536, |
| "learning_rate": 3.4475000000000005e-05, |
| "loss": 0.0774, |
| "step": 1380 |
| }, |
| { |
| "grad_norm": 1.503920555114746, |
| "learning_rate": 3.4725000000000004e-05, |
| "loss": 0.0748, |
| "step": 1390 |
| }, |
| { |
| "grad_norm": 0.890848696231842, |
| "learning_rate": 3.4975e-05, |
| "loss": 0.075, |
| "step": 1400 |
| }, |
| { |
| "grad_norm": 1.2756930589675903, |
| "learning_rate": 3.5225e-05, |
| "loss": 0.0761, |
| "step": 1410 |
| }, |
| { |
| "grad_norm": 1.3272982835769653, |
| "learning_rate": 3.5475e-05, |
| "loss": 0.0757, |
| "step": 1420 |
| }, |
| { |
| "grad_norm": 1.2470251321792603, |
| "learning_rate": 3.5725e-05, |
| "loss": 0.0898, |
| "step": 1430 |
| }, |
| { |
| "grad_norm": 1.2361502647399902, |
| "learning_rate": 3.5975e-05, |
| "loss": 0.0766, |
| "step": 1440 |
| }, |
| { |
| "grad_norm": 1.2339922189712524, |
| "learning_rate": 3.6225000000000006e-05, |
| "loss": 0.0737, |
| "step": 1450 |
| }, |
| { |
| "grad_norm": 1.0657695531845093, |
| "learning_rate": 3.6475000000000006e-05, |
| "loss": 0.0836, |
| "step": 1460 |
| }, |
| { |
| "grad_norm": 1.262316346168518, |
| "learning_rate": 3.6725000000000005e-05, |
| "loss": 0.0724, |
| "step": 1470 |
| }, |
| { |
| "grad_norm": 1.256439447402954, |
| "learning_rate": 3.6975000000000004e-05, |
| "loss": 0.071, |
| "step": 1480 |
| }, |
| { |
| "grad_norm": 1.0629289150238037, |
| "learning_rate": 3.7225000000000004e-05, |
| "loss": 0.0801, |
| "step": 1490 |
| }, |
| { |
| "grad_norm": 0.9662951827049255, |
| "learning_rate": 3.7475e-05, |
| "loss": 0.0748, |
| "step": 1500 |
| }, |
| { |
| "grad_norm": 1.4304341077804565, |
| "learning_rate": 3.7725e-05, |
| "loss": 0.0784, |
| "step": 1510 |
| }, |
| { |
| "grad_norm": 1.2773442268371582, |
| "learning_rate": 3.7975e-05, |
| "loss": 0.0756, |
| "step": 1520 |
| }, |
| { |
| "grad_norm": 1.1348503828048706, |
| "learning_rate": 3.8225e-05, |
| "loss": 0.0736, |
| "step": 1530 |
| }, |
| { |
| "grad_norm": 1.0709927082061768, |
| "learning_rate": 3.8475e-05, |
| "loss": 0.0754, |
| "step": 1540 |
| }, |
| { |
| "grad_norm": 1.0270332098007202, |
| "learning_rate": 3.8725e-05, |
| "loss": 0.0751, |
| "step": 1550 |
| }, |
| { |
| "grad_norm": 1.0089706182479858, |
| "learning_rate": 3.8975e-05, |
| "loss": 0.0731, |
| "step": 1560 |
| }, |
| { |
| "grad_norm": 1.1657140254974365, |
| "learning_rate": 3.9225e-05, |
| "loss": 0.0794, |
| "step": 1570 |
| }, |
| { |
| "grad_norm": 1.2647517919540405, |
| "learning_rate": 3.9475000000000004e-05, |
| "loss": 0.0692, |
| "step": 1580 |
| }, |
| { |
| "grad_norm": 0.9981470704078674, |
| "learning_rate": 3.9725e-05, |
| "loss": 0.0725, |
| "step": 1590 |
| }, |
| { |
| "grad_norm": 0.9286271929740906, |
| "learning_rate": 3.9975e-05, |
| "loss": 0.0753, |
| "step": 1600 |
| }, |
| { |
| "grad_norm": 1.0005375146865845, |
| "learning_rate": 4.0225e-05, |
| "loss": 0.0707, |
| "step": 1610 |
| }, |
| { |
| "grad_norm": 1.1060173511505127, |
| "learning_rate": 4.0475e-05, |
| "loss": 0.0685, |
| "step": 1620 |
| }, |
| { |
| "grad_norm": 1.0261473655700684, |
| "learning_rate": 4.0725e-05, |
| "loss": 0.072, |
| "step": 1630 |
| }, |
| { |
| "grad_norm": 1.0329949855804443, |
| "learning_rate": 4.0975e-05, |
| "loss": 0.0743, |
| "step": 1640 |
| }, |
| { |
| "grad_norm": 0.9242092370986938, |
| "learning_rate": 4.1225e-05, |
| "loss": 0.0664, |
| "step": 1650 |
| }, |
| { |
| "grad_norm": 0.963623583316803, |
| "learning_rate": 4.1475000000000005e-05, |
| "loss": 0.0653, |
| "step": 1660 |
| }, |
| { |
| "grad_norm": 1.1713886260986328, |
| "learning_rate": 4.1725000000000005e-05, |
| "loss": 0.0734, |
| "step": 1670 |
| }, |
| { |
| "grad_norm": 1.1296294927597046, |
| "learning_rate": 4.1975000000000004e-05, |
| "loss": 0.0698, |
| "step": 1680 |
| }, |
| { |
| "grad_norm": 1.015258550643921, |
| "learning_rate": 4.2225e-05, |
| "loss": 0.065, |
| "step": 1690 |
| }, |
| { |
| "grad_norm": 0.919792115688324, |
| "learning_rate": 4.2475e-05, |
| "loss": 0.0759, |
| "step": 1700 |
| }, |
| { |
| "grad_norm": 0.8036102652549744, |
| "learning_rate": 4.2725e-05, |
| "loss": 0.0666, |
| "step": 1710 |
| }, |
| { |
| "grad_norm": 1.2176201343536377, |
| "learning_rate": 4.2975e-05, |
| "loss": 0.0713, |
| "step": 1720 |
| }, |
| { |
| "grad_norm": 0.8653284907341003, |
| "learning_rate": 4.322500000000001e-05, |
| "loss": 0.0675, |
| "step": 1730 |
| }, |
| { |
| "grad_norm": 1.0311731100082397, |
| "learning_rate": 4.3475000000000006e-05, |
| "loss": 0.0651, |
| "step": 1740 |
| }, |
| { |
| "grad_norm": 0.9236791729927063, |
| "learning_rate": 4.3725000000000006e-05, |
| "loss": 0.0619, |
| "step": 1750 |
| }, |
| { |
| "grad_norm": 0.8307299613952637, |
| "learning_rate": 4.3975e-05, |
| "loss": 0.0627, |
| "step": 1760 |
| }, |
| { |
| "grad_norm": 0.8774833083152771, |
| "learning_rate": 4.4225e-05, |
| "loss": 0.0761, |
| "step": 1770 |
| }, |
| { |
| "grad_norm": 0.7616822123527527, |
| "learning_rate": 4.4475e-05, |
| "loss": 0.0668, |
| "step": 1780 |
| }, |
| { |
| "grad_norm": 0.9956639409065247, |
| "learning_rate": 4.4725e-05, |
| "loss": 0.0644, |
| "step": 1790 |
| }, |
| { |
| "grad_norm": 0.8970800042152405, |
| "learning_rate": 4.4975e-05, |
| "loss": 0.0704, |
| "step": 1800 |
| }, |
| { |
| "grad_norm": 0.9267357587814331, |
| "learning_rate": 4.5225e-05, |
| "loss": 0.0656, |
| "step": 1810 |
| }, |
| { |
| "grad_norm": 1.125333309173584, |
| "learning_rate": 4.5475e-05, |
| "loss": 0.0678, |
| "step": 1820 |
| }, |
| { |
| "grad_norm": 0.8214185237884521, |
| "learning_rate": 4.5725e-05, |
| "loss": 0.0642, |
| "step": 1830 |
| }, |
| { |
| "grad_norm": 0.8470892906188965, |
| "learning_rate": 4.5975e-05, |
| "loss": 0.0596, |
| "step": 1840 |
| }, |
| { |
| "grad_norm": 0.9773886203765869, |
| "learning_rate": 4.6225e-05, |
| "loss": 0.0619, |
| "step": 1850 |
| }, |
| { |
| "grad_norm": 0.9647141695022583, |
| "learning_rate": 4.6475000000000005e-05, |
| "loss": 0.0638, |
| "step": 1860 |
| }, |
| { |
| "grad_norm": 0.7629884481430054, |
| "learning_rate": 4.6725000000000004e-05, |
| "loss": 0.0637, |
| "step": 1870 |
| }, |
| { |
| "grad_norm": 1.3230962753295898, |
| "learning_rate": 4.6975000000000003e-05, |
| "loss": 0.0603, |
| "step": 1880 |
| }, |
| { |
| "grad_norm": 0.6659095287322998, |
| "learning_rate": 4.7225e-05, |
| "loss": 0.0587, |
| "step": 1890 |
| }, |
| { |
| "grad_norm": 0.8982052803039551, |
| "learning_rate": 4.7475e-05, |
| "loss": 0.0638, |
| "step": 1900 |
| }, |
| { |
| "grad_norm": 1.0291091203689575, |
| "learning_rate": 4.7725e-05, |
| "loss": 0.0642, |
| "step": 1910 |
| }, |
| { |
| "grad_norm": 1.109850287437439, |
| "learning_rate": 4.7975e-05, |
| "loss": 0.0581, |
| "step": 1920 |
| }, |
| { |
| "grad_norm": 1.130811333656311, |
| "learning_rate": 4.822500000000001e-05, |
| "loss": 0.0597, |
| "step": 1930 |
| }, |
| { |
| "grad_norm": 0.752946138381958, |
| "learning_rate": 4.8475000000000006e-05, |
| "loss": 0.0733, |
| "step": 1940 |
| }, |
| { |
| "grad_norm": 0.8653437495231628, |
| "learning_rate": 4.8725000000000005e-05, |
| "loss": 0.0598, |
| "step": 1950 |
| }, |
| { |
| "grad_norm": 0.6604033708572388, |
| "learning_rate": 4.8975000000000005e-05, |
| "loss": 0.0582, |
| "step": 1960 |
| }, |
| { |
| "grad_norm": 0.7046467661857605, |
| "learning_rate": 4.9225000000000004e-05, |
| "loss": 0.0638, |
| "step": 1970 |
| }, |
| { |
| "grad_norm": 1.0231807231903076, |
| "learning_rate": 4.9475e-05, |
| "loss": 0.0688, |
| "step": 1980 |
| }, |
| { |
| "grad_norm": 0.8604083061218262, |
| "learning_rate": 4.9725e-05, |
| "loss": 0.0593, |
| "step": 1990 |
| }, |
| { |
| "grad_norm": 0.9460083246231079, |
| "learning_rate": 4.9975e-05, |
| "loss": 0.0627, |
| "step": 2000 |
| }, |
| { |
| "grad_norm": 0.9021515846252441, |
| "learning_rate": 5.0225e-05, |
| "loss": 0.0631, |
| "step": 2010 |
| }, |
| { |
| "grad_norm": 0.7849692106246948, |
| "learning_rate": 5.047500000000001e-05, |
| "loss": 0.0643, |
| "step": 2020 |
| }, |
| { |
| "grad_norm": 0.7976584434509277, |
| "learning_rate": 5.0725e-05, |
| "loss": 0.0562, |
| "step": 2030 |
| }, |
| { |
| "grad_norm": 0.9405306577682495, |
| "learning_rate": 5.0975000000000006e-05, |
| "loss": 0.0638, |
| "step": 2040 |
| }, |
| { |
| "grad_norm": 0.7782784700393677, |
| "learning_rate": 5.1225e-05, |
| "loss": 0.0645, |
| "step": 2050 |
| }, |
| { |
| "grad_norm": 0.9583297371864319, |
| "learning_rate": 5.1475000000000004e-05, |
| "loss": 0.0614, |
| "step": 2060 |
| }, |
| { |
| "grad_norm": 0.8113962411880493, |
| "learning_rate": 5.1725000000000004e-05, |
| "loss": 0.0611, |
| "step": 2070 |
| }, |
| { |
| "grad_norm": 1.0610644817352295, |
| "learning_rate": 5.197500000000001e-05, |
| "loss": 0.0622, |
| "step": 2080 |
| }, |
| { |
| "grad_norm": 0.7754601836204529, |
| "learning_rate": 5.2225e-05, |
| "loss": 0.0608, |
| "step": 2090 |
| }, |
| { |
| "grad_norm": 0.8049399256706238, |
| "learning_rate": 5.247500000000001e-05, |
| "loss": 0.0573, |
| "step": 2100 |
| }, |
| { |
| "grad_norm": 0.882763683795929, |
| "learning_rate": 5.2725e-05, |
| "loss": 0.0587, |
| "step": 2110 |
| }, |
| { |
| "grad_norm": 0.784490704536438, |
| "learning_rate": 5.297500000000001e-05, |
| "loss": 0.0545, |
| "step": 2120 |
| }, |
| { |
| "grad_norm": 0.8992010951042175, |
| "learning_rate": 5.3225e-05, |
| "loss": 0.0611, |
| "step": 2130 |
| }, |
| { |
| "grad_norm": 0.735640823841095, |
| "learning_rate": 5.3475e-05, |
| "loss": 0.0606, |
| "step": 2140 |
| }, |
| { |
| "grad_norm": 0.6198451519012451, |
| "learning_rate": 5.3725000000000005e-05, |
| "loss": 0.0598, |
| "step": 2150 |
| }, |
| { |
| "grad_norm": 0.7771381735801697, |
| "learning_rate": 5.3975e-05, |
| "loss": 0.05, |
| "step": 2160 |
| }, |
| { |
| "grad_norm": 0.738305389881134, |
| "learning_rate": 5.4225000000000003e-05, |
| "loss": 0.0574, |
| "step": 2170 |
| }, |
| { |
| "grad_norm": 0.794254720211029, |
| "learning_rate": 5.4474999999999996e-05, |
| "loss": 0.0596, |
| "step": 2180 |
| }, |
| { |
| "grad_norm": 0.732258677482605, |
| "learning_rate": 5.4725e-05, |
| "loss": 0.0573, |
| "step": 2190 |
| }, |
| { |
| "grad_norm": 0.918854832649231, |
| "learning_rate": 5.4975e-05, |
| "loss": 0.0533, |
| "step": 2200 |
| }, |
| { |
| "grad_norm": 0.7550817131996155, |
| "learning_rate": 5.522500000000001e-05, |
| "loss": 0.0548, |
| "step": 2210 |
| }, |
| { |
| "grad_norm": 0.6007593274116516, |
| "learning_rate": 5.5475e-05, |
| "loss": 0.0551, |
| "step": 2220 |
| }, |
| { |
| "grad_norm": 0.8961713910102844, |
| "learning_rate": 5.5725000000000006e-05, |
| "loss": 0.0547, |
| "step": 2230 |
| }, |
| { |
| "grad_norm": 0.8532932996749878, |
| "learning_rate": 5.5975e-05, |
| "loss": 0.059, |
| "step": 2240 |
| }, |
| { |
| "grad_norm": 0.736585259437561, |
| "learning_rate": 5.6225000000000005e-05, |
| "loss": 0.0621, |
| "step": 2250 |
| }, |
| { |
| "grad_norm": 0.9078760743141174, |
| "learning_rate": 5.6475e-05, |
| "loss": 0.0651, |
| "step": 2260 |
| }, |
| { |
| "grad_norm": 0.7268538475036621, |
| "learning_rate": 5.6725e-05, |
| "loss": 0.0632, |
| "step": 2270 |
| }, |
| { |
| "grad_norm": 0.7124704718589783, |
| "learning_rate": 5.6975e-05, |
| "loss": 0.0542, |
| "step": 2280 |
| }, |
| { |
| "grad_norm": 0.6639304757118225, |
| "learning_rate": 5.722500000000001e-05, |
| "loss": 0.0498, |
| "step": 2290 |
| }, |
| { |
| "grad_norm": 0.47446736693382263, |
| "learning_rate": 5.7475e-05, |
| "loss": 0.0536, |
| "step": 2300 |
| }, |
| { |
| "grad_norm": 0.6784394979476929, |
| "learning_rate": 5.772500000000001e-05, |
| "loss": 0.0565, |
| "step": 2310 |
| }, |
| { |
| "grad_norm": 0.826998770236969, |
| "learning_rate": 5.7975e-05, |
| "loss": 0.0549, |
| "step": 2320 |
| }, |
| { |
| "grad_norm": 0.6909579634666443, |
| "learning_rate": 5.8225000000000006e-05, |
| "loss": 0.0591, |
| "step": 2330 |
| }, |
| { |
| "grad_norm": 0.7020758986473083, |
| "learning_rate": 5.8475000000000005e-05, |
| "loss": 0.0558, |
| "step": 2340 |
| }, |
| { |
| "grad_norm": 0.9214292764663696, |
| "learning_rate": 5.8725000000000004e-05, |
| "loss": 0.0532, |
| "step": 2350 |
| }, |
| { |
| "grad_norm": 0.6545830965042114, |
| "learning_rate": 5.8975000000000004e-05, |
| "loss": 0.0611, |
| "step": 2360 |
| }, |
| { |
| "grad_norm": 0.5123686194419861, |
| "learning_rate": 5.922500000000001e-05, |
| "loss": 0.0569, |
| "step": 2370 |
| }, |
| { |
| "grad_norm": 0.9388223886489868, |
| "learning_rate": 5.9475e-05, |
| "loss": 0.0577, |
| "step": 2380 |
| }, |
| { |
| "grad_norm": 0.6229625940322876, |
| "learning_rate": 5.9724999999999995e-05, |
| "loss": 0.056, |
| "step": 2390 |
| }, |
| { |
| "grad_norm": 0.5820695757865906, |
| "learning_rate": 5.9975e-05, |
| "loss": 0.0562, |
| "step": 2400 |
| }, |
| { |
| "grad_norm": 0.7807344794273376, |
| "learning_rate": 6.0225e-05, |
| "loss": 0.0517, |
| "step": 2410 |
| }, |
| { |
| "grad_norm": 0.6235376596450806, |
| "learning_rate": 6.0475000000000006e-05, |
| "loss": 0.053, |
| "step": 2420 |
| }, |
| { |
| "grad_norm": 0.6682143807411194, |
| "learning_rate": 6.0725e-05, |
| "loss": 0.0538, |
| "step": 2430 |
| }, |
| { |
| "grad_norm": 0.625502347946167, |
| "learning_rate": 6.0975000000000005e-05, |
| "loss": 0.0503, |
| "step": 2440 |
| }, |
| { |
| "grad_norm": 0.7932114005088806, |
| "learning_rate": 6.1225e-05, |
| "loss": 0.0534, |
| "step": 2450 |
| }, |
| { |
| "grad_norm": 0.746182918548584, |
| "learning_rate": 6.1475e-05, |
| "loss": 0.0605, |
| "step": 2460 |
| }, |
| { |
| "grad_norm": 0.4769274890422821, |
| "learning_rate": 6.1725e-05, |
| "loss": 0.0489, |
| "step": 2470 |
| }, |
| { |
| "grad_norm": 0.7346721291542053, |
| "learning_rate": 6.1975e-05, |
| "loss": 0.0539, |
| "step": 2480 |
| }, |
| { |
| "grad_norm": 0.7812864780426025, |
| "learning_rate": 6.2225e-05, |
| "loss": 0.0498, |
| "step": 2490 |
| }, |
| { |
| "grad_norm": 0.7502540349960327, |
| "learning_rate": 6.2475e-05, |
| "loss": 0.0554, |
| "step": 2500 |
| }, |
| { |
| "grad_norm": 0.7815192937850952, |
| "learning_rate": 6.2725e-05, |
| "loss": 0.0573, |
| "step": 2510 |
| }, |
| { |
| "grad_norm": 0.647578775882721, |
| "learning_rate": 6.297500000000001e-05, |
| "loss": 0.0522, |
| "step": 2520 |
| }, |
| { |
| "grad_norm": 0.7370708584785461, |
| "learning_rate": 6.3225e-05, |
| "loss": 0.0492, |
| "step": 2530 |
| }, |
| { |
| "grad_norm": 0.5644780993461609, |
| "learning_rate": 6.347500000000001e-05, |
| "loss": 0.0487, |
| "step": 2540 |
| }, |
| { |
| "grad_norm": 0.7326799035072327, |
| "learning_rate": 6.3725e-05, |
| "loss": 0.0561, |
| "step": 2550 |
| }, |
| { |
| "grad_norm": 0.6633973717689514, |
| "learning_rate": 6.397500000000001e-05, |
| "loss": 0.0474, |
| "step": 2560 |
| }, |
| { |
| "grad_norm": 0.7116085290908813, |
| "learning_rate": 6.4225e-05, |
| "loss": 0.0482, |
| "step": 2570 |
| }, |
| { |
| "grad_norm": 0.6394766569137573, |
| "learning_rate": 6.447500000000001e-05, |
| "loss": 0.0578, |
| "step": 2580 |
| }, |
| { |
| "grad_norm": 0.7912008166313171, |
| "learning_rate": 6.4725e-05, |
| "loss": 0.0505, |
| "step": 2590 |
| }, |
| { |
| "grad_norm": 0.744045078754425, |
| "learning_rate": 6.497500000000001e-05, |
| "loss": 0.0521, |
| "step": 2600 |
| }, |
| { |
| "grad_norm": 0.8164146542549133, |
| "learning_rate": 6.5225e-05, |
| "loss": 0.0501, |
| "step": 2610 |
| }, |
| { |
| "grad_norm": 0.5666782259941101, |
| "learning_rate": 6.5475e-05, |
| "loss": 0.0499, |
| "step": 2620 |
| }, |
| { |
| "grad_norm": 0.8259328007698059, |
| "learning_rate": 6.5725e-05, |
| "loss": 0.0541, |
| "step": 2630 |
| }, |
| { |
| "grad_norm": 0.5527114868164062, |
| "learning_rate": 6.5975e-05, |
| "loss": 0.051, |
| "step": 2640 |
| }, |
| { |
| "grad_norm": 0.58927983045578, |
| "learning_rate": 6.6225e-05, |
| "loss": 0.0513, |
| "step": 2650 |
| }, |
| { |
| "grad_norm": 0.4701420068740845, |
| "learning_rate": 6.6475e-05, |
| "loss": 0.0476, |
| "step": 2660 |
| }, |
| { |
| "grad_norm": 0.4634196162223816, |
| "learning_rate": 6.672500000000001e-05, |
| "loss": 0.053, |
| "step": 2670 |
| }, |
| { |
| "grad_norm": 0.7381142973899841, |
| "learning_rate": 6.6975e-05, |
| "loss": 0.0475, |
| "step": 2680 |
| }, |
| { |
| "grad_norm": 0.635261595249176, |
| "learning_rate": 6.722500000000001e-05, |
| "loss": 0.0566, |
| "step": 2690 |
| }, |
| { |
| "grad_norm": 0.5740917921066284, |
| "learning_rate": 6.7475e-05, |
| "loss": 0.0509, |
| "step": 2700 |
| }, |
| { |
| "grad_norm": 0.591770350933075, |
| "learning_rate": 6.7725e-05, |
| "loss": 0.0562, |
| "step": 2710 |
| }, |
| { |
| "grad_norm": 0.771838366985321, |
| "learning_rate": 6.7975e-05, |
| "loss": 0.0502, |
| "step": 2720 |
| }, |
| { |
| "grad_norm": 0.7171633243560791, |
| "learning_rate": 6.8225e-05, |
| "loss": 0.0548, |
| "step": 2730 |
| }, |
| { |
| "grad_norm": 0.6417476534843445, |
| "learning_rate": 6.8475e-05, |
| "loss": 0.0557, |
| "step": 2740 |
| }, |
| { |
| "grad_norm": 0.7948347330093384, |
| "learning_rate": 6.8725e-05, |
| "loss": 0.0518, |
| "step": 2750 |
| }, |
| { |
| "grad_norm": 0.6874246001243591, |
| "learning_rate": 6.8975e-05, |
| "loss": 0.0529, |
| "step": 2760 |
| }, |
| { |
| "grad_norm": 0.5800514817237854, |
| "learning_rate": 6.9225e-05, |
| "loss": 0.0457, |
| "step": 2770 |
| }, |
| { |
| "grad_norm": 0.5260016322135925, |
| "learning_rate": 6.9475e-05, |
| "loss": 0.0521, |
| "step": 2780 |
| }, |
| { |
| "grad_norm": 0.5826354026794434, |
| "learning_rate": 6.9725e-05, |
| "loss": 0.0512, |
| "step": 2790 |
| }, |
| { |
| "grad_norm": 0.570827305316925, |
| "learning_rate": 6.997500000000001e-05, |
| "loss": 0.0488, |
| "step": 2800 |
| }, |
| { |
| "grad_norm": 0.6201433539390564, |
| "learning_rate": 7.022500000000001e-05, |
| "loss": 0.045, |
| "step": 2810 |
| }, |
| { |
| "grad_norm": 0.6378107070922852, |
| "learning_rate": 7.0475e-05, |
| "loss": 0.0432, |
| "step": 2820 |
| }, |
| { |
| "grad_norm": 0.7345712780952454, |
| "learning_rate": 7.072500000000001e-05, |
| "loss": 0.0533, |
| "step": 2830 |
| }, |
| { |
| "grad_norm": 0.5760173797607422, |
| "learning_rate": 7.0975e-05, |
| "loss": 0.0522, |
| "step": 2840 |
| }, |
| { |
| "grad_norm": 0.5737773180007935, |
| "learning_rate": 7.122500000000001e-05, |
| "loss": 0.0467, |
| "step": 2850 |
| }, |
| { |
| "grad_norm": 0.5438277125358582, |
| "learning_rate": 7.1475e-05, |
| "loss": 0.0486, |
| "step": 2860 |
| }, |
| { |
| "grad_norm": 0.6070303916931152, |
| "learning_rate": 7.172500000000001e-05, |
| "loss": 0.05, |
| "step": 2870 |
| }, |
| { |
| "grad_norm": 0.5815456509590149, |
| "learning_rate": 7.1975e-05, |
| "loss": 0.0462, |
| "step": 2880 |
| }, |
| { |
| "grad_norm": 0.6135198473930359, |
| "learning_rate": 7.2225e-05, |
| "loss": 0.0446, |
| "step": 2890 |
| }, |
| { |
| "grad_norm": 0.7103997468948364, |
| "learning_rate": 7.2475e-05, |
| "loss": 0.0457, |
| "step": 2900 |
| }, |
| { |
| "grad_norm": 0.4404158592224121, |
| "learning_rate": 7.272499999999999e-05, |
| "loss": 0.0449, |
| "step": 2910 |
| }, |
| { |
| "grad_norm": 0.5680925250053406, |
| "learning_rate": 7.2975e-05, |
| "loss": 0.0436, |
| "step": 2920 |
| }, |
| { |
| "grad_norm": 0.6294459104537964, |
| "learning_rate": 7.3225e-05, |
| "loss": 0.0523, |
| "step": 2930 |
| }, |
| { |
| "grad_norm": 0.6799387335777283, |
| "learning_rate": 7.347500000000001e-05, |
| "loss": 0.0542, |
| "step": 2940 |
| }, |
| { |
| "grad_norm": 0.5161760449409485, |
| "learning_rate": 7.3725e-05, |
| "loss": 0.0523, |
| "step": 2950 |
| }, |
| { |
| "grad_norm": 0.5557070970535278, |
| "learning_rate": 7.397500000000001e-05, |
| "loss": 0.0558, |
| "step": 2960 |
| }, |
| { |
| "grad_norm": 0.5801904201507568, |
| "learning_rate": 7.4225e-05, |
| "loss": 0.0482, |
| "step": 2970 |
| }, |
| { |
| "grad_norm": 0.5068460702896118, |
| "learning_rate": 7.447500000000001e-05, |
| "loss": 0.048, |
| "step": 2980 |
| }, |
| { |
| "grad_norm": 0.49014368653297424, |
| "learning_rate": 7.4725e-05, |
| "loss": 0.0463, |
| "step": 2990 |
| }, |
| { |
| "grad_norm": 0.47462910413742065, |
| "learning_rate": 7.4975e-05, |
| "loss": 0.0488, |
| "step": 3000 |
| }, |
| { |
| "grad_norm": 0.6453213691711426, |
| "learning_rate": 7.5225e-05, |
| "loss": 0.0541, |
| "step": 3010 |
| }, |
| { |
| "grad_norm": 0.6153588891029358, |
| "learning_rate": 7.5475e-05, |
| "loss": 0.0469, |
| "step": 3020 |
| }, |
| { |
| "grad_norm": 0.4591832458972931, |
| "learning_rate": 7.5725e-05, |
| "loss": 0.0445, |
| "step": 3030 |
| }, |
| { |
| "grad_norm": 0.6439560651779175, |
| "learning_rate": 7.5975e-05, |
| "loss": 0.0425, |
| "step": 3040 |
| }, |
| { |
| "grad_norm": 0.5622746348381042, |
| "learning_rate": 7.6225e-05, |
| "loss": 0.0477, |
| "step": 3050 |
| }, |
| { |
| "grad_norm": 0.47292307019233704, |
| "learning_rate": 7.6475e-05, |
| "loss": 0.0452, |
| "step": 3060 |
| }, |
| { |
| "grad_norm": 0.6778805255889893, |
| "learning_rate": 7.672500000000001e-05, |
| "loss": 0.0477, |
| "step": 3070 |
| }, |
| { |
| "grad_norm": 0.4940475821495056, |
| "learning_rate": 7.697500000000001e-05, |
| "loss": 0.0445, |
| "step": 3080 |
| }, |
| { |
| "grad_norm": 0.5741375088691711, |
| "learning_rate": 7.722500000000001e-05, |
| "loss": 0.0457, |
| "step": 3090 |
| }, |
| { |
| "grad_norm": 0.6149645447731018, |
| "learning_rate": 7.747500000000001e-05, |
| "loss": 0.0495, |
| "step": 3100 |
| }, |
| { |
| "grad_norm": 0.5375564694404602, |
| "learning_rate": 7.7725e-05, |
| "loss": 0.054, |
| "step": 3110 |
| }, |
| { |
| "grad_norm": 0.5478602051734924, |
| "learning_rate": 7.797500000000001e-05, |
| "loss": 0.0423, |
| "step": 3120 |
| }, |
| { |
| "grad_norm": 0.4536065459251404, |
| "learning_rate": 7.8225e-05, |
| "loss": 0.0524, |
| "step": 3130 |
| }, |
| { |
| "grad_norm": 0.5834420323371887, |
| "learning_rate": 7.8475e-05, |
| "loss": 0.0447, |
| "step": 3140 |
| }, |
| { |
| "grad_norm": 0.5539586544036865, |
| "learning_rate": 7.8725e-05, |
| "loss": 0.0481, |
| "step": 3150 |
| }, |
| { |
| "grad_norm": 0.6485045552253723, |
| "learning_rate": 7.8975e-05, |
| "loss": 0.0424, |
| "step": 3160 |
| }, |
| { |
| "grad_norm": 0.4400906264781952, |
| "learning_rate": 7.9225e-05, |
| "loss": 0.046, |
| "step": 3170 |
| }, |
| { |
| "grad_norm": 0.42406129837036133, |
| "learning_rate": 7.9475e-05, |
| "loss": 0.0487, |
| "step": 3180 |
| }, |
| { |
| "grad_norm": 0.43238261342048645, |
| "learning_rate": 7.9725e-05, |
| "loss": 0.0491, |
| "step": 3190 |
| }, |
| { |
| "grad_norm": 0.5488142967224121, |
| "learning_rate": 7.9975e-05, |
| "loss": 0.042, |
| "step": 3200 |
| }, |
| { |
| "grad_norm": 0.43411511182785034, |
| "learning_rate": 8.022500000000001e-05, |
| "loss": 0.043, |
| "step": 3210 |
| }, |
| { |
| "grad_norm": 0.5469537377357483, |
| "learning_rate": 8.0475e-05, |
| "loss": 0.0455, |
| "step": 3220 |
| }, |
| { |
| "grad_norm": 0.5347734689712524, |
| "learning_rate": 8.072500000000001e-05, |
| "loss": 0.0519, |
| "step": 3230 |
| }, |
| { |
| "grad_norm": 0.7352176308631897, |
| "learning_rate": 8.0975e-05, |
| "loss": 0.0482, |
| "step": 3240 |
| }, |
| { |
| "grad_norm": 0.45272985100746155, |
| "learning_rate": 8.122500000000001e-05, |
| "loss": 0.0487, |
| "step": 3250 |
| }, |
| { |
| "grad_norm": 0.5572296977043152, |
| "learning_rate": 8.1475e-05, |
| "loss": 0.0485, |
| "step": 3260 |
| }, |
| { |
| "grad_norm": 0.7316561937332153, |
| "learning_rate": 8.172500000000001e-05, |
| "loss": 0.0475, |
| "step": 3270 |
| }, |
| { |
| "grad_norm": 0.6347060203552246, |
| "learning_rate": 8.1975e-05, |
| "loss": 0.0438, |
| "step": 3280 |
| }, |
| { |
| "grad_norm": 0.6271874904632568, |
| "learning_rate": 8.2225e-05, |
| "loss": 0.0489, |
| "step": 3290 |
| }, |
| { |
| "grad_norm": 0.3748721778392792, |
| "learning_rate": 8.2475e-05, |
| "loss": 0.0476, |
| "step": 3300 |
| }, |
| { |
| "grad_norm": 0.4579646587371826, |
| "learning_rate": 8.2725e-05, |
| "loss": 0.0442, |
| "step": 3310 |
| }, |
| { |
| "grad_norm": 0.46649906039237976, |
| "learning_rate": 8.2975e-05, |
| "loss": 0.0495, |
| "step": 3320 |
| }, |
| { |
| "grad_norm": 0.5870574712753296, |
| "learning_rate": 8.3225e-05, |
| "loss": 0.0452, |
| "step": 3330 |
| }, |
| { |
| "grad_norm": 0.42938050627708435, |
| "learning_rate": 8.347500000000001e-05, |
| "loss": 0.0445, |
| "step": 3340 |
| }, |
| { |
| "grad_norm": 0.4771455228328705, |
| "learning_rate": 8.3725e-05, |
| "loss": 0.0452, |
| "step": 3350 |
| }, |
| { |
| "grad_norm": 0.46662503480911255, |
| "learning_rate": 8.397500000000001e-05, |
| "loss": 0.0445, |
| "step": 3360 |
| }, |
| { |
| "grad_norm": 0.43966543674468994, |
| "learning_rate": 8.422500000000001e-05, |
| "loss": 0.0427, |
| "step": 3370 |
| }, |
| { |
| "grad_norm": 0.5394494533538818, |
| "learning_rate": 8.447500000000001e-05, |
| "loss": 0.0453, |
| "step": 3380 |
| }, |
| { |
| "grad_norm": 0.4468139708042145, |
| "learning_rate": 8.4725e-05, |
| "loss": 0.0479, |
| "step": 3390 |
| }, |
| { |
| "grad_norm": 0.5478911399841309, |
| "learning_rate": 8.4975e-05, |
| "loss": 0.048, |
| "step": 3400 |
| }, |
| { |
| "grad_norm": 0.4762585759162903, |
| "learning_rate": 8.5225e-05, |
| "loss": 0.05, |
| "step": 3410 |
| }, |
| { |
| "grad_norm": 0.5173057913780212, |
| "learning_rate": 8.5475e-05, |
| "loss": 0.0442, |
| "step": 3420 |
| }, |
| { |
| "grad_norm": 0.5589162707328796, |
| "learning_rate": 8.5725e-05, |
| "loss": 0.0436, |
| "step": 3430 |
| }, |
| { |
| "grad_norm": 0.5997108817100525, |
| "learning_rate": 8.5975e-05, |
| "loss": 0.0467, |
| "step": 3440 |
| }, |
| { |
| "grad_norm": 0.4055095911026001, |
| "learning_rate": 8.6225e-05, |
| "loss": 0.0401, |
| "step": 3450 |
| }, |
| { |
| "grad_norm": 0.593838632106781, |
| "learning_rate": 8.6475e-05, |
| "loss": 0.0428, |
| "step": 3460 |
| }, |
| { |
| "grad_norm": 0.6099951863288879, |
| "learning_rate": 8.672500000000001e-05, |
| "loss": 0.0488, |
| "step": 3470 |
| }, |
| { |
| "grad_norm": 0.4896881580352783, |
| "learning_rate": 8.6975e-05, |
| "loss": 0.0454, |
| "step": 3480 |
| }, |
| { |
| "grad_norm": 0.4892227053642273, |
| "learning_rate": 8.7225e-05, |
| "loss": 0.043, |
| "step": 3490 |
| }, |
| { |
| "grad_norm": 0.4351995885372162, |
| "learning_rate": 8.747500000000001e-05, |
| "loss": 0.0423, |
| "step": 3500 |
| }, |
| { |
| "grad_norm": 0.6612355709075928, |
| "learning_rate": 8.7725e-05, |
| "loss": 0.0494, |
| "step": 3510 |
| }, |
| { |
| "grad_norm": 0.490257203578949, |
| "learning_rate": 8.797500000000001e-05, |
| "loss": 0.0442, |
| "step": 3520 |
| }, |
| { |
| "grad_norm": 0.4129466414451599, |
| "learning_rate": 8.8225e-05, |
| "loss": 0.0451, |
| "step": 3530 |
| }, |
| { |
| "grad_norm": 0.5617825984954834, |
| "learning_rate": 8.847500000000001e-05, |
| "loss": 0.0438, |
| "step": 3540 |
| }, |
| { |
| "grad_norm": 0.6246066093444824, |
| "learning_rate": 8.8725e-05, |
| "loss": 0.0445, |
| "step": 3550 |
| }, |
| { |
| "grad_norm": 0.40516310930252075, |
| "learning_rate": 8.897500000000001e-05, |
| "loss": 0.0458, |
| "step": 3560 |
| }, |
| { |
| "grad_norm": 0.5519590973854065, |
| "learning_rate": 8.9225e-05, |
| "loss": 0.0458, |
| "step": 3570 |
| }, |
| { |
| "grad_norm": 0.4888351261615753, |
| "learning_rate": 8.9475e-05, |
| "loss": 0.0475, |
| "step": 3580 |
| }, |
| { |
| "grad_norm": 0.39518243074417114, |
| "learning_rate": 8.9725e-05, |
| "loss": 0.043, |
| "step": 3590 |
| }, |
| { |
| "grad_norm": 0.45062577724456787, |
| "learning_rate": 8.9975e-05, |
| "loss": 0.0435, |
| "step": 3600 |
| }, |
| { |
| "grad_norm": 0.47647690773010254, |
| "learning_rate": 9.0225e-05, |
| "loss": 0.0432, |
| "step": 3610 |
| }, |
| { |
| "grad_norm": 0.5328305959701538, |
| "learning_rate": 9.0475e-05, |
| "loss": 0.0436, |
| "step": 3620 |
| }, |
| { |
| "grad_norm": 0.4746423661708832, |
| "learning_rate": 9.072500000000001e-05, |
| "loss": 0.0459, |
| "step": 3630 |
| }, |
| { |
| "grad_norm": 0.5875771045684814, |
| "learning_rate": 9.0975e-05, |
| "loss": 0.0427, |
| "step": 3640 |
| }, |
| { |
| "grad_norm": 0.4863992929458618, |
| "learning_rate": 9.122500000000001e-05, |
| "loss": 0.0405, |
| "step": 3650 |
| }, |
| { |
| "grad_norm": 0.5402488112449646, |
| "learning_rate": 9.1475e-05, |
| "loss": 0.0419, |
| "step": 3660 |
| }, |
| { |
| "grad_norm": 0.5397239327430725, |
| "learning_rate": 9.172500000000001e-05, |
| "loss": 0.0415, |
| "step": 3670 |
| }, |
| { |
| "grad_norm": 0.5445768237113953, |
| "learning_rate": 9.1975e-05, |
| "loss": 0.0401, |
| "step": 3680 |
| }, |
| { |
| "grad_norm": 0.5987370014190674, |
| "learning_rate": 9.2225e-05, |
| "loss": 0.0471, |
| "step": 3690 |
| }, |
| { |
| "grad_norm": 0.7309731841087341, |
| "learning_rate": 9.2475e-05, |
| "loss": 0.0383, |
| "step": 3700 |
| }, |
| { |
| "grad_norm": 0.5340412259101868, |
| "learning_rate": 9.2725e-05, |
| "loss": 0.0423, |
| "step": 3710 |
| }, |
| { |
| "grad_norm": 0.46930262446403503, |
| "learning_rate": 9.2975e-05, |
| "loss": 0.0427, |
| "step": 3720 |
| }, |
| { |
| "grad_norm": 0.4816801846027374, |
| "learning_rate": 9.3225e-05, |
| "loss": 0.0457, |
| "step": 3730 |
| }, |
| { |
| "grad_norm": 0.5450388193130493, |
| "learning_rate": 9.3475e-05, |
| "loss": 0.0465, |
| "step": 3740 |
| }, |
| { |
| "grad_norm": 0.48289021849632263, |
| "learning_rate": 9.3725e-05, |
| "loss": 0.0438, |
| "step": 3750 |
| }, |
| { |
| "grad_norm": 0.38312482833862305, |
| "learning_rate": 9.397500000000001e-05, |
| "loss": 0.0448, |
| "step": 3760 |
| }, |
| { |
| "grad_norm": 0.4564635455608368, |
| "learning_rate": 9.422500000000001e-05, |
| "loss": 0.0376, |
| "step": 3770 |
| }, |
| { |
| "grad_norm": 0.501518189907074, |
| "learning_rate": 9.4475e-05, |
| "loss": 0.0422, |
| "step": 3780 |
| }, |
| { |
| "grad_norm": 0.5535281896591187, |
| "learning_rate": 9.472500000000001e-05, |
| "loss": 0.0415, |
| "step": 3790 |
| }, |
| { |
| "grad_norm": 0.5068562626838684, |
| "learning_rate": 9.4975e-05, |
| "loss": 0.044, |
| "step": 3800 |
| }, |
| { |
| "grad_norm": 0.4238268733024597, |
| "learning_rate": 9.522500000000001e-05, |
| "loss": 0.0512, |
| "step": 3810 |
| }, |
| { |
| "grad_norm": 0.40982863306999207, |
| "learning_rate": 9.5475e-05, |
| "loss": 0.0514, |
| "step": 3820 |
| }, |
| { |
| "grad_norm": 0.4888747036457062, |
| "learning_rate": 9.572500000000001e-05, |
| "loss": 0.0417, |
| "step": 3830 |
| }, |
| { |
| "grad_norm": 0.36337292194366455, |
| "learning_rate": 9.5975e-05, |
| "loss": 0.0452, |
| "step": 3840 |
| }, |
| { |
| "grad_norm": 0.5338613986968994, |
| "learning_rate": 9.622500000000001e-05, |
| "loss": 0.0439, |
| "step": 3850 |
| }, |
| { |
| "grad_norm": 0.5002034306526184, |
| "learning_rate": 9.6475e-05, |
| "loss": 0.046, |
| "step": 3860 |
| }, |
| { |
| "grad_norm": 0.5638089179992676, |
| "learning_rate": 9.6725e-05, |
| "loss": 0.0414, |
| "step": 3870 |
| }, |
| { |
| "grad_norm": 0.4765036106109619, |
| "learning_rate": 9.6975e-05, |
| "loss": 0.042, |
| "step": 3880 |
| }, |
| { |
| "grad_norm": 0.443176805973053, |
| "learning_rate": 9.7225e-05, |
| "loss": 0.046, |
| "step": 3890 |
| }, |
| { |
| "grad_norm": 0.39989593625068665, |
| "learning_rate": 9.747500000000001e-05, |
| "loss": 0.0472, |
| "step": 3900 |
| }, |
| { |
| "grad_norm": 0.44405046105384827, |
| "learning_rate": 9.7725e-05, |
| "loss": 0.0426, |
| "step": 3910 |
| }, |
| { |
| "grad_norm": 0.5469679832458496, |
| "learning_rate": 9.797500000000001e-05, |
| "loss": 0.0494, |
| "step": 3920 |
| }, |
| { |
| "grad_norm": 0.5193902254104614, |
| "learning_rate": 9.8225e-05, |
| "loss": 0.0418, |
| "step": 3930 |
| }, |
| { |
| "grad_norm": 0.44379061460494995, |
| "learning_rate": 9.847500000000001e-05, |
| "loss": 0.0439, |
| "step": 3940 |
| }, |
| { |
| "grad_norm": 0.3936866819858551, |
| "learning_rate": 9.8725e-05, |
| "loss": 0.0427, |
| "step": 3950 |
| }, |
| { |
| "grad_norm": 0.3606123924255371, |
| "learning_rate": 9.897500000000001e-05, |
| "loss": 0.0414, |
| "step": 3960 |
| }, |
| { |
| "grad_norm": 0.4419628381729126, |
| "learning_rate": 9.9225e-05, |
| "loss": 0.0386, |
| "step": 3970 |
| }, |
| { |
| "grad_norm": 0.43262916803359985, |
| "learning_rate": 9.9475e-05, |
| "loss": 0.0397, |
| "step": 3980 |
| }, |
| { |
| "grad_norm": 0.35829615592956543, |
| "learning_rate": 9.9725e-05, |
| "loss": 0.0388, |
| "step": 3990 |
| }, |
| { |
| "grad_norm": 0.4421238899230957, |
| "learning_rate": 9.9975e-05, |
| "loss": 0.0407, |
| "step": 4000 |
| }, |
| { |
| "grad_norm": 0.3859304189682007, |
| "learning_rate": 9.999999653982884e-05, |
| "loss": 0.0399, |
| "step": 4010 |
| }, |
| { |
| "grad_norm": 0.31160181760787964, |
| "learning_rate": 9.999998457874392e-05, |
| "loss": 0.0379, |
| "step": 4020 |
| }, |
| { |
| "grad_norm": 0.43243879079818726, |
| "learning_rate": 9.999996407402913e-05, |
| "loss": 0.0423, |
| "step": 4030 |
| }, |
| { |
| "grad_norm": 0.4805966317653656, |
| "learning_rate": 9.999993502568801e-05, |
| "loss": 0.041, |
| "step": 4040 |
| }, |
| { |
| "grad_norm": 0.4580017626285553, |
| "learning_rate": 9.999989743372548e-05, |
| "loss": 0.0421, |
| "step": 4050 |
| }, |
| { |
| "grad_norm": 0.4651296138763428, |
| "learning_rate": 9.999985129814798e-05, |
| "loss": 0.0418, |
| "step": 4060 |
| }, |
| { |
| "grad_norm": 0.4289516508579254, |
| "learning_rate": 9.99997966189634e-05, |
| "loss": 0.0376, |
| "step": 4070 |
| }, |
| { |
| "grad_norm": 0.4172024130821228, |
| "learning_rate": 9.999973339618107e-05, |
| "loss": 0.0403, |
| "step": 4080 |
| }, |
| { |
| "grad_norm": 0.4561389088630676, |
| "learning_rate": 9.999966162981179e-05, |
| "loss": 0.0404, |
| "step": 4090 |
| }, |
| { |
| "grad_norm": 0.4419674575328827, |
| "learning_rate": 9.999958131986784e-05, |
| "loss": 0.0399, |
| "step": 4100 |
| }, |
| { |
| "grad_norm": 0.3951598107814789, |
| "learning_rate": 9.999949246636293e-05, |
| "loss": 0.0449, |
| "step": 4110 |
| }, |
| { |
| "grad_norm": 0.3458001911640167, |
| "learning_rate": 9.999939506931224e-05, |
| "loss": 0.0429, |
| "step": 4120 |
| }, |
| { |
| "grad_norm": 0.5925477147102356, |
| "learning_rate": 9.999928912873243e-05, |
| "loss": 0.046, |
| "step": 4130 |
| }, |
| { |
| "grad_norm": 0.35705140233039856, |
| "learning_rate": 9.999917464464159e-05, |
| "loss": 0.0417, |
| "step": 4140 |
| }, |
| { |
| "grad_norm": 0.39123034477233887, |
| "learning_rate": 9.999905161705929e-05, |
| "loss": 0.0412, |
| "step": 4150 |
| }, |
| { |
| "grad_norm": 0.4060784578323364, |
| "learning_rate": 9.999892004600653e-05, |
| "loss": 0.037, |
| "step": 4160 |
| }, |
| { |
| "grad_norm": 0.37971585988998413, |
| "learning_rate": 9.999877993150581e-05, |
| "loss": 0.0422, |
| "step": 4170 |
| }, |
| { |
| "grad_norm": 0.5471509695053101, |
| "learning_rate": 9.999863127358108e-05, |
| "loss": 0.0423, |
| "step": 4180 |
| }, |
| { |
| "grad_norm": 0.38467666506767273, |
| "learning_rate": 9.999847407225773e-05, |
| "loss": 0.0423, |
| "step": 4190 |
| }, |
| { |
| "grad_norm": 0.464873731136322, |
| "learning_rate": 9.999830832756262e-05, |
| "loss": 0.038, |
| "step": 4200 |
| }, |
| { |
| "grad_norm": 0.47094425559043884, |
| "learning_rate": 9.999813403952407e-05, |
| "loss": 0.0379, |
| "step": 4210 |
| }, |
| { |
| "grad_norm": 0.4815444052219391, |
| "learning_rate": 9.999795120817187e-05, |
| "loss": 0.0399, |
| "step": 4220 |
| }, |
| { |
| "grad_norm": 0.4033340513706207, |
| "learning_rate": 9.999775983353725e-05, |
| "loss": 0.037, |
| "step": 4230 |
| }, |
| { |
| "grad_norm": 0.3992239832878113, |
| "learning_rate": 9.999755991565292e-05, |
| "loss": 0.0361, |
| "step": 4240 |
| }, |
| { |
| "grad_norm": 0.4696410894393921, |
| "learning_rate": 9.999735145455303e-05, |
| "loss": 0.0386, |
| "step": 4250 |
| }, |
| { |
| "grad_norm": 0.32953622937202454, |
| "learning_rate": 9.99971344502732e-05, |
| "loss": 0.0376, |
| "step": 4260 |
| }, |
| { |
| "grad_norm": 0.3710175156593323, |
| "learning_rate": 9.999690890285053e-05, |
| "loss": 0.0389, |
| "step": 4270 |
| }, |
| { |
| "grad_norm": 0.4310072362422943, |
| "learning_rate": 9.999667481232356e-05, |
| "loss": 0.0414, |
| "step": 4280 |
| }, |
| { |
| "grad_norm": 0.386064738035202, |
| "learning_rate": 9.999643217873225e-05, |
| "loss": 0.0349, |
| "step": 4290 |
| }, |
| { |
| "grad_norm": 0.3644770085811615, |
| "learning_rate": 9.999618100211809e-05, |
| "loss": 0.0352, |
| "step": 4300 |
| }, |
| { |
| "grad_norm": 0.474679559469223, |
| "learning_rate": 9.999592128252402e-05, |
| "loss": 0.0373, |
| "step": 4310 |
| }, |
| { |
| "grad_norm": 0.3991343379020691, |
| "learning_rate": 9.999565301999437e-05, |
| "loss": 0.0398, |
| "step": 4320 |
| }, |
| { |
| "grad_norm": 0.35236024856567383, |
| "learning_rate": 9.999537621457502e-05, |
| "loss": 0.0366, |
| "step": 4330 |
| }, |
| { |
| "grad_norm": 0.3644237816333771, |
| "learning_rate": 9.999509086631323e-05, |
| "loss": 0.0398, |
| "step": 4340 |
| }, |
| { |
| "grad_norm": 0.4314301013946533, |
| "learning_rate": 9.99947969752578e-05, |
| "loss": 0.0443, |
| "step": 4350 |
| }, |
| { |
| "grad_norm": 0.3834511339664459, |
| "learning_rate": 9.999449454145891e-05, |
| "loss": 0.0382, |
| "step": 4360 |
| }, |
| { |
| "grad_norm": 0.347470760345459, |
| "learning_rate": 9.999418356496827e-05, |
| "loss": 0.0431, |
| "step": 4370 |
| }, |
| { |
| "grad_norm": 0.45474135875701904, |
| "learning_rate": 9.999386404583899e-05, |
| "loss": 0.0412, |
| "step": 4380 |
| }, |
| { |
| "grad_norm": 0.3765535056591034, |
| "learning_rate": 9.999353598412568e-05, |
| "loss": 0.0452, |
| "step": 4390 |
| }, |
| { |
| "grad_norm": 0.28268489241600037, |
| "learning_rate": 9.999319937988442e-05, |
| "loss": 0.0364, |
| "step": 4400 |
| }, |
| { |
| "grad_norm": 0.4939219057559967, |
| "learning_rate": 9.999285423317268e-05, |
| "loss": 0.0399, |
| "step": 4410 |
| }, |
| { |
| "grad_norm": 0.3784657418727875, |
| "learning_rate": 9.999250054404947e-05, |
| "loss": 0.0413, |
| "step": 4420 |
| }, |
| { |
| "grad_norm": 0.3653174638748169, |
| "learning_rate": 9.99921383125752e-05, |
| "loss": 0.0363, |
| "step": 4430 |
| }, |
| { |
| "grad_norm": 0.48910290002822876, |
| "learning_rate": 9.99917675388118e-05, |
| "loss": 0.0391, |
| "step": 4440 |
| }, |
| { |
| "grad_norm": 0.3770338296890259, |
| "learning_rate": 9.99913882228226e-05, |
| "loss": 0.0389, |
| "step": 4450 |
| }, |
| { |
| "grad_norm": 0.4055532217025757, |
| "learning_rate": 9.999100036467242e-05, |
| "loss": 0.037, |
| "step": 4460 |
| }, |
| { |
| "grad_norm": 0.26587072014808655, |
| "learning_rate": 9.999060396442753e-05, |
| "loss": 0.0356, |
| "step": 4470 |
| }, |
| { |
| "grad_norm": 0.41433122754096985, |
| "learning_rate": 9.999019902215566e-05, |
| "loss": 0.0379, |
| "step": 4480 |
| }, |
| { |
| "grad_norm": 0.4437147080898285, |
| "learning_rate": 9.998978553792602e-05, |
| "loss": 0.042, |
| "step": 4490 |
| }, |
| { |
| "grad_norm": 0.42392024397850037, |
| "learning_rate": 9.998936351180926e-05, |
| "loss": 0.0379, |
| "step": 4500 |
| }, |
| { |
| "grad_norm": 0.3875804841518402, |
| "learning_rate": 9.998893294387747e-05, |
| "loss": 0.0349, |
| "step": 4510 |
| }, |
| { |
| "grad_norm": 0.37056973576545715, |
| "learning_rate": 9.998849383420426e-05, |
| "loss": 0.036, |
| "step": 4520 |
| }, |
| { |
| "grad_norm": 0.3561025857925415, |
| "learning_rate": 9.998804618286465e-05, |
| "loss": 0.0396, |
| "step": 4530 |
| }, |
| { |
| "grad_norm": 0.5577656626701355, |
| "learning_rate": 9.99875899899351e-05, |
| "loss": 0.039, |
| "step": 4540 |
| }, |
| { |
| "grad_norm": 0.40716665983200073, |
| "learning_rate": 9.99871252554936e-05, |
| "loss": 0.034, |
| "step": 4550 |
| }, |
| { |
| "grad_norm": 0.39829781651496887, |
| "learning_rate": 9.998665197961955e-05, |
| "loss": 0.0351, |
| "step": 4560 |
| }, |
| { |
| "grad_norm": 0.33696404099464417, |
| "learning_rate": 9.998617016239379e-05, |
| "loss": 0.0346, |
| "step": 4570 |
| }, |
| { |
| "grad_norm": 0.38184264302253723, |
| "learning_rate": 9.998567980389869e-05, |
| "loss": 0.0409, |
| "step": 4580 |
| }, |
| { |
| "grad_norm": 0.4413798451423645, |
| "learning_rate": 9.998518090421802e-05, |
| "loss": 0.0407, |
| "step": 4590 |
| }, |
| { |
| "grad_norm": 0.3794148862361908, |
| "learning_rate": 9.998467346343703e-05, |
| "loss": 0.037, |
| "step": 4600 |
| }, |
| { |
| "grad_norm": 0.35736557841300964, |
| "learning_rate": 9.998415748164243e-05, |
| "loss": 0.0417, |
| "step": 4610 |
| }, |
| { |
| "grad_norm": 0.30888238549232483, |
| "learning_rate": 9.998363295892238e-05, |
| "loss": 0.0369, |
| "step": 4620 |
| }, |
| { |
| "grad_norm": 0.3827449083328247, |
| "learning_rate": 9.998309989536652e-05, |
| "loss": 0.0369, |
| "step": 4630 |
| }, |
| { |
| "grad_norm": 0.3302731513977051, |
| "learning_rate": 9.998255829106593e-05, |
| "loss": 0.0394, |
| "step": 4640 |
| }, |
| { |
| "grad_norm": 0.3657535910606384, |
| "learning_rate": 9.998200814611316e-05, |
| "loss": 0.0361, |
| "step": 4650 |
| }, |
| { |
| "grad_norm": 0.29982033371925354, |
| "learning_rate": 9.998144946060219e-05, |
| "loss": 0.0375, |
| "step": 4660 |
| }, |
| { |
| "grad_norm": 0.5621727705001831, |
| "learning_rate": 9.998088223462852e-05, |
| "loss": 0.0374, |
| "step": 4670 |
| }, |
| { |
| "grad_norm": 0.37400975823402405, |
| "learning_rate": 9.998030646828905e-05, |
| "loss": 0.0322, |
| "step": 4680 |
| }, |
| { |
| "grad_norm": 0.4110400676727295, |
| "learning_rate": 9.997972216168217e-05, |
| "loss": 0.0348, |
| "step": 4690 |
| }, |
| { |
| "grad_norm": 0.512689471244812, |
| "learning_rate": 9.997912931490771e-05, |
| "loss": 0.044, |
| "step": 4700 |
| }, |
| { |
| "grad_norm": 0.4697954058647156, |
| "learning_rate": 9.9978527928067e-05, |
| "loss": 0.0397, |
| "step": 4710 |
| }, |
| { |
| "grad_norm": 0.4623161256313324, |
| "learning_rate": 9.997791800126277e-05, |
| "loss": 0.0343, |
| "step": 4720 |
| }, |
| { |
| "grad_norm": 0.44245240092277527, |
| "learning_rate": 9.997729953459927e-05, |
| "loss": 0.037, |
| "step": 4730 |
| }, |
| { |
| "grad_norm": 0.35390836000442505, |
| "learning_rate": 9.997667252818214e-05, |
| "loss": 0.038, |
| "step": 4740 |
| }, |
| { |
| "grad_norm": 0.3483659029006958, |
| "learning_rate": 9.997603698211855e-05, |
| "loss": 0.0377, |
| "step": 4750 |
| }, |
| { |
| "grad_norm": 0.46905985474586487, |
| "learning_rate": 9.99753928965171e-05, |
| "loss": 0.0388, |
| "step": 4760 |
| }, |
| { |
| "grad_norm": 0.3583959937095642, |
| "learning_rate": 9.997474027148781e-05, |
| "loss": 0.0397, |
| "step": 4770 |
| }, |
| { |
| "grad_norm": 0.382405161857605, |
| "learning_rate": 9.997407910714223e-05, |
| "loss": 0.0363, |
| "step": 4780 |
| }, |
| { |
| "grad_norm": 0.36503803730010986, |
| "learning_rate": 9.997340940359332e-05, |
| "loss": 0.039, |
| "step": 4790 |
| }, |
| { |
| "grad_norm": 0.3273303508758545, |
| "learning_rate": 9.997273116095552e-05, |
| "loss": 0.0383, |
| "step": 4800 |
| }, |
| { |
| "grad_norm": 0.5083199739456177, |
| "learning_rate": 9.997204437934473e-05, |
| "loss": 0.0341, |
| "step": 4810 |
| }, |
| { |
| "grad_norm": 0.3080153167247772, |
| "learning_rate": 9.997134905887829e-05, |
| "loss": 0.0369, |
| "step": 4820 |
| }, |
| { |
| "grad_norm": 0.30154985189437866, |
| "learning_rate": 9.997064519967501e-05, |
| "loss": 0.0348, |
| "step": 4830 |
| }, |
| { |
| "grad_norm": 0.3803936839103699, |
| "learning_rate": 9.996993280185517e-05, |
| "loss": 0.0348, |
| "step": 4840 |
| }, |
| { |
| "grad_norm": 0.2470090538263321, |
| "learning_rate": 9.99692118655405e-05, |
| "loss": 0.0343, |
| "step": 4850 |
| }, |
| { |
| "grad_norm": 0.4055911600589752, |
| "learning_rate": 9.996848239085417e-05, |
| "loss": 0.0339, |
| "step": 4860 |
| }, |
| { |
| "grad_norm": 0.353442519903183, |
| "learning_rate": 9.996774437792085e-05, |
| "loss": 0.0386, |
| "step": 4870 |
| }, |
| { |
| "grad_norm": 0.4112813472747803, |
| "learning_rate": 9.996699782686664e-05, |
| "loss": 0.035, |
| "step": 4880 |
| }, |
| { |
| "grad_norm": 0.38305455446243286, |
| "learning_rate": 9.996624273781909e-05, |
| "loss": 0.0377, |
| "step": 4890 |
| }, |
| { |
| "grad_norm": 0.3009546995162964, |
| "learning_rate": 9.996547911090725e-05, |
| "loss": 0.0341, |
| "step": 4900 |
| }, |
| { |
| "grad_norm": 0.31697291135787964, |
| "learning_rate": 9.996470694626157e-05, |
| "loss": 0.0322, |
| "step": 4910 |
| }, |
| { |
| "grad_norm": 0.3814009130001068, |
| "learning_rate": 9.996392624401403e-05, |
| "loss": 0.0351, |
| "step": 4920 |
| }, |
| { |
| "grad_norm": 0.3119279742240906, |
| "learning_rate": 9.996313700429801e-05, |
| "loss": 0.0363, |
| "step": 4930 |
| }, |
| { |
| "grad_norm": 0.3278277516365051, |
| "learning_rate": 9.996233922724836e-05, |
| "loss": 0.0314, |
| "step": 4940 |
| }, |
| { |
| "grad_norm": 0.5751047134399414, |
| "learning_rate": 9.996153291300141e-05, |
| "loss": 0.0371, |
| "step": 4950 |
| }, |
| { |
| "grad_norm": 0.3887098431587219, |
| "learning_rate": 9.996071806169494e-05, |
| "loss": 0.0378, |
| "step": 4960 |
| }, |
| { |
| "grad_norm": 0.2754780948162079, |
| "learning_rate": 9.995989467346817e-05, |
| "loss": 0.0303, |
| "step": 4970 |
| }, |
| { |
| "grad_norm": 0.35232511162757874, |
| "learning_rate": 9.995906274846183e-05, |
| "loss": 0.0388, |
| "step": 4980 |
| }, |
| { |
| "grad_norm": 0.3542734980583191, |
| "learning_rate": 9.995822228681803e-05, |
| "loss": 0.0375, |
| "step": 4990 |
| }, |
| { |
| "grad_norm": 0.34408876299858093, |
| "learning_rate": 9.99573732886804e-05, |
| "loss": 0.0358, |
| "step": 5000 |
| }, |
| { |
| "grad_norm": 0.39004313945770264, |
| "learning_rate": 9.995651575419402e-05, |
| "loss": 0.0375, |
| "step": 5010 |
| }, |
| { |
| "grad_norm": 0.3584635257720947, |
| "learning_rate": 9.995564968350541e-05, |
| "loss": 0.0372, |
| "step": 5020 |
| }, |
| { |
| "grad_norm": 0.3550162613391876, |
| "learning_rate": 9.995477507676256e-05, |
| "loss": 0.032, |
| "step": 5030 |
| }, |
| { |
| "grad_norm": 0.37321898341178894, |
| "learning_rate": 9.995389193411493e-05, |
| "loss": 0.0311, |
| "step": 5040 |
| }, |
| { |
| "grad_norm": 0.38647428154945374, |
| "learning_rate": 9.995300025571339e-05, |
| "loss": 0.0357, |
| "step": 5050 |
| }, |
| { |
| "grad_norm": 0.3812742233276367, |
| "learning_rate": 9.995210004171034e-05, |
| "loss": 0.0381, |
| "step": 5060 |
| }, |
| { |
| "grad_norm": 0.408979207277298, |
| "learning_rate": 9.995119129225956e-05, |
| "loss": 0.0362, |
| "step": 5070 |
| }, |
| { |
| "grad_norm": 0.3359850347042084, |
| "learning_rate": 9.995027400751637e-05, |
| "loss": 0.0351, |
| "step": 5080 |
| }, |
| { |
| "grad_norm": 0.40490883588790894, |
| "learning_rate": 9.994934818763751e-05, |
| "loss": 0.0387, |
| "step": 5090 |
| }, |
| { |
| "grad_norm": 0.4428238868713379, |
| "learning_rate": 9.994841383278115e-05, |
| "loss": 0.0359, |
| "step": 5100 |
| }, |
| { |
| "grad_norm": 0.3872987926006317, |
| "learning_rate": 9.994747094310695e-05, |
| "loss": 0.0385, |
| "step": 5110 |
| }, |
| { |
| "grad_norm": 0.31335529685020447, |
| "learning_rate": 9.994651951877604e-05, |
| "loss": 0.0324, |
| "step": 5120 |
| }, |
| { |
| "grad_norm": 0.3368927538394928, |
| "learning_rate": 9.994555955995099e-05, |
| "loss": 0.0331, |
| "step": 5130 |
| }, |
| { |
| "grad_norm": 0.41906657814979553, |
| "learning_rate": 9.994459106679581e-05, |
| "loss": 0.0358, |
| "step": 5140 |
| }, |
| { |
| "grad_norm": 0.37263497710227966, |
| "learning_rate": 9.994361403947603e-05, |
| "loss": 0.0362, |
| "step": 5150 |
| }, |
| { |
| "grad_norm": 0.32935646176338196, |
| "learning_rate": 9.994262847815854e-05, |
| "loss": 0.0357, |
| "step": 5160 |
| }, |
| { |
| "grad_norm": 0.2794903516769409, |
| "learning_rate": 9.99416343830118e-05, |
| "loss": 0.0332, |
| "step": 5170 |
| }, |
| { |
| "grad_norm": 0.31271782517433167, |
| "learning_rate": 9.994063175420565e-05, |
| "loss": 0.03, |
| "step": 5180 |
| }, |
| { |
| "grad_norm": 0.3257259726524353, |
| "learning_rate": 9.99396205919114e-05, |
| "loss": 0.0372, |
| "step": 5190 |
| }, |
| { |
| "grad_norm": 0.3059450089931488, |
| "learning_rate": 9.993860089630185e-05, |
| "loss": 0.033, |
| "step": 5200 |
| }, |
| { |
| "grad_norm": 0.2800959646701813, |
| "learning_rate": 9.993757266755123e-05, |
| "loss": 0.0364, |
| "step": 5210 |
| }, |
| { |
| "grad_norm": 0.3736426532268524, |
| "learning_rate": 9.993653590583522e-05, |
| "loss": 0.0404, |
| "step": 5220 |
| }, |
| { |
| "grad_norm": 0.3520766794681549, |
| "learning_rate": 9.993549061133102e-05, |
| "loss": 0.0349, |
| "step": 5230 |
| }, |
| { |
| "grad_norm": 0.4305180311203003, |
| "learning_rate": 9.993443678421719e-05, |
| "loss": 0.031, |
| "step": 5240 |
| }, |
| { |
| "grad_norm": 0.30332329869270325, |
| "learning_rate": 9.993337442467384e-05, |
| "loss": 0.0354, |
| "step": 5250 |
| }, |
| { |
| "grad_norm": 0.357280433177948, |
| "learning_rate": 9.993230353288248e-05, |
| "loss": 0.0333, |
| "step": 5260 |
| }, |
| { |
| "grad_norm": 0.38651740550994873, |
| "learning_rate": 9.993122410902608e-05, |
| "loss": 0.0336, |
| "step": 5270 |
| }, |
| { |
| "grad_norm": 0.3957500457763672, |
| "learning_rate": 9.993013615328912e-05, |
| "loss": 0.0337, |
| "step": 5280 |
| }, |
| { |
| "grad_norm": 0.30943194031715393, |
| "learning_rate": 9.992903966585747e-05, |
| "loss": 0.0351, |
| "step": 5290 |
| }, |
| { |
| "grad_norm": 0.29888811707496643, |
| "learning_rate": 9.992793464691852e-05, |
| "loss": 0.0371, |
| "step": 5300 |
| }, |
| { |
| "grad_norm": 0.32478439807891846, |
| "learning_rate": 9.992682109666105e-05, |
| "loss": 0.0401, |
| "step": 5310 |
| }, |
| { |
| "grad_norm": 0.4232689142227173, |
| "learning_rate": 9.992569901527538e-05, |
| "loss": 0.0424, |
| "step": 5320 |
| }, |
| { |
| "grad_norm": 0.30550339818000793, |
| "learning_rate": 9.99245684029532e-05, |
| "loss": 0.0325, |
| "step": 5330 |
| }, |
| { |
| "grad_norm": 0.2808350622653961, |
| "learning_rate": 9.992342925988774e-05, |
| "loss": 0.0363, |
| "step": 5340 |
| }, |
| { |
| "grad_norm": 0.37217894196510315, |
| "learning_rate": 9.992228158627361e-05, |
| "loss": 0.0349, |
| "step": 5350 |
| }, |
| { |
| "grad_norm": 0.29954707622528076, |
| "learning_rate": 9.992112538230693e-05, |
| "loss": 0.0314, |
| "step": 5360 |
| }, |
| { |
| "grad_norm": 0.4495854675769806, |
| "learning_rate": 9.991996064818527e-05, |
| "loss": 0.0326, |
| "step": 5370 |
| }, |
| { |
| "grad_norm": 0.3042782247066498, |
| "learning_rate": 9.991878738410768e-05, |
| "loss": 0.0338, |
| "step": 5380 |
| }, |
| { |
| "grad_norm": 0.3876139521598816, |
| "learning_rate": 9.991760559027457e-05, |
| "loss": 0.0337, |
| "step": 5390 |
| }, |
| { |
| "grad_norm": 0.40060412883758545, |
| "learning_rate": 9.991641526688793e-05, |
| "loss": 0.03, |
| "step": 5400 |
| }, |
| { |
| "grad_norm": 0.3015538156032562, |
| "learning_rate": 9.991521641415113e-05, |
| "loss": 0.0322, |
| "step": 5410 |
| }, |
| { |
| "grad_norm": 0.28446367383003235, |
| "learning_rate": 9.991400903226904e-05, |
| "loss": 0.0318, |
| "step": 5420 |
| }, |
| { |
| "grad_norm": 0.42916229367256165, |
| "learning_rate": 9.991279312144794e-05, |
| "loss": 0.0368, |
| "step": 5430 |
| }, |
| { |
| "grad_norm": 0.32056114077568054, |
| "learning_rate": 9.991156868189564e-05, |
| "loss": 0.0331, |
| "step": 5440 |
| }, |
| { |
| "grad_norm": 0.4189446270465851, |
| "learning_rate": 9.991033571382131e-05, |
| "loss": 0.0354, |
| "step": 5450 |
| }, |
| { |
| "grad_norm": 0.3427908718585968, |
| "learning_rate": 9.990909421743569e-05, |
| "loss": 0.0322, |
| "step": 5460 |
| }, |
| { |
| "grad_norm": 0.29680436849594116, |
| "learning_rate": 9.990784419295085e-05, |
| "loss": 0.0315, |
| "step": 5470 |
| }, |
| { |
| "grad_norm": 0.3475859463214874, |
| "learning_rate": 9.990658564058044e-05, |
| "loss": 0.0356, |
| "step": 5480 |
| }, |
| { |
| "grad_norm": 0.2693791091442108, |
| "learning_rate": 9.990531856053948e-05, |
| "loss": 0.0341, |
| "step": 5490 |
| }, |
| { |
| "grad_norm": 0.38264793157577515, |
| "learning_rate": 9.99040429530445e-05, |
| "loss": 0.0305, |
| "step": 5500 |
| }, |
| { |
| "grad_norm": 0.4049840271472931, |
| "learning_rate": 9.990275881831346e-05, |
| "loss": 0.0345, |
| "step": 5510 |
| }, |
| { |
| "grad_norm": 0.4251091480255127, |
| "learning_rate": 9.990146615656577e-05, |
| "loss": 0.0366, |
| "step": 5520 |
| }, |
| { |
| "grad_norm": 0.28920596837997437, |
| "learning_rate": 9.990016496802233e-05, |
| "loss": 0.0328, |
| "step": 5530 |
| }, |
| { |
| "grad_norm": 0.3436765670776367, |
| "learning_rate": 9.989885525290548e-05, |
| "loss": 0.0313, |
| "step": 5540 |
| }, |
| { |
| "grad_norm": 0.29532015323638916, |
| "learning_rate": 9.989753701143897e-05, |
| "loss": 0.0341, |
| "step": 5550 |
| }, |
| { |
| "grad_norm": 0.3735540509223938, |
| "learning_rate": 9.989621024384812e-05, |
| "loss": 0.0316, |
| "step": 5560 |
| }, |
| { |
| "grad_norm": 0.4115545451641083, |
| "learning_rate": 9.989487495035959e-05, |
| "loss": 0.0323, |
| "step": 5570 |
| }, |
| { |
| "grad_norm": 0.3986823260784149, |
| "learning_rate": 9.989353113120156e-05, |
| "loss": 0.0314, |
| "step": 5580 |
| }, |
| { |
| "grad_norm": 0.28433433175086975, |
| "learning_rate": 9.989217878660366e-05, |
| "loss": 0.0315, |
| "step": 5590 |
| }, |
| { |
| "grad_norm": 0.2662765681743622, |
| "learning_rate": 9.989081791679695e-05, |
| "loss": 0.0303, |
| "step": 5600 |
| }, |
| { |
| "grad_norm": 0.30207517743110657, |
| "learning_rate": 9.988944852201397e-05, |
| "loss": 0.0311, |
| "step": 5610 |
| }, |
| { |
| "grad_norm": 0.33197832107543945, |
| "learning_rate": 9.988807060248873e-05, |
| "loss": 0.0304, |
| "step": 5620 |
| }, |
| { |
| "grad_norm": 0.2492564171552658, |
| "learning_rate": 9.988668415845665e-05, |
| "loss": 0.0287, |
| "step": 5630 |
| }, |
| { |
| "grad_norm": 0.43481743335723877, |
| "learning_rate": 9.988528919015466e-05, |
| "loss": 0.0294, |
| "step": 5640 |
| }, |
| { |
| "grad_norm": 0.3716481029987335, |
| "learning_rate": 9.988388569782112e-05, |
| "loss": 0.0306, |
| "step": 5650 |
| }, |
| { |
| "grad_norm": 0.2877008616924286, |
| "learning_rate": 9.988247368169583e-05, |
| "loss": 0.0408, |
| "step": 5660 |
| }, |
| { |
| "grad_norm": 0.31733956933021545, |
| "learning_rate": 9.988105314202007e-05, |
| "loss": 0.0334, |
| "step": 5670 |
| }, |
| { |
| "grad_norm": 0.42810311913490295, |
| "learning_rate": 9.987962407903659e-05, |
| "loss": 0.0319, |
| "step": 5680 |
| }, |
| { |
| "grad_norm": 0.3225559592247009, |
| "learning_rate": 9.987818649298957e-05, |
| "loss": 0.0293, |
| "step": 5690 |
| }, |
| { |
| "grad_norm": 0.3467288613319397, |
| "learning_rate": 9.987674038412465e-05, |
| "loss": 0.0339, |
| "step": 5700 |
| }, |
| { |
| "grad_norm": 0.45623427629470825, |
| "learning_rate": 9.987528575268891e-05, |
| "loss": 0.0319, |
| "step": 5710 |
| }, |
| { |
| "grad_norm": 0.2944740653038025, |
| "learning_rate": 9.987382259893095e-05, |
| "loss": 0.0318, |
| "step": 5720 |
| }, |
| { |
| "grad_norm": 0.3048604726791382, |
| "learning_rate": 9.987235092310074e-05, |
| "loss": 0.0331, |
| "step": 5730 |
| }, |
| { |
| "grad_norm": 0.3735939860343933, |
| "learning_rate": 9.987087072544978e-05, |
| "loss": 0.0327, |
| "step": 5740 |
| }, |
| { |
| "grad_norm": 0.3563518524169922, |
| "learning_rate": 9.9869382006231e-05, |
| "loss": 0.0321, |
| "step": 5750 |
| }, |
| { |
| "grad_norm": 0.3049486577510834, |
| "learning_rate": 9.986788476569875e-05, |
| "loss": 0.0295, |
| "step": 5760 |
| }, |
| { |
| "grad_norm": 0.31051209568977356, |
| "learning_rate": 9.986637900410887e-05, |
| "loss": 0.0349, |
| "step": 5770 |
| }, |
| { |
| "grad_norm": 0.3869427442550659, |
| "learning_rate": 9.986486472171869e-05, |
| "loss": 0.0322, |
| "step": 5780 |
| }, |
| { |
| "grad_norm": 0.4189186692237854, |
| "learning_rate": 9.986334191878692e-05, |
| "loss": 0.0342, |
| "step": 5790 |
| }, |
| { |
| "grad_norm": 0.43416473269462585, |
| "learning_rate": 9.986181059557378e-05, |
| "loss": 0.031, |
| "step": 5800 |
| }, |
| { |
| "grad_norm": 0.3650332987308502, |
| "learning_rate": 9.986027075234094e-05, |
| "loss": 0.0299, |
| "step": 5810 |
| }, |
| { |
| "grad_norm": 0.3248865306377411, |
| "learning_rate": 9.985872238935152e-05, |
| "loss": 0.0332, |
| "step": 5820 |
| }, |
| { |
| "grad_norm": 0.3286206126213074, |
| "learning_rate": 9.985716550687008e-05, |
| "loss": 0.0336, |
| "step": 5830 |
| }, |
| { |
| "grad_norm": 0.373761385679245, |
| "learning_rate": 9.985560010516264e-05, |
| "loss": 0.033, |
| "step": 5840 |
| }, |
| { |
| "grad_norm": 0.2942267954349518, |
| "learning_rate": 9.985402618449668e-05, |
| "loss": 0.0315, |
| "step": 5850 |
| }, |
| { |
| "grad_norm": 0.27267733216285706, |
| "learning_rate": 9.985244374514118e-05, |
| "loss": 0.0335, |
| "step": 5860 |
| }, |
| { |
| "grad_norm": 0.35840272903442383, |
| "learning_rate": 9.985085278736651e-05, |
| "loss": 0.0358, |
| "step": 5870 |
| }, |
| { |
| "grad_norm": 0.3712354004383087, |
| "learning_rate": 9.984925331144452e-05, |
| "loss": 0.0309, |
| "step": 5880 |
| }, |
| { |
| "grad_norm": 0.33825939893722534, |
| "learning_rate": 9.984764531764851e-05, |
| "loss": 0.0293, |
| "step": 5890 |
| }, |
| { |
| "grad_norm": 0.3744773864746094, |
| "learning_rate": 9.984602880625326e-05, |
| "loss": 0.0305, |
| "step": 5900 |
| }, |
| { |
| "grad_norm": 0.3779265582561493, |
| "learning_rate": 9.9844403777535e-05, |
| "loss": 0.0336, |
| "step": 5910 |
| }, |
| { |
| "grad_norm": 0.35232800245285034, |
| "learning_rate": 9.984277023177135e-05, |
| "loss": 0.03, |
| "step": 5920 |
| }, |
| { |
| "grad_norm": 0.24044311046600342, |
| "learning_rate": 9.984112816924148e-05, |
| "loss": 0.0277, |
| "step": 5930 |
| }, |
| { |
| "grad_norm": 0.31373876333236694, |
| "learning_rate": 9.983947759022596e-05, |
| "loss": 0.0338, |
| "step": 5940 |
| }, |
| { |
| "grad_norm": 0.3518969714641571, |
| "learning_rate": 9.983781849500682e-05, |
| "loss": 0.0295, |
| "step": 5950 |
| }, |
| { |
| "grad_norm": 0.3579181432723999, |
| "learning_rate": 9.98361508838676e-05, |
| "loss": 0.0298, |
| "step": 5960 |
| }, |
| { |
| "grad_norm": 0.2864387333393097, |
| "learning_rate": 9.98344747570932e-05, |
| "loss": 0.0316, |
| "step": 5970 |
| }, |
| { |
| "grad_norm": 0.3602316379547119, |
| "learning_rate": 9.983279011497004e-05, |
| "loss": 0.0311, |
| "step": 5980 |
| }, |
| { |
| "grad_norm": 0.2712448537349701, |
| "learning_rate": 9.983109695778596e-05, |
| "loss": 0.032, |
| "step": 5990 |
| }, |
| { |
| "grad_norm": 0.3071662187576294, |
| "learning_rate": 9.982939528583032e-05, |
| "loss": 0.0334, |
| "step": 6000 |
| }, |
| { |
| "grad_norm": 0.38609135150909424, |
| "learning_rate": 9.982768509939385e-05, |
| "loss": 0.0285, |
| "step": 6010 |
| }, |
| { |
| "grad_norm": 0.2968983054161072, |
| "learning_rate": 9.982596639876879e-05, |
| "loss": 0.0292, |
| "step": 6020 |
| }, |
| { |
| "grad_norm": 0.2913767695426941, |
| "learning_rate": 9.982423918424881e-05, |
| "loss": 0.0276, |
| "step": 6030 |
| }, |
| { |
| "grad_norm": 0.2720889151096344, |
| "learning_rate": 9.982250345612908e-05, |
| "loss": 0.0305, |
| "step": 6040 |
| }, |
| { |
| "grad_norm": 0.2988946735858917, |
| "learning_rate": 9.982075921470611e-05, |
| "loss": 0.0309, |
| "step": 6050 |
| }, |
| { |
| "grad_norm": 0.3016357421875, |
| "learning_rate": 9.981900646027802e-05, |
| "loss": 0.0284, |
| "step": 6060 |
| }, |
| { |
| "grad_norm": 0.3710649311542511, |
| "learning_rate": 9.981724519314425e-05, |
| "loss": 0.0321, |
| "step": 6070 |
| }, |
| { |
| "grad_norm": 0.32104140520095825, |
| "learning_rate": 9.981547541360581e-05, |
| "loss": 0.035, |
| "step": 6080 |
| }, |
| { |
| "grad_norm": 0.2984887361526489, |
| "learning_rate": 9.981369712196508e-05, |
| "loss": 0.0282, |
| "step": 6090 |
| }, |
| { |
| "grad_norm": 0.29977795481681824, |
| "learning_rate": 9.981191031852592e-05, |
| "loss": 0.0338, |
| "step": 6100 |
| }, |
| { |
| "grad_norm": 0.29678675532341003, |
| "learning_rate": 9.981011500359362e-05, |
| "loss": 0.0301, |
| "step": 6110 |
| }, |
| { |
| "grad_norm": 0.3343314230442047, |
| "learning_rate": 9.9808311177475e-05, |
| "loss": 0.0351, |
| "step": 6120 |
| }, |
| { |
| "grad_norm": 0.32088878750801086, |
| "learning_rate": 9.980649884047826e-05, |
| "loss": 0.0318, |
| "step": 6130 |
| }, |
| { |
| "grad_norm": 0.3055264949798584, |
| "learning_rate": 9.980467799291307e-05, |
| "loss": 0.0309, |
| "step": 6140 |
| }, |
| { |
| "grad_norm": 0.4143613874912262, |
| "learning_rate": 9.980284863509058e-05, |
| "loss": 0.0338, |
| "step": 6150 |
| }, |
| { |
| "grad_norm": 0.35827532410621643, |
| "learning_rate": 9.980101076732334e-05, |
| "loss": 0.0316, |
| "step": 6160 |
| }, |
| { |
| "grad_norm": 0.42603468894958496, |
| "learning_rate": 9.979916438992544e-05, |
| "loss": 0.0338, |
| "step": 6170 |
| }, |
| { |
| "grad_norm": 0.3776651620864868, |
| "learning_rate": 9.979730950321237e-05, |
| "loss": 0.0317, |
| "step": 6180 |
| }, |
| { |
| "grad_norm": 0.31897443532943726, |
| "learning_rate": 9.979544610750104e-05, |
| "loss": 0.0286, |
| "step": 6190 |
| }, |
| { |
| "grad_norm": 0.247903972864151, |
| "learning_rate": 9.97935742031099e-05, |
| "loss": 0.0309, |
| "step": 6200 |
| }, |
| { |
| "grad_norm": 0.2799277603626251, |
| "learning_rate": 9.979169379035878e-05, |
| "loss": 0.0306, |
| "step": 6210 |
| }, |
| { |
| "grad_norm": 0.455984503030777, |
| "learning_rate": 9.978980486956899e-05, |
| "loss": 0.0344, |
| "step": 6220 |
| }, |
| { |
| "grad_norm": 0.36091381311416626, |
| "learning_rate": 9.978790744106332e-05, |
| "loss": 0.034, |
| "step": 6230 |
| }, |
| { |
| "grad_norm": 0.3725851774215698, |
| "learning_rate": 9.978600150516594e-05, |
| "loss": 0.0315, |
| "step": 6240 |
| }, |
| { |
| "grad_norm": 0.3584500849246979, |
| "learning_rate": 9.978408706220259e-05, |
| "loss": 0.036, |
| "step": 6250 |
| }, |
| { |
| "grad_norm": 0.29220858216285706, |
| "learning_rate": 9.978216411250032e-05, |
| "loss": 0.0372, |
| "step": 6260 |
| }, |
| { |
| "grad_norm": 0.31616538763046265, |
| "learning_rate": 9.978023265638778e-05, |
| "loss": 0.0298, |
| "step": 6270 |
| }, |
| { |
| "grad_norm": 0.3402068614959717, |
| "learning_rate": 9.977829269419495e-05, |
| "loss": 0.0332, |
| "step": 6280 |
| }, |
| { |
| "grad_norm": 0.3208469748497009, |
| "learning_rate": 9.977634422625335e-05, |
| "loss": 0.0298, |
| "step": 6290 |
| }, |
| { |
| "grad_norm": 0.2783920168876648, |
| "learning_rate": 9.97743872528959e-05, |
| "loss": 0.0376, |
| "step": 6300 |
| }, |
| { |
| "grad_norm": 0.36577823758125305, |
| "learning_rate": 9.9772421774457e-05, |
| "loss": 0.0326, |
| "step": 6310 |
| }, |
| { |
| "grad_norm": 0.3835756182670593, |
| "learning_rate": 9.977044779127252e-05, |
| "loss": 0.0316, |
| "step": 6320 |
| }, |
| { |
| "grad_norm": 0.35437050461769104, |
| "learning_rate": 9.976846530367971e-05, |
| "loss": 0.0338, |
| "step": 6330 |
| }, |
| { |
| "grad_norm": 0.35576331615448, |
| "learning_rate": 9.976647431201735e-05, |
| "loss": 0.0337, |
| "step": 6340 |
| }, |
| { |
| "grad_norm": 0.3176501989364624, |
| "learning_rate": 9.976447481662568e-05, |
| "loss": 0.0349, |
| "step": 6350 |
| }, |
| { |
| "grad_norm": 0.3097964823246002, |
| "learning_rate": 9.976246681784629e-05, |
| "loss": 0.0326, |
| "step": 6360 |
| }, |
| { |
| "grad_norm": 0.2853200435638428, |
| "learning_rate": 9.976045031602234e-05, |
| "loss": 0.0339, |
| "step": 6370 |
| }, |
| { |
| "grad_norm": 0.4057390093803406, |
| "learning_rate": 9.975842531149837e-05, |
| "loss": 0.0358, |
| "step": 6380 |
| }, |
| { |
| "grad_norm": 0.26724973320961, |
| "learning_rate": 9.975639180462043e-05, |
| "loss": 0.0341, |
| "step": 6390 |
| }, |
| { |
| "grad_norm": 0.29897722601890564, |
| "learning_rate": 9.975434979573596e-05, |
| "loss": 0.0299, |
| "step": 6400 |
| }, |
| { |
| "grad_norm": 0.3819584846496582, |
| "learning_rate": 9.97522992851939e-05, |
| "loss": 0.0345, |
| "step": 6410 |
| }, |
| { |
| "grad_norm": 0.37220311164855957, |
| "learning_rate": 9.975024027334461e-05, |
| "loss": 0.0341, |
| "step": 6420 |
| }, |
| { |
| "grad_norm": 0.3132483959197998, |
| "learning_rate": 9.974817276053993e-05, |
| "loss": 0.0309, |
| "step": 6430 |
| }, |
| { |
| "grad_norm": 0.32484710216522217, |
| "learning_rate": 9.974609674713315e-05, |
| "loss": 0.0319, |
| "step": 6440 |
| }, |
| { |
| "grad_norm": 0.35498714447021484, |
| "learning_rate": 9.9744012233479e-05, |
| "loss": 0.0305, |
| "step": 6450 |
| }, |
| { |
| "grad_norm": 0.36161261796951294, |
| "learning_rate": 9.974191921993366e-05, |
| "loss": 0.0305, |
| "step": 6460 |
| }, |
| { |
| "grad_norm": 0.2131146341562271, |
| "learning_rate": 9.973981770685474e-05, |
| "loss": 0.0313, |
| "step": 6470 |
| }, |
| { |
| "grad_norm": 0.3179318308830261, |
| "learning_rate": 9.97377076946014e-05, |
| "loss": 0.0334, |
| "step": 6480 |
| }, |
| { |
| "grad_norm": 0.2530212998390198, |
| "learning_rate": 9.973558918353412e-05, |
| "loss": 0.0336, |
| "step": 6490 |
| }, |
| { |
| "grad_norm": 0.2359841912984848, |
| "learning_rate": 9.973346217401494e-05, |
| "loss": 0.0293, |
| "step": 6500 |
| }, |
| { |
| "grad_norm": 0.3290267288684845, |
| "learning_rate": 9.973132666640726e-05, |
| "loss": 0.0299, |
| "step": 6510 |
| }, |
| { |
| "grad_norm": 0.2806616425514221, |
| "learning_rate": 9.972918266107602e-05, |
| "loss": 0.0275, |
| "step": 6520 |
| }, |
| { |
| "grad_norm": 0.289767861366272, |
| "learning_rate": 9.972703015838756e-05, |
| "loss": 0.0315, |
| "step": 6530 |
| }, |
| { |
| "grad_norm": 0.30729788541793823, |
| "learning_rate": 9.97248691587097e-05, |
| "loss": 0.0297, |
| "step": 6540 |
| }, |
| { |
| "grad_norm": 0.29331719875335693, |
| "learning_rate": 9.972269966241166e-05, |
| "loss": 0.031, |
| "step": 6550 |
| }, |
| { |
| "grad_norm": 0.26936954259872437, |
| "learning_rate": 9.972052166986417e-05, |
| "loss": 0.0304, |
| "step": 6560 |
| }, |
| { |
| "grad_norm": 0.25787121057510376, |
| "learning_rate": 9.971833518143938e-05, |
| "loss": 0.0352, |
| "step": 6570 |
| }, |
| { |
| "grad_norm": 0.29834407567977905, |
| "learning_rate": 9.971614019751093e-05, |
| "loss": 0.0284, |
| "step": 6580 |
| }, |
| { |
| "grad_norm": 0.22129976749420166, |
| "learning_rate": 9.971393671845383e-05, |
| "loss": 0.0317, |
| "step": 6590 |
| }, |
| { |
| "grad_norm": 0.2899000942707062, |
| "learning_rate": 9.971172474464464e-05, |
| "loss": 0.0336, |
| "step": 6600 |
| }, |
| { |
| "grad_norm": 0.29629984498023987, |
| "learning_rate": 9.97095042764613e-05, |
| "loss": 0.0347, |
| "step": 6610 |
| }, |
| { |
| "grad_norm": 0.389089971780777, |
| "learning_rate": 9.970727531428324e-05, |
| "loss": 0.0263, |
| "step": 6620 |
| }, |
| { |
| "grad_norm": 0.24067829549312592, |
| "learning_rate": 9.970503785849132e-05, |
| "loss": 0.0354, |
| "step": 6630 |
| }, |
| { |
| "grad_norm": 0.3028007745742798, |
| "learning_rate": 9.970279190946788e-05, |
| "loss": 0.0309, |
| "step": 6640 |
| }, |
| { |
| "grad_norm": 0.305187463760376, |
| "learning_rate": 9.970053746759667e-05, |
| "loss": 0.0274, |
| "step": 6650 |
| }, |
| { |
| "grad_norm": 0.2956674098968506, |
| "learning_rate": 9.969827453326292e-05, |
| "loss": 0.0325, |
| "step": 6660 |
| }, |
| { |
| "grad_norm": 0.30844828486442566, |
| "learning_rate": 9.969600310685332e-05, |
| "loss": 0.0334, |
| "step": 6670 |
| }, |
| { |
| "grad_norm": 0.23201362788677216, |
| "learning_rate": 9.969372318875596e-05, |
| "loss": 0.0299, |
| "step": 6680 |
| }, |
| { |
| "grad_norm": 0.38267290592193604, |
| "learning_rate": 9.969143477936043e-05, |
| "loss": 0.0314, |
| "step": 6690 |
| }, |
| { |
| "grad_norm": 0.22565823793411255, |
| "learning_rate": 9.968913787905775e-05, |
| "loss": 0.0311, |
| "step": 6700 |
| }, |
| { |
| "grad_norm": 0.31415167450904846, |
| "learning_rate": 9.968683248824045e-05, |
| "loss": 0.0305, |
| "step": 6710 |
| }, |
| { |
| "grad_norm": 0.2609900236129761, |
| "learning_rate": 9.968451860730238e-05, |
| "loss": 0.0316, |
| "step": 6720 |
| }, |
| { |
| "grad_norm": 0.3864940404891968, |
| "learning_rate": 9.968219623663896e-05, |
| "loss": 0.0275, |
| "step": 6730 |
| }, |
| { |
| "grad_norm": 0.3017069101333618, |
| "learning_rate": 9.967986537664702e-05, |
| "loss": 0.0304, |
| "step": 6740 |
| }, |
| { |
| "grad_norm": 0.29363933205604553, |
| "learning_rate": 9.967752602772483e-05, |
| "loss": 0.0278, |
| "step": 6750 |
| }, |
| { |
| "grad_norm": 0.33982428908348083, |
| "learning_rate": 9.967517819027212e-05, |
| "loss": 0.0283, |
| "step": 6760 |
| }, |
| { |
| "grad_norm": 0.29549187421798706, |
| "learning_rate": 9.967282186469009e-05, |
| "loss": 0.0287, |
| "step": 6770 |
| }, |
| { |
| "grad_norm": 0.2883082926273346, |
| "learning_rate": 9.967045705138135e-05, |
| "loss": 0.0298, |
| "step": 6780 |
| }, |
| { |
| "grad_norm": 0.2381998896598816, |
| "learning_rate": 9.966808375074998e-05, |
| "loss": 0.0295, |
| "step": 6790 |
| }, |
| { |
| "grad_norm": 0.2355462908744812, |
| "learning_rate": 9.966570196320154e-05, |
| "loss": 0.03, |
| "step": 6800 |
| }, |
| { |
| "grad_norm": 0.31626275181770325, |
| "learning_rate": 9.966331168914299e-05, |
| "loss": 0.0305, |
| "step": 6810 |
| }, |
| { |
| "grad_norm": 0.2476751059293747, |
| "learning_rate": 9.966091292898277e-05, |
| "loss": 0.0328, |
| "step": 6820 |
| }, |
| { |
| "grad_norm": 0.27976781129837036, |
| "learning_rate": 9.965850568313076e-05, |
| "loss": 0.0269, |
| "step": 6830 |
| }, |
| { |
| "grad_norm": 0.23612117767333984, |
| "learning_rate": 9.965608995199827e-05, |
| "loss": 0.0301, |
| "step": 6840 |
| }, |
| { |
| "grad_norm": 0.3097231686115265, |
| "learning_rate": 9.965366573599812e-05, |
| "loss": 0.0331, |
| "step": 6850 |
| }, |
| { |
| "grad_norm": 0.3308660387992859, |
| "learning_rate": 9.965123303554453e-05, |
| "loss": 0.0336, |
| "step": 6860 |
| }, |
| { |
| "grad_norm": 0.22662802040576935, |
| "learning_rate": 9.964879185105317e-05, |
| "loss": 0.0256, |
| "step": 6870 |
| }, |
| { |
| "grad_norm": 0.3358778655529022, |
| "learning_rate": 9.964634218294119e-05, |
| "loss": 0.0339, |
| "step": 6880 |
| }, |
| { |
| "grad_norm": 0.2232239544391632, |
| "learning_rate": 9.964388403162714e-05, |
| "loss": 0.0264, |
| "step": 6890 |
| }, |
| { |
| "grad_norm": 0.2929636836051941, |
| "learning_rate": 9.96414173975311e-05, |
| "loss": 0.0277, |
| "step": 6900 |
| }, |
| { |
| "grad_norm": 0.29091522097587585, |
| "learning_rate": 9.963894228107451e-05, |
| "loss": 0.0263, |
| "step": 6910 |
| }, |
| { |
| "grad_norm": 0.27420079708099365, |
| "learning_rate": 9.963645868268032e-05, |
| "loss": 0.0307, |
| "step": 6920 |
| }, |
| { |
| "grad_norm": 0.34498360753059387, |
| "learning_rate": 9.963396660277289e-05, |
| "loss": 0.0304, |
| "step": 6930 |
| }, |
| { |
| "grad_norm": 0.22748416662216187, |
| "learning_rate": 9.963146604177807e-05, |
| "loss": 0.0274, |
| "step": 6940 |
| }, |
| { |
| "grad_norm": 0.27264806628227234, |
| "learning_rate": 9.962895700012311e-05, |
| "loss": 0.0276, |
| "step": 6950 |
| }, |
| { |
| "grad_norm": 0.2757764756679535, |
| "learning_rate": 9.962643947823677e-05, |
| "loss": 0.0317, |
| "step": 6960 |
| }, |
| { |
| "grad_norm": 0.27199292182922363, |
| "learning_rate": 9.962391347654921e-05, |
| "loss": 0.0285, |
| "step": 6970 |
| }, |
| { |
| "grad_norm": 0.2811877131462097, |
| "learning_rate": 9.962137899549204e-05, |
| "loss": 0.0302, |
| "step": 6980 |
| }, |
| { |
| "grad_norm": 0.3445790410041809, |
| "learning_rate": 9.961883603549835e-05, |
| "loss": 0.0288, |
| "step": 6990 |
| }, |
| { |
| "grad_norm": 0.2968343198299408, |
| "learning_rate": 9.961628459700267e-05, |
| "loss": 0.0311, |
| "step": 7000 |
| }, |
| { |
| "grad_norm": 0.21912340819835663, |
| "learning_rate": 9.961372468044095e-05, |
| "loss": 0.0309, |
| "step": 7010 |
| }, |
| { |
| "grad_norm": 0.23689767718315125, |
| "learning_rate": 9.961115628625062e-05, |
| "loss": 0.0297, |
| "step": 7020 |
| }, |
| { |
| "grad_norm": 0.23842456936836243, |
| "learning_rate": 9.960857941487056e-05, |
| "loss": 0.0249, |
| "step": 7030 |
| }, |
| { |
| "grad_norm": 0.39411240816116333, |
| "learning_rate": 9.960599406674106e-05, |
| "loss": 0.0277, |
| "step": 7040 |
| }, |
| { |
| "grad_norm": 0.38863542675971985, |
| "learning_rate": 9.960340024230393e-05, |
| "loss": 0.0269, |
| "step": 7050 |
| }, |
| { |
| "grad_norm": 0.3907487690448761, |
| "learning_rate": 9.960079794200232e-05, |
| "loss": 0.0295, |
| "step": 7060 |
| }, |
| { |
| "grad_norm": 0.2819722592830658, |
| "learning_rate": 9.959818716628096e-05, |
| "loss": 0.0263, |
| "step": 7070 |
| }, |
| { |
| "grad_norm": 0.2811400890350342, |
| "learning_rate": 9.95955679155859e-05, |
| "loss": 0.0271, |
| "step": 7080 |
| }, |
| { |
| "grad_norm": 0.28184035420417786, |
| "learning_rate": 9.959294019036472e-05, |
| "loss": 0.0289, |
| "step": 7090 |
| }, |
| { |
| "grad_norm": 0.3162672817707062, |
| "learning_rate": 9.959030399106646e-05, |
| "loss": 0.0248, |
| "step": 7100 |
| }, |
| { |
| "grad_norm": 0.28583642840385437, |
| "learning_rate": 9.958765931814153e-05, |
| "loss": 0.0307, |
| "step": 7110 |
| }, |
| { |
| "grad_norm": 0.2085208147764206, |
| "learning_rate": 9.958500617204184e-05, |
| "loss": 0.0301, |
| "step": 7120 |
| }, |
| { |
| "grad_norm": 0.32559746503829956, |
| "learning_rate": 9.958234455322075e-05, |
| "loss": 0.0267, |
| "step": 7130 |
| }, |
| { |
| "grad_norm": 0.4059063792228699, |
| "learning_rate": 9.957967446213308e-05, |
| "loss": 0.0299, |
| "step": 7140 |
| }, |
| { |
| "grad_norm": 0.32962876558303833, |
| "learning_rate": 9.957699589923501e-05, |
| "loss": 0.0265, |
| "step": 7150 |
| }, |
| { |
| "grad_norm": 0.2489539533853531, |
| "learning_rate": 9.957430886498431e-05, |
| "loss": 0.0266, |
| "step": 7160 |
| }, |
| { |
| "grad_norm": 0.26507315039634705, |
| "learning_rate": 9.957161335984008e-05, |
| "loss": 0.0251, |
| "step": 7170 |
| }, |
| { |
| "grad_norm": 0.31960949301719666, |
| "learning_rate": 9.956890938426291e-05, |
| "loss": 0.0297, |
| "step": 7180 |
| }, |
| { |
| "grad_norm": 0.39985284209251404, |
| "learning_rate": 9.956619693871482e-05, |
| "loss": 0.0291, |
| "step": 7190 |
| }, |
| { |
| "grad_norm": 0.3071722388267517, |
| "learning_rate": 9.956347602365934e-05, |
| "loss": 0.0299, |
| "step": 7200 |
| }, |
| { |
| "grad_norm": 0.2650475800037384, |
| "learning_rate": 9.956074663956135e-05, |
| "loss": 0.0296, |
| "step": 7210 |
| }, |
| { |
| "grad_norm": 0.380520761013031, |
| "learning_rate": 9.955800878688726e-05, |
| "loss": 0.0299, |
| "step": 7220 |
| }, |
| { |
| "grad_norm": 0.32119613885879517, |
| "learning_rate": 9.955526246610489e-05, |
| "loss": 0.0308, |
| "step": 7230 |
| }, |
| { |
| "grad_norm": 0.3378693461418152, |
| "learning_rate": 9.955250767768349e-05, |
| "loss": 0.0256, |
| "step": 7240 |
| }, |
| { |
| "grad_norm": 0.2946402132511139, |
| "learning_rate": 9.95497444220938e-05, |
| "loss": 0.0266, |
| "step": 7250 |
| }, |
| { |
| "grad_norm": 0.2820631265640259, |
| "learning_rate": 9.954697269980797e-05, |
| "loss": 0.026, |
| "step": 7260 |
| }, |
| { |
| "grad_norm": 0.30381670594215393, |
| "learning_rate": 9.954419251129962e-05, |
| "loss": 0.0263, |
| "step": 7270 |
| }, |
| { |
| "grad_norm": 0.2934808135032654, |
| "learning_rate": 9.95414038570438e-05, |
| "loss": 0.0247, |
| "step": 7280 |
| }, |
| { |
| "grad_norm": 0.24104171991348267, |
| "learning_rate": 9.953860673751703e-05, |
| "loss": 0.0273, |
| "step": 7290 |
| }, |
| { |
| "grad_norm": 0.24186210334300995, |
| "learning_rate": 9.953580115319725e-05, |
| "loss": 0.0267, |
| "step": 7300 |
| }, |
| { |
| "grad_norm": 0.34005916118621826, |
| "learning_rate": 9.953298710456387e-05, |
| "loss": 0.026, |
| "step": 7310 |
| }, |
| { |
| "grad_norm": 0.2795884609222412, |
| "learning_rate": 9.953016459209771e-05, |
| "loss": 0.0273, |
| "step": 7320 |
| }, |
| { |
| "grad_norm": 0.30493995547294617, |
| "learning_rate": 9.952733361628108e-05, |
| "loss": 0.0312, |
| "step": 7330 |
| }, |
| { |
| "grad_norm": 0.29594191908836365, |
| "learning_rate": 9.952449417759772e-05, |
| "loss": 0.0284, |
| "step": 7340 |
| }, |
| { |
| "grad_norm": 0.4051782190799713, |
| "learning_rate": 9.952164627653279e-05, |
| "loss": 0.0305, |
| "step": 7350 |
| }, |
| { |
| "grad_norm": 0.2247595191001892, |
| "learning_rate": 9.951878991357292e-05, |
| "loss": 0.026, |
| "step": 7360 |
| }, |
| { |
| "grad_norm": 0.2571432292461395, |
| "learning_rate": 9.951592508920622e-05, |
| "loss": 0.028, |
| "step": 7370 |
| }, |
| { |
| "grad_norm": 0.2743799090385437, |
| "learning_rate": 9.951305180392219e-05, |
| "loss": 0.0262, |
| "step": 7380 |
| }, |
| { |
| "grad_norm": 0.33545973896980286, |
| "learning_rate": 9.951017005821178e-05, |
| "loss": 0.0299, |
| "step": 7390 |
| }, |
| { |
| "grad_norm": 0.2926540970802307, |
| "learning_rate": 9.95072798525674e-05, |
| "loss": 0.0291, |
| "step": 7400 |
| }, |
| { |
| "grad_norm": 0.3546326458454132, |
| "learning_rate": 9.950438118748293e-05, |
| "loss": 0.0317, |
| "step": 7410 |
| }, |
| { |
| "grad_norm": 0.30050361156463623, |
| "learning_rate": 9.950147406345366e-05, |
| "loss": 0.0281, |
| "step": 7420 |
| }, |
| { |
| "grad_norm": 0.23915919661521912, |
| "learning_rate": 9.949855848097635e-05, |
| "loss": 0.0349, |
| "step": 7430 |
| }, |
| { |
| "grad_norm": 0.19533124566078186, |
| "learning_rate": 9.949563444054916e-05, |
| "loss": 0.0296, |
| "step": 7440 |
| }, |
| { |
| "grad_norm": 0.35138800740242004, |
| "learning_rate": 9.949270194267178e-05, |
| "loss": 0.0304, |
| "step": 7450 |
| }, |
| { |
| "grad_norm": 0.2618047893047333, |
| "learning_rate": 9.948976098784526e-05, |
| "loss": 0.0293, |
| "step": 7460 |
| }, |
| { |
| "grad_norm": 0.36352285742759705, |
| "learning_rate": 9.948681157657213e-05, |
| "loss": 0.0331, |
| "step": 7470 |
| }, |
| { |
| "grad_norm": 0.30749431252479553, |
| "learning_rate": 9.948385370935638e-05, |
| "loss": 0.0276, |
| "step": 7480 |
| }, |
| { |
| "grad_norm": 0.28992339968681335, |
| "learning_rate": 9.94808873867034e-05, |
| "loss": 0.0296, |
| "step": 7490 |
| }, |
| { |
| "grad_norm": 0.21981099247932434, |
| "learning_rate": 9.947791260912009e-05, |
| "loss": 0.0303, |
| "step": 7500 |
| }, |
| { |
| "grad_norm": 0.3701888918876648, |
| "learning_rate": 9.947492937711474e-05, |
| "loss": 0.0247, |
| "step": 7510 |
| }, |
| { |
| "grad_norm": 0.29776903986930847, |
| "learning_rate": 9.947193769119707e-05, |
| "loss": 0.0243, |
| "step": 7520 |
| }, |
| { |
| "grad_norm": 0.26625514030456543, |
| "learning_rate": 9.946893755187834e-05, |
| "loss": 0.0292, |
| "step": 7530 |
| }, |
| { |
| "grad_norm": 0.3425240218639374, |
| "learning_rate": 9.946592895967115e-05, |
| "loss": 0.0279, |
| "step": 7540 |
| }, |
| { |
| "grad_norm": 0.3067252039909363, |
| "learning_rate": 9.94629119150896e-05, |
| "loss": 0.0272, |
| "step": 7550 |
| }, |
| { |
| "grad_norm": 0.3026585578918457, |
| "learning_rate": 9.94598864186492e-05, |
| "loss": 0.0303, |
| "step": 7560 |
| }, |
| { |
| "grad_norm": 0.25410696864128113, |
| "learning_rate": 9.945685247086696e-05, |
| "loss": 0.0256, |
| "step": 7570 |
| }, |
| { |
| "grad_norm": 0.29221490025520325, |
| "learning_rate": 9.945381007226129e-05, |
| "loss": 0.0304, |
| "step": 7580 |
| }, |
| { |
| "grad_norm": 0.24581211805343628, |
| "learning_rate": 9.945075922335203e-05, |
| "loss": 0.0234, |
| "step": 7590 |
| }, |
| { |
| "grad_norm": 0.28238850831985474, |
| "learning_rate": 9.944769992466049e-05, |
| "loss": 0.0263, |
| "step": 7600 |
| }, |
| { |
| "grad_norm": 0.251505047082901, |
| "learning_rate": 9.944463217670945e-05, |
| "loss": 0.0291, |
| "step": 7610 |
| }, |
| { |
| "grad_norm": 0.2315089851617813, |
| "learning_rate": 9.944155598002307e-05, |
| "loss": 0.0281, |
| "step": 7620 |
| }, |
| { |
| "grad_norm": 0.26602116227149963, |
| "learning_rate": 9.943847133512701e-05, |
| "loss": 0.026, |
| "step": 7630 |
| }, |
| { |
| "grad_norm": 0.40367060899734497, |
| "learning_rate": 9.943537824254834e-05, |
| "loss": 0.034, |
| "step": 7640 |
| }, |
| { |
| "grad_norm": 0.26691651344299316, |
| "learning_rate": 9.943227670281559e-05, |
| "loss": 0.0315, |
| "step": 7650 |
| }, |
| { |
| "grad_norm": 0.24326331913471222, |
| "learning_rate": 9.942916671645873e-05, |
| "loss": 0.0243, |
| "step": 7660 |
| }, |
| { |
| "grad_norm": 0.3013291656970978, |
| "learning_rate": 9.942604828400916e-05, |
| "loss": 0.0277, |
| "step": 7670 |
| }, |
| { |
| "grad_norm": 0.3073980510234833, |
| "learning_rate": 9.942292140599975e-05, |
| "loss": 0.0278, |
| "step": 7680 |
| }, |
| { |
| "grad_norm": 0.2990018427371979, |
| "learning_rate": 9.94197860829648e-05, |
| "loss": 0.0284, |
| "step": 7690 |
| }, |
| { |
| "grad_norm": 0.273559033870697, |
| "learning_rate": 9.941664231544004e-05, |
| "loss": 0.0274, |
| "step": 7700 |
| }, |
| { |
| "grad_norm": 0.2964268624782562, |
| "learning_rate": 9.941349010396264e-05, |
| "loss": 0.0271, |
| "step": 7710 |
| }, |
| { |
| "grad_norm": 0.28047996759414673, |
| "learning_rate": 9.941032944907125e-05, |
| "loss": 0.0224, |
| "step": 7720 |
| }, |
| { |
| "grad_norm": 0.2647440433502197, |
| "learning_rate": 9.940716035130596e-05, |
| "loss": 0.0277, |
| "step": 7730 |
| }, |
| { |
| "grad_norm": 0.3013063073158264, |
| "learning_rate": 9.940398281120821e-05, |
| "loss": 0.0257, |
| "step": 7740 |
| }, |
| { |
| "grad_norm": 0.2597537338733673, |
| "learning_rate": 9.940079682932102e-05, |
| "loss": 0.0283, |
| "step": 7750 |
| }, |
| { |
| "grad_norm": 0.21689435839653015, |
| "learning_rate": 9.939760240618877e-05, |
| "loss": 0.0271, |
| "step": 7760 |
| }, |
| { |
| "grad_norm": 0.26306426525115967, |
| "learning_rate": 9.939439954235729e-05, |
| "loss": 0.025, |
| "step": 7770 |
| }, |
| { |
| "grad_norm": 0.2899826467037201, |
| "learning_rate": 9.939118823837387e-05, |
| "loss": 0.0245, |
| "step": 7780 |
| }, |
| { |
| "grad_norm": 0.19139663875102997, |
| "learning_rate": 9.938796849478725e-05, |
| "loss": 0.0226, |
| "step": 7790 |
| }, |
| { |
| "grad_norm": 0.1981378048658371, |
| "learning_rate": 9.938474031214755e-05, |
| "loss": 0.0254, |
| "step": 7800 |
| }, |
| { |
| "grad_norm": 0.2784959077835083, |
| "learning_rate": 9.938150369100643e-05, |
| "loss": 0.026, |
| "step": 7810 |
| }, |
| { |
| "grad_norm": 0.25330811738967896, |
| "learning_rate": 9.93782586319169e-05, |
| "loss": 0.0249, |
| "step": 7820 |
| }, |
| { |
| "grad_norm": 0.2706470191478729, |
| "learning_rate": 9.937500513543348e-05, |
| "loss": 0.0244, |
| "step": 7830 |
| }, |
| { |
| "grad_norm": 0.2947620153427124, |
| "learning_rate": 9.937174320211207e-05, |
| "loss": 0.0261, |
| "step": 7840 |
| }, |
| { |
| "grad_norm": 0.23056454956531525, |
| "learning_rate": 9.936847283251009e-05, |
| "loss": 0.0229, |
| "step": 7850 |
| }, |
| { |
| "grad_norm": 0.3381933271884918, |
| "learning_rate": 9.936519402718632e-05, |
| "loss": 0.0251, |
| "step": 7860 |
| }, |
| { |
| "grad_norm": 0.2765756845474243, |
| "learning_rate": 9.936190678670102e-05, |
| "loss": 0.0247, |
| "step": 7870 |
| }, |
| { |
| "grad_norm": 0.22710005939006805, |
| "learning_rate": 9.935861111161593e-05, |
| "loss": 0.0292, |
| "step": 7880 |
| }, |
| { |
| "grad_norm": 0.2768805921077728, |
| "learning_rate": 9.935530700249416e-05, |
| "loss": 0.023, |
| "step": 7890 |
| }, |
| { |
| "grad_norm": 0.207578644156456, |
| "learning_rate": 9.935199445990028e-05, |
| "loss": 0.0252, |
| "step": 7900 |
| }, |
| { |
| "grad_norm": 0.3019058406352997, |
| "learning_rate": 9.934867348440033e-05, |
| "loss": 0.0295, |
| "step": 7910 |
| }, |
| { |
| "grad_norm": 0.26228857040405273, |
| "learning_rate": 9.934534407656176e-05, |
| "loss": 0.0264, |
| "step": 7920 |
| }, |
| { |
| "grad_norm": 0.3489040434360504, |
| "learning_rate": 9.93420062369535e-05, |
| "loss": 0.0289, |
| "step": 7930 |
| }, |
| { |
| "grad_norm": 0.26589077711105347, |
| "learning_rate": 9.933865996614589e-05, |
| "loss": 0.029, |
| "step": 7940 |
| }, |
| { |
| "grad_norm": 0.24701766669750214, |
| "learning_rate": 9.933530526471068e-05, |
| "loss": 0.0236, |
| "step": 7950 |
| }, |
| { |
| "grad_norm": 0.28054970502853394, |
| "learning_rate": 9.933194213322114e-05, |
| "loss": 0.0269, |
| "step": 7960 |
| }, |
| { |
| "grad_norm": 0.25400447845458984, |
| "learning_rate": 9.932857057225192e-05, |
| "loss": 0.0274, |
| "step": 7970 |
| }, |
| { |
| "grad_norm": 0.2344646155834198, |
| "learning_rate": 9.932519058237912e-05, |
| "loss": 0.0283, |
| "step": 7980 |
| }, |
| { |
| "grad_norm": 0.2422647625207901, |
| "learning_rate": 9.932180216418032e-05, |
| "loss": 0.0237, |
| "step": 7990 |
| }, |
| { |
| "grad_norm": 0.23054273426532745, |
| "learning_rate": 9.931840531823446e-05, |
| "loss": 0.0243, |
| "step": 8000 |
| }, |
| { |
| "grad_norm": 0.2869581878185272, |
| "learning_rate": 9.9315000045122e-05, |
| "loss": 0.0262, |
| "step": 8010 |
| }, |
| { |
| "grad_norm": 0.30287492275238037, |
| "learning_rate": 9.931158634542481e-05, |
| "loss": 0.028, |
| "step": 8020 |
| }, |
| { |
| "grad_norm": 0.22392156720161438, |
| "learning_rate": 9.930816421972617e-05, |
| "loss": 0.0259, |
| "step": 8030 |
| }, |
| { |
| "grad_norm": 0.3009633421897888, |
| "learning_rate": 9.930473366861086e-05, |
| "loss": 0.0276, |
| "step": 8040 |
| }, |
| { |
| "grad_norm": 0.2293577939271927, |
| "learning_rate": 9.930129469266505e-05, |
| "loss": 0.0242, |
| "step": 8050 |
| }, |
| { |
| "grad_norm": 0.20792780816555023, |
| "learning_rate": 9.929784729247638e-05, |
| "loss": 0.0238, |
| "step": 8060 |
| }, |
| { |
| "grad_norm": 0.30487683415412903, |
| "learning_rate": 9.929439146863389e-05, |
| "loss": 0.0269, |
| "step": 8070 |
| }, |
| { |
| "grad_norm": 0.2773206830024719, |
| "learning_rate": 9.92909272217281e-05, |
| "loss": 0.0289, |
| "step": 8080 |
| }, |
| { |
| "grad_norm": 0.2209886908531189, |
| "learning_rate": 9.928745455235097e-05, |
| "loss": 0.0239, |
| "step": 8090 |
| }, |
| { |
| "grad_norm": 0.2363114356994629, |
| "learning_rate": 9.928397346109588e-05, |
| "loss": 0.0258, |
| "step": 8100 |
| }, |
| { |
| "grad_norm": 0.23011241853237152, |
| "learning_rate": 9.928048394855762e-05, |
| "loss": 0.0287, |
| "step": 8110 |
| }, |
| { |
| "grad_norm": 0.25448304414749146, |
| "learning_rate": 9.92769860153325e-05, |
| "loss": 0.0278, |
| "step": 8120 |
| }, |
| { |
| "grad_norm": 0.26419103145599365, |
| "learning_rate": 9.927347966201819e-05, |
| "loss": 0.0247, |
| "step": 8130 |
| }, |
| { |
| "grad_norm": 0.30355438590049744, |
| "learning_rate": 9.926996488921383e-05, |
| "loss": 0.0285, |
| "step": 8140 |
| }, |
| { |
| "grad_norm": 0.30782219767570496, |
| "learning_rate": 9.926644169752001e-05, |
| "loss": 0.029, |
| "step": 8150 |
| }, |
| { |
| "grad_norm": 0.25190019607543945, |
| "learning_rate": 9.926291008753875e-05, |
| "loss": 0.0275, |
| "step": 8160 |
| }, |
| { |
| "grad_norm": 0.2714674174785614, |
| "learning_rate": 9.92593700598735e-05, |
| "loss": 0.0272, |
| "step": 8170 |
| }, |
| { |
| "grad_norm": 0.29583925008773804, |
| "learning_rate": 9.925582161512915e-05, |
| "loss": 0.0261, |
| "step": 8180 |
| }, |
| { |
| "grad_norm": 0.2607842981815338, |
| "learning_rate": 9.925226475391205e-05, |
| "loss": 0.0279, |
| "step": 8190 |
| }, |
| { |
| "grad_norm": 0.23056012392044067, |
| "learning_rate": 9.924869947682993e-05, |
| "loss": 0.0231, |
| "step": 8200 |
| }, |
| { |
| "grad_norm": 0.2710048258304596, |
| "learning_rate": 9.924512578449204e-05, |
| "loss": 0.0262, |
| "step": 8210 |
| }, |
| { |
| "grad_norm": 0.3241911828517914, |
| "learning_rate": 9.924154367750901e-05, |
| "loss": 0.0224, |
| "step": 8220 |
| }, |
| { |
| "grad_norm": 0.28296151757240295, |
| "learning_rate": 9.923795315649293e-05, |
| "loss": 0.0245, |
| "step": 8230 |
| }, |
| { |
| "grad_norm": 0.2435009628534317, |
| "learning_rate": 9.92343542220573e-05, |
| "loss": 0.0241, |
| "step": 8240 |
| }, |
| { |
| "grad_norm": 0.2360232025384903, |
| "learning_rate": 9.92307468748171e-05, |
| "loss": 0.025, |
| "step": 8250 |
| }, |
| { |
| "grad_norm": 0.3190790116786957, |
| "learning_rate": 9.922713111538873e-05, |
| "loss": 0.027, |
| "step": 8260 |
| }, |
| { |
| "grad_norm": 0.20300662517547607, |
| "learning_rate": 9.922350694439003e-05, |
| "loss": 0.0284, |
| "step": 8270 |
| }, |
| { |
| "grad_norm": 0.2054835557937622, |
| "learning_rate": 9.921987436244024e-05, |
| "loss": 0.0276, |
| "step": 8280 |
| }, |
| { |
| "grad_norm": 0.2487817406654358, |
| "learning_rate": 9.921623337016008e-05, |
| "loss": 0.0255, |
| "step": 8290 |
| }, |
| { |
| "grad_norm": 0.24625623226165771, |
| "learning_rate": 9.921258396817172e-05, |
| "loss": 0.0239, |
| "step": 8300 |
| }, |
| { |
| "grad_norm": 0.3312872350215912, |
| "learning_rate": 9.920892615709874e-05, |
| "loss": 0.0272, |
| "step": 8310 |
| }, |
| { |
| "grad_norm": 0.24326156079769135, |
| "learning_rate": 9.920525993756612e-05, |
| "loss": 0.0272, |
| "step": 8320 |
| }, |
| { |
| "grad_norm": 0.26025789976119995, |
| "learning_rate": 9.920158531020036e-05, |
| "loss": 0.0247, |
| "step": 8330 |
| }, |
| { |
| "grad_norm": 0.2548008859157562, |
| "learning_rate": 9.919790227562933e-05, |
| "loss": 0.025, |
| "step": 8340 |
| }, |
| { |
| "grad_norm": 0.20518814027309418, |
| "learning_rate": 9.919421083448237e-05, |
| "loss": 0.0232, |
| "step": 8350 |
| }, |
| { |
| "grad_norm": 0.24752728641033173, |
| "learning_rate": 9.919051098739022e-05, |
| "loss": 0.0235, |
| "step": 8360 |
| }, |
| { |
| "grad_norm": 0.25243431329727173, |
| "learning_rate": 9.918680273498514e-05, |
| "loss": 0.0245, |
| "step": 8370 |
| }, |
| { |
| "grad_norm": 0.2901827096939087, |
| "learning_rate": 9.918308607790072e-05, |
| "loss": 0.0254, |
| "step": 8380 |
| }, |
| { |
| "grad_norm": 0.2677813470363617, |
| "learning_rate": 9.917936101677205e-05, |
| "loss": 0.023, |
| "step": 8390 |
| }, |
| { |
| "grad_norm": 0.3392478823661804, |
| "learning_rate": 9.917562755223564e-05, |
| "loss": 0.0223, |
| "step": 8400 |
| }, |
| { |
| "grad_norm": 0.23790103197097778, |
| "learning_rate": 9.917188568492944e-05, |
| "loss": 0.0227, |
| "step": 8410 |
| }, |
| { |
| "grad_norm": 0.34391242265701294, |
| "learning_rate": 9.916813541549283e-05, |
| "loss": 0.026, |
| "step": 8420 |
| }, |
| { |
| "grad_norm": 0.2735089957714081, |
| "learning_rate": 9.916437674456663e-05, |
| "loss": 0.0211, |
| "step": 8430 |
| }, |
| { |
| "grad_norm": 0.19652947783470154, |
| "learning_rate": 9.916060967279308e-05, |
| "loss": 0.0243, |
| "step": 8440 |
| }, |
| { |
| "grad_norm": 0.25613489747047424, |
| "learning_rate": 9.91568342008159e-05, |
| "loss": 0.0267, |
| "step": 8450 |
| }, |
| { |
| "grad_norm": 0.2633569538593292, |
| "learning_rate": 9.915305032928019e-05, |
| "loss": 0.0254, |
| "step": 8460 |
| }, |
| { |
| "grad_norm": 0.23426930606365204, |
| "learning_rate": 9.914925805883253e-05, |
| "loss": 0.0259, |
| "step": 8470 |
| }, |
| { |
| "grad_norm": 0.2730519771575928, |
| "learning_rate": 9.914545739012088e-05, |
| "loss": 0.0246, |
| "step": 8480 |
| }, |
| { |
| "grad_norm": 0.21916942298412323, |
| "learning_rate": 9.91416483237947e-05, |
| "loss": 0.023, |
| "step": 8490 |
| }, |
| { |
| "grad_norm": 0.2629518210887909, |
| "learning_rate": 9.913783086050485e-05, |
| "loss": 0.0233, |
| "step": 8500 |
| }, |
| { |
| "grad_norm": 0.2678612172603607, |
| "learning_rate": 9.913400500090364e-05, |
| "loss": 0.023, |
| "step": 8510 |
| }, |
| { |
| "grad_norm": 0.26758983731269836, |
| "learning_rate": 9.913017074564479e-05, |
| "loss": 0.0255, |
| "step": 8520 |
| }, |
| { |
| "grad_norm": 0.20346862077713013, |
| "learning_rate": 9.912632809538348e-05, |
| "loss": 0.0287, |
| "step": 8530 |
| }, |
| { |
| "grad_norm": 0.24763819575309753, |
| "learning_rate": 9.912247705077629e-05, |
| "loss": 0.0286, |
| "step": 8540 |
| }, |
| { |
| "grad_norm": 0.3322838544845581, |
| "learning_rate": 9.911861761248127e-05, |
| "loss": 0.0261, |
| "step": 8550 |
| }, |
| { |
| "grad_norm": 0.22574086487293243, |
| "learning_rate": 9.91147497811579e-05, |
| "loss": 0.0256, |
| "step": 8560 |
| }, |
| { |
| "grad_norm": 0.3162856698036194, |
| "learning_rate": 9.911087355746709e-05, |
| "loss": 0.0223, |
| "step": 8570 |
| }, |
| { |
| "grad_norm": 0.3085162043571472, |
| "learning_rate": 9.910698894207117e-05, |
| "loss": 0.0253, |
| "step": 8580 |
| }, |
| { |
| "grad_norm": 0.21497994661331177, |
| "learning_rate": 9.910309593563392e-05, |
| "loss": 0.023, |
| "step": 8590 |
| }, |
| { |
| "grad_norm": 0.19948522746562958, |
| "learning_rate": 9.909919453882057e-05, |
| "loss": 0.0223, |
| "step": 8600 |
| }, |
| { |
| "grad_norm": 0.2361244112253189, |
| "learning_rate": 9.90952847522977e-05, |
| "loss": 0.0241, |
| "step": 8610 |
| }, |
| { |
| "grad_norm": 0.29840973019599915, |
| "learning_rate": 9.909136657673346e-05, |
| "loss": 0.0258, |
| "step": 8620 |
| }, |
| { |
| "grad_norm": 0.25735464692115784, |
| "learning_rate": 9.908744001279731e-05, |
| "loss": 0.021, |
| "step": 8630 |
| }, |
| { |
| "grad_norm": 0.2598496079444885, |
| "learning_rate": 9.90835050611602e-05, |
| "loss": 0.0223, |
| "step": 8640 |
| }, |
| { |
| "grad_norm": 0.23943184316158295, |
| "learning_rate": 9.90795617224945e-05, |
| "loss": 0.0266, |
| "step": 8650 |
| }, |
| { |
| "grad_norm": 0.21757365763187408, |
| "learning_rate": 9.907560999747405e-05, |
| "loss": 0.0253, |
| "step": 8660 |
| }, |
| { |
| "grad_norm": 0.2191021740436554, |
| "learning_rate": 9.907164988677408e-05, |
| "loss": 0.0257, |
| "step": 8670 |
| }, |
| { |
| "grad_norm": 0.2412397861480713, |
| "learning_rate": 9.906768139107124e-05, |
| "loss": 0.0249, |
| "step": 8680 |
| }, |
| { |
| "grad_norm": 0.2552809417247772, |
| "learning_rate": 9.906370451104367e-05, |
| "loss": 0.0239, |
| "step": 8690 |
| }, |
| { |
| "grad_norm": 0.34299716353416443, |
| "learning_rate": 9.905971924737088e-05, |
| "loss": 0.0219, |
| "step": 8700 |
| }, |
| { |
| "grad_norm": 0.20640961825847626, |
| "learning_rate": 9.905572560073387e-05, |
| "loss": 0.0234, |
| "step": 8710 |
| }, |
| { |
| "grad_norm": 0.3168725371360779, |
| "learning_rate": 9.905172357181501e-05, |
| "loss": 0.0203, |
| "step": 8720 |
| }, |
| { |
| "grad_norm": 0.2035190910100937, |
| "learning_rate": 9.904771316129817e-05, |
| "loss": 0.0268, |
| "step": 8730 |
| }, |
| { |
| "grad_norm": 0.274885892868042, |
| "learning_rate": 9.904369436986862e-05, |
| "loss": 0.0227, |
| "step": 8740 |
| }, |
| { |
| "grad_norm": 0.3238915205001831, |
| "learning_rate": 9.903966719821303e-05, |
| "loss": 0.0274, |
| "step": 8750 |
| }, |
| { |
| "grad_norm": 0.31647101044654846, |
| "learning_rate": 9.903563164701956e-05, |
| "loss": 0.0248, |
| "step": 8760 |
| }, |
| { |
| "grad_norm": 0.25814852118492126, |
| "learning_rate": 9.903158771697778e-05, |
| "loss": 0.0212, |
| "step": 8770 |
| }, |
| { |
| "grad_norm": 0.28999999165534973, |
| "learning_rate": 9.902753540877867e-05, |
| "loss": 0.0215, |
| "step": 8780 |
| }, |
| { |
| "grad_norm": 0.23847296833992004, |
| "learning_rate": 9.902347472311466e-05, |
| "loss": 0.0233, |
| "step": 8790 |
| }, |
| { |
| "grad_norm": 0.21799112856388092, |
| "learning_rate": 9.901940566067962e-05, |
| "loss": 0.0281, |
| "step": 8800 |
| }, |
| { |
| "grad_norm": 0.2692367136478424, |
| "learning_rate": 9.901532822216883e-05, |
| "loss": 0.0255, |
| "step": 8810 |
| }, |
| { |
| "grad_norm": 0.20797976851463318, |
| "learning_rate": 9.901124240827904e-05, |
| "loss": 0.0267, |
| "step": 8820 |
| }, |
| { |
| "grad_norm": 0.16693612933158875, |
| "learning_rate": 9.900714821970835e-05, |
| "loss": 0.0285, |
| "step": 8830 |
| }, |
| { |
| "grad_norm": 0.23021844029426575, |
| "learning_rate": 9.900304565715641e-05, |
| "loss": 0.0265, |
| "step": 8840 |
| }, |
| { |
| "grad_norm": 0.2509411871433258, |
| "learning_rate": 9.899893472132419e-05, |
| "loss": 0.0263, |
| "step": 8850 |
| }, |
| { |
| "grad_norm": 0.24572598934173584, |
| "learning_rate": 9.899481541291415e-05, |
| "loss": 0.0237, |
| "step": 8860 |
| }, |
| { |
| "grad_norm": 0.2603510320186615, |
| "learning_rate": 9.899068773263016e-05, |
| "loss": 0.0254, |
| "step": 8870 |
| }, |
| { |
| "grad_norm": 0.24887840449810028, |
| "learning_rate": 9.898655168117754e-05, |
| "loss": 0.0237, |
| "step": 8880 |
| }, |
| { |
| "grad_norm": 0.261653333902359, |
| "learning_rate": 9.898240725926302e-05, |
| "loss": 0.0253, |
| "step": 8890 |
| }, |
| { |
| "grad_norm": 0.304040789604187, |
| "learning_rate": 9.897825446759478e-05, |
| "loss": 0.0206, |
| "step": 8900 |
| }, |
| { |
| "grad_norm": 0.26770681142807007, |
| "learning_rate": 9.897409330688241e-05, |
| "loss": 0.0246, |
| "step": 8910 |
| }, |
| { |
| "grad_norm": 0.2028878927230835, |
| "learning_rate": 9.896992377783692e-05, |
| "loss": 0.0224, |
| "step": 8920 |
| }, |
| { |
| "grad_norm": 0.20525990426540375, |
| "learning_rate": 9.89657458811708e-05, |
| "loss": 0.0224, |
| "step": 8930 |
| }, |
| { |
| "grad_norm": 0.2241036295890808, |
| "learning_rate": 9.896155961759792e-05, |
| "loss": 0.0221, |
| "step": 8940 |
| }, |
| { |
| "grad_norm": 0.29430943727493286, |
| "learning_rate": 9.895736498783361e-05, |
| "loss": 0.026, |
| "step": 8950 |
| }, |
| { |
| "grad_norm": 0.3020664155483246, |
| "learning_rate": 9.895316199259462e-05, |
| "loss": 0.0283, |
| "step": 8960 |
| }, |
| { |
| "grad_norm": 0.24055476486682892, |
| "learning_rate": 9.894895063259909e-05, |
| "loss": 0.0259, |
| "step": 8970 |
| }, |
| { |
| "grad_norm": 0.2667890191078186, |
| "learning_rate": 9.894473090856667e-05, |
| "loss": 0.0268, |
| "step": 8980 |
| }, |
| { |
| "grad_norm": 0.2760610580444336, |
| "learning_rate": 9.894050282121839e-05, |
| "loss": 0.0255, |
| "step": 8990 |
| }, |
| { |
| "grad_norm": 0.2546814978122711, |
| "learning_rate": 9.893626637127668e-05, |
| "loss": 0.0282, |
| "step": 9000 |
| }, |
| { |
| "grad_norm": 0.23294395208358765, |
| "learning_rate": 9.893202155946546e-05, |
| "loss": 0.0267, |
| "step": 9010 |
| }, |
| { |
| "grad_norm": 0.29643920063972473, |
| "learning_rate": 9.892776838651006e-05, |
| "loss": 0.021, |
| "step": 9020 |
| }, |
| { |
| "grad_norm": 0.2604086399078369, |
| "learning_rate": 9.892350685313722e-05, |
| "loss": 0.0266, |
| "step": 9030 |
| }, |
| { |
| "grad_norm": 0.2267618030309677, |
| "learning_rate": 9.891923696007513e-05, |
| "loss": 0.0242, |
| "step": 9040 |
| }, |
| { |
| "grad_norm": 0.22060492634773254, |
| "learning_rate": 9.891495870805336e-05, |
| "loss": 0.0212, |
| "step": 9050 |
| }, |
| { |
| "grad_norm": 0.230683833360672, |
| "learning_rate": 9.891067209780298e-05, |
| "loss": 0.0226, |
| "step": 9060 |
| }, |
| { |
| "grad_norm": 0.25970616936683655, |
| "learning_rate": 9.890637713005646e-05, |
| "loss": 0.024, |
| "step": 9070 |
| }, |
| { |
| "grad_norm": 0.3181430399417877, |
| "learning_rate": 9.890207380554767e-05, |
| "loss": 0.0244, |
| "step": 9080 |
| }, |
| { |
| "grad_norm": 0.30151650309562683, |
| "learning_rate": 9.889776212501196e-05, |
| "loss": 0.0254, |
| "step": 9090 |
| }, |
| { |
| "grad_norm": 0.20736676454544067, |
| "learning_rate": 9.889344208918605e-05, |
| "loss": 0.0228, |
| "step": 9100 |
| }, |
| { |
| "grad_norm": 0.17086367309093475, |
| "learning_rate": 9.888911369880812e-05, |
| "loss": 0.0225, |
| "step": 9110 |
| }, |
| { |
| "grad_norm": 0.24429209530353546, |
| "learning_rate": 9.888477695461777e-05, |
| "loss": 0.0252, |
| "step": 9120 |
| }, |
| { |
| "grad_norm": 0.2361510843038559, |
| "learning_rate": 9.888043185735607e-05, |
| "loss": 0.0237, |
| "step": 9130 |
| }, |
| { |
| "grad_norm": 0.25494277477264404, |
| "learning_rate": 9.887607840776542e-05, |
| "loss": 0.0253, |
| "step": 9140 |
| }, |
| { |
| "grad_norm": 0.21166761219501495, |
| "learning_rate": 9.887171660658975e-05, |
| "loss": 0.023, |
| "step": 9150 |
| }, |
| { |
| "grad_norm": 0.2803945541381836, |
| "learning_rate": 9.886734645457435e-05, |
| "loss": 0.0227, |
| "step": 9160 |
| }, |
| { |
| "grad_norm": 0.22324180603027344, |
| "learning_rate": 9.886296795246597e-05, |
| "loss": 0.0263, |
| "step": 9170 |
| }, |
| { |
| "grad_norm": 0.24454787373542786, |
| "learning_rate": 9.885858110101276e-05, |
| "loss": 0.0224, |
| "step": 9180 |
| }, |
| { |
| "grad_norm": 0.2701004147529602, |
| "learning_rate": 9.885418590096434e-05, |
| "loss": 0.0238, |
| "step": 9190 |
| }, |
| { |
| "grad_norm": 0.26043701171875, |
| "learning_rate": 9.88497823530717e-05, |
| "loss": 0.0245, |
| "step": 9200 |
| }, |
| { |
| "grad_norm": 0.31409865617752075, |
| "learning_rate": 9.884537045808732e-05, |
| "loss": 0.0254, |
| "step": 9210 |
| }, |
| { |
| "grad_norm": 0.30593937635421753, |
| "learning_rate": 9.884095021676502e-05, |
| "loss": 0.0282, |
| "step": 9220 |
| }, |
| { |
| "grad_norm": 0.2996588349342346, |
| "learning_rate": 9.883652162986017e-05, |
| "loss": 0.0239, |
| "step": 9230 |
| }, |
| { |
| "grad_norm": 0.31036072969436646, |
| "learning_rate": 9.883208469812943e-05, |
| "loss": 0.0242, |
| "step": 9240 |
| }, |
| { |
| "grad_norm": 0.2229168713092804, |
| "learning_rate": 9.882763942233098e-05, |
| "loss": 0.0227, |
| "step": 9250 |
| }, |
| { |
| "grad_norm": 0.21954940259456635, |
| "learning_rate": 9.882318580322441e-05, |
| "loss": 0.0228, |
| "step": 9260 |
| }, |
| { |
| "grad_norm": 0.20078396797180176, |
| "learning_rate": 9.881872384157067e-05, |
| "loss": 0.0238, |
| "step": 9270 |
| }, |
| { |
| "grad_norm": 0.35384637117385864, |
| "learning_rate": 9.881425353813225e-05, |
| "loss": 0.0232, |
| "step": 9280 |
| }, |
| { |
| "grad_norm": 0.23953738808631897, |
| "learning_rate": 9.880977489367296e-05, |
| "loss": 0.0242, |
| "step": 9290 |
| }, |
| { |
| "grad_norm": 0.31235888600349426, |
| "learning_rate": 9.88052879089581e-05, |
| "loss": 0.0248, |
| "step": 9300 |
| }, |
| { |
| "grad_norm": 0.29150423407554626, |
| "learning_rate": 9.880079258475434e-05, |
| "loss": 0.0228, |
| "step": 9310 |
| }, |
| { |
| "grad_norm": 0.21773847937583923, |
| "learning_rate": 9.879628892182985e-05, |
| "loss": 0.0205, |
| "step": 9320 |
| }, |
| { |
| "grad_norm": 0.21790239214897156, |
| "learning_rate": 9.879177692095416e-05, |
| "loss": 0.0227, |
| "step": 9330 |
| }, |
| { |
| "grad_norm": 0.22045554220676422, |
| "learning_rate": 9.878725658289825e-05, |
| "loss": 0.0215, |
| "step": 9340 |
| }, |
| { |
| "grad_norm": 0.2567881643772125, |
| "learning_rate": 9.878272790843454e-05, |
| "loss": 0.0222, |
| "step": 9350 |
| }, |
| { |
| "grad_norm": 0.27277886867523193, |
| "learning_rate": 9.877819089833682e-05, |
| "loss": 0.0208, |
| "step": 9360 |
| }, |
| { |
| "grad_norm": 0.2389393150806427, |
| "learning_rate": 9.877364555338038e-05, |
| "loss": 0.0201, |
| "step": 9370 |
| }, |
| { |
| "grad_norm": 0.22118143737316132, |
| "learning_rate": 9.876909187434186e-05, |
| "loss": 0.0223, |
| "step": 9380 |
| }, |
| { |
| "grad_norm": 0.23778778314590454, |
| "learning_rate": 9.876452986199939e-05, |
| "loss": 0.0198, |
| "step": 9390 |
| }, |
| { |
| "grad_norm": 0.23677578568458557, |
| "learning_rate": 9.875995951713248e-05, |
| "loss": 0.0228, |
| "step": 9400 |
| }, |
| { |
| "grad_norm": 0.2681795060634613, |
| "learning_rate": 9.875538084052207e-05, |
| "loss": 0.0191, |
| "step": 9410 |
| }, |
| { |
| "grad_norm": 0.26710137724876404, |
| "learning_rate": 9.875079383295053e-05, |
| "loss": 0.0224, |
| "step": 9420 |
| }, |
| { |
| "grad_norm": 0.27031534910202026, |
| "learning_rate": 9.874619849520167e-05, |
| "loss": 0.0243, |
| "step": 9430 |
| }, |
| { |
| "grad_norm": 0.2640977203845978, |
| "learning_rate": 9.874159482806069e-05, |
| "loss": 0.021, |
| "step": 9440 |
| }, |
| { |
| "grad_norm": 0.27031025290489197, |
| "learning_rate": 9.873698283231426e-05, |
| "loss": 0.0247, |
| "step": 9450 |
| }, |
| { |
| "grad_norm": 0.2868097424507141, |
| "learning_rate": 9.87323625087504e-05, |
| "loss": 0.0214, |
| "step": 9460 |
| }, |
| { |
| "grad_norm": 0.21061162650585175, |
| "learning_rate": 9.872773385815863e-05, |
| "loss": 0.0228, |
| "step": 9470 |
| }, |
| { |
| "grad_norm": 0.19812670350074768, |
| "learning_rate": 9.872309688132986e-05, |
| "loss": 0.0233, |
| "step": 9480 |
| }, |
| { |
| "grad_norm": 0.33431848883628845, |
| "learning_rate": 9.871845157905639e-05, |
| "loss": 0.0217, |
| "step": 9490 |
| }, |
| { |
| "grad_norm": 0.2857918441295624, |
| "learning_rate": 9.871379795213201e-05, |
| "loss": 0.0234, |
| "step": 9500 |
| }, |
| { |
| "grad_norm": 0.24880020320415497, |
| "learning_rate": 9.87091360013519e-05, |
| "loss": 0.0241, |
| "step": 9510 |
| }, |
| { |
| "grad_norm": 0.2224482148885727, |
| "learning_rate": 9.870446572751262e-05, |
| "loss": 0.022, |
| "step": 9520 |
| }, |
| { |
| "grad_norm": 0.2825033962726593, |
| "learning_rate": 9.869978713141224e-05, |
| "loss": 0.0186, |
| "step": 9530 |
| }, |
| { |
| "grad_norm": 0.2358483225107193, |
| "learning_rate": 9.869510021385016e-05, |
| "loss": 0.0211, |
| "step": 9540 |
| }, |
| { |
| "grad_norm": 0.21550311148166656, |
| "learning_rate": 9.869040497562727e-05, |
| "loss": 0.0267, |
| "step": 9550 |
| }, |
| { |
| "grad_norm": 0.29373157024383545, |
| "learning_rate": 9.868570141754587e-05, |
| "loss": 0.0225, |
| "step": 9560 |
| }, |
| { |
| "grad_norm": 0.32460886240005493, |
| "learning_rate": 9.868098954040965e-05, |
| "loss": 0.023, |
| "step": 9570 |
| }, |
| { |
| "grad_norm": 0.36684587597846985, |
| "learning_rate": 9.867626934502374e-05, |
| "loss": 0.0255, |
| "step": 9580 |
| }, |
| { |
| "grad_norm": 0.30278316140174866, |
| "learning_rate": 9.86715408321947e-05, |
| "loss": 0.026, |
| "step": 9590 |
| }, |
| { |
| "grad_norm": 0.25676724314689636, |
| "learning_rate": 9.86668040027305e-05, |
| "loss": 0.0205, |
| "step": 9600 |
| }, |
| { |
| "grad_norm": 0.23263034224510193, |
| "learning_rate": 9.866205885744053e-05, |
| "loss": 0.024, |
| "step": 9610 |
| }, |
| { |
| "grad_norm": 0.2575293481349945, |
| "learning_rate": 9.865730539713563e-05, |
| "loss": 0.0199, |
| "step": 9620 |
| }, |
| { |
| "grad_norm": 0.27320706844329834, |
| "learning_rate": 9.8652543622628e-05, |
| "loss": 0.0201, |
| "step": 9630 |
| }, |
| { |
| "grad_norm": 0.22485195100307465, |
| "learning_rate": 9.864777353473132e-05, |
| "loss": 0.0253, |
| "step": 9640 |
| }, |
| { |
| "grad_norm": 0.23003463447093964, |
| "learning_rate": 9.864299513426068e-05, |
| "loss": 0.0244, |
| "step": 9650 |
| }, |
| { |
| "grad_norm": 0.2642805278301239, |
| "learning_rate": 9.863820842203254e-05, |
| "loss": 0.0246, |
| "step": 9660 |
| }, |
| { |
| "grad_norm": 0.2115945667028427, |
| "learning_rate": 9.863341339886483e-05, |
| "loss": 0.0192, |
| "step": 9670 |
| }, |
| { |
| "grad_norm": 0.18153990805149078, |
| "learning_rate": 9.86286100655769e-05, |
| "loss": 0.0191, |
| "step": 9680 |
| }, |
| { |
| "grad_norm": 0.2407338172197342, |
| "learning_rate": 9.862379842298953e-05, |
| "loss": 0.0235, |
| "step": 9690 |
| }, |
| { |
| "grad_norm": 0.2723984122276306, |
| "learning_rate": 9.861897847192485e-05, |
| "loss": 0.0245, |
| "step": 9700 |
| }, |
| { |
| "grad_norm": 0.23615650832653046, |
| "learning_rate": 9.86141502132065e-05, |
| "loss": 0.0221, |
| "step": 9710 |
| }, |
| { |
| "grad_norm": 0.2216765582561493, |
| "learning_rate": 9.860931364765946e-05, |
| "loss": 0.0222, |
| "step": 9720 |
| }, |
| { |
| "grad_norm": 0.215382382273674, |
| "learning_rate": 9.860446877611021e-05, |
| "loss": 0.0197, |
| "step": 9730 |
| }, |
| { |
| "grad_norm": 0.17680546641349792, |
| "learning_rate": 9.859961559938655e-05, |
| "loss": 0.021, |
| "step": 9740 |
| }, |
| { |
| "grad_norm": 0.2677190601825714, |
| "learning_rate": 9.85947541183178e-05, |
| "loss": 0.0221, |
| "step": 9750 |
| }, |
| { |
| "grad_norm": 0.2742480933666229, |
| "learning_rate": 9.858988433373463e-05, |
| "loss": 0.0216, |
| "step": 9760 |
| }, |
| { |
| "grad_norm": 0.24318669736385345, |
| "learning_rate": 9.858500624646918e-05, |
| "loss": 0.0199, |
| "step": 9770 |
| }, |
| { |
| "grad_norm": 0.2163631170988083, |
| "learning_rate": 9.858011985735497e-05, |
| "loss": 0.0172, |
| "step": 9780 |
| }, |
| { |
| "grad_norm": 0.1700691431760788, |
| "learning_rate": 9.857522516722693e-05, |
| "loss": 0.019, |
| "step": 9790 |
| }, |
| { |
| "grad_norm": 0.2203397899866104, |
| "learning_rate": 9.857032217692145e-05, |
| "loss": 0.0205, |
| "step": 9800 |
| }, |
| { |
| "grad_norm": 0.1914152354001999, |
| "learning_rate": 9.856541088727631e-05, |
| "loss": 0.0277, |
| "step": 9810 |
| }, |
| { |
| "grad_norm": 0.35746708512306213, |
| "learning_rate": 9.856049129913072e-05, |
| "loss": 0.0248, |
| "step": 9820 |
| }, |
| { |
| "grad_norm": 0.2433345466852188, |
| "learning_rate": 9.85555634133253e-05, |
| "loss": 0.0247, |
| "step": 9830 |
| }, |
| { |
| "grad_norm": 0.22579282522201538, |
| "learning_rate": 9.855062723070208e-05, |
| "loss": 0.0256, |
| "step": 9840 |
| }, |
| { |
| "grad_norm": 0.2790820002555847, |
| "learning_rate": 9.854568275210454e-05, |
| "loss": 0.026, |
| "step": 9850 |
| }, |
| { |
| "grad_norm": 0.22638730704784393, |
| "learning_rate": 9.854072997837754e-05, |
| "loss": 0.0221, |
| "step": 9860 |
| }, |
| { |
| "grad_norm": 0.18334059417247772, |
| "learning_rate": 9.853576891036737e-05, |
| "loss": 0.0185, |
| "step": 9870 |
| }, |
| { |
| "grad_norm": 0.2652938961982727, |
| "learning_rate": 9.853079954892177e-05, |
| "loss": 0.02, |
| "step": 9880 |
| }, |
| { |
| "grad_norm": 0.21239930391311646, |
| "learning_rate": 9.852582189488983e-05, |
| "loss": 0.0206, |
| "step": 9890 |
| }, |
| { |
| "grad_norm": 0.26186928153038025, |
| "learning_rate": 9.852083594912212e-05, |
| "loss": 0.0218, |
| "step": 9900 |
| }, |
| { |
| "grad_norm": 0.23631031811237335, |
| "learning_rate": 9.851584171247058e-05, |
| "loss": 0.0203, |
| "step": 9910 |
| }, |
| { |
| "grad_norm": 0.28428515791893005, |
| "learning_rate": 9.851083918578863e-05, |
| "loss": 0.0198, |
| "step": 9920 |
| }, |
| { |
| "grad_norm": 0.1825859546661377, |
| "learning_rate": 9.850582836993103e-05, |
| "loss": 0.022, |
| "step": 9930 |
| }, |
| { |
| "grad_norm": 0.18512263894081116, |
| "learning_rate": 9.850080926575397e-05, |
| "loss": 0.0178, |
| "step": 9940 |
| }, |
| { |
| "grad_norm": 0.27902159094810486, |
| "learning_rate": 9.849578187411515e-05, |
| "loss": 0.0242, |
| "step": 9950 |
| }, |
| { |
| "grad_norm": 0.20011653006076813, |
| "learning_rate": 9.849074619587354e-05, |
| "loss": 0.0205, |
| "step": 9960 |
| }, |
| { |
| "grad_norm": 0.22842735052108765, |
| "learning_rate": 9.848570223188964e-05, |
| "loss": 0.0212, |
| "step": 9970 |
| }, |
| { |
| "grad_norm": 0.18112444877624512, |
| "learning_rate": 9.848064998302531e-05, |
| "loss": 0.0164, |
| "step": 9980 |
| }, |
| { |
| "grad_norm": 0.25175318121910095, |
| "learning_rate": 9.847558945014386e-05, |
| "loss": 0.0208, |
| "step": 9990 |
| }, |
| { |
| "grad_norm": 0.22751906514167786, |
| "learning_rate": 9.847052063410996e-05, |
| "loss": 0.0216, |
| "step": 10000 |
| }, |
| { |
| "grad_norm": 0.2501031756401062, |
| "learning_rate": 9.846544353578977e-05, |
| "loss": 0.0187, |
| "step": 10010 |
| }, |
| { |
| "grad_norm": 0.2205076813697815, |
| "learning_rate": 9.846035815605081e-05, |
| "loss": 0.0211, |
| "step": 10020 |
| }, |
| { |
| "grad_norm": 0.29377835988998413, |
| "learning_rate": 9.845526449576204e-05, |
| "loss": 0.0214, |
| "step": 10030 |
| }, |
| { |
| "grad_norm": 0.21597570180892944, |
| "learning_rate": 9.845016255579383e-05, |
| "loss": 0.0195, |
| "step": 10040 |
| }, |
| { |
| "grad_norm": 0.19214503467082977, |
| "learning_rate": 9.844505233701794e-05, |
| "loss": 0.0187, |
| "step": 10050 |
| }, |
| { |
| "grad_norm": 0.25303030014038086, |
| "learning_rate": 9.843993384030757e-05, |
| "loss": 0.0195, |
| "step": 10060 |
| }, |
| { |
| "grad_norm": 0.21829861402511597, |
| "learning_rate": 9.843480706653737e-05, |
| "loss": 0.0187, |
| "step": 10070 |
| }, |
| { |
| "grad_norm": 0.2369549721479416, |
| "learning_rate": 9.84296720165833e-05, |
| "loss": 0.0255, |
| "step": 10080 |
| }, |
| { |
| "grad_norm": 0.2949647903442383, |
| "learning_rate": 9.842452869132286e-05, |
| "loss": 0.0203, |
| "step": 10090 |
| }, |
| { |
| "grad_norm": 0.2276425063610077, |
| "learning_rate": 9.841937709163489e-05, |
| "loss": 0.0187, |
| "step": 10100 |
| }, |
| { |
| "grad_norm": 0.18210828304290771, |
| "learning_rate": 9.841421721839962e-05, |
| "loss": 0.0169, |
| "step": 10110 |
| }, |
| { |
| "grad_norm": 0.26168766617774963, |
| "learning_rate": 9.840904907249879e-05, |
| "loss": 0.0205, |
| "step": 10120 |
| }, |
| { |
| "grad_norm": 0.2785622775554657, |
| "learning_rate": 9.840387265481545e-05, |
| "loss": 0.0216, |
| "step": 10130 |
| }, |
| { |
| "grad_norm": 0.2521674931049347, |
| "learning_rate": 9.839868796623411e-05, |
| "loss": 0.0181, |
| "step": 10140 |
| }, |
| { |
| "grad_norm": 0.21578259766101837, |
| "learning_rate": 9.839349500764072e-05, |
| "loss": 0.0181, |
| "step": 10150 |
| }, |
| { |
| "grad_norm": 0.2497054636478424, |
| "learning_rate": 9.83882937799226e-05, |
| "loss": 0.0213, |
| "step": 10160 |
| }, |
| { |
| "grad_norm": 0.3409843444824219, |
| "learning_rate": 9.838308428396849e-05, |
| "loss": 0.0218, |
| "step": 10170 |
| }, |
| { |
| "grad_norm": 0.29681217670440674, |
| "learning_rate": 9.837786652066854e-05, |
| "loss": 0.0229, |
| "step": 10180 |
| }, |
| { |
| "grad_norm": 0.24830268323421478, |
| "learning_rate": 9.837264049091437e-05, |
| "loss": 0.0209, |
| "step": 10190 |
| }, |
| { |
| "grad_norm": 0.21985377371311188, |
| "learning_rate": 9.836740619559893e-05, |
| "loss": 0.02, |
| "step": 10200 |
| }, |
| { |
| "grad_norm": 0.2771212160587311, |
| "learning_rate": 9.836216363561659e-05, |
| "loss": 0.024, |
| "step": 10210 |
| }, |
| { |
| "grad_norm": 0.2835344970226288, |
| "learning_rate": 9.835691281186322e-05, |
| "loss": 0.0263, |
| "step": 10220 |
| }, |
| { |
| "grad_norm": 0.31411153078079224, |
| "learning_rate": 9.8351653725236e-05, |
| "loss": 0.0208, |
| "step": 10230 |
| }, |
| { |
| "grad_norm": 0.1784379929304123, |
| "learning_rate": 9.83463863766336e-05, |
| "loss": 0.0189, |
| "step": 10240 |
| }, |
| { |
| "grad_norm": 0.19949562847614288, |
| "learning_rate": 9.834111076695602e-05, |
| "loss": 0.0179, |
| "step": 10250 |
| }, |
| { |
| "grad_norm": 0.27524039149284363, |
| "learning_rate": 9.833582689710477e-05, |
| "loss": 0.0237, |
| "step": 10260 |
| }, |
| { |
| "grad_norm": 0.19682864844799042, |
| "learning_rate": 9.833053476798268e-05, |
| "loss": 0.0219, |
| "step": 10270 |
| }, |
| { |
| "grad_norm": 0.29202038049697876, |
| "learning_rate": 9.832523438049404e-05, |
| "loss": 0.0231, |
| "step": 10280 |
| }, |
| { |
| "grad_norm": 0.21768589317798615, |
| "learning_rate": 9.831992573554454e-05, |
| "loss": 0.018, |
| "step": 10290 |
| }, |
| { |
| "grad_norm": 0.1866806596517563, |
| "learning_rate": 9.831460883404128e-05, |
| "loss": 0.0191, |
| "step": 10300 |
| }, |
| { |
| "grad_norm": 0.27372342348098755, |
| "learning_rate": 9.830928367689278e-05, |
| "loss": 0.0188, |
| "step": 10310 |
| }, |
| { |
| "grad_norm": 0.2904968559741974, |
| "learning_rate": 9.830395026500896e-05, |
| "loss": 0.0204, |
| "step": 10320 |
| }, |
| { |
| "grad_norm": 0.26565808057785034, |
| "learning_rate": 9.829860859930115e-05, |
| "loss": 0.0219, |
| "step": 10330 |
| }, |
| { |
| "grad_norm": 0.18882012367248535, |
| "learning_rate": 9.829325868068212e-05, |
| "loss": 0.0192, |
| "step": 10340 |
| }, |
| { |
| "grad_norm": 0.2175988107919693, |
| "learning_rate": 9.8287900510066e-05, |
| "loss": 0.0252, |
| "step": 10350 |
| }, |
| { |
| "grad_norm": 0.26010170578956604, |
| "learning_rate": 9.828253408836834e-05, |
| "loss": 0.0241, |
| "step": 10360 |
| }, |
| { |
| "grad_norm": 0.27464449405670166, |
| "learning_rate": 9.827715941650615e-05, |
| "loss": 0.0203, |
| "step": 10370 |
| }, |
| { |
| "grad_norm": 0.22996774315834045, |
| "learning_rate": 9.82717764953978e-05, |
| "loss": 0.0177, |
| "step": 10380 |
| }, |
| { |
| "grad_norm": 0.16900886595249176, |
| "learning_rate": 9.826638532596308e-05, |
| "loss": 0.0182, |
| "step": 10390 |
| }, |
| { |
| "grad_norm": 0.2104736566543579, |
| "learning_rate": 9.82609859091232e-05, |
| "loss": 0.0186, |
| "step": 10400 |
| }, |
| { |
| "grad_norm": 0.20910122990608215, |
| "learning_rate": 9.825557824580076e-05, |
| "loss": 0.0189, |
| "step": 10410 |
| }, |
| { |
| "grad_norm": 0.22928059101104736, |
| "learning_rate": 9.82501623369198e-05, |
| "loss": 0.0183, |
| "step": 10420 |
| }, |
| { |
| "grad_norm": 0.3247082233428955, |
| "learning_rate": 9.824473818340574e-05, |
| "loss": 0.0212, |
| "step": 10430 |
| }, |
| { |
| "grad_norm": 0.1974494755268097, |
| "learning_rate": 9.823930578618541e-05, |
| "loss": 0.0196, |
| "step": 10440 |
| }, |
| { |
| "grad_norm": 0.2238084077835083, |
| "learning_rate": 9.823386514618709e-05, |
| "loss": 0.0186, |
| "step": 10450 |
| }, |
| { |
| "grad_norm": 0.17666709423065186, |
| "learning_rate": 9.82284162643404e-05, |
| "loss": 0.0191, |
| "step": 10460 |
| }, |
| { |
| "grad_norm": 0.1988755762577057, |
| "learning_rate": 9.822295914157642e-05, |
| "loss": 0.0216, |
| "step": 10470 |
| }, |
| { |
| "grad_norm": 0.19286410510540009, |
| "learning_rate": 9.821749377882763e-05, |
| "loss": 0.019, |
| "step": 10480 |
| }, |
| { |
| "grad_norm": 0.20021604001522064, |
| "learning_rate": 9.821202017702791e-05, |
| "loss": 0.0191, |
| "step": 10490 |
| }, |
| { |
| "grad_norm": 0.3215916156768799, |
| "learning_rate": 9.820653833711253e-05, |
| "loss": 0.0195, |
| "step": 10500 |
| }, |
| { |
| "grad_norm": 0.19581910967826843, |
| "learning_rate": 9.820104826001822e-05, |
| "loss": 0.0205, |
| "step": 10510 |
| }, |
| { |
| "grad_norm": 0.253947377204895, |
| "learning_rate": 9.819554994668305e-05, |
| "loss": 0.0213, |
| "step": 10520 |
| }, |
| { |
| "grad_norm": 0.18200746178627014, |
| "learning_rate": 9.819004339804654e-05, |
| "loss": 0.0209, |
| "step": 10530 |
| }, |
| { |
| "grad_norm": 0.27813297510147095, |
| "learning_rate": 9.818452861504961e-05, |
| "loss": 0.0207, |
| "step": 10540 |
| }, |
| { |
| "grad_norm": 0.24935060739517212, |
| "learning_rate": 9.81790055986346e-05, |
| "loss": 0.0223, |
| "step": 10550 |
| }, |
| { |
| "grad_norm": 0.20500843226909637, |
| "learning_rate": 9.817347434974523e-05, |
| "loss": 0.0194, |
| "step": 10560 |
| }, |
| { |
| "grad_norm": 0.2651246190071106, |
| "learning_rate": 9.816793486932664e-05, |
| "loss": 0.0209, |
| "step": 10570 |
| }, |
| { |
| "grad_norm": 0.17405372858047485, |
| "learning_rate": 9.816238715832538e-05, |
| "loss": 0.017, |
| "step": 10580 |
| }, |
| { |
| "grad_norm": 0.23822303116321564, |
| "learning_rate": 9.815683121768939e-05, |
| "loss": 0.0229, |
| "step": 10590 |
| }, |
| { |
| "grad_norm": 0.24041873216629028, |
| "learning_rate": 9.815126704836804e-05, |
| "loss": 0.0223, |
| "step": 10600 |
| }, |
| { |
| "grad_norm": 0.20693781971931458, |
| "learning_rate": 9.81456946513121e-05, |
| "loss": 0.0222, |
| "step": 10610 |
| }, |
| { |
| "grad_norm": 0.23794129490852356, |
| "learning_rate": 9.814011402747373e-05, |
| "loss": 0.0245, |
| "step": 10620 |
| }, |
| { |
| "grad_norm": 0.27533841133117676, |
| "learning_rate": 9.813452517780651e-05, |
| "loss": 0.0198, |
| "step": 10630 |
| }, |
| { |
| "grad_norm": 0.24632394313812256, |
| "learning_rate": 9.81289281032654e-05, |
| "loss": 0.0241, |
| "step": 10640 |
| }, |
| { |
| "grad_norm": 0.1977643370628357, |
| "learning_rate": 9.812332280480683e-05, |
| "loss": 0.0189, |
| "step": 10650 |
| }, |
| { |
| "grad_norm": 0.23927035927772522, |
| "learning_rate": 9.811770928338854e-05, |
| "loss": 0.0186, |
| "step": 10660 |
| }, |
| { |
| "grad_norm": 0.16118136048316956, |
| "learning_rate": 9.811208753996979e-05, |
| "loss": 0.0177, |
| "step": 10670 |
| }, |
| { |
| "grad_norm": 0.2045382857322693, |
| "learning_rate": 9.810645757551113e-05, |
| "loss": 0.0166, |
| "step": 10680 |
| }, |
| { |
| "grad_norm": 0.19995464384555817, |
| "learning_rate": 9.810081939097459e-05, |
| "loss": 0.0185, |
| "step": 10690 |
| }, |
| { |
| "grad_norm": 0.15094652771949768, |
| "learning_rate": 9.809517298732356e-05, |
| "loss": 0.0194, |
| "step": 10700 |
| }, |
| { |
| "grad_norm": 0.1950456202030182, |
| "learning_rate": 9.80895183655229e-05, |
| "loss": 0.0168, |
| "step": 10710 |
| }, |
| { |
| "grad_norm": 0.1724327653646469, |
| "learning_rate": 9.808385552653877e-05, |
| "loss": 0.0215, |
| "step": 10720 |
| }, |
| { |
| "grad_norm": 0.2382352352142334, |
| "learning_rate": 9.807818447133886e-05, |
| "loss": 0.0209, |
| "step": 10730 |
| }, |
| { |
| "grad_norm": 0.2521894872188568, |
| "learning_rate": 9.807250520089215e-05, |
| "loss": 0.0178, |
| "step": 10740 |
| }, |
| { |
| "grad_norm": 0.24485500156879425, |
| "learning_rate": 9.806681771616908e-05, |
| "loss": 0.018, |
| "step": 10750 |
| }, |
| { |
| "grad_norm": 0.21275334060192108, |
| "learning_rate": 9.80611220181415e-05, |
| "loss": 0.0207, |
| "step": 10760 |
| }, |
| { |
| "grad_norm": 0.17692400515079498, |
| "learning_rate": 9.805541810778264e-05, |
| "loss": 0.0212, |
| "step": 10770 |
| }, |
| { |
| "grad_norm": 0.2477423995733261, |
| "learning_rate": 9.804970598606716e-05, |
| "loss": 0.0188, |
| "step": 10780 |
| }, |
| { |
| "grad_norm": 0.288703054189682, |
| "learning_rate": 9.804398565397106e-05, |
| "loss": 0.0233, |
| "step": 10790 |
| }, |
| { |
| "grad_norm": 0.28948503732681274, |
| "learning_rate": 9.803825711247183e-05, |
| "loss": 0.0206, |
| "step": 10800 |
| }, |
| { |
| "grad_norm": 0.2287627011537552, |
| "learning_rate": 9.803252036254831e-05, |
| "loss": 0.0162, |
| "step": 10810 |
| }, |
| { |
| "grad_norm": 0.24518610537052155, |
| "learning_rate": 9.802677540518076e-05, |
| "loss": 0.021, |
| "step": 10820 |
| }, |
| { |
| "grad_norm": 0.1741969734430313, |
| "learning_rate": 9.802102224135081e-05, |
| "loss": 0.0203, |
| "step": 10830 |
| }, |
| { |
| "grad_norm": 0.20629048347473145, |
| "learning_rate": 9.801526087204155e-05, |
| "loss": 0.0184, |
| "step": 10840 |
| }, |
| { |
| "grad_norm": 0.21048341691493988, |
| "learning_rate": 9.800949129823743e-05, |
| "loss": 0.0166, |
| "step": 10850 |
| }, |
| { |
| "grad_norm": 0.16457433998584747, |
| "learning_rate": 9.80037135209243e-05, |
| "loss": 0.0206, |
| "step": 10860 |
| }, |
| { |
| "grad_norm": 0.20526692271232605, |
| "learning_rate": 9.799792754108946e-05, |
| "loss": 0.02, |
| "step": 10870 |
| }, |
| { |
| "grad_norm": 0.29450953006744385, |
| "learning_rate": 9.799213335972152e-05, |
| "loss": 0.0192, |
| "step": 10880 |
| }, |
| { |
| "grad_norm": 0.20133313536643982, |
| "learning_rate": 9.798633097781058e-05, |
| "loss": 0.0226, |
| "step": 10890 |
| }, |
| { |
| "grad_norm": 0.24203220009803772, |
| "learning_rate": 9.79805203963481e-05, |
| "loss": 0.0182, |
| "step": 10900 |
| }, |
| { |
| "grad_norm": 0.19610702991485596, |
| "learning_rate": 9.797470161632697e-05, |
| "loss": 0.0197, |
| "step": 10910 |
| }, |
| { |
| "grad_norm": 0.18819737434387207, |
| "learning_rate": 9.796887463874145e-05, |
| "loss": 0.0215, |
| "step": 10920 |
| }, |
| { |
| "grad_norm": 0.2768223285675049, |
| "learning_rate": 9.796303946458718e-05, |
| "loss": 0.0185, |
| "step": 10930 |
| }, |
| { |
| "grad_norm": 0.2169492244720459, |
| "learning_rate": 9.795719609486127e-05, |
| "loss": 0.0175, |
| "step": 10940 |
| }, |
| { |
| "grad_norm": 0.21109357476234436, |
| "learning_rate": 9.795134453056219e-05, |
| "loss": 0.0152, |
| "step": 10950 |
| }, |
| { |
| "grad_norm": 0.26809945702552795, |
| "learning_rate": 9.794548477268979e-05, |
| "loss": 0.0163, |
| "step": 10960 |
| }, |
| { |
| "grad_norm": 0.24783478677272797, |
| "learning_rate": 9.793961682224537e-05, |
| "loss": 0.0163, |
| "step": 10970 |
| }, |
| { |
| "grad_norm": 0.2160470336675644, |
| "learning_rate": 9.793374068023156e-05, |
| "loss": 0.0203, |
| "step": 10980 |
| }, |
| { |
| "grad_norm": 0.25220364332199097, |
| "learning_rate": 9.792785634765247e-05, |
| "loss": 0.0209, |
| "step": 10990 |
| }, |
| { |
| "grad_norm": 0.24036410450935364, |
| "learning_rate": 9.792196382551357e-05, |
| "loss": 0.0208, |
| "step": 11000 |
| }, |
| { |
| "grad_norm": 0.2242831289768219, |
| "learning_rate": 9.791606311482171e-05, |
| "loss": 0.018, |
| "step": 11010 |
| }, |
| { |
| "grad_norm": 0.25045308470726013, |
| "learning_rate": 9.791015421658518e-05, |
| "loss": 0.0186, |
| "step": 11020 |
| }, |
| { |
| "grad_norm": 0.21759934723377228, |
| "learning_rate": 9.790423713181362e-05, |
| "loss": 0.0199, |
| "step": 11030 |
| }, |
| { |
| "grad_norm": 0.22301211953163147, |
| "learning_rate": 9.789831186151814e-05, |
| "loss": 0.0174, |
| "step": 11040 |
| }, |
| { |
| "grad_norm": 0.24609437584877014, |
| "learning_rate": 9.789237840671118e-05, |
| "loss": 0.0198, |
| "step": 11050 |
| }, |
| { |
| "grad_norm": 0.23131372034549713, |
| "learning_rate": 9.78864367684066e-05, |
| "loss": 0.0181, |
| "step": 11060 |
| }, |
| { |
| "grad_norm": 0.18929408490657806, |
| "learning_rate": 9.788048694761968e-05, |
| "loss": 0.0182, |
| "step": 11070 |
| }, |
| { |
| "grad_norm": 0.21103902161121368, |
| "learning_rate": 9.787452894536709e-05, |
| "loss": 0.0189, |
| "step": 11080 |
| }, |
| { |
| "grad_norm": 0.22037075459957123, |
| "learning_rate": 9.786856276266685e-05, |
| "loss": 0.0175, |
| "step": 11090 |
| }, |
| { |
| "grad_norm": 0.2759108245372772, |
| "learning_rate": 9.786258840053845e-05, |
| "loss": 0.0199, |
| "step": 11100 |
| }, |
| { |
| "grad_norm": 0.24369940161705017, |
| "learning_rate": 9.785660586000273e-05, |
| "loss": 0.0178, |
| "step": 11110 |
| }, |
| { |
| "grad_norm": 0.2781594395637512, |
| "learning_rate": 9.785061514208196e-05, |
| "loss": 0.0181, |
| "step": 11120 |
| }, |
| { |
| "grad_norm": 0.26724958419799805, |
| "learning_rate": 9.784461624779977e-05, |
| "loss": 0.0191, |
| "step": 11130 |
| }, |
| { |
| "grad_norm": 0.2424626648426056, |
| "learning_rate": 9.783860917818123e-05, |
| "loss": 0.02, |
| "step": 11140 |
| }, |
| { |
| "grad_norm": 0.20541176199913025, |
| "learning_rate": 9.783259393425277e-05, |
| "loss": 0.0212, |
| "step": 11150 |
| }, |
| { |
| "grad_norm": 0.26179322600364685, |
| "learning_rate": 9.782657051704221e-05, |
| "loss": 0.0145, |
| "step": 11160 |
| }, |
| { |
| "grad_norm": 0.22024081647396088, |
| "learning_rate": 9.782053892757883e-05, |
| "loss": 0.0163, |
| "step": 11170 |
| }, |
| { |
| "grad_norm": 0.2568201720714569, |
| "learning_rate": 9.781449916689324e-05, |
| "loss": 0.021, |
| "step": 11180 |
| }, |
| { |
| "grad_norm": 0.24892078340053558, |
| "learning_rate": 9.780845123601746e-05, |
| "loss": 0.0183, |
| "step": 11190 |
| }, |
| { |
| "grad_norm": 0.18937984108924866, |
| "learning_rate": 9.780239513598492e-05, |
| "loss": 0.019, |
| "step": 11200 |
| }, |
| { |
| "grad_norm": 0.19871971011161804, |
| "learning_rate": 9.779633086783047e-05, |
| "loss": 0.0223, |
| "step": 11210 |
| }, |
| { |
| "grad_norm": 0.23312966525554657, |
| "learning_rate": 9.779025843259031e-05, |
| "loss": 0.0172, |
| "step": 11220 |
| }, |
| { |
| "grad_norm": 0.23910565674304962, |
| "learning_rate": 9.778417783130204e-05, |
| "loss": 0.0179, |
| "step": 11230 |
| }, |
| { |
| "grad_norm": 0.2221440076828003, |
| "learning_rate": 9.777808906500468e-05, |
| "loss": 0.0177, |
| "step": 11240 |
| }, |
| { |
| "grad_norm": 0.25122180581092834, |
| "learning_rate": 9.777199213473862e-05, |
| "loss": 0.0198, |
| "step": 11250 |
| }, |
| { |
| "grad_norm": 0.20925727486610413, |
| "learning_rate": 9.77658870415457e-05, |
| "loss": 0.0226, |
| "step": 11260 |
| }, |
| { |
| "grad_norm": 0.250315397977829, |
| "learning_rate": 9.775977378646906e-05, |
| "loss": 0.0245, |
| "step": 11270 |
| }, |
| { |
| "grad_norm": 0.15758101642131805, |
| "learning_rate": 9.775365237055331e-05, |
| "loss": 0.0177, |
| "step": 11280 |
| }, |
| { |
| "grad_norm": 0.27685776352882385, |
| "learning_rate": 9.774752279484445e-05, |
| "loss": 0.0182, |
| "step": 11290 |
| }, |
| { |
| "grad_norm": 0.19731733202934265, |
| "learning_rate": 9.774138506038984e-05, |
| "loss": 0.0192, |
| "step": 11300 |
| }, |
| { |
| "grad_norm": 0.20881041884422302, |
| "learning_rate": 9.773523916823826e-05, |
| "loss": 0.0217, |
| "step": 11310 |
| }, |
| { |
| "grad_norm": 0.20949961245059967, |
| "learning_rate": 9.772908511943986e-05, |
| "loss": 0.0168, |
| "step": 11320 |
| }, |
| { |
| "grad_norm": 0.15379177033901215, |
| "learning_rate": 9.77229229150462e-05, |
| "loss": 0.0187, |
| "step": 11330 |
| }, |
| { |
| "grad_norm": 0.19608713686466217, |
| "learning_rate": 9.771675255611024e-05, |
| "loss": 0.0217, |
| "step": 11340 |
| }, |
| { |
| "grad_norm": 0.19298717379570007, |
| "learning_rate": 9.771057404368632e-05, |
| "loss": 0.0215, |
| "step": 11350 |
| }, |
| { |
| "grad_norm": 0.2094731628894806, |
| "learning_rate": 9.770438737883018e-05, |
| "loss": 0.0183, |
| "step": 11360 |
| }, |
| { |
| "grad_norm": 0.26386967301368713, |
| "learning_rate": 9.769819256259898e-05, |
| "loss": 0.0181, |
| "step": 11370 |
| }, |
| { |
| "grad_norm": 0.31744125485420227, |
| "learning_rate": 9.769198959605119e-05, |
| "loss": 0.0212, |
| "step": 11380 |
| }, |
| { |
| "grad_norm": 0.234335258603096, |
| "learning_rate": 9.768577848024678e-05, |
| "loss": 0.0205, |
| "step": 11390 |
| }, |
| { |
| "grad_norm": 0.21479220688343048, |
| "learning_rate": 9.767955921624702e-05, |
| "loss": 0.0196, |
| "step": 11400 |
| }, |
| { |
| "grad_norm": 0.3026210069656372, |
| "learning_rate": 9.767333180511465e-05, |
| "loss": 0.0209, |
| "step": 11410 |
| }, |
| { |
| "grad_norm": 0.246705561876297, |
| "learning_rate": 9.766709624791373e-05, |
| "loss": 0.023, |
| "step": 11420 |
| }, |
| { |
| "grad_norm": 0.22756491601467133, |
| "learning_rate": 9.766085254570975e-05, |
| "loss": 0.0223, |
| "step": 11430 |
| }, |
| { |
| "grad_norm": 0.31085556745529175, |
| "learning_rate": 9.76546006995696e-05, |
| "loss": 0.022, |
| "step": 11440 |
| }, |
| { |
| "grad_norm": 0.2894473373889923, |
| "learning_rate": 9.764834071056155e-05, |
| "loss": 0.0185, |
| "step": 11450 |
| }, |
| { |
| "grad_norm": 0.18649722635746002, |
| "learning_rate": 9.764207257975526e-05, |
| "loss": 0.0204, |
| "step": 11460 |
| }, |
| { |
| "grad_norm": 0.2198372781276703, |
| "learning_rate": 9.763579630822179e-05, |
| "loss": 0.0205, |
| "step": 11470 |
| }, |
| { |
| "grad_norm": 0.21714451909065247, |
| "learning_rate": 9.762951189703356e-05, |
| "loss": 0.02, |
| "step": 11480 |
| }, |
| { |
| "grad_norm": 0.16019868850708008, |
| "learning_rate": 9.762321934726442e-05, |
| "loss": 0.0179, |
| "step": 11490 |
| }, |
| { |
| "grad_norm": 0.18601438403129578, |
| "learning_rate": 9.761691865998959e-05, |
| "loss": 0.0227, |
| "step": 11500 |
| }, |
| { |
| "grad_norm": 0.22601468861103058, |
| "learning_rate": 9.76106098362857e-05, |
| "loss": 0.0188, |
| "step": 11510 |
| }, |
| { |
| "grad_norm": 0.21661071479320526, |
| "learning_rate": 9.760429287723072e-05, |
| "loss": 0.0216, |
| "step": 11520 |
| }, |
| { |
| "grad_norm": 0.17467108368873596, |
| "learning_rate": 9.759796778390406e-05, |
| "loss": 0.0191, |
| "step": 11530 |
| }, |
| { |
| "grad_norm": 0.2121114879846573, |
| "learning_rate": 9.759163455738653e-05, |
| "loss": 0.0201, |
| "step": 11540 |
| }, |
| { |
| "grad_norm": 0.2389015406370163, |
| "learning_rate": 9.75852931987603e-05, |
| "loss": 0.0157, |
| "step": 11550 |
| }, |
| { |
| "grad_norm": 0.2745116651058197, |
| "learning_rate": 9.757894370910891e-05, |
| "loss": 0.0218, |
| "step": 11560 |
| }, |
| { |
| "grad_norm": 0.2628045082092285, |
| "learning_rate": 9.757258608951733e-05, |
| "loss": 0.0217, |
| "step": 11570 |
| }, |
| { |
| "grad_norm": 0.2073826789855957, |
| "learning_rate": 9.75662203410719e-05, |
| "loss": 0.0173, |
| "step": 11580 |
| }, |
| { |
| "grad_norm": 0.23417799174785614, |
| "learning_rate": 9.755984646486034e-05, |
| "loss": 0.0168, |
| "step": 11590 |
| }, |
| { |
| "grad_norm": 0.14510011672973633, |
| "learning_rate": 9.75534644619718e-05, |
| "loss": 0.0151, |
| "step": 11600 |
| }, |
| { |
| "grad_norm": 0.2490101009607315, |
| "learning_rate": 9.754707433349676e-05, |
| "loss": 0.0186, |
| "step": 11610 |
| }, |
| { |
| "grad_norm": 0.21024779975414276, |
| "learning_rate": 9.754067608052715e-05, |
| "loss": 0.0216, |
| "step": 11620 |
| }, |
| { |
| "grad_norm": 0.2229691743850708, |
| "learning_rate": 9.753426970415622e-05, |
| "loss": 0.0173, |
| "step": 11630 |
| }, |
| { |
| "grad_norm": 0.2373073399066925, |
| "learning_rate": 9.752785520547868e-05, |
| "loss": 0.0192, |
| "step": 11640 |
| }, |
| { |
| "grad_norm": 0.2037600576877594, |
| "learning_rate": 9.752143258559056e-05, |
| "loss": 0.0168, |
| "step": 11650 |
| }, |
| { |
| "grad_norm": 0.2464536875486374, |
| "learning_rate": 9.751500184558933e-05, |
| "loss": 0.0177, |
| "step": 11660 |
| }, |
| { |
| "grad_norm": 0.22050020098686218, |
| "learning_rate": 9.750856298657383e-05, |
| "loss": 0.0156, |
| "step": 11670 |
| }, |
| { |
| "grad_norm": 0.19472891092300415, |
| "learning_rate": 9.750211600964428e-05, |
| "loss": 0.0152, |
| "step": 11680 |
| }, |
| { |
| "grad_norm": 0.33395636081695557, |
| "learning_rate": 9.749566091590226e-05, |
| "loss": 0.0207, |
| "step": 11690 |
| }, |
| { |
| "grad_norm": 0.2324424535036087, |
| "learning_rate": 9.748919770645083e-05, |
| "loss": 0.0212, |
| "step": 11700 |
| }, |
| { |
| "grad_norm": 0.16833680868148804, |
| "learning_rate": 9.748272638239432e-05, |
| "loss": 0.0197, |
| "step": 11710 |
| }, |
| { |
| "grad_norm": 0.20751014351844788, |
| "learning_rate": 9.747624694483855e-05, |
| "loss": 0.0208, |
| "step": 11720 |
| }, |
| { |
| "grad_norm": 0.1955762505531311, |
| "learning_rate": 9.746975939489065e-05, |
| "loss": 0.0158, |
| "step": 11730 |
| }, |
| { |
| "grad_norm": 0.15604378283023834, |
| "learning_rate": 9.746326373365918e-05, |
| "loss": 0.0164, |
| "step": 11740 |
| }, |
| { |
| "grad_norm": 0.22884982824325562, |
| "learning_rate": 9.745675996225403e-05, |
| "loss": 0.0165, |
| "step": 11750 |
| }, |
| { |
| "grad_norm": 0.24646146595478058, |
| "learning_rate": 9.745024808178657e-05, |
| "loss": 0.0184, |
| "step": 11760 |
| }, |
| { |
| "grad_norm": 0.269307017326355, |
| "learning_rate": 9.744372809336947e-05, |
| "loss": 0.018, |
| "step": 11770 |
| }, |
| { |
| "grad_norm": 0.17529548704624176, |
| "learning_rate": 9.743719999811682e-05, |
| "loss": 0.0225, |
| "step": 11780 |
| }, |
| { |
| "grad_norm": 0.17929251492023468, |
| "learning_rate": 9.743066379714412e-05, |
| "loss": 0.0162, |
| "step": 11790 |
| }, |
| { |
| "grad_norm": 0.17738574743270874, |
| "learning_rate": 9.74241194915682e-05, |
| "loss": 0.0236, |
| "step": 11800 |
| }, |
| { |
| "grad_norm": 0.21286842226982117, |
| "learning_rate": 9.741756708250731e-05, |
| "loss": 0.0166, |
| "step": 11810 |
| }, |
| { |
| "grad_norm": 0.2095780074596405, |
| "learning_rate": 9.741100657108109e-05, |
| "loss": 0.0216, |
| "step": 11820 |
| }, |
| { |
| "grad_norm": 0.21511229872703552, |
| "learning_rate": 9.740443795841054e-05, |
| "loss": 0.0194, |
| "step": 11830 |
| }, |
| { |
| "grad_norm": 0.20982207357883453, |
| "learning_rate": 9.739786124561805e-05, |
| "loss": 0.0178, |
| "step": 11840 |
| }, |
| { |
| "grad_norm": 0.14046761393547058, |
| "learning_rate": 9.73912764338274e-05, |
| "loss": 0.0201, |
| "step": 11850 |
| }, |
| { |
| "grad_norm": 0.21997249126434326, |
| "learning_rate": 9.738468352416377e-05, |
| "loss": 0.0205, |
| "step": 11860 |
| }, |
| { |
| "grad_norm": 0.23343665897846222, |
| "learning_rate": 9.737808251775369e-05, |
| "loss": 0.0204, |
| "step": 11870 |
| }, |
| { |
| "grad_norm": 0.22911225259304047, |
| "learning_rate": 9.737147341572512e-05, |
| "loss": 0.0175, |
| "step": 11880 |
| }, |
| { |
| "grad_norm": 0.21971078217029572, |
| "learning_rate": 9.736485621920735e-05, |
| "loss": 0.0182, |
| "step": 11890 |
| }, |
| { |
| "grad_norm": 0.2601747512817383, |
| "learning_rate": 9.735823092933108e-05, |
| "loss": 0.0257, |
| "step": 11900 |
| }, |
| { |
| "grad_norm": 0.17831431329250336, |
| "learning_rate": 9.735159754722838e-05, |
| "loss": 0.0178, |
| "step": 11910 |
| }, |
| { |
| "grad_norm": 0.16906364262104034, |
| "learning_rate": 9.734495607403275e-05, |
| "loss": 0.0184, |
| "step": 11920 |
| }, |
| { |
| "grad_norm": 0.15772491693496704, |
| "learning_rate": 9.733830651087901e-05, |
| "loss": 0.019, |
| "step": 11930 |
| }, |
| { |
| "grad_norm": 0.19316568970680237, |
| "learning_rate": 9.733164885890338e-05, |
| "loss": 0.0152, |
| "step": 11940 |
| }, |
| { |
| "grad_norm": 0.20866607129573822, |
| "learning_rate": 9.732498311924349e-05, |
| "loss": 0.021, |
| "step": 11950 |
| }, |
| { |
| "grad_norm": 0.20008015632629395, |
| "learning_rate": 9.731830929303833e-05, |
| "loss": 0.0189, |
| "step": 11960 |
| }, |
| { |
| "grad_norm": 0.25079259276390076, |
| "learning_rate": 9.731162738142827e-05, |
| "loss": 0.0193, |
| "step": 11970 |
| }, |
| { |
| "grad_norm": 0.21481575071811676, |
| "learning_rate": 9.730493738555506e-05, |
| "loss": 0.0235, |
| "step": 11980 |
| }, |
| { |
| "grad_norm": 0.2959555983543396, |
| "learning_rate": 9.729823930656186e-05, |
| "loss": 0.0192, |
| "step": 11990 |
| }, |
| { |
| "grad_norm": 0.2065613716840744, |
| "learning_rate": 9.729153314559316e-05, |
| "loss": 0.0188, |
| "step": 12000 |
| }, |
| { |
| "grad_norm": 0.20867669582366943, |
| "learning_rate": 9.728481890379486e-05, |
| "loss": 0.0213, |
| "step": 12010 |
| }, |
| { |
| "grad_norm": 0.25730040669441223, |
| "learning_rate": 9.727809658231428e-05, |
| "loss": 0.0205, |
| "step": 12020 |
| }, |
| { |
| "grad_norm": 0.29852187633514404, |
| "learning_rate": 9.727136618230003e-05, |
| "loss": 0.0206, |
| "step": 12030 |
| }, |
| { |
| "grad_norm": 0.24638858437538147, |
| "learning_rate": 9.726462770490219e-05, |
| "loss": 0.0209, |
| "step": 12040 |
| }, |
| { |
| "grad_norm": 0.14810189604759216, |
| "learning_rate": 9.725788115127214e-05, |
| "loss": 0.0175, |
| "step": 12050 |
| }, |
| { |
| "grad_norm": 0.2689899504184723, |
| "learning_rate": 9.725112652256274e-05, |
| "loss": 0.0188, |
| "step": 12060 |
| }, |
| { |
| "grad_norm": 0.2269291877746582, |
| "learning_rate": 9.724436381992812e-05, |
| "loss": 0.018, |
| "step": 12070 |
| }, |
| { |
| "grad_norm": 0.23517954349517822, |
| "learning_rate": 9.723759304452387e-05, |
| "loss": 0.0185, |
| "step": 12080 |
| }, |
| { |
| "grad_norm": 0.2234342098236084, |
| "learning_rate": 9.72308141975069e-05, |
| "loss": 0.0168, |
| "step": 12090 |
| }, |
| { |
| "grad_norm": 0.25882044434547424, |
| "learning_rate": 9.722402728003557e-05, |
| "loss": 0.0182, |
| "step": 12100 |
| }, |
| { |
| "grad_norm": 0.1912042200565338, |
| "learning_rate": 9.721723229326953e-05, |
| "loss": 0.0177, |
| "step": 12110 |
| }, |
| { |
| "grad_norm": 0.20937858521938324, |
| "learning_rate": 9.721042923836992e-05, |
| "loss": 0.02, |
| "step": 12120 |
| }, |
| { |
| "grad_norm": 0.2105017900466919, |
| "learning_rate": 9.720361811649914e-05, |
| "loss": 0.0185, |
| "step": 12130 |
| }, |
| { |
| "grad_norm": 0.19680309295654297, |
| "learning_rate": 9.719679892882106e-05, |
| "loss": 0.0148, |
| "step": 12140 |
| }, |
| { |
| "grad_norm": 0.22222547233104706, |
| "learning_rate": 9.718997167650085e-05, |
| "loss": 0.0174, |
| "step": 12150 |
| }, |
| { |
| "grad_norm": 0.21008817851543427, |
| "learning_rate": 9.718313636070515e-05, |
| "loss": 0.0198, |
| "step": 12160 |
| }, |
| { |
| "grad_norm": 0.2227344512939453, |
| "learning_rate": 9.717629298260192e-05, |
| "loss": 0.0199, |
| "step": 12170 |
| }, |
| { |
| "grad_norm": 0.18639013171195984, |
| "learning_rate": 9.716944154336047e-05, |
| "loss": 0.0164, |
| "step": 12180 |
| }, |
| { |
| "grad_norm": 0.15603755414485931, |
| "learning_rate": 9.716258204415157e-05, |
| "loss": 0.0172, |
| "step": 12190 |
| }, |
| { |
| "grad_norm": 0.22610436379909515, |
| "learning_rate": 9.715571448614728e-05, |
| "loss": 0.0202, |
| "step": 12200 |
| }, |
| { |
| "grad_norm": 0.2384321540594101, |
| "learning_rate": 9.71488388705211e-05, |
| "loss": 0.0173, |
| "step": 12210 |
| }, |
| { |
| "grad_norm": 0.21900944411754608, |
| "learning_rate": 9.714195519844788e-05, |
| "loss": 0.018, |
| "step": 12220 |
| }, |
| { |
| "grad_norm": 0.23115262389183044, |
| "learning_rate": 9.713506347110386e-05, |
| "loss": 0.016, |
| "step": 12230 |
| }, |
| { |
| "grad_norm": 0.21534818410873413, |
| "learning_rate": 9.712816368966662e-05, |
| "loss": 0.0179, |
| "step": 12240 |
| }, |
| { |
| "grad_norm": 0.24454522132873535, |
| "learning_rate": 9.712125585531517e-05, |
| "loss": 0.0195, |
| "step": 12250 |
| }, |
| { |
| "grad_norm": 0.20938843488693237, |
| "learning_rate": 9.711433996922988e-05, |
| "loss": 0.0189, |
| "step": 12260 |
| }, |
| { |
| "grad_norm": 0.2134719341993332, |
| "learning_rate": 9.710741603259245e-05, |
| "loss": 0.022, |
| "step": 12270 |
| }, |
| { |
| "grad_norm": 0.16311052441596985, |
| "learning_rate": 9.710048404658603e-05, |
| "loss": 0.0195, |
| "step": 12280 |
| }, |
| { |
| "grad_norm": 0.1718016266822815, |
| "learning_rate": 9.709354401239508e-05, |
| "loss": 0.017, |
| "step": 12290 |
| }, |
| { |
| "grad_norm": 0.19942371547222137, |
| "learning_rate": 9.708659593120546e-05, |
| "loss": 0.0181, |
| "step": 12300 |
| }, |
| { |
| "grad_norm": 0.2483605444431305, |
| "learning_rate": 9.707963980420443e-05, |
| "loss": 0.0213, |
| "step": 12310 |
| }, |
| { |
| "grad_norm": 0.26829585433006287, |
| "learning_rate": 9.707267563258058e-05, |
| "loss": 0.0157, |
| "step": 12320 |
| }, |
| { |
| "grad_norm": 0.19812586903572083, |
| "learning_rate": 9.70657034175239e-05, |
| "loss": 0.0186, |
| "step": 12330 |
| }, |
| { |
| "grad_norm": 0.2553393840789795, |
| "learning_rate": 9.705872316022577e-05, |
| "loss": 0.0218, |
| "step": 12340 |
| }, |
| { |
| "grad_norm": 0.1981436163187027, |
| "learning_rate": 9.705173486187891e-05, |
| "loss": 0.018, |
| "step": 12350 |
| }, |
| { |
| "grad_norm": 0.23439428210258484, |
| "learning_rate": 9.704473852367741e-05, |
| "loss": 0.0229, |
| "step": 12360 |
| }, |
| { |
| "grad_norm": 0.2348204404115677, |
| "learning_rate": 9.70377341468168e-05, |
| "loss": 0.0184, |
| "step": 12370 |
| }, |
| { |
| "grad_norm": 0.23930864036083221, |
| "learning_rate": 9.703072173249389e-05, |
| "loss": 0.0186, |
| "step": 12380 |
| }, |
| { |
| "grad_norm": 0.2222331166267395, |
| "learning_rate": 9.702370128190693e-05, |
| "loss": 0.0182, |
| "step": 12390 |
| }, |
| { |
| "grad_norm": 0.1899869740009308, |
| "learning_rate": 9.701667279625552e-05, |
| "loss": 0.0215, |
| "step": 12400 |
| }, |
| { |
| "grad_norm": 0.18148602545261383, |
| "learning_rate": 9.700963627674065e-05, |
| "loss": 0.02, |
| "step": 12410 |
| }, |
| { |
| "grad_norm": 0.22572743892669678, |
| "learning_rate": 9.700259172456466e-05, |
| "loss": 0.017, |
| "step": 12420 |
| }, |
| { |
| "grad_norm": 0.21851639449596405, |
| "learning_rate": 9.699553914093124e-05, |
| "loss": 0.0156, |
| "step": 12430 |
| }, |
| { |
| "grad_norm": 0.18523038923740387, |
| "learning_rate": 9.698847852704553e-05, |
| "loss": 0.0169, |
| "step": 12440 |
| }, |
| { |
| "grad_norm": 0.20364411175251007, |
| "learning_rate": 9.6981409884114e-05, |
| "loss": 0.0254, |
| "step": 12450 |
| }, |
| { |
| "grad_norm": 0.18808870017528534, |
| "learning_rate": 9.697433321334443e-05, |
| "loss": 0.0187, |
| "step": 12460 |
| }, |
| { |
| "grad_norm": 0.21851050853729248, |
| "learning_rate": 9.696724851594607e-05, |
| "loss": 0.0218, |
| "step": 12470 |
| }, |
| { |
| "grad_norm": 0.18276174366474152, |
| "learning_rate": 9.696015579312952e-05, |
| "loss": 0.0166, |
| "step": 12480 |
| }, |
| { |
| "grad_norm": 0.2173735648393631, |
| "learning_rate": 9.695305504610668e-05, |
| "loss": 0.0176, |
| "step": 12490 |
| }, |
| { |
| "grad_norm": 0.25715184211730957, |
| "learning_rate": 9.694594627609092e-05, |
| "loss": 0.0155, |
| "step": 12500 |
| }, |
| { |
| "grad_norm": 0.15247225761413574, |
| "learning_rate": 9.693882948429691e-05, |
| "loss": 0.018, |
| "step": 12510 |
| }, |
| { |
| "grad_norm": 0.22634707391262054, |
| "learning_rate": 9.693170467194071e-05, |
| "loss": 0.0182, |
| "step": 12520 |
| }, |
| { |
| "grad_norm": 0.2305721938610077, |
| "learning_rate": 9.692457184023977e-05, |
| "loss": 0.0203, |
| "step": 12530 |
| }, |
| { |
| "grad_norm": 0.22422613203525543, |
| "learning_rate": 9.691743099041291e-05, |
| "loss": 0.0175, |
| "step": 12540 |
| }, |
| { |
| "grad_norm": 0.23468373715877533, |
| "learning_rate": 9.691028212368027e-05, |
| "loss": 0.0187, |
| "step": 12550 |
| }, |
| { |
| "grad_norm": 0.13476166129112244, |
| "learning_rate": 9.690312524126342e-05, |
| "loss": 0.0158, |
| "step": 12560 |
| }, |
| { |
| "grad_norm": 0.15493354201316833, |
| "learning_rate": 9.689596034438527e-05, |
| "loss": 0.0164, |
| "step": 12570 |
| }, |
| { |
| "grad_norm": 0.167372927069664, |
| "learning_rate": 9.688878743427012e-05, |
| "loss": 0.0163, |
| "step": 12580 |
| }, |
| { |
| "grad_norm": 0.2126246839761734, |
| "learning_rate": 9.688160651214359e-05, |
| "loss": 0.0172, |
| "step": 12590 |
| }, |
| { |
| "grad_norm": 0.2218184620141983, |
| "learning_rate": 9.687441757923273e-05, |
| "loss": 0.0179, |
| "step": 12600 |
| }, |
| { |
| "grad_norm": 0.21006833016872406, |
| "learning_rate": 9.68672206367659e-05, |
| "loss": 0.0188, |
| "step": 12610 |
| }, |
| { |
| "grad_norm": 0.17882493138313293, |
| "learning_rate": 9.686001568597291e-05, |
| "loss": 0.0194, |
| "step": 12620 |
| }, |
| { |
| "grad_norm": 0.21169313788414001, |
| "learning_rate": 9.685280272808486e-05, |
| "loss": 0.0167, |
| "step": 12630 |
| }, |
| { |
| "grad_norm": 0.2793411910533905, |
| "learning_rate": 9.684558176433424e-05, |
| "loss": 0.0178, |
| "step": 12640 |
| }, |
| { |
| "grad_norm": 0.2542753517627716, |
| "learning_rate": 9.683835279595495e-05, |
| "loss": 0.0167, |
| "step": 12650 |
| }, |
| { |
| "grad_norm": 0.20705217123031616, |
| "learning_rate": 9.683111582418216e-05, |
| "loss": 0.0139, |
| "step": 12660 |
| }, |
| { |
| "grad_norm": 0.2595694363117218, |
| "learning_rate": 9.682387085025254e-05, |
| "loss": 0.0148, |
| "step": 12670 |
| }, |
| { |
| "grad_norm": 0.2114218771457672, |
| "learning_rate": 9.681661787540401e-05, |
| "loss": 0.019, |
| "step": 12680 |
| }, |
| { |
| "grad_norm": 0.1812354326248169, |
| "learning_rate": 9.680935690087593e-05, |
| "loss": 0.0191, |
| "step": 12690 |
| }, |
| { |
| "grad_norm": 0.16314566135406494, |
| "learning_rate": 9.680208792790901e-05, |
| "loss": 0.0158, |
| "step": 12700 |
| }, |
| { |
| "grad_norm": 0.15389156341552734, |
| "learning_rate": 9.679481095774529e-05, |
| "loss": 0.015, |
| "step": 12710 |
| }, |
| { |
| "grad_norm": 0.17251567542552948, |
| "learning_rate": 9.678752599162822e-05, |
| "loss": 0.0176, |
| "step": 12720 |
| }, |
| { |
| "grad_norm": 0.24053889513015747, |
| "learning_rate": 9.678023303080259e-05, |
| "loss": 0.0163, |
| "step": 12730 |
| }, |
| { |
| "grad_norm": 0.19373218715190887, |
| "learning_rate": 9.677293207651459e-05, |
| "loss": 0.0164, |
| "step": 12740 |
| }, |
| { |
| "grad_norm": 0.18236075341701508, |
| "learning_rate": 9.676562313001173e-05, |
| "loss": 0.0168, |
| "step": 12750 |
| }, |
| { |
| "grad_norm": 0.19816508889198303, |
| "learning_rate": 9.675830619254293e-05, |
| "loss": 0.0181, |
| "step": 12760 |
| }, |
| { |
| "grad_norm": 0.2341136932373047, |
| "learning_rate": 9.675098126535843e-05, |
| "loss": 0.0195, |
| "step": 12770 |
| }, |
| { |
| "grad_norm": 0.22628170251846313, |
| "learning_rate": 9.674364834970988e-05, |
| "loss": 0.0215, |
| "step": 12780 |
| }, |
| { |
| "grad_norm": 0.22022584080696106, |
| "learning_rate": 9.673630744685028e-05, |
| "loss": 0.0161, |
| "step": 12790 |
| }, |
| { |
| "grad_norm": 0.2745243310928345, |
| "learning_rate": 9.672895855803397e-05, |
| "loss": 0.0179, |
| "step": 12800 |
| }, |
| { |
| "grad_norm": 0.25176534056663513, |
| "learning_rate": 9.672160168451667e-05, |
| "loss": 0.0155, |
| "step": 12810 |
| }, |
| { |
| "grad_norm": 0.25133299827575684, |
| "learning_rate": 9.671423682755549e-05, |
| "loss": 0.0185, |
| "step": 12820 |
| }, |
| { |
| "grad_norm": 0.1872323751449585, |
| "learning_rate": 9.670686398840888e-05, |
| "loss": 0.0175, |
| "step": 12830 |
| }, |
| { |
| "grad_norm": 0.16792120039463043, |
| "learning_rate": 9.669948316833664e-05, |
| "loss": 0.0166, |
| "step": 12840 |
| }, |
| { |
| "grad_norm": 0.22031652927398682, |
| "learning_rate": 9.669209436859997e-05, |
| "loss": 0.0178, |
| "step": 12850 |
| }, |
| { |
| "grad_norm": 0.27442485094070435, |
| "learning_rate": 9.66846975904614e-05, |
| "loss": 0.0192, |
| "step": 12860 |
| }, |
| { |
| "grad_norm": 0.2164306640625, |
| "learning_rate": 9.667729283518483e-05, |
| "loss": 0.016, |
| "step": 12870 |
| }, |
| { |
| "grad_norm": 0.17880965769290924, |
| "learning_rate": 9.666988010403557e-05, |
| "loss": 0.0156, |
| "step": 12880 |
| }, |
| { |
| "grad_norm": 0.21804316341876984, |
| "learning_rate": 9.66624593982802e-05, |
| "loss": 0.0167, |
| "step": 12890 |
| }, |
| { |
| "grad_norm": 0.20562773942947388, |
| "learning_rate": 9.665503071918675e-05, |
| "loss": 0.0172, |
| "step": 12900 |
| }, |
| { |
| "grad_norm": 0.22280491888523102, |
| "learning_rate": 9.664759406802456e-05, |
| "loss": 0.0158, |
| "step": 12910 |
| }, |
| { |
| "grad_norm": 0.20018264651298523, |
| "learning_rate": 9.664014944606437e-05, |
| "loss": 0.0187, |
| "step": 12920 |
| }, |
| { |
| "grad_norm": 0.20155006647109985, |
| "learning_rate": 9.663269685457822e-05, |
| "loss": 0.018, |
| "step": 12930 |
| }, |
| { |
| "grad_norm": 0.2240087389945984, |
| "learning_rate": 9.662523629483962e-05, |
| "loss": 0.0173, |
| "step": 12940 |
| }, |
| { |
| "grad_norm": 0.20313407480716705, |
| "learning_rate": 9.661776776812333e-05, |
| "loss": 0.0153, |
| "step": 12950 |
| }, |
| { |
| "grad_norm": 0.23612536489963531, |
| "learning_rate": 9.661029127570553e-05, |
| "loss": 0.0183, |
| "step": 12960 |
| }, |
| { |
| "grad_norm": 0.18177230656147003, |
| "learning_rate": 9.660280681886373e-05, |
| "loss": 0.0174, |
| "step": 12970 |
| }, |
| { |
| "grad_norm": 0.18900875747203827, |
| "learning_rate": 9.659531439887685e-05, |
| "loss": 0.017, |
| "step": 12980 |
| }, |
| { |
| "grad_norm": 0.2031058371067047, |
| "learning_rate": 9.658781401702511e-05, |
| "loss": 0.0238, |
| "step": 12990 |
| }, |
| { |
| "grad_norm": 0.17947645485401154, |
| "learning_rate": 9.658030567459015e-05, |
| "loss": 0.0147, |
| "step": 13000 |
| }, |
| { |
| "grad_norm": 0.19303488731384277, |
| "learning_rate": 9.65727893728549e-05, |
| "loss": 0.0179, |
| "step": 13010 |
| }, |
| { |
| "grad_norm": 0.2641489505767822, |
| "learning_rate": 9.656526511310375e-05, |
| "loss": 0.0171, |
| "step": 13020 |
| }, |
| { |
| "grad_norm": 0.19446289539337158, |
| "learning_rate": 9.655773289662233e-05, |
| "loss": 0.0168, |
| "step": 13030 |
| }, |
| { |
| "grad_norm": 0.22106243669986725, |
| "learning_rate": 9.655019272469772e-05, |
| "loss": 0.0212, |
| "step": 13040 |
| }, |
| { |
| "grad_norm": 0.20340028405189514, |
| "learning_rate": 9.654264459861832e-05, |
| "loss": 0.0192, |
| "step": 13050 |
| }, |
| { |
| "grad_norm": 0.16195544600486755, |
| "learning_rate": 9.653508851967391e-05, |
| "loss": 0.0167, |
| "step": 13060 |
| }, |
| { |
| "grad_norm": 0.17701230943202972, |
| "learning_rate": 9.65275244891556e-05, |
| "loss": 0.0155, |
| "step": 13070 |
| }, |
| { |
| "grad_norm": 0.2103540152311325, |
| "learning_rate": 9.651995250835591e-05, |
| "loss": 0.0155, |
| "step": 13080 |
| }, |
| { |
| "grad_norm": 0.14413878321647644, |
| "learning_rate": 9.651237257856862e-05, |
| "loss": 0.0149, |
| "step": 13090 |
| }, |
| { |
| "grad_norm": 0.22007222473621368, |
| "learning_rate": 9.6504784701089e-05, |
| "loss": 0.0148, |
| "step": 13100 |
| }, |
| { |
| "grad_norm": 0.205142542719841, |
| "learning_rate": 9.649718887721357e-05, |
| "loss": 0.0178, |
| "step": 13110 |
| }, |
| { |
| "grad_norm": 0.1939914971590042, |
| "learning_rate": 9.648958510824028e-05, |
| "loss": 0.016, |
| "step": 13120 |
| }, |
| { |
| "grad_norm": 0.18156073987483978, |
| "learning_rate": 9.648197339546837e-05, |
| "loss": 0.018, |
| "step": 13130 |
| }, |
| { |
| "grad_norm": 0.23905836045742035, |
| "learning_rate": 9.647435374019851e-05, |
| "loss": 0.0181, |
| "step": 13140 |
| }, |
| { |
| "grad_norm": 0.25276172161102295, |
| "learning_rate": 9.646672614373266e-05, |
| "loss": 0.0195, |
| "step": 13150 |
| }, |
| { |
| "grad_norm": 0.2888982892036438, |
| "learning_rate": 9.645909060737418e-05, |
| "loss": 0.0176, |
| "step": 13160 |
| }, |
| { |
| "grad_norm": 0.18068911135196686, |
| "learning_rate": 9.645144713242778e-05, |
| "loss": 0.0216, |
| "step": 13170 |
| }, |
| { |
| "grad_norm": 0.22713512182235718, |
| "learning_rate": 9.64437957201995e-05, |
| "loss": 0.0182, |
| "step": 13180 |
| }, |
| { |
| "grad_norm": 0.18977467715740204, |
| "learning_rate": 9.643613637199678e-05, |
| "loss": 0.0186, |
| "step": 13190 |
| }, |
| { |
| "grad_norm": 0.2133685052394867, |
| "learning_rate": 9.642846908912839e-05, |
| "loss": 0.0176, |
| "step": 13200 |
| }, |
| { |
| "grad_norm": 0.15161854028701782, |
| "learning_rate": 9.642079387290444e-05, |
| "loss": 0.0164, |
| "step": 13210 |
| }, |
| { |
| "grad_norm": 0.16772955656051636, |
| "learning_rate": 9.641311072463644e-05, |
| "loss": 0.0194, |
| "step": 13220 |
| }, |
| { |
| "grad_norm": 0.21082094311714172, |
| "learning_rate": 9.640541964563722e-05, |
| "loss": 0.0207, |
| "step": 13230 |
| }, |
| { |
| "grad_norm": 0.2545044720172882, |
| "learning_rate": 9.639772063722096e-05, |
| "loss": 0.0219, |
| "step": 13240 |
| }, |
| { |
| "grad_norm": 0.1294104903936386, |
| "learning_rate": 9.639001370070324e-05, |
| "loss": 0.0174, |
| "step": 13250 |
| }, |
| { |
| "grad_norm": 0.25759389996528625, |
| "learning_rate": 9.638229883740095e-05, |
| "loss": 0.017, |
| "step": 13260 |
| }, |
| { |
| "grad_norm": 0.1531188040971756, |
| "learning_rate": 9.637457604863233e-05, |
| "loss": 0.0155, |
| "step": 13270 |
| }, |
| { |
| "grad_norm": 0.1654675155878067, |
| "learning_rate": 9.636684533571703e-05, |
| "loss": 0.0179, |
| "step": 13280 |
| }, |
| { |
| "grad_norm": 0.18904055655002594, |
| "learning_rate": 9.635910669997599e-05, |
| "loss": 0.0146, |
| "step": 13290 |
| }, |
| { |
| "grad_norm": 0.22429679334163666, |
| "learning_rate": 9.635136014273154e-05, |
| "loss": 0.0181, |
| "step": 13300 |
| }, |
| { |
| "grad_norm": 0.23229560256004333, |
| "learning_rate": 9.634360566530735e-05, |
| "loss": 0.0167, |
| "step": 13310 |
| }, |
| { |
| "grad_norm": 0.24955442547798157, |
| "learning_rate": 9.633584326902845e-05, |
| "loss": 0.0177, |
| "step": 13320 |
| }, |
| { |
| "grad_norm": 0.22871516644954681, |
| "learning_rate": 9.632807295522124e-05, |
| "loss": 0.0175, |
| "step": 13330 |
| }, |
| { |
| "grad_norm": 0.20401830971240997, |
| "learning_rate": 9.632029472521342e-05, |
| "loss": 0.0165, |
| "step": 13340 |
| }, |
| { |
| "grad_norm": 0.1854197382926941, |
| "learning_rate": 9.631250858033409e-05, |
| "loss": 0.0152, |
| "step": 13350 |
| }, |
| { |
| "grad_norm": 0.21331296861171722, |
| "learning_rate": 9.630471452191371e-05, |
| "loss": 0.0162, |
| "step": 13360 |
| }, |
| { |
| "grad_norm": 0.22700276970863342, |
| "learning_rate": 9.629691255128405e-05, |
| "loss": 0.0161, |
| "step": 13370 |
| }, |
| { |
| "grad_norm": 0.1945546269416809, |
| "learning_rate": 9.628910266977825e-05, |
| "loss": 0.0175, |
| "step": 13380 |
| }, |
| { |
| "grad_norm": 0.17648547887802124, |
| "learning_rate": 9.628128487873083e-05, |
| "loss": 0.0141, |
| "step": 13390 |
| }, |
| { |
| "grad_norm": 0.13480976223945618, |
| "learning_rate": 9.627345917947761e-05, |
| "loss": 0.0157, |
| "step": 13400 |
| }, |
| { |
| "grad_norm": 0.19314318895339966, |
| "learning_rate": 9.626562557335579e-05, |
| "loss": 0.0147, |
| "step": 13410 |
| }, |
| { |
| "grad_norm": 0.17685508728027344, |
| "learning_rate": 9.625778406170393e-05, |
| "loss": 0.0141, |
| "step": 13420 |
| }, |
| { |
| "grad_norm": 0.1886247843503952, |
| "learning_rate": 9.624993464586193e-05, |
| "loss": 0.018, |
| "step": 13430 |
| }, |
| { |
| "grad_norm": 0.24076367914676666, |
| "learning_rate": 9.624207732717105e-05, |
| "loss": 0.0189, |
| "step": 13440 |
| }, |
| { |
| "grad_norm": 0.19189012050628662, |
| "learning_rate": 9.623421210697386e-05, |
| "loss": 0.0164, |
| "step": 13450 |
| }, |
| { |
| "grad_norm": 0.18599392473697662, |
| "learning_rate": 9.622633898661434e-05, |
| "loss": 0.0175, |
| "step": 13460 |
| }, |
| { |
| "grad_norm": 0.23745107650756836, |
| "learning_rate": 9.621845796743778e-05, |
| "loss": 0.0207, |
| "step": 13470 |
| }, |
| { |
| "grad_norm": 0.21209335327148438, |
| "learning_rate": 9.621056905079082e-05, |
| "loss": 0.017, |
| "step": 13480 |
| }, |
| { |
| "grad_norm": 0.19345171749591827, |
| "learning_rate": 9.620267223802149e-05, |
| "loss": 0.015, |
| "step": 13490 |
| }, |
| { |
| "grad_norm": 0.2062082141637802, |
| "learning_rate": 9.619476753047911e-05, |
| "loss": 0.0149, |
| "step": 13500 |
| }, |
| { |
| "grad_norm": 0.2011566311120987, |
| "learning_rate": 9.618685492951438e-05, |
| "loss": 0.0178, |
| "step": 13510 |
| }, |
| { |
| "grad_norm": 0.28119274973869324, |
| "learning_rate": 9.617893443647938e-05, |
| "loss": 0.0148, |
| "step": 13520 |
| }, |
| { |
| "grad_norm": 0.2512681484222412, |
| "learning_rate": 9.617100605272746e-05, |
| "loss": 0.0143, |
| "step": 13530 |
| }, |
| { |
| "grad_norm": 0.2108534872531891, |
| "learning_rate": 9.616306977961338e-05, |
| "loss": 0.015, |
| "step": 13540 |
| }, |
| { |
| "grad_norm": 0.1767207682132721, |
| "learning_rate": 9.615512561849326e-05, |
| "loss": 0.0153, |
| "step": 13550 |
| }, |
| { |
| "grad_norm": 0.20185871422290802, |
| "learning_rate": 9.61471735707245e-05, |
| "loss": 0.0196, |
| "step": 13560 |
| }, |
| { |
| "grad_norm": 0.1924266219139099, |
| "learning_rate": 9.613921363766592e-05, |
| "loss": 0.0158, |
| "step": 13570 |
| }, |
| { |
| "grad_norm": 0.14430062472820282, |
| "learning_rate": 9.613124582067763e-05, |
| "loss": 0.0144, |
| "step": 13580 |
| }, |
| { |
| "grad_norm": 0.21115557849407196, |
| "learning_rate": 9.612327012112112e-05, |
| "loss": 0.0166, |
| "step": 13590 |
| }, |
| { |
| "grad_norm": 0.2514290511608124, |
| "learning_rate": 9.611528654035921e-05, |
| "loss": 0.0146, |
| "step": 13600 |
| }, |
| { |
| "grad_norm": 0.20375177264213562, |
| "learning_rate": 9.610729507975611e-05, |
| "loss": 0.0177, |
| "step": 13610 |
| }, |
| { |
| "grad_norm": 0.19954927265644073, |
| "learning_rate": 9.609929574067731e-05, |
| "loss": 0.0162, |
| "step": 13620 |
| }, |
| { |
| "grad_norm": 0.1741858273744583, |
| "learning_rate": 9.609128852448967e-05, |
| "loss": 0.016, |
| "step": 13630 |
| }, |
| { |
| "grad_norm": 0.21210941672325134, |
| "learning_rate": 9.608327343256143e-05, |
| "loss": 0.0173, |
| "step": 13640 |
| }, |
| { |
| "grad_norm": 0.2422517091035843, |
| "learning_rate": 9.607525046626216e-05, |
| "loss": 0.0182, |
| "step": 13650 |
| }, |
| { |
| "grad_norm": 0.19217988848686218, |
| "learning_rate": 9.606721962696272e-05, |
| "loss": 0.0157, |
| "step": 13660 |
| }, |
| { |
| "grad_norm": 0.14938074350357056, |
| "learning_rate": 9.60591809160354e-05, |
| "loss": 0.0168, |
| "step": 13670 |
| }, |
| { |
| "grad_norm": 0.2352713644504547, |
| "learning_rate": 9.605113433485378e-05, |
| "loss": 0.0174, |
| "step": 13680 |
| }, |
| { |
| "grad_norm": 0.16971217095851898, |
| "learning_rate": 9.604307988479279e-05, |
| "loss": 0.0174, |
| "step": 13690 |
| }, |
| { |
| "grad_norm": 0.17832019925117493, |
| "learning_rate": 9.603501756722876e-05, |
| "loss": 0.0144, |
| "step": 13700 |
| }, |
| { |
| "grad_norm": 0.21248316764831543, |
| "learning_rate": 9.602694738353927e-05, |
| "loss": 0.0176, |
| "step": 13710 |
| }, |
| { |
| "grad_norm": 0.19290727376937866, |
| "learning_rate": 9.601886933510331e-05, |
| "loss": 0.0188, |
| "step": 13720 |
| }, |
| { |
| "grad_norm": 0.15208296477794647, |
| "learning_rate": 9.60107834233012e-05, |
| "loss": 0.0157, |
| "step": 13730 |
| }, |
| { |
| "grad_norm": 0.20310387015342712, |
| "learning_rate": 9.60026896495146e-05, |
| "loss": 0.0176, |
| "step": 13740 |
| }, |
| { |
| "grad_norm": 0.24995331466197968, |
| "learning_rate": 9.599458801512652e-05, |
| "loss": 0.0148, |
| "step": 13750 |
| }, |
| { |
| "grad_norm": 0.19693876802921295, |
| "learning_rate": 9.598647852152129e-05, |
| "loss": 0.0146, |
| "step": 13760 |
| }, |
| { |
| "grad_norm": 0.21272483468055725, |
| "learning_rate": 9.597836117008462e-05, |
| "loss": 0.0179, |
| "step": 13770 |
| }, |
| { |
| "grad_norm": 0.20627114176750183, |
| "learning_rate": 9.597023596220356e-05, |
| "loss": 0.0187, |
| "step": 13780 |
| }, |
| { |
| "grad_norm": 0.2480172961950302, |
| "learning_rate": 9.596210289926643e-05, |
| "loss": 0.0162, |
| "step": 13790 |
| }, |
| { |
| "grad_norm": 0.22674791514873505, |
| "learning_rate": 9.5953961982663e-05, |
| "loss": 0.0161, |
| "step": 13800 |
| }, |
| { |
| "grad_norm": 0.17927564680576324, |
| "learning_rate": 9.594581321378431e-05, |
| "loss": 0.0158, |
| "step": 13810 |
| }, |
| { |
| "grad_norm": 0.2015874683856964, |
| "learning_rate": 9.593765659402276e-05, |
| "loss": 0.0164, |
| "step": 13820 |
| }, |
| { |
| "grad_norm": 0.15319640934467316, |
| "learning_rate": 9.59294921247721e-05, |
| "loss": 0.0146, |
| "step": 13830 |
| }, |
| { |
| "grad_norm": 0.19437569379806519, |
| "learning_rate": 9.59213198074274e-05, |
| "loss": 0.0145, |
| "step": 13840 |
| }, |
| { |
| "grad_norm": 0.18472889065742493, |
| "learning_rate": 9.59131396433851e-05, |
| "loss": 0.0172, |
| "step": 13850 |
| }, |
| { |
| "grad_norm": 0.17919857800006866, |
| "learning_rate": 9.590495163404297e-05, |
| "loss": 0.0186, |
| "step": 13860 |
| }, |
| { |
| "grad_norm": 0.1990271508693695, |
| "learning_rate": 9.589675578080009e-05, |
| "loss": 0.0175, |
| "step": 13870 |
| }, |
| { |
| "grad_norm": 0.23030996322631836, |
| "learning_rate": 9.588855208505694e-05, |
| "loss": 0.0175, |
| "step": 13880 |
| }, |
| { |
| "grad_norm": 0.16449768841266632, |
| "learning_rate": 9.588034054821529e-05, |
| "loss": 0.0143, |
| "step": 13890 |
| }, |
| { |
| "grad_norm": 0.2096904218196869, |
| "learning_rate": 9.587212117167826e-05, |
| "loss": 0.0159, |
| "step": 13900 |
| }, |
| { |
| "grad_norm": 0.14766645431518555, |
| "learning_rate": 9.586389395685033e-05, |
| "loss": 0.0128, |
| "step": 13910 |
| }, |
| { |
| "grad_norm": 0.2344929277896881, |
| "learning_rate": 9.585565890513733e-05, |
| "loss": 0.0147, |
| "step": 13920 |
| }, |
| { |
| "grad_norm": 0.18312610685825348, |
| "learning_rate": 9.584741601794636e-05, |
| "loss": 0.0153, |
| "step": 13930 |
| }, |
| { |
| "grad_norm": 0.24041059613227844, |
| "learning_rate": 9.58391652966859e-05, |
| "loss": 0.0157, |
| "step": 13940 |
| }, |
| { |
| "grad_norm": 0.19608065485954285, |
| "learning_rate": 9.583090674276583e-05, |
| "loss": 0.017, |
| "step": 13950 |
| }, |
| { |
| "grad_norm": 0.27070385217666626, |
| "learning_rate": 9.582264035759726e-05, |
| "loss": 0.0161, |
| "step": 13960 |
| }, |
| { |
| "grad_norm": 0.1791030615568161, |
| "learning_rate": 9.58143661425927e-05, |
| "loss": 0.016, |
| "step": 13970 |
| }, |
| { |
| "grad_norm": 0.21301402151584625, |
| "learning_rate": 9.580608409916601e-05, |
| "loss": 0.0175, |
| "step": 13980 |
| }, |
| { |
| "grad_norm": 0.2352142035961151, |
| "learning_rate": 9.579779422873233e-05, |
| "loss": 0.017, |
| "step": 13990 |
| }, |
| { |
| "grad_norm": 0.22693519294261932, |
| "learning_rate": 9.578949653270819e-05, |
| "loss": 0.0211, |
| "step": 14000 |
| }, |
| { |
| "grad_norm": 0.18038628995418549, |
| "learning_rate": 9.578119101251144e-05, |
| "loss": 0.0144, |
| "step": 14010 |
| }, |
| { |
| "grad_norm": 0.2149980664253235, |
| "learning_rate": 9.577287766956127e-05, |
| "loss": 0.0171, |
| "step": 14020 |
| }, |
| { |
| "grad_norm": 0.22819343209266663, |
| "learning_rate": 9.57645565052782e-05, |
| "loss": 0.0149, |
| "step": 14030 |
| }, |
| { |
| "grad_norm": 0.22780239582061768, |
| "learning_rate": 9.575622752108407e-05, |
| "loss": 0.0172, |
| "step": 14040 |
| }, |
| { |
| "grad_norm": 0.2205226868391037, |
| "learning_rate": 9.57478907184021e-05, |
| "loss": 0.0179, |
| "step": 14050 |
| }, |
| { |
| "grad_norm": 0.20616720616817474, |
| "learning_rate": 9.573954609865681e-05, |
| "loss": 0.0158, |
| "step": 14060 |
| }, |
| { |
| "grad_norm": 0.16584700345993042, |
| "learning_rate": 9.573119366327408e-05, |
| "loss": 0.0173, |
| "step": 14070 |
| }, |
| { |
| "grad_norm": 0.2013131082057953, |
| "learning_rate": 9.57228334136811e-05, |
| "loss": 0.0153, |
| "step": 14080 |
| }, |
| { |
| "grad_norm": 0.35026389360427856, |
| "learning_rate": 9.571446535130641e-05, |
| "loss": 0.0165, |
| "step": 14090 |
| }, |
| { |
| "grad_norm": 0.17682762444019318, |
| "learning_rate": 9.570608947757988e-05, |
| "loss": 0.0161, |
| "step": 14100 |
| }, |
| { |
| "grad_norm": 0.25271111726760864, |
| "learning_rate": 9.569770579393274e-05, |
| "loss": 0.0162, |
| "step": 14110 |
| }, |
| { |
| "grad_norm": 0.1814207136631012, |
| "learning_rate": 9.56893143017975e-05, |
| "loss": 0.0157, |
| "step": 14120 |
| }, |
| { |
| "grad_norm": 0.20927149057388306, |
| "learning_rate": 9.568091500260806e-05, |
| "loss": 0.016, |
| "step": 14130 |
| }, |
| { |
| "grad_norm": 0.18713536858558655, |
| "learning_rate": 9.567250789779961e-05, |
| "loss": 0.0149, |
| "step": 14140 |
| }, |
| { |
| "grad_norm": 0.16728201508522034, |
| "learning_rate": 9.566409298880872e-05, |
| "loss": 0.0176, |
| "step": 14150 |
| }, |
| { |
| "grad_norm": 0.18786218762397766, |
| "learning_rate": 9.565567027707326e-05, |
| "loss": 0.0178, |
| "step": 14160 |
| }, |
| { |
| "grad_norm": 0.2244640588760376, |
| "learning_rate": 9.56472397640324e-05, |
| "loss": 0.0166, |
| "step": 14170 |
| }, |
| { |
| "grad_norm": 0.21699632704257965, |
| "learning_rate": 9.563880145112675e-05, |
| "loss": 0.0202, |
| "step": 14180 |
| }, |
| { |
| "grad_norm": 0.18745189905166626, |
| "learning_rate": 9.563035533979814e-05, |
| "loss": 0.0166, |
| "step": 14190 |
| }, |
| { |
| "grad_norm": 0.19325166940689087, |
| "learning_rate": 9.562190143148981e-05, |
| "loss": 0.0204, |
| "step": 14200 |
| }, |
| { |
| "grad_norm": 0.14940635859966278, |
| "learning_rate": 9.561343972764627e-05, |
| "loss": 0.0129, |
| "step": 14210 |
| }, |
| { |
| "grad_norm": 0.23209263384342194, |
| "learning_rate": 9.560497022971343e-05, |
| "loss": 0.0154, |
| "step": 14220 |
| }, |
| { |
| "grad_norm": 0.1903807520866394, |
| "learning_rate": 9.559649293913847e-05, |
| "loss": 0.015, |
| "step": 14230 |
| }, |
| { |
| "grad_norm": 0.1858266294002533, |
| "learning_rate": 9.558800785736993e-05, |
| "loss": 0.0179, |
| "step": 14240 |
| }, |
| { |
| "grad_norm": 0.24292738735675812, |
| "learning_rate": 9.557951498585767e-05, |
| "loss": 0.0188, |
| "step": 14250 |
| }, |
| { |
| "grad_norm": 0.2099161595106125, |
| "learning_rate": 9.557101432605293e-05, |
| "loss": 0.0163, |
| "step": 14260 |
| }, |
| { |
| "grad_norm": 0.23850341141223907, |
| "learning_rate": 9.556250587940818e-05, |
| "loss": 0.0177, |
| "step": 14270 |
| }, |
| { |
| "grad_norm": 0.22388815879821777, |
| "learning_rate": 9.555398964737734e-05, |
| "loss": 0.0166, |
| "step": 14280 |
| }, |
| { |
| "grad_norm": 0.2458370327949524, |
| "learning_rate": 9.554546563141555e-05, |
| "loss": 0.0168, |
| "step": 14290 |
| }, |
| { |
| "grad_norm": 0.21718600392341614, |
| "learning_rate": 9.553693383297937e-05, |
| "loss": 0.0188, |
| "step": 14300 |
| }, |
| { |
| "grad_norm": 0.21735943853855133, |
| "learning_rate": 9.552839425352663e-05, |
| "loss": 0.0162, |
| "step": 14310 |
| }, |
| { |
| "grad_norm": 0.15514010190963745, |
| "learning_rate": 9.551984689451652e-05, |
| "loss": 0.017, |
| "step": 14320 |
| }, |
| { |
| "grad_norm": 0.21300536394119263, |
| "learning_rate": 9.551129175740953e-05, |
| "loss": 0.0176, |
| "step": 14330 |
| }, |
| { |
| "grad_norm": 0.17090490460395813, |
| "learning_rate": 9.550272884366754e-05, |
| "loss": 0.0149, |
| "step": 14340 |
| }, |
| { |
| "grad_norm": 0.20311683416366577, |
| "learning_rate": 9.549415815475369e-05, |
| "loss": 0.0152, |
| "step": 14350 |
| }, |
| { |
| "grad_norm": 0.14933757483959198, |
| "learning_rate": 9.548557969213247e-05, |
| "loss": 0.0152, |
| "step": 14360 |
| }, |
| { |
| "grad_norm": 0.15693646669387817, |
| "learning_rate": 9.547699345726972e-05, |
| "loss": 0.0122, |
| "step": 14370 |
| }, |
| { |
| "grad_norm": 0.13444781303405762, |
| "learning_rate": 9.546839945163257e-05, |
| "loss": 0.0125, |
| "step": 14380 |
| }, |
| { |
| "grad_norm": 0.22073717415332794, |
| "learning_rate": 9.545979767668953e-05, |
| "loss": 0.0176, |
| "step": 14390 |
| }, |
| { |
| "grad_norm": 0.25846320390701294, |
| "learning_rate": 9.54511881339104e-05, |
| "loss": 0.016, |
| "step": 14400 |
| }, |
| { |
| "grad_norm": 0.2418988198041916, |
| "learning_rate": 9.54425708247663e-05, |
| "loss": 0.0196, |
| "step": 14410 |
| }, |
| { |
| "grad_norm": 0.2165713757276535, |
| "learning_rate": 9.543394575072972e-05, |
| "loss": 0.017, |
| "step": 14420 |
| }, |
| { |
| "grad_norm": 0.22406932711601257, |
| "learning_rate": 9.542531291327441e-05, |
| "loss": 0.0149, |
| "step": 14430 |
| }, |
| { |
| "grad_norm": 0.18911834061145782, |
| "learning_rate": 9.541667231387552e-05, |
| "loss": 0.0147, |
| "step": 14440 |
| }, |
| { |
| "grad_norm": 0.20237530767917633, |
| "learning_rate": 9.540802395400949e-05, |
| "loss": 0.0158, |
| "step": 14450 |
| }, |
| { |
| "grad_norm": 0.21332259476184845, |
| "learning_rate": 9.539936783515406e-05, |
| "loss": 0.016, |
| "step": 14460 |
| }, |
| { |
| "grad_norm": 0.16064292192459106, |
| "learning_rate": 9.539070395878835e-05, |
| "loss": 0.0169, |
| "step": 14470 |
| }, |
| { |
| "grad_norm": 0.19493581354618073, |
| "learning_rate": 9.538203232639277e-05, |
| "loss": 0.0154, |
| "step": 14480 |
| }, |
| { |
| "grad_norm": 0.19080771505832672, |
| "learning_rate": 9.537335293944907e-05, |
| "loss": 0.0144, |
| "step": 14490 |
| }, |
| { |
| "grad_norm": 0.20722368359565735, |
| "learning_rate": 9.536466579944032e-05, |
| "loss": 0.0158, |
| "step": 14500 |
| }, |
| { |
| "grad_norm": 0.18488697707653046, |
| "learning_rate": 9.535597090785091e-05, |
| "loss": 0.0169, |
| "step": 14510 |
| }, |
| { |
| "grad_norm": 0.19848453998565674, |
| "learning_rate": 9.534726826616656e-05, |
| "loss": 0.0143, |
| "step": 14520 |
| }, |
| { |
| "grad_norm": 0.18197226524353027, |
| "learning_rate": 9.53385578758743e-05, |
| "loss": 0.015, |
| "step": 14530 |
| }, |
| { |
| "grad_norm": 0.15892091393470764, |
| "learning_rate": 9.532983973846252e-05, |
| "loss": 0.0187, |
| "step": 14540 |
| }, |
| { |
| "grad_norm": 0.17897970974445343, |
| "learning_rate": 9.53211138554209e-05, |
| "loss": 0.0154, |
| "step": 14550 |
| }, |
| { |
| "grad_norm": 0.1671089380979538, |
| "learning_rate": 9.531238022824047e-05, |
| "loss": 0.0159, |
| "step": 14560 |
| }, |
| { |
| "grad_norm": 0.2073274850845337, |
| "learning_rate": 9.530363885841355e-05, |
| "loss": 0.0173, |
| "step": 14570 |
| }, |
| { |
| "grad_norm": 0.26559683680534363, |
| "learning_rate": 9.52948897474338e-05, |
| "loss": 0.0167, |
| "step": 14580 |
| }, |
| { |
| "grad_norm": 0.15245188772678375, |
| "learning_rate": 9.528613289679622e-05, |
| "loss": 0.015, |
| "step": 14590 |
| }, |
| { |
| "grad_norm": 0.17604784667491913, |
| "learning_rate": 9.52773683079971e-05, |
| "loss": 0.0143, |
| "step": 14600 |
| }, |
| { |
| "grad_norm": 0.16941019892692566, |
| "learning_rate": 9.526859598253407e-05, |
| "loss": 0.0145, |
| "step": 14610 |
| }, |
| { |
| "grad_norm": 0.20509329438209534, |
| "learning_rate": 9.525981592190609e-05, |
| "loss": 0.0168, |
| "step": 14620 |
| }, |
| { |
| "grad_norm": 0.21102458238601685, |
| "learning_rate": 9.525102812761342e-05, |
| "loss": 0.0171, |
| "step": 14630 |
| }, |
| { |
| "grad_norm": 0.1973593533039093, |
| "learning_rate": 9.524223260115768e-05, |
| "loss": 0.0157, |
| "step": 14640 |
| }, |
| { |
| "grad_norm": 0.2082795649766922, |
| "learning_rate": 9.523342934404175e-05, |
| "loss": 0.0142, |
| "step": 14650 |
| }, |
| { |
| "grad_norm": 0.1476413905620575, |
| "learning_rate": 9.522461835776989e-05, |
| "loss": 0.0182, |
| "step": 14660 |
| }, |
| { |
| "grad_norm": 0.16569702327251434, |
| "learning_rate": 9.521579964384764e-05, |
| "loss": 0.0143, |
| "step": 14670 |
| }, |
| { |
| "grad_norm": 0.14856582880020142, |
| "learning_rate": 9.52069732037819e-05, |
| "loss": 0.0157, |
| "step": 14680 |
| }, |
| { |
| "grad_norm": 0.25279876589775085, |
| "learning_rate": 9.519813903908083e-05, |
| "loss": 0.0166, |
| "step": 14690 |
| }, |
| { |
| "grad_norm": 0.19278138875961304, |
| "learning_rate": 9.5189297151254e-05, |
| "loss": 0.0163, |
| "step": 14700 |
| }, |
| { |
| "grad_norm": 0.126902237534523, |
| "learning_rate": 9.518044754181218e-05, |
| "loss": 0.0128, |
| "step": 14710 |
| }, |
| { |
| "grad_norm": 0.18358808755874634, |
| "learning_rate": 9.51715902122676e-05, |
| "loss": 0.0155, |
| "step": 14720 |
| }, |
| { |
| "grad_norm": 0.20050916075706482, |
| "learning_rate": 9.516272516413368e-05, |
| "loss": 0.0158, |
| "step": 14730 |
| }, |
| { |
| "grad_norm": 0.21295876801013947, |
| "learning_rate": 9.515385239892525e-05, |
| "loss": 0.0184, |
| "step": 14740 |
| }, |
| { |
| "grad_norm": 0.20115697383880615, |
| "learning_rate": 9.514497191815839e-05, |
| "loss": 0.016, |
| "step": 14750 |
| }, |
| { |
| "grad_norm": 0.17910780012607574, |
| "learning_rate": 9.513608372335055e-05, |
| "loss": 0.0138, |
| "step": 14760 |
| }, |
| { |
| "grad_norm": 0.1855442374944687, |
| "learning_rate": 9.512718781602045e-05, |
| "loss": 0.0155, |
| "step": 14770 |
| }, |
| { |
| "grad_norm": 0.20865614712238312, |
| "learning_rate": 9.511828419768823e-05, |
| "loss": 0.0149, |
| "step": 14780 |
| }, |
| { |
| "grad_norm": 0.20891661942005157, |
| "learning_rate": 9.510937286987521e-05, |
| "loss": 0.0175, |
| "step": 14790 |
| }, |
| { |
| "grad_norm": 0.14707662165164948, |
| "learning_rate": 9.510045383410408e-05, |
| "loss": 0.0142, |
| "step": 14800 |
| }, |
| { |
| "grad_norm": 0.29158416390419006, |
| "learning_rate": 9.509152709189892e-05, |
| "loss": 0.0147, |
| "step": 14810 |
| }, |
| { |
| "grad_norm": 0.17530637979507446, |
| "learning_rate": 9.508259264478504e-05, |
| "loss": 0.0172, |
| "step": 14820 |
| }, |
| { |
| "grad_norm": 0.1772986799478531, |
| "learning_rate": 9.507365049428909e-05, |
| "loss": 0.017, |
| "step": 14830 |
| }, |
| { |
| "grad_norm": 0.19525521993637085, |
| "learning_rate": 9.506470064193902e-05, |
| "loss": 0.0172, |
| "step": 14840 |
| }, |
| { |
| "grad_norm": 0.1994933784008026, |
| "learning_rate": 9.505574308926414e-05, |
| "loss": 0.0152, |
| "step": 14850 |
| }, |
| { |
| "grad_norm": 0.19826896488666534, |
| "learning_rate": 9.504677783779505e-05, |
| "loss": 0.0149, |
| "step": 14860 |
| }, |
| { |
| "grad_norm": 0.22154049575328827, |
| "learning_rate": 9.503780488906365e-05, |
| "loss": 0.0153, |
| "step": 14870 |
| }, |
| { |
| "grad_norm": 0.24092242121696472, |
| "learning_rate": 9.502882424460319e-05, |
| "loss": 0.0171, |
| "step": 14880 |
| }, |
| { |
| "grad_norm": 0.18074648082256317, |
| "learning_rate": 9.501983590594821e-05, |
| "loss": 0.017, |
| "step": 14890 |
| }, |
| { |
| "grad_norm": 0.21665740013122559, |
| "learning_rate": 9.501083987463455e-05, |
| "loss": 0.0161, |
| "step": 14900 |
| }, |
| { |
| "grad_norm": 0.22991301119327545, |
| "learning_rate": 9.500183615219942e-05, |
| "loss": 0.0147, |
| "step": 14910 |
| }, |
| { |
| "grad_norm": 0.1284361481666565, |
| "learning_rate": 9.49928247401813e-05, |
| "loss": 0.0196, |
| "step": 14920 |
| }, |
| { |
| "grad_norm": 0.17300018668174744, |
| "learning_rate": 9.498380564011997e-05, |
| "loss": 0.0173, |
| "step": 14930 |
| }, |
| { |
| "grad_norm": 0.24073739349842072, |
| "learning_rate": 9.497477885355656e-05, |
| "loss": 0.0183, |
| "step": 14940 |
| }, |
| { |
| "grad_norm": 0.18364053964614868, |
| "learning_rate": 9.496574438203353e-05, |
| "loss": 0.0146, |
| "step": 14950 |
| }, |
| { |
| "grad_norm": 0.1605290174484253, |
| "learning_rate": 9.495670222709459e-05, |
| "loss": 0.0136, |
| "step": 14960 |
| }, |
| { |
| "grad_norm": 0.2259046733379364, |
| "learning_rate": 9.494765239028483e-05, |
| "loss": 0.0205, |
| "step": 14970 |
| }, |
| { |
| "grad_norm": 0.18903137743473053, |
| "learning_rate": 9.493859487315057e-05, |
| "loss": 0.0152, |
| "step": 14980 |
| }, |
| { |
| "grad_norm": 0.20537538826465607, |
| "learning_rate": 9.492952967723953e-05, |
| "loss": 0.0152, |
| "step": 14990 |
| }, |
| { |
| "grad_norm": 0.15892380475997925, |
| "learning_rate": 9.492045680410068e-05, |
| "loss": 0.018, |
| "step": 15000 |
| }, |
| { |
| "grad_norm": 0.282776802778244, |
| "learning_rate": 9.491137625528436e-05, |
| "loss": 0.0158, |
| "step": 15010 |
| }, |
| { |
| "grad_norm": 0.15211687982082367, |
| "learning_rate": 9.490228803234215e-05, |
| "loss": 0.0181, |
| "step": 15020 |
| }, |
| { |
| "grad_norm": 0.2093910574913025, |
| "learning_rate": 9.489319213682701e-05, |
| "loss": 0.0158, |
| "step": 15030 |
| }, |
| { |
| "grad_norm": 0.18943658471107483, |
| "learning_rate": 9.488408857029316e-05, |
| "loss": 0.0153, |
| "step": 15040 |
| }, |
| { |
| "grad_norm": 0.19043107330799103, |
| "learning_rate": 9.487497733429616e-05, |
| "loss": 0.0161, |
| "step": 15050 |
| }, |
| { |
| "grad_norm": 0.20405618846416473, |
| "learning_rate": 9.486585843039286e-05, |
| "loss": 0.0133, |
| "step": 15060 |
| }, |
| { |
| "grad_norm": 0.1904863864183426, |
| "learning_rate": 9.485673186014143e-05, |
| "loss": 0.0143, |
| "step": 15070 |
| }, |
| { |
| "grad_norm": 0.17432111501693726, |
| "learning_rate": 9.484759762510137e-05, |
| "loss": 0.0148, |
| "step": 15080 |
| }, |
| { |
| "grad_norm": 0.22494779527187347, |
| "learning_rate": 9.483845572683346e-05, |
| "loss": 0.015, |
| "step": 15090 |
| }, |
| { |
| "grad_norm": 0.2297731190919876, |
| "learning_rate": 9.48293061668998e-05, |
| "loss": 0.0149, |
| "step": 15100 |
| }, |
| { |
| "grad_norm": 0.17565256357192993, |
| "learning_rate": 9.48201489468638e-05, |
| "loss": 0.015, |
| "step": 15110 |
| }, |
| { |
| "grad_norm": 0.17492714524269104, |
| "learning_rate": 9.481098406829016e-05, |
| "loss": 0.0152, |
| "step": 15120 |
| }, |
| { |
| "grad_norm": 0.2154332548379898, |
| "learning_rate": 9.480181153274495e-05, |
| "loss": 0.0165, |
| "step": 15130 |
| }, |
| { |
| "grad_norm": 0.1828521341085434, |
| "learning_rate": 9.479263134179548e-05, |
| "loss": 0.0199, |
| "step": 15140 |
| }, |
| { |
| "grad_norm": 0.17296741902828217, |
| "learning_rate": 9.478344349701039e-05, |
| "loss": 0.018, |
| "step": 15150 |
| }, |
| { |
| "grad_norm": 0.13950493931770325, |
| "learning_rate": 9.477424799995964e-05, |
| "loss": 0.0157, |
| "step": 15160 |
| }, |
| { |
| "grad_norm": 0.15247932076454163, |
| "learning_rate": 9.476504485221448e-05, |
| "loss": 0.0156, |
| "step": 15170 |
| }, |
| { |
| "grad_norm": 0.24140794575214386, |
| "learning_rate": 9.475583405534748e-05, |
| "loss": 0.0162, |
| "step": 15180 |
| }, |
| { |
| "grad_norm": 0.18423013389110565, |
| "learning_rate": 9.474661561093251e-05, |
| "loss": 0.0198, |
| "step": 15190 |
| }, |
| { |
| "grad_norm": 0.21005667746067047, |
| "learning_rate": 9.473738952054478e-05, |
| "loss": 0.0154, |
| "step": 15200 |
| }, |
| { |
| "grad_norm": 0.18604272603988647, |
| "learning_rate": 9.472815578576073e-05, |
| "loss": 0.0163, |
| "step": 15210 |
| }, |
| { |
| "grad_norm": 0.19538111984729767, |
| "learning_rate": 9.471891440815817e-05, |
| "loss": 0.0149, |
| "step": 15220 |
| }, |
| { |
| "grad_norm": 0.13286109268665314, |
| "learning_rate": 9.470966538931621e-05, |
| "loss": 0.0142, |
| "step": 15230 |
| }, |
| { |
| "grad_norm": 0.20065544545650482, |
| "learning_rate": 9.470040873081525e-05, |
| "loss": 0.0158, |
| "step": 15240 |
| }, |
| { |
| "grad_norm": 0.20127904415130615, |
| "learning_rate": 9.469114443423698e-05, |
| "loss": 0.0138, |
| "step": 15250 |
| }, |
| { |
| "grad_norm": 0.15562212467193604, |
| "learning_rate": 9.468187250116445e-05, |
| "loss": 0.0161, |
| "step": 15260 |
| }, |
| { |
| "grad_norm": 0.21185919642448425, |
| "learning_rate": 9.467259293318197e-05, |
| "loss": 0.0149, |
| "step": 15270 |
| }, |
| { |
| "grad_norm": 0.1858607530593872, |
| "learning_rate": 9.466330573187514e-05, |
| "loss": 0.0162, |
| "step": 15280 |
| }, |
| { |
| "grad_norm": 0.20158518850803375, |
| "learning_rate": 9.46540108988309e-05, |
| "loss": 0.0136, |
| "step": 15290 |
| }, |
| { |
| "grad_norm": 0.19349181652069092, |
| "learning_rate": 9.46447084356375e-05, |
| "loss": 0.0138, |
| "step": 15300 |
| }, |
| { |
| "grad_norm": 0.14437109231948853, |
| "learning_rate": 9.463539834388447e-05, |
| "loss": 0.0127, |
| "step": 15310 |
| }, |
| { |
| "grad_norm": 0.1690860241651535, |
| "learning_rate": 9.462608062516263e-05, |
| "loss": 0.0163, |
| "step": 15320 |
| }, |
| { |
| "grad_norm": 0.1996581107378006, |
| "learning_rate": 9.461675528106413e-05, |
| "loss": 0.014, |
| "step": 15330 |
| }, |
| { |
| "grad_norm": 0.20567628741264343, |
| "learning_rate": 9.460742231318244e-05, |
| "loss": 0.0163, |
| "step": 15340 |
| }, |
| { |
| "grad_norm": 0.1955721229314804, |
| "learning_rate": 9.459808172311229e-05, |
| "loss": 0.0161, |
| "step": 15350 |
| }, |
| { |
| "grad_norm": 0.18463757634162903, |
| "learning_rate": 9.458873351244972e-05, |
| "loss": 0.0164, |
| "step": 15360 |
| }, |
| { |
| "grad_norm": 0.16779930889606476, |
| "learning_rate": 9.457937768279211e-05, |
| "loss": 0.0188, |
| "step": 15370 |
| }, |
| { |
| "grad_norm": 0.21408873796463013, |
| "learning_rate": 9.45700142357381e-05, |
| "loss": 0.0183, |
| "step": 15380 |
| }, |
| { |
| "grad_norm": 0.2272716909646988, |
| "learning_rate": 9.456064317288765e-05, |
| "loss": 0.0153, |
| "step": 15390 |
| }, |
| { |
| "grad_norm": 0.18865090608596802, |
| "learning_rate": 9.455126449584201e-05, |
| "loss": 0.0167, |
| "step": 15400 |
| }, |
| { |
| "grad_norm": 0.18456076085567474, |
| "learning_rate": 9.454187820620375e-05, |
| "loss": 0.0158, |
| "step": 15410 |
| }, |
| { |
| "grad_norm": 0.2326492965221405, |
| "learning_rate": 9.453248430557673e-05, |
| "loss": 0.0161, |
| "step": 15420 |
| }, |
| { |
| "grad_norm": 0.19523780047893524, |
| "learning_rate": 9.452308279556611e-05, |
| "loss": 0.0166, |
| "step": 15430 |
| }, |
| { |
| "grad_norm": 0.17657940089702606, |
| "learning_rate": 9.451367367777835e-05, |
| "loss": 0.0163, |
| "step": 15440 |
| }, |
| { |
| "grad_norm": 0.18257446587085724, |
| "learning_rate": 9.450425695382122e-05, |
| "loss": 0.014, |
| "step": 15450 |
| }, |
| { |
| "grad_norm": 0.12774761021137238, |
| "learning_rate": 9.449483262530375e-05, |
| "loss": 0.016, |
| "step": 15460 |
| }, |
| { |
| "grad_norm": 0.20910178124904633, |
| "learning_rate": 9.448540069383633e-05, |
| "loss": 0.0166, |
| "step": 15470 |
| }, |
| { |
| "grad_norm": 0.23977041244506836, |
| "learning_rate": 9.447596116103061e-05, |
| "loss": 0.0176, |
| "step": 15480 |
| }, |
| { |
| "grad_norm": 0.15449866652488708, |
| "learning_rate": 9.446651402849955e-05, |
| "loss": 0.0205, |
| "step": 15490 |
| }, |
| { |
| "grad_norm": 0.18119966983795166, |
| "learning_rate": 9.44570592978574e-05, |
| "loss": 0.0177, |
| "step": 15500 |
| }, |
| { |
| "grad_norm": 0.30893462896347046, |
| "learning_rate": 9.444759697071972e-05, |
| "loss": 0.0178, |
| "step": 15510 |
| }, |
| { |
| "grad_norm": 0.16717420518398285, |
| "learning_rate": 9.443812704870336e-05, |
| "loss": 0.0152, |
| "step": 15520 |
| }, |
| { |
| "grad_norm": 0.1347653716802597, |
| "learning_rate": 9.442864953342649e-05, |
| "loss": 0.0149, |
| "step": 15530 |
| }, |
| { |
| "grad_norm": 0.12874631583690643, |
| "learning_rate": 9.441916442650852e-05, |
| "loss": 0.0156, |
| "step": 15540 |
| }, |
| { |
| "grad_norm": 0.22822712361812592, |
| "learning_rate": 9.440967172957023e-05, |
| "loss": 0.0176, |
| "step": 15550 |
| }, |
| { |
| "grad_norm": 0.24268729984760284, |
| "learning_rate": 9.440017144423364e-05, |
| "loss": 0.0147, |
| "step": 15560 |
| }, |
| { |
| "grad_norm": 0.16856762766838074, |
| "learning_rate": 9.439066357212209e-05, |
| "loss": 0.0143, |
| "step": 15570 |
| }, |
| { |
| "grad_norm": 0.21613584458827972, |
| "learning_rate": 9.438114811486022e-05, |
| "loss": 0.0125, |
| "step": 15580 |
| }, |
| { |
| "grad_norm": 0.1790010631084442, |
| "learning_rate": 9.4371625074074e-05, |
| "loss": 0.0158, |
| "step": 15590 |
| }, |
| { |
| "grad_norm": 0.15197868645191193, |
| "learning_rate": 9.436209445139059e-05, |
| "loss": 0.0159, |
| "step": 15600 |
| }, |
| { |
| "grad_norm": 0.1518256664276123, |
| "learning_rate": 9.435255624843855e-05, |
| "loss": 0.0146, |
| "step": 15610 |
| }, |
| { |
| "grad_norm": 0.16170205175876617, |
| "learning_rate": 9.43430104668477e-05, |
| "loss": 0.0136, |
| "step": 15620 |
| }, |
| { |
| "grad_norm": 0.12304233759641647, |
| "learning_rate": 9.433345710824914e-05, |
| "loss": 0.0149, |
| "step": 15630 |
| }, |
| { |
| "grad_norm": 0.14309190213680267, |
| "learning_rate": 9.432389617427529e-05, |
| "loss": 0.0151, |
| "step": 15640 |
| }, |
| { |
| "grad_norm": 0.18983034789562225, |
| "learning_rate": 9.431432766655984e-05, |
| "loss": 0.0156, |
| "step": 15650 |
| }, |
| { |
| "grad_norm": 0.23073400557041168, |
| "learning_rate": 9.430475158673778e-05, |
| "loss": 0.016, |
| "step": 15660 |
| }, |
| { |
| "grad_norm": 0.1544876992702484, |
| "learning_rate": 9.429516793644542e-05, |
| "loss": 0.0157, |
| "step": 15670 |
| }, |
| { |
| "grad_norm": 0.17597949504852295, |
| "learning_rate": 9.428557671732034e-05, |
| "loss": 0.0154, |
| "step": 15680 |
| }, |
| { |
| "grad_norm": 0.16778236627578735, |
| "learning_rate": 9.42759779310014e-05, |
| "loss": 0.0142, |
| "step": 15690 |
| }, |
| { |
| "grad_norm": 0.15050950646400452, |
| "learning_rate": 9.426637157912879e-05, |
| "loss": 0.0127, |
| "step": 15700 |
| }, |
| { |
| "grad_norm": 0.241947203874588, |
| "learning_rate": 9.425675766334397e-05, |
| "loss": 0.0149, |
| "step": 15710 |
| }, |
| { |
| "grad_norm": 0.2014119029045105, |
| "learning_rate": 9.424713618528968e-05, |
| "loss": 0.0148, |
| "step": 15720 |
| }, |
| { |
| "grad_norm": 0.18093015253543854, |
| "learning_rate": 9.423750714661e-05, |
| "loss": 0.0165, |
| "step": 15730 |
| }, |
| { |
| "grad_norm": 0.17958128452301025, |
| "learning_rate": 9.422787054895022e-05, |
| "loss": 0.0161, |
| "step": 15740 |
| }, |
| { |
| "grad_norm": 0.20761585235595703, |
| "learning_rate": 9.4218226393957e-05, |
| "loss": 0.0152, |
| "step": 15750 |
| }, |
| { |
| "grad_norm": 0.23117801547050476, |
| "learning_rate": 9.420857468327828e-05, |
| "loss": 0.0185, |
| "step": 15760 |
| }, |
| { |
| "grad_norm": 0.17901940643787384, |
| "learning_rate": 9.419891541856323e-05, |
| "loss": 0.0168, |
| "step": 15770 |
| }, |
| { |
| "grad_norm": 0.19466669857501984, |
| "learning_rate": 9.41892486014624e-05, |
| "loss": 0.0158, |
| "step": 15780 |
| }, |
| { |
| "grad_norm": 0.18765583634376526, |
| "learning_rate": 9.417957423362756e-05, |
| "loss": 0.0148, |
| "step": 15790 |
| }, |
| { |
| "grad_norm": 0.224447101354599, |
| "learning_rate": 9.416989231671178e-05, |
| "loss": 0.014, |
| "step": 15800 |
| }, |
| { |
| "grad_norm": 0.20261625945568085, |
| "learning_rate": 9.416020285236946e-05, |
| "loss": 0.0128, |
| "step": 15810 |
| }, |
| { |
| "grad_norm": 0.143612340092659, |
| "learning_rate": 9.415050584225626e-05, |
| "loss": 0.0192, |
| "step": 15820 |
| }, |
| { |
| "grad_norm": 0.22563102841377258, |
| "learning_rate": 9.414080128802914e-05, |
| "loss": 0.0137, |
| "step": 15830 |
| }, |
| { |
| "grad_norm": 0.1522049754858017, |
| "learning_rate": 9.413108919134632e-05, |
| "loss": 0.0188, |
| "step": 15840 |
| }, |
| { |
| "grad_norm": 0.1357893943786621, |
| "learning_rate": 9.412136955386734e-05, |
| "loss": 0.0163, |
| "step": 15850 |
| }, |
| { |
| "grad_norm": 0.1896251142024994, |
| "learning_rate": 9.411164237725303e-05, |
| "loss": 0.0155, |
| "step": 15860 |
| }, |
| { |
| "grad_norm": 0.18479351699352264, |
| "learning_rate": 9.41019076631655e-05, |
| "loss": 0.0142, |
| "step": 15870 |
| }, |
| { |
| "grad_norm": 0.15194737911224365, |
| "learning_rate": 9.409216541326815e-05, |
| "loss": 0.0157, |
| "step": 15880 |
| }, |
| { |
| "grad_norm": 0.21557243168354034, |
| "learning_rate": 9.408241562922564e-05, |
| "loss": 0.0152, |
| "step": 15890 |
| }, |
| { |
| "grad_norm": 0.17626039683818817, |
| "learning_rate": 9.407265831270395e-05, |
| "loss": 0.0181, |
| "step": 15900 |
| }, |
| { |
| "grad_norm": 0.23398922383785248, |
| "learning_rate": 9.406289346537035e-05, |
| "loss": 0.0158, |
| "step": 15910 |
| }, |
| { |
| "grad_norm": 0.1912570744752884, |
| "learning_rate": 9.405312108889339e-05, |
| "loss": 0.0197, |
| "step": 15920 |
| }, |
| { |
| "grad_norm": 0.17436890304088593, |
| "learning_rate": 9.404334118494288e-05, |
| "loss": 0.0143, |
| "step": 15930 |
| }, |
| { |
| "grad_norm": 0.1936892718076706, |
| "learning_rate": 9.403355375518995e-05, |
| "loss": 0.016, |
| "step": 15940 |
| }, |
| { |
| "grad_norm": 0.2042391300201416, |
| "learning_rate": 9.4023758801307e-05, |
| "loss": 0.0177, |
| "step": 15950 |
| }, |
| { |
| "grad_norm": 0.1821664571762085, |
| "learning_rate": 9.401395632496774e-05, |
| "loss": 0.0154, |
| "step": 15960 |
| }, |
| { |
| "grad_norm": 0.13864484429359436, |
| "learning_rate": 9.400414632784711e-05, |
| "loss": 0.0142, |
| "step": 15970 |
| }, |
| { |
| "grad_norm": 0.17017197608947754, |
| "learning_rate": 9.39943288116214e-05, |
| "loss": 0.014, |
| "step": 15980 |
| }, |
| { |
| "grad_norm": 0.13913613557815552, |
| "learning_rate": 9.398450377796815e-05, |
| "loss": 0.0135, |
| "step": 15990 |
| }, |
| { |
| "grad_norm": 0.19775359332561493, |
| "learning_rate": 9.397467122856616e-05, |
| "loss": 0.018, |
| "step": 16000 |
| }, |
| { |
| "grad_norm": 0.2045111358165741, |
| "learning_rate": 9.396483116509558e-05, |
| "loss": 0.0165, |
| "step": 16010 |
| }, |
| { |
| "grad_norm": 0.12941493093967438, |
| "learning_rate": 9.39549835892378e-05, |
| "loss": 0.017, |
| "step": 16020 |
| }, |
| { |
| "grad_norm": 0.2243223935365677, |
| "learning_rate": 9.39451285026755e-05, |
| "loss": 0.0154, |
| "step": 16030 |
| }, |
| { |
| "grad_norm": 0.13051633536815643, |
| "learning_rate": 9.393526590709262e-05, |
| "loss": 0.0151, |
| "step": 16040 |
| }, |
| { |
| "grad_norm": 0.1551182121038437, |
| "learning_rate": 9.392539580417444e-05, |
| "loss": 0.0154, |
| "step": 16050 |
| }, |
| { |
| "grad_norm": 0.19762742519378662, |
| "learning_rate": 9.391551819560747e-05, |
| "loss": 0.0162, |
| "step": 16060 |
| }, |
| { |
| "grad_norm": 0.15003331005573273, |
| "learning_rate": 9.390563308307955e-05, |
| "loss": 0.0137, |
| "step": 16070 |
| }, |
| { |
| "grad_norm": 0.17223556339740753, |
| "learning_rate": 9.389574046827974e-05, |
| "loss": 0.0164, |
| "step": 16080 |
| }, |
| { |
| "grad_norm": 0.17182190716266632, |
| "learning_rate": 9.388584035289845e-05, |
| "loss": 0.0139, |
| "step": 16090 |
| }, |
| { |
| "grad_norm": 0.16798833012580872, |
| "learning_rate": 9.387593273862732e-05, |
| "loss": 0.015, |
| "step": 16100 |
| }, |
| { |
| "grad_norm": 0.21085280179977417, |
| "learning_rate": 9.386601762715929e-05, |
| "loss": 0.0137, |
| "step": 16110 |
| }, |
| { |
| "grad_norm": 0.20345155894756317, |
| "learning_rate": 9.38560950201886e-05, |
| "loss": 0.0132, |
| "step": 16120 |
| }, |
| { |
| "grad_norm": 0.19954214990139008, |
| "learning_rate": 9.384616491941071e-05, |
| "loss": 0.0158, |
| "step": 16130 |
| }, |
| { |
| "grad_norm": 0.1865217238664627, |
| "learning_rate": 9.383622732652245e-05, |
| "loss": 0.0143, |
| "step": 16140 |
| }, |
| { |
| "grad_norm": 0.20265163481235504, |
| "learning_rate": 9.382628224322187e-05, |
| "loss": 0.0138, |
| "step": 16150 |
| }, |
| { |
| "grad_norm": 0.19903923571109772, |
| "learning_rate": 9.381632967120829e-05, |
| "loss": 0.0154, |
| "step": 16160 |
| }, |
| { |
| "grad_norm": 0.15950298309326172, |
| "learning_rate": 9.380636961218235e-05, |
| "loss": 0.0139, |
| "step": 16170 |
| }, |
| { |
| "grad_norm": 0.15036417543888092, |
| "learning_rate": 9.379640206784597e-05, |
| "loss": 0.0158, |
| "step": 16180 |
| }, |
| { |
| "grad_norm": 0.18149831891059875, |
| "learning_rate": 9.378642703990229e-05, |
| "loss": 0.0151, |
| "step": 16190 |
| }, |
| { |
| "grad_norm": 0.17164242267608643, |
| "learning_rate": 9.37764445300558e-05, |
| "loss": 0.0147, |
| "step": 16200 |
| }, |
| { |
| "grad_norm": 0.1989191621541977, |
| "learning_rate": 9.376645454001222e-05, |
| "loss": 0.0144, |
| "step": 16210 |
| }, |
| { |
| "grad_norm": 0.17502199113368988, |
| "learning_rate": 9.375645707147858e-05, |
| "loss": 0.0158, |
| "step": 16220 |
| }, |
| { |
| "grad_norm": 0.12843117117881775, |
| "learning_rate": 9.374645212616316e-05, |
| "loss": 0.0142, |
| "step": 16230 |
| }, |
| { |
| "grad_norm": 0.23635591566562653, |
| "learning_rate": 9.373643970577555e-05, |
| "loss": 0.016, |
| "step": 16240 |
| }, |
| { |
| "grad_norm": 0.1574121117591858, |
| "learning_rate": 9.372641981202659e-05, |
| "loss": 0.0124, |
| "step": 16250 |
| }, |
| { |
| "grad_norm": 0.19652670621871948, |
| "learning_rate": 9.37163924466284e-05, |
| "loss": 0.0184, |
| "step": 16260 |
| }, |
| { |
| "grad_norm": 0.1898224800825119, |
| "learning_rate": 9.370635761129438e-05, |
| "loss": 0.0157, |
| "step": 16270 |
| }, |
| { |
| "grad_norm": 0.19329339265823364, |
| "learning_rate": 9.36963153077392e-05, |
| "loss": 0.0181, |
| "step": 16280 |
| }, |
| { |
| "grad_norm": 0.14157329499721527, |
| "learning_rate": 9.368626553767888e-05, |
| "loss": 0.0131, |
| "step": 16290 |
| }, |
| { |
| "grad_norm": 0.21744973957538605, |
| "learning_rate": 9.367620830283057e-05, |
| "loss": 0.0157, |
| "step": 16300 |
| }, |
| { |
| "grad_norm": 0.17362214624881744, |
| "learning_rate": 9.366614360491281e-05, |
| "loss": 0.0155, |
| "step": 16310 |
| }, |
| { |
| "grad_norm": 0.1637934446334839, |
| "learning_rate": 9.365607144564539e-05, |
| "loss": 0.0142, |
| "step": 16320 |
| }, |
| { |
| "grad_norm": 0.19428077340126038, |
| "learning_rate": 9.364599182674934e-05, |
| "loss": 0.0139, |
| "step": 16330 |
| }, |
| { |
| "grad_norm": 0.1941431313753128, |
| "learning_rate": 9.3635904749947e-05, |
| "loss": 0.0128, |
| "step": 16340 |
| }, |
| { |
| "grad_norm": 0.1572149097919464, |
| "learning_rate": 9.362581021696202e-05, |
| "loss": 0.0155, |
| "step": 16350 |
| }, |
| { |
| "grad_norm": 0.1676313579082489, |
| "learning_rate": 9.361570822951921e-05, |
| "loss": 0.0172, |
| "step": 16360 |
| }, |
| { |
| "grad_norm": 0.1637125164270401, |
| "learning_rate": 9.360559878934476e-05, |
| "loss": 0.0126, |
| "step": 16370 |
| }, |
| { |
| "grad_norm": 0.23842716217041016, |
| "learning_rate": 9.359548189816611e-05, |
| "loss": 0.0131, |
| "step": 16380 |
| }, |
| { |
| "grad_norm": 0.27230238914489746, |
| "learning_rate": 9.358535755771193e-05, |
| "loss": 0.015, |
| "step": 16390 |
| }, |
| { |
| "grad_norm": 0.16588149964809418, |
| "learning_rate": 9.357522576971221e-05, |
| "loss": 0.0175, |
| "step": 16400 |
| }, |
| { |
| "grad_norm": 0.1918477714061737, |
| "learning_rate": 9.356508653589819e-05, |
| "loss": 0.0157, |
| "step": 16410 |
| }, |
| { |
| "grad_norm": 0.20776967704296112, |
| "learning_rate": 9.355493985800237e-05, |
| "loss": 0.0161, |
| "step": 16420 |
| }, |
| { |
| "grad_norm": 0.2630099356174469, |
| "learning_rate": 9.354478573775857e-05, |
| "loss": 0.0122, |
| "step": 16430 |
| }, |
| { |
| "grad_norm": 0.19244301319122314, |
| "learning_rate": 9.353462417690186e-05, |
| "loss": 0.0136, |
| "step": 16440 |
| }, |
| { |
| "grad_norm": 0.24064555764198303, |
| "learning_rate": 9.352445517716853e-05, |
| "loss": 0.0152, |
| "step": 16450 |
| }, |
| { |
| "grad_norm": 0.2524726688861847, |
| "learning_rate": 9.351427874029621e-05, |
| "loss": 0.0172, |
| "step": 16460 |
| }, |
| { |
| "grad_norm": 0.15851224958896637, |
| "learning_rate": 9.350409486802379e-05, |
| "loss": 0.0142, |
| "step": 16470 |
| }, |
| { |
| "grad_norm": 0.19096924364566803, |
| "learning_rate": 9.349390356209138e-05, |
| "loss": 0.014, |
| "step": 16480 |
| }, |
| { |
| "grad_norm": 0.1291278898715973, |
| "learning_rate": 9.348370482424042e-05, |
| "loss": 0.0134, |
| "step": 16490 |
| }, |
| { |
| "grad_norm": 0.1367338001728058, |
| "learning_rate": 9.347349865621357e-05, |
| "loss": 0.0133, |
| "step": 16500 |
| }, |
| { |
| "grad_norm": 0.201796293258667, |
| "learning_rate": 9.346328505975481e-05, |
| "loss": 0.0113, |
| "step": 16510 |
| }, |
| { |
| "grad_norm": 0.1760982722043991, |
| "learning_rate": 9.345306403660936e-05, |
| "loss": 0.0152, |
| "step": 16520 |
| }, |
| { |
| "grad_norm": 0.16534508764743805, |
| "learning_rate": 9.344283558852371e-05, |
| "loss": 0.0152, |
| "step": 16530 |
| }, |
| { |
| "grad_norm": 0.16075266897678375, |
| "learning_rate": 9.343259971724563e-05, |
| "loss": 0.0141, |
| "step": 16540 |
| }, |
| { |
| "grad_norm": 0.18174687027931213, |
| "learning_rate": 9.342235642452413e-05, |
| "loss": 0.0123, |
| "step": 16550 |
| }, |
| { |
| "grad_norm": 0.2342667430639267, |
| "learning_rate": 9.341210571210954e-05, |
| "loss": 0.0157, |
| "step": 16560 |
| }, |
| { |
| "grad_norm": 0.18194811046123505, |
| "learning_rate": 9.340184758175338e-05, |
| "loss": 0.0152, |
| "step": 16570 |
| }, |
| { |
| "grad_norm": 0.16675084829330444, |
| "learning_rate": 9.339158203520854e-05, |
| "loss": 0.0133, |
| "step": 16580 |
| }, |
| { |
| "grad_norm": 0.132276251912117, |
| "learning_rate": 9.338130907422908e-05, |
| "loss": 0.0143, |
| "step": 16590 |
| }, |
| { |
| "grad_norm": 0.15153101086616516, |
| "learning_rate": 9.337102870057037e-05, |
| "loss": 0.0141, |
| "step": 16600 |
| }, |
| { |
| "grad_norm": 0.1930789202451706, |
| "learning_rate": 9.336074091598907e-05, |
| "loss": 0.0142, |
| "step": 16610 |
| }, |
| { |
| "grad_norm": 0.22428837418556213, |
| "learning_rate": 9.335044572224306e-05, |
| "loss": 0.0136, |
| "step": 16620 |
| }, |
| { |
| "grad_norm": 0.198419988155365, |
| "learning_rate": 9.334014312109151e-05, |
| "loss": 0.0168, |
| "step": 16630 |
| }, |
| { |
| "grad_norm": 0.21213442087173462, |
| "learning_rate": 9.332983311429486e-05, |
| "loss": 0.0149, |
| "step": 16640 |
| }, |
| { |
| "grad_norm": 0.1644720733165741, |
| "learning_rate": 9.33195157036148e-05, |
| "loss": 0.0187, |
| "step": 16650 |
| }, |
| { |
| "grad_norm": 0.13675765693187714, |
| "learning_rate": 9.330919089081432e-05, |
| "loss": 0.0127, |
| "step": 16660 |
| }, |
| { |
| "grad_norm": 0.18107080459594727, |
| "learning_rate": 9.32988586776576e-05, |
| "loss": 0.0166, |
| "step": 16670 |
| }, |
| { |
| "grad_norm": 0.12249067425727844, |
| "learning_rate": 9.328851906591016e-05, |
| "loss": 0.0124, |
| "step": 16680 |
| }, |
| { |
| "grad_norm": 0.15982289612293243, |
| "learning_rate": 9.327817205733875e-05, |
| "loss": 0.0125, |
| "step": 16690 |
| }, |
| { |
| "grad_norm": 0.14900913834571838, |
| "learning_rate": 9.326781765371142e-05, |
| "loss": 0.013, |
| "step": 16700 |
| }, |
| { |
| "grad_norm": 0.10367929190397263, |
| "learning_rate": 9.325745585679741e-05, |
| "loss": 0.0142, |
| "step": 16710 |
| }, |
| { |
| "grad_norm": 0.14036177098751068, |
| "learning_rate": 9.32470866683673e-05, |
| "loss": 0.0121, |
| "step": 16720 |
| }, |
| { |
| "grad_norm": 0.2109682857990265, |
| "learning_rate": 9.323671009019288e-05, |
| "loss": 0.0128, |
| "step": 16730 |
| }, |
| { |
| "grad_norm": 0.1827767938375473, |
| "learning_rate": 9.322632612404725e-05, |
| "loss": 0.0167, |
| "step": 16740 |
| }, |
| { |
| "grad_norm": 0.2175469547510147, |
| "learning_rate": 9.321593477170471e-05, |
| "loss": 0.0156, |
| "step": 16750 |
| }, |
| { |
| "grad_norm": 0.16873490810394287, |
| "learning_rate": 9.320553603494088e-05, |
| "loss": 0.0153, |
| "step": 16760 |
| }, |
| { |
| "grad_norm": 0.206427663564682, |
| "learning_rate": 9.319512991553261e-05, |
| "loss": 0.0163, |
| "step": 16770 |
| }, |
| { |
| "grad_norm": 0.20298726856708527, |
| "learning_rate": 9.318471641525803e-05, |
| "loss": 0.0158, |
| "step": 16780 |
| }, |
| { |
| "grad_norm": 0.1599826216697693, |
| "learning_rate": 9.317429553589652e-05, |
| "loss": 0.0137, |
| "step": 16790 |
| }, |
| { |
| "grad_norm": 0.15837444365024567, |
| "learning_rate": 9.316386727922873e-05, |
| "loss": 0.0143, |
| "step": 16800 |
| }, |
| { |
| "grad_norm": 0.1554252803325653, |
| "learning_rate": 9.315343164703656e-05, |
| "loss": 0.0111, |
| "step": 16810 |
| }, |
| { |
| "grad_norm": 0.20519639551639557, |
| "learning_rate": 9.314298864110316e-05, |
| "loss": 0.014, |
| "step": 16820 |
| }, |
| { |
| "grad_norm": 0.19669020175933838, |
| "learning_rate": 9.313253826321295e-05, |
| "loss": 0.0145, |
| "step": 16830 |
| }, |
| { |
| "grad_norm": 0.20008881390094757, |
| "learning_rate": 9.312208051515165e-05, |
| "loss": 0.013, |
| "step": 16840 |
| }, |
| { |
| "grad_norm": 0.17566107213497162, |
| "learning_rate": 9.311161539870618e-05, |
| "loss": 0.0163, |
| "step": 16850 |
| }, |
| { |
| "grad_norm": 0.20041196048259735, |
| "learning_rate": 9.310114291566474e-05, |
| "loss": 0.0135, |
| "step": 16860 |
| }, |
| { |
| "grad_norm": 0.16520047187805176, |
| "learning_rate": 9.309066306781679e-05, |
| "loss": 0.014, |
| "step": 16870 |
| }, |
| { |
| "grad_norm": 0.18632209300994873, |
| "learning_rate": 9.308017585695306e-05, |
| "loss": 0.0148, |
| "step": 16880 |
| }, |
| { |
| "grad_norm": 0.22278279066085815, |
| "learning_rate": 9.306968128486552e-05, |
| "loss": 0.0133, |
| "step": 16890 |
| }, |
| { |
| "grad_norm": 0.1548001766204834, |
| "learning_rate": 9.30591793533474e-05, |
| "loss": 0.014, |
| "step": 16900 |
| }, |
| { |
| "grad_norm": 0.17120777070522308, |
| "learning_rate": 9.304867006419321e-05, |
| "loss": 0.0125, |
| "step": 16910 |
| }, |
| { |
| "grad_norm": 0.2500787079334259, |
| "learning_rate": 9.303815341919868e-05, |
| "loss": 0.0179, |
| "step": 16920 |
| }, |
| { |
| "grad_norm": 0.19493171572685242, |
| "learning_rate": 9.302762942016084e-05, |
| "loss": 0.0133, |
| "step": 16930 |
| }, |
| { |
| "grad_norm": 0.1815623939037323, |
| "learning_rate": 9.301709806887792e-05, |
| "loss": 0.0132, |
| "step": 16940 |
| }, |
| { |
| "grad_norm": 0.1313970983028412, |
| "learning_rate": 9.300655936714948e-05, |
| "loss": 0.0157, |
| "step": 16950 |
| }, |
| { |
| "grad_norm": 0.14121149480342865, |
| "learning_rate": 9.299601331677627e-05, |
| "loss": 0.0168, |
| "step": 16960 |
| }, |
| { |
| "grad_norm": 0.2180827260017395, |
| "learning_rate": 9.298545991956033e-05, |
| "loss": 0.0169, |
| "step": 16970 |
| }, |
| { |
| "grad_norm": 0.17011718451976776, |
| "learning_rate": 9.297489917730493e-05, |
| "loss": 0.0144, |
| "step": 16980 |
| }, |
| { |
| "grad_norm": 0.1423613578081131, |
| "learning_rate": 9.296433109181464e-05, |
| "loss": 0.0132, |
| "step": 16990 |
| }, |
| { |
| "grad_norm": 0.14495424926280975, |
| "learning_rate": 9.295375566489523e-05, |
| "loss": 0.0153, |
| "step": 17000 |
| }, |
| { |
| "grad_norm": 0.2507295608520508, |
| "learning_rate": 9.294317289835379e-05, |
| "loss": 0.0188, |
| "step": 17010 |
| }, |
| { |
| "grad_norm": 0.1876184493303299, |
| "learning_rate": 9.293258279399859e-05, |
| "loss": 0.0159, |
| "step": 17020 |
| }, |
| { |
| "grad_norm": 0.21413366496562958, |
| "learning_rate": 9.292198535363919e-05, |
| "loss": 0.0139, |
| "step": 17030 |
| }, |
| { |
| "grad_norm": 0.19295363128185272, |
| "learning_rate": 9.291138057908641e-05, |
| "loss": 0.0154, |
| "step": 17040 |
| }, |
| { |
| "grad_norm": 0.2160753458738327, |
| "learning_rate": 9.290076847215234e-05, |
| "loss": 0.0154, |
| "step": 17050 |
| }, |
| { |
| "grad_norm": 0.1723812073469162, |
| "learning_rate": 9.289014903465025e-05, |
| "loss": 0.0146, |
| "step": 17060 |
| }, |
| { |
| "grad_norm": 0.21360719203948975, |
| "learning_rate": 9.287952226839475e-05, |
| "loss": 0.0152, |
| "step": 17070 |
| }, |
| { |
| "grad_norm": 0.22365033626556396, |
| "learning_rate": 9.286888817520164e-05, |
| "loss": 0.0123, |
| "step": 17080 |
| }, |
| { |
| "grad_norm": 0.16959181427955627, |
| "learning_rate": 9.285824675688803e-05, |
| "loss": 0.0163, |
| "step": 17090 |
| }, |
| { |
| "grad_norm": 0.16515499353408813, |
| "learning_rate": 9.28475980152722e-05, |
| "loss": 0.0126, |
| "step": 17100 |
| }, |
| { |
| "grad_norm": 0.21997210383415222, |
| "learning_rate": 9.283694195217379e-05, |
| "loss": 0.0128, |
| "step": 17110 |
| }, |
| { |
| "grad_norm": 0.1831325888633728, |
| "learning_rate": 9.282627856941356e-05, |
| "loss": 0.0142, |
| "step": 17120 |
| }, |
| { |
| "grad_norm": 0.23816367983818054, |
| "learning_rate": 9.281560786881363e-05, |
| "loss": 0.0139, |
| "step": 17130 |
| }, |
| { |
| "grad_norm": 0.23735882341861725, |
| "learning_rate": 9.280492985219733e-05, |
| "loss": 0.0161, |
| "step": 17140 |
| }, |
| { |
| "grad_norm": 0.20103737711906433, |
| "learning_rate": 9.279424452138924e-05, |
| "loss": 0.0149, |
| "step": 17150 |
| }, |
| { |
| "grad_norm": 0.15094976127147675, |
| "learning_rate": 9.278355187821517e-05, |
| "loss": 0.0159, |
| "step": 17160 |
| }, |
| { |
| "grad_norm": 0.20321892201900482, |
| "learning_rate": 9.277285192450224e-05, |
| "loss": 0.0123, |
| "step": 17170 |
| }, |
| { |
| "grad_norm": 0.22879266738891602, |
| "learning_rate": 9.276214466207875e-05, |
| "loss": 0.0138, |
| "step": 17180 |
| }, |
| { |
| "grad_norm": 0.25783663988113403, |
| "learning_rate": 9.275143009277427e-05, |
| "loss": 0.0187, |
| "step": 17190 |
| }, |
| { |
| "grad_norm": 0.21469669044017792, |
| "learning_rate": 9.274070821841964e-05, |
| "loss": 0.0135, |
| "step": 17200 |
| }, |
| { |
| "grad_norm": 0.1551484763622284, |
| "learning_rate": 9.272997904084696e-05, |
| "loss": 0.0129, |
| "step": 17210 |
| }, |
| { |
| "grad_norm": 0.16763684153556824, |
| "learning_rate": 9.271924256188951e-05, |
| "loss": 0.0148, |
| "step": 17220 |
| }, |
| { |
| "grad_norm": 0.21348515152931213, |
| "learning_rate": 9.270849878338189e-05, |
| "loss": 0.0177, |
| "step": 17230 |
| }, |
| { |
| "grad_norm": 0.16984668374061584, |
| "learning_rate": 9.269774770715991e-05, |
| "loss": 0.0157, |
| "step": 17240 |
| }, |
| { |
| "grad_norm": 0.18757328391075134, |
| "learning_rate": 9.268698933506061e-05, |
| "loss": 0.0141, |
| "step": 17250 |
| }, |
| { |
| "grad_norm": 0.18285399675369263, |
| "learning_rate": 9.267622366892235e-05, |
| "loss": 0.0151, |
| "step": 17260 |
| }, |
| { |
| "grad_norm": 0.21176794171333313, |
| "learning_rate": 9.266545071058465e-05, |
| "loss": 0.0146, |
| "step": 17270 |
| }, |
| { |
| "grad_norm": 0.18544535338878632, |
| "learning_rate": 9.265467046188833e-05, |
| "loss": 0.0143, |
| "step": 17280 |
| }, |
| { |
| "grad_norm": 0.16830794513225555, |
| "learning_rate": 9.264388292467543e-05, |
| "loss": 0.0133, |
| "step": 17290 |
| }, |
| { |
| "grad_norm": 0.2336021363735199, |
| "learning_rate": 9.263308810078926e-05, |
| "loss": 0.0151, |
| "step": 17300 |
| }, |
| { |
| "grad_norm": 0.17986004054546356, |
| "learning_rate": 9.262228599207434e-05, |
| "loss": 0.0131, |
| "step": 17310 |
| }, |
| { |
| "grad_norm": 0.18517930805683136, |
| "learning_rate": 9.261147660037647e-05, |
| "loss": 0.0153, |
| "step": 17320 |
| }, |
| { |
| "grad_norm": 0.21676015853881836, |
| "learning_rate": 9.26006599275427e-05, |
| "loss": 0.0173, |
| "step": 17330 |
| }, |
| { |
| "grad_norm": 0.18562880158424377, |
| "learning_rate": 9.258983597542124e-05, |
| "loss": 0.0144, |
| "step": 17340 |
| }, |
| { |
| "grad_norm": 0.1738789826631546, |
| "learning_rate": 9.257900474586167e-05, |
| "loss": 0.0159, |
| "step": 17350 |
| }, |
| { |
| "grad_norm": 0.16082869470119476, |
| "learning_rate": 9.256816624071471e-05, |
| "loss": 0.014, |
| "step": 17360 |
| }, |
| { |
| "grad_norm": 0.16219903528690338, |
| "learning_rate": 9.25573204618324e-05, |
| "loss": 0.0126, |
| "step": 17370 |
| }, |
| { |
| "grad_norm": 0.1800624579191208, |
| "learning_rate": 9.254646741106796e-05, |
| "loss": 0.0147, |
| "step": 17380 |
| }, |
| { |
| "grad_norm": 0.13241958618164062, |
| "learning_rate": 9.253560709027589e-05, |
| "loss": 0.0142, |
| "step": 17390 |
| }, |
| { |
| "grad_norm": 0.19069504737854004, |
| "learning_rate": 9.252473950131192e-05, |
| "loss": 0.0154, |
| "step": 17400 |
| }, |
| { |
| "grad_norm": 0.19009700417518616, |
| "learning_rate": 9.251386464603302e-05, |
| "loss": 0.0152, |
| "step": 17410 |
| }, |
| { |
| "grad_norm": 0.18059396743774414, |
| "learning_rate": 9.250298252629741e-05, |
| "loss": 0.0133, |
| "step": 17420 |
| }, |
| { |
| "grad_norm": 0.2055145800113678, |
| "learning_rate": 9.249209314396454e-05, |
| "loss": 0.0146, |
| "step": 17430 |
| }, |
| { |
| "grad_norm": 0.1910519003868103, |
| "learning_rate": 9.248119650089513e-05, |
| "loss": 0.0146, |
| "step": 17440 |
| }, |
| { |
| "grad_norm": 0.18890254199504852, |
| "learning_rate": 9.247029259895108e-05, |
| "loss": 0.0169, |
| "step": 17450 |
| }, |
| { |
| "grad_norm": 0.1751728057861328, |
| "learning_rate": 9.24593814399956e-05, |
| "loss": 0.0135, |
| "step": 17460 |
| }, |
| { |
| "grad_norm": 0.14816506206989288, |
| "learning_rate": 9.244846302589309e-05, |
| "loss": 0.0158, |
| "step": 17470 |
| }, |
| { |
| "grad_norm": 0.18479114770889282, |
| "learning_rate": 9.243753735850923e-05, |
| "loss": 0.0144, |
| "step": 17480 |
| }, |
| { |
| "grad_norm": 0.11731395870447159, |
| "learning_rate": 9.24266044397109e-05, |
| "loss": 0.0155, |
| "step": 17490 |
| }, |
| { |
| "grad_norm": 0.18621566891670227, |
| "learning_rate": 9.241566427136624e-05, |
| "loss": 0.0148, |
| "step": 17500 |
| }, |
| { |
| "grad_norm": 0.1920589804649353, |
| "learning_rate": 9.240471685534463e-05, |
| "loss": 0.0157, |
| "step": 17510 |
| }, |
| { |
| "grad_norm": 0.13983657956123352, |
| "learning_rate": 9.239376219351667e-05, |
| "loss": 0.0123, |
| "step": 17520 |
| }, |
| { |
| "grad_norm": 0.2200879007577896, |
| "learning_rate": 9.238280028775425e-05, |
| "loss": 0.0144, |
| "step": 17530 |
| }, |
| { |
| "grad_norm": 0.18812744319438934, |
| "learning_rate": 9.237183113993041e-05, |
| "loss": 0.0133, |
| "step": 17540 |
| }, |
| { |
| "grad_norm": 0.16559596359729767, |
| "learning_rate": 9.236085475191952e-05, |
| "loss": 0.013, |
| "step": 17550 |
| }, |
| { |
| "grad_norm": 0.12157084792852402, |
| "learning_rate": 9.234987112559709e-05, |
| "loss": 0.0144, |
| "step": 17560 |
| }, |
| { |
| "grad_norm": 0.18532204627990723, |
| "learning_rate": 9.233888026283999e-05, |
| "loss": 0.0147, |
| "step": 17570 |
| }, |
| { |
| "grad_norm": 0.19556936621665955, |
| "learning_rate": 9.232788216552619e-05, |
| "loss": 0.0148, |
| "step": 17580 |
| }, |
| { |
| "grad_norm": 0.13391506671905518, |
| "learning_rate": 9.231687683553502e-05, |
| "loss": 0.0144, |
| "step": 17590 |
| }, |
| { |
| "grad_norm": 0.16114942729473114, |
| "learning_rate": 9.230586427474698e-05, |
| "loss": 0.0117, |
| "step": 17600 |
| }, |
| { |
| "grad_norm": 0.13999807834625244, |
| "learning_rate": 9.229484448504379e-05, |
| "loss": 0.0138, |
| "step": 17610 |
| }, |
| { |
| "grad_norm": 0.13610270619392395, |
| "learning_rate": 9.228381746830843e-05, |
| "loss": 0.0122, |
| "step": 17620 |
| }, |
| { |
| "grad_norm": 0.17827920615673065, |
| "learning_rate": 9.227278322642514e-05, |
| "loss": 0.0147, |
| "step": 17630 |
| }, |
| { |
| "grad_norm": 0.15096025168895721, |
| "learning_rate": 9.226174176127937e-05, |
| "loss": 0.0127, |
| "step": 17640 |
| }, |
| { |
| "grad_norm": 0.21146535873413086, |
| "learning_rate": 9.22506930747578e-05, |
| "loss": 0.0113, |
| "step": 17650 |
| }, |
| { |
| "grad_norm": 0.22395026683807373, |
| "learning_rate": 9.223963716874831e-05, |
| "loss": 0.011, |
| "step": 17660 |
| }, |
| { |
| "grad_norm": 0.14799124002456665, |
| "learning_rate": 9.222857404514012e-05, |
| "loss": 0.0135, |
| "step": 17670 |
| }, |
| { |
| "grad_norm": 0.17478370666503906, |
| "learning_rate": 9.221750370582355e-05, |
| "loss": 0.0163, |
| "step": 17680 |
| }, |
| { |
| "grad_norm": 0.17225725948810577, |
| "learning_rate": 9.220642615269028e-05, |
| "loss": 0.0139, |
| "step": 17690 |
| }, |
| { |
| "grad_norm": 0.1988813281059265, |
| "learning_rate": 9.219534138763311e-05, |
| "loss": 0.0137, |
| "step": 17700 |
| }, |
| { |
| "grad_norm": 0.16782264411449432, |
| "learning_rate": 9.218424941254613e-05, |
| "loss": 0.0131, |
| "step": 17710 |
| }, |
| { |
| "grad_norm": 0.19038794934749603, |
| "learning_rate": 9.217315022932468e-05, |
| "loss": 0.0156, |
| "step": 17720 |
| }, |
| { |
| "grad_norm": 0.15249748528003693, |
| "learning_rate": 9.216204383986528e-05, |
| "loss": 0.0139, |
| "step": 17730 |
| }, |
| { |
| "grad_norm": 0.14543838798999786, |
| "learning_rate": 9.215093024606574e-05, |
| "loss": 0.0163, |
| "step": 17740 |
| }, |
| { |
| "grad_norm": 0.1764708012342453, |
| "learning_rate": 9.213980944982506e-05, |
| "loss": 0.0124, |
| "step": 17750 |
| }, |
| { |
| "grad_norm": 0.15326310694217682, |
| "learning_rate": 9.212868145304346e-05, |
| "loss": 0.0179, |
| "step": 17760 |
| }, |
| { |
| "grad_norm": 0.1893349587917328, |
| "learning_rate": 9.211754625762241e-05, |
| "loss": 0.0135, |
| "step": 17770 |
| }, |
| { |
| "grad_norm": 0.13428562879562378, |
| "learning_rate": 9.210640386546463e-05, |
| "loss": 0.0138, |
| "step": 17780 |
| }, |
| { |
| "grad_norm": 0.14793631434440613, |
| "learning_rate": 9.209525427847405e-05, |
| "loss": 0.0146, |
| "step": 17790 |
| }, |
| { |
| "grad_norm": 0.16333283483982086, |
| "learning_rate": 9.208409749855583e-05, |
| "loss": 0.0128, |
| "step": 17800 |
| }, |
| { |
| "grad_norm": 0.2120400369167328, |
| "learning_rate": 9.207293352761633e-05, |
| "loss": 0.0181, |
| "step": 17810 |
| }, |
| { |
| "grad_norm": 0.15726876258850098, |
| "learning_rate": 9.206176236756319e-05, |
| "loss": 0.0151, |
| "step": 17820 |
| }, |
| { |
| "grad_norm": 0.16201718151569366, |
| "learning_rate": 9.205058402030525e-05, |
| "loss": 0.0126, |
| "step": 17830 |
| }, |
| { |
| "grad_norm": 0.13875119388103485, |
| "learning_rate": 9.203939848775259e-05, |
| "loss": 0.0134, |
| "step": 17840 |
| }, |
| { |
| "grad_norm": 0.12077285349369049, |
| "learning_rate": 9.202820577181652e-05, |
| "loss": 0.0134, |
| "step": 17850 |
| }, |
| { |
| "grad_norm": 0.25868210196495056, |
| "learning_rate": 9.201700587440953e-05, |
| "loss": 0.014, |
| "step": 17860 |
| }, |
| { |
| "grad_norm": 0.21567592024803162, |
| "learning_rate": 9.200579879744544e-05, |
| "loss": 0.0181, |
| "step": 17870 |
| }, |
| { |
| "grad_norm": 0.24580392241477966, |
| "learning_rate": 9.199458454283918e-05, |
| "loss": 0.0133, |
| "step": 17880 |
| }, |
| { |
| "grad_norm": 0.20059938728809357, |
| "learning_rate": 9.198336311250697e-05, |
| "loss": 0.0157, |
| "step": 17890 |
| }, |
| { |
| "grad_norm": 0.20259112119674683, |
| "learning_rate": 9.197213450836626e-05, |
| "loss": 0.0151, |
| "step": 17900 |
| }, |
| { |
| "grad_norm": 0.2202037274837494, |
| "learning_rate": 9.19608987323357e-05, |
| "loss": 0.0161, |
| "step": 17910 |
| }, |
| { |
| "grad_norm": 0.16498060524463654, |
| "learning_rate": 9.194965578633517e-05, |
| "loss": 0.0161, |
| "step": 17920 |
| }, |
| { |
| "grad_norm": 0.15870080888271332, |
| "learning_rate": 9.193840567228582e-05, |
| "loss": 0.0129, |
| "step": 17930 |
| }, |
| { |
| "grad_norm": 0.17275480926036835, |
| "learning_rate": 9.192714839210994e-05, |
| "loss": 0.0163, |
| "step": 17940 |
| }, |
| { |
| "grad_norm": 0.190365269780159, |
| "learning_rate": 9.19158839477311e-05, |
| "loss": 0.0121, |
| "step": 17950 |
| }, |
| { |
| "grad_norm": 0.2227722406387329, |
| "learning_rate": 9.190461234107411e-05, |
| "loss": 0.0174, |
| "step": 17960 |
| }, |
| { |
| "grad_norm": 0.21828261017799377, |
| "learning_rate": 9.189333357406496e-05, |
| "loss": 0.0152, |
| "step": 17970 |
| }, |
| { |
| "grad_norm": 0.16810466349124908, |
| "learning_rate": 9.188204764863089e-05, |
| "loss": 0.0145, |
| "step": 17980 |
| }, |
| { |
| "grad_norm": 0.180048868060112, |
| "learning_rate": 9.187075456670033e-05, |
| "loss": 0.0152, |
| "step": 17990 |
| }, |
| { |
| "grad_norm": 0.16667920351028442, |
| "learning_rate": 9.1859454330203e-05, |
| "loss": 0.0135, |
| "step": 18000 |
| }, |
| { |
| "grad_norm": 0.2036157101392746, |
| "learning_rate": 9.18481469410698e-05, |
| "loss": 0.0155, |
| "step": 18010 |
| }, |
| { |
| "grad_norm": 0.1677679866552353, |
| "learning_rate": 9.183683240123281e-05, |
| "loss": 0.0117, |
| "step": 18020 |
| }, |
| { |
| "grad_norm": 0.20066669583320618, |
| "learning_rate": 9.182551071262541e-05, |
| "loss": 0.013, |
| "step": 18030 |
| }, |
| { |
| "grad_norm": 0.17513668537139893, |
| "learning_rate": 9.181418187718218e-05, |
| "loss": 0.0131, |
| "step": 18040 |
| }, |
| { |
| "grad_norm": 0.16238491237163544, |
| "learning_rate": 9.180284589683888e-05, |
| "loss": 0.015, |
| "step": 18050 |
| }, |
| { |
| "grad_norm": 0.1820688396692276, |
| "learning_rate": 9.17915027735325e-05, |
| "loss": 0.0157, |
| "step": 18060 |
| }, |
| { |
| "grad_norm": 0.19918294250965118, |
| "learning_rate": 9.178015250920133e-05, |
| "loss": 0.0167, |
| "step": 18070 |
| }, |
| { |
| "grad_norm": 0.17456094920635223, |
| "learning_rate": 9.176879510578477e-05, |
| "loss": 0.0136, |
| "step": 18080 |
| }, |
| { |
| "grad_norm": 0.202936589717865, |
| "learning_rate": 9.17574305652235e-05, |
| "loss": 0.0157, |
| "step": 18090 |
| }, |
| { |
| "grad_norm": 0.16613461077213287, |
| "learning_rate": 9.174605888945942e-05, |
| "loss": 0.0153, |
| "step": 18100 |
| }, |
| { |
| "grad_norm": 0.1491008847951889, |
| "learning_rate": 9.173468008043564e-05, |
| "loss": 0.0132, |
| "step": 18110 |
| }, |
| { |
| "grad_norm": 0.189555361866951, |
| "learning_rate": 9.172329414009648e-05, |
| "loss": 0.0195, |
| "step": 18120 |
| }, |
| { |
| "grad_norm": 0.18217122554779053, |
| "learning_rate": 9.171190107038747e-05, |
| "loss": 0.0146, |
| "step": 18130 |
| }, |
| { |
| "grad_norm": 0.2065577208995819, |
| "learning_rate": 9.170050087325541e-05, |
| "loss": 0.0122, |
| "step": 18140 |
| }, |
| { |
| "grad_norm": 0.17024259269237518, |
| "learning_rate": 9.168909355064824e-05, |
| "loss": 0.0123, |
| "step": 18150 |
| }, |
| { |
| "grad_norm": 0.25083598494529724, |
| "learning_rate": 9.167767910451519e-05, |
| "loss": 0.0135, |
| "step": 18160 |
| }, |
| { |
| "grad_norm": 0.1365453153848648, |
| "learning_rate": 9.166625753680669e-05, |
| "loss": 0.0128, |
| "step": 18170 |
| }, |
| { |
| "grad_norm": 0.1414394974708557, |
| "learning_rate": 9.165482884947431e-05, |
| "loss": 0.0122, |
| "step": 18180 |
| }, |
| { |
| "grad_norm": 0.12690173089504242, |
| "learning_rate": 9.164339304447098e-05, |
| "loss": 0.014, |
| "step": 18190 |
| }, |
| { |
| "grad_norm": 0.17947471141815186, |
| "learning_rate": 9.163195012375072e-05, |
| "loss": 0.015, |
| "step": 18200 |
| }, |
| { |
| "grad_norm": 0.16831102967262268, |
| "learning_rate": 9.16205000892688e-05, |
| "loss": 0.0131, |
| "step": 18210 |
| }, |
| { |
| "grad_norm": 0.15917395055294037, |
| "learning_rate": 9.160904294298175e-05, |
| "loss": 0.0137, |
| "step": 18220 |
| }, |
| { |
| "grad_norm": 0.18532268702983856, |
| "learning_rate": 9.159757868684727e-05, |
| "loss": 0.0155, |
| "step": 18230 |
| }, |
| { |
| "grad_norm": 0.16364696621894836, |
| "learning_rate": 9.15861073228243e-05, |
| "loss": 0.0155, |
| "step": 18240 |
| }, |
| { |
| "grad_norm": 0.19000813364982605, |
| "learning_rate": 9.157462885287296e-05, |
| "loss": 0.0147, |
| "step": 18250 |
| }, |
| { |
| "grad_norm": 0.16765892505645752, |
| "learning_rate": 9.156314327895461e-05, |
| "loss": 0.0149, |
| "step": 18260 |
| }, |
| { |
| "grad_norm": 0.1955818086862564, |
| "learning_rate": 9.155165060303185e-05, |
| "loss": 0.0158, |
| "step": 18270 |
| }, |
| { |
| "grad_norm": 0.19901178777217865, |
| "learning_rate": 9.154015082706841e-05, |
| "loss": 0.0154, |
| "step": 18280 |
| }, |
| { |
| "grad_norm": 0.22178158164024353, |
| "learning_rate": 9.152864395302936e-05, |
| "loss": 0.013, |
| "step": 18290 |
| }, |
| { |
| "grad_norm": 0.13203920423984528, |
| "learning_rate": 9.151712998288085e-05, |
| "loss": 0.0122, |
| "step": 18300 |
| }, |
| { |
| "grad_norm": 0.20603708922863007, |
| "learning_rate": 9.150560891859031e-05, |
| "loss": 0.0184, |
| "step": 18310 |
| }, |
| { |
| "grad_norm": 0.13683779537677765, |
| "learning_rate": 9.14940807621264e-05, |
| "loss": 0.0126, |
| "step": 18320 |
| }, |
| { |
| "grad_norm": 0.2200845330953598, |
| "learning_rate": 9.148254551545894e-05, |
| "loss": 0.0177, |
| "step": 18330 |
| }, |
| { |
| "grad_norm": 0.1949702948331833, |
| "learning_rate": 9.147100318055901e-05, |
| "loss": 0.015, |
| "step": 18340 |
| }, |
| { |
| "grad_norm": 0.1641172617673874, |
| "learning_rate": 9.145945375939888e-05, |
| "loss": 0.0136, |
| "step": 18350 |
| }, |
| { |
| "grad_norm": 0.2542160451412201, |
| "learning_rate": 9.144789725395203e-05, |
| "loss": 0.0134, |
| "step": 18360 |
| }, |
| { |
| "grad_norm": 0.22699974477291107, |
| "learning_rate": 9.14363336661931e-05, |
| "loss": 0.0115, |
| "step": 18370 |
| }, |
| { |
| "grad_norm": 0.182538703083992, |
| "learning_rate": 9.142476299809806e-05, |
| "loss": 0.0147, |
| "step": 18380 |
| }, |
| { |
| "grad_norm": 0.1428644359111786, |
| "learning_rate": 9.1413185251644e-05, |
| "loss": 0.0119, |
| "step": 18390 |
| }, |
| { |
| "grad_norm": 0.1707461029291153, |
| "learning_rate": 9.140160042880923e-05, |
| "loss": 0.0123, |
| "step": 18400 |
| }, |
| { |
| "grad_norm": 0.21117953956127167, |
| "learning_rate": 9.139000853157327e-05, |
| "loss": 0.0131, |
| "step": 18410 |
| }, |
| { |
| "grad_norm": 0.18192517757415771, |
| "learning_rate": 9.137840956191688e-05, |
| "loss": 0.0136, |
| "step": 18420 |
| }, |
| { |
| "grad_norm": 0.17930957674980164, |
| "learning_rate": 9.136680352182199e-05, |
| "loss": 0.0144, |
| "step": 18430 |
| }, |
| { |
| "grad_norm": 0.19649390876293182, |
| "learning_rate": 9.135519041327177e-05, |
| "loss": 0.0154, |
| "step": 18440 |
| }, |
| { |
| "grad_norm": 0.23601211607456207, |
| "learning_rate": 9.134357023825058e-05, |
| "loss": 0.0163, |
| "step": 18450 |
| }, |
| { |
| "grad_norm": 0.19874411821365356, |
| "learning_rate": 9.133194299874398e-05, |
| "loss": 0.0121, |
| "step": 18460 |
| }, |
| { |
| "grad_norm": 0.15509752929210663, |
| "learning_rate": 9.132030869673876e-05, |
| "loss": 0.0124, |
| "step": 18470 |
| }, |
| { |
| "grad_norm": 0.1924155354499817, |
| "learning_rate": 9.130866733422288e-05, |
| "loss": 0.012, |
| "step": 18480 |
| }, |
| { |
| "grad_norm": 0.1527821570634842, |
| "learning_rate": 9.129701891318556e-05, |
| "loss": 0.0166, |
| "step": 18490 |
| }, |
| { |
| "grad_norm": 0.1380731761455536, |
| "learning_rate": 9.128536343561718e-05, |
| "loss": 0.0129, |
| "step": 18500 |
| }, |
| { |
| "grad_norm": 0.18899448215961456, |
| "learning_rate": 9.127370090350934e-05, |
| "loss": 0.0139, |
| "step": 18510 |
| }, |
| { |
| "grad_norm": 0.16718028485774994, |
| "learning_rate": 9.126203131885487e-05, |
| "loss": 0.0145, |
| "step": 18520 |
| }, |
| { |
| "grad_norm": 0.12178225070238113, |
| "learning_rate": 9.125035468364775e-05, |
| "loss": 0.0122, |
| "step": 18530 |
| }, |
| { |
| "grad_norm": 0.2568337321281433, |
| "learning_rate": 9.123867099988322e-05, |
| "loss": 0.0135, |
| "step": 18540 |
| }, |
| { |
| "grad_norm": 0.16624175012111664, |
| "learning_rate": 9.122698026955769e-05, |
| "loss": 0.0155, |
| "step": 18550 |
| }, |
| { |
| "grad_norm": 0.1602160781621933, |
| "learning_rate": 9.12152824946688e-05, |
| "loss": 0.0136, |
| "step": 18560 |
| }, |
| { |
| "grad_norm": 0.1256667822599411, |
| "learning_rate": 9.120357767721538e-05, |
| "loss": 0.0123, |
| "step": 18570 |
| }, |
| { |
| "grad_norm": 0.1104787215590477, |
| "learning_rate": 9.119186581919745e-05, |
| "loss": 0.012, |
| "step": 18580 |
| }, |
| { |
| "grad_norm": 0.2000863254070282, |
| "learning_rate": 9.118014692261624e-05, |
| "loss": 0.0132, |
| "step": 18590 |
| }, |
| { |
| "grad_norm": 0.14768925309181213, |
| "learning_rate": 9.116842098947422e-05, |
| "loss": 0.0131, |
| "step": 18600 |
| }, |
| { |
| "grad_norm": 0.2148488461971283, |
| "learning_rate": 9.115668802177499e-05, |
| "loss": 0.0161, |
| "step": 18610 |
| }, |
| { |
| "grad_norm": 0.18289022147655487, |
| "learning_rate": 9.114494802152342e-05, |
| "loss": 0.0156, |
| "step": 18620 |
| }, |
| { |
| "grad_norm": 0.2317921668291092, |
| "learning_rate": 9.113320099072555e-05, |
| "loss": 0.0159, |
| "step": 18630 |
| }, |
| { |
| "grad_norm": 0.14073435962200165, |
| "learning_rate": 9.112144693138864e-05, |
| "loss": 0.012, |
| "step": 18640 |
| }, |
| { |
| "grad_norm": 0.13443489372730255, |
| "learning_rate": 9.110968584552111e-05, |
| "loss": 0.0121, |
| "step": 18650 |
| }, |
| { |
| "grad_norm": 0.13705019652843475, |
| "learning_rate": 9.109791773513264e-05, |
| "loss": 0.0131, |
| "step": 18660 |
| }, |
| { |
| "grad_norm": 0.16150884330272675, |
| "learning_rate": 9.108614260223403e-05, |
| "loss": 0.0134, |
| "step": 18670 |
| }, |
| { |
| "grad_norm": 0.1957915872335434, |
| "learning_rate": 9.107436044883738e-05, |
| "loss": 0.0153, |
| "step": 18680 |
| }, |
| { |
| "grad_norm": 0.1729505956172943, |
| "learning_rate": 9.10625712769559e-05, |
| "loss": 0.0139, |
| "step": 18690 |
| }, |
| { |
| "grad_norm": 0.12898528575897217, |
| "learning_rate": 9.105077508860406e-05, |
| "loss": 0.0135, |
| "step": 18700 |
| }, |
| { |
| "grad_norm": 0.21195413172245026, |
| "learning_rate": 9.103897188579751e-05, |
| "loss": 0.0122, |
| "step": 18710 |
| }, |
| { |
| "grad_norm": 0.16921821236610413, |
| "learning_rate": 9.102716167055308e-05, |
| "loss": 0.0121, |
| "step": 18720 |
| }, |
| { |
| "grad_norm": 0.1766607016324997, |
| "learning_rate": 9.10153444448888e-05, |
| "loss": 0.015, |
| "step": 18730 |
| }, |
| { |
| "grad_norm": 0.18350866436958313, |
| "learning_rate": 9.100352021082393e-05, |
| "loss": 0.0124, |
| "step": 18740 |
| }, |
| { |
| "grad_norm": 0.1389177292585373, |
| "learning_rate": 9.099168897037891e-05, |
| "loss": 0.0111, |
| "step": 18750 |
| }, |
| { |
| "grad_norm": 0.12991195917129517, |
| "learning_rate": 9.097985072557538e-05, |
| "loss": 0.0099, |
| "step": 18760 |
| }, |
| { |
| "grad_norm": 0.15150626003742218, |
| "learning_rate": 9.096800547843615e-05, |
| "loss": 0.0114, |
| "step": 18770 |
| }, |
| { |
| "grad_norm": 0.12411829829216003, |
| "learning_rate": 9.095615323098526e-05, |
| "loss": 0.0139, |
| "step": 18780 |
| }, |
| { |
| "grad_norm": 0.13244572281837463, |
| "learning_rate": 9.094429398524795e-05, |
| "loss": 0.012, |
| "step": 18790 |
| }, |
| { |
| "grad_norm": 0.19732749462127686, |
| "learning_rate": 9.093242774325061e-05, |
| "loss": 0.0141, |
| "step": 18800 |
| }, |
| { |
| "grad_norm": 0.1617794632911682, |
| "learning_rate": 9.092055450702088e-05, |
| "loss": 0.0118, |
| "step": 18810 |
| }, |
| { |
| "grad_norm": 0.1828819066286087, |
| "learning_rate": 9.090867427858756e-05, |
| "loss": 0.0165, |
| "step": 18820 |
| }, |
| { |
| "grad_norm": 0.15517683327198029, |
| "learning_rate": 9.089678705998066e-05, |
| "loss": 0.0138, |
| "step": 18830 |
| }, |
| { |
| "grad_norm": 0.21763205528259277, |
| "learning_rate": 9.088489285323139e-05, |
| "loss": 0.0136, |
| "step": 18840 |
| }, |
| { |
| "grad_norm": 0.1687460094690323, |
| "learning_rate": 9.087299166037212e-05, |
| "loss": 0.013, |
| "step": 18850 |
| }, |
| { |
| "grad_norm": 0.19683127105236053, |
| "learning_rate": 9.086108348343647e-05, |
| "loss": 0.0119, |
| "step": 18860 |
| }, |
| { |
| "grad_norm": 0.20528091490268707, |
| "learning_rate": 9.08491683244592e-05, |
| "loss": 0.0116, |
| "step": 18870 |
| }, |
| { |
| "grad_norm": 0.15028712153434753, |
| "learning_rate": 9.08372461854763e-05, |
| "loss": 0.0135, |
| "step": 18880 |
| }, |
| { |
| "grad_norm": 0.18136392533779144, |
| "learning_rate": 9.082531706852492e-05, |
| "loss": 0.0164, |
| "step": 18890 |
| }, |
| { |
| "grad_norm": 0.14097478985786438, |
| "learning_rate": 9.081338097564342e-05, |
| "loss": 0.0121, |
| "step": 18900 |
| }, |
| { |
| "grad_norm": 0.18584544956684113, |
| "learning_rate": 9.080143790887137e-05, |
| "loss": 0.0136, |
| "step": 18910 |
| }, |
| { |
| "grad_norm": 0.17267796397209167, |
| "learning_rate": 9.07894878702495e-05, |
| "loss": 0.0137, |
| "step": 18920 |
| }, |
| { |
| "grad_norm": 0.15958671271800995, |
| "learning_rate": 9.077753086181974e-05, |
| "loss": 0.014, |
| "step": 18930 |
| }, |
| { |
| "grad_norm": 0.1633959263563156, |
| "learning_rate": 9.076556688562524e-05, |
| "loss": 0.0128, |
| "step": 18940 |
| }, |
| { |
| "grad_norm": 0.20178668200969696, |
| "learning_rate": 9.075359594371029e-05, |
| "loss": 0.014, |
| "step": 18950 |
| }, |
| { |
| "grad_norm": 0.17557993531227112, |
| "learning_rate": 9.07416180381204e-05, |
| "loss": 0.0143, |
| "step": 18960 |
| }, |
| { |
| "grad_norm": 0.15889889001846313, |
| "learning_rate": 9.072963317090228e-05, |
| "loss": 0.0134, |
| "step": 18970 |
| }, |
| { |
| "grad_norm": 0.17338280379772186, |
| "learning_rate": 9.071764134410382e-05, |
| "loss": 0.0134, |
| "step": 18980 |
| }, |
| { |
| "grad_norm": 0.169789120554924, |
| "learning_rate": 9.070564255977407e-05, |
| "loss": 0.015, |
| "step": 18990 |
| }, |
| { |
| "grad_norm": 0.19041848182678223, |
| "learning_rate": 9.06936368199633e-05, |
| "loss": 0.0156, |
| "step": 19000 |
| }, |
| { |
| "grad_norm": 0.19369029998779297, |
| "learning_rate": 9.0681624126723e-05, |
| "loss": 0.0143, |
| "step": 19010 |
| }, |
| { |
| "grad_norm": 0.16650448739528656, |
| "learning_rate": 9.066960448210576e-05, |
| "loss": 0.0127, |
| "step": 19020 |
| }, |
| { |
| "grad_norm": 0.20635902881622314, |
| "learning_rate": 9.065757788816543e-05, |
| "loss": 0.0147, |
| "step": 19030 |
| }, |
| { |
| "grad_norm": 0.1792377382516861, |
| "learning_rate": 9.064554434695705e-05, |
| "loss": 0.0115, |
| "step": 19040 |
| }, |
| { |
| "grad_norm": 0.17120027542114258, |
| "learning_rate": 9.063350386053677e-05, |
| "loss": 0.0128, |
| "step": 19050 |
| }, |
| { |
| "grad_norm": 0.16933444142341614, |
| "learning_rate": 9.062145643096202e-05, |
| "loss": 0.0132, |
| "step": 19060 |
| }, |
| { |
| "grad_norm": 0.19243763387203217, |
| "learning_rate": 9.060940206029136e-05, |
| "loss": 0.0143, |
| "step": 19070 |
| }, |
| { |
| "grad_norm": 0.19466683268547058, |
| "learning_rate": 9.059734075058457e-05, |
| "loss": 0.0173, |
| "step": 19080 |
| }, |
| { |
| "grad_norm": 0.1642509400844574, |
| "learning_rate": 9.058527250390257e-05, |
| "loss": 0.0132, |
| "step": 19090 |
| }, |
| { |
| "grad_norm": 0.20794539153575897, |
| "learning_rate": 9.057319732230752e-05, |
| "loss": 0.0145, |
| "step": 19100 |
| }, |
| { |
| "grad_norm": 0.1727791279554367, |
| "learning_rate": 9.056111520786273e-05, |
| "loss": 0.0139, |
| "step": 19110 |
| }, |
| { |
| "grad_norm": 0.20383897423744202, |
| "learning_rate": 9.054902616263268e-05, |
| "loss": 0.0149, |
| "step": 19120 |
| }, |
| { |
| "grad_norm": 0.14843112230300903, |
| "learning_rate": 9.05369301886831e-05, |
| "loss": 0.0132, |
| "step": 19130 |
| }, |
| { |
| "grad_norm": 0.14206208288669586, |
| "learning_rate": 9.052482728808083e-05, |
| "loss": 0.0124, |
| "step": 19140 |
| }, |
| { |
| "grad_norm": 0.17277280986309052, |
| "learning_rate": 9.051271746289391e-05, |
| "loss": 0.0129, |
| "step": 19150 |
| }, |
| { |
| "grad_norm": 0.17671369016170502, |
| "learning_rate": 9.050060071519162e-05, |
| "loss": 0.0115, |
| "step": 19160 |
| }, |
| { |
| "grad_norm": 0.1999529004096985, |
| "learning_rate": 9.048847704704437e-05, |
| "loss": 0.0128, |
| "step": 19170 |
| }, |
| { |
| "grad_norm": 0.22170741856098175, |
| "learning_rate": 9.047634646052376e-05, |
| "loss": 0.0153, |
| "step": 19180 |
| }, |
| { |
| "grad_norm": 0.17207320034503937, |
| "learning_rate": 9.046420895770256e-05, |
| "loss": 0.0139, |
| "step": 19190 |
| }, |
| { |
| "grad_norm": 0.17688094079494476, |
| "learning_rate": 9.045206454065473e-05, |
| "loss": 0.0121, |
| "step": 19200 |
| }, |
| { |
| "grad_norm": 0.1338597536087036, |
| "learning_rate": 9.043991321145546e-05, |
| "loss": 0.0118, |
| "step": 19210 |
| }, |
| { |
| "grad_norm": 0.14638498425483704, |
| "learning_rate": 9.042775497218105e-05, |
| "loss": 0.0113, |
| "step": 19220 |
| }, |
| { |
| "grad_norm": 0.1648397445678711, |
| "learning_rate": 9.041558982490901e-05, |
| "loss": 0.0132, |
| "step": 19230 |
| }, |
| { |
| "grad_norm": 0.11956263333559036, |
| "learning_rate": 9.040341777171805e-05, |
| "loss": 0.0123, |
| "step": 19240 |
| }, |
| { |
| "grad_norm": 0.19842317700386047, |
| "learning_rate": 9.039123881468802e-05, |
| "loss": 0.0134, |
| "step": 19250 |
| }, |
| { |
| "grad_norm": 0.14097820222377777, |
| "learning_rate": 9.037905295589998e-05, |
| "loss": 0.0118, |
| "step": 19260 |
| }, |
| { |
| "grad_norm": 0.1730496734380722, |
| "learning_rate": 9.036686019743617e-05, |
| "loss": 0.0112, |
| "step": 19270 |
| }, |
| { |
| "grad_norm": 0.23227731883525848, |
| "learning_rate": 9.035466054137997e-05, |
| "loss": 0.0139, |
| "step": 19280 |
| }, |
| { |
| "grad_norm": 0.19463399052619934, |
| "learning_rate": 9.0342453989816e-05, |
| "loss": 0.0144, |
| "step": 19290 |
| }, |
| { |
| "grad_norm": 0.16545595228672028, |
| "learning_rate": 9.033024054483e-05, |
| "loss": 0.0115, |
| "step": 19300 |
| }, |
| { |
| "grad_norm": 0.12306548655033112, |
| "learning_rate": 9.031802020850894e-05, |
| "loss": 0.0114, |
| "step": 19310 |
| }, |
| { |
| "grad_norm": 0.18252205848693848, |
| "learning_rate": 9.030579298294092e-05, |
| "loss": 0.0124, |
| "step": 19320 |
| }, |
| { |
| "grad_norm": 0.2004423439502716, |
| "learning_rate": 9.029355887021524e-05, |
| "loss": 0.0156, |
| "step": 19330 |
| }, |
| { |
| "grad_norm": 0.151464581489563, |
| "learning_rate": 9.028131787242238e-05, |
| "loss": 0.0135, |
| "step": 19340 |
| }, |
| { |
| "grad_norm": 0.13323767483234406, |
| "learning_rate": 9.026906999165399e-05, |
| "loss": 0.0124, |
| "step": 19350 |
| }, |
| { |
| "grad_norm": 0.16443386673927307, |
| "learning_rate": 9.025681523000291e-05, |
| "loss": 0.013, |
| "step": 19360 |
| }, |
| { |
| "grad_norm": 0.19285282492637634, |
| "learning_rate": 9.024455358956315e-05, |
| "loss": 0.0123, |
| "step": 19370 |
| }, |
| { |
| "grad_norm": 0.21474361419677734, |
| "learning_rate": 9.023228507242984e-05, |
| "loss": 0.0136, |
| "step": 19380 |
| }, |
| { |
| "grad_norm": 0.20101943612098694, |
| "learning_rate": 9.022000968069937e-05, |
| "loss": 0.0137, |
| "step": 19390 |
| }, |
| { |
| "grad_norm": 0.17402178049087524, |
| "learning_rate": 9.020772741646928e-05, |
| "loss": 0.0136, |
| "step": 19400 |
| }, |
| { |
| "grad_norm": 0.22035923600196838, |
| "learning_rate": 9.019543828183826e-05, |
| "loss": 0.017, |
| "step": 19410 |
| }, |
| { |
| "grad_norm": 0.12223270535469055, |
| "learning_rate": 9.018314227890616e-05, |
| "loss": 0.0126, |
| "step": 19420 |
| }, |
| { |
| "grad_norm": 0.1647353619337082, |
| "learning_rate": 9.017083940977408e-05, |
| "loss": 0.0132, |
| "step": 19430 |
| }, |
| { |
| "grad_norm": 0.17604686319828033, |
| "learning_rate": 9.015852967654422e-05, |
| "loss": 0.0137, |
| "step": 19440 |
| }, |
| { |
| "grad_norm": 0.2017330378293991, |
| "learning_rate": 9.014621308131996e-05, |
| "loss": 0.0141, |
| "step": 19450 |
| }, |
| { |
| "grad_norm": 0.2677697241306305, |
| "learning_rate": 9.01338896262059e-05, |
| "loss": 0.012, |
| "step": 19460 |
| }, |
| { |
| "grad_norm": 0.18286541104316711, |
| "learning_rate": 9.012155931330777e-05, |
| "loss": 0.0153, |
| "step": 19470 |
| }, |
| { |
| "grad_norm": 0.18797534704208374, |
| "learning_rate": 9.010922214473246e-05, |
| "loss": 0.0147, |
| "step": 19480 |
| }, |
| { |
| "grad_norm": 0.20319977402687073, |
| "learning_rate": 9.009687812258808e-05, |
| "loss": 0.0132, |
| "step": 19490 |
| }, |
| { |
| "grad_norm": 0.18280743062496185, |
| "learning_rate": 9.00845272489839e-05, |
| "loss": 0.0107, |
| "step": 19500 |
| }, |
| { |
| "grad_norm": 0.1853189617395401, |
| "learning_rate": 9.007216952603031e-05, |
| "loss": 0.0155, |
| "step": 19510 |
| }, |
| { |
| "grad_norm": 0.1349164843559265, |
| "learning_rate": 9.005980495583894e-05, |
| "loss": 0.0118, |
| "step": 19520 |
| }, |
| { |
| "grad_norm": 0.22038881480693817, |
| "learning_rate": 9.004743354052252e-05, |
| "loss": 0.0137, |
| "step": 19530 |
| }, |
| { |
| "grad_norm": 0.16567367315292358, |
| "learning_rate": 9.003505528219503e-05, |
| "loss": 0.0118, |
| "step": 19540 |
| }, |
| { |
| "grad_norm": 0.2835250794887543, |
| "learning_rate": 9.002267018297154e-05, |
| "loss": 0.0148, |
| "step": 19550 |
| }, |
| { |
| "grad_norm": 0.15941092371940613, |
| "learning_rate": 9.001027824496834e-05, |
| "loss": 0.013, |
| "step": 19560 |
| }, |
| { |
| "grad_norm": 0.1713016778230667, |
| "learning_rate": 8.999787947030287e-05, |
| "loss": 0.0141, |
| "step": 19570 |
| }, |
| { |
| "grad_norm": 0.18185533583164215, |
| "learning_rate": 8.998547386109376e-05, |
| "loss": 0.0112, |
| "step": 19580 |
| }, |
| { |
| "grad_norm": 0.18436452746391296, |
| "learning_rate": 8.997306141946073e-05, |
| "loss": 0.0141, |
| "step": 19590 |
| }, |
| { |
| "grad_norm": 0.17156757414340973, |
| "learning_rate": 8.996064214752481e-05, |
| "loss": 0.0153, |
| "step": 19600 |
| }, |
| { |
| "grad_norm": 0.1925446093082428, |
| "learning_rate": 8.994821604740806e-05, |
| "loss": 0.0125, |
| "step": 19610 |
| }, |
| { |
| "grad_norm": 0.1612347662448883, |
| "learning_rate": 8.993578312123377e-05, |
| "loss": 0.0147, |
| "step": 19620 |
| }, |
| { |
| "grad_norm": 0.20712831616401672, |
| "learning_rate": 8.992334337112639e-05, |
| "loss": 0.0127, |
| "step": 19630 |
| }, |
| { |
| "grad_norm": 0.18162360787391663, |
| "learning_rate": 8.991089679921154e-05, |
| "loss": 0.0151, |
| "step": 19640 |
| }, |
| { |
| "grad_norm": 0.15992426872253418, |
| "learning_rate": 8.989844340761599e-05, |
| "loss": 0.0125, |
| "step": 19650 |
| }, |
| { |
| "grad_norm": 0.15727925300598145, |
| "learning_rate": 8.988598319846768e-05, |
| "loss": 0.0129, |
| "step": 19660 |
| }, |
| { |
| "grad_norm": 0.17979225516319275, |
| "learning_rate": 8.987351617389574e-05, |
| "loss": 0.0133, |
| "step": 19670 |
| }, |
| { |
| "grad_norm": 0.15485969185829163, |
| "learning_rate": 8.98610423360304e-05, |
| "loss": 0.0144, |
| "step": 19680 |
| }, |
| { |
| "grad_norm": 0.1544332504272461, |
| "learning_rate": 8.984856168700317e-05, |
| "loss": 0.0129, |
| "step": 19690 |
| }, |
| { |
| "grad_norm": 0.1792757511138916, |
| "learning_rate": 8.983607422894658e-05, |
| "loss": 0.0114, |
| "step": 19700 |
| }, |
| { |
| "grad_norm": 0.15564578771591187, |
| "learning_rate": 8.982357996399442e-05, |
| "loss": 0.0145, |
| "step": 19710 |
| }, |
| { |
| "grad_norm": 0.14563113451004028, |
| "learning_rate": 8.981107889428164e-05, |
| "loss": 0.0118, |
| "step": 19720 |
| }, |
| { |
| "grad_norm": 0.20107707381248474, |
| "learning_rate": 8.979857102194428e-05, |
| "loss": 0.0152, |
| "step": 19730 |
| }, |
| { |
| "grad_norm": 0.16668701171875, |
| "learning_rate": 8.978605634911968e-05, |
| "loss": 0.0132, |
| "step": 19740 |
| }, |
| { |
| "grad_norm": 0.17781060934066772, |
| "learning_rate": 8.977353487794616e-05, |
| "loss": 0.0129, |
| "step": 19750 |
| }, |
| { |
| "grad_norm": 0.13641557097434998, |
| "learning_rate": 8.976100661056334e-05, |
| "loss": 0.0119, |
| "step": 19760 |
| }, |
| { |
| "grad_norm": 0.21125675737857819, |
| "learning_rate": 8.974847154911197e-05, |
| "loss": 0.0148, |
| "step": 19770 |
| }, |
| { |
| "grad_norm": 0.19573289155960083, |
| "learning_rate": 8.973592969573393e-05, |
| "loss": 0.012, |
| "step": 19780 |
| }, |
| { |
| "grad_norm": 0.15342311561107635, |
| "learning_rate": 8.972338105257228e-05, |
| "loss": 0.0137, |
| "step": 19790 |
| }, |
| { |
| "grad_norm": 0.13895206153392792, |
| "learning_rate": 8.971082562177125e-05, |
| "loss": 0.0112, |
| "step": 19800 |
| }, |
| { |
| "grad_norm": 0.13383953273296356, |
| "learning_rate": 8.96982634054762e-05, |
| "loss": 0.0141, |
| "step": 19810 |
| }, |
| { |
| "grad_norm": 0.17109906673431396, |
| "learning_rate": 8.96856944058337e-05, |
| "loss": 0.0135, |
| "step": 19820 |
| }, |
| { |
| "grad_norm": 0.18697112798690796, |
| "learning_rate": 8.967311862499144e-05, |
| "loss": 0.0146, |
| "step": 19830 |
| }, |
| { |
| "grad_norm": 0.1723792403936386, |
| "learning_rate": 8.966053606509825e-05, |
| "loss": 0.0135, |
| "step": 19840 |
| }, |
| { |
| "grad_norm": 0.15005964040756226, |
| "learning_rate": 8.964794672830417e-05, |
| "loss": 0.0119, |
| "step": 19850 |
| }, |
| { |
| "grad_norm": 0.15086112916469574, |
| "learning_rate": 8.963535061676038e-05, |
| "loss": 0.0151, |
| "step": 19860 |
| }, |
| { |
| "grad_norm": 0.11962204426527023, |
| "learning_rate": 8.962274773261918e-05, |
| "loss": 0.0137, |
| "step": 19870 |
| }, |
| { |
| "grad_norm": 0.19353535771369934, |
| "learning_rate": 8.961013807803409e-05, |
| "loss": 0.0148, |
| "step": 19880 |
| }, |
| { |
| "grad_norm": 0.20438653230667114, |
| "learning_rate": 8.959752165515973e-05, |
| "loss": 0.013, |
| "step": 19890 |
| }, |
| { |
| "grad_norm": 0.18479126691818237, |
| "learning_rate": 8.958489846615193e-05, |
| "loss": 0.0143, |
| "step": 19900 |
| }, |
| { |
| "grad_norm": 0.152145653963089, |
| "learning_rate": 8.957226851316762e-05, |
| "loss": 0.0167, |
| "step": 19910 |
| }, |
| { |
| "grad_norm": 0.18823210895061493, |
| "learning_rate": 8.955963179836493e-05, |
| "loss": 0.0135, |
| "step": 19920 |
| }, |
| { |
| "grad_norm": 0.1508171707391739, |
| "learning_rate": 8.954698832390312e-05, |
| "loss": 0.0125, |
| "step": 19930 |
| }, |
| { |
| "grad_norm": 0.12273351103067398, |
| "learning_rate": 8.953433809194263e-05, |
| "loss": 0.0126, |
| "step": 19940 |
| }, |
| { |
| "grad_norm": 0.15448608994483948, |
| "learning_rate": 8.9521681104645e-05, |
| "loss": 0.0112, |
| "step": 19950 |
| }, |
| { |
| "grad_norm": 0.16210980713367462, |
| "learning_rate": 8.9509017364173e-05, |
| "loss": 0.0142, |
| "step": 19960 |
| }, |
| { |
| "grad_norm": 0.16296792030334473, |
| "learning_rate": 8.949634687269052e-05, |
| "loss": 0.0133, |
| "step": 19970 |
| }, |
| { |
| "grad_norm": 0.22923751175403595, |
| "learning_rate": 8.948366963236259e-05, |
| "loss": 0.0109, |
| "step": 19980 |
| }, |
| { |
| "grad_norm": 0.180278941988945, |
| "learning_rate": 8.947098564535538e-05, |
| "loss": 0.0108, |
| "step": 19990 |
| }, |
| { |
| "grad_norm": 0.1744961142539978, |
| "learning_rate": 8.945829491383627e-05, |
| "loss": 0.0131, |
| "step": 20000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 80000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 20000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 48, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|