| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 101.01010101010101, |
| "eval_steps": 500, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.050505050505050504, |
| "grad_norm": 4.838683605194092, |
| "learning_rate": 9e-07, |
| "loss": 0.7954, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "grad_norm": 4.33256721496582, |
| "learning_rate": 1.9e-06, |
| "loss": 0.8154, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.15151515151515152, |
| "grad_norm": 4.210081577301025, |
| "learning_rate": 2.9e-06, |
| "loss": 0.6939, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "grad_norm": 2.2548859119415283, |
| "learning_rate": 3.9e-06, |
| "loss": 0.5252, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.25252525252525254, |
| "grad_norm": 1.7198079824447632, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.4129, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 0.937373161315918, |
| "learning_rate": 5.9e-06, |
| "loss": 0.35, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.35353535353535354, |
| "grad_norm": 0.7054076194763184, |
| "learning_rate": 6.900000000000001e-06, |
| "loss": 0.2942, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "grad_norm": 0.2772926688194275, |
| "learning_rate": 7.9e-06, |
| "loss": 0.2407, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 0.4112100899219513, |
| "learning_rate": 8.9e-06, |
| "loss": 0.2179, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "grad_norm": 0.2514132559299469, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 0.2055, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.21379052102565765, |
| "learning_rate": 1.09e-05, |
| "loss": 0.1989, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 0.2271135300397873, |
| "learning_rate": 1.19e-05, |
| "loss": 0.1912, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6565656565656566, |
| "grad_norm": 0.16979487240314484, |
| "learning_rate": 1.29e-05, |
| "loss": 0.1837, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7070707070707071, |
| "grad_norm": 0.17147108912467957, |
| "learning_rate": 1.3900000000000002e-05, |
| "loss": 0.1726, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 0.18867719173431396, |
| "learning_rate": 1.49e-05, |
| "loss": 0.1746, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "grad_norm": 0.22579677402973175, |
| "learning_rate": 1.59e-05, |
| "loss": 0.1681, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8585858585858586, |
| "grad_norm": 0.21772785484790802, |
| "learning_rate": 1.69e-05, |
| "loss": 0.1578, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.19519145786762238, |
| "learning_rate": 1.79e-05, |
| "loss": 0.1483, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9595959595959596, |
| "grad_norm": 0.2763207256793976, |
| "learning_rate": 1.8900000000000002e-05, |
| "loss": 0.1378, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0101010101010102, |
| "grad_norm": 0.2061455100774765, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 0.1354, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0606060606060606, |
| "grad_norm": 0.2533341348171234, |
| "learning_rate": 2.09e-05, |
| "loss": 0.1309, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.21148192882537842, |
| "learning_rate": 2.19e-05, |
| "loss": 0.1304, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1616161616161615, |
| "grad_norm": 0.26073169708251953, |
| "learning_rate": 2.29e-05, |
| "loss": 0.1231, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2121212121212122, |
| "grad_norm": 0.22160248458385468, |
| "learning_rate": 2.39e-05, |
| "loss": 0.122, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2626262626262625, |
| "grad_norm": 0.24378563463687897, |
| "learning_rate": 2.4900000000000002e-05, |
| "loss": 0.1232, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3131313131313131, |
| "grad_norm": 0.21751199662685394, |
| "learning_rate": 2.5900000000000003e-05, |
| "loss": 0.1192, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 0.20169033110141754, |
| "learning_rate": 2.6900000000000003e-05, |
| "loss": 0.1183, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.4141414141414141, |
| "grad_norm": 0.34014809131622314, |
| "learning_rate": 2.7900000000000004e-05, |
| "loss": 0.1176, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4646464646464645, |
| "grad_norm": 0.31275469064712524, |
| "learning_rate": 2.8899999999999998e-05, |
| "loss": 0.1163, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.5151515151515151, |
| "grad_norm": 0.2603381872177124, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 0.1152, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5656565656565657, |
| "grad_norm": 0.23752418160438538, |
| "learning_rate": 3.09e-05, |
| "loss": 0.113, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.6161616161616161, |
| "grad_norm": 0.2137400507926941, |
| "learning_rate": 3.19e-05, |
| "loss": 0.1165, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.22636736929416656, |
| "learning_rate": 3.29e-05, |
| "loss": 0.1145, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7171717171717171, |
| "grad_norm": 0.2236429899930954, |
| "learning_rate": 3.3900000000000004e-05, |
| "loss": 0.1122, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.7676767676767677, |
| "grad_norm": 0.24513526260852814, |
| "learning_rate": 3.49e-05, |
| "loss": 0.1114, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.2539370357990265, |
| "learning_rate": 3.59e-05, |
| "loss": 0.116, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.8686868686868687, |
| "grad_norm": 0.25643476843833923, |
| "learning_rate": 3.69e-05, |
| "loss": 0.1135, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.9191919191919191, |
| "grad_norm": 0.21701207756996155, |
| "learning_rate": 3.79e-05, |
| "loss": 0.1148, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.9696969696969697, |
| "grad_norm": 0.2011701613664627, |
| "learning_rate": 3.8900000000000004e-05, |
| "loss": 0.1101, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.0202020202020203, |
| "grad_norm": 0.2636883854866028, |
| "learning_rate": 3.99e-05, |
| "loss": 0.1096, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0707070707070705, |
| "grad_norm": 0.19827833771705627, |
| "learning_rate": 4.09e-05, |
| "loss": 0.1087, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.121212121212121, |
| "grad_norm": 0.2320198118686676, |
| "learning_rate": 4.19e-05, |
| "loss": 0.1101, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.1717171717171717, |
| "grad_norm": 0.2725471556186676, |
| "learning_rate": 4.29e-05, |
| "loss": 0.1135, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.18407052755355835, |
| "learning_rate": 4.39e-05, |
| "loss": 0.1085, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 0.2238778918981552, |
| "learning_rate": 4.49e-05, |
| "loss": 0.1126, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.323232323232323, |
| "grad_norm": 0.3567017912864685, |
| "learning_rate": 4.5900000000000004e-05, |
| "loss": 0.1134, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.3737373737373737, |
| "grad_norm": 0.21803605556488037, |
| "learning_rate": 4.69e-05, |
| "loss": 0.1102, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.4242424242424243, |
| "grad_norm": 0.23381847143173218, |
| "learning_rate": 4.79e-05, |
| "loss": 0.1107, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.474747474747475, |
| "grad_norm": 0.23200933635234833, |
| "learning_rate": 4.89e-05, |
| "loss": 0.1078, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.525252525252525, |
| "grad_norm": 0.2065354883670807, |
| "learning_rate": 4.99e-05, |
| "loss": 0.1072, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.5757575757575757, |
| "grad_norm": 0.18421830236911774, |
| "learning_rate": 5.0900000000000004e-05, |
| "loss": 0.1084, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.6262626262626263, |
| "grad_norm": 0.18189777433872223, |
| "learning_rate": 5.19e-05, |
| "loss": 0.1059, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.676767676767677, |
| "grad_norm": 0.12934866547584534, |
| "learning_rate": 5.2900000000000005e-05, |
| "loss": 0.1101, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.2099960893392563, |
| "learning_rate": 5.390000000000001e-05, |
| "loss": 0.1098, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 0.20838184654712677, |
| "learning_rate": 5.4900000000000006e-05, |
| "loss": 0.1059, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.8282828282828283, |
| "grad_norm": 0.2569470703601837, |
| "learning_rate": 5.590000000000001e-05, |
| "loss": 0.1053, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.878787878787879, |
| "grad_norm": 0.2702576220035553, |
| "learning_rate": 5.69e-05, |
| "loss": 0.1105, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.929292929292929, |
| "grad_norm": 0.16491034626960754, |
| "learning_rate": 5.79e-05, |
| "loss": 0.1071, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.9797979797979797, |
| "grad_norm": 0.23221731185913086, |
| "learning_rate": 5.89e-05, |
| "loss": 0.1075, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.0303030303030303, |
| "grad_norm": 0.25280851125717163, |
| "learning_rate": 5.99e-05, |
| "loss": 0.1089, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.080808080808081, |
| "grad_norm": 0.23243561387062073, |
| "learning_rate": 6.09e-05, |
| "loss": 0.1127, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.1313131313131315, |
| "grad_norm": 0.2028040885925293, |
| "learning_rate": 6.19e-05, |
| "loss": 0.1081, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.1818181818181817, |
| "grad_norm": 0.1944403201341629, |
| "learning_rate": 6.29e-05, |
| "loss": 0.1064, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.2323232323232323, |
| "grad_norm": 0.2539620101451874, |
| "learning_rate": 6.390000000000001e-05, |
| "loss": 0.1066, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.282828282828283, |
| "grad_norm": 0.21503722667694092, |
| "learning_rate": 6.49e-05, |
| "loss": 0.1061, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.21252280473709106, |
| "learning_rate": 6.59e-05, |
| "loss": 0.1076, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.3838383838383836, |
| "grad_norm": 0.20107881724834442, |
| "learning_rate": 6.690000000000001e-05, |
| "loss": 0.1074, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.4343434343434343, |
| "grad_norm": 0.1607273370027542, |
| "learning_rate": 6.790000000000001e-05, |
| "loss": 0.1061, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.484848484848485, |
| "grad_norm": 0.23353174328804016, |
| "learning_rate": 6.89e-05, |
| "loss": 0.1089, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.5353535353535355, |
| "grad_norm": 0.19595587253570557, |
| "learning_rate": 6.99e-05, |
| "loss": 0.1066, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.5858585858585856, |
| "grad_norm": 0.17970235645771027, |
| "learning_rate": 7.09e-05, |
| "loss": 0.1067, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.6363636363636362, |
| "grad_norm": 0.25375887751579285, |
| "learning_rate": 7.19e-05, |
| "loss": 0.1047, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.686868686868687, |
| "grad_norm": 0.19426773488521576, |
| "learning_rate": 7.29e-05, |
| "loss": 0.1031, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.7373737373737375, |
| "grad_norm": 0.1988278031349182, |
| "learning_rate": 7.390000000000001e-05, |
| "loss": 0.1016, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.787878787878788, |
| "grad_norm": 0.24086520075798035, |
| "learning_rate": 7.49e-05, |
| "loss": 0.1026, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.8383838383838382, |
| "grad_norm": 0.21574537456035614, |
| "learning_rate": 7.59e-05, |
| "loss": 0.1012, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.888888888888889, |
| "grad_norm": 0.33338817954063416, |
| "learning_rate": 7.69e-05, |
| "loss": 0.1002, |
| "step": 770 |
| }, |
| { |
| "epoch": 3.9393939393939394, |
| "grad_norm": 0.3103275001049042, |
| "learning_rate": 7.790000000000001e-05, |
| "loss": 0.0993, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.98989898989899, |
| "grad_norm": 0.7165053486824036, |
| "learning_rate": 7.890000000000001e-05, |
| "loss": 0.096, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.040404040404041, |
| "grad_norm": 0.8318674564361572, |
| "learning_rate": 7.99e-05, |
| "loss": 0.0946, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.090909090909091, |
| "grad_norm": 0.26858198642730713, |
| "learning_rate": 8.090000000000001e-05, |
| "loss": 0.0872, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.141414141414141, |
| "grad_norm": 0.27473196387290955, |
| "learning_rate": 8.19e-05, |
| "loss": 0.0857, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.191919191919192, |
| "grad_norm": 0.1972387433052063, |
| "learning_rate": 8.29e-05, |
| "loss": 0.0841, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.242424242424242, |
| "grad_norm": 0.15754260122776031, |
| "learning_rate": 8.39e-05, |
| "loss": 0.0798, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.292929292929293, |
| "grad_norm": 0.2291656881570816, |
| "learning_rate": 8.49e-05, |
| "loss": 0.0761, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.343434343434343, |
| "grad_norm": 0.23920407891273499, |
| "learning_rate": 8.59e-05, |
| "loss": 0.0761, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.393939393939394, |
| "grad_norm": 0.3075353503227234, |
| "learning_rate": 8.69e-05, |
| "loss": 0.073, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 0.29688555002212524, |
| "learning_rate": 8.790000000000001e-05, |
| "loss": 0.073, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.494949494949495, |
| "grad_norm": 0.604381263256073, |
| "learning_rate": 8.89e-05, |
| "loss": 0.0671, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.545454545454545, |
| "grad_norm": 0.48267149925231934, |
| "learning_rate": 8.99e-05, |
| "loss": 0.0694, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.595959595959596, |
| "grad_norm": 0.21557459235191345, |
| "learning_rate": 9.090000000000001e-05, |
| "loss": 0.0606, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.646464646464646, |
| "grad_norm": 0.20908492803573608, |
| "learning_rate": 9.190000000000001e-05, |
| "loss": 0.0582, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.696969696969697, |
| "grad_norm": 0.1931847631931305, |
| "learning_rate": 9.290000000000001e-05, |
| "loss": 0.0515, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.747474747474747, |
| "grad_norm": 0.4423510730266571, |
| "learning_rate": 9.39e-05, |
| "loss": 0.053, |
| "step": 940 |
| }, |
| { |
| "epoch": 4.797979797979798, |
| "grad_norm": 0.26107069849967957, |
| "learning_rate": 9.49e-05, |
| "loss": 0.0527, |
| "step": 950 |
| }, |
| { |
| "epoch": 4.848484848484849, |
| "grad_norm": 0.1657687872648239, |
| "learning_rate": 9.59e-05, |
| "loss": 0.0537, |
| "step": 960 |
| }, |
| { |
| "epoch": 4.898989898989899, |
| "grad_norm": 0.2073659747838974, |
| "learning_rate": 9.69e-05, |
| "loss": 0.0489, |
| "step": 970 |
| }, |
| { |
| "epoch": 4.94949494949495, |
| "grad_norm": 0.15980064868927002, |
| "learning_rate": 9.790000000000001e-05, |
| "loss": 0.0469, |
| "step": 980 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.18640227615833282, |
| "learning_rate": 9.89e-05, |
| "loss": 0.0472, |
| "step": 990 |
| }, |
| { |
| "epoch": 5.05050505050505, |
| "grad_norm": 0.2025427520275116, |
| "learning_rate": 9.99e-05, |
| "loss": 0.0485, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.101010101010101, |
| "grad_norm": 0.17412807047367096, |
| "learning_rate": 9.999994463727085e-05, |
| "loss": 0.0455, |
| "step": 1010 |
| }, |
| { |
| "epoch": 5.151515151515151, |
| "grad_norm": 0.21700482070446014, |
| "learning_rate": 9.999975326009292e-05, |
| "loss": 0.0475, |
| "step": 1020 |
| }, |
| { |
| "epoch": 5.202020202020202, |
| "grad_norm": 0.18712277710437775, |
| "learning_rate": 9.999942518549879e-05, |
| "loss": 0.0466, |
| "step": 1030 |
| }, |
| { |
| "epoch": 5.252525252525253, |
| "grad_norm": 0.2451334446668625, |
| "learning_rate": 9.999896041438544e-05, |
| "loss": 0.0454, |
| "step": 1040 |
| }, |
| { |
| "epoch": 5.303030303030303, |
| "grad_norm": 0.19966530799865723, |
| "learning_rate": 9.999835894802353e-05, |
| "loss": 0.045, |
| "step": 1050 |
| }, |
| { |
| "epoch": 5.353535353535354, |
| "grad_norm": 0.23916348814964294, |
| "learning_rate": 9.999762078805743e-05, |
| "loss": 0.0484, |
| "step": 1060 |
| }, |
| { |
| "epoch": 5.404040404040404, |
| "grad_norm": 0.2810751795768738, |
| "learning_rate": 9.999674593650526e-05, |
| "loss": 0.0435, |
| "step": 1070 |
| }, |
| { |
| "epoch": 5.454545454545454, |
| "grad_norm": 0.17749905586242676, |
| "learning_rate": 9.99957343957588e-05, |
| "loss": 0.0464, |
| "step": 1080 |
| }, |
| { |
| "epoch": 5.505050505050505, |
| "grad_norm": 0.2932732403278351, |
| "learning_rate": 9.99945861685836e-05, |
| "loss": 0.0484, |
| "step": 1090 |
| }, |
| { |
| "epoch": 5.555555555555555, |
| "grad_norm": 0.2771592438220978, |
| "learning_rate": 9.999330125811884e-05, |
| "loss": 0.0444, |
| "step": 1100 |
| }, |
| { |
| "epoch": 5.606060606060606, |
| "grad_norm": 0.1735796481370926, |
| "learning_rate": 9.999187966787744e-05, |
| "loss": 0.0424, |
| "step": 1110 |
| }, |
| { |
| "epoch": 5.656565656565657, |
| "grad_norm": 0.17742590606212616, |
| "learning_rate": 9.999032140174595e-05, |
| "loss": 0.041, |
| "step": 1120 |
| }, |
| { |
| "epoch": 5.707070707070707, |
| "grad_norm": 0.1527431607246399, |
| "learning_rate": 9.998862646398464e-05, |
| "loss": 0.0407, |
| "step": 1130 |
| }, |
| { |
| "epoch": 5.757575757575758, |
| "grad_norm": 0.2582555413246155, |
| "learning_rate": 9.998679485922739e-05, |
| "loss": 0.0408, |
| "step": 1140 |
| }, |
| { |
| "epoch": 5.808080808080808, |
| "grad_norm": 0.21207116544246674, |
| "learning_rate": 9.998482659248174e-05, |
| "loss": 0.0425, |
| "step": 1150 |
| }, |
| { |
| "epoch": 5.858585858585858, |
| "grad_norm": 0.18631471693515778, |
| "learning_rate": 9.998272166912883e-05, |
| "loss": 0.0357, |
| "step": 1160 |
| }, |
| { |
| "epoch": 5.909090909090909, |
| "grad_norm": 0.2374238222837448, |
| "learning_rate": 9.998048009492347e-05, |
| "loss": 0.0341, |
| "step": 1170 |
| }, |
| { |
| "epoch": 5.959595959595959, |
| "grad_norm": 0.7843369841575623, |
| "learning_rate": 9.997810187599403e-05, |
| "loss": 0.0356, |
| "step": 1180 |
| }, |
| { |
| "epoch": 6.01010101010101, |
| "grad_norm": 0.21243363618850708, |
| "learning_rate": 9.997558701884249e-05, |
| "loss": 0.031, |
| "step": 1190 |
| }, |
| { |
| "epoch": 6.0606060606060606, |
| "grad_norm": 0.49288108944892883, |
| "learning_rate": 9.997293553034433e-05, |
| "loss": 0.0251, |
| "step": 1200 |
| }, |
| { |
| "epoch": 6.111111111111111, |
| "grad_norm": 0.429260790348053, |
| "learning_rate": 9.997014741774866e-05, |
| "loss": 0.0198, |
| "step": 1210 |
| }, |
| { |
| "epoch": 6.161616161616162, |
| "grad_norm": 0.23916080594062805, |
| "learning_rate": 9.996722268867803e-05, |
| "loss": 0.0192, |
| "step": 1220 |
| }, |
| { |
| "epoch": 6.212121212121212, |
| "grad_norm": 0.18160603940486908, |
| "learning_rate": 9.996416135112858e-05, |
| "loss": 0.0162, |
| "step": 1230 |
| }, |
| { |
| "epoch": 6.262626262626263, |
| "grad_norm": 0.1530262678861618, |
| "learning_rate": 9.996096341346988e-05, |
| "loss": 0.0176, |
| "step": 1240 |
| }, |
| { |
| "epoch": 6.313131313131313, |
| "grad_norm": 0.15108224749565125, |
| "learning_rate": 9.995762888444495e-05, |
| "loss": 0.0166, |
| "step": 1250 |
| }, |
| { |
| "epoch": 6.363636363636363, |
| "grad_norm": 0.12075727432966232, |
| "learning_rate": 9.995415777317027e-05, |
| "loss": 0.0139, |
| "step": 1260 |
| }, |
| { |
| "epoch": 6.414141414141414, |
| "grad_norm": 0.15300285816192627, |
| "learning_rate": 9.995055008913574e-05, |
| "loss": 0.0136, |
| "step": 1270 |
| }, |
| { |
| "epoch": 6.4646464646464645, |
| "grad_norm": 0.14300335943698883, |
| "learning_rate": 9.994680584220463e-05, |
| "loss": 0.0154, |
| "step": 1280 |
| }, |
| { |
| "epoch": 6.515151515151516, |
| "grad_norm": 0.1369652897119522, |
| "learning_rate": 9.994292504261355e-05, |
| "loss": 0.013, |
| "step": 1290 |
| }, |
| { |
| "epoch": 6.565656565656566, |
| "grad_norm": 0.15715359151363373, |
| "learning_rate": 9.993890770097247e-05, |
| "loss": 0.0122, |
| "step": 1300 |
| }, |
| { |
| "epoch": 6.616161616161616, |
| "grad_norm": 0.192514106631279, |
| "learning_rate": 9.993475382826467e-05, |
| "loss": 0.013, |
| "step": 1310 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.24130967259407043, |
| "learning_rate": 9.993046343584664e-05, |
| "loss": 0.0131, |
| "step": 1320 |
| }, |
| { |
| "epoch": 6.717171717171717, |
| "grad_norm": 0.158871129155159, |
| "learning_rate": 9.992603653544816e-05, |
| "loss": 0.0126, |
| "step": 1330 |
| }, |
| { |
| "epoch": 6.767676767676767, |
| "grad_norm": 0.2723108232021332, |
| "learning_rate": 9.992147313917222e-05, |
| "loss": 0.0144, |
| "step": 1340 |
| }, |
| { |
| "epoch": 6.818181818181818, |
| "grad_norm": 0.2583635151386261, |
| "learning_rate": 9.991677325949497e-05, |
| "loss": 0.0112, |
| "step": 1350 |
| }, |
| { |
| "epoch": 6.8686868686868685, |
| "grad_norm": 0.2646903693675995, |
| "learning_rate": 9.991193690926568e-05, |
| "loss": 0.0163, |
| "step": 1360 |
| }, |
| { |
| "epoch": 6.91919191919192, |
| "grad_norm": 0.17906567454338074, |
| "learning_rate": 9.990696410170678e-05, |
| "loss": 0.0126, |
| "step": 1370 |
| }, |
| { |
| "epoch": 6.96969696969697, |
| "grad_norm": 0.17373201251029968, |
| "learning_rate": 9.990185485041371e-05, |
| "loss": 0.014, |
| "step": 1380 |
| }, |
| { |
| "epoch": 7.02020202020202, |
| "grad_norm": 0.1234615296125412, |
| "learning_rate": 9.989660916935498e-05, |
| "loss": 0.0119, |
| "step": 1390 |
| }, |
| { |
| "epoch": 7.070707070707071, |
| "grad_norm": 0.133011132478714, |
| "learning_rate": 9.989122707287208e-05, |
| "loss": 0.0108, |
| "step": 1400 |
| }, |
| { |
| "epoch": 7.121212121212121, |
| "grad_norm": 0.11188605427742004, |
| "learning_rate": 9.988570857567945e-05, |
| "loss": 0.0108, |
| "step": 1410 |
| }, |
| { |
| "epoch": 7.171717171717171, |
| "grad_norm": 0.11211735755205154, |
| "learning_rate": 9.988005369286446e-05, |
| "loss": 0.0172, |
| "step": 1420 |
| }, |
| { |
| "epoch": 7.222222222222222, |
| "grad_norm": 0.15307727456092834, |
| "learning_rate": 9.987426243988734e-05, |
| "loss": 0.015, |
| "step": 1430 |
| }, |
| { |
| "epoch": 7.2727272727272725, |
| "grad_norm": 0.16796652972698212, |
| "learning_rate": 9.986833483258114e-05, |
| "loss": 0.0129, |
| "step": 1440 |
| }, |
| { |
| "epoch": 7.3232323232323235, |
| "grad_norm": 0.16543126106262207, |
| "learning_rate": 9.986227088715173e-05, |
| "loss": 0.0127, |
| "step": 1450 |
| }, |
| { |
| "epoch": 7.373737373737374, |
| "grad_norm": 0.1630929559469223, |
| "learning_rate": 9.98560706201777e-05, |
| "loss": 0.0129, |
| "step": 1460 |
| }, |
| { |
| "epoch": 7.424242424242424, |
| "grad_norm": 0.10342209786176682, |
| "learning_rate": 9.984973404861036e-05, |
| "loss": 0.0099, |
| "step": 1470 |
| }, |
| { |
| "epoch": 7.474747474747475, |
| "grad_norm": 0.12904970347881317, |
| "learning_rate": 9.984326118977361e-05, |
| "loss": 0.011, |
| "step": 1480 |
| }, |
| { |
| "epoch": 7.525252525252525, |
| "grad_norm": 0.09910032898187637, |
| "learning_rate": 9.983665206136406e-05, |
| "loss": 0.0103, |
| "step": 1490 |
| }, |
| { |
| "epoch": 7.575757575757576, |
| "grad_norm": 0.11549292504787445, |
| "learning_rate": 9.982990668145075e-05, |
| "loss": 0.0113, |
| "step": 1500 |
| }, |
| { |
| "epoch": 7.626262626262626, |
| "grad_norm": 0.09299202263355255, |
| "learning_rate": 9.982302506847534e-05, |
| "loss": 0.0103, |
| "step": 1510 |
| }, |
| { |
| "epoch": 7.6767676767676765, |
| "grad_norm": 0.13474924862384796, |
| "learning_rate": 9.981600724125189e-05, |
| "loss": 0.0116, |
| "step": 1520 |
| }, |
| { |
| "epoch": 7.7272727272727275, |
| "grad_norm": 0.14938341081142426, |
| "learning_rate": 9.980885321896685e-05, |
| "loss": 0.0108, |
| "step": 1530 |
| }, |
| { |
| "epoch": 7.777777777777778, |
| "grad_norm": 0.15139977633953094, |
| "learning_rate": 9.980156302117905e-05, |
| "loss": 0.0095, |
| "step": 1540 |
| }, |
| { |
| "epoch": 7.828282828282829, |
| "grad_norm": 0.13270030915737152, |
| "learning_rate": 9.979413666781963e-05, |
| "loss": 0.0128, |
| "step": 1550 |
| }, |
| { |
| "epoch": 7.878787878787879, |
| "grad_norm": 0.17968319356441498, |
| "learning_rate": 9.978657417919193e-05, |
| "loss": 0.0116, |
| "step": 1560 |
| }, |
| { |
| "epoch": 7.929292929292929, |
| "grad_norm": 0.15713919699192047, |
| "learning_rate": 9.977887557597153e-05, |
| "loss": 0.0111, |
| "step": 1570 |
| }, |
| { |
| "epoch": 7.97979797979798, |
| "grad_norm": 0.16841000318527222, |
| "learning_rate": 9.97710408792061e-05, |
| "loss": 0.0122, |
| "step": 1580 |
| }, |
| { |
| "epoch": 8.030303030303031, |
| "grad_norm": 0.2323077768087387, |
| "learning_rate": 9.976307011031542e-05, |
| "loss": 0.0114, |
| "step": 1590 |
| }, |
| { |
| "epoch": 8.080808080808081, |
| "grad_norm": 0.20963816344738007, |
| "learning_rate": 9.975496329109126e-05, |
| "loss": 0.0111, |
| "step": 1600 |
| }, |
| { |
| "epoch": 8.131313131313131, |
| "grad_norm": 0.14088201522827148, |
| "learning_rate": 9.974672044369732e-05, |
| "loss": 0.0127, |
| "step": 1610 |
| }, |
| { |
| "epoch": 8.181818181818182, |
| "grad_norm": 0.14957164227962494, |
| "learning_rate": 9.97383415906693e-05, |
| "loss": 0.0099, |
| "step": 1620 |
| }, |
| { |
| "epoch": 8.232323232323232, |
| "grad_norm": 0.12963111698627472, |
| "learning_rate": 9.97298267549146e-05, |
| "loss": 0.012, |
| "step": 1630 |
| }, |
| { |
| "epoch": 8.282828282828282, |
| "grad_norm": 0.1677665114402771, |
| "learning_rate": 9.972117595971249e-05, |
| "loss": 0.0114, |
| "step": 1640 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 0.12478765845298767, |
| "learning_rate": 9.971238922871391e-05, |
| "loss": 0.0109, |
| "step": 1650 |
| }, |
| { |
| "epoch": 8.383838383838384, |
| "grad_norm": 0.16371428966522217, |
| "learning_rate": 9.970346658594142e-05, |
| "loss": 0.0127, |
| "step": 1660 |
| }, |
| { |
| "epoch": 8.434343434343434, |
| "grad_norm": 0.1488392949104309, |
| "learning_rate": 9.969440805578923e-05, |
| "loss": 0.0116, |
| "step": 1670 |
| }, |
| { |
| "epoch": 8.484848484848484, |
| "grad_norm": 0.15939933061599731, |
| "learning_rate": 9.968521366302298e-05, |
| "loss": 0.0112, |
| "step": 1680 |
| }, |
| { |
| "epoch": 8.535353535353535, |
| "grad_norm": 0.14795666933059692, |
| "learning_rate": 9.967588343277981e-05, |
| "loss": 0.0107, |
| "step": 1690 |
| }, |
| { |
| "epoch": 8.585858585858587, |
| "grad_norm": 0.14237447082996368, |
| "learning_rate": 9.966641739056818e-05, |
| "loss": 0.0102, |
| "step": 1700 |
| }, |
| { |
| "epoch": 8.636363636363637, |
| "grad_norm": 0.17013949155807495, |
| "learning_rate": 9.965681556226793e-05, |
| "loss": 0.0131, |
| "step": 1710 |
| }, |
| { |
| "epoch": 8.686868686868687, |
| "grad_norm": 0.1752924919128418, |
| "learning_rate": 9.964707797413006e-05, |
| "loss": 0.0108, |
| "step": 1720 |
| }, |
| { |
| "epoch": 8.737373737373737, |
| "grad_norm": 0.12599825859069824, |
| "learning_rate": 9.963720465277679e-05, |
| "loss": 0.0082, |
| "step": 1730 |
| }, |
| { |
| "epoch": 8.787878787878787, |
| "grad_norm": 0.11326688528060913, |
| "learning_rate": 9.96271956252014e-05, |
| "loss": 0.0126, |
| "step": 1740 |
| }, |
| { |
| "epoch": 8.83838383838384, |
| "grad_norm": 0.12932933866977692, |
| "learning_rate": 9.961705091876816e-05, |
| "loss": 0.0106, |
| "step": 1750 |
| }, |
| { |
| "epoch": 8.88888888888889, |
| "grad_norm": 0.10299669951200485, |
| "learning_rate": 9.960677056121235e-05, |
| "loss": 0.0091, |
| "step": 1760 |
| }, |
| { |
| "epoch": 8.93939393939394, |
| "grad_norm": 0.09663958847522736, |
| "learning_rate": 9.959635458064005e-05, |
| "loss": 0.0089, |
| "step": 1770 |
| }, |
| { |
| "epoch": 8.98989898989899, |
| "grad_norm": 0.17171341180801392, |
| "learning_rate": 9.958580300552815e-05, |
| "loss": 0.0093, |
| "step": 1780 |
| }, |
| { |
| "epoch": 9.04040404040404, |
| "grad_norm": 0.16513721644878387, |
| "learning_rate": 9.957511586472426e-05, |
| "loss": 0.0116, |
| "step": 1790 |
| }, |
| { |
| "epoch": 9.090909090909092, |
| "grad_norm": 0.12167911976575851, |
| "learning_rate": 9.956429318744662e-05, |
| "loss": 0.013, |
| "step": 1800 |
| }, |
| { |
| "epoch": 9.141414141414142, |
| "grad_norm": 0.18825121223926544, |
| "learning_rate": 9.955333500328404e-05, |
| "loss": 0.0123, |
| "step": 1810 |
| }, |
| { |
| "epoch": 9.191919191919192, |
| "grad_norm": 0.14115002751350403, |
| "learning_rate": 9.95422413421957e-05, |
| "loss": 0.0133, |
| "step": 1820 |
| }, |
| { |
| "epoch": 9.242424242424242, |
| "grad_norm": 0.09593846648931503, |
| "learning_rate": 9.953101223451133e-05, |
| "loss": 0.0094, |
| "step": 1830 |
| }, |
| { |
| "epoch": 9.292929292929292, |
| "grad_norm": 0.14259865880012512, |
| "learning_rate": 9.951964771093085e-05, |
| "loss": 0.0102, |
| "step": 1840 |
| }, |
| { |
| "epoch": 9.343434343434343, |
| "grad_norm": 0.13605698943138123, |
| "learning_rate": 9.950814780252442e-05, |
| "loss": 0.0112, |
| "step": 1850 |
| }, |
| { |
| "epoch": 9.393939393939394, |
| "grad_norm": 0.1667507141828537, |
| "learning_rate": 9.949651254073236e-05, |
| "loss": 0.0099, |
| "step": 1860 |
| }, |
| { |
| "epoch": 9.444444444444445, |
| "grad_norm": 0.13645249605178833, |
| "learning_rate": 9.948474195736504e-05, |
| "loss": 0.013, |
| "step": 1870 |
| }, |
| { |
| "epoch": 9.494949494949495, |
| "grad_norm": 0.1865932047367096, |
| "learning_rate": 9.947283608460277e-05, |
| "loss": 0.0099, |
| "step": 1880 |
| }, |
| { |
| "epoch": 9.545454545454545, |
| "grad_norm": 0.239735946059227, |
| "learning_rate": 9.946079495499577e-05, |
| "loss": 0.0119, |
| "step": 1890 |
| }, |
| { |
| "epoch": 9.595959595959595, |
| "grad_norm": 0.17933997511863708, |
| "learning_rate": 9.944861860146401e-05, |
| "loss": 0.0117, |
| "step": 1900 |
| }, |
| { |
| "epoch": 9.646464646464647, |
| "grad_norm": 0.16404038667678833, |
| "learning_rate": 9.943630705729719e-05, |
| "loss": 0.0115, |
| "step": 1910 |
| }, |
| { |
| "epoch": 9.696969696969697, |
| "grad_norm": 0.14647988975048065, |
| "learning_rate": 9.942386035615459e-05, |
| "loss": 0.0109, |
| "step": 1920 |
| }, |
| { |
| "epoch": 9.747474747474747, |
| "grad_norm": 0.14759261906147003, |
| "learning_rate": 9.941127853206503e-05, |
| "loss": 0.0093, |
| "step": 1930 |
| }, |
| { |
| "epoch": 9.797979797979798, |
| "grad_norm": 0.16250784695148468, |
| "learning_rate": 9.939856161942673e-05, |
| "loss": 0.0108, |
| "step": 1940 |
| }, |
| { |
| "epoch": 9.848484848484848, |
| "grad_norm": 0.12154943495988846, |
| "learning_rate": 9.938570965300724e-05, |
| "loss": 0.01, |
| "step": 1950 |
| }, |
| { |
| "epoch": 9.8989898989899, |
| "grad_norm": 0.13515616953372955, |
| "learning_rate": 9.937272266794335e-05, |
| "loss": 0.0098, |
| "step": 1960 |
| }, |
| { |
| "epoch": 9.94949494949495, |
| "grad_norm": 0.1455046683549881, |
| "learning_rate": 9.935960069974096e-05, |
| "loss": 0.0114, |
| "step": 1970 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.18031057715415955, |
| "learning_rate": 9.934634378427506e-05, |
| "loss": 0.0129, |
| "step": 1980 |
| }, |
| { |
| "epoch": 10.05050505050505, |
| "grad_norm": 0.17974168062210083, |
| "learning_rate": 9.933295195778954e-05, |
| "loss": 0.0086, |
| "step": 1990 |
| }, |
| { |
| "epoch": 10.1010101010101, |
| "grad_norm": 0.1430104821920395, |
| "learning_rate": 9.931942525689715e-05, |
| "loss": 0.0126, |
| "step": 2000 |
| }, |
| { |
| "epoch": 10.151515151515152, |
| "grad_norm": 0.09873004257678986, |
| "learning_rate": 9.930576371857936e-05, |
| "loss": 0.0114, |
| "step": 2010 |
| }, |
| { |
| "epoch": 10.202020202020202, |
| "grad_norm": 0.1394491046667099, |
| "learning_rate": 9.929196738018629e-05, |
| "loss": 0.009, |
| "step": 2020 |
| }, |
| { |
| "epoch": 10.252525252525253, |
| "grad_norm": 0.13830651342868805, |
| "learning_rate": 9.927803627943662e-05, |
| "loss": 0.0104, |
| "step": 2030 |
| }, |
| { |
| "epoch": 10.303030303030303, |
| "grad_norm": 0.1312495768070221, |
| "learning_rate": 9.926397045441744e-05, |
| "loss": 0.0109, |
| "step": 2040 |
| }, |
| { |
| "epoch": 10.353535353535353, |
| "grad_norm": 0.15347275137901306, |
| "learning_rate": 9.924976994358417e-05, |
| "loss": 0.0102, |
| "step": 2050 |
| }, |
| { |
| "epoch": 10.404040404040405, |
| "grad_norm": 0.14619392156600952, |
| "learning_rate": 9.923543478576048e-05, |
| "loss": 0.0111, |
| "step": 2060 |
| }, |
| { |
| "epoch": 10.454545454545455, |
| "grad_norm": 0.18486180901527405, |
| "learning_rate": 9.922096502013813e-05, |
| "loss": 0.0098, |
| "step": 2070 |
| }, |
| { |
| "epoch": 10.505050505050505, |
| "grad_norm": 0.16239513456821442, |
| "learning_rate": 9.92063606862769e-05, |
| "loss": 0.0102, |
| "step": 2080 |
| }, |
| { |
| "epoch": 10.555555555555555, |
| "grad_norm": 0.15381303429603577, |
| "learning_rate": 9.919162182410453e-05, |
| "loss": 0.0098, |
| "step": 2090 |
| }, |
| { |
| "epoch": 10.606060606060606, |
| "grad_norm": 0.17403188347816467, |
| "learning_rate": 9.917674847391645e-05, |
| "loss": 0.0101, |
| "step": 2100 |
| }, |
| { |
| "epoch": 10.656565656565657, |
| "grad_norm": 0.16825300455093384, |
| "learning_rate": 9.916174067637584e-05, |
| "loss": 0.0106, |
| "step": 2110 |
| }, |
| { |
| "epoch": 10.707070707070708, |
| "grad_norm": 0.14630188047885895, |
| "learning_rate": 9.914659847251348e-05, |
| "loss": 0.0095, |
| "step": 2120 |
| }, |
| { |
| "epoch": 10.757575757575758, |
| "grad_norm": 0.16878801584243774, |
| "learning_rate": 9.913132190372753e-05, |
| "loss": 0.0099, |
| "step": 2130 |
| }, |
| { |
| "epoch": 10.808080808080808, |
| "grad_norm": 0.12667079269886017, |
| "learning_rate": 9.911591101178359e-05, |
| "loss": 0.0097, |
| "step": 2140 |
| }, |
| { |
| "epoch": 10.858585858585858, |
| "grad_norm": 0.12915708124637604, |
| "learning_rate": 9.910036583881443e-05, |
| "loss": 0.01, |
| "step": 2150 |
| }, |
| { |
| "epoch": 10.909090909090908, |
| "grad_norm": 0.13464243710041046, |
| "learning_rate": 9.908468642731995e-05, |
| "loss": 0.0101, |
| "step": 2160 |
| }, |
| { |
| "epoch": 10.95959595959596, |
| "grad_norm": 0.1238473653793335, |
| "learning_rate": 9.906887282016707e-05, |
| "loss": 0.0115, |
| "step": 2170 |
| }, |
| { |
| "epoch": 11.01010101010101, |
| "grad_norm": 0.08457051217556, |
| "learning_rate": 9.90529250605896e-05, |
| "loss": 0.0085, |
| "step": 2180 |
| }, |
| { |
| "epoch": 11.06060606060606, |
| "grad_norm": 0.150074303150177, |
| "learning_rate": 9.903684319218809e-05, |
| "loss": 0.0109, |
| "step": 2190 |
| }, |
| { |
| "epoch": 11.11111111111111, |
| "grad_norm": 0.12013274431228638, |
| "learning_rate": 9.902062725892976e-05, |
| "loss": 0.0099, |
| "step": 2200 |
| }, |
| { |
| "epoch": 11.16161616161616, |
| "grad_norm": 0.15226365625858307, |
| "learning_rate": 9.900427730514834e-05, |
| "loss": 0.0109, |
| "step": 2210 |
| }, |
| { |
| "epoch": 11.212121212121213, |
| "grad_norm": 0.14881497621536255, |
| "learning_rate": 9.8987793375544e-05, |
| "loss": 0.0079, |
| "step": 2220 |
| }, |
| { |
| "epoch": 11.262626262626263, |
| "grad_norm": 0.1303631216287613, |
| "learning_rate": 9.897117551518318e-05, |
| "loss": 0.0111, |
| "step": 2230 |
| }, |
| { |
| "epoch": 11.313131313131313, |
| "grad_norm": 0.14369747042655945, |
| "learning_rate": 9.895442376949844e-05, |
| "loss": 0.0099, |
| "step": 2240 |
| }, |
| { |
| "epoch": 11.363636363636363, |
| "grad_norm": 0.1292843520641327, |
| "learning_rate": 9.893753818428845e-05, |
| "loss": 0.0079, |
| "step": 2250 |
| }, |
| { |
| "epoch": 11.414141414141413, |
| "grad_norm": 0.13971160352230072, |
| "learning_rate": 9.892051880571773e-05, |
| "loss": 0.0089, |
| "step": 2260 |
| }, |
| { |
| "epoch": 11.464646464646465, |
| "grad_norm": 0.15083859860897064, |
| "learning_rate": 9.890336568031663e-05, |
| "loss": 0.0121, |
| "step": 2270 |
| }, |
| { |
| "epoch": 11.515151515151516, |
| "grad_norm": 0.1554926335811615, |
| "learning_rate": 9.888607885498113e-05, |
| "loss": 0.0093, |
| "step": 2280 |
| }, |
| { |
| "epoch": 11.565656565656566, |
| "grad_norm": 0.12625600397586823, |
| "learning_rate": 9.886865837697275e-05, |
| "loss": 0.0089, |
| "step": 2290 |
| }, |
| { |
| "epoch": 11.616161616161616, |
| "grad_norm": 0.113410584628582, |
| "learning_rate": 9.88511042939184e-05, |
| "loss": 0.0095, |
| "step": 2300 |
| }, |
| { |
| "epoch": 11.666666666666666, |
| "grad_norm": 0.14116285741329193, |
| "learning_rate": 9.883341665381028e-05, |
| "loss": 0.0073, |
| "step": 2310 |
| }, |
| { |
| "epoch": 11.717171717171716, |
| "grad_norm": 0.11192002892494202, |
| "learning_rate": 9.881559550500575e-05, |
| "loss": 0.0082, |
| "step": 2320 |
| }, |
| { |
| "epoch": 11.767676767676768, |
| "grad_norm": 0.11441117525100708, |
| "learning_rate": 9.879764089622712e-05, |
| "loss": 0.0076, |
| "step": 2330 |
| }, |
| { |
| "epoch": 11.818181818181818, |
| "grad_norm": 0.0957820788025856, |
| "learning_rate": 9.87795528765616e-05, |
| "loss": 0.0081, |
| "step": 2340 |
| }, |
| { |
| "epoch": 11.868686868686869, |
| "grad_norm": 0.09258463978767395, |
| "learning_rate": 9.876133149546118e-05, |
| "loss": 0.0125, |
| "step": 2350 |
| }, |
| { |
| "epoch": 11.919191919191919, |
| "grad_norm": 0.12749741971492767, |
| "learning_rate": 9.874297680274238e-05, |
| "loss": 0.011, |
| "step": 2360 |
| }, |
| { |
| "epoch": 11.969696969696969, |
| "grad_norm": 0.16064293682575226, |
| "learning_rate": 9.872448884858624e-05, |
| "loss": 0.0107, |
| "step": 2370 |
| }, |
| { |
| "epoch": 12.02020202020202, |
| "grad_norm": 0.19199725985527039, |
| "learning_rate": 9.870586768353815e-05, |
| "loss": 0.0095, |
| "step": 2380 |
| }, |
| { |
| "epoch": 12.070707070707071, |
| "grad_norm": 0.15348611772060394, |
| "learning_rate": 9.868711335850764e-05, |
| "loss": 0.0089, |
| "step": 2390 |
| }, |
| { |
| "epoch": 12.121212121212121, |
| "grad_norm": 0.16762219369411469, |
| "learning_rate": 9.866822592476833e-05, |
| "loss": 0.0103, |
| "step": 2400 |
| }, |
| { |
| "epoch": 12.171717171717171, |
| "grad_norm": 0.12766151130199432, |
| "learning_rate": 9.86492054339577e-05, |
| "loss": 0.0104, |
| "step": 2410 |
| }, |
| { |
| "epoch": 12.222222222222221, |
| "grad_norm": 0.1027817353606224, |
| "learning_rate": 9.863005193807711e-05, |
| "loss": 0.0085, |
| "step": 2420 |
| }, |
| { |
| "epoch": 12.272727272727273, |
| "grad_norm": 0.1302451640367508, |
| "learning_rate": 9.861076548949143e-05, |
| "loss": 0.0075, |
| "step": 2430 |
| }, |
| { |
| "epoch": 12.323232323232324, |
| "grad_norm": 0.08964911848306656, |
| "learning_rate": 9.859134614092912e-05, |
| "loss": 0.0082, |
| "step": 2440 |
| }, |
| { |
| "epoch": 12.373737373737374, |
| "grad_norm": 0.15729080140590668, |
| "learning_rate": 9.857179394548191e-05, |
| "loss": 0.0085, |
| "step": 2450 |
| }, |
| { |
| "epoch": 12.424242424242424, |
| "grad_norm": 0.12807294726371765, |
| "learning_rate": 9.855210895660477e-05, |
| "loss": 0.0099, |
| "step": 2460 |
| }, |
| { |
| "epoch": 12.474747474747474, |
| "grad_norm": 0.09408801048994064, |
| "learning_rate": 9.853229122811568e-05, |
| "loss": 0.0092, |
| "step": 2470 |
| }, |
| { |
| "epoch": 12.525252525252526, |
| "grad_norm": 0.1233830526471138, |
| "learning_rate": 9.851234081419559e-05, |
| "loss": 0.0103, |
| "step": 2480 |
| }, |
| { |
| "epoch": 12.575757575757576, |
| "grad_norm": 0.11289270222187042, |
| "learning_rate": 9.849225776938814e-05, |
| "loss": 0.0117, |
| "step": 2490 |
| }, |
| { |
| "epoch": 12.626262626262626, |
| "grad_norm": 0.11409828066825867, |
| "learning_rate": 9.847204214859964e-05, |
| "loss": 0.0093, |
| "step": 2500 |
| }, |
| { |
| "epoch": 12.676767676767676, |
| "grad_norm": 0.1210876926779747, |
| "learning_rate": 9.845169400709879e-05, |
| "loss": 0.0114, |
| "step": 2510 |
| }, |
| { |
| "epoch": 12.727272727272727, |
| "grad_norm": 0.20763614773750305, |
| "learning_rate": 9.843121340051664e-05, |
| "loss": 0.01, |
| "step": 2520 |
| }, |
| { |
| "epoch": 12.777777777777779, |
| "grad_norm": 0.14084021747112274, |
| "learning_rate": 9.841060038484641e-05, |
| "loss": 0.008, |
| "step": 2530 |
| }, |
| { |
| "epoch": 12.828282828282829, |
| "grad_norm": 0.12081307172775269, |
| "learning_rate": 9.838985501644328e-05, |
| "loss": 0.0095, |
| "step": 2540 |
| }, |
| { |
| "epoch": 12.878787878787879, |
| "grad_norm": 0.12796291708946228, |
| "learning_rate": 9.83689773520243e-05, |
| "loss": 0.0101, |
| "step": 2550 |
| }, |
| { |
| "epoch": 12.929292929292929, |
| "grad_norm": 0.1175423189997673, |
| "learning_rate": 9.834796744866819e-05, |
| "loss": 0.0098, |
| "step": 2560 |
| }, |
| { |
| "epoch": 12.97979797979798, |
| "grad_norm": 0.1307343691587448, |
| "learning_rate": 9.832682536381525e-05, |
| "loss": 0.0072, |
| "step": 2570 |
| }, |
| { |
| "epoch": 13.030303030303031, |
| "grad_norm": 0.14350418746471405, |
| "learning_rate": 9.830555115526711e-05, |
| "loss": 0.0145, |
| "step": 2580 |
| }, |
| { |
| "epoch": 13.080808080808081, |
| "grad_norm": 0.1591469645500183, |
| "learning_rate": 9.828414488118667e-05, |
| "loss": 0.0084, |
| "step": 2590 |
| }, |
| { |
| "epoch": 13.131313131313131, |
| "grad_norm": 0.12446706742048264, |
| "learning_rate": 9.826260660009785e-05, |
| "loss": 0.0106, |
| "step": 2600 |
| }, |
| { |
| "epoch": 13.181818181818182, |
| "grad_norm": 0.14549347758293152, |
| "learning_rate": 9.824093637088547e-05, |
| "loss": 0.0097, |
| "step": 2610 |
| }, |
| { |
| "epoch": 13.232323232323232, |
| "grad_norm": 0.16026464104652405, |
| "learning_rate": 9.821913425279514e-05, |
| "loss": 0.0085, |
| "step": 2620 |
| }, |
| { |
| "epoch": 13.282828282828282, |
| "grad_norm": 0.11597767472267151, |
| "learning_rate": 9.8197200305433e-05, |
| "loss": 0.0083, |
| "step": 2630 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 0.13113585114479065, |
| "learning_rate": 9.817513458876564e-05, |
| "loss": 0.0091, |
| "step": 2640 |
| }, |
| { |
| "epoch": 13.383838383838384, |
| "grad_norm": 0.15264829993247986, |
| "learning_rate": 9.815293716311987e-05, |
| "loss": 0.0105, |
| "step": 2650 |
| }, |
| { |
| "epoch": 13.434343434343434, |
| "grad_norm": 0.16135656833648682, |
| "learning_rate": 9.813060808918262e-05, |
| "loss": 0.0106, |
| "step": 2660 |
| }, |
| { |
| "epoch": 13.484848484848484, |
| "grad_norm": 0.14902526140213013, |
| "learning_rate": 9.810814742800069e-05, |
| "loss": 0.0089, |
| "step": 2670 |
| }, |
| { |
| "epoch": 13.535353535353535, |
| "grad_norm": 0.1348474770784378, |
| "learning_rate": 9.808555524098074e-05, |
| "loss": 0.0112, |
| "step": 2680 |
| }, |
| { |
| "epoch": 13.585858585858587, |
| "grad_norm": 0.12888960540294647, |
| "learning_rate": 9.806283158988887e-05, |
| "loss": 0.0088, |
| "step": 2690 |
| }, |
| { |
| "epoch": 13.636363636363637, |
| "grad_norm": 0.12744148075580597, |
| "learning_rate": 9.803997653685072e-05, |
| "loss": 0.0091, |
| "step": 2700 |
| }, |
| { |
| "epoch": 13.686868686868687, |
| "grad_norm": 0.11552807688713074, |
| "learning_rate": 9.801699014435112e-05, |
| "loss": 0.0087, |
| "step": 2710 |
| }, |
| { |
| "epoch": 13.737373737373737, |
| "grad_norm": 0.17679044604301453, |
| "learning_rate": 9.799387247523398e-05, |
| "loss": 0.0104, |
| "step": 2720 |
| }, |
| { |
| "epoch": 13.787878787878787, |
| "grad_norm": 0.10784724354743958, |
| "learning_rate": 9.797062359270215e-05, |
| "loss": 0.0117, |
| "step": 2730 |
| }, |
| { |
| "epoch": 13.83838383838384, |
| "grad_norm": 0.10222921520471573, |
| "learning_rate": 9.794724356031715e-05, |
| "loss": 0.008, |
| "step": 2740 |
| }, |
| { |
| "epoch": 13.88888888888889, |
| "grad_norm": 0.12102789431810379, |
| "learning_rate": 9.792373244199913e-05, |
| "loss": 0.009, |
| "step": 2750 |
| }, |
| { |
| "epoch": 13.93939393939394, |
| "grad_norm": 0.12577442824840546, |
| "learning_rate": 9.790009030202658e-05, |
| "loss": 0.0075, |
| "step": 2760 |
| }, |
| { |
| "epoch": 13.98989898989899, |
| "grad_norm": 0.1563095599412918, |
| "learning_rate": 9.78763172050362e-05, |
| "loss": 0.0113, |
| "step": 2770 |
| }, |
| { |
| "epoch": 14.04040404040404, |
| "grad_norm": 0.10740932077169418, |
| "learning_rate": 9.785241321602274e-05, |
| "loss": 0.0104, |
| "step": 2780 |
| }, |
| { |
| "epoch": 14.090909090909092, |
| "grad_norm": 0.1451617032289505, |
| "learning_rate": 9.782837840033879e-05, |
| "loss": 0.0098, |
| "step": 2790 |
| }, |
| { |
| "epoch": 14.141414141414142, |
| "grad_norm": 0.11700810492038727, |
| "learning_rate": 9.780421282369461e-05, |
| "loss": 0.0075, |
| "step": 2800 |
| }, |
| { |
| "epoch": 14.191919191919192, |
| "grad_norm": 0.11071896553039551, |
| "learning_rate": 9.777991655215797e-05, |
| "loss": 0.0103, |
| "step": 2810 |
| }, |
| { |
| "epoch": 14.242424242424242, |
| "grad_norm": 0.10571824014186859, |
| "learning_rate": 9.775548965215394e-05, |
| "loss": 0.009, |
| "step": 2820 |
| }, |
| { |
| "epoch": 14.292929292929292, |
| "grad_norm": 0.1258680820465088, |
| "learning_rate": 9.773093219046474e-05, |
| "loss": 0.0089, |
| "step": 2830 |
| }, |
| { |
| "epoch": 14.343434343434343, |
| "grad_norm": 0.10428173094987869, |
| "learning_rate": 9.770624423422954e-05, |
| "loss": 0.0083, |
| "step": 2840 |
| }, |
| { |
| "epoch": 14.393939393939394, |
| "grad_norm": 0.091452457010746, |
| "learning_rate": 9.768142585094426e-05, |
| "loss": 0.0091, |
| "step": 2850 |
| }, |
| { |
| "epoch": 14.444444444444445, |
| "grad_norm": 0.10861776024103165, |
| "learning_rate": 9.765647710846142e-05, |
| "loss": 0.0101, |
| "step": 2860 |
| }, |
| { |
| "epoch": 14.494949494949495, |
| "grad_norm": 0.09481234103441238, |
| "learning_rate": 9.763139807498991e-05, |
| "loss": 0.0087, |
| "step": 2870 |
| }, |
| { |
| "epoch": 14.545454545454545, |
| "grad_norm": 0.11266465485095978, |
| "learning_rate": 9.760618881909487e-05, |
| "loss": 0.0077, |
| "step": 2880 |
| }, |
| { |
| "epoch": 14.595959595959595, |
| "grad_norm": 0.09945003688335419, |
| "learning_rate": 9.758084940969744e-05, |
| "loss": 0.0074, |
| "step": 2890 |
| }, |
| { |
| "epoch": 14.646464646464647, |
| "grad_norm": 0.10776614397764206, |
| "learning_rate": 9.755537991607459e-05, |
| "loss": 0.0075, |
| "step": 2900 |
| }, |
| { |
| "epoch": 14.696969696969697, |
| "grad_norm": 0.10660045593976974, |
| "learning_rate": 9.752978040785895e-05, |
| "loss": 0.0101, |
| "step": 2910 |
| }, |
| { |
| "epoch": 14.747474747474747, |
| "grad_norm": 0.10037105530500412, |
| "learning_rate": 9.750405095503859e-05, |
| "loss": 0.0081, |
| "step": 2920 |
| }, |
| { |
| "epoch": 14.797979797979798, |
| "grad_norm": 0.13063082098960876, |
| "learning_rate": 9.747819162795686e-05, |
| "loss": 0.0076, |
| "step": 2930 |
| }, |
| { |
| "epoch": 14.848484848484848, |
| "grad_norm": 0.10624378174543381, |
| "learning_rate": 9.745220249731217e-05, |
| "loss": 0.0083, |
| "step": 2940 |
| }, |
| { |
| "epoch": 14.8989898989899, |
| "grad_norm": 0.11614657193422318, |
| "learning_rate": 9.742608363415781e-05, |
| "loss": 0.011, |
| "step": 2950 |
| }, |
| { |
| "epoch": 14.94949494949495, |
| "grad_norm": 0.10755102336406708, |
| "learning_rate": 9.739983510990176e-05, |
| "loss": 0.0069, |
| "step": 2960 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.10821855813264847, |
| "learning_rate": 9.737345699630647e-05, |
| "loss": 0.0073, |
| "step": 2970 |
| }, |
| { |
| "epoch": 15.05050505050505, |
| "grad_norm": 0.13390570878982544, |
| "learning_rate": 9.734694936548869e-05, |
| "loss": 0.0096, |
| "step": 2980 |
| }, |
| { |
| "epoch": 15.1010101010101, |
| "grad_norm": 0.14151936769485474, |
| "learning_rate": 9.732031228991932e-05, |
| "loss": 0.0108, |
| "step": 2990 |
| }, |
| { |
| "epoch": 15.151515151515152, |
| "grad_norm": 0.15358908474445343, |
| "learning_rate": 9.729354584242302e-05, |
| "loss": 0.0083, |
| "step": 3000 |
| }, |
| { |
| "epoch": 15.202020202020202, |
| "grad_norm": 0.13862118124961853, |
| "learning_rate": 9.726665009617832e-05, |
| "loss": 0.0103, |
| "step": 3010 |
| }, |
| { |
| "epoch": 15.252525252525253, |
| "grad_norm": 0.14177963137626648, |
| "learning_rate": 9.723962512471714e-05, |
| "loss": 0.0078, |
| "step": 3020 |
| }, |
| { |
| "epoch": 15.303030303030303, |
| "grad_norm": 0.11334635317325592, |
| "learning_rate": 9.72124710019247e-05, |
| "loss": 0.0079, |
| "step": 3030 |
| }, |
| { |
| "epoch": 15.353535353535353, |
| "grad_norm": 0.10515457391738892, |
| "learning_rate": 9.718518780203934e-05, |
| "loss": 0.0087, |
| "step": 3040 |
| }, |
| { |
| "epoch": 15.404040404040405, |
| "grad_norm": 0.153967946767807, |
| "learning_rate": 9.715777559965228e-05, |
| "loss": 0.0109, |
| "step": 3050 |
| }, |
| { |
| "epoch": 15.454545454545455, |
| "grad_norm": 0.09760258346796036, |
| "learning_rate": 9.713023446970746e-05, |
| "loss": 0.0109, |
| "step": 3060 |
| }, |
| { |
| "epoch": 15.505050505050505, |
| "grad_norm": 0.095925472676754, |
| "learning_rate": 9.710256448750126e-05, |
| "loss": 0.0073, |
| "step": 3070 |
| }, |
| { |
| "epoch": 15.555555555555555, |
| "grad_norm": 0.14394019544124603, |
| "learning_rate": 9.707476572868235e-05, |
| "loss": 0.0088, |
| "step": 3080 |
| }, |
| { |
| "epoch": 15.606060606060606, |
| "grad_norm": 0.08589974790811539, |
| "learning_rate": 9.704683826925149e-05, |
| "loss": 0.0098, |
| "step": 3090 |
| }, |
| { |
| "epoch": 15.656565656565657, |
| "grad_norm": 0.124774269759655, |
| "learning_rate": 9.701878218556129e-05, |
| "loss": 0.0078, |
| "step": 3100 |
| }, |
| { |
| "epoch": 15.707070707070708, |
| "grad_norm": 0.15777871012687683, |
| "learning_rate": 9.699059755431598e-05, |
| "loss": 0.0094, |
| "step": 3110 |
| }, |
| { |
| "epoch": 15.757575757575758, |
| "grad_norm": 0.11258449405431747, |
| "learning_rate": 9.696228445257132e-05, |
| "loss": 0.0118, |
| "step": 3120 |
| }, |
| { |
| "epoch": 15.808080808080808, |
| "grad_norm": 0.11212020367383957, |
| "learning_rate": 9.693384295773419e-05, |
| "loss": 0.0087, |
| "step": 3130 |
| }, |
| { |
| "epoch": 15.858585858585858, |
| "grad_norm": 0.10437760502099991, |
| "learning_rate": 9.690527314756259e-05, |
| "loss": 0.0115, |
| "step": 3140 |
| }, |
| { |
| "epoch": 15.909090909090908, |
| "grad_norm": 0.13476359844207764, |
| "learning_rate": 9.687657510016527e-05, |
| "loss": 0.0092, |
| "step": 3150 |
| }, |
| { |
| "epoch": 15.95959595959596, |
| "grad_norm": 0.12884069979190826, |
| "learning_rate": 9.684774889400161e-05, |
| "loss": 0.0093, |
| "step": 3160 |
| }, |
| { |
| "epoch": 16.01010101010101, |
| "grad_norm": 0.09555072337388992, |
| "learning_rate": 9.681879460788135e-05, |
| "loss": 0.0076, |
| "step": 3170 |
| }, |
| { |
| "epoch": 16.060606060606062, |
| "grad_norm": 0.09997926652431488, |
| "learning_rate": 9.67897123209644e-05, |
| "loss": 0.0075, |
| "step": 3180 |
| }, |
| { |
| "epoch": 16.11111111111111, |
| "grad_norm": 0.09748507291078568, |
| "learning_rate": 9.676050211276062e-05, |
| "loss": 0.0086, |
| "step": 3190 |
| }, |
| { |
| "epoch": 16.161616161616163, |
| "grad_norm": 0.10043340921401978, |
| "learning_rate": 9.673116406312962e-05, |
| "loss": 0.0083, |
| "step": 3200 |
| }, |
| { |
| "epoch": 16.21212121212121, |
| "grad_norm": 0.11663683503866196, |
| "learning_rate": 9.67016982522805e-05, |
| "loss": 0.0078, |
| "step": 3210 |
| }, |
| { |
| "epoch": 16.262626262626263, |
| "grad_norm": 0.12899702787399292, |
| "learning_rate": 9.667210476077164e-05, |
| "loss": 0.0083, |
| "step": 3220 |
| }, |
| { |
| "epoch": 16.313131313131315, |
| "grad_norm": 0.1379714012145996, |
| "learning_rate": 9.664238366951055e-05, |
| "loss": 0.0073, |
| "step": 3230 |
| }, |
| { |
| "epoch": 16.363636363636363, |
| "grad_norm": 0.10237933695316315, |
| "learning_rate": 9.661253505975355e-05, |
| "loss": 0.0087, |
| "step": 3240 |
| }, |
| { |
| "epoch": 16.414141414141415, |
| "grad_norm": 0.10615336149930954, |
| "learning_rate": 9.658255901310557e-05, |
| "loss": 0.0084, |
| "step": 3250 |
| }, |
| { |
| "epoch": 16.464646464646464, |
| "grad_norm": 0.12208615988492966, |
| "learning_rate": 9.655245561152e-05, |
| "loss": 0.0072, |
| "step": 3260 |
| }, |
| { |
| "epoch": 16.515151515151516, |
| "grad_norm": 0.10828232020139694, |
| "learning_rate": 9.65222249372984e-05, |
| "loss": 0.0086, |
| "step": 3270 |
| }, |
| { |
| "epoch": 16.565656565656564, |
| "grad_norm": 0.10949241369962692, |
| "learning_rate": 9.649186707309026e-05, |
| "loss": 0.0071, |
| "step": 3280 |
| }, |
| { |
| "epoch": 16.616161616161616, |
| "grad_norm": 0.12728309631347656, |
| "learning_rate": 9.646138210189283e-05, |
| "loss": 0.0102, |
| "step": 3290 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 0.13336427509784698, |
| "learning_rate": 9.643077010705087e-05, |
| "loss": 0.0089, |
| "step": 3300 |
| }, |
| { |
| "epoch": 16.717171717171716, |
| "grad_norm": 0.1352432370185852, |
| "learning_rate": 9.640003117225637e-05, |
| "loss": 0.009, |
| "step": 3310 |
| }, |
| { |
| "epoch": 16.767676767676768, |
| "grad_norm": 0.12393588572740555, |
| "learning_rate": 9.636916538154846e-05, |
| "loss": 0.0088, |
| "step": 3320 |
| }, |
| { |
| "epoch": 16.818181818181817, |
| "grad_norm": 0.1410790979862213, |
| "learning_rate": 9.633817281931296e-05, |
| "loss": 0.0081, |
| "step": 3330 |
| }, |
| { |
| "epoch": 16.86868686868687, |
| "grad_norm": 0.10467179864645004, |
| "learning_rate": 9.630705357028242e-05, |
| "loss": 0.0113, |
| "step": 3340 |
| }, |
| { |
| "epoch": 16.91919191919192, |
| "grad_norm": 0.12543462216854095, |
| "learning_rate": 9.627580771953563e-05, |
| "loss": 0.0102, |
| "step": 3350 |
| }, |
| { |
| "epoch": 16.96969696969697, |
| "grad_norm": 0.1151360422372818, |
| "learning_rate": 9.624443535249759e-05, |
| "loss": 0.0074, |
| "step": 3360 |
| }, |
| { |
| "epoch": 17.02020202020202, |
| "grad_norm": 0.10366681963205338, |
| "learning_rate": 9.621293655493913e-05, |
| "loss": 0.0077, |
| "step": 3370 |
| }, |
| { |
| "epoch": 17.07070707070707, |
| "grad_norm": 0.10378500074148178, |
| "learning_rate": 9.618131141297675e-05, |
| "loss": 0.0077, |
| "step": 3380 |
| }, |
| { |
| "epoch": 17.12121212121212, |
| "grad_norm": 0.1064554750919342, |
| "learning_rate": 9.614956001307242e-05, |
| "loss": 0.0064, |
| "step": 3390 |
| }, |
| { |
| "epoch": 17.171717171717173, |
| "grad_norm": 0.13094130158424377, |
| "learning_rate": 9.611768244203321e-05, |
| "loss": 0.0095, |
| "step": 3400 |
| }, |
| { |
| "epoch": 17.22222222222222, |
| "grad_norm": 0.12749044597148895, |
| "learning_rate": 9.60856787870112e-05, |
| "loss": 0.0084, |
| "step": 3410 |
| }, |
| { |
| "epoch": 17.272727272727273, |
| "grad_norm": 0.09612113237380981, |
| "learning_rate": 9.605354913550318e-05, |
| "loss": 0.0083, |
| "step": 3420 |
| }, |
| { |
| "epoch": 17.32323232323232, |
| "grad_norm": 0.11700460314750671, |
| "learning_rate": 9.602129357535037e-05, |
| "loss": 0.0073, |
| "step": 3430 |
| }, |
| { |
| "epoch": 17.373737373737374, |
| "grad_norm": 0.1073329746723175, |
| "learning_rate": 9.598891219473825e-05, |
| "loss": 0.0061, |
| "step": 3440 |
| }, |
| { |
| "epoch": 17.424242424242426, |
| "grad_norm": 0.11856622993946075, |
| "learning_rate": 9.595640508219625e-05, |
| "loss": 0.0083, |
| "step": 3450 |
| }, |
| { |
| "epoch": 17.474747474747474, |
| "grad_norm": 0.15250164270401, |
| "learning_rate": 9.592377232659761e-05, |
| "loss": 0.0073, |
| "step": 3460 |
| }, |
| { |
| "epoch": 17.525252525252526, |
| "grad_norm": 0.1608418971300125, |
| "learning_rate": 9.589101401715904e-05, |
| "loss": 0.0079, |
| "step": 3470 |
| }, |
| { |
| "epoch": 17.575757575757574, |
| "grad_norm": 0.1401984691619873, |
| "learning_rate": 9.585813024344045e-05, |
| "loss": 0.011, |
| "step": 3480 |
| }, |
| { |
| "epoch": 17.626262626262626, |
| "grad_norm": 0.12536591291427612, |
| "learning_rate": 9.58251210953449e-05, |
| "loss": 0.0078, |
| "step": 3490 |
| }, |
| { |
| "epoch": 17.67676767676768, |
| "grad_norm": 0.11372566968202591, |
| "learning_rate": 9.579198666311809e-05, |
| "loss": 0.0089, |
| "step": 3500 |
| }, |
| { |
| "epoch": 17.727272727272727, |
| "grad_norm": 0.1742161214351654, |
| "learning_rate": 9.575872703734832e-05, |
| "loss": 0.013, |
| "step": 3510 |
| }, |
| { |
| "epoch": 17.77777777777778, |
| "grad_norm": 0.12576960027217865, |
| "learning_rate": 9.572534230896611e-05, |
| "loss": 0.0082, |
| "step": 3520 |
| }, |
| { |
| "epoch": 17.828282828282827, |
| "grad_norm": 0.1194293200969696, |
| "learning_rate": 9.569183256924403e-05, |
| "loss": 0.0081, |
| "step": 3530 |
| }, |
| { |
| "epoch": 17.87878787878788, |
| "grad_norm": 0.13364839553833008, |
| "learning_rate": 9.565819790979646e-05, |
| "loss": 0.0094, |
| "step": 3540 |
| }, |
| { |
| "epoch": 17.92929292929293, |
| "grad_norm": 0.13368232548236847, |
| "learning_rate": 9.562443842257925e-05, |
| "loss": 0.0077, |
| "step": 3550 |
| }, |
| { |
| "epoch": 17.97979797979798, |
| "grad_norm": 0.12154847383499146, |
| "learning_rate": 9.559055419988956e-05, |
| "loss": 0.0083, |
| "step": 3560 |
| }, |
| { |
| "epoch": 18.03030303030303, |
| "grad_norm": 0.16522938013076782, |
| "learning_rate": 9.555654533436557e-05, |
| "loss": 0.0073, |
| "step": 3570 |
| }, |
| { |
| "epoch": 18.08080808080808, |
| "grad_norm": 0.1213236078619957, |
| "learning_rate": 9.552241191898621e-05, |
| "loss": 0.0079, |
| "step": 3580 |
| }, |
| { |
| "epoch": 18.13131313131313, |
| "grad_norm": 0.13810697197914124, |
| "learning_rate": 9.548815404707092e-05, |
| "loss": 0.0081, |
| "step": 3590 |
| }, |
| { |
| "epoch": 18.181818181818183, |
| "grad_norm": 0.1554109752178192, |
| "learning_rate": 9.545377181227942e-05, |
| "loss": 0.0094, |
| "step": 3600 |
| }, |
| { |
| "epoch": 18.232323232323232, |
| "grad_norm": 0.16937795281410217, |
| "learning_rate": 9.541926530861145e-05, |
| "loss": 0.007, |
| "step": 3610 |
| }, |
| { |
| "epoch": 18.282828282828284, |
| "grad_norm": 0.12501734495162964, |
| "learning_rate": 9.538463463040645e-05, |
| "loss": 0.0073, |
| "step": 3620 |
| }, |
| { |
| "epoch": 18.333333333333332, |
| "grad_norm": 0.12023884803056717, |
| "learning_rate": 9.534987987234337e-05, |
| "loss": 0.0089, |
| "step": 3630 |
| }, |
| { |
| "epoch": 18.383838383838384, |
| "grad_norm": 0.09741893410682678, |
| "learning_rate": 9.53150011294404e-05, |
| "loss": 0.0065, |
| "step": 3640 |
| }, |
| { |
| "epoch": 18.434343434343436, |
| "grad_norm": 0.09889008104801178, |
| "learning_rate": 9.527999849705471e-05, |
| "loss": 0.0075, |
| "step": 3650 |
| }, |
| { |
| "epoch": 18.484848484848484, |
| "grad_norm": 0.11164385080337524, |
| "learning_rate": 9.524487207088213e-05, |
| "loss": 0.008, |
| "step": 3660 |
| }, |
| { |
| "epoch": 18.535353535353536, |
| "grad_norm": 0.1567450314760208, |
| "learning_rate": 9.520962194695698e-05, |
| "loss": 0.0117, |
| "step": 3670 |
| }, |
| { |
| "epoch": 18.585858585858585, |
| "grad_norm": 0.11846747994422913, |
| "learning_rate": 9.517424822165175e-05, |
| "loss": 0.008, |
| "step": 3680 |
| }, |
| { |
| "epoch": 18.636363636363637, |
| "grad_norm": 0.15824367105960846, |
| "learning_rate": 9.513875099167685e-05, |
| "loss": 0.0106, |
| "step": 3690 |
| }, |
| { |
| "epoch": 18.686868686868685, |
| "grad_norm": 0.12076467275619507, |
| "learning_rate": 9.510313035408035e-05, |
| "loss": 0.0067, |
| "step": 3700 |
| }, |
| { |
| "epoch": 18.737373737373737, |
| "grad_norm": 0.08991780132055283, |
| "learning_rate": 9.506738640624775e-05, |
| "loss": 0.009, |
| "step": 3710 |
| }, |
| { |
| "epoch": 18.78787878787879, |
| "grad_norm": 0.10382210463285446, |
| "learning_rate": 9.50315192459016e-05, |
| "loss": 0.0086, |
| "step": 3720 |
| }, |
| { |
| "epoch": 18.838383838383837, |
| "grad_norm": 0.12380581349134445, |
| "learning_rate": 9.499552897110136e-05, |
| "loss": 0.0083, |
| "step": 3730 |
| }, |
| { |
| "epoch": 18.88888888888889, |
| "grad_norm": 0.10122298449277878, |
| "learning_rate": 9.495941568024304e-05, |
| "loss": 0.0077, |
| "step": 3740 |
| }, |
| { |
| "epoch": 18.939393939393938, |
| "grad_norm": 0.10849478095769882, |
| "learning_rate": 9.492317947205904e-05, |
| "loss": 0.0067, |
| "step": 3750 |
| }, |
| { |
| "epoch": 18.98989898989899, |
| "grad_norm": 0.1261819452047348, |
| "learning_rate": 9.488682044561775e-05, |
| "loss": 0.0112, |
| "step": 3760 |
| }, |
| { |
| "epoch": 19.04040404040404, |
| "grad_norm": 0.11821562796831131, |
| "learning_rate": 9.485033870032335e-05, |
| "loss": 0.0083, |
| "step": 3770 |
| }, |
| { |
| "epoch": 19.09090909090909, |
| "grad_norm": 0.1322745382785797, |
| "learning_rate": 9.481373433591556e-05, |
| "loss": 0.0066, |
| "step": 3780 |
| }, |
| { |
| "epoch": 19.141414141414142, |
| "grad_norm": 0.11863276362419128, |
| "learning_rate": 9.47770074524693e-05, |
| "loss": 0.0079, |
| "step": 3790 |
| }, |
| { |
| "epoch": 19.19191919191919, |
| "grad_norm": 0.1268235296010971, |
| "learning_rate": 9.474015815039446e-05, |
| "loss": 0.0077, |
| "step": 3800 |
| }, |
| { |
| "epoch": 19.242424242424242, |
| "grad_norm": 0.12315259128808975, |
| "learning_rate": 9.470318653043565e-05, |
| "loss": 0.0107, |
| "step": 3810 |
| }, |
| { |
| "epoch": 19.292929292929294, |
| "grad_norm": 0.11973121762275696, |
| "learning_rate": 9.466609269367185e-05, |
| "loss": 0.0079, |
| "step": 3820 |
| }, |
| { |
| "epoch": 19.343434343434343, |
| "grad_norm": 0.09223002940416336, |
| "learning_rate": 9.46288767415162e-05, |
| "loss": 0.0079, |
| "step": 3830 |
| }, |
| { |
| "epoch": 19.393939393939394, |
| "grad_norm": 0.12107054144144058, |
| "learning_rate": 9.459153877571567e-05, |
| "loss": 0.0095, |
| "step": 3840 |
| }, |
| { |
| "epoch": 19.444444444444443, |
| "grad_norm": 0.12477860599756241, |
| "learning_rate": 9.455407889835087e-05, |
| "loss": 0.0077, |
| "step": 3850 |
| }, |
| { |
| "epoch": 19.494949494949495, |
| "grad_norm": 0.11893380433320999, |
| "learning_rate": 9.451649721183564e-05, |
| "loss": 0.0067, |
| "step": 3860 |
| }, |
| { |
| "epoch": 19.545454545454547, |
| "grad_norm": 0.08777452260255814, |
| "learning_rate": 9.447879381891692e-05, |
| "loss": 0.0072, |
| "step": 3870 |
| }, |
| { |
| "epoch": 19.595959595959595, |
| "grad_norm": 0.1062486320734024, |
| "learning_rate": 9.444096882267428e-05, |
| "loss": 0.007, |
| "step": 3880 |
| }, |
| { |
| "epoch": 19.646464646464647, |
| "grad_norm": 0.11268304288387299, |
| "learning_rate": 9.440302232651988e-05, |
| "loss": 0.0076, |
| "step": 3890 |
| }, |
| { |
| "epoch": 19.696969696969695, |
| "grad_norm": 0.1250857412815094, |
| "learning_rate": 9.436495443419795e-05, |
| "loss": 0.009, |
| "step": 3900 |
| }, |
| { |
| "epoch": 19.747474747474747, |
| "grad_norm": 0.10293550789356232, |
| "learning_rate": 9.432676524978466e-05, |
| "loss": 0.0085, |
| "step": 3910 |
| }, |
| { |
| "epoch": 19.7979797979798, |
| "grad_norm": 0.10072023421525955, |
| "learning_rate": 9.42884548776878e-05, |
| "loss": 0.0075, |
| "step": 3920 |
| }, |
| { |
| "epoch": 19.848484848484848, |
| "grad_norm": 0.10994452238082886, |
| "learning_rate": 9.425002342264646e-05, |
| "loss": 0.0075, |
| "step": 3930 |
| }, |
| { |
| "epoch": 19.8989898989899, |
| "grad_norm": 0.11905818432569504, |
| "learning_rate": 9.421147098973077e-05, |
| "loss": 0.0087, |
| "step": 3940 |
| }, |
| { |
| "epoch": 19.949494949494948, |
| "grad_norm": 0.11534831672906876, |
| "learning_rate": 9.41727976843416e-05, |
| "loss": 0.0069, |
| "step": 3950 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.08924000710248947, |
| "learning_rate": 9.413400361221029e-05, |
| "loss": 0.0075, |
| "step": 3960 |
| }, |
| { |
| "epoch": 20.050505050505052, |
| "grad_norm": 0.09442752599716187, |
| "learning_rate": 9.409508887939835e-05, |
| "loss": 0.0092, |
| "step": 3970 |
| }, |
| { |
| "epoch": 20.1010101010101, |
| "grad_norm": 0.12191225588321686, |
| "learning_rate": 9.40560535922972e-05, |
| "loss": 0.0085, |
| "step": 3980 |
| }, |
| { |
| "epoch": 20.151515151515152, |
| "grad_norm": 0.10384054481983185, |
| "learning_rate": 9.40168978576278e-05, |
| "loss": 0.0075, |
| "step": 3990 |
| }, |
| { |
| "epoch": 20.2020202020202, |
| "grad_norm": 0.11969480663537979, |
| "learning_rate": 9.397762178244043e-05, |
| "loss": 0.0082, |
| "step": 4000 |
| }, |
| { |
| "epoch": 20.252525252525253, |
| "grad_norm": 0.10135440528392792, |
| "learning_rate": 9.393822547411439e-05, |
| "loss": 0.0078, |
| "step": 4010 |
| }, |
| { |
| "epoch": 20.303030303030305, |
| "grad_norm": 0.11920253187417984, |
| "learning_rate": 9.389870904035769e-05, |
| "loss": 0.0076, |
| "step": 4020 |
| }, |
| { |
| "epoch": 20.353535353535353, |
| "grad_norm": 0.11987727135419846, |
| "learning_rate": 9.385907258920672e-05, |
| "loss": 0.006, |
| "step": 4030 |
| }, |
| { |
| "epoch": 20.404040404040405, |
| "grad_norm": 0.1490645408630371, |
| "learning_rate": 9.381931622902607e-05, |
| "loss": 0.0086, |
| "step": 4040 |
| }, |
| { |
| "epoch": 20.454545454545453, |
| "grad_norm": 0.14731353521347046, |
| "learning_rate": 9.377944006850807e-05, |
| "loss": 0.009, |
| "step": 4050 |
| }, |
| { |
| "epoch": 20.505050505050505, |
| "grad_norm": 0.09531271457672119, |
| "learning_rate": 9.373944421667265e-05, |
| "loss": 0.0075, |
| "step": 4060 |
| }, |
| { |
| "epoch": 20.555555555555557, |
| "grad_norm": 0.11966171860694885, |
| "learning_rate": 9.369932878286691e-05, |
| "loss": 0.0074, |
| "step": 4070 |
| }, |
| { |
| "epoch": 20.606060606060606, |
| "grad_norm": 0.11719571799039841, |
| "learning_rate": 9.365909387676494e-05, |
| "loss": 0.0079, |
| "step": 4080 |
| }, |
| { |
| "epoch": 20.656565656565657, |
| "grad_norm": 0.10984815657138824, |
| "learning_rate": 9.361873960836744e-05, |
| "loss": 0.0078, |
| "step": 4090 |
| }, |
| { |
| "epoch": 20.707070707070706, |
| "grad_norm": 0.09501279890537262, |
| "learning_rate": 9.357826608800142e-05, |
| "loss": 0.0074, |
| "step": 4100 |
| }, |
| { |
| "epoch": 20.757575757575758, |
| "grad_norm": 0.09812503308057785, |
| "learning_rate": 9.353767342631994e-05, |
| "loss": 0.0065, |
| "step": 4110 |
| }, |
| { |
| "epoch": 20.80808080808081, |
| "grad_norm": 0.09090051054954529, |
| "learning_rate": 9.34969617343018e-05, |
| "loss": 0.0085, |
| "step": 4120 |
| }, |
| { |
| "epoch": 20.858585858585858, |
| "grad_norm": 0.11152360588312149, |
| "learning_rate": 9.345613112325122e-05, |
| "loss": 0.0076, |
| "step": 4130 |
| }, |
| { |
| "epoch": 20.90909090909091, |
| "grad_norm": 0.10534431785345078, |
| "learning_rate": 9.34151817047975e-05, |
| "loss": 0.0069, |
| "step": 4140 |
| }, |
| { |
| "epoch": 20.95959595959596, |
| "grad_norm": 0.12160573154687881, |
| "learning_rate": 9.33741135908948e-05, |
| "loss": 0.0067, |
| "step": 4150 |
| }, |
| { |
| "epoch": 21.01010101010101, |
| "grad_norm": 0.09798427671194077, |
| "learning_rate": 9.33329268938218e-05, |
| "loss": 0.0074, |
| "step": 4160 |
| }, |
| { |
| "epoch": 21.060606060606062, |
| "grad_norm": 0.11137689650058746, |
| "learning_rate": 9.329162172618132e-05, |
| "loss": 0.009, |
| "step": 4170 |
| }, |
| { |
| "epoch": 21.11111111111111, |
| "grad_norm": 0.12333360314369202, |
| "learning_rate": 9.325019820090013e-05, |
| "loss": 0.01, |
| "step": 4180 |
| }, |
| { |
| "epoch": 21.161616161616163, |
| "grad_norm": 0.09919871389865875, |
| "learning_rate": 9.320865643122855e-05, |
| "loss": 0.0072, |
| "step": 4190 |
| }, |
| { |
| "epoch": 21.21212121212121, |
| "grad_norm": 0.0981341078877449, |
| "learning_rate": 9.316699653074023e-05, |
| "loss": 0.0071, |
| "step": 4200 |
| }, |
| { |
| "epoch": 21.262626262626263, |
| "grad_norm": 0.12079688906669617, |
| "learning_rate": 9.312521861333172e-05, |
| "loss": 0.0071, |
| "step": 4210 |
| }, |
| { |
| "epoch": 21.313131313131315, |
| "grad_norm": 0.10652966797351837, |
| "learning_rate": 9.308332279322224e-05, |
| "loss": 0.0078, |
| "step": 4220 |
| }, |
| { |
| "epoch": 21.363636363636363, |
| "grad_norm": 0.09568671882152557, |
| "learning_rate": 9.304130918495338e-05, |
| "loss": 0.0077, |
| "step": 4230 |
| }, |
| { |
| "epoch": 21.414141414141415, |
| "grad_norm": 0.12118476629257202, |
| "learning_rate": 9.299917790338874e-05, |
| "loss": 0.0099, |
| "step": 4240 |
| }, |
| { |
| "epoch": 21.464646464646464, |
| "grad_norm": 0.1092967540025711, |
| "learning_rate": 9.295692906371363e-05, |
| "loss": 0.0108, |
| "step": 4250 |
| }, |
| { |
| "epoch": 21.515151515151516, |
| "grad_norm": 0.15497858822345734, |
| "learning_rate": 9.291456278143476e-05, |
| "loss": 0.0114, |
| "step": 4260 |
| }, |
| { |
| "epoch": 21.565656565656564, |
| "grad_norm": 0.11261726915836334, |
| "learning_rate": 9.287207917237994e-05, |
| "loss": 0.0092, |
| "step": 4270 |
| }, |
| { |
| "epoch": 21.616161616161616, |
| "grad_norm": 0.1242527961730957, |
| "learning_rate": 9.282947835269773e-05, |
| "loss": 0.0091, |
| "step": 4280 |
| }, |
| { |
| "epoch": 21.666666666666668, |
| "grad_norm": 0.106032595038414, |
| "learning_rate": 9.278676043885715e-05, |
| "loss": 0.0066, |
| "step": 4290 |
| }, |
| { |
| "epoch": 21.717171717171716, |
| "grad_norm": 0.11986482888460159, |
| "learning_rate": 9.274392554764733e-05, |
| "loss": 0.0076, |
| "step": 4300 |
| }, |
| { |
| "epoch": 21.767676767676768, |
| "grad_norm": 0.08947091549634933, |
| "learning_rate": 9.270097379617723e-05, |
| "loss": 0.0064, |
| "step": 4310 |
| }, |
| { |
| "epoch": 21.818181818181817, |
| "grad_norm": 0.11636929214000702, |
| "learning_rate": 9.26579053018753e-05, |
| "loss": 0.0062, |
| "step": 4320 |
| }, |
| { |
| "epoch": 21.86868686868687, |
| "grad_norm": 0.0967460572719574, |
| "learning_rate": 9.261472018248918e-05, |
| "loss": 0.0076, |
| "step": 4330 |
| }, |
| { |
| "epoch": 21.91919191919192, |
| "grad_norm": 0.08452008664608002, |
| "learning_rate": 9.25714185560853e-05, |
| "loss": 0.0065, |
| "step": 4340 |
| }, |
| { |
| "epoch": 21.96969696969697, |
| "grad_norm": 0.1070544421672821, |
| "learning_rate": 9.252800054104868e-05, |
| "loss": 0.0076, |
| "step": 4350 |
| }, |
| { |
| "epoch": 22.02020202020202, |
| "grad_norm": 0.11194749921560287, |
| "learning_rate": 9.248446625608252e-05, |
| "loss": 0.0072, |
| "step": 4360 |
| }, |
| { |
| "epoch": 22.07070707070707, |
| "grad_norm": 0.11036941409111023, |
| "learning_rate": 9.244081582020789e-05, |
| "loss": 0.0078, |
| "step": 4370 |
| }, |
| { |
| "epoch": 22.12121212121212, |
| "grad_norm": 0.06584583222866058, |
| "learning_rate": 9.239704935276339e-05, |
| "loss": 0.0075, |
| "step": 4380 |
| }, |
| { |
| "epoch": 22.171717171717173, |
| "grad_norm": 0.10412444919347763, |
| "learning_rate": 9.235316697340489e-05, |
| "loss": 0.0076, |
| "step": 4390 |
| }, |
| { |
| "epoch": 22.22222222222222, |
| "grad_norm": 0.1213592141866684, |
| "learning_rate": 9.230916880210512e-05, |
| "loss": 0.0058, |
| "step": 4400 |
| }, |
| { |
| "epoch": 22.272727272727273, |
| "grad_norm": 0.10108072310686111, |
| "learning_rate": 9.226505495915342e-05, |
| "loss": 0.0082, |
| "step": 4410 |
| }, |
| { |
| "epoch": 22.32323232323232, |
| "grad_norm": 0.09545589983463287, |
| "learning_rate": 9.222082556515536e-05, |
| "loss": 0.0089, |
| "step": 4420 |
| }, |
| { |
| "epoch": 22.373737373737374, |
| "grad_norm": 0.11577674001455307, |
| "learning_rate": 9.217648074103242e-05, |
| "loss": 0.0084, |
| "step": 4430 |
| }, |
| { |
| "epoch": 22.424242424242426, |
| "grad_norm": 0.10396119952201843, |
| "learning_rate": 9.213202060802161e-05, |
| "loss": 0.0074, |
| "step": 4440 |
| }, |
| { |
| "epoch": 22.474747474747474, |
| "grad_norm": 0.1027739942073822, |
| "learning_rate": 9.208744528767528e-05, |
| "loss": 0.0078, |
| "step": 4450 |
| }, |
| { |
| "epoch": 22.525252525252526, |
| "grad_norm": 0.13134846091270447, |
| "learning_rate": 9.204275490186064e-05, |
| "loss": 0.0065, |
| "step": 4460 |
| }, |
| { |
| "epoch": 22.575757575757574, |
| "grad_norm": 0.09173861145973206, |
| "learning_rate": 9.199794957275949e-05, |
| "loss": 0.007, |
| "step": 4470 |
| }, |
| { |
| "epoch": 22.626262626262626, |
| "grad_norm": 0.11349400877952576, |
| "learning_rate": 9.19530294228679e-05, |
| "loss": 0.0075, |
| "step": 4480 |
| }, |
| { |
| "epoch": 22.67676767676768, |
| "grad_norm": 0.13349007070064545, |
| "learning_rate": 9.190799457499583e-05, |
| "loss": 0.0072, |
| "step": 4490 |
| }, |
| { |
| "epoch": 22.727272727272727, |
| "grad_norm": 0.11512991786003113, |
| "learning_rate": 9.186284515226686e-05, |
| "loss": 0.0064, |
| "step": 4500 |
| }, |
| { |
| "epoch": 22.77777777777778, |
| "grad_norm": 0.08801310509443283, |
| "learning_rate": 9.181758127811777e-05, |
| "loss": 0.0063, |
| "step": 4510 |
| }, |
| { |
| "epoch": 22.828282828282827, |
| "grad_norm": 0.09956815093755722, |
| "learning_rate": 9.177220307629825e-05, |
| "loss": 0.006, |
| "step": 4520 |
| }, |
| { |
| "epoch": 22.87878787878788, |
| "grad_norm": 0.07794970273971558, |
| "learning_rate": 9.172671067087059e-05, |
| "loss": 0.0099, |
| "step": 4530 |
| }, |
| { |
| "epoch": 22.92929292929293, |
| "grad_norm": 0.10630300641059875, |
| "learning_rate": 9.16811041862093e-05, |
| "loss": 0.0074, |
| "step": 4540 |
| }, |
| { |
| "epoch": 22.97979797979798, |
| "grad_norm": 0.08774016797542572, |
| "learning_rate": 9.163538374700076e-05, |
| "loss": 0.0056, |
| "step": 4550 |
| }, |
| { |
| "epoch": 23.03030303030303, |
| "grad_norm": 0.10545973479747772, |
| "learning_rate": 9.158954947824287e-05, |
| "loss": 0.0066, |
| "step": 4560 |
| }, |
| { |
| "epoch": 23.08080808080808, |
| "grad_norm": 0.13509832322597504, |
| "learning_rate": 9.154360150524482e-05, |
| "loss": 0.0076, |
| "step": 4570 |
| }, |
| { |
| "epoch": 23.13131313131313, |
| "grad_norm": 0.1109355166554451, |
| "learning_rate": 9.14975399536266e-05, |
| "loss": 0.0062, |
| "step": 4580 |
| }, |
| { |
| "epoch": 23.181818181818183, |
| "grad_norm": 0.10734201222658157, |
| "learning_rate": 9.14513649493187e-05, |
| "loss": 0.0076, |
| "step": 4590 |
| }, |
| { |
| "epoch": 23.232323232323232, |
| "grad_norm": 0.11387687921524048, |
| "learning_rate": 9.140507661856187e-05, |
| "loss": 0.0083, |
| "step": 4600 |
| }, |
| { |
| "epoch": 23.282828282828284, |
| "grad_norm": 0.10864299535751343, |
| "learning_rate": 9.135867508790661e-05, |
| "loss": 0.007, |
| "step": 4610 |
| }, |
| { |
| "epoch": 23.333333333333332, |
| "grad_norm": 0.0891706645488739, |
| "learning_rate": 9.131216048421291e-05, |
| "loss": 0.0079, |
| "step": 4620 |
| }, |
| { |
| "epoch": 23.383838383838384, |
| "grad_norm": 0.0904461070895195, |
| "learning_rate": 9.126553293464998e-05, |
| "loss": 0.0079, |
| "step": 4630 |
| }, |
| { |
| "epoch": 23.434343434343436, |
| "grad_norm": 0.10799886286258698, |
| "learning_rate": 9.121879256669572e-05, |
| "loss": 0.0072, |
| "step": 4640 |
| }, |
| { |
| "epoch": 23.484848484848484, |
| "grad_norm": 0.10787851363420486, |
| "learning_rate": 9.117193950813652e-05, |
| "loss": 0.0059, |
| "step": 4650 |
| }, |
| { |
| "epoch": 23.535353535353536, |
| "grad_norm": 0.09552913904190063, |
| "learning_rate": 9.112497388706685e-05, |
| "loss": 0.0074, |
| "step": 4660 |
| }, |
| { |
| "epoch": 23.585858585858585, |
| "grad_norm": 0.11768249422311783, |
| "learning_rate": 9.10778958318889e-05, |
| "loss": 0.008, |
| "step": 4670 |
| }, |
| { |
| "epoch": 23.636363636363637, |
| "grad_norm": 0.0975164920091629, |
| "learning_rate": 9.103070547131232e-05, |
| "loss": 0.0078, |
| "step": 4680 |
| }, |
| { |
| "epoch": 23.686868686868685, |
| "grad_norm": 0.12091819941997528, |
| "learning_rate": 9.098340293435375e-05, |
| "loss": 0.0078, |
| "step": 4690 |
| }, |
| { |
| "epoch": 23.737373737373737, |
| "grad_norm": 0.11832273006439209, |
| "learning_rate": 9.093598835033649e-05, |
| "loss": 0.0086, |
| "step": 4700 |
| }, |
| { |
| "epoch": 23.78787878787879, |
| "grad_norm": 0.14794909954071045, |
| "learning_rate": 9.088846184889021e-05, |
| "loss": 0.008, |
| "step": 4710 |
| }, |
| { |
| "epoch": 23.838383838383837, |
| "grad_norm": 0.12697231769561768, |
| "learning_rate": 9.084082355995057e-05, |
| "loss": 0.0068, |
| "step": 4720 |
| }, |
| { |
| "epoch": 23.88888888888889, |
| "grad_norm": 0.11589431017637253, |
| "learning_rate": 9.079307361375882e-05, |
| "loss": 0.0069, |
| "step": 4730 |
| }, |
| { |
| "epoch": 23.939393939393938, |
| "grad_norm": 0.10414907336235046, |
| "learning_rate": 9.074521214086149e-05, |
| "loss": 0.007, |
| "step": 4740 |
| }, |
| { |
| "epoch": 23.98989898989899, |
| "grad_norm": 0.08066228777170181, |
| "learning_rate": 9.069723927211001e-05, |
| "loss": 0.0074, |
| "step": 4750 |
| }, |
| { |
| "epoch": 24.04040404040404, |
| "grad_norm": 0.07949458062648773, |
| "learning_rate": 9.064915513866037e-05, |
| "loss": 0.0063, |
| "step": 4760 |
| }, |
| { |
| "epoch": 24.09090909090909, |
| "grad_norm": 0.12256435304880142, |
| "learning_rate": 9.060095987197279e-05, |
| "loss": 0.0069, |
| "step": 4770 |
| }, |
| { |
| "epoch": 24.141414141414142, |
| "grad_norm": 0.09446379542350769, |
| "learning_rate": 9.055265360381126e-05, |
| "loss": 0.0076, |
| "step": 4780 |
| }, |
| { |
| "epoch": 24.19191919191919, |
| "grad_norm": 0.08074428141117096, |
| "learning_rate": 9.050423646624326e-05, |
| "loss": 0.0076, |
| "step": 4790 |
| }, |
| { |
| "epoch": 24.242424242424242, |
| "grad_norm": 0.10249504446983337, |
| "learning_rate": 9.045570859163943e-05, |
| "loss": 0.0086, |
| "step": 4800 |
| }, |
| { |
| "epoch": 24.292929292929294, |
| "grad_norm": 0.12497933208942413, |
| "learning_rate": 9.04070701126731e-05, |
| "loss": 0.0058, |
| "step": 4810 |
| }, |
| { |
| "epoch": 24.343434343434343, |
| "grad_norm": 0.1036774143576622, |
| "learning_rate": 9.035832116232001e-05, |
| "loss": 0.0064, |
| "step": 4820 |
| }, |
| { |
| "epoch": 24.393939393939394, |
| "grad_norm": 0.11740745604038239, |
| "learning_rate": 9.030946187385796e-05, |
| "loss": 0.0078, |
| "step": 4830 |
| }, |
| { |
| "epoch": 24.444444444444443, |
| "grad_norm": 0.08568645268678665, |
| "learning_rate": 9.026049238086635e-05, |
| "loss": 0.0072, |
| "step": 4840 |
| }, |
| { |
| "epoch": 24.494949494949495, |
| "grad_norm": 0.09723944216966629, |
| "learning_rate": 9.021141281722591e-05, |
| "loss": 0.006, |
| "step": 4850 |
| }, |
| { |
| "epoch": 24.545454545454547, |
| "grad_norm": 0.10702981054782867, |
| "learning_rate": 9.01622233171183e-05, |
| "loss": 0.0088, |
| "step": 4860 |
| }, |
| { |
| "epoch": 24.595959595959595, |
| "grad_norm": 0.0748906210064888, |
| "learning_rate": 9.011292401502574e-05, |
| "loss": 0.0055, |
| "step": 4870 |
| }, |
| { |
| "epoch": 24.646464646464647, |
| "grad_norm": 0.10826072096824646, |
| "learning_rate": 9.006351504573063e-05, |
| "loss": 0.0086, |
| "step": 4880 |
| }, |
| { |
| "epoch": 24.696969696969695, |
| "grad_norm": 0.12552088499069214, |
| "learning_rate": 9.001399654431519e-05, |
| "loss": 0.0074, |
| "step": 4890 |
| }, |
| { |
| "epoch": 24.747474747474747, |
| "grad_norm": 0.09460155665874481, |
| "learning_rate": 8.996436864616116e-05, |
| "loss": 0.0075, |
| "step": 4900 |
| }, |
| { |
| "epoch": 24.7979797979798, |
| "grad_norm": 0.09930367767810822, |
| "learning_rate": 8.991463148694925e-05, |
| "loss": 0.0078, |
| "step": 4910 |
| }, |
| { |
| "epoch": 24.848484848484848, |
| "grad_norm": 0.13195104897022247, |
| "learning_rate": 8.986478520265902e-05, |
| "loss": 0.0079, |
| "step": 4920 |
| }, |
| { |
| "epoch": 24.8989898989899, |
| "grad_norm": 0.12911023199558258, |
| "learning_rate": 8.981482992956827e-05, |
| "loss": 0.0079, |
| "step": 4930 |
| }, |
| { |
| "epoch": 24.949494949494948, |
| "grad_norm": 0.11185169219970703, |
| "learning_rate": 8.976476580425282e-05, |
| "loss": 0.0075, |
| "step": 4940 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.11149358749389648, |
| "learning_rate": 8.971459296358606e-05, |
| "loss": 0.0062, |
| "step": 4950 |
| }, |
| { |
| "epoch": 25.050505050505052, |
| "grad_norm": 0.11152830719947815, |
| "learning_rate": 8.966431154473864e-05, |
| "loss": 0.0083, |
| "step": 4960 |
| }, |
| { |
| "epoch": 25.1010101010101, |
| "grad_norm": 0.0897165983915329, |
| "learning_rate": 8.961392168517803e-05, |
| "loss": 0.0076, |
| "step": 4970 |
| }, |
| { |
| "epoch": 25.151515151515152, |
| "grad_norm": 0.10588689893484116, |
| "learning_rate": 8.956342352266821e-05, |
| "loss": 0.0077, |
| "step": 4980 |
| }, |
| { |
| "epoch": 25.2020202020202, |
| "grad_norm": 0.09523741900920868, |
| "learning_rate": 8.95128171952692e-05, |
| "loss": 0.0061, |
| "step": 4990 |
| }, |
| { |
| "epoch": 25.252525252525253, |
| "grad_norm": 0.12982645630836487, |
| "learning_rate": 8.946210284133676e-05, |
| "loss": 0.0101, |
| "step": 5000 |
| }, |
| { |
| "epoch": 25.303030303030305, |
| "grad_norm": 0.13828161358833313, |
| "learning_rate": 8.941128059952201e-05, |
| "loss": 0.0082, |
| "step": 5010 |
| }, |
| { |
| "epoch": 25.353535353535353, |
| "grad_norm": 0.11768993735313416, |
| "learning_rate": 8.936035060877102e-05, |
| "loss": 0.0064, |
| "step": 5020 |
| }, |
| { |
| "epoch": 25.404040404040405, |
| "grad_norm": 0.10774098336696625, |
| "learning_rate": 8.930931300832443e-05, |
| "loss": 0.0075, |
| "step": 5030 |
| }, |
| { |
| "epoch": 25.454545454545453, |
| "grad_norm": 0.10704880207777023, |
| "learning_rate": 8.925816793771711e-05, |
| "loss": 0.0084, |
| "step": 5040 |
| }, |
| { |
| "epoch": 25.505050505050505, |
| "grad_norm": 0.11722766607999802, |
| "learning_rate": 8.92069155367777e-05, |
| "loss": 0.0083, |
| "step": 5050 |
| }, |
| { |
| "epoch": 25.555555555555557, |
| "grad_norm": 0.1379360854625702, |
| "learning_rate": 8.915555594562834e-05, |
| "loss": 0.0075, |
| "step": 5060 |
| }, |
| { |
| "epoch": 25.606060606060606, |
| "grad_norm": 0.13628102838993073, |
| "learning_rate": 8.910408930468416e-05, |
| "loss": 0.0095, |
| "step": 5070 |
| }, |
| { |
| "epoch": 25.656565656565657, |
| "grad_norm": 0.11168596148490906, |
| "learning_rate": 8.905251575465303e-05, |
| "loss": 0.0081, |
| "step": 5080 |
| }, |
| { |
| "epoch": 25.707070707070706, |
| "grad_norm": 0.1227908506989479, |
| "learning_rate": 8.900083543653502e-05, |
| "loss": 0.0095, |
| "step": 5090 |
| }, |
| { |
| "epoch": 25.757575757575758, |
| "grad_norm": 0.0903075560927391, |
| "learning_rate": 8.894904849162218e-05, |
| "loss": 0.0081, |
| "step": 5100 |
| }, |
| { |
| "epoch": 25.80808080808081, |
| "grad_norm": 0.10377117991447449, |
| "learning_rate": 8.889715506149802e-05, |
| "loss": 0.0086, |
| "step": 5110 |
| }, |
| { |
| "epoch": 25.858585858585858, |
| "grad_norm": 0.086542509496212, |
| "learning_rate": 8.884515528803722e-05, |
| "loss": 0.0064, |
| "step": 5120 |
| }, |
| { |
| "epoch": 25.90909090909091, |
| "grad_norm": 0.09911636263132095, |
| "learning_rate": 8.879304931340517e-05, |
| "loss": 0.0092, |
| "step": 5130 |
| }, |
| { |
| "epoch": 25.95959595959596, |
| "grad_norm": 0.11916545033454895, |
| "learning_rate": 8.874083728005759e-05, |
| "loss": 0.008, |
| "step": 5140 |
| }, |
| { |
| "epoch": 26.01010101010101, |
| "grad_norm": 0.11988221108913422, |
| "learning_rate": 8.868851933074021e-05, |
| "loss": 0.0078, |
| "step": 5150 |
| }, |
| { |
| "epoch": 26.060606060606062, |
| "grad_norm": 0.11108119040727615, |
| "learning_rate": 8.863609560848829e-05, |
| "loss": 0.0072, |
| "step": 5160 |
| }, |
| { |
| "epoch": 26.11111111111111, |
| "grad_norm": 0.08851511031389236, |
| "learning_rate": 8.85835662566263e-05, |
| "loss": 0.0067, |
| "step": 5170 |
| }, |
| { |
| "epoch": 26.161616161616163, |
| "grad_norm": 0.09384623914957047, |
| "learning_rate": 8.853093141876747e-05, |
| "loss": 0.0083, |
| "step": 5180 |
| }, |
| { |
| "epoch": 26.21212121212121, |
| "grad_norm": 0.09134040772914886, |
| "learning_rate": 8.847819123881343e-05, |
| "loss": 0.0075, |
| "step": 5190 |
| }, |
| { |
| "epoch": 26.262626262626263, |
| "grad_norm": 0.10978759825229645, |
| "learning_rate": 8.842534586095383e-05, |
| "loss": 0.0067, |
| "step": 5200 |
| }, |
| { |
| "epoch": 26.313131313131315, |
| "grad_norm": 0.07117953151464462, |
| "learning_rate": 8.837239542966593e-05, |
| "loss": 0.006, |
| "step": 5210 |
| }, |
| { |
| "epoch": 26.363636363636363, |
| "grad_norm": 0.1428268849849701, |
| "learning_rate": 8.831934008971417e-05, |
| "loss": 0.0079, |
| "step": 5220 |
| }, |
| { |
| "epoch": 26.414141414141415, |
| "grad_norm": 0.10671624541282654, |
| "learning_rate": 8.826617998614982e-05, |
| "loss": 0.0057, |
| "step": 5230 |
| }, |
| { |
| "epoch": 26.464646464646464, |
| "grad_norm": 0.11384488642215729, |
| "learning_rate": 8.821291526431056e-05, |
| "loss": 0.0089, |
| "step": 5240 |
| }, |
| { |
| "epoch": 26.515151515151516, |
| "grad_norm": 0.1087106391787529, |
| "learning_rate": 8.815954606982015e-05, |
| "loss": 0.0079, |
| "step": 5250 |
| }, |
| { |
| "epoch": 26.565656565656564, |
| "grad_norm": 0.1099553182721138, |
| "learning_rate": 8.810607254858789e-05, |
| "loss": 0.0089, |
| "step": 5260 |
| }, |
| { |
| "epoch": 26.616161616161616, |
| "grad_norm": 0.10249440371990204, |
| "learning_rate": 8.805249484680838e-05, |
| "loss": 0.0077, |
| "step": 5270 |
| }, |
| { |
| "epoch": 26.666666666666668, |
| "grad_norm": 0.12097164243459702, |
| "learning_rate": 8.799881311096096e-05, |
| "loss": 0.0092, |
| "step": 5280 |
| }, |
| { |
| "epoch": 26.717171717171716, |
| "grad_norm": 0.08143244683742523, |
| "learning_rate": 8.794502748780949e-05, |
| "loss": 0.0096, |
| "step": 5290 |
| }, |
| { |
| "epoch": 26.767676767676768, |
| "grad_norm": 0.10786762833595276, |
| "learning_rate": 8.78911381244018e-05, |
| "loss": 0.0074, |
| "step": 5300 |
| }, |
| { |
| "epoch": 26.818181818181817, |
| "grad_norm": 0.12914958596229553, |
| "learning_rate": 8.783714516806933e-05, |
| "loss": 0.0091, |
| "step": 5310 |
| }, |
| { |
| "epoch": 26.86868686868687, |
| "grad_norm": 0.11626844853162766, |
| "learning_rate": 8.77830487664268e-05, |
| "loss": 0.0065, |
| "step": 5320 |
| }, |
| { |
| "epoch": 26.91919191919192, |
| "grad_norm": 0.11947870999574661, |
| "learning_rate": 8.772884906737167e-05, |
| "loss": 0.0093, |
| "step": 5330 |
| }, |
| { |
| "epoch": 26.96969696969697, |
| "grad_norm": 0.08934204280376434, |
| "learning_rate": 8.767454621908387e-05, |
| "loss": 0.0081, |
| "step": 5340 |
| }, |
| { |
| "epoch": 27.02020202020202, |
| "grad_norm": 0.11970902234315872, |
| "learning_rate": 8.76201403700253e-05, |
| "loss": 0.0073, |
| "step": 5350 |
| }, |
| { |
| "epoch": 27.07070707070707, |
| "grad_norm": 0.13303697109222412, |
| "learning_rate": 8.756563166893949e-05, |
| "loss": 0.0084, |
| "step": 5360 |
| }, |
| { |
| "epoch": 27.12121212121212, |
| "grad_norm": 0.12613415718078613, |
| "learning_rate": 8.751102026485113e-05, |
| "loss": 0.0086, |
| "step": 5370 |
| }, |
| { |
| "epoch": 27.171717171717173, |
| "grad_norm": 0.11872614175081253, |
| "learning_rate": 8.745630630706571e-05, |
| "loss": 0.0068, |
| "step": 5380 |
| }, |
| { |
| "epoch": 27.22222222222222, |
| "grad_norm": 0.14131346344947815, |
| "learning_rate": 8.740148994516912e-05, |
| "loss": 0.0076, |
| "step": 5390 |
| }, |
| { |
| "epoch": 27.272727272727273, |
| "grad_norm": 0.10952199995517731, |
| "learning_rate": 8.73465713290272e-05, |
| "loss": 0.0062, |
| "step": 5400 |
| }, |
| { |
| "epoch": 27.32323232323232, |
| "grad_norm": 0.12086033821105957, |
| "learning_rate": 8.729155060878533e-05, |
| "loss": 0.0065, |
| "step": 5410 |
| }, |
| { |
| "epoch": 27.373737373737374, |
| "grad_norm": 0.08737418055534363, |
| "learning_rate": 8.723642793486809e-05, |
| "loss": 0.0058, |
| "step": 5420 |
| }, |
| { |
| "epoch": 27.424242424242426, |
| "grad_norm": 0.09776380658149719, |
| "learning_rate": 8.718120345797873e-05, |
| "loss": 0.0079, |
| "step": 5430 |
| }, |
| { |
| "epoch": 27.474747474747474, |
| "grad_norm": 0.09645076841115952, |
| "learning_rate": 8.712587732909889e-05, |
| "loss": 0.0068, |
| "step": 5440 |
| }, |
| { |
| "epoch": 27.525252525252526, |
| "grad_norm": 0.11092785000801086, |
| "learning_rate": 8.707044969948806e-05, |
| "loss": 0.0074, |
| "step": 5450 |
| }, |
| { |
| "epoch": 27.575757575757574, |
| "grad_norm": 0.11504276096820831, |
| "learning_rate": 8.701492072068329e-05, |
| "loss": 0.0076, |
| "step": 5460 |
| }, |
| { |
| "epoch": 27.626262626262626, |
| "grad_norm": 0.11145307868719101, |
| "learning_rate": 8.695929054449869e-05, |
| "loss": 0.0057, |
| "step": 5470 |
| }, |
| { |
| "epoch": 27.67676767676768, |
| "grad_norm": 0.11437544226646423, |
| "learning_rate": 8.690355932302501e-05, |
| "loss": 0.0093, |
| "step": 5480 |
| }, |
| { |
| "epoch": 27.727272727272727, |
| "grad_norm": 0.11535003781318665, |
| "learning_rate": 8.684772720862931e-05, |
| "loss": 0.009, |
| "step": 5490 |
| }, |
| { |
| "epoch": 27.77777777777778, |
| "grad_norm": 0.132643461227417, |
| "learning_rate": 8.679179435395446e-05, |
| "loss": 0.0066, |
| "step": 5500 |
| }, |
| { |
| "epoch": 27.828282828282827, |
| "grad_norm": 0.08867459744215012, |
| "learning_rate": 8.673576091191874e-05, |
| "loss": 0.0073, |
| "step": 5510 |
| }, |
| { |
| "epoch": 27.87878787878788, |
| "grad_norm": 0.09122374653816223, |
| "learning_rate": 8.667962703571541e-05, |
| "loss": 0.0061, |
| "step": 5520 |
| }, |
| { |
| "epoch": 27.92929292929293, |
| "grad_norm": 0.07415696233510971, |
| "learning_rate": 8.662339287881238e-05, |
| "loss": 0.0054, |
| "step": 5530 |
| }, |
| { |
| "epoch": 27.97979797979798, |
| "grad_norm": 0.08912459760904312, |
| "learning_rate": 8.656705859495169e-05, |
| "loss": 0.0067, |
| "step": 5540 |
| }, |
| { |
| "epoch": 28.03030303030303, |
| "grad_norm": 0.08272477239370346, |
| "learning_rate": 8.651062433814912e-05, |
| "loss": 0.0082, |
| "step": 5550 |
| }, |
| { |
| "epoch": 28.08080808080808, |
| "grad_norm": 0.09257591515779495, |
| "learning_rate": 8.645409026269375e-05, |
| "loss": 0.006, |
| "step": 5560 |
| }, |
| { |
| "epoch": 28.13131313131313, |
| "grad_norm": 0.11933919042348862, |
| "learning_rate": 8.639745652314759e-05, |
| "loss": 0.0074, |
| "step": 5570 |
| }, |
| { |
| "epoch": 28.181818181818183, |
| "grad_norm": 0.09677407890558243, |
| "learning_rate": 8.634072327434515e-05, |
| "loss": 0.0062, |
| "step": 5580 |
| }, |
| { |
| "epoch": 28.232323232323232, |
| "grad_norm": 0.11988983303308487, |
| "learning_rate": 8.628389067139294e-05, |
| "loss": 0.0074, |
| "step": 5590 |
| }, |
| { |
| "epoch": 28.282828282828284, |
| "grad_norm": 0.1122865080833435, |
| "learning_rate": 8.622695886966911e-05, |
| "loss": 0.0068, |
| "step": 5600 |
| }, |
| { |
| "epoch": 28.333333333333332, |
| "grad_norm": 0.13117124140262604, |
| "learning_rate": 8.616992802482308e-05, |
| "loss": 0.009, |
| "step": 5610 |
| }, |
| { |
| "epoch": 28.383838383838384, |
| "grad_norm": 0.14589503407478333, |
| "learning_rate": 8.611279829277496e-05, |
| "loss": 0.0084, |
| "step": 5620 |
| }, |
| { |
| "epoch": 28.434343434343436, |
| "grad_norm": 0.10349099338054657, |
| "learning_rate": 8.605556982971528e-05, |
| "loss": 0.0062, |
| "step": 5630 |
| }, |
| { |
| "epoch": 28.484848484848484, |
| "grad_norm": 0.10457582771778107, |
| "learning_rate": 8.599824279210447e-05, |
| "loss": 0.0071, |
| "step": 5640 |
| }, |
| { |
| "epoch": 28.535353535353536, |
| "grad_norm": 0.08215578645467758, |
| "learning_rate": 8.594081733667243e-05, |
| "loss": 0.0071, |
| "step": 5650 |
| }, |
| { |
| "epoch": 28.585858585858585, |
| "grad_norm": 0.0971410796046257, |
| "learning_rate": 8.58832936204182e-05, |
| "loss": 0.0065, |
| "step": 5660 |
| }, |
| { |
| "epoch": 28.636363636363637, |
| "grad_norm": 0.08389327675104141, |
| "learning_rate": 8.582567180060942e-05, |
| "loss": 0.0068, |
| "step": 5670 |
| }, |
| { |
| "epoch": 28.686868686868685, |
| "grad_norm": 0.08150910586118698, |
| "learning_rate": 8.576795203478194e-05, |
| "loss": 0.0056, |
| "step": 5680 |
| }, |
| { |
| "epoch": 28.737373737373737, |
| "grad_norm": 0.07271291315555573, |
| "learning_rate": 8.571013448073939e-05, |
| "loss": 0.0072, |
| "step": 5690 |
| }, |
| { |
| "epoch": 28.78787878787879, |
| "grad_norm": 0.08212652802467346, |
| "learning_rate": 8.565221929655275e-05, |
| "loss": 0.0075, |
| "step": 5700 |
| }, |
| { |
| "epoch": 28.838383838383837, |
| "grad_norm": 0.08190129697322845, |
| "learning_rate": 8.559420664055992e-05, |
| "loss": 0.0055, |
| "step": 5710 |
| }, |
| { |
| "epoch": 28.88888888888889, |
| "grad_norm": 0.10054586827754974, |
| "learning_rate": 8.553609667136532e-05, |
| "loss": 0.0072, |
| "step": 5720 |
| }, |
| { |
| "epoch": 28.939393939393938, |
| "grad_norm": 0.09532570093870163, |
| "learning_rate": 8.547788954783936e-05, |
| "loss": 0.0066, |
| "step": 5730 |
| }, |
| { |
| "epoch": 28.98989898989899, |
| "grad_norm": 0.06579931080341339, |
| "learning_rate": 8.541958542911808e-05, |
| "loss": 0.0059, |
| "step": 5740 |
| }, |
| { |
| "epoch": 29.04040404040404, |
| "grad_norm": 0.09251992404460907, |
| "learning_rate": 8.536118447460275e-05, |
| "loss": 0.0063, |
| "step": 5750 |
| }, |
| { |
| "epoch": 29.09090909090909, |
| "grad_norm": 0.09162105619907379, |
| "learning_rate": 8.530268684395932e-05, |
| "loss": 0.0077, |
| "step": 5760 |
| }, |
| { |
| "epoch": 29.141414141414142, |
| "grad_norm": 0.11248607188463211, |
| "learning_rate": 8.524409269711807e-05, |
| "loss": 0.0071, |
| "step": 5770 |
| }, |
| { |
| "epoch": 29.19191919191919, |
| "grad_norm": 0.09818435460329056, |
| "learning_rate": 8.51854021942732e-05, |
| "loss": 0.0072, |
| "step": 5780 |
| }, |
| { |
| "epoch": 29.242424242424242, |
| "grad_norm": 0.08665101230144501, |
| "learning_rate": 8.512661549588227e-05, |
| "loss": 0.0064, |
| "step": 5790 |
| }, |
| { |
| "epoch": 29.292929292929294, |
| "grad_norm": 0.09845367819070816, |
| "learning_rate": 8.506773276266588e-05, |
| "loss": 0.0066, |
| "step": 5800 |
| }, |
| { |
| "epoch": 29.343434343434343, |
| "grad_norm": 0.10669195652008057, |
| "learning_rate": 8.500875415560721e-05, |
| "loss": 0.0072, |
| "step": 5810 |
| }, |
| { |
| "epoch": 29.393939393939394, |
| "grad_norm": 0.09235820174217224, |
| "learning_rate": 8.494967983595144e-05, |
| "loss": 0.0053, |
| "step": 5820 |
| }, |
| { |
| "epoch": 29.444444444444443, |
| "grad_norm": 0.08998201042413712, |
| "learning_rate": 8.489050996520558e-05, |
| "loss": 0.0067, |
| "step": 5830 |
| }, |
| { |
| "epoch": 29.494949494949495, |
| "grad_norm": 0.08802379667758942, |
| "learning_rate": 8.483124470513775e-05, |
| "loss": 0.0066, |
| "step": 5840 |
| }, |
| { |
| "epoch": 29.545454545454547, |
| "grad_norm": 0.08596701920032501, |
| "learning_rate": 8.477188421777692e-05, |
| "loss": 0.0058, |
| "step": 5850 |
| }, |
| { |
| "epoch": 29.595959595959595, |
| "grad_norm": 0.10740692913532257, |
| "learning_rate": 8.47124286654124e-05, |
| "loss": 0.0074, |
| "step": 5860 |
| }, |
| { |
| "epoch": 29.646464646464647, |
| "grad_norm": 0.07536567002534866, |
| "learning_rate": 8.465287821059341e-05, |
| "loss": 0.008, |
| "step": 5870 |
| }, |
| { |
| "epoch": 29.696969696969695, |
| "grad_norm": 0.12022113054990768, |
| "learning_rate": 8.45932330161286e-05, |
| "loss": 0.0067, |
| "step": 5880 |
| }, |
| { |
| "epoch": 29.747474747474747, |
| "grad_norm": 0.07126189768314362, |
| "learning_rate": 8.453349324508567e-05, |
| "loss": 0.0055, |
| "step": 5890 |
| }, |
| { |
| "epoch": 29.7979797979798, |
| "grad_norm": 0.12092109769582748, |
| "learning_rate": 8.447365906079088e-05, |
| "loss": 0.0065, |
| "step": 5900 |
| }, |
| { |
| "epoch": 29.848484848484848, |
| "grad_norm": 0.09916812181472778, |
| "learning_rate": 8.441373062682856e-05, |
| "loss": 0.0071, |
| "step": 5910 |
| }, |
| { |
| "epoch": 29.8989898989899, |
| "grad_norm": 0.07447104156017303, |
| "learning_rate": 8.43537081070408e-05, |
| "loss": 0.0073, |
| "step": 5920 |
| }, |
| { |
| "epoch": 29.949494949494948, |
| "grad_norm": 0.13843820989131927, |
| "learning_rate": 8.429359166552689e-05, |
| "loss": 0.0079, |
| "step": 5930 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.13404156267642975, |
| "learning_rate": 8.423338146664284e-05, |
| "loss": 0.0085, |
| "step": 5940 |
| }, |
| { |
| "epoch": 30.050505050505052, |
| "grad_norm": 0.12789103388786316, |
| "learning_rate": 8.417307767500107e-05, |
| "loss": 0.0099, |
| "step": 5950 |
| }, |
| { |
| "epoch": 30.1010101010101, |
| "grad_norm": 0.13638529181480408, |
| "learning_rate": 8.411268045546983e-05, |
| "loss": 0.009, |
| "step": 5960 |
| }, |
| { |
| "epoch": 30.151515151515152, |
| "grad_norm": 0.09583284705877304, |
| "learning_rate": 8.405218997317281e-05, |
| "loss": 0.009, |
| "step": 5970 |
| }, |
| { |
| "epoch": 30.2020202020202, |
| "grad_norm": 0.10043736547231674, |
| "learning_rate": 8.399160639348869e-05, |
| "loss": 0.0094, |
| "step": 5980 |
| }, |
| { |
| "epoch": 30.252525252525253, |
| "grad_norm": 0.1063493862748146, |
| "learning_rate": 8.393092988205065e-05, |
| "loss": 0.0076, |
| "step": 5990 |
| }, |
| { |
| "epoch": 30.303030303030305, |
| "grad_norm": 0.10915140062570572, |
| "learning_rate": 8.387016060474597e-05, |
| "loss": 0.0078, |
| "step": 6000 |
| }, |
| { |
| "epoch": 30.353535353535353, |
| "grad_norm": 0.10273183137178421, |
| "learning_rate": 8.380929872771551e-05, |
| "loss": 0.0062, |
| "step": 6010 |
| }, |
| { |
| "epoch": 30.404040404040405, |
| "grad_norm": 0.13782581686973572, |
| "learning_rate": 8.374834441735335e-05, |
| "loss": 0.0059, |
| "step": 6020 |
| }, |
| { |
| "epoch": 30.454545454545453, |
| "grad_norm": 0.11863265931606293, |
| "learning_rate": 8.368729784030622e-05, |
| "loss": 0.0073, |
| "step": 6030 |
| }, |
| { |
| "epoch": 30.505050505050505, |
| "grad_norm": 0.10877472907304764, |
| "learning_rate": 8.362615916347315e-05, |
| "loss": 0.008, |
| "step": 6040 |
| }, |
| { |
| "epoch": 30.555555555555557, |
| "grad_norm": 0.13170336186885834, |
| "learning_rate": 8.356492855400493e-05, |
| "loss": 0.0077, |
| "step": 6050 |
| }, |
| { |
| "epoch": 30.606060606060606, |
| "grad_norm": 0.11651741713285446, |
| "learning_rate": 8.350360617930371e-05, |
| "loss": 0.0071, |
| "step": 6060 |
| }, |
| { |
| "epoch": 30.656565656565657, |
| "grad_norm": 0.06456653773784637, |
| "learning_rate": 8.344219220702255e-05, |
| "loss": 0.0069, |
| "step": 6070 |
| }, |
| { |
| "epoch": 30.707070707070706, |
| "grad_norm": 0.09194045513868332, |
| "learning_rate": 8.338068680506485e-05, |
| "loss": 0.0056, |
| "step": 6080 |
| }, |
| { |
| "epoch": 30.757575757575758, |
| "grad_norm": 0.0947142019867897, |
| "learning_rate": 8.33190901415841e-05, |
| "loss": 0.0071, |
| "step": 6090 |
| }, |
| { |
| "epoch": 30.80808080808081, |
| "grad_norm": 0.0878327488899231, |
| "learning_rate": 8.325740238498317e-05, |
| "loss": 0.0063, |
| "step": 6100 |
| }, |
| { |
| "epoch": 30.858585858585858, |
| "grad_norm": 0.11127763986587524, |
| "learning_rate": 8.319562370391406e-05, |
| "loss": 0.0081, |
| "step": 6110 |
| }, |
| { |
| "epoch": 30.90909090909091, |
| "grad_norm": 0.09407719224691391, |
| "learning_rate": 8.31337542672773e-05, |
| "loss": 0.0094, |
| "step": 6120 |
| }, |
| { |
| "epoch": 30.95959595959596, |
| "grad_norm": 0.09434665739536285, |
| "learning_rate": 8.307179424422158e-05, |
| "loss": 0.0049, |
| "step": 6130 |
| }, |
| { |
| "epoch": 31.01010101010101, |
| "grad_norm": 0.11631440371274948, |
| "learning_rate": 8.300974380414327e-05, |
| "loss": 0.0072, |
| "step": 6140 |
| }, |
| { |
| "epoch": 31.060606060606062, |
| "grad_norm": 0.09293083846569061, |
| "learning_rate": 8.294760311668586e-05, |
| "loss": 0.0085, |
| "step": 6150 |
| }, |
| { |
| "epoch": 31.11111111111111, |
| "grad_norm": 0.12425116449594498, |
| "learning_rate": 8.288537235173961e-05, |
| "loss": 0.0067, |
| "step": 6160 |
| }, |
| { |
| "epoch": 31.161616161616163, |
| "grad_norm": 0.14992353320121765, |
| "learning_rate": 8.282305167944108e-05, |
| "loss": 0.0069, |
| "step": 6170 |
| }, |
| { |
| "epoch": 31.21212121212121, |
| "grad_norm": 0.07511713355779648, |
| "learning_rate": 8.276064127017262e-05, |
| "loss": 0.0065, |
| "step": 6180 |
| }, |
| { |
| "epoch": 31.262626262626263, |
| "grad_norm": 0.07064453512430191, |
| "learning_rate": 8.269814129456189e-05, |
| "loss": 0.006, |
| "step": 6190 |
| }, |
| { |
| "epoch": 31.313131313131315, |
| "grad_norm": 0.08784216642379761, |
| "learning_rate": 8.263555192348143e-05, |
| "loss": 0.0065, |
| "step": 6200 |
| }, |
| { |
| "epoch": 31.363636363636363, |
| "grad_norm": 0.11456893384456635, |
| "learning_rate": 8.257287332804819e-05, |
| "loss": 0.0079, |
| "step": 6210 |
| }, |
| { |
| "epoch": 31.414141414141415, |
| "grad_norm": 0.08418618142604828, |
| "learning_rate": 8.251010567962307e-05, |
| "loss": 0.0055, |
| "step": 6220 |
| }, |
| { |
| "epoch": 31.464646464646464, |
| "grad_norm": 0.08997195959091187, |
| "learning_rate": 8.244724914981041e-05, |
| "loss": 0.0065, |
| "step": 6230 |
| }, |
| { |
| "epoch": 31.515151515151516, |
| "grad_norm": 0.10326318442821503, |
| "learning_rate": 8.238430391045757e-05, |
| "loss": 0.0079, |
| "step": 6240 |
| }, |
| { |
| "epoch": 31.565656565656564, |
| "grad_norm": 0.12960706651210785, |
| "learning_rate": 8.232127013365445e-05, |
| "loss": 0.0079, |
| "step": 6250 |
| }, |
| { |
| "epoch": 31.616161616161616, |
| "grad_norm": 0.10820852220058441, |
| "learning_rate": 8.225814799173295e-05, |
| "loss": 0.0063, |
| "step": 6260 |
| }, |
| { |
| "epoch": 31.666666666666668, |
| "grad_norm": 0.09975355118513107, |
| "learning_rate": 8.219493765726663e-05, |
| "loss": 0.0057, |
| "step": 6270 |
| }, |
| { |
| "epoch": 31.717171717171716, |
| "grad_norm": 0.09366460889577866, |
| "learning_rate": 8.21316393030701e-05, |
| "loss": 0.0059, |
| "step": 6280 |
| }, |
| { |
| "epoch": 31.767676767676768, |
| "grad_norm": 0.07935548573732376, |
| "learning_rate": 8.206825310219865e-05, |
| "loss": 0.0066, |
| "step": 6290 |
| }, |
| { |
| "epoch": 31.818181818181817, |
| "grad_norm": 0.08710746467113495, |
| "learning_rate": 8.200477922794776e-05, |
| "loss": 0.0058, |
| "step": 6300 |
| }, |
| { |
| "epoch": 31.86868686868687, |
| "grad_norm": 0.12248288094997406, |
| "learning_rate": 8.194121785385256e-05, |
| "loss": 0.0072, |
| "step": 6310 |
| }, |
| { |
| "epoch": 31.91919191919192, |
| "grad_norm": 0.09302649646997452, |
| "learning_rate": 8.187756915368741e-05, |
| "loss": 0.0072, |
| "step": 6320 |
| }, |
| { |
| "epoch": 31.96969696969697, |
| "grad_norm": 0.10607737302780151, |
| "learning_rate": 8.181383330146544e-05, |
| "loss": 0.0089, |
| "step": 6330 |
| }, |
| { |
| "epoch": 32.02020202020202, |
| "grad_norm": 0.0958535447716713, |
| "learning_rate": 8.175001047143804e-05, |
| "loss": 0.006, |
| "step": 6340 |
| }, |
| { |
| "epoch": 32.07070707070707, |
| "grad_norm": 0.1101100891828537, |
| "learning_rate": 8.168610083809438e-05, |
| "loss": 0.0098, |
| "step": 6350 |
| }, |
| { |
| "epoch": 32.121212121212125, |
| "grad_norm": 0.08499649167060852, |
| "learning_rate": 8.162210457616095e-05, |
| "loss": 0.0052, |
| "step": 6360 |
| }, |
| { |
| "epoch": 32.17171717171717, |
| "grad_norm": 0.11346311867237091, |
| "learning_rate": 8.155802186060109e-05, |
| "loss": 0.0064, |
| "step": 6370 |
| }, |
| { |
| "epoch": 32.22222222222222, |
| "grad_norm": 0.10506398975849152, |
| "learning_rate": 8.149385286661453e-05, |
| "loss": 0.0093, |
| "step": 6380 |
| }, |
| { |
| "epoch": 32.27272727272727, |
| "grad_norm": 0.10304601490497589, |
| "learning_rate": 8.14295977696368e-05, |
| "loss": 0.0054, |
| "step": 6390 |
| }, |
| { |
| "epoch": 32.323232323232325, |
| "grad_norm": 0.09554129838943481, |
| "learning_rate": 8.13652567453389e-05, |
| "loss": 0.0055, |
| "step": 6400 |
| }, |
| { |
| "epoch": 32.37373737373738, |
| "grad_norm": 0.1417800933122635, |
| "learning_rate": 8.130082996962676e-05, |
| "loss": 0.008, |
| "step": 6410 |
| }, |
| { |
| "epoch": 32.42424242424242, |
| "grad_norm": 0.1227949932217598, |
| "learning_rate": 8.123631761864068e-05, |
| "loss": 0.0067, |
| "step": 6420 |
| }, |
| { |
| "epoch": 32.474747474747474, |
| "grad_norm": 0.12324743717908859, |
| "learning_rate": 8.1171719868755e-05, |
| "loss": 0.0065, |
| "step": 6430 |
| }, |
| { |
| "epoch": 32.525252525252526, |
| "grad_norm": 0.11707497388124466, |
| "learning_rate": 8.110703689657748e-05, |
| "loss": 0.008, |
| "step": 6440 |
| }, |
| { |
| "epoch": 32.57575757575758, |
| "grad_norm": 0.10671434551477432, |
| "learning_rate": 8.104226887894892e-05, |
| "loss": 0.0062, |
| "step": 6450 |
| }, |
| { |
| "epoch": 32.62626262626263, |
| "grad_norm": 0.07873687148094177, |
| "learning_rate": 8.097741599294257e-05, |
| "loss": 0.0068, |
| "step": 6460 |
| }, |
| { |
| "epoch": 32.676767676767675, |
| "grad_norm": 0.0950477346777916, |
| "learning_rate": 8.091247841586378e-05, |
| "loss": 0.0063, |
| "step": 6470 |
| }, |
| { |
| "epoch": 32.72727272727273, |
| "grad_norm": 0.12098478525876999, |
| "learning_rate": 8.084745632524939e-05, |
| "loss": 0.0073, |
| "step": 6480 |
| }, |
| { |
| "epoch": 32.77777777777778, |
| "grad_norm": 0.08094990998506546, |
| "learning_rate": 8.07823498988673e-05, |
| "loss": 0.0092, |
| "step": 6490 |
| }, |
| { |
| "epoch": 32.82828282828283, |
| "grad_norm": 0.09462501853704453, |
| "learning_rate": 8.071715931471602e-05, |
| "loss": 0.0081, |
| "step": 6500 |
| }, |
| { |
| "epoch": 32.878787878787875, |
| "grad_norm": 0.07903637737035751, |
| "learning_rate": 8.06518847510241e-05, |
| "loss": 0.0062, |
| "step": 6510 |
| }, |
| { |
| "epoch": 32.92929292929293, |
| "grad_norm": 0.12335184216499329, |
| "learning_rate": 8.058652638624971e-05, |
| "loss": 0.0077, |
| "step": 6520 |
| }, |
| { |
| "epoch": 32.97979797979798, |
| "grad_norm": 0.07843010872602463, |
| "learning_rate": 8.052108439908013e-05, |
| "loss": 0.0074, |
| "step": 6530 |
| }, |
| { |
| "epoch": 33.03030303030303, |
| "grad_norm": 0.08075881004333496, |
| "learning_rate": 8.045555896843125e-05, |
| "loss": 0.0071, |
| "step": 6540 |
| }, |
| { |
| "epoch": 33.08080808080808, |
| "grad_norm": 0.08033275604248047, |
| "learning_rate": 8.03899502734471e-05, |
| "loss": 0.0075, |
| "step": 6550 |
| }, |
| { |
| "epoch": 33.13131313131313, |
| "grad_norm": 0.07282350957393646, |
| "learning_rate": 8.032425849349931e-05, |
| "loss": 0.0052, |
| "step": 6560 |
| }, |
| { |
| "epoch": 33.18181818181818, |
| "grad_norm": 0.1006646379828453, |
| "learning_rate": 8.025848380818674e-05, |
| "loss": 0.006, |
| "step": 6570 |
| }, |
| { |
| "epoch": 33.23232323232323, |
| "grad_norm": 0.056686993688344955, |
| "learning_rate": 8.019262639733487e-05, |
| "loss": 0.0051, |
| "step": 6580 |
| }, |
| { |
| "epoch": 33.282828282828284, |
| "grad_norm": 0.06989490985870361, |
| "learning_rate": 8.012668644099531e-05, |
| "loss": 0.0053, |
| "step": 6590 |
| }, |
| { |
| "epoch": 33.333333333333336, |
| "grad_norm": 0.09771039336919785, |
| "learning_rate": 8.006066411944542e-05, |
| "loss": 0.0061, |
| "step": 6600 |
| }, |
| { |
| "epoch": 33.38383838383838, |
| "grad_norm": 0.08803494274616241, |
| "learning_rate": 7.999455961318769e-05, |
| "loss": 0.0071, |
| "step": 6610 |
| }, |
| { |
| "epoch": 33.43434343434343, |
| "grad_norm": 0.09492011368274689, |
| "learning_rate": 7.992837310294932e-05, |
| "loss": 0.007, |
| "step": 6620 |
| }, |
| { |
| "epoch": 33.484848484848484, |
| "grad_norm": 0.10140733420848846, |
| "learning_rate": 7.986210476968167e-05, |
| "loss": 0.0077, |
| "step": 6630 |
| }, |
| { |
| "epoch": 33.535353535353536, |
| "grad_norm": 0.12458433955907822, |
| "learning_rate": 7.97957547945599e-05, |
| "loss": 0.0055, |
| "step": 6640 |
| }, |
| { |
| "epoch": 33.58585858585859, |
| "grad_norm": 0.08677127212285995, |
| "learning_rate": 7.972932335898226e-05, |
| "loss": 0.007, |
| "step": 6650 |
| }, |
| { |
| "epoch": 33.63636363636363, |
| "grad_norm": 0.10718109458684921, |
| "learning_rate": 7.966281064456975e-05, |
| "loss": 0.008, |
| "step": 6660 |
| }, |
| { |
| "epoch": 33.686868686868685, |
| "grad_norm": 0.09110673516988754, |
| "learning_rate": 7.959621683316563e-05, |
| "loss": 0.0052, |
| "step": 6670 |
| }, |
| { |
| "epoch": 33.73737373737374, |
| "grad_norm": 0.08715398609638214, |
| "learning_rate": 7.952954210683481e-05, |
| "loss": 0.006, |
| "step": 6680 |
| }, |
| { |
| "epoch": 33.78787878787879, |
| "grad_norm": 0.08741055428981781, |
| "learning_rate": 7.946278664786345e-05, |
| "loss": 0.0052, |
| "step": 6690 |
| }, |
| { |
| "epoch": 33.83838383838384, |
| "grad_norm": 0.060904067009687424, |
| "learning_rate": 7.939595063875842e-05, |
| "loss": 0.0055, |
| "step": 6700 |
| }, |
| { |
| "epoch": 33.888888888888886, |
| "grad_norm": 0.08258910477161407, |
| "learning_rate": 7.932903426224683e-05, |
| "loss": 0.0072, |
| "step": 6710 |
| }, |
| { |
| "epoch": 33.93939393939394, |
| "grad_norm": 0.1035085991024971, |
| "learning_rate": 7.926203770127552e-05, |
| "loss": 0.0058, |
| "step": 6720 |
| }, |
| { |
| "epoch": 33.98989898989899, |
| "grad_norm": 0.10330823808908463, |
| "learning_rate": 7.919496113901046e-05, |
| "loss": 0.0057, |
| "step": 6730 |
| }, |
| { |
| "epoch": 34.04040404040404, |
| "grad_norm": 0.08210787177085876, |
| "learning_rate": 7.912780475883649e-05, |
| "loss": 0.0063, |
| "step": 6740 |
| }, |
| { |
| "epoch": 34.09090909090909, |
| "grad_norm": 0.07032239437103271, |
| "learning_rate": 7.906056874435652e-05, |
| "loss": 0.0056, |
| "step": 6750 |
| }, |
| { |
| "epoch": 34.14141414141414, |
| "grad_norm": 0.08258673548698425, |
| "learning_rate": 7.899325327939131e-05, |
| "loss": 0.0076, |
| "step": 6760 |
| }, |
| { |
| "epoch": 34.19191919191919, |
| "grad_norm": 0.13491354882717133, |
| "learning_rate": 7.892585854797872e-05, |
| "loss": 0.0076, |
| "step": 6770 |
| }, |
| { |
| "epoch": 34.24242424242424, |
| "grad_norm": 0.10750430077314377, |
| "learning_rate": 7.88583847343734e-05, |
| "loss": 0.0067, |
| "step": 6780 |
| }, |
| { |
| "epoch": 34.292929292929294, |
| "grad_norm": 0.09099852293729782, |
| "learning_rate": 7.879083202304616e-05, |
| "loss": 0.0061, |
| "step": 6790 |
| }, |
| { |
| "epoch": 34.343434343434346, |
| "grad_norm": 0.1012977883219719, |
| "learning_rate": 7.872320059868355e-05, |
| "loss": 0.0067, |
| "step": 6800 |
| }, |
| { |
| "epoch": 34.39393939393939, |
| "grad_norm": 0.07483401894569397, |
| "learning_rate": 7.865549064618729e-05, |
| "loss": 0.0064, |
| "step": 6810 |
| }, |
| { |
| "epoch": 34.44444444444444, |
| "grad_norm": 0.09567782282829285, |
| "learning_rate": 7.858770235067381e-05, |
| "loss": 0.0089, |
| "step": 6820 |
| }, |
| { |
| "epoch": 34.494949494949495, |
| "grad_norm": 0.10369649529457092, |
| "learning_rate": 7.851983589747374e-05, |
| "loss": 0.0093, |
| "step": 6830 |
| }, |
| { |
| "epoch": 34.54545454545455, |
| "grad_norm": 0.10483183711767197, |
| "learning_rate": 7.845189147213133e-05, |
| "loss": 0.0067, |
| "step": 6840 |
| }, |
| { |
| "epoch": 34.5959595959596, |
| "grad_norm": 0.11058826744556427, |
| "learning_rate": 7.838386926040407e-05, |
| "loss": 0.0069, |
| "step": 6850 |
| }, |
| { |
| "epoch": 34.64646464646464, |
| "grad_norm": 0.07869322597980499, |
| "learning_rate": 7.83157694482621e-05, |
| "loss": 0.0053, |
| "step": 6860 |
| }, |
| { |
| "epoch": 34.696969696969695, |
| "grad_norm": 0.10286489129066467, |
| "learning_rate": 7.824759222188768e-05, |
| "loss": 0.0076, |
| "step": 6870 |
| }, |
| { |
| "epoch": 34.74747474747475, |
| "grad_norm": 0.12901929020881653, |
| "learning_rate": 7.817933776767478e-05, |
| "loss": 0.0055, |
| "step": 6880 |
| }, |
| { |
| "epoch": 34.7979797979798, |
| "grad_norm": 0.10225682705640793, |
| "learning_rate": 7.811100627222842e-05, |
| "loss": 0.0069, |
| "step": 6890 |
| }, |
| { |
| "epoch": 34.84848484848485, |
| "grad_norm": 0.07189654558897018, |
| "learning_rate": 7.804259792236435e-05, |
| "loss": 0.005, |
| "step": 6900 |
| }, |
| { |
| "epoch": 34.898989898989896, |
| "grad_norm": 0.10680422931909561, |
| "learning_rate": 7.797411290510835e-05, |
| "loss": 0.006, |
| "step": 6910 |
| }, |
| { |
| "epoch": 34.94949494949495, |
| "grad_norm": 0.083693727850914, |
| "learning_rate": 7.790555140769586e-05, |
| "loss": 0.0076, |
| "step": 6920 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 0.10102104395627975, |
| "learning_rate": 7.78369136175714e-05, |
| "loss": 0.0071, |
| "step": 6930 |
| }, |
| { |
| "epoch": 35.05050505050505, |
| "grad_norm": 0.09667722880840302, |
| "learning_rate": 7.776819972238806e-05, |
| "loss": 0.0078, |
| "step": 6940 |
| }, |
| { |
| "epoch": 35.101010101010104, |
| "grad_norm": 0.07413113862276077, |
| "learning_rate": 7.7699409910007e-05, |
| "loss": 0.0066, |
| "step": 6950 |
| }, |
| { |
| "epoch": 35.15151515151515, |
| "grad_norm": 0.09257117658853531, |
| "learning_rate": 7.763054436849694e-05, |
| "loss": 0.0061, |
| "step": 6960 |
| }, |
| { |
| "epoch": 35.2020202020202, |
| "grad_norm": 0.09753405302762985, |
| "learning_rate": 7.756160328613364e-05, |
| "loss": 0.0055, |
| "step": 6970 |
| }, |
| { |
| "epoch": 35.25252525252525, |
| "grad_norm": 0.08075829595327377, |
| "learning_rate": 7.749258685139942e-05, |
| "loss": 0.0063, |
| "step": 6980 |
| }, |
| { |
| "epoch": 35.303030303030305, |
| "grad_norm": 0.08916448801755905, |
| "learning_rate": 7.742349525298253e-05, |
| "loss": 0.0064, |
| "step": 6990 |
| }, |
| { |
| "epoch": 35.35353535353536, |
| "grad_norm": 0.07732652127742767, |
| "learning_rate": 7.735432867977679e-05, |
| "loss": 0.0081, |
| "step": 7000 |
| }, |
| { |
| "epoch": 35.4040404040404, |
| "grad_norm": 0.07458748668432236, |
| "learning_rate": 7.728508732088096e-05, |
| "loss": 0.0057, |
| "step": 7010 |
| }, |
| { |
| "epoch": 35.45454545454545, |
| "grad_norm": 0.08430732041597366, |
| "learning_rate": 7.721577136559825e-05, |
| "loss": 0.0051, |
| "step": 7020 |
| }, |
| { |
| "epoch": 35.505050505050505, |
| "grad_norm": 0.08712313324213028, |
| "learning_rate": 7.714638100343588e-05, |
| "loss": 0.006, |
| "step": 7030 |
| }, |
| { |
| "epoch": 35.55555555555556, |
| "grad_norm": 0.08303209394216537, |
| "learning_rate": 7.707691642410444e-05, |
| "loss": 0.0073, |
| "step": 7040 |
| }, |
| { |
| "epoch": 35.60606060606061, |
| "grad_norm": 0.09957766532897949, |
| "learning_rate": 7.70073778175174e-05, |
| "loss": 0.0064, |
| "step": 7050 |
| }, |
| { |
| "epoch": 35.656565656565654, |
| "grad_norm": 0.11975084245204926, |
| "learning_rate": 7.69377653737907e-05, |
| "loss": 0.0084, |
| "step": 7060 |
| }, |
| { |
| "epoch": 35.707070707070706, |
| "grad_norm": 0.08120808750391006, |
| "learning_rate": 7.686807928324209e-05, |
| "loss": 0.0074, |
| "step": 7070 |
| }, |
| { |
| "epoch": 35.75757575757576, |
| "grad_norm": 0.13053081929683685, |
| "learning_rate": 7.679831973639065e-05, |
| "loss": 0.0091, |
| "step": 7080 |
| }, |
| { |
| "epoch": 35.80808080808081, |
| "grad_norm": 0.10247783362865448, |
| "learning_rate": 7.672848692395637e-05, |
| "loss": 0.0083, |
| "step": 7090 |
| }, |
| { |
| "epoch": 35.85858585858586, |
| "grad_norm": 0.09460171312093735, |
| "learning_rate": 7.665858103685944e-05, |
| "loss": 0.0064, |
| "step": 7100 |
| }, |
| { |
| "epoch": 35.90909090909091, |
| "grad_norm": 0.1075742319226265, |
| "learning_rate": 7.658860226621991e-05, |
| "loss": 0.0073, |
| "step": 7110 |
| }, |
| { |
| "epoch": 35.95959595959596, |
| "grad_norm": 0.06698296219110489, |
| "learning_rate": 7.651855080335708e-05, |
| "loss": 0.0051, |
| "step": 7120 |
| }, |
| { |
| "epoch": 36.01010101010101, |
| "grad_norm": 0.0706351175904274, |
| "learning_rate": 7.644842683978896e-05, |
| "loss": 0.0063, |
| "step": 7130 |
| }, |
| { |
| "epoch": 36.06060606060606, |
| "grad_norm": 0.10589048266410828, |
| "learning_rate": 7.63782305672318e-05, |
| "loss": 0.0076, |
| "step": 7140 |
| }, |
| { |
| "epoch": 36.111111111111114, |
| "grad_norm": 0.09960482269525528, |
| "learning_rate": 7.63079621775995e-05, |
| "loss": 0.0055, |
| "step": 7150 |
| }, |
| { |
| "epoch": 36.16161616161616, |
| "grad_norm": 0.08814871311187744, |
| "learning_rate": 7.623762186300319e-05, |
| "loss": 0.0066, |
| "step": 7160 |
| }, |
| { |
| "epoch": 36.21212121212121, |
| "grad_norm": 0.09638148546218872, |
| "learning_rate": 7.616720981575057e-05, |
| "loss": 0.0079, |
| "step": 7170 |
| }, |
| { |
| "epoch": 36.26262626262626, |
| "grad_norm": 0.07061902433633804, |
| "learning_rate": 7.609672622834552e-05, |
| "loss": 0.0074, |
| "step": 7180 |
| }, |
| { |
| "epoch": 36.313131313131315, |
| "grad_norm": 0.10157573968172073, |
| "learning_rate": 7.602617129348747e-05, |
| "loss": 0.0066, |
| "step": 7190 |
| }, |
| { |
| "epoch": 36.36363636363637, |
| "grad_norm": 0.10003424435853958, |
| "learning_rate": 7.595554520407088e-05, |
| "loss": 0.0105, |
| "step": 7200 |
| }, |
| { |
| "epoch": 36.41414141414141, |
| "grad_norm": 0.12245219945907593, |
| "learning_rate": 7.588484815318484e-05, |
| "loss": 0.0075, |
| "step": 7210 |
| }, |
| { |
| "epoch": 36.464646464646464, |
| "grad_norm": 0.11477605253458023, |
| "learning_rate": 7.581408033411234e-05, |
| "loss": 0.0067, |
| "step": 7220 |
| }, |
| { |
| "epoch": 36.515151515151516, |
| "grad_norm": 0.12604714930057526, |
| "learning_rate": 7.574324194032995e-05, |
| "loss": 0.0084, |
| "step": 7230 |
| }, |
| { |
| "epoch": 36.56565656565657, |
| "grad_norm": 0.10568782687187195, |
| "learning_rate": 7.567233316550705e-05, |
| "loss": 0.0073, |
| "step": 7240 |
| }, |
| { |
| "epoch": 36.61616161616162, |
| "grad_norm": 0.0990833267569542, |
| "learning_rate": 7.560135420350562e-05, |
| "loss": 0.0073, |
| "step": 7250 |
| }, |
| { |
| "epoch": 36.666666666666664, |
| "grad_norm": 0.09167541563510895, |
| "learning_rate": 7.553030524837935e-05, |
| "loss": 0.0066, |
| "step": 7260 |
| }, |
| { |
| "epoch": 36.717171717171716, |
| "grad_norm": 0.0814923644065857, |
| "learning_rate": 7.545918649437341e-05, |
| "loss": 0.0067, |
| "step": 7270 |
| }, |
| { |
| "epoch": 36.76767676767677, |
| "grad_norm": 0.0903998538851738, |
| "learning_rate": 7.538799813592377e-05, |
| "loss": 0.007, |
| "step": 7280 |
| }, |
| { |
| "epoch": 36.81818181818182, |
| "grad_norm": 0.08639223128557205, |
| "learning_rate": 7.531674036765662e-05, |
| "loss": 0.0063, |
| "step": 7290 |
| }, |
| { |
| "epoch": 36.86868686868687, |
| "grad_norm": 0.1197165921330452, |
| "learning_rate": 7.524541338438807e-05, |
| "loss": 0.0074, |
| "step": 7300 |
| }, |
| { |
| "epoch": 36.91919191919192, |
| "grad_norm": 0.1356610655784607, |
| "learning_rate": 7.517401738112328e-05, |
| "loss": 0.0061, |
| "step": 7310 |
| }, |
| { |
| "epoch": 36.96969696969697, |
| "grad_norm": 0.13831479847431183, |
| "learning_rate": 7.510255255305628e-05, |
| "loss": 0.0061, |
| "step": 7320 |
| }, |
| { |
| "epoch": 37.02020202020202, |
| "grad_norm": 0.142642542719841, |
| "learning_rate": 7.503101909556911e-05, |
| "loss": 0.0075, |
| "step": 7330 |
| }, |
| { |
| "epoch": 37.07070707070707, |
| "grad_norm": 0.14699555933475494, |
| "learning_rate": 7.495941720423154e-05, |
| "loss": 0.0066, |
| "step": 7340 |
| }, |
| { |
| "epoch": 37.121212121212125, |
| "grad_norm": 0.12971366941928864, |
| "learning_rate": 7.488774707480042e-05, |
| "loss": 0.0058, |
| "step": 7350 |
| }, |
| { |
| "epoch": 37.17171717171717, |
| "grad_norm": 0.11332003772258759, |
| "learning_rate": 7.481600890321911e-05, |
| "loss": 0.0073, |
| "step": 7360 |
| }, |
| { |
| "epoch": 37.22222222222222, |
| "grad_norm": 0.11855194717645645, |
| "learning_rate": 7.474420288561708e-05, |
| "loss": 0.0077, |
| "step": 7370 |
| }, |
| { |
| "epoch": 37.27272727272727, |
| "grad_norm": 0.13500656187534332, |
| "learning_rate": 7.467232921830921e-05, |
| "loss": 0.0068, |
| "step": 7380 |
| }, |
| { |
| "epoch": 37.323232323232325, |
| "grad_norm": 0.11447048932313919, |
| "learning_rate": 7.460038809779537e-05, |
| "loss": 0.0073, |
| "step": 7390 |
| }, |
| { |
| "epoch": 37.37373737373738, |
| "grad_norm": 0.09125279635190964, |
| "learning_rate": 7.452837972075983e-05, |
| "loss": 0.0065, |
| "step": 7400 |
| }, |
| { |
| "epoch": 37.42424242424242, |
| "grad_norm": 0.09449958801269531, |
| "learning_rate": 7.445630428407074e-05, |
| "loss": 0.006, |
| "step": 7410 |
| }, |
| { |
| "epoch": 37.474747474747474, |
| "grad_norm": 0.08667749166488647, |
| "learning_rate": 7.43841619847796e-05, |
| "loss": 0.007, |
| "step": 7420 |
| }, |
| { |
| "epoch": 37.525252525252526, |
| "grad_norm": 0.07609740644693375, |
| "learning_rate": 7.431195302012072e-05, |
| "loss": 0.0057, |
| "step": 7430 |
| }, |
| { |
| "epoch": 37.57575757575758, |
| "grad_norm": 0.10035132616758347, |
| "learning_rate": 7.423967758751061e-05, |
| "loss": 0.0077, |
| "step": 7440 |
| }, |
| { |
| "epoch": 37.62626262626263, |
| "grad_norm": 0.10131976008415222, |
| "learning_rate": 7.416733588454758e-05, |
| "loss": 0.0053, |
| "step": 7450 |
| }, |
| { |
| "epoch": 37.676767676767675, |
| "grad_norm": 0.15532907843589783, |
| "learning_rate": 7.409492810901106e-05, |
| "loss": 0.0061, |
| "step": 7460 |
| }, |
| { |
| "epoch": 37.72727272727273, |
| "grad_norm": 0.09854361414909363, |
| "learning_rate": 7.402245445886116e-05, |
| "loss": 0.0059, |
| "step": 7470 |
| }, |
| { |
| "epoch": 37.77777777777778, |
| "grad_norm": 0.08614123612642288, |
| "learning_rate": 7.394991513223806e-05, |
| "loss": 0.007, |
| "step": 7480 |
| }, |
| { |
| "epoch": 37.82828282828283, |
| "grad_norm": 0.0699806734919548, |
| "learning_rate": 7.38773103274615e-05, |
| "loss": 0.0056, |
| "step": 7490 |
| }, |
| { |
| "epoch": 37.878787878787875, |
| "grad_norm": 0.08869198709726334, |
| "learning_rate": 7.380464024303028e-05, |
| "loss": 0.0054, |
| "step": 7500 |
| }, |
| { |
| "epoch": 37.92929292929293, |
| "grad_norm": 0.08139863610267639, |
| "learning_rate": 7.373190507762162e-05, |
| "loss": 0.0062, |
| "step": 7510 |
| }, |
| { |
| "epoch": 37.97979797979798, |
| "grad_norm": 0.08704749494791031, |
| "learning_rate": 7.365910503009066e-05, |
| "loss": 0.0063, |
| "step": 7520 |
| }, |
| { |
| "epoch": 38.03030303030303, |
| "grad_norm": 0.08641494065523148, |
| "learning_rate": 7.358624029946996e-05, |
| "loss": 0.0049, |
| "step": 7530 |
| }, |
| { |
| "epoch": 38.08080808080808, |
| "grad_norm": 0.09129251539707184, |
| "learning_rate": 7.351331108496893e-05, |
| "loss": 0.0079, |
| "step": 7540 |
| }, |
| { |
| "epoch": 38.13131313131313, |
| "grad_norm": 0.074696384370327, |
| "learning_rate": 7.344031758597325e-05, |
| "loss": 0.006, |
| "step": 7550 |
| }, |
| { |
| "epoch": 38.18181818181818, |
| "grad_norm": 0.11536741256713867, |
| "learning_rate": 7.336726000204435e-05, |
| "loss": 0.0071, |
| "step": 7560 |
| }, |
| { |
| "epoch": 38.23232323232323, |
| "grad_norm": 0.09663663059473038, |
| "learning_rate": 7.32941385329189e-05, |
| "loss": 0.0054, |
| "step": 7570 |
| }, |
| { |
| "epoch": 38.282828282828284, |
| "grad_norm": 0.11166119575500488, |
| "learning_rate": 7.322095337850816e-05, |
| "loss": 0.0065, |
| "step": 7580 |
| }, |
| { |
| "epoch": 38.333333333333336, |
| "grad_norm": 0.09225056320428848, |
| "learning_rate": 7.314770473889758e-05, |
| "loss": 0.007, |
| "step": 7590 |
| }, |
| { |
| "epoch": 38.38383838383838, |
| "grad_norm": 0.07359269261360168, |
| "learning_rate": 7.307439281434615e-05, |
| "loss": 0.0055, |
| "step": 7600 |
| }, |
| { |
| "epoch": 38.43434343434343, |
| "grad_norm": 0.08286816626787186, |
| "learning_rate": 7.300101780528585e-05, |
| "loss": 0.0066, |
| "step": 7610 |
| }, |
| { |
| "epoch": 38.484848484848484, |
| "grad_norm": 0.0844549685716629, |
| "learning_rate": 7.292757991232117e-05, |
| "loss": 0.0054, |
| "step": 7620 |
| }, |
| { |
| "epoch": 38.535353535353536, |
| "grad_norm": 0.07672478258609772, |
| "learning_rate": 7.285407933622848e-05, |
| "loss": 0.0057, |
| "step": 7630 |
| }, |
| { |
| "epoch": 38.58585858585859, |
| "grad_norm": 0.11021941900253296, |
| "learning_rate": 7.278051627795557e-05, |
| "loss": 0.0084, |
| "step": 7640 |
| }, |
| { |
| "epoch": 38.63636363636363, |
| "grad_norm": 0.09413682669401169, |
| "learning_rate": 7.270689093862105e-05, |
| "loss": 0.0066, |
| "step": 7650 |
| }, |
| { |
| "epoch": 38.686868686868685, |
| "grad_norm": 0.1022055596113205, |
| "learning_rate": 7.263320351951374e-05, |
| "loss": 0.0078, |
| "step": 7660 |
| }, |
| { |
| "epoch": 38.73737373737374, |
| "grad_norm": 0.08541738241910934, |
| "learning_rate": 7.255945422209227e-05, |
| "loss": 0.0071, |
| "step": 7670 |
| }, |
| { |
| "epoch": 38.78787878787879, |
| "grad_norm": 0.15710507333278656, |
| "learning_rate": 7.248564324798437e-05, |
| "loss": 0.0065, |
| "step": 7680 |
| }, |
| { |
| "epoch": 38.83838383838384, |
| "grad_norm": 0.09763623774051666, |
| "learning_rate": 7.241177079898644e-05, |
| "loss": 0.0069, |
| "step": 7690 |
| }, |
| { |
| "epoch": 38.888888888888886, |
| "grad_norm": 0.07331934571266174, |
| "learning_rate": 7.233783707706295e-05, |
| "loss": 0.005, |
| "step": 7700 |
| }, |
| { |
| "epoch": 38.93939393939394, |
| "grad_norm": 0.08508452028036118, |
| "learning_rate": 7.226384228434586e-05, |
| "loss": 0.0076, |
| "step": 7710 |
| }, |
| { |
| "epoch": 38.98989898989899, |
| "grad_norm": 0.09544289857149124, |
| "learning_rate": 7.21897866231341e-05, |
| "loss": 0.0069, |
| "step": 7720 |
| }, |
| { |
| "epoch": 39.04040404040404, |
| "grad_norm": 0.0833614394068718, |
| "learning_rate": 7.211567029589303e-05, |
| "loss": 0.0075, |
| "step": 7730 |
| }, |
| { |
| "epoch": 39.09090909090909, |
| "grad_norm": 0.07486817240715027, |
| "learning_rate": 7.204149350525387e-05, |
| "loss": 0.0051, |
| "step": 7740 |
| }, |
| { |
| "epoch": 39.14141414141414, |
| "grad_norm": 0.08089493215084076, |
| "learning_rate": 7.196725645401309e-05, |
| "loss": 0.0062, |
| "step": 7750 |
| }, |
| { |
| "epoch": 39.19191919191919, |
| "grad_norm": 0.08093910664319992, |
| "learning_rate": 7.1892959345132e-05, |
| "loss": 0.007, |
| "step": 7760 |
| }, |
| { |
| "epoch": 39.24242424242424, |
| "grad_norm": 0.07971242815256119, |
| "learning_rate": 7.181860238173605e-05, |
| "loss": 0.0064, |
| "step": 7770 |
| }, |
| { |
| "epoch": 39.292929292929294, |
| "grad_norm": 0.07001394778490067, |
| "learning_rate": 7.174418576711432e-05, |
| "loss": 0.0059, |
| "step": 7780 |
| }, |
| { |
| "epoch": 39.343434343434346, |
| "grad_norm": 0.09065631031990051, |
| "learning_rate": 7.1669709704719e-05, |
| "loss": 0.0061, |
| "step": 7790 |
| }, |
| { |
| "epoch": 39.39393939393939, |
| "grad_norm": 0.11827976256608963, |
| "learning_rate": 7.159517439816481e-05, |
| "loss": 0.008, |
| "step": 7800 |
| }, |
| { |
| "epoch": 39.44444444444444, |
| "grad_norm": 0.10306557267904282, |
| "learning_rate": 7.152058005122842e-05, |
| "loss": 0.0069, |
| "step": 7810 |
| }, |
| { |
| "epoch": 39.494949494949495, |
| "grad_norm": 0.11831233650445938, |
| "learning_rate": 7.144592686784793e-05, |
| "loss": 0.0049, |
| "step": 7820 |
| }, |
| { |
| "epoch": 39.54545454545455, |
| "grad_norm": 0.07928682118654251, |
| "learning_rate": 7.137121505212229e-05, |
| "loss": 0.0053, |
| "step": 7830 |
| }, |
| { |
| "epoch": 39.5959595959596, |
| "grad_norm": 0.07911905646324158, |
| "learning_rate": 7.129644480831077e-05, |
| "loss": 0.0056, |
| "step": 7840 |
| }, |
| { |
| "epoch": 39.64646464646464, |
| "grad_norm": 0.10212128609418869, |
| "learning_rate": 7.122161634083234e-05, |
| "loss": 0.011, |
| "step": 7850 |
| }, |
| { |
| "epoch": 39.696969696969695, |
| "grad_norm": 0.10481354594230652, |
| "learning_rate": 7.114672985426516e-05, |
| "loss": 0.0084, |
| "step": 7860 |
| }, |
| { |
| "epoch": 39.74747474747475, |
| "grad_norm": 0.08356379717588425, |
| "learning_rate": 7.107178555334606e-05, |
| "loss": 0.0076, |
| "step": 7870 |
| }, |
| { |
| "epoch": 39.7979797979798, |
| "grad_norm": 0.118691086769104, |
| "learning_rate": 7.099678364296989e-05, |
| "loss": 0.0078, |
| "step": 7880 |
| }, |
| { |
| "epoch": 39.84848484848485, |
| "grad_norm": 0.12084811180830002, |
| "learning_rate": 7.0921724328189e-05, |
| "loss": 0.0065, |
| "step": 7890 |
| }, |
| { |
| "epoch": 39.898989898989896, |
| "grad_norm": 0.14413075149059296, |
| "learning_rate": 7.084660781421268e-05, |
| "loss": 0.0052, |
| "step": 7900 |
| }, |
| { |
| "epoch": 39.94949494949495, |
| "grad_norm": 0.129902184009552, |
| "learning_rate": 7.077143430640662e-05, |
| "loss": 0.006, |
| "step": 7910 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 0.1121053621172905, |
| "learning_rate": 7.069620401029232e-05, |
| "loss": 0.0057, |
| "step": 7920 |
| }, |
| { |
| "epoch": 40.05050505050505, |
| "grad_norm": 0.12401356548070908, |
| "learning_rate": 7.062091713154655e-05, |
| "loss": 0.006, |
| "step": 7930 |
| }, |
| { |
| "epoch": 40.101010101010104, |
| "grad_norm": 0.09731826186180115, |
| "learning_rate": 7.054557387600075e-05, |
| "loss": 0.0065, |
| "step": 7940 |
| }, |
| { |
| "epoch": 40.15151515151515, |
| "grad_norm": 0.10260061174631119, |
| "learning_rate": 7.04701744496405e-05, |
| "loss": 0.0053, |
| "step": 7950 |
| }, |
| { |
| "epoch": 40.2020202020202, |
| "grad_norm": 0.07520230114459991, |
| "learning_rate": 7.039471905860495e-05, |
| "loss": 0.0082, |
| "step": 7960 |
| }, |
| { |
| "epoch": 40.25252525252525, |
| "grad_norm": 0.08596933633089066, |
| "learning_rate": 7.031920790918628e-05, |
| "loss": 0.0066, |
| "step": 7970 |
| }, |
| { |
| "epoch": 40.303030303030305, |
| "grad_norm": 0.10693103075027466, |
| "learning_rate": 7.024364120782906e-05, |
| "loss": 0.0051, |
| "step": 7980 |
| }, |
| { |
| "epoch": 40.35353535353536, |
| "grad_norm": 0.08185599744319916, |
| "learning_rate": 7.016801916112978e-05, |
| "loss": 0.0059, |
| "step": 7990 |
| }, |
| { |
| "epoch": 40.4040404040404, |
| "grad_norm": 0.08541405946016312, |
| "learning_rate": 7.009234197583623e-05, |
| "loss": 0.0057, |
| "step": 8000 |
| }, |
| { |
| "epoch": 40.45454545454545, |
| "grad_norm": 0.10112811625003815, |
| "learning_rate": 7.001660985884692e-05, |
| "loss": 0.0053, |
| "step": 8010 |
| }, |
| { |
| "epoch": 40.505050505050505, |
| "grad_norm": 0.1043177992105484, |
| "learning_rate": 6.994082301721063e-05, |
| "loss": 0.0064, |
| "step": 8020 |
| }, |
| { |
| "epoch": 40.55555555555556, |
| "grad_norm": 0.07557950913906097, |
| "learning_rate": 6.986498165812563e-05, |
| "loss": 0.0064, |
| "step": 8030 |
| }, |
| { |
| "epoch": 40.60606060606061, |
| "grad_norm": 0.08657993376255035, |
| "learning_rate": 6.978908598893932e-05, |
| "loss": 0.0061, |
| "step": 8040 |
| }, |
| { |
| "epoch": 40.656565656565654, |
| "grad_norm": 0.07899653166532516, |
| "learning_rate": 6.971313621714756e-05, |
| "loss": 0.0056, |
| "step": 8050 |
| }, |
| { |
| "epoch": 40.707070707070706, |
| "grad_norm": 0.09126917272806168, |
| "learning_rate": 6.96371325503941e-05, |
| "loss": 0.008, |
| "step": 8060 |
| }, |
| { |
| "epoch": 40.75757575757576, |
| "grad_norm": 0.07319652289152145, |
| "learning_rate": 6.956107519647014e-05, |
| "loss": 0.0065, |
| "step": 8070 |
| }, |
| { |
| "epoch": 40.80808080808081, |
| "grad_norm": 0.07428599894046783, |
| "learning_rate": 6.94849643633135e-05, |
| "loss": 0.0055, |
| "step": 8080 |
| }, |
| { |
| "epoch": 40.85858585858586, |
| "grad_norm": 0.06460615992546082, |
| "learning_rate": 6.940880025900834e-05, |
| "loss": 0.0048, |
| "step": 8090 |
| }, |
| { |
| "epoch": 40.90909090909091, |
| "grad_norm": 0.07539784163236618, |
| "learning_rate": 6.933258309178438e-05, |
| "loss": 0.0059, |
| "step": 8100 |
| }, |
| { |
| "epoch": 40.95959595959596, |
| "grad_norm": 0.07928739488124847, |
| "learning_rate": 6.925631307001646e-05, |
| "loss": 0.0062, |
| "step": 8110 |
| }, |
| { |
| "epoch": 41.01010101010101, |
| "grad_norm": 0.09086461365222931, |
| "learning_rate": 6.91799904022239e-05, |
| "loss": 0.0049, |
| "step": 8120 |
| }, |
| { |
| "epoch": 41.06060606060606, |
| "grad_norm": 0.08263850212097168, |
| "learning_rate": 6.910361529706997e-05, |
| "loss": 0.0077, |
| "step": 8130 |
| }, |
| { |
| "epoch": 41.111111111111114, |
| "grad_norm": 0.08942471444606781, |
| "learning_rate": 6.902718796336131e-05, |
| "loss": 0.0056, |
| "step": 8140 |
| }, |
| { |
| "epoch": 41.16161616161616, |
| "grad_norm": 0.0962069183588028, |
| "learning_rate": 6.895070861004729e-05, |
| "loss": 0.006, |
| "step": 8150 |
| }, |
| { |
| "epoch": 41.21212121212121, |
| "grad_norm": 0.09362100064754486, |
| "learning_rate": 6.887417744621956e-05, |
| "loss": 0.0069, |
| "step": 8160 |
| }, |
| { |
| "epoch": 41.26262626262626, |
| "grad_norm": 0.1343112587928772, |
| "learning_rate": 6.87975946811114e-05, |
| "loss": 0.0083, |
| "step": 8170 |
| }, |
| { |
| "epoch": 41.313131313131315, |
| "grad_norm": 0.1238149031996727, |
| "learning_rate": 6.872096052409718e-05, |
| "loss": 0.007, |
| "step": 8180 |
| }, |
| { |
| "epoch": 41.36363636363637, |
| "grad_norm": 0.09413960576057434, |
| "learning_rate": 6.864427518469174e-05, |
| "loss": 0.006, |
| "step": 8190 |
| }, |
| { |
| "epoch": 41.41414141414141, |
| "grad_norm": 0.12410678714513779, |
| "learning_rate": 6.856753887254986e-05, |
| "loss": 0.0055, |
| "step": 8200 |
| }, |
| { |
| "epoch": 41.464646464646464, |
| "grad_norm": 0.14607135951519012, |
| "learning_rate": 6.849075179746572e-05, |
| "loss": 0.008, |
| "step": 8210 |
| }, |
| { |
| "epoch": 41.515151515151516, |
| "grad_norm": 0.11495771259069443, |
| "learning_rate": 6.841391416937221e-05, |
| "loss": 0.008, |
| "step": 8220 |
| }, |
| { |
| "epoch": 41.56565656565657, |
| "grad_norm": 0.09883935749530792, |
| "learning_rate": 6.833702619834053e-05, |
| "loss": 0.0063, |
| "step": 8230 |
| }, |
| { |
| "epoch": 41.61616161616162, |
| "grad_norm": 0.0959559977054596, |
| "learning_rate": 6.82600880945794e-05, |
| "loss": 0.0065, |
| "step": 8240 |
| }, |
| { |
| "epoch": 41.666666666666664, |
| "grad_norm": 0.0858960971236229, |
| "learning_rate": 6.818310006843468e-05, |
| "loss": 0.0059, |
| "step": 8250 |
| }, |
| { |
| "epoch": 41.717171717171716, |
| "grad_norm": 0.08538820594549179, |
| "learning_rate": 6.810606233038868e-05, |
| "loss": 0.0066, |
| "step": 8260 |
| }, |
| { |
| "epoch": 41.76767676767677, |
| "grad_norm": 0.06607403606176376, |
| "learning_rate": 6.802897509105966e-05, |
| "loss": 0.0053, |
| "step": 8270 |
| }, |
| { |
| "epoch": 41.81818181818182, |
| "grad_norm": 0.1101832315325737, |
| "learning_rate": 6.79518385612012e-05, |
| "loss": 0.0084, |
| "step": 8280 |
| }, |
| { |
| "epoch": 41.86868686868687, |
| "grad_norm": 0.11751208454370499, |
| "learning_rate": 6.787465295170157e-05, |
| "loss": 0.0069, |
| "step": 8290 |
| }, |
| { |
| "epoch": 41.91919191919192, |
| "grad_norm": 0.09388010203838348, |
| "learning_rate": 6.779741847358332e-05, |
| "loss": 0.0065, |
| "step": 8300 |
| }, |
| { |
| "epoch": 41.96969696969697, |
| "grad_norm": 0.10893885046243668, |
| "learning_rate": 6.772013533800256e-05, |
| "loss": 0.0053, |
| "step": 8310 |
| }, |
| { |
| "epoch": 42.02020202020202, |
| "grad_norm": 0.08084888011217117, |
| "learning_rate": 6.764280375624843e-05, |
| "loss": 0.0059, |
| "step": 8320 |
| }, |
| { |
| "epoch": 42.07070707070707, |
| "grad_norm": 0.07593891024589539, |
| "learning_rate": 6.756542393974252e-05, |
| "loss": 0.0057, |
| "step": 8330 |
| }, |
| { |
| "epoch": 42.121212121212125, |
| "grad_norm": 0.0631684958934784, |
| "learning_rate": 6.748799610003828e-05, |
| "loss": 0.0046, |
| "step": 8340 |
| }, |
| { |
| "epoch": 42.17171717171717, |
| "grad_norm": 0.05589769408106804, |
| "learning_rate": 6.741052044882048e-05, |
| "loss": 0.0071, |
| "step": 8350 |
| }, |
| { |
| "epoch": 42.22222222222222, |
| "grad_norm": 0.07409624010324478, |
| "learning_rate": 6.73329971979046e-05, |
| "loss": 0.0063, |
| "step": 8360 |
| }, |
| { |
| "epoch": 42.27272727272727, |
| "grad_norm": 0.08300615847110748, |
| "learning_rate": 6.725542655923625e-05, |
| "loss": 0.006, |
| "step": 8370 |
| }, |
| { |
| "epoch": 42.323232323232325, |
| "grad_norm": 0.07565245777368546, |
| "learning_rate": 6.717780874489057e-05, |
| "loss": 0.006, |
| "step": 8380 |
| }, |
| { |
| "epoch": 42.37373737373738, |
| "grad_norm": 0.09201968461275101, |
| "learning_rate": 6.710014396707172e-05, |
| "loss": 0.0057, |
| "step": 8390 |
| }, |
| { |
| "epoch": 42.42424242424242, |
| "grad_norm": 0.07684004306793213, |
| "learning_rate": 6.702243243811221e-05, |
| "loss": 0.0053, |
| "step": 8400 |
| }, |
| { |
| "epoch": 42.474747474747474, |
| "grad_norm": 0.11335530877113342, |
| "learning_rate": 6.694467437047244e-05, |
| "loss": 0.0066, |
| "step": 8410 |
| }, |
| { |
| "epoch": 42.525252525252526, |
| "grad_norm": 0.08338102698326111, |
| "learning_rate": 6.686686997673997e-05, |
| "loss": 0.0061, |
| "step": 8420 |
| }, |
| { |
| "epoch": 42.57575757575758, |
| "grad_norm": 0.0817718654870987, |
| "learning_rate": 6.678901946962903e-05, |
| "loss": 0.0065, |
| "step": 8430 |
| }, |
| { |
| "epoch": 42.62626262626263, |
| "grad_norm": 0.09911955147981644, |
| "learning_rate": 6.671112306197996e-05, |
| "loss": 0.007, |
| "step": 8440 |
| }, |
| { |
| "epoch": 42.676767676767675, |
| "grad_norm": 0.11123485863208771, |
| "learning_rate": 6.663318096675854e-05, |
| "loss": 0.0075, |
| "step": 8450 |
| }, |
| { |
| "epoch": 42.72727272727273, |
| "grad_norm": 0.0967571809887886, |
| "learning_rate": 6.655519339705552e-05, |
| "loss": 0.0052, |
| "step": 8460 |
| }, |
| { |
| "epoch": 42.77777777777778, |
| "grad_norm": 0.08213367313146591, |
| "learning_rate": 6.647716056608588e-05, |
| "loss": 0.0073, |
| "step": 8470 |
| }, |
| { |
| "epoch": 42.82828282828283, |
| "grad_norm": 0.08507321029901505, |
| "learning_rate": 6.639908268718843e-05, |
| "loss": 0.0071, |
| "step": 8480 |
| }, |
| { |
| "epoch": 42.878787878787875, |
| "grad_norm": 0.09975874423980713, |
| "learning_rate": 6.632095997382514e-05, |
| "loss": 0.0077, |
| "step": 8490 |
| }, |
| { |
| "epoch": 42.92929292929293, |
| "grad_norm": 0.10843493044376373, |
| "learning_rate": 6.624279263958047e-05, |
| "loss": 0.0076, |
| "step": 8500 |
| }, |
| { |
| "epoch": 42.97979797979798, |
| "grad_norm": 0.08945771306753159, |
| "learning_rate": 6.616458089816097e-05, |
| "loss": 0.0077, |
| "step": 8510 |
| }, |
| { |
| "epoch": 43.03030303030303, |
| "grad_norm": 0.0653749406337738, |
| "learning_rate": 6.608632496339454e-05, |
| "loss": 0.0056, |
| "step": 8520 |
| }, |
| { |
| "epoch": 43.08080808080808, |
| "grad_norm": 0.08834986388683319, |
| "learning_rate": 6.600802504922988e-05, |
| "loss": 0.0059, |
| "step": 8530 |
| }, |
| { |
| "epoch": 43.13131313131313, |
| "grad_norm": 0.07630692422389984, |
| "learning_rate": 6.592968136973604e-05, |
| "loss": 0.0046, |
| "step": 8540 |
| }, |
| { |
| "epoch": 43.18181818181818, |
| "grad_norm": 0.07411595433950424, |
| "learning_rate": 6.585129413910159e-05, |
| "loss": 0.0049, |
| "step": 8550 |
| }, |
| { |
| "epoch": 43.23232323232323, |
| "grad_norm": 0.09347326308488846, |
| "learning_rate": 6.577286357163424e-05, |
| "loss": 0.0058, |
| "step": 8560 |
| }, |
| { |
| "epoch": 43.282828282828284, |
| "grad_norm": 0.08481166511774063, |
| "learning_rate": 6.569438988176018e-05, |
| "loss": 0.0044, |
| "step": 8570 |
| }, |
| { |
| "epoch": 43.333333333333336, |
| "grad_norm": 0.1018621101975441, |
| "learning_rate": 6.561587328402347e-05, |
| "loss": 0.0072, |
| "step": 8580 |
| }, |
| { |
| "epoch": 43.38383838383838, |
| "grad_norm": 0.07204155623912811, |
| "learning_rate": 6.553731399308549e-05, |
| "loss": 0.0062, |
| "step": 8590 |
| }, |
| { |
| "epoch": 43.43434343434343, |
| "grad_norm": 0.0675915852189064, |
| "learning_rate": 6.545871222372436e-05, |
| "loss": 0.0049, |
| "step": 8600 |
| }, |
| { |
| "epoch": 43.484848484848484, |
| "grad_norm": 0.10433326661586761, |
| "learning_rate": 6.538006819083426e-05, |
| "loss": 0.0066, |
| "step": 8610 |
| }, |
| { |
| "epoch": 43.535353535353536, |
| "grad_norm": 0.08985594660043716, |
| "learning_rate": 6.530138210942505e-05, |
| "loss": 0.0068, |
| "step": 8620 |
| }, |
| { |
| "epoch": 43.58585858585859, |
| "grad_norm": 0.12887050211429596, |
| "learning_rate": 6.522265419462141e-05, |
| "loss": 0.0068, |
| "step": 8630 |
| }, |
| { |
| "epoch": 43.63636363636363, |
| "grad_norm": 0.0892995148897171, |
| "learning_rate": 6.514388466166248e-05, |
| "loss": 0.0075, |
| "step": 8640 |
| }, |
| { |
| "epoch": 43.686868686868685, |
| "grad_norm": 0.07329066842794418, |
| "learning_rate": 6.506507372590119e-05, |
| "loss": 0.0054, |
| "step": 8650 |
| }, |
| { |
| "epoch": 43.73737373737374, |
| "grad_norm": 0.08778270334005356, |
| "learning_rate": 6.498622160280355e-05, |
| "loss": 0.0063, |
| "step": 8660 |
| }, |
| { |
| "epoch": 43.78787878787879, |
| "grad_norm": 0.0945509523153305, |
| "learning_rate": 6.490732850794832e-05, |
| "loss": 0.0058, |
| "step": 8670 |
| }, |
| { |
| "epoch": 43.83838383838384, |
| "grad_norm": 0.06548502296209335, |
| "learning_rate": 6.482839465702616e-05, |
| "loss": 0.0053, |
| "step": 8680 |
| }, |
| { |
| "epoch": 43.888888888888886, |
| "grad_norm": 0.07383645325899124, |
| "learning_rate": 6.474942026583923e-05, |
| "loss": 0.0067, |
| "step": 8690 |
| }, |
| { |
| "epoch": 43.93939393939394, |
| "grad_norm": 0.06914056092500687, |
| "learning_rate": 6.467040555030052e-05, |
| "loss": 0.0061, |
| "step": 8700 |
| }, |
| { |
| "epoch": 43.98989898989899, |
| "grad_norm": 0.1021365225315094, |
| "learning_rate": 6.459135072643321e-05, |
| "loss": 0.0066, |
| "step": 8710 |
| }, |
| { |
| "epoch": 44.04040404040404, |
| "grad_norm": 0.0761815682053566, |
| "learning_rate": 6.451225601037019e-05, |
| "loss": 0.005, |
| "step": 8720 |
| }, |
| { |
| "epoch": 44.09090909090909, |
| "grad_norm": 0.09026603400707245, |
| "learning_rate": 6.443312161835338e-05, |
| "loss": 0.0064, |
| "step": 8730 |
| }, |
| { |
| "epoch": 44.14141414141414, |
| "grad_norm": 0.08395136892795563, |
| "learning_rate": 6.43539477667332e-05, |
| "loss": 0.0071, |
| "step": 8740 |
| }, |
| { |
| "epoch": 44.19191919191919, |
| "grad_norm": 0.11590272933244705, |
| "learning_rate": 6.427473467196793e-05, |
| "loss": 0.0068, |
| "step": 8750 |
| }, |
| { |
| "epoch": 44.24242424242424, |
| "grad_norm": 0.06911282986402512, |
| "learning_rate": 6.419548255062315e-05, |
| "loss": 0.0049, |
| "step": 8760 |
| }, |
| { |
| "epoch": 44.292929292929294, |
| "grad_norm": 0.11533380299806595, |
| "learning_rate": 6.411619161937112e-05, |
| "loss": 0.0046, |
| "step": 8770 |
| }, |
| { |
| "epoch": 44.343434343434346, |
| "grad_norm": 0.08067536354064941, |
| "learning_rate": 6.403686209499022e-05, |
| "loss": 0.0056, |
| "step": 8780 |
| }, |
| { |
| "epoch": 44.39393939393939, |
| "grad_norm": 0.08932334184646606, |
| "learning_rate": 6.395749419436437e-05, |
| "loss": 0.0069, |
| "step": 8790 |
| }, |
| { |
| "epoch": 44.44444444444444, |
| "grad_norm": 0.06973592191934586, |
| "learning_rate": 6.387808813448234e-05, |
| "loss": 0.0065, |
| "step": 8800 |
| }, |
| { |
| "epoch": 44.494949494949495, |
| "grad_norm": 0.08561541885137558, |
| "learning_rate": 6.37986441324373e-05, |
| "loss": 0.0051, |
| "step": 8810 |
| }, |
| { |
| "epoch": 44.54545454545455, |
| "grad_norm": 0.0895218551158905, |
| "learning_rate": 6.37191624054261e-05, |
| "loss": 0.0055, |
| "step": 8820 |
| }, |
| { |
| "epoch": 44.5959595959596, |
| "grad_norm": 0.10036598891019821, |
| "learning_rate": 6.363964317074872e-05, |
| "loss": 0.0059, |
| "step": 8830 |
| }, |
| { |
| "epoch": 44.64646464646464, |
| "grad_norm": 0.0781303197145462, |
| "learning_rate": 6.356008664580776e-05, |
| "loss": 0.0057, |
| "step": 8840 |
| }, |
| { |
| "epoch": 44.696969696969695, |
| "grad_norm": 0.10582607984542847, |
| "learning_rate": 6.348049304810771e-05, |
| "loss": 0.0055, |
| "step": 8850 |
| }, |
| { |
| "epoch": 44.74747474747475, |
| "grad_norm": 0.08705408871173859, |
| "learning_rate": 6.340086259525442e-05, |
| "loss": 0.0062, |
| "step": 8860 |
| }, |
| { |
| "epoch": 44.7979797979798, |
| "grad_norm": 0.09126641601324081, |
| "learning_rate": 6.332119550495448e-05, |
| "loss": 0.0065, |
| "step": 8870 |
| }, |
| { |
| "epoch": 44.84848484848485, |
| "grad_norm": 0.07082478702068329, |
| "learning_rate": 6.324149199501473e-05, |
| "loss": 0.0053, |
| "step": 8880 |
| }, |
| { |
| "epoch": 44.898989898989896, |
| "grad_norm": 0.09088589996099472, |
| "learning_rate": 6.316175228334146e-05, |
| "loss": 0.0053, |
| "step": 8890 |
| }, |
| { |
| "epoch": 44.94949494949495, |
| "grad_norm": 0.08115913718938828, |
| "learning_rate": 6.308197658794003e-05, |
| "loss": 0.0054, |
| "step": 8900 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 0.08952506631612778, |
| "learning_rate": 6.300216512691417e-05, |
| "loss": 0.0068, |
| "step": 8910 |
| }, |
| { |
| "epoch": 45.05050505050505, |
| "grad_norm": 0.055260710418224335, |
| "learning_rate": 6.292231811846532e-05, |
| "loss": 0.0066, |
| "step": 8920 |
| }, |
| { |
| "epoch": 45.101010101010104, |
| "grad_norm": 0.06796885281801224, |
| "learning_rate": 6.284243578089217e-05, |
| "loss": 0.0042, |
| "step": 8930 |
| }, |
| { |
| "epoch": 45.15151515151515, |
| "grad_norm": 0.0921490341424942, |
| "learning_rate": 6.276251833258999e-05, |
| "loss": 0.0061, |
| "step": 8940 |
| }, |
| { |
| "epoch": 45.2020202020202, |
| "grad_norm": 0.09780972450971603, |
| "learning_rate": 6.268256599205003e-05, |
| "loss": 0.0049, |
| "step": 8950 |
| }, |
| { |
| "epoch": 45.25252525252525, |
| "grad_norm": 0.09189427644014359, |
| "learning_rate": 6.260257897785892e-05, |
| "loss": 0.0058, |
| "step": 8960 |
| }, |
| { |
| "epoch": 45.303030303030305, |
| "grad_norm": 0.06331084668636322, |
| "learning_rate": 6.252255750869811e-05, |
| "loss": 0.0069, |
| "step": 8970 |
| }, |
| { |
| "epoch": 45.35353535353536, |
| "grad_norm": 0.05751638114452362, |
| "learning_rate": 6.244250180334325e-05, |
| "loss": 0.0055, |
| "step": 8980 |
| }, |
| { |
| "epoch": 45.4040404040404, |
| "grad_norm": 0.07604741305112839, |
| "learning_rate": 6.236241208066356e-05, |
| "loss": 0.0055, |
| "step": 8990 |
| }, |
| { |
| "epoch": 45.45454545454545, |
| "grad_norm": 0.08801376074552536, |
| "learning_rate": 6.228228855962133e-05, |
| "loss": 0.0058, |
| "step": 9000 |
| }, |
| { |
| "epoch": 45.505050505050505, |
| "grad_norm": 0.09753900021314621, |
| "learning_rate": 6.220213145927115e-05, |
| "loss": 0.0096, |
| "step": 9010 |
| }, |
| { |
| "epoch": 45.55555555555556, |
| "grad_norm": 0.08806826919317245, |
| "learning_rate": 6.212194099875951e-05, |
| "loss": 0.0067, |
| "step": 9020 |
| }, |
| { |
| "epoch": 45.60606060606061, |
| "grad_norm": 0.08526429533958435, |
| "learning_rate": 6.204171739732405e-05, |
| "loss": 0.0055, |
| "step": 9030 |
| }, |
| { |
| "epoch": 45.656565656565654, |
| "grad_norm": 0.08367390185594559, |
| "learning_rate": 6.196146087429303e-05, |
| "loss": 0.0056, |
| "step": 9040 |
| }, |
| { |
| "epoch": 45.707070707070706, |
| "grad_norm": 0.09870857000350952, |
| "learning_rate": 6.188117164908474e-05, |
| "loss": 0.0082, |
| "step": 9050 |
| }, |
| { |
| "epoch": 45.75757575757576, |
| "grad_norm": 0.07542259991168976, |
| "learning_rate": 6.180084994120684e-05, |
| "loss": 0.0061, |
| "step": 9060 |
| }, |
| { |
| "epoch": 45.80808080808081, |
| "grad_norm": 0.06929942965507507, |
| "learning_rate": 6.17204959702558e-05, |
| "loss": 0.0068, |
| "step": 9070 |
| }, |
| { |
| "epoch": 45.85858585858586, |
| "grad_norm": 0.09860829263925552, |
| "learning_rate": 6.164010995591635e-05, |
| "loss": 0.0064, |
| "step": 9080 |
| }, |
| { |
| "epoch": 45.90909090909091, |
| "grad_norm": 0.06903965026140213, |
| "learning_rate": 6.155969211796076e-05, |
| "loss": 0.005, |
| "step": 9090 |
| }, |
| { |
| "epoch": 45.95959595959596, |
| "grad_norm": 0.06512020528316498, |
| "learning_rate": 6.147924267624829e-05, |
| "loss": 0.0049, |
| "step": 9100 |
| }, |
| { |
| "epoch": 46.01010101010101, |
| "grad_norm": 0.09591370075941086, |
| "learning_rate": 6.13987618507247e-05, |
| "loss": 0.0073, |
| "step": 9110 |
| }, |
| { |
| "epoch": 46.06060606060606, |
| "grad_norm": 0.0899357721209526, |
| "learning_rate": 6.131824986142147e-05, |
| "loss": 0.0056, |
| "step": 9120 |
| }, |
| { |
| "epoch": 46.111111111111114, |
| "grad_norm": 0.1200484037399292, |
| "learning_rate": 6.123770692845529e-05, |
| "loss": 0.0066, |
| "step": 9130 |
| }, |
| { |
| "epoch": 46.16161616161616, |
| "grad_norm": 0.09225388616323471, |
| "learning_rate": 6.11571332720275e-05, |
| "loss": 0.0099, |
| "step": 9140 |
| }, |
| { |
| "epoch": 46.21212121212121, |
| "grad_norm": 0.07816360890865326, |
| "learning_rate": 6.107652911242336e-05, |
| "loss": 0.005, |
| "step": 9150 |
| }, |
| { |
| "epoch": 46.26262626262626, |
| "grad_norm": 0.061487093567848206, |
| "learning_rate": 6.0995894670011586e-05, |
| "loss": 0.0058, |
| "step": 9160 |
| }, |
| { |
| "epoch": 46.313131313131315, |
| "grad_norm": 0.08314535021781921, |
| "learning_rate": 6.091523016524368e-05, |
| "loss": 0.0053, |
| "step": 9170 |
| }, |
| { |
| "epoch": 46.36363636363637, |
| "grad_norm": 0.07461676746606827, |
| "learning_rate": 6.083453581865328e-05, |
| "loss": 0.0072, |
| "step": 9180 |
| }, |
| { |
| "epoch": 46.41414141414141, |
| "grad_norm": 0.078267902135849, |
| "learning_rate": 6.075381185085568e-05, |
| "loss": 0.0051, |
| "step": 9190 |
| }, |
| { |
| "epoch": 46.464646464646464, |
| "grad_norm": 0.09539106488227844, |
| "learning_rate": 6.067305848254709e-05, |
| "loss": 0.0074, |
| "step": 9200 |
| }, |
| { |
| "epoch": 46.515151515151516, |
| "grad_norm": 0.08689859509468079, |
| "learning_rate": 6.059227593450418e-05, |
| "loss": 0.0057, |
| "step": 9210 |
| }, |
| { |
| "epoch": 46.56565656565657, |
| "grad_norm": 0.07925509661436081, |
| "learning_rate": 6.051146442758333e-05, |
| "loss": 0.0066, |
| "step": 9220 |
| }, |
| { |
| "epoch": 46.61616161616162, |
| "grad_norm": 0.08211587369441986, |
| "learning_rate": 6.043062418272012e-05, |
| "loss": 0.0069, |
| "step": 9230 |
| }, |
| { |
| "epoch": 46.666666666666664, |
| "grad_norm": 0.09856962412595749, |
| "learning_rate": 6.0349755420928666e-05, |
| "loss": 0.0051, |
| "step": 9240 |
| }, |
| { |
| "epoch": 46.717171717171716, |
| "grad_norm": 0.07650469243526459, |
| "learning_rate": 6.0268858363301105e-05, |
| "loss": 0.0074, |
| "step": 9250 |
| }, |
| { |
| "epoch": 46.76767676767677, |
| "grad_norm": 0.07090752571821213, |
| "learning_rate": 6.018793323100689e-05, |
| "loss": 0.0063, |
| "step": 9260 |
| }, |
| { |
| "epoch": 46.81818181818182, |
| "grad_norm": 0.08118520677089691, |
| "learning_rate": 6.0106980245292255e-05, |
| "loss": 0.0047, |
| "step": 9270 |
| }, |
| { |
| "epoch": 46.86868686868687, |
| "grad_norm": 0.08310334384441376, |
| "learning_rate": 6.002599962747957e-05, |
| "loss": 0.0045, |
| "step": 9280 |
| }, |
| { |
| "epoch": 46.91919191919192, |
| "grad_norm": 0.07714284211397171, |
| "learning_rate": 5.994499159896673e-05, |
| "loss": 0.0059, |
| "step": 9290 |
| }, |
| { |
| "epoch": 46.96969696969697, |
| "grad_norm": 0.07758394628763199, |
| "learning_rate": 5.9863956381226607e-05, |
| "loss": 0.0066, |
| "step": 9300 |
| }, |
| { |
| "epoch": 47.02020202020202, |
| "grad_norm": 0.08241862058639526, |
| "learning_rate": 5.9782894195806394e-05, |
| "loss": 0.0043, |
| "step": 9310 |
| }, |
| { |
| "epoch": 47.07070707070707, |
| "grad_norm": 0.09015623480081558, |
| "learning_rate": 5.9701805264327004e-05, |
| "loss": 0.0065, |
| "step": 9320 |
| }, |
| { |
| "epoch": 47.121212121212125, |
| "grad_norm": 0.07688300311565399, |
| "learning_rate": 5.96206898084825e-05, |
| "loss": 0.0051, |
| "step": 9330 |
| }, |
| { |
| "epoch": 47.17171717171717, |
| "grad_norm": 0.07197550684213638, |
| "learning_rate": 5.953954805003942e-05, |
| "loss": 0.0043, |
| "step": 9340 |
| }, |
| { |
| "epoch": 47.22222222222222, |
| "grad_norm": 0.07104367017745972, |
| "learning_rate": 5.945838021083623e-05, |
| "loss": 0.0055, |
| "step": 9350 |
| }, |
| { |
| "epoch": 47.27272727272727, |
| "grad_norm": 0.07048725336790085, |
| "learning_rate": 5.9377186512782714e-05, |
| "loss": 0.0078, |
| "step": 9360 |
| }, |
| { |
| "epoch": 47.323232323232325, |
| "grad_norm": 0.06764592975378036, |
| "learning_rate": 5.929596717785935e-05, |
| "loss": 0.006, |
| "step": 9370 |
| }, |
| { |
| "epoch": 47.37373737373738, |
| "grad_norm": 0.08396624028682709, |
| "learning_rate": 5.921472242811668e-05, |
| "loss": 0.006, |
| "step": 9380 |
| }, |
| { |
| "epoch": 47.42424242424242, |
| "grad_norm": 0.0651288777589798, |
| "learning_rate": 5.913345248567475e-05, |
| "loss": 0.0035, |
| "step": 9390 |
| }, |
| { |
| "epoch": 47.474747474747474, |
| "grad_norm": 0.06895799189805984, |
| "learning_rate": 5.905215757272248e-05, |
| "loss": 0.0058, |
| "step": 9400 |
| }, |
| { |
| "epoch": 47.525252525252526, |
| "grad_norm": 0.0806833878159523, |
| "learning_rate": 5.897083791151706e-05, |
| "loss": 0.0056, |
| "step": 9410 |
| }, |
| { |
| "epoch": 47.57575757575758, |
| "grad_norm": 0.07073444128036499, |
| "learning_rate": 5.888949372438336e-05, |
| "loss": 0.006, |
| "step": 9420 |
| }, |
| { |
| "epoch": 47.62626262626263, |
| "grad_norm": 0.10627099126577377, |
| "learning_rate": 5.8808125233713255e-05, |
| "loss": 0.0071, |
| "step": 9430 |
| }, |
| { |
| "epoch": 47.676767676767675, |
| "grad_norm": 0.07969611138105392, |
| "learning_rate": 5.872673266196509e-05, |
| "loss": 0.0074, |
| "step": 9440 |
| }, |
| { |
| "epoch": 47.72727272727273, |
| "grad_norm": 0.08513494580984116, |
| "learning_rate": 5.864531623166305e-05, |
| "loss": 0.0055, |
| "step": 9450 |
| }, |
| { |
| "epoch": 47.77777777777778, |
| "grad_norm": 0.07286807894706726, |
| "learning_rate": 5.856387616539656e-05, |
| "loss": 0.0066, |
| "step": 9460 |
| }, |
| { |
| "epoch": 47.82828282828283, |
| "grad_norm": 0.09580907225608826, |
| "learning_rate": 5.848241268581967e-05, |
| "loss": 0.0056, |
| "step": 9470 |
| }, |
| { |
| "epoch": 47.878787878787875, |
| "grad_norm": 0.0854707881808281, |
| "learning_rate": 5.840092601565037e-05, |
| "loss": 0.0069, |
| "step": 9480 |
| }, |
| { |
| "epoch": 47.92929292929293, |
| "grad_norm": 0.09100658446550369, |
| "learning_rate": 5.8319416377670144e-05, |
| "loss": 0.0062, |
| "step": 9490 |
| }, |
| { |
| "epoch": 47.97979797979798, |
| "grad_norm": 0.08703695237636566, |
| "learning_rate": 5.82378839947232e-05, |
| "loss": 0.0073, |
| "step": 9500 |
| }, |
| { |
| "epoch": 48.03030303030303, |
| "grad_norm": 0.09483689069747925, |
| "learning_rate": 5.815632908971599e-05, |
| "loss": 0.0057, |
| "step": 9510 |
| }, |
| { |
| "epoch": 48.08080808080808, |
| "grad_norm": 0.107728973031044, |
| "learning_rate": 5.80747518856165e-05, |
| "loss": 0.01, |
| "step": 9520 |
| }, |
| { |
| "epoch": 48.13131313131313, |
| "grad_norm": 0.08231758326292038, |
| "learning_rate": 5.799315260545367e-05, |
| "loss": 0.0049, |
| "step": 9530 |
| }, |
| { |
| "epoch": 48.18181818181818, |
| "grad_norm": 0.09906929731369019, |
| "learning_rate": 5.791153147231686e-05, |
| "loss": 0.0057, |
| "step": 9540 |
| }, |
| { |
| "epoch": 48.23232323232323, |
| "grad_norm": 0.1140546202659607, |
| "learning_rate": 5.782988870935509e-05, |
| "loss": 0.0056, |
| "step": 9550 |
| }, |
| { |
| "epoch": 48.282828282828284, |
| "grad_norm": 0.08557623624801636, |
| "learning_rate": 5.774822453977657e-05, |
| "loss": 0.0079, |
| "step": 9560 |
| }, |
| { |
| "epoch": 48.333333333333336, |
| "grad_norm": 0.08940117806196213, |
| "learning_rate": 5.7666539186848036e-05, |
| "loss": 0.0064, |
| "step": 9570 |
| }, |
| { |
| "epoch": 48.38383838383838, |
| "grad_norm": 0.08870693296194077, |
| "learning_rate": 5.758483287389411e-05, |
| "loss": 0.0058, |
| "step": 9580 |
| }, |
| { |
| "epoch": 48.43434343434343, |
| "grad_norm": 0.11826785653829575, |
| "learning_rate": 5.7503105824296735e-05, |
| "loss": 0.008, |
| "step": 9590 |
| }, |
| { |
| "epoch": 48.484848484848484, |
| "grad_norm": 0.08257710188627243, |
| "learning_rate": 5.742135826149453e-05, |
| "loss": 0.0069, |
| "step": 9600 |
| }, |
| { |
| "epoch": 48.535353535353536, |
| "grad_norm": 0.09031946957111359, |
| "learning_rate": 5.7339590408982223e-05, |
| "loss": 0.0075, |
| "step": 9610 |
| }, |
| { |
| "epoch": 48.58585858585859, |
| "grad_norm": 0.0918421596288681, |
| "learning_rate": 5.725780249031e-05, |
| "loss": 0.0077, |
| "step": 9620 |
| }, |
| { |
| "epoch": 48.63636363636363, |
| "grad_norm": 0.0809360221028328, |
| "learning_rate": 5.717599472908292e-05, |
| "loss": 0.0088, |
| "step": 9630 |
| }, |
| { |
| "epoch": 48.686868686868685, |
| "grad_norm": 0.10445072501897812, |
| "learning_rate": 5.7094167348960237e-05, |
| "loss": 0.0049, |
| "step": 9640 |
| }, |
| { |
| "epoch": 48.73737373737374, |
| "grad_norm": 0.07750872522592545, |
| "learning_rate": 5.7012320573654945e-05, |
| "loss": 0.0062, |
| "step": 9650 |
| }, |
| { |
| "epoch": 48.78787878787879, |
| "grad_norm": 0.11803114414215088, |
| "learning_rate": 5.693045462693295e-05, |
| "loss": 0.0052, |
| "step": 9660 |
| }, |
| { |
| "epoch": 48.83838383838384, |
| "grad_norm": 0.07798698544502258, |
| "learning_rate": 5.684856973261266e-05, |
| "loss": 0.0047, |
| "step": 9670 |
| }, |
| { |
| "epoch": 48.888888888888886, |
| "grad_norm": 0.07985520362854004, |
| "learning_rate": 5.6766666114564215e-05, |
| "loss": 0.0062, |
| "step": 9680 |
| }, |
| { |
| "epoch": 48.93939393939394, |
| "grad_norm": 0.08038681745529175, |
| "learning_rate": 5.668474399670899e-05, |
| "loss": 0.0059, |
| "step": 9690 |
| }, |
| { |
| "epoch": 48.98989898989899, |
| "grad_norm": 0.0792309045791626, |
| "learning_rate": 5.660280360301896e-05, |
| "loss": 0.007, |
| "step": 9700 |
| }, |
| { |
| "epoch": 49.04040404040404, |
| "grad_norm": 0.0572558157145977, |
| "learning_rate": 5.652084515751599e-05, |
| "loss": 0.0039, |
| "step": 9710 |
| }, |
| { |
| "epoch": 49.09090909090909, |
| "grad_norm": 0.086373470723629, |
| "learning_rate": 5.643886888427137e-05, |
| "loss": 0.0051, |
| "step": 9720 |
| }, |
| { |
| "epoch": 49.14141414141414, |
| "grad_norm": 0.09625247865915298, |
| "learning_rate": 5.6356875007405074e-05, |
| "loss": 0.0068, |
| "step": 9730 |
| }, |
| { |
| "epoch": 49.19191919191919, |
| "grad_norm": 0.08003372699022293, |
| "learning_rate": 5.627486375108525e-05, |
| "loss": 0.0057, |
| "step": 9740 |
| }, |
| { |
| "epoch": 49.24242424242424, |
| "grad_norm": 0.07249989360570908, |
| "learning_rate": 5.619283533952754e-05, |
| "loss": 0.0048, |
| "step": 9750 |
| }, |
| { |
| "epoch": 49.292929292929294, |
| "grad_norm": 0.11093539744615555, |
| "learning_rate": 5.6110789996994474e-05, |
| "loss": 0.0071, |
| "step": 9760 |
| }, |
| { |
| "epoch": 49.343434343434346, |
| "grad_norm": 0.06999137252569199, |
| "learning_rate": 5.602872794779491e-05, |
| "loss": 0.005, |
| "step": 9770 |
| }, |
| { |
| "epoch": 49.39393939393939, |
| "grad_norm": 0.0835166648030281, |
| "learning_rate": 5.594664941628334e-05, |
| "loss": 0.0056, |
| "step": 9780 |
| }, |
| { |
| "epoch": 49.44444444444444, |
| "grad_norm": 0.07963220030069351, |
| "learning_rate": 5.5864554626859324e-05, |
| "loss": 0.0048, |
| "step": 9790 |
| }, |
| { |
| "epoch": 49.494949494949495, |
| "grad_norm": 0.1068798378109932, |
| "learning_rate": 5.578244380396691e-05, |
| "loss": 0.0069, |
| "step": 9800 |
| }, |
| { |
| "epoch": 49.54545454545455, |
| "grad_norm": 0.0755709856748581, |
| "learning_rate": 5.570031717209394e-05, |
| "loss": 0.007, |
| "step": 9810 |
| }, |
| { |
| "epoch": 49.5959595959596, |
| "grad_norm": 0.09895585477352142, |
| "learning_rate": 5.561817495577147e-05, |
| "loss": 0.0063, |
| "step": 9820 |
| }, |
| { |
| "epoch": 49.64646464646464, |
| "grad_norm": 0.11327171325683594, |
| "learning_rate": 5.5536017379573215e-05, |
| "loss": 0.0061, |
| "step": 9830 |
| }, |
| { |
| "epoch": 49.696969696969695, |
| "grad_norm": 0.05810012295842171, |
| "learning_rate": 5.545384466811483e-05, |
| "loss": 0.0054, |
| "step": 9840 |
| }, |
| { |
| "epoch": 49.74747474747475, |
| "grad_norm": 0.09888649731874466, |
| "learning_rate": 5.5371657046053384e-05, |
| "loss": 0.0064, |
| "step": 9850 |
| }, |
| { |
| "epoch": 49.7979797979798, |
| "grad_norm": 0.07226064056158066, |
| "learning_rate": 5.528945473808669e-05, |
| "loss": 0.0052, |
| "step": 9860 |
| }, |
| { |
| "epoch": 49.84848484848485, |
| "grad_norm": 0.09987892210483551, |
| "learning_rate": 5.520723796895272e-05, |
| "loss": 0.0065, |
| "step": 9870 |
| }, |
| { |
| "epoch": 49.898989898989896, |
| "grad_norm": 0.13089631497859955, |
| "learning_rate": 5.512500696342897e-05, |
| "loss": 0.0077, |
| "step": 9880 |
| }, |
| { |
| "epoch": 49.94949494949495, |
| "grad_norm": 0.07991752028465271, |
| "learning_rate": 5.504276194633188e-05, |
| "loss": 0.0086, |
| "step": 9890 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.11326275765895844, |
| "learning_rate": 5.49605031425162e-05, |
| "loss": 0.007, |
| "step": 9900 |
| }, |
| { |
| "epoch": 50.05050505050505, |
| "grad_norm": 0.10719384253025055, |
| "learning_rate": 5.487823077687434e-05, |
| "loss": 0.006, |
| "step": 9910 |
| }, |
| { |
| "epoch": 50.101010101010104, |
| "grad_norm": 0.08654998987913132, |
| "learning_rate": 5.4795945074335806e-05, |
| "loss": 0.0069, |
| "step": 9920 |
| }, |
| { |
| "epoch": 50.15151515151515, |
| "grad_norm": 0.11176794022321701, |
| "learning_rate": 5.471364625986657e-05, |
| "loss": 0.0057, |
| "step": 9930 |
| }, |
| { |
| "epoch": 50.2020202020202, |
| "grad_norm": 0.09996799379587173, |
| "learning_rate": 5.463133455846845e-05, |
| "loss": 0.0049, |
| "step": 9940 |
| }, |
| { |
| "epoch": 50.25252525252525, |
| "grad_norm": 0.07378264516592026, |
| "learning_rate": 5.4549010195178505e-05, |
| "loss": 0.0049, |
| "step": 9950 |
| }, |
| { |
| "epoch": 50.303030303030305, |
| "grad_norm": 0.09218428283929825, |
| "learning_rate": 5.446667339506838e-05, |
| "loss": 0.0101, |
| "step": 9960 |
| }, |
| { |
| "epoch": 50.35353535353536, |
| "grad_norm": 0.06757820397615433, |
| "learning_rate": 5.4384324383243756e-05, |
| "loss": 0.005, |
| "step": 9970 |
| }, |
| { |
| "epoch": 50.4040404040404, |
| "grad_norm": 0.08747211843729019, |
| "learning_rate": 5.430196338484368e-05, |
| "loss": 0.0075, |
| "step": 9980 |
| }, |
| { |
| "epoch": 50.45454545454545, |
| "grad_norm": 0.06817392259836197, |
| "learning_rate": 5.4219590625039975e-05, |
| "loss": 0.0049, |
| "step": 9990 |
| }, |
| { |
| "epoch": 50.505050505050505, |
| "grad_norm": 0.06360001862049103, |
| "learning_rate": 5.413720632903664e-05, |
| "loss": 0.005, |
| "step": 10000 |
| }, |
| { |
| "epoch": 50.55555555555556, |
| "grad_norm": 0.061094626784324646, |
| "learning_rate": 5.405481072206917e-05, |
| "loss": 0.0049, |
| "step": 10010 |
| }, |
| { |
| "epoch": 50.60606060606061, |
| "grad_norm": 0.05916479974985123, |
| "learning_rate": 5.397240402940402e-05, |
| "loss": 0.0055, |
| "step": 10020 |
| }, |
| { |
| "epoch": 50.656565656565654, |
| "grad_norm": 0.13299202919006348, |
| "learning_rate": 5.388998647633794e-05, |
| "loss": 0.0069, |
| "step": 10030 |
| }, |
| { |
| "epoch": 50.707070707070706, |
| "grad_norm": 0.10348938405513763, |
| "learning_rate": 5.380755828819737e-05, |
| "loss": 0.0049, |
| "step": 10040 |
| }, |
| { |
| "epoch": 50.75757575757576, |
| "grad_norm": 0.09889785945415497, |
| "learning_rate": 5.3725119690337846e-05, |
| "loss": 0.0048, |
| "step": 10050 |
| }, |
| { |
| "epoch": 50.80808080808081, |
| "grad_norm": 0.08522062748670578, |
| "learning_rate": 5.3642670908143324e-05, |
| "loss": 0.0048, |
| "step": 10060 |
| }, |
| { |
| "epoch": 50.85858585858586, |
| "grad_norm": 0.0868203416466713, |
| "learning_rate": 5.356021216702562e-05, |
| "loss": 0.0053, |
| "step": 10070 |
| }, |
| { |
| "epoch": 50.90909090909091, |
| "grad_norm": 0.09965244680643082, |
| "learning_rate": 5.347774369242381e-05, |
| "loss": 0.0054, |
| "step": 10080 |
| }, |
| { |
| "epoch": 50.95959595959596, |
| "grad_norm": 0.09508970379829407, |
| "learning_rate": 5.3395265709803545e-05, |
| "loss": 0.006, |
| "step": 10090 |
| }, |
| { |
| "epoch": 51.01010101010101, |
| "grad_norm": 0.09000930190086365, |
| "learning_rate": 5.331277844465647e-05, |
| "loss": 0.0057, |
| "step": 10100 |
| }, |
| { |
| "epoch": 51.06060606060606, |
| "grad_norm": 0.10642437636852264, |
| "learning_rate": 5.323028212249963e-05, |
| "loss": 0.0053, |
| "step": 10110 |
| }, |
| { |
| "epoch": 51.111111111111114, |
| "grad_norm": 0.0741676390171051, |
| "learning_rate": 5.314777696887481e-05, |
| "loss": 0.0047, |
| "step": 10120 |
| }, |
| { |
| "epoch": 51.16161616161616, |
| "grad_norm": 0.053581781685352325, |
| "learning_rate": 5.306526320934796e-05, |
| "loss": 0.0035, |
| "step": 10130 |
| }, |
| { |
| "epoch": 51.21212121212121, |
| "grad_norm": 0.0830620601773262, |
| "learning_rate": 5.298274106950854e-05, |
| "loss": 0.0047, |
| "step": 10140 |
| }, |
| { |
| "epoch": 51.26262626262626, |
| "grad_norm": 0.08052989840507507, |
| "learning_rate": 5.290021077496893e-05, |
| "loss": 0.0067, |
| "step": 10150 |
| }, |
| { |
| "epoch": 51.313131313131315, |
| "grad_norm": 0.07509579509496689, |
| "learning_rate": 5.2817672551363816e-05, |
| "loss": 0.0041, |
| "step": 10160 |
| }, |
| { |
| "epoch": 51.36363636363637, |
| "grad_norm": 0.0755707398056984, |
| "learning_rate": 5.273512662434952e-05, |
| "loss": 0.005, |
| "step": 10170 |
| }, |
| { |
| "epoch": 51.41414141414141, |
| "grad_norm": 0.06803159415721893, |
| "learning_rate": 5.265257321960349e-05, |
| "loss": 0.0051, |
| "step": 10180 |
| }, |
| { |
| "epoch": 51.464646464646464, |
| "grad_norm": 0.07200125604867935, |
| "learning_rate": 5.257001256282357e-05, |
| "loss": 0.0049, |
| "step": 10190 |
| }, |
| { |
| "epoch": 51.515151515151516, |
| "grad_norm": 0.07128570973873138, |
| "learning_rate": 5.248744487972742e-05, |
| "loss": 0.0049, |
| "step": 10200 |
| }, |
| { |
| "epoch": 51.56565656565657, |
| "grad_norm": 0.061628054827451706, |
| "learning_rate": 5.240487039605196e-05, |
| "loss": 0.0059, |
| "step": 10210 |
| }, |
| { |
| "epoch": 51.61616161616162, |
| "grad_norm": 0.08912592381238937, |
| "learning_rate": 5.232228933755267e-05, |
| "loss": 0.0061, |
| "step": 10220 |
| }, |
| { |
| "epoch": 51.666666666666664, |
| "grad_norm": 0.09351558983325958, |
| "learning_rate": 5.2239701930003006e-05, |
| "loss": 0.0063, |
| "step": 10230 |
| }, |
| { |
| "epoch": 51.717171717171716, |
| "grad_norm": 0.09715887904167175, |
| "learning_rate": 5.215710839919379e-05, |
| "loss": 0.0053, |
| "step": 10240 |
| }, |
| { |
| "epoch": 51.76767676767677, |
| "grad_norm": 0.06928492337465286, |
| "learning_rate": 5.207450897093257e-05, |
| "loss": 0.0042, |
| "step": 10250 |
| }, |
| { |
| "epoch": 51.81818181818182, |
| "grad_norm": 0.07262247800827026, |
| "learning_rate": 5.1991903871043046e-05, |
| "loss": 0.0052, |
| "step": 10260 |
| }, |
| { |
| "epoch": 51.86868686868687, |
| "grad_norm": 0.08397950977087021, |
| "learning_rate": 5.190929332536439e-05, |
| "loss": 0.0059, |
| "step": 10270 |
| }, |
| { |
| "epoch": 51.91919191919192, |
| "grad_norm": 0.07148350030183792, |
| "learning_rate": 5.182667755975071e-05, |
| "loss": 0.0047, |
| "step": 10280 |
| }, |
| { |
| "epoch": 51.96969696969697, |
| "grad_norm": 0.05985071882605553, |
| "learning_rate": 5.1744056800070315e-05, |
| "loss": 0.0058, |
| "step": 10290 |
| }, |
| { |
| "epoch": 52.02020202020202, |
| "grad_norm": 0.0841803178191185, |
| "learning_rate": 5.166143127220524e-05, |
| "loss": 0.0041, |
| "step": 10300 |
| }, |
| { |
| "epoch": 52.07070707070707, |
| "grad_norm": 0.07089784741401672, |
| "learning_rate": 5.1578801202050485e-05, |
| "loss": 0.005, |
| "step": 10310 |
| }, |
| { |
| "epoch": 52.121212121212125, |
| "grad_norm": 0.07146300375461578, |
| "learning_rate": 5.149616681551355e-05, |
| "loss": 0.0064, |
| "step": 10320 |
| }, |
| { |
| "epoch": 52.17171717171717, |
| "grad_norm": 0.07370232790708542, |
| "learning_rate": 5.141352833851367e-05, |
| "loss": 0.0044, |
| "step": 10330 |
| }, |
| { |
| "epoch": 52.22222222222222, |
| "grad_norm": 0.0585806630551815, |
| "learning_rate": 5.1330885996981285e-05, |
| "loss": 0.0056, |
| "step": 10340 |
| }, |
| { |
| "epoch": 52.27272727272727, |
| "grad_norm": 0.073703333735466, |
| "learning_rate": 5.124824001685741e-05, |
| "loss": 0.0051, |
| "step": 10350 |
| }, |
| { |
| "epoch": 52.323232323232325, |
| "grad_norm": 0.07572797685861588, |
| "learning_rate": 5.116559062409298e-05, |
| "loss": 0.0056, |
| "step": 10360 |
| }, |
| { |
| "epoch": 52.37373737373738, |
| "grad_norm": 0.06590241193771362, |
| "learning_rate": 5.10829380446483e-05, |
| "loss": 0.0055, |
| "step": 10370 |
| }, |
| { |
| "epoch": 52.42424242424242, |
| "grad_norm": 0.0645478218793869, |
| "learning_rate": 5.100028250449235e-05, |
| "loss": 0.0042, |
| "step": 10380 |
| }, |
| { |
| "epoch": 52.474747474747474, |
| "grad_norm": 0.1251508742570877, |
| "learning_rate": 5.0917624229602234e-05, |
| "loss": 0.0084, |
| "step": 10390 |
| }, |
| { |
| "epoch": 52.525252525252526, |
| "grad_norm": 0.09933517873287201, |
| "learning_rate": 5.0834963445962524e-05, |
| "loss": 0.0065, |
| "step": 10400 |
| }, |
| { |
| "epoch": 52.57575757575758, |
| "grad_norm": 0.07929286360740662, |
| "learning_rate": 5.075230037956461e-05, |
| "loss": 0.0057, |
| "step": 10410 |
| }, |
| { |
| "epoch": 52.62626262626263, |
| "grad_norm": 0.0616859495639801, |
| "learning_rate": 5.0669635256406213e-05, |
| "loss": 0.005, |
| "step": 10420 |
| }, |
| { |
| "epoch": 52.676767676767675, |
| "grad_norm": 0.10052742063999176, |
| "learning_rate": 5.058696830249058e-05, |
| "loss": 0.008, |
| "step": 10430 |
| }, |
| { |
| "epoch": 52.72727272727273, |
| "grad_norm": 0.07857155799865723, |
| "learning_rate": 5.050429974382602e-05, |
| "loss": 0.0067, |
| "step": 10440 |
| }, |
| { |
| "epoch": 52.77777777777778, |
| "grad_norm": 0.06531845778226852, |
| "learning_rate": 5.042162980642523e-05, |
| "loss": 0.0044, |
| "step": 10450 |
| }, |
| { |
| "epoch": 52.82828282828283, |
| "grad_norm": 0.06926816701889038, |
| "learning_rate": 5.033895871630462e-05, |
| "loss": 0.0045, |
| "step": 10460 |
| }, |
| { |
| "epoch": 52.878787878787875, |
| "grad_norm": 0.07584326714277267, |
| "learning_rate": 5.025628669948386e-05, |
| "loss": 0.0047, |
| "step": 10470 |
| }, |
| { |
| "epoch": 52.92929292929293, |
| "grad_norm": 0.053224917501211166, |
| "learning_rate": 5.017361398198502e-05, |
| "loss": 0.0035, |
| "step": 10480 |
| }, |
| { |
| "epoch": 52.97979797979798, |
| "grad_norm": 0.07290273159742355, |
| "learning_rate": 5.009094078983221e-05, |
| "loss": 0.0066, |
| "step": 10490 |
| }, |
| { |
| "epoch": 53.03030303030303, |
| "grad_norm": 0.06658138334751129, |
| "learning_rate": 5.000826734905073e-05, |
| "loss": 0.0042, |
| "step": 10500 |
| }, |
| { |
| "epoch": 53.08080808080808, |
| "grad_norm": 0.07196727395057678, |
| "learning_rate": 4.9925593885666645e-05, |
| "loss": 0.0074, |
| "step": 10510 |
| }, |
| { |
| "epoch": 53.13131313131313, |
| "grad_norm": 0.06606525182723999, |
| "learning_rate": 4.984292062570602e-05, |
| "loss": 0.0063, |
| "step": 10520 |
| }, |
| { |
| "epoch": 53.18181818181818, |
| "grad_norm": 0.0682004913687706, |
| "learning_rate": 4.976024779519442e-05, |
| "loss": 0.0054, |
| "step": 10530 |
| }, |
| { |
| "epoch": 53.23232323232323, |
| "grad_norm": 0.07657551765441895, |
| "learning_rate": 4.9677575620156194e-05, |
| "loss": 0.006, |
| "step": 10540 |
| }, |
| { |
| "epoch": 53.282828282828284, |
| "grad_norm": 0.07075445353984833, |
| "learning_rate": 4.959490432661391e-05, |
| "loss": 0.0058, |
| "step": 10550 |
| }, |
| { |
| "epoch": 53.333333333333336, |
| "grad_norm": 0.07303524017333984, |
| "learning_rate": 4.9512234140587726e-05, |
| "loss": 0.0063, |
| "step": 10560 |
| }, |
| { |
| "epoch": 53.38383838383838, |
| "grad_norm": 0.07229966670274734, |
| "learning_rate": 4.942956528809477e-05, |
| "loss": 0.0052, |
| "step": 10570 |
| }, |
| { |
| "epoch": 53.43434343434343, |
| "grad_norm": 0.07793861627578735, |
| "learning_rate": 4.934689799514854e-05, |
| "loss": 0.0086, |
| "step": 10580 |
| }, |
| { |
| "epoch": 53.484848484848484, |
| "grad_norm": 0.08973843604326248, |
| "learning_rate": 4.926423248775827e-05, |
| "loss": 0.0077, |
| "step": 10590 |
| }, |
| { |
| "epoch": 53.535353535353536, |
| "grad_norm": 0.06838146597146988, |
| "learning_rate": 4.918156899192826e-05, |
| "loss": 0.0058, |
| "step": 10600 |
| }, |
| { |
| "epoch": 53.58585858585859, |
| "grad_norm": 0.09407958388328552, |
| "learning_rate": 4.909890773365738e-05, |
| "loss": 0.0054, |
| "step": 10610 |
| }, |
| { |
| "epoch": 53.63636363636363, |
| "grad_norm": 0.08756718784570694, |
| "learning_rate": 4.9016248938938344e-05, |
| "loss": 0.0052, |
| "step": 10620 |
| }, |
| { |
| "epoch": 53.686868686868685, |
| "grad_norm": 0.06948436051607132, |
| "learning_rate": 4.8933592833757156e-05, |
| "loss": 0.0043, |
| "step": 10630 |
| }, |
| { |
| "epoch": 53.73737373737374, |
| "grad_norm": 0.09100628644227982, |
| "learning_rate": 4.8850939644092435e-05, |
| "loss": 0.0055, |
| "step": 10640 |
| }, |
| { |
| "epoch": 53.78787878787879, |
| "grad_norm": 0.07502390444278717, |
| "learning_rate": 4.876828959591485e-05, |
| "loss": 0.0055, |
| "step": 10650 |
| }, |
| { |
| "epoch": 53.83838383838384, |
| "grad_norm": 0.06284298747777939, |
| "learning_rate": 4.8685642915186474e-05, |
| "loss": 0.0046, |
| "step": 10660 |
| }, |
| { |
| "epoch": 53.888888888888886, |
| "grad_norm": 0.07325311750173569, |
| "learning_rate": 4.860299982786018e-05, |
| "loss": 0.0046, |
| "step": 10670 |
| }, |
| { |
| "epoch": 53.93939393939394, |
| "grad_norm": 0.0637088492512703, |
| "learning_rate": 4.852036055987901e-05, |
| "loss": 0.0051, |
| "step": 10680 |
| }, |
| { |
| "epoch": 53.98989898989899, |
| "grad_norm": 0.0807725265622139, |
| "learning_rate": 4.843772533717558e-05, |
| "loss": 0.0052, |
| "step": 10690 |
| }, |
| { |
| "epoch": 54.04040404040404, |
| "grad_norm": 0.10505745559930801, |
| "learning_rate": 4.835509438567142e-05, |
| "loss": 0.0058, |
| "step": 10700 |
| }, |
| { |
| "epoch": 54.09090909090909, |
| "grad_norm": 0.059285569936037064, |
| "learning_rate": 4.827246793127639e-05, |
| "loss": 0.0043, |
| "step": 10710 |
| }, |
| { |
| "epoch": 54.14141414141414, |
| "grad_norm": 0.06206269934773445, |
| "learning_rate": 4.818984619988807e-05, |
| "loss": 0.0047, |
| "step": 10720 |
| }, |
| { |
| "epoch": 54.19191919191919, |
| "grad_norm": 0.08005829900503159, |
| "learning_rate": 4.810722941739115e-05, |
| "loss": 0.0048, |
| "step": 10730 |
| }, |
| { |
| "epoch": 54.24242424242424, |
| "grad_norm": 0.06146629527211189, |
| "learning_rate": 4.8024617809656684e-05, |
| "loss": 0.0045, |
| "step": 10740 |
| }, |
| { |
| "epoch": 54.292929292929294, |
| "grad_norm": 0.09175661206245422, |
| "learning_rate": 4.794201160254171e-05, |
| "loss": 0.0073, |
| "step": 10750 |
| }, |
| { |
| "epoch": 54.343434343434346, |
| "grad_norm": 0.08606816083192825, |
| "learning_rate": 4.785941102188844e-05, |
| "loss": 0.0053, |
| "step": 10760 |
| }, |
| { |
| "epoch": 54.39393939393939, |
| "grad_norm": 0.07772952318191528, |
| "learning_rate": 4.7776816293523686e-05, |
| "loss": 0.0054, |
| "step": 10770 |
| }, |
| { |
| "epoch": 54.44444444444444, |
| "grad_norm": 0.10094921290874481, |
| "learning_rate": 4.769422764325832e-05, |
| "loss": 0.0079, |
| "step": 10780 |
| }, |
| { |
| "epoch": 54.494949494949495, |
| "grad_norm": 0.06703133136034012, |
| "learning_rate": 4.76116452968865e-05, |
| "loss": 0.0047, |
| "step": 10790 |
| }, |
| { |
| "epoch": 54.54545454545455, |
| "grad_norm": 0.06761986017227173, |
| "learning_rate": 4.752906948018525e-05, |
| "loss": 0.0044, |
| "step": 10800 |
| }, |
| { |
| "epoch": 54.5959595959596, |
| "grad_norm": 0.07833712548017502, |
| "learning_rate": 4.7446500418913684e-05, |
| "loss": 0.0058, |
| "step": 10810 |
| }, |
| { |
| "epoch": 54.64646464646464, |
| "grad_norm": 0.05290314182639122, |
| "learning_rate": 4.736393833881247e-05, |
| "loss": 0.0034, |
| "step": 10820 |
| }, |
| { |
| "epoch": 54.696969696969695, |
| "grad_norm": 0.0583065040409565, |
| "learning_rate": 4.7281383465603194e-05, |
| "loss": 0.0045, |
| "step": 10830 |
| }, |
| { |
| "epoch": 54.74747474747475, |
| "grad_norm": 0.08608434349298477, |
| "learning_rate": 4.71988360249877e-05, |
| "loss": 0.0051, |
| "step": 10840 |
| }, |
| { |
| "epoch": 54.7979797979798, |
| "grad_norm": 0.07287909090518951, |
| "learning_rate": 4.7116296242647554e-05, |
| "loss": 0.0073, |
| "step": 10850 |
| }, |
| { |
| "epoch": 54.84848484848485, |
| "grad_norm": 0.05687038600444794, |
| "learning_rate": 4.703376434424336e-05, |
| "loss": 0.0047, |
| "step": 10860 |
| }, |
| { |
| "epoch": 54.898989898989896, |
| "grad_norm": 0.055672843009233475, |
| "learning_rate": 4.695124055541421e-05, |
| "loss": 0.0038, |
| "step": 10870 |
| }, |
| { |
| "epoch": 54.94949494949495, |
| "grad_norm": 0.087603859603405, |
| "learning_rate": 4.6868725101776934e-05, |
| "loss": 0.0049, |
| "step": 10880 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 0.05893929302692413, |
| "learning_rate": 4.678621820892567e-05, |
| "loss": 0.0053, |
| "step": 10890 |
| }, |
| { |
| "epoch": 55.05050505050505, |
| "grad_norm": 0.0687452107667923, |
| "learning_rate": 4.670372010243111e-05, |
| "loss": 0.0055, |
| "step": 10900 |
| }, |
| { |
| "epoch": 55.101010101010104, |
| "grad_norm": 0.057449426501989365, |
| "learning_rate": 4.662123100783992e-05, |
| "loss": 0.0052, |
| "step": 10910 |
| }, |
| { |
| "epoch": 55.15151515151515, |
| "grad_norm": 0.07398934662342072, |
| "learning_rate": 4.653875115067415e-05, |
| "loss": 0.0055, |
| "step": 10920 |
| }, |
| { |
| "epoch": 55.2020202020202, |
| "grad_norm": 0.0607762411236763, |
| "learning_rate": 4.6456280756430545e-05, |
| "loss": 0.0043, |
| "step": 10930 |
| }, |
| { |
| "epoch": 55.25252525252525, |
| "grad_norm": 0.05980117619037628, |
| "learning_rate": 4.637382005058004e-05, |
| "loss": 0.006, |
| "step": 10940 |
| }, |
| { |
| "epoch": 55.303030303030305, |
| "grad_norm": 0.06854324787855148, |
| "learning_rate": 4.629136925856705e-05, |
| "loss": 0.0051, |
| "step": 10950 |
| }, |
| { |
| "epoch": 55.35353535353536, |
| "grad_norm": 0.07301419973373413, |
| "learning_rate": 4.6208928605808895e-05, |
| "loss": 0.0055, |
| "step": 10960 |
| }, |
| { |
| "epoch": 55.4040404040404, |
| "grad_norm": 0.09380615502595901, |
| "learning_rate": 4.612649831769519e-05, |
| "loss": 0.0062, |
| "step": 10970 |
| }, |
| { |
| "epoch": 55.45454545454545, |
| "grad_norm": 0.07099052518606186, |
| "learning_rate": 4.604407861958715e-05, |
| "loss": 0.0059, |
| "step": 10980 |
| }, |
| { |
| "epoch": 55.505050505050505, |
| "grad_norm": 0.06443140655755997, |
| "learning_rate": 4.5961669736817114e-05, |
| "loss": 0.006, |
| "step": 10990 |
| }, |
| { |
| "epoch": 55.55555555555556, |
| "grad_norm": 0.07866071909666061, |
| "learning_rate": 4.5879271894687814e-05, |
| "loss": 0.0042, |
| "step": 11000 |
| }, |
| { |
| "epoch": 55.60606060606061, |
| "grad_norm": 0.06538591533899307, |
| "learning_rate": 4.5796885318471826e-05, |
| "loss": 0.004, |
| "step": 11010 |
| }, |
| { |
| "epoch": 55.656565656565654, |
| "grad_norm": 0.07754694670438766, |
| "learning_rate": 4.571451023341086e-05, |
| "loss": 0.0067, |
| "step": 11020 |
| }, |
| { |
| "epoch": 55.707070707070706, |
| "grad_norm": 0.08924394100904465, |
| "learning_rate": 4.563214686471527e-05, |
| "loss": 0.0049, |
| "step": 11030 |
| }, |
| { |
| "epoch": 55.75757575757576, |
| "grad_norm": 0.08166371285915375, |
| "learning_rate": 4.5549795437563365e-05, |
| "loss": 0.0051, |
| "step": 11040 |
| }, |
| { |
| "epoch": 55.80808080808081, |
| "grad_norm": 0.06600627303123474, |
| "learning_rate": 4.546745617710081e-05, |
| "loss": 0.0047, |
| "step": 11050 |
| }, |
| { |
| "epoch": 55.85858585858586, |
| "grad_norm": 0.09323030710220337, |
| "learning_rate": 4.5385129308440014e-05, |
| "loss": 0.0053, |
| "step": 11060 |
| }, |
| { |
| "epoch": 55.90909090909091, |
| "grad_norm": 0.05906745418906212, |
| "learning_rate": 4.530281505665944e-05, |
| "loss": 0.0036, |
| "step": 11070 |
| }, |
| { |
| "epoch": 55.95959595959596, |
| "grad_norm": 0.08033601194620132, |
| "learning_rate": 4.5220513646803134e-05, |
| "loss": 0.0058, |
| "step": 11080 |
| }, |
| { |
| "epoch": 56.01010101010101, |
| "grad_norm": 0.0650600865483284, |
| "learning_rate": 4.513822530388003e-05, |
| "loss": 0.0067, |
| "step": 11090 |
| }, |
| { |
| "epoch": 56.06060606060606, |
| "grad_norm": 0.08486595004796982, |
| "learning_rate": 4.5055950252863296e-05, |
| "loss": 0.0052, |
| "step": 11100 |
| }, |
| { |
| "epoch": 56.111111111111114, |
| "grad_norm": 0.06517436355352402, |
| "learning_rate": 4.4973688718689803e-05, |
| "loss": 0.0043, |
| "step": 11110 |
| }, |
| { |
| "epoch": 56.16161616161616, |
| "grad_norm": 0.07070663571357727, |
| "learning_rate": 4.4891440926259406e-05, |
| "loss": 0.0054, |
| "step": 11120 |
| }, |
| { |
| "epoch": 56.21212121212121, |
| "grad_norm": 0.04750152304768562, |
| "learning_rate": 4.480920710043443e-05, |
| "loss": 0.0038, |
| "step": 11130 |
| }, |
| { |
| "epoch": 56.26262626262626, |
| "grad_norm": 0.0681268721818924, |
| "learning_rate": 4.4726987466039044e-05, |
| "loss": 0.0039, |
| "step": 11140 |
| }, |
| { |
| "epoch": 56.313131313131315, |
| "grad_norm": 0.08281800895929337, |
| "learning_rate": 4.46447822478586e-05, |
| "loss": 0.0055, |
| "step": 11150 |
| }, |
| { |
| "epoch": 56.36363636363637, |
| "grad_norm": 0.09894859045743942, |
| "learning_rate": 4.4562591670638974e-05, |
| "loss": 0.006, |
| "step": 11160 |
| }, |
| { |
| "epoch": 56.41414141414141, |
| "grad_norm": 0.09736963361501694, |
| "learning_rate": 4.4480415959086105e-05, |
| "loss": 0.0047, |
| "step": 11170 |
| }, |
| { |
| "epoch": 56.464646464646464, |
| "grad_norm": 0.09591088443994522, |
| "learning_rate": 4.439825533786522e-05, |
| "loss": 0.0057, |
| "step": 11180 |
| }, |
| { |
| "epoch": 56.515151515151516, |
| "grad_norm": 0.12203825265169144, |
| "learning_rate": 4.431611003160035e-05, |
| "loss": 0.0062, |
| "step": 11190 |
| }, |
| { |
| "epoch": 56.56565656565657, |
| "grad_norm": 0.08218017220497131, |
| "learning_rate": 4.4233980264873636e-05, |
| "loss": 0.0079, |
| "step": 11200 |
| }, |
| { |
| "epoch": 56.61616161616162, |
| "grad_norm": 0.07585697621107101, |
| "learning_rate": 4.4151866262224684e-05, |
| "loss": 0.0038, |
| "step": 11210 |
| }, |
| { |
| "epoch": 56.666666666666664, |
| "grad_norm": 0.08610648661851883, |
| "learning_rate": 4.406976824815006e-05, |
| "loss": 0.0051, |
| "step": 11220 |
| }, |
| { |
| "epoch": 56.717171717171716, |
| "grad_norm": 0.08567634224891663, |
| "learning_rate": 4.3987686447102595e-05, |
| "loss": 0.0052, |
| "step": 11230 |
| }, |
| { |
| "epoch": 56.76767676767677, |
| "grad_norm": 0.07810795307159424, |
| "learning_rate": 4.3905621083490804e-05, |
| "loss": 0.0064, |
| "step": 11240 |
| }, |
| { |
| "epoch": 56.81818181818182, |
| "grad_norm": 0.07648720592260361, |
| "learning_rate": 4.3823572381678286e-05, |
| "loss": 0.006, |
| "step": 11250 |
| }, |
| { |
| "epoch": 56.86868686868687, |
| "grad_norm": 0.07088477909564972, |
| "learning_rate": 4.374154056598301e-05, |
| "loss": 0.0047, |
| "step": 11260 |
| }, |
| { |
| "epoch": 56.91919191919192, |
| "grad_norm": 0.07164214551448822, |
| "learning_rate": 4.3659525860676845e-05, |
| "loss": 0.0058, |
| "step": 11270 |
| }, |
| { |
| "epoch": 56.96969696969697, |
| "grad_norm": 0.07769747078418732, |
| "learning_rate": 4.3577528489984854e-05, |
| "loss": 0.0048, |
| "step": 11280 |
| }, |
| { |
| "epoch": 57.02020202020202, |
| "grad_norm": 0.0868016928434372, |
| "learning_rate": 4.349554867808476e-05, |
| "loss": 0.0048, |
| "step": 11290 |
| }, |
| { |
| "epoch": 57.07070707070707, |
| "grad_norm": 0.07188599556684494, |
| "learning_rate": 4.34135866491062e-05, |
| "loss": 0.0039, |
| "step": 11300 |
| }, |
| { |
| "epoch": 57.121212121212125, |
| "grad_norm": 0.07405246794223785, |
| "learning_rate": 4.333164262713022e-05, |
| "loss": 0.0053, |
| "step": 11310 |
| }, |
| { |
| "epoch": 57.17171717171717, |
| "grad_norm": 0.08174362778663635, |
| "learning_rate": 4.324971683618868e-05, |
| "loss": 0.0047, |
| "step": 11320 |
| }, |
| { |
| "epoch": 57.22222222222222, |
| "grad_norm": 0.07250235974788666, |
| "learning_rate": 4.316780950026354e-05, |
| "loss": 0.0066, |
| "step": 11330 |
| }, |
| { |
| "epoch": 57.27272727272727, |
| "grad_norm": 0.08913738280534744, |
| "learning_rate": 4.308592084328637e-05, |
| "loss": 0.0053, |
| "step": 11340 |
| }, |
| { |
| "epoch": 57.323232323232325, |
| "grad_norm": 0.07944918423891068, |
| "learning_rate": 4.3004051089137576e-05, |
| "loss": 0.0067, |
| "step": 11350 |
| }, |
| { |
| "epoch": 57.37373737373738, |
| "grad_norm": 0.07268029451370239, |
| "learning_rate": 4.292220046164597e-05, |
| "loss": 0.0064, |
| "step": 11360 |
| }, |
| { |
| "epoch": 57.42424242424242, |
| "grad_norm": 0.0606326088309288, |
| "learning_rate": 4.2840369184588035e-05, |
| "loss": 0.0038, |
| "step": 11370 |
| }, |
| { |
| "epoch": 57.474747474747474, |
| "grad_norm": 0.07500310242176056, |
| "learning_rate": 4.2758557481687345e-05, |
| "loss": 0.0052, |
| "step": 11380 |
| }, |
| { |
| "epoch": 57.525252525252526, |
| "grad_norm": 0.12503297626972198, |
| "learning_rate": 4.267676557661403e-05, |
| "loss": 0.0063, |
| "step": 11390 |
| }, |
| { |
| "epoch": 57.57575757575758, |
| "grad_norm": 0.07479877024888992, |
| "learning_rate": 4.2594993692983955e-05, |
| "loss": 0.0055, |
| "step": 11400 |
| }, |
| { |
| "epoch": 57.62626262626263, |
| "grad_norm": 0.08710306882858276, |
| "learning_rate": 4.251324205435837e-05, |
| "loss": 0.0052, |
| "step": 11410 |
| }, |
| { |
| "epoch": 57.676767676767675, |
| "grad_norm": 0.0786527469754219, |
| "learning_rate": 4.243151088424312e-05, |
| "loss": 0.0049, |
| "step": 11420 |
| }, |
| { |
| "epoch": 57.72727272727273, |
| "grad_norm": 0.09157560020685196, |
| "learning_rate": 4.234980040608813e-05, |
| "loss": 0.0052, |
| "step": 11430 |
| }, |
| { |
| "epoch": 57.77777777777778, |
| "grad_norm": 0.0648542121052742, |
| "learning_rate": 4.22681108432867e-05, |
| "loss": 0.0069, |
| "step": 11440 |
| }, |
| { |
| "epoch": 57.82828282828283, |
| "grad_norm": 0.09745177626609802, |
| "learning_rate": 4.2186442419174984e-05, |
| "loss": 0.0054, |
| "step": 11450 |
| }, |
| { |
| "epoch": 57.878787878787875, |
| "grad_norm": 0.07642856985330582, |
| "learning_rate": 4.210479535703133e-05, |
| "loss": 0.0045, |
| "step": 11460 |
| }, |
| { |
| "epoch": 57.92929292929293, |
| "grad_norm": 0.06650850176811218, |
| "learning_rate": 4.202316988007567e-05, |
| "loss": 0.0056, |
| "step": 11470 |
| }, |
| { |
| "epoch": 57.97979797979798, |
| "grad_norm": 0.06585467606782913, |
| "learning_rate": 4.194156621146901e-05, |
| "loss": 0.0052, |
| "step": 11480 |
| }, |
| { |
| "epoch": 58.03030303030303, |
| "grad_norm": 0.07500506937503815, |
| "learning_rate": 4.1859984574312596e-05, |
| "loss": 0.0046, |
| "step": 11490 |
| }, |
| { |
| "epoch": 58.08080808080808, |
| "grad_norm": 0.060382239520549774, |
| "learning_rate": 4.177842519164752e-05, |
| "loss": 0.0038, |
| "step": 11500 |
| }, |
| { |
| "epoch": 58.13131313131313, |
| "grad_norm": 0.08187796920537949, |
| "learning_rate": 4.169688828645404e-05, |
| "loss": 0.005, |
| "step": 11510 |
| }, |
| { |
| "epoch": 58.18181818181818, |
| "grad_norm": 0.1101619228720665, |
| "learning_rate": 4.161537408165092e-05, |
| "loss": 0.0059, |
| "step": 11520 |
| }, |
| { |
| "epoch": 58.23232323232323, |
| "grad_norm": 0.08434976637363434, |
| "learning_rate": 4.1533882800094924e-05, |
| "loss": 0.0062, |
| "step": 11530 |
| }, |
| { |
| "epoch": 58.282828282828284, |
| "grad_norm": 0.05603298917412758, |
| "learning_rate": 4.145241466458005e-05, |
| "loss": 0.0052, |
| "step": 11540 |
| }, |
| { |
| "epoch": 58.333333333333336, |
| "grad_norm": 0.08299846202135086, |
| "learning_rate": 4.13709698978371e-05, |
| "loss": 0.0045, |
| "step": 11550 |
| }, |
| { |
| "epoch": 58.38383838383838, |
| "grad_norm": 0.06292783468961716, |
| "learning_rate": 4.1289548722532944e-05, |
| "loss": 0.0046, |
| "step": 11560 |
| }, |
| { |
| "epoch": 58.43434343434343, |
| "grad_norm": 0.06852615624666214, |
| "learning_rate": 4.120815136126999e-05, |
| "loss": 0.0055, |
| "step": 11570 |
| }, |
| { |
| "epoch": 58.484848484848484, |
| "grad_norm": 0.059573862701654434, |
| "learning_rate": 4.112677803658548e-05, |
| "loss": 0.0045, |
| "step": 11580 |
| }, |
| { |
| "epoch": 58.535353535353536, |
| "grad_norm": 0.06968608498573303, |
| "learning_rate": 4.1045428970951e-05, |
| "loss": 0.0051, |
| "step": 11590 |
| }, |
| { |
| "epoch": 58.58585858585859, |
| "grad_norm": 0.06369612365961075, |
| "learning_rate": 4.0964104386771785e-05, |
| "loss": 0.0046, |
| "step": 11600 |
| }, |
| { |
| "epoch": 58.63636363636363, |
| "grad_norm": 0.08433613926172256, |
| "learning_rate": 4.0882804506386144e-05, |
| "loss": 0.0051, |
| "step": 11610 |
| }, |
| { |
| "epoch": 58.686868686868685, |
| "grad_norm": 0.08206941187381744, |
| "learning_rate": 4.080152955206485e-05, |
| "loss": 0.0052, |
| "step": 11620 |
| }, |
| { |
| "epoch": 58.73737373737374, |
| "grad_norm": 0.09028971195220947, |
| "learning_rate": 4.0720279746010505e-05, |
| "loss": 0.0048, |
| "step": 11630 |
| }, |
| { |
| "epoch": 58.78787878787879, |
| "grad_norm": 0.08200250566005707, |
| "learning_rate": 4.063905531035699e-05, |
| "loss": 0.0061, |
| "step": 11640 |
| }, |
| { |
| "epoch": 58.83838383838384, |
| "grad_norm": 0.09245746582746506, |
| "learning_rate": 4.055785646716882e-05, |
| "loss": 0.0048, |
| "step": 11650 |
| }, |
| { |
| "epoch": 58.888888888888886, |
| "grad_norm": 0.08818136900663376, |
| "learning_rate": 4.047668343844051e-05, |
| "loss": 0.0053, |
| "step": 11660 |
| }, |
| { |
| "epoch": 58.93939393939394, |
| "grad_norm": 0.06342174112796783, |
| "learning_rate": 4.039553644609604e-05, |
| "loss": 0.0044, |
| "step": 11670 |
| }, |
| { |
| "epoch": 58.98989898989899, |
| "grad_norm": 0.06733866035938263, |
| "learning_rate": 4.0314415711988176e-05, |
| "loss": 0.005, |
| "step": 11680 |
| }, |
| { |
| "epoch": 59.04040404040404, |
| "grad_norm": 0.07066482305526733, |
| "learning_rate": 4.023332145789792e-05, |
| "loss": 0.0045, |
| "step": 11690 |
| }, |
| { |
| "epoch": 59.09090909090909, |
| "grad_norm": 0.09244602918624878, |
| "learning_rate": 4.015225390553385e-05, |
| "loss": 0.0065, |
| "step": 11700 |
| }, |
| { |
| "epoch": 59.14141414141414, |
| "grad_norm": 0.07884165644645691, |
| "learning_rate": 4.007121327653158e-05, |
| "loss": 0.0056, |
| "step": 11710 |
| }, |
| { |
| "epoch": 59.19191919191919, |
| "grad_norm": 0.08624677360057831, |
| "learning_rate": 3.9990199792453064e-05, |
| "loss": 0.0038, |
| "step": 11720 |
| }, |
| { |
| "epoch": 59.24242424242424, |
| "grad_norm": 0.06628291308879852, |
| "learning_rate": 3.9909213674786103e-05, |
| "loss": 0.0066, |
| "step": 11730 |
| }, |
| { |
| "epoch": 59.292929292929294, |
| "grad_norm": 0.078164242208004, |
| "learning_rate": 3.982825514494363e-05, |
| "loss": 0.0048, |
| "step": 11740 |
| }, |
| { |
| "epoch": 59.343434343434346, |
| "grad_norm": 0.061982400715351105, |
| "learning_rate": 3.974732442426319e-05, |
| "loss": 0.0055, |
| "step": 11750 |
| }, |
| { |
| "epoch": 59.39393939393939, |
| "grad_norm": 0.06395875662565231, |
| "learning_rate": 3.966642173400629e-05, |
| "loss": 0.0048, |
| "step": 11760 |
| }, |
| { |
| "epoch": 59.44444444444444, |
| "grad_norm": 0.04708195477724075, |
| "learning_rate": 3.9585547295357764e-05, |
| "loss": 0.0038, |
| "step": 11770 |
| }, |
| { |
| "epoch": 59.494949494949495, |
| "grad_norm": 0.07769767940044403, |
| "learning_rate": 3.950470132942526e-05, |
| "loss": 0.0049, |
| "step": 11780 |
| }, |
| { |
| "epoch": 59.54545454545455, |
| "grad_norm": 0.08551177382469177, |
| "learning_rate": 3.942388405723856e-05, |
| "loss": 0.0055, |
| "step": 11790 |
| }, |
| { |
| "epoch": 59.5959595959596, |
| "grad_norm": 0.10105745494365692, |
| "learning_rate": 3.9343095699749e-05, |
| "loss": 0.0061, |
| "step": 11800 |
| }, |
| { |
| "epoch": 59.64646464646464, |
| "grad_norm": 0.06033230572938919, |
| "learning_rate": 3.9262336477828874e-05, |
| "loss": 0.0041, |
| "step": 11810 |
| }, |
| { |
| "epoch": 59.696969696969695, |
| "grad_norm": 0.06136485934257507, |
| "learning_rate": 3.9181606612270794e-05, |
| "loss": 0.0051, |
| "step": 11820 |
| }, |
| { |
| "epoch": 59.74747474747475, |
| "grad_norm": 0.1000821590423584, |
| "learning_rate": 3.910090632378713e-05, |
| "loss": 0.0049, |
| "step": 11830 |
| }, |
| { |
| "epoch": 59.7979797979798, |
| "grad_norm": 0.07589410990476608, |
| "learning_rate": 3.90202358330094e-05, |
| "loss": 0.0048, |
| "step": 11840 |
| }, |
| { |
| "epoch": 59.84848484848485, |
| "grad_norm": 0.06528744846582413, |
| "learning_rate": 3.8939595360487656e-05, |
| "loss": 0.0047, |
| "step": 11850 |
| }, |
| { |
| "epoch": 59.898989898989896, |
| "grad_norm": 0.07403555512428284, |
| "learning_rate": 3.885898512668984e-05, |
| "loss": 0.0058, |
| "step": 11860 |
| }, |
| { |
| "epoch": 59.94949494949495, |
| "grad_norm": 0.07309906929731369, |
| "learning_rate": 3.877840535200127e-05, |
| "loss": 0.0061, |
| "step": 11870 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 0.07583467662334442, |
| "learning_rate": 3.869785625672397e-05, |
| "loss": 0.0042, |
| "step": 11880 |
| }, |
| { |
| "epoch": 60.05050505050505, |
| "grad_norm": 0.08331216871738434, |
| "learning_rate": 3.8617338061076094e-05, |
| "loss": 0.0039, |
| "step": 11890 |
| }, |
| { |
| "epoch": 60.101010101010104, |
| "grad_norm": 0.060760460793972015, |
| "learning_rate": 3.853685098519132e-05, |
| "loss": 0.0037, |
| "step": 11900 |
| }, |
| { |
| "epoch": 60.15151515151515, |
| "grad_norm": 0.07851552963256836, |
| "learning_rate": 3.845639524911823e-05, |
| "loss": 0.006, |
| "step": 11910 |
| }, |
| { |
| "epoch": 60.2020202020202, |
| "grad_norm": 0.05853588879108429, |
| "learning_rate": 3.837597107281974e-05, |
| "loss": 0.005, |
| "step": 11920 |
| }, |
| { |
| "epoch": 60.25252525252525, |
| "grad_norm": 0.05888538807630539, |
| "learning_rate": 3.829557867617247e-05, |
| "loss": 0.0049, |
| "step": 11930 |
| }, |
| { |
| "epoch": 60.303030303030305, |
| "grad_norm": 0.0676129013299942, |
| "learning_rate": 3.821521827896618e-05, |
| "loss": 0.0057, |
| "step": 11940 |
| }, |
| { |
| "epoch": 60.35353535353536, |
| "grad_norm": 0.07474690675735474, |
| "learning_rate": 3.81348901009031e-05, |
| "loss": 0.007, |
| "step": 11950 |
| }, |
| { |
| "epoch": 60.4040404040404, |
| "grad_norm": 0.07955371588468552, |
| "learning_rate": 3.805459436159741e-05, |
| "loss": 0.005, |
| "step": 11960 |
| }, |
| { |
| "epoch": 60.45454545454545, |
| "grad_norm": 0.11533652991056442, |
| "learning_rate": 3.797433128057461e-05, |
| "loss": 0.0065, |
| "step": 11970 |
| }, |
| { |
| "epoch": 60.505050505050505, |
| "grad_norm": 0.09757378697395325, |
| "learning_rate": 3.789410107727089e-05, |
| "loss": 0.0066, |
| "step": 11980 |
| }, |
| { |
| "epoch": 60.55555555555556, |
| "grad_norm": 0.07602959126234055, |
| "learning_rate": 3.781390397103257e-05, |
| "loss": 0.0043, |
| "step": 11990 |
| }, |
| { |
| "epoch": 60.60606060606061, |
| "grad_norm": 0.07756085693836212, |
| "learning_rate": 3.7733740181115455e-05, |
| "loss": 0.0054, |
| "step": 12000 |
| }, |
| { |
| "epoch": 60.656565656565654, |
| "grad_norm": 0.07569324225187302, |
| "learning_rate": 3.7653609926684306e-05, |
| "loss": 0.0047, |
| "step": 12010 |
| }, |
| { |
| "epoch": 60.707070707070706, |
| "grad_norm": 0.09722679853439331, |
| "learning_rate": 3.757351342681217e-05, |
| "loss": 0.0049, |
| "step": 12020 |
| }, |
| { |
| "epoch": 60.75757575757576, |
| "grad_norm": 0.06494282186031342, |
| "learning_rate": 3.749345090047982e-05, |
| "loss": 0.0039, |
| "step": 12030 |
| }, |
| { |
| "epoch": 60.80808080808081, |
| "grad_norm": 0.06007422134280205, |
| "learning_rate": 3.741342256657515e-05, |
| "loss": 0.0043, |
| "step": 12040 |
| }, |
| { |
| "epoch": 60.85858585858586, |
| "grad_norm": 0.05537177249789238, |
| "learning_rate": 3.7333428643892567e-05, |
| "loss": 0.0035, |
| "step": 12050 |
| }, |
| { |
| "epoch": 60.90909090909091, |
| "grad_norm": 0.047110263258218765, |
| "learning_rate": 3.725346935113239e-05, |
| "loss": 0.0048, |
| "step": 12060 |
| }, |
| { |
| "epoch": 60.95959595959596, |
| "grad_norm": 0.07465965300798416, |
| "learning_rate": 3.717354490690029e-05, |
| "loss": 0.004, |
| "step": 12070 |
| }, |
| { |
| "epoch": 61.01010101010101, |
| "grad_norm": 0.07359350472688675, |
| "learning_rate": 3.709365552970664e-05, |
| "loss": 0.0049, |
| "step": 12080 |
| }, |
| { |
| "epoch": 61.06060606060606, |
| "grad_norm": 0.09681553393602371, |
| "learning_rate": 3.7013801437965945e-05, |
| "loss": 0.0051, |
| "step": 12090 |
| }, |
| { |
| "epoch": 61.111111111111114, |
| "grad_norm": 0.08243206143379211, |
| "learning_rate": 3.693398284999623e-05, |
| "loss": 0.0051, |
| "step": 12100 |
| }, |
| { |
| "epoch": 61.16161616161616, |
| "grad_norm": 0.08160921186208725, |
| "learning_rate": 3.6854199984018484e-05, |
| "loss": 0.0065, |
| "step": 12110 |
| }, |
| { |
| "epoch": 61.21212121212121, |
| "grad_norm": 0.08912672102451324, |
| "learning_rate": 3.677445305815601e-05, |
| "loss": 0.0058, |
| "step": 12120 |
| }, |
| { |
| "epoch": 61.26262626262626, |
| "grad_norm": 0.07024632394313812, |
| "learning_rate": 3.669474229043387e-05, |
| "loss": 0.0053, |
| "step": 12130 |
| }, |
| { |
| "epoch": 61.313131313131315, |
| "grad_norm": 0.06513278931379318, |
| "learning_rate": 3.6615067898778235e-05, |
| "loss": 0.0055, |
| "step": 12140 |
| }, |
| { |
| "epoch": 61.36363636363637, |
| "grad_norm": 0.06326031684875488, |
| "learning_rate": 3.6535430101015866e-05, |
| "loss": 0.0053, |
| "step": 12150 |
| }, |
| { |
| "epoch": 61.41414141414141, |
| "grad_norm": 0.09243161231279373, |
| "learning_rate": 3.645582911487345e-05, |
| "loss": 0.0056, |
| "step": 12160 |
| }, |
| { |
| "epoch": 61.464646464646464, |
| "grad_norm": 0.08320049941539764, |
| "learning_rate": 3.637626515797706e-05, |
| "loss": 0.0061, |
| "step": 12170 |
| }, |
| { |
| "epoch": 61.515151515151516, |
| "grad_norm": 0.0725349709391594, |
| "learning_rate": 3.629673844785152e-05, |
| "loss": 0.0044, |
| "step": 12180 |
| }, |
| { |
| "epoch": 61.56565656565657, |
| "grad_norm": 0.07812684029340744, |
| "learning_rate": 3.621724920191979e-05, |
| "loss": 0.0051, |
| "step": 12190 |
| }, |
| { |
| "epoch": 61.61616161616162, |
| "grad_norm": 0.08229352533817291, |
| "learning_rate": 3.6137797637502444e-05, |
| "loss": 0.0041, |
| "step": 12200 |
| }, |
| { |
| "epoch": 61.666666666666664, |
| "grad_norm": 0.09214409440755844, |
| "learning_rate": 3.6058383971817035e-05, |
| "loss": 0.0052, |
| "step": 12210 |
| }, |
| { |
| "epoch": 61.717171717171716, |
| "grad_norm": 0.07542754709720612, |
| "learning_rate": 3.59790084219775e-05, |
| "loss": 0.0059, |
| "step": 12220 |
| }, |
| { |
| "epoch": 61.76767676767677, |
| "grad_norm": 0.08901143819093704, |
| "learning_rate": 3.589967120499353e-05, |
| "loss": 0.0047, |
| "step": 12230 |
| }, |
| { |
| "epoch": 61.81818181818182, |
| "grad_norm": 0.045608628541231155, |
| "learning_rate": 3.5820372537770075e-05, |
| "loss": 0.0039, |
| "step": 12240 |
| }, |
| { |
| "epoch": 61.86868686868687, |
| "grad_norm": 0.06428248435258865, |
| "learning_rate": 3.5741112637106655e-05, |
| "loss": 0.0036, |
| "step": 12250 |
| }, |
| { |
| "epoch": 61.91919191919192, |
| "grad_norm": 0.05902957171201706, |
| "learning_rate": 3.5661891719696804e-05, |
| "loss": 0.0047, |
| "step": 12260 |
| }, |
| { |
| "epoch": 61.96969696969697, |
| "grad_norm": 0.06801366060972214, |
| "learning_rate": 3.5582710002127504e-05, |
| "loss": 0.0052, |
| "step": 12270 |
| }, |
| { |
| "epoch": 62.02020202020202, |
| "grad_norm": 0.07201153039932251, |
| "learning_rate": 3.550356770087853e-05, |
| "loss": 0.0048, |
| "step": 12280 |
| }, |
| { |
| "epoch": 62.07070707070707, |
| "grad_norm": 0.08575928211212158, |
| "learning_rate": 3.5424465032321914e-05, |
| "loss": 0.0053, |
| "step": 12290 |
| }, |
| { |
| "epoch": 62.121212121212125, |
| "grad_norm": 0.08873433619737625, |
| "learning_rate": 3.5345402212721335e-05, |
| "loss": 0.0065, |
| "step": 12300 |
| }, |
| { |
| "epoch": 62.17171717171717, |
| "grad_norm": 0.07674960047006607, |
| "learning_rate": 3.526637945823152e-05, |
| "loss": 0.0067, |
| "step": 12310 |
| }, |
| { |
| "epoch": 62.22222222222222, |
| "grad_norm": 0.08316290378570557, |
| "learning_rate": 3.518739698489767e-05, |
| "loss": 0.0058, |
| "step": 12320 |
| }, |
| { |
| "epoch": 62.27272727272727, |
| "grad_norm": 0.07834005355834961, |
| "learning_rate": 3.510845500865485e-05, |
| "loss": 0.005, |
| "step": 12330 |
| }, |
| { |
| "epoch": 62.323232323232325, |
| "grad_norm": 0.08361705392599106, |
| "learning_rate": 3.502955374532739e-05, |
| "loss": 0.0052, |
| "step": 12340 |
| }, |
| { |
| "epoch": 62.37373737373738, |
| "grad_norm": 0.05693356692790985, |
| "learning_rate": 3.495069341062836e-05, |
| "loss": 0.0063, |
| "step": 12350 |
| }, |
| { |
| "epoch": 62.42424242424242, |
| "grad_norm": 0.06085111200809479, |
| "learning_rate": 3.4871874220158896e-05, |
| "loss": 0.0044, |
| "step": 12360 |
| }, |
| { |
| "epoch": 62.474747474747474, |
| "grad_norm": 0.07266676425933838, |
| "learning_rate": 3.479309638940762e-05, |
| "loss": 0.0056, |
| "step": 12370 |
| }, |
| { |
| "epoch": 62.525252525252526, |
| "grad_norm": 0.06355738639831543, |
| "learning_rate": 3.4714360133750146e-05, |
| "loss": 0.0066, |
| "step": 12380 |
| }, |
| { |
| "epoch": 62.57575757575758, |
| "grad_norm": 0.0940740630030632, |
| "learning_rate": 3.463566566844839e-05, |
| "loss": 0.0089, |
| "step": 12390 |
| }, |
| { |
| "epoch": 62.62626262626263, |
| "grad_norm": 0.09107744693756104, |
| "learning_rate": 3.4557013208650016e-05, |
| "loss": 0.0061, |
| "step": 12400 |
| }, |
| { |
| "epoch": 62.676767676767675, |
| "grad_norm": 0.07417166978120804, |
| "learning_rate": 3.4478402969387857e-05, |
| "loss": 0.0056, |
| "step": 12410 |
| }, |
| { |
| "epoch": 62.72727272727273, |
| "grad_norm": 0.06793921440839767, |
| "learning_rate": 3.4399835165579266e-05, |
| "loss": 0.0063, |
| "step": 12420 |
| }, |
| { |
| "epoch": 62.77777777777778, |
| "grad_norm": 0.0719066932797432, |
| "learning_rate": 3.4321310012025645e-05, |
| "loss": 0.0045, |
| "step": 12430 |
| }, |
| { |
| "epoch": 62.82828282828283, |
| "grad_norm": 0.08217573910951614, |
| "learning_rate": 3.424282772341176e-05, |
| "loss": 0.005, |
| "step": 12440 |
| }, |
| { |
| "epoch": 62.878787878787875, |
| "grad_norm": 0.07394882291555405, |
| "learning_rate": 3.416438851430519e-05, |
| "loss": 0.0044, |
| "step": 12450 |
| }, |
| { |
| "epoch": 62.92929292929293, |
| "grad_norm": 0.0737714022397995, |
| "learning_rate": 3.408599259915577e-05, |
| "loss": 0.0042, |
| "step": 12460 |
| }, |
| { |
| "epoch": 62.97979797979798, |
| "grad_norm": 0.09192294627428055, |
| "learning_rate": 3.400764019229487e-05, |
| "loss": 0.0051, |
| "step": 12470 |
| }, |
| { |
| "epoch": 63.03030303030303, |
| "grad_norm": 0.06432031095027924, |
| "learning_rate": 3.3929331507935035e-05, |
| "loss": 0.0062, |
| "step": 12480 |
| }, |
| { |
| "epoch": 63.08080808080808, |
| "grad_norm": 0.07523802667856216, |
| "learning_rate": 3.3851066760169196e-05, |
| "loss": 0.0048, |
| "step": 12490 |
| }, |
| { |
| "epoch": 63.13131313131313, |
| "grad_norm": 0.05541188642382622, |
| "learning_rate": 3.377284616297021e-05, |
| "loss": 0.0046, |
| "step": 12500 |
| }, |
| { |
| "epoch": 63.18181818181818, |
| "grad_norm": 0.07300509512424469, |
| "learning_rate": 3.3694669930190166e-05, |
| "loss": 0.0052, |
| "step": 12510 |
| }, |
| { |
| "epoch": 63.23232323232323, |
| "grad_norm": 0.05590880289673805, |
| "learning_rate": 3.36165382755599e-05, |
| "loss": 0.0054, |
| "step": 12520 |
| }, |
| { |
| "epoch": 63.282828282828284, |
| "grad_norm": 0.05362105369567871, |
| "learning_rate": 3.35384514126884e-05, |
| "loss": 0.0053, |
| "step": 12530 |
| }, |
| { |
| "epoch": 63.333333333333336, |
| "grad_norm": 0.06589356809854507, |
| "learning_rate": 3.3460409555062154e-05, |
| "loss": 0.0045, |
| "step": 12540 |
| }, |
| { |
| "epoch": 63.38383838383838, |
| "grad_norm": 0.07191315293312073, |
| "learning_rate": 3.3382412916044645e-05, |
| "loss": 0.0048, |
| "step": 12550 |
| }, |
| { |
| "epoch": 63.43434343434343, |
| "grad_norm": 0.06216345354914665, |
| "learning_rate": 3.330446170887566e-05, |
| "loss": 0.0039, |
| "step": 12560 |
| }, |
| { |
| "epoch": 63.484848484848484, |
| "grad_norm": 0.05226152017712593, |
| "learning_rate": 3.3226556146670834e-05, |
| "loss": 0.0046, |
| "step": 12570 |
| }, |
| { |
| "epoch": 63.535353535353536, |
| "grad_norm": 0.0821138545870781, |
| "learning_rate": 3.314869644242102e-05, |
| "loss": 0.0055, |
| "step": 12580 |
| }, |
| { |
| "epoch": 63.58585858585859, |
| "grad_norm": 0.050606440752744675, |
| "learning_rate": 3.3070882808991674e-05, |
| "loss": 0.0048, |
| "step": 12590 |
| }, |
| { |
| "epoch": 63.63636363636363, |
| "grad_norm": 0.06547045707702637, |
| "learning_rate": 3.2993115459122305e-05, |
| "loss": 0.0057, |
| "step": 12600 |
| }, |
| { |
| "epoch": 63.686868686868685, |
| "grad_norm": 0.05647405609488487, |
| "learning_rate": 3.2915394605425835e-05, |
| "loss": 0.0057, |
| "step": 12610 |
| }, |
| { |
| "epoch": 63.73737373737374, |
| "grad_norm": 0.057373255491256714, |
| "learning_rate": 3.283772046038816e-05, |
| "loss": 0.0057, |
| "step": 12620 |
| }, |
| { |
| "epoch": 63.78787878787879, |
| "grad_norm": 0.052257802337408066, |
| "learning_rate": 3.276009323636739e-05, |
| "loss": 0.0049, |
| "step": 12630 |
| }, |
| { |
| "epoch": 63.83838383838384, |
| "grad_norm": 0.08003655821084976, |
| "learning_rate": 3.268251314559344e-05, |
| "loss": 0.0046, |
| "step": 12640 |
| }, |
| { |
| "epoch": 63.888888888888886, |
| "grad_norm": 0.05270685628056526, |
| "learning_rate": 3.2604980400167254e-05, |
| "loss": 0.0043, |
| "step": 12650 |
| }, |
| { |
| "epoch": 63.93939393939394, |
| "grad_norm": 0.05644606426358223, |
| "learning_rate": 3.252749521206042e-05, |
| "loss": 0.0066, |
| "step": 12660 |
| }, |
| { |
| "epoch": 63.98989898989899, |
| "grad_norm": 0.06533727049827576, |
| "learning_rate": 3.2450057793114494e-05, |
| "loss": 0.0054, |
| "step": 12670 |
| }, |
| { |
| "epoch": 64.04040404040404, |
| "grad_norm": 0.0494505949318409, |
| "learning_rate": 3.2372668355040435e-05, |
| "loss": 0.0046, |
| "step": 12680 |
| }, |
| { |
| "epoch": 64.0909090909091, |
| "grad_norm": 0.0732744112610817, |
| "learning_rate": 3.2295327109418005e-05, |
| "loss": 0.0052, |
| "step": 12690 |
| }, |
| { |
| "epoch": 64.14141414141415, |
| "grad_norm": 0.07944296300411224, |
| "learning_rate": 3.221803426769518e-05, |
| "loss": 0.0055, |
| "step": 12700 |
| }, |
| { |
| "epoch": 64.1919191919192, |
| "grad_norm": 0.0921500101685524, |
| "learning_rate": 3.214079004118768e-05, |
| "loss": 0.0054, |
| "step": 12710 |
| }, |
| { |
| "epoch": 64.24242424242425, |
| "grad_norm": 0.06708259135484695, |
| "learning_rate": 3.2063594641078234e-05, |
| "loss": 0.0066, |
| "step": 12720 |
| }, |
| { |
| "epoch": 64.29292929292929, |
| "grad_norm": 0.06441912800073624, |
| "learning_rate": 3.198644827841616e-05, |
| "loss": 0.0054, |
| "step": 12730 |
| }, |
| { |
| "epoch": 64.34343434343434, |
| "grad_norm": 0.06818021833896637, |
| "learning_rate": 3.1909351164116654e-05, |
| "loss": 0.0074, |
| "step": 12740 |
| }, |
| { |
| "epoch": 64.39393939393939, |
| "grad_norm": 0.07320462167263031, |
| "learning_rate": 3.183230350896026e-05, |
| "loss": 0.0042, |
| "step": 12750 |
| }, |
| { |
| "epoch": 64.44444444444444, |
| "grad_norm": 0.09995702654123306, |
| "learning_rate": 3.1755305523592337e-05, |
| "loss": 0.007, |
| "step": 12760 |
| }, |
| { |
| "epoch": 64.4949494949495, |
| "grad_norm": 0.0855710357427597, |
| "learning_rate": 3.167835741852245e-05, |
| "loss": 0.005, |
| "step": 12770 |
| }, |
| { |
| "epoch": 64.54545454545455, |
| "grad_norm": 0.05828683078289032, |
| "learning_rate": 3.160145940412378e-05, |
| "loss": 0.0051, |
| "step": 12780 |
| }, |
| { |
| "epoch": 64.5959595959596, |
| "grad_norm": 0.07758744806051254, |
| "learning_rate": 3.1524611690632545e-05, |
| "loss": 0.005, |
| "step": 12790 |
| }, |
| { |
| "epoch": 64.64646464646465, |
| "grad_norm": 0.0555390901863575, |
| "learning_rate": 3.144781448814746e-05, |
| "loss": 0.006, |
| "step": 12800 |
| }, |
| { |
| "epoch": 64.6969696969697, |
| "grad_norm": 0.08839180320501328, |
| "learning_rate": 3.1371068006629145e-05, |
| "loss": 0.006, |
| "step": 12810 |
| }, |
| { |
| "epoch": 64.74747474747475, |
| "grad_norm": 0.07359647750854492, |
| "learning_rate": 3.129437245589956e-05, |
| "loss": 0.0058, |
| "step": 12820 |
| }, |
| { |
| "epoch": 64.79797979797979, |
| "grad_norm": 0.08297309279441833, |
| "learning_rate": 3.121772804564143e-05, |
| "loss": 0.0045, |
| "step": 12830 |
| }, |
| { |
| "epoch": 64.84848484848484, |
| "grad_norm": 0.06690573692321777, |
| "learning_rate": 3.11411349853976e-05, |
| "loss": 0.0047, |
| "step": 12840 |
| }, |
| { |
| "epoch": 64.8989898989899, |
| "grad_norm": 0.06899934262037277, |
| "learning_rate": 3.10645934845706e-05, |
| "loss": 0.0051, |
| "step": 12850 |
| }, |
| { |
| "epoch": 64.94949494949495, |
| "grad_norm": 0.055221546441316605, |
| "learning_rate": 3.098810375242196e-05, |
| "loss": 0.0038, |
| "step": 12860 |
| }, |
| { |
| "epoch": 65.0, |
| "grad_norm": 0.08049985021352768, |
| "learning_rate": 3.0911665998071704e-05, |
| "loss": 0.0049, |
| "step": 12870 |
| }, |
| { |
| "epoch": 65.05050505050505, |
| "grad_norm": 0.0675831213593483, |
| "learning_rate": 3.083528043049774e-05, |
| "loss": 0.0042, |
| "step": 12880 |
| }, |
| { |
| "epoch": 65.1010101010101, |
| "grad_norm": 0.060909245163202286, |
| "learning_rate": 3.0758947258535255e-05, |
| "loss": 0.004, |
| "step": 12890 |
| }, |
| { |
| "epoch": 65.15151515151516, |
| "grad_norm": 0.0815868005156517, |
| "learning_rate": 3.068266669087625e-05, |
| "loss": 0.0046, |
| "step": 12900 |
| }, |
| { |
| "epoch": 65.20202020202021, |
| "grad_norm": 0.062239207327365875, |
| "learning_rate": 3.060643893606887e-05, |
| "loss": 0.0041, |
| "step": 12910 |
| }, |
| { |
| "epoch": 65.25252525252525, |
| "grad_norm": 0.06763526052236557, |
| "learning_rate": 3.053026420251693e-05, |
| "loss": 0.0047, |
| "step": 12920 |
| }, |
| { |
| "epoch": 65.3030303030303, |
| "grad_norm": 0.07184776663780212, |
| "learning_rate": 3.0454142698479183e-05, |
| "loss": 0.005, |
| "step": 12930 |
| }, |
| { |
| "epoch": 65.35353535353535, |
| "grad_norm": 0.05789589509367943, |
| "learning_rate": 3.0378074632068954e-05, |
| "loss": 0.0043, |
| "step": 12940 |
| }, |
| { |
| "epoch": 65.4040404040404, |
| "grad_norm": 0.04791947454214096, |
| "learning_rate": 3.0302060211253408e-05, |
| "loss": 0.0051, |
| "step": 12950 |
| }, |
| { |
| "epoch": 65.45454545454545, |
| "grad_norm": 0.07119539380073547, |
| "learning_rate": 3.0226099643853073e-05, |
| "loss": 0.0043, |
| "step": 12960 |
| }, |
| { |
| "epoch": 65.5050505050505, |
| "grad_norm": 0.09146495163440704, |
| "learning_rate": 3.0150193137541283e-05, |
| "loss": 0.0055, |
| "step": 12970 |
| }, |
| { |
| "epoch": 65.55555555555556, |
| "grad_norm": 0.05645667016506195, |
| "learning_rate": 3.0074340899843467e-05, |
| "loss": 0.004, |
| "step": 12980 |
| }, |
| { |
| "epoch": 65.60606060606061, |
| "grad_norm": 0.04769594222307205, |
| "learning_rate": 2.999854313813677e-05, |
| "loss": 0.0047, |
| "step": 12990 |
| }, |
| { |
| "epoch": 65.65656565656566, |
| "grad_norm": 0.049779243767261505, |
| "learning_rate": 2.9922800059649382e-05, |
| "loss": 0.0039, |
| "step": 13000 |
| }, |
| { |
| "epoch": 65.70707070707071, |
| "grad_norm": 0.06682920455932617, |
| "learning_rate": 2.9847111871459976e-05, |
| "loss": 0.0064, |
| "step": 13010 |
| }, |
| { |
| "epoch": 65.75757575757575, |
| "grad_norm": 0.07163585722446442, |
| "learning_rate": 2.977147878049721e-05, |
| "loss": 0.0041, |
| "step": 13020 |
| }, |
| { |
| "epoch": 65.8080808080808, |
| "grad_norm": 0.07817397266626358, |
| "learning_rate": 2.9695900993539006e-05, |
| "loss": 0.0062, |
| "step": 13030 |
| }, |
| { |
| "epoch": 65.85858585858585, |
| "grad_norm": 0.09959842264652252, |
| "learning_rate": 2.9620378717212183e-05, |
| "loss": 0.0045, |
| "step": 13040 |
| }, |
| { |
| "epoch": 65.9090909090909, |
| "grad_norm": 0.07189232110977173, |
| "learning_rate": 2.9544912157991745e-05, |
| "loss": 0.006, |
| "step": 13050 |
| }, |
| { |
| "epoch": 65.95959595959596, |
| "grad_norm": 0.07503862679004669, |
| "learning_rate": 2.9469501522200405e-05, |
| "loss": 0.0045, |
| "step": 13060 |
| }, |
| { |
| "epoch": 66.01010101010101, |
| "grad_norm": 0.07388830184936523, |
| "learning_rate": 2.9394147016007946e-05, |
| "loss": 0.0037, |
| "step": 13070 |
| }, |
| { |
| "epoch": 66.06060606060606, |
| "grad_norm": 0.07522162795066833, |
| "learning_rate": 2.9318848845430702e-05, |
| "loss": 0.0051, |
| "step": 13080 |
| }, |
| { |
| "epoch": 66.11111111111111, |
| "grad_norm": 0.08214754611253738, |
| "learning_rate": 2.9243607216331013e-05, |
| "loss": 0.0064, |
| "step": 13090 |
| }, |
| { |
| "epoch": 66.16161616161617, |
| "grad_norm": 0.05355595797300339, |
| "learning_rate": 2.916842233441661e-05, |
| "loss": 0.0034, |
| "step": 13100 |
| }, |
| { |
| "epoch": 66.21212121212122, |
| "grad_norm": 0.0729057639837265, |
| "learning_rate": 2.90932944052401e-05, |
| "loss": 0.0033, |
| "step": 13110 |
| }, |
| { |
| "epoch": 66.26262626262626, |
| "grad_norm": 0.0553322397172451, |
| "learning_rate": 2.9018223634198354e-05, |
| "loss": 0.0064, |
| "step": 13120 |
| }, |
| { |
| "epoch": 66.31313131313131, |
| "grad_norm": 0.06777006387710571, |
| "learning_rate": 2.8943210226532025e-05, |
| "loss": 0.0054, |
| "step": 13130 |
| }, |
| { |
| "epoch": 66.36363636363636, |
| "grad_norm": 0.09812850505113602, |
| "learning_rate": 2.8868254387324857e-05, |
| "loss": 0.0045, |
| "step": 13140 |
| }, |
| { |
| "epoch": 66.41414141414141, |
| "grad_norm": 0.05137278139591217, |
| "learning_rate": 2.8793356321503306e-05, |
| "loss": 0.0036, |
| "step": 13150 |
| }, |
| { |
| "epoch": 66.46464646464646, |
| "grad_norm": 0.05424187332391739, |
| "learning_rate": 2.87185162338358e-05, |
| "loss": 0.0051, |
| "step": 13160 |
| }, |
| { |
| "epoch": 66.51515151515152, |
| "grad_norm": 0.07314357161521912, |
| "learning_rate": 2.8643734328932253e-05, |
| "loss": 0.0064, |
| "step": 13170 |
| }, |
| { |
| "epoch": 66.56565656565657, |
| "grad_norm": 0.07719216495752335, |
| "learning_rate": 2.856901081124359e-05, |
| "loss": 0.0045, |
| "step": 13180 |
| }, |
| { |
| "epoch": 66.61616161616162, |
| "grad_norm": 0.05442417040467262, |
| "learning_rate": 2.8494345885061002e-05, |
| "loss": 0.0038, |
| "step": 13190 |
| }, |
| { |
| "epoch": 66.66666666666667, |
| "grad_norm": 0.057431772351264954, |
| "learning_rate": 2.8419739754515616e-05, |
| "loss": 0.0044, |
| "step": 13200 |
| }, |
| { |
| "epoch": 66.71717171717172, |
| "grad_norm": 0.06787672638893127, |
| "learning_rate": 2.8345192623577666e-05, |
| "loss": 0.0028, |
| "step": 13210 |
| }, |
| { |
| "epoch": 66.76767676767676, |
| "grad_norm": 0.07410194724798203, |
| "learning_rate": 2.8270704696056193e-05, |
| "loss": 0.0057, |
| "step": 13220 |
| }, |
| { |
| "epoch": 66.81818181818181, |
| "grad_norm": 0.05657150223851204, |
| "learning_rate": 2.8196276175598367e-05, |
| "loss": 0.0041, |
| "step": 13230 |
| }, |
| { |
| "epoch": 66.86868686868686, |
| "grad_norm": 0.04950426146388054, |
| "learning_rate": 2.8121907265688884e-05, |
| "loss": 0.0033, |
| "step": 13240 |
| }, |
| { |
| "epoch": 66.91919191919192, |
| "grad_norm": 0.07835181802511215, |
| "learning_rate": 2.804759816964957e-05, |
| "loss": 0.006, |
| "step": 13250 |
| }, |
| { |
| "epoch": 66.96969696969697, |
| "grad_norm": 0.05732427537441254, |
| "learning_rate": 2.797334909063857e-05, |
| "loss": 0.0047, |
| "step": 13260 |
| }, |
| { |
| "epoch": 67.02020202020202, |
| "grad_norm": 0.06540197134017944, |
| "learning_rate": 2.7899160231650056e-05, |
| "loss": 0.0042, |
| "step": 13270 |
| }, |
| { |
| "epoch": 67.07070707070707, |
| "grad_norm": 0.06971903890371323, |
| "learning_rate": 2.7825031795513585e-05, |
| "loss": 0.0038, |
| "step": 13280 |
| }, |
| { |
| "epoch": 67.12121212121212, |
| "grad_norm": 0.04491217061877251, |
| "learning_rate": 2.775096398489341e-05, |
| "loss": 0.0036, |
| "step": 13290 |
| }, |
| { |
| "epoch": 67.17171717171718, |
| "grad_norm": 0.06614677608013153, |
| "learning_rate": 2.7676957002288163e-05, |
| "loss": 0.0054, |
| "step": 13300 |
| }, |
| { |
| "epoch": 67.22222222222223, |
| "grad_norm": 0.05024334043264389, |
| "learning_rate": 2.760301105003003e-05, |
| "loss": 0.0044, |
| "step": 13310 |
| }, |
| { |
| "epoch": 67.27272727272727, |
| "grad_norm": 0.07406572252511978, |
| "learning_rate": 2.752912633028446e-05, |
| "loss": 0.0066, |
| "step": 13320 |
| }, |
| { |
| "epoch": 67.32323232323232, |
| "grad_norm": 0.06403311342000961, |
| "learning_rate": 2.7455303045049474e-05, |
| "loss": 0.0039, |
| "step": 13330 |
| }, |
| { |
| "epoch": 67.37373737373737, |
| "grad_norm": 0.06765282154083252, |
| "learning_rate": 2.7381541396155098e-05, |
| "loss": 0.0058, |
| "step": 13340 |
| }, |
| { |
| "epoch": 67.42424242424242, |
| "grad_norm": 0.06906427443027496, |
| "learning_rate": 2.730784158526286e-05, |
| "loss": 0.0039, |
| "step": 13350 |
| }, |
| { |
| "epoch": 67.47474747474747, |
| "grad_norm": 0.049665167927742004, |
| "learning_rate": 2.723420381386521e-05, |
| "loss": 0.0042, |
| "step": 13360 |
| }, |
| { |
| "epoch": 67.52525252525253, |
| "grad_norm": 0.061579711735248566, |
| "learning_rate": 2.7160628283285018e-05, |
| "loss": 0.0048, |
| "step": 13370 |
| }, |
| { |
| "epoch": 67.57575757575758, |
| "grad_norm": 0.058590102940797806, |
| "learning_rate": 2.7087115194675007e-05, |
| "loss": 0.0039, |
| "step": 13380 |
| }, |
| { |
| "epoch": 67.62626262626263, |
| "grad_norm": 0.060228172689676285, |
| "learning_rate": 2.701366474901712e-05, |
| "loss": 0.0044, |
| "step": 13390 |
| }, |
| { |
| "epoch": 67.67676767676768, |
| "grad_norm": 0.062431450933218, |
| "learning_rate": 2.6940277147122085e-05, |
| "loss": 0.006, |
| "step": 13400 |
| }, |
| { |
| "epoch": 67.72727272727273, |
| "grad_norm": 0.05195033550262451, |
| "learning_rate": 2.686695258962878e-05, |
| "loss": 0.0056, |
| "step": 13410 |
| }, |
| { |
| "epoch": 67.77777777777777, |
| "grad_norm": 0.06195671483874321, |
| "learning_rate": 2.679369127700375e-05, |
| "loss": 0.0057, |
| "step": 13420 |
| }, |
| { |
| "epoch": 67.82828282828282, |
| "grad_norm": 0.07876665890216827, |
| "learning_rate": 2.672049340954067e-05, |
| "loss": 0.0036, |
| "step": 13430 |
| }, |
| { |
| "epoch": 67.87878787878788, |
| "grad_norm": 0.09201064705848694, |
| "learning_rate": 2.6647359187359676e-05, |
| "loss": 0.0046, |
| "step": 13440 |
| }, |
| { |
| "epoch": 67.92929292929293, |
| "grad_norm": 0.04689439386129379, |
| "learning_rate": 2.6574288810406946e-05, |
| "loss": 0.003, |
| "step": 13450 |
| }, |
| { |
| "epoch": 67.97979797979798, |
| "grad_norm": 0.04318065568804741, |
| "learning_rate": 2.6501282478454083e-05, |
| "loss": 0.0037, |
| "step": 13460 |
| }, |
| { |
| "epoch": 68.03030303030303, |
| "grad_norm": 0.07252330332994461, |
| "learning_rate": 2.6428340391097618e-05, |
| "loss": 0.0052, |
| "step": 13470 |
| }, |
| { |
| "epoch": 68.08080808080808, |
| "grad_norm": 0.07007390260696411, |
| "learning_rate": 2.6355462747758485e-05, |
| "loss": 0.0053, |
| "step": 13480 |
| }, |
| { |
| "epoch": 68.13131313131314, |
| "grad_norm": 0.05501498654484749, |
| "learning_rate": 2.6282649747681304e-05, |
| "loss": 0.0039, |
| "step": 13490 |
| }, |
| { |
| "epoch": 68.18181818181819, |
| "grad_norm": 0.07237014919519424, |
| "learning_rate": 2.620990158993406e-05, |
| "loss": 0.0044, |
| "step": 13500 |
| }, |
| { |
| "epoch": 68.23232323232324, |
| "grad_norm": 0.08312316983938217, |
| "learning_rate": 2.6137218473407477e-05, |
| "loss": 0.0053, |
| "step": 13510 |
| }, |
| { |
| "epoch": 68.28282828282828, |
| "grad_norm": 0.05203476920723915, |
| "learning_rate": 2.606460059681436e-05, |
| "loss": 0.0045, |
| "step": 13520 |
| }, |
| { |
| "epoch": 68.33333333333333, |
| "grad_norm": 0.085822194814682, |
| "learning_rate": 2.599204815868928e-05, |
| "loss": 0.0056, |
| "step": 13530 |
| }, |
| { |
| "epoch": 68.38383838383838, |
| "grad_norm": 0.06915038824081421, |
| "learning_rate": 2.5919561357387756e-05, |
| "loss": 0.0047, |
| "step": 13540 |
| }, |
| { |
| "epoch": 68.43434343434343, |
| "grad_norm": 0.07934459298849106, |
| "learning_rate": 2.5847140391085972e-05, |
| "loss": 0.0032, |
| "step": 13550 |
| }, |
| { |
| "epoch": 68.48484848484848, |
| "grad_norm": 0.07973135262727737, |
| "learning_rate": 2.5774785457780103e-05, |
| "loss": 0.0058, |
| "step": 13560 |
| }, |
| { |
| "epoch": 68.53535353535354, |
| "grad_norm": 0.07660122215747833, |
| "learning_rate": 2.5702496755285753e-05, |
| "loss": 0.0063, |
| "step": 13570 |
| }, |
| { |
| "epoch": 68.58585858585859, |
| "grad_norm": 0.06190371513366699, |
| "learning_rate": 2.5630274481237483e-05, |
| "loss": 0.0057, |
| "step": 13580 |
| }, |
| { |
| "epoch": 68.63636363636364, |
| "grad_norm": 0.05545649304986, |
| "learning_rate": 2.5558118833088197e-05, |
| "loss": 0.0035, |
| "step": 13590 |
| }, |
| { |
| "epoch": 68.68686868686869, |
| "grad_norm": 0.07043877243995667, |
| "learning_rate": 2.548603000810872e-05, |
| "loss": 0.0049, |
| "step": 13600 |
| }, |
| { |
| "epoch": 68.73737373737374, |
| "grad_norm": 0.06498979032039642, |
| "learning_rate": 2.5414008203387152e-05, |
| "loss": 0.0067, |
| "step": 13610 |
| }, |
| { |
| "epoch": 68.78787878787878, |
| "grad_norm": 0.05473397672176361, |
| "learning_rate": 2.534205361582834e-05, |
| "loss": 0.0053, |
| "step": 13620 |
| }, |
| { |
| "epoch": 68.83838383838383, |
| "grad_norm": 0.08444904536008835, |
| "learning_rate": 2.527016644215338e-05, |
| "loss": 0.0045, |
| "step": 13630 |
| }, |
| { |
| "epoch": 68.88888888888889, |
| "grad_norm": 0.08686760812997818, |
| "learning_rate": 2.519834687889905e-05, |
| "loss": 0.0063, |
| "step": 13640 |
| }, |
| { |
| "epoch": 68.93939393939394, |
| "grad_norm": 0.05677926167845726, |
| "learning_rate": 2.5126595122417295e-05, |
| "loss": 0.0051, |
| "step": 13650 |
| }, |
| { |
| "epoch": 68.98989898989899, |
| "grad_norm": 0.05708545818924904, |
| "learning_rate": 2.5054911368874713e-05, |
| "loss": 0.0039, |
| "step": 13660 |
| }, |
| { |
| "epoch": 69.04040404040404, |
| "grad_norm": 0.08997657895088196, |
| "learning_rate": 2.4983295814251916e-05, |
| "loss": 0.0055, |
| "step": 13670 |
| }, |
| { |
| "epoch": 69.0909090909091, |
| "grad_norm": 0.04982747882604599, |
| "learning_rate": 2.4911748654343105e-05, |
| "loss": 0.0045, |
| "step": 13680 |
| }, |
| { |
| "epoch": 69.14141414141415, |
| "grad_norm": 0.05778666213154793, |
| "learning_rate": 2.4840270084755463e-05, |
| "loss": 0.0041, |
| "step": 13690 |
| }, |
| { |
| "epoch": 69.1919191919192, |
| "grad_norm": 0.06701663881540298, |
| "learning_rate": 2.4768860300908685e-05, |
| "loss": 0.0048, |
| "step": 13700 |
| }, |
| { |
| "epoch": 69.24242424242425, |
| "grad_norm": 0.06584847718477249, |
| "learning_rate": 2.469751949803443e-05, |
| "loss": 0.0054, |
| "step": 13710 |
| }, |
| { |
| "epoch": 69.29292929292929, |
| "grad_norm": 0.07367159426212311, |
| "learning_rate": 2.4626247871175666e-05, |
| "loss": 0.0048, |
| "step": 13720 |
| }, |
| { |
| "epoch": 69.34343434343434, |
| "grad_norm": 0.059459950774908066, |
| "learning_rate": 2.4555045615186346e-05, |
| "loss": 0.0048, |
| "step": 13730 |
| }, |
| { |
| "epoch": 69.39393939393939, |
| "grad_norm": 0.059161387383937836, |
| "learning_rate": 2.4483912924730677e-05, |
| "loss": 0.0045, |
| "step": 13740 |
| }, |
| { |
| "epoch": 69.44444444444444, |
| "grad_norm": 0.06189454346895218, |
| "learning_rate": 2.4412849994282742e-05, |
| "loss": 0.0053, |
| "step": 13750 |
| }, |
| { |
| "epoch": 69.4949494949495, |
| "grad_norm": 0.04815782234072685, |
| "learning_rate": 2.434185701812592e-05, |
| "loss": 0.0035, |
| "step": 13760 |
| }, |
| { |
| "epoch": 69.54545454545455, |
| "grad_norm": 0.06368819624185562, |
| "learning_rate": 2.4270934190352218e-05, |
| "loss": 0.0032, |
| "step": 13770 |
| }, |
| { |
| "epoch": 69.5959595959596, |
| "grad_norm": 0.05354767665266991, |
| "learning_rate": 2.4200081704861998e-05, |
| "loss": 0.0049, |
| "step": 13780 |
| }, |
| { |
| "epoch": 69.64646464646465, |
| "grad_norm": 0.054494358599185944, |
| "learning_rate": 2.412929975536321e-05, |
| "loss": 0.0059, |
| "step": 13790 |
| }, |
| { |
| "epoch": 69.6969696969697, |
| "grad_norm": 0.061242010444402695, |
| "learning_rate": 2.4058588535371017e-05, |
| "loss": 0.0039, |
| "step": 13800 |
| }, |
| { |
| "epoch": 69.74747474747475, |
| "grad_norm": 0.05417219176888466, |
| "learning_rate": 2.3987948238207243e-05, |
| "loss": 0.0052, |
| "step": 13810 |
| }, |
| { |
| "epoch": 69.79797979797979, |
| "grad_norm": 0.07070023566484451, |
| "learning_rate": 2.3917379056999678e-05, |
| "loss": 0.0046, |
| "step": 13820 |
| }, |
| { |
| "epoch": 69.84848484848484, |
| "grad_norm": 0.06046900898218155, |
| "learning_rate": 2.3846881184681824e-05, |
| "loss": 0.0031, |
| "step": 13830 |
| }, |
| { |
| "epoch": 69.8989898989899, |
| "grad_norm": 0.048801153898239136, |
| "learning_rate": 2.377645481399214e-05, |
| "loss": 0.0045, |
| "step": 13840 |
| }, |
| { |
| "epoch": 69.94949494949495, |
| "grad_norm": 0.08491257578134537, |
| "learning_rate": 2.3706100137473667e-05, |
| "loss": 0.0047, |
| "step": 13850 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 0.0640738233923912, |
| "learning_rate": 2.3635817347473394e-05, |
| "loss": 0.0051, |
| "step": 13860 |
| }, |
| { |
| "epoch": 70.05050505050505, |
| "grad_norm": 0.07088373601436615, |
| "learning_rate": 2.3565606636141757e-05, |
| "loss": 0.0049, |
| "step": 13870 |
| }, |
| { |
| "epoch": 70.1010101010101, |
| "grad_norm": 0.07026105374097824, |
| "learning_rate": 2.3495468195432203e-05, |
| "loss": 0.0051, |
| "step": 13880 |
| }, |
| { |
| "epoch": 70.15151515151516, |
| "grad_norm": 0.05001705512404442, |
| "learning_rate": 2.3425402217100507e-05, |
| "loss": 0.0039, |
| "step": 13890 |
| }, |
| { |
| "epoch": 70.20202020202021, |
| "grad_norm": 0.06308233737945557, |
| "learning_rate": 2.3355408892704424e-05, |
| "loss": 0.005, |
| "step": 13900 |
| }, |
| { |
| "epoch": 70.25252525252525, |
| "grad_norm": 0.0497092679142952, |
| "learning_rate": 2.3285488413603003e-05, |
| "loss": 0.0045, |
| "step": 13910 |
| }, |
| { |
| "epoch": 70.3030303030303, |
| "grad_norm": 0.060735829174518585, |
| "learning_rate": 2.321564097095615e-05, |
| "loss": 0.0058, |
| "step": 13920 |
| }, |
| { |
| "epoch": 70.35353535353535, |
| "grad_norm": 0.05949006974697113, |
| "learning_rate": 2.3145866755724142e-05, |
| "loss": 0.0046, |
| "step": 13930 |
| }, |
| { |
| "epoch": 70.4040404040404, |
| "grad_norm": 0.05738426744937897, |
| "learning_rate": 2.307616595866699e-05, |
| "loss": 0.0036, |
| "step": 13940 |
| }, |
| { |
| "epoch": 70.45454545454545, |
| "grad_norm": 0.05803406983613968, |
| "learning_rate": 2.3006538770344032e-05, |
| "loss": 0.0053, |
| "step": 13950 |
| }, |
| { |
| "epoch": 70.5050505050505, |
| "grad_norm": 0.06555799394845963, |
| "learning_rate": 2.293698538111334e-05, |
| "loss": 0.0054, |
| "step": 13960 |
| }, |
| { |
| "epoch": 70.55555555555556, |
| "grad_norm": 0.062229253351688385, |
| "learning_rate": 2.28675059811312e-05, |
| "loss": 0.004, |
| "step": 13970 |
| }, |
| { |
| "epoch": 70.60606060606061, |
| "grad_norm": 0.04653414711356163, |
| "learning_rate": 2.279810076035167e-05, |
| "loss": 0.0052, |
| "step": 13980 |
| }, |
| { |
| "epoch": 70.65656565656566, |
| "grad_norm": 0.051848623901605606, |
| "learning_rate": 2.272876990852596e-05, |
| "loss": 0.005, |
| "step": 13990 |
| }, |
| { |
| "epoch": 70.70707070707071, |
| "grad_norm": 0.05748780444264412, |
| "learning_rate": 2.265951361520195e-05, |
| "loss": 0.0042, |
| "step": 14000 |
| }, |
| { |
| "epoch": 70.75757575757575, |
| "grad_norm": 0.04157336428761482, |
| "learning_rate": 2.2590332069723748e-05, |
| "loss": 0.0046, |
| "step": 14010 |
| }, |
| { |
| "epoch": 70.8080808080808, |
| "grad_norm": 0.04744965210556984, |
| "learning_rate": 2.2521225461231004e-05, |
| "loss": 0.0037, |
| "step": 14020 |
| }, |
| { |
| "epoch": 70.85858585858585, |
| "grad_norm": 0.05207390710711479, |
| "learning_rate": 2.2452193978658597e-05, |
| "loss": 0.0041, |
| "step": 14030 |
| }, |
| { |
| "epoch": 70.9090909090909, |
| "grad_norm": 0.05188289284706116, |
| "learning_rate": 2.238323781073594e-05, |
| "loss": 0.0038, |
| "step": 14040 |
| }, |
| { |
| "epoch": 70.95959595959596, |
| "grad_norm": 0.08076687157154083, |
| "learning_rate": 2.2314357145986552e-05, |
| "loss": 0.0046, |
| "step": 14050 |
| }, |
| { |
| "epoch": 71.01010101010101, |
| "grad_norm": 0.06967968493700027, |
| "learning_rate": 2.224555217272757e-05, |
| "loss": 0.0043, |
| "step": 14060 |
| }, |
| { |
| "epoch": 71.06060606060606, |
| "grad_norm": 0.07322169840335846, |
| "learning_rate": 2.2176823079069127e-05, |
| "loss": 0.0055, |
| "step": 14070 |
| }, |
| { |
| "epoch": 71.11111111111111, |
| "grad_norm": 0.052052583545446396, |
| "learning_rate": 2.210817005291398e-05, |
| "loss": 0.0031, |
| "step": 14080 |
| }, |
| { |
| "epoch": 71.16161616161617, |
| "grad_norm": 0.0381552092730999, |
| "learning_rate": 2.203959328195686e-05, |
| "loss": 0.0043, |
| "step": 14090 |
| }, |
| { |
| "epoch": 71.21212121212122, |
| "grad_norm": 0.06513004004955292, |
| "learning_rate": 2.1971092953684026e-05, |
| "loss": 0.0041, |
| "step": 14100 |
| }, |
| { |
| "epoch": 71.26262626262626, |
| "grad_norm": 0.05560123175382614, |
| "learning_rate": 2.1902669255372788e-05, |
| "loss": 0.0056, |
| "step": 14110 |
| }, |
| { |
| "epoch": 71.31313131313131, |
| "grad_norm": 0.05521290376782417, |
| "learning_rate": 2.1834322374090897e-05, |
| "loss": 0.0032, |
| "step": 14120 |
| }, |
| { |
| "epoch": 71.36363636363636, |
| "grad_norm": 0.052491992712020874, |
| "learning_rate": 2.1766052496696153e-05, |
| "loss": 0.004, |
| "step": 14130 |
| }, |
| { |
| "epoch": 71.41414141414141, |
| "grad_norm": 0.04862045869231224, |
| "learning_rate": 2.169785980983577e-05, |
| "loss": 0.004, |
| "step": 14140 |
| }, |
| { |
| "epoch": 71.46464646464646, |
| "grad_norm": 0.08472513407468796, |
| "learning_rate": 2.162974449994593e-05, |
| "loss": 0.0052, |
| "step": 14150 |
| }, |
| { |
| "epoch": 71.51515151515152, |
| "grad_norm": 0.061069928109645844, |
| "learning_rate": 2.1561706753251337e-05, |
| "loss": 0.0054, |
| "step": 14160 |
| }, |
| { |
| "epoch": 71.56565656565657, |
| "grad_norm": 0.06548821181058884, |
| "learning_rate": 2.1493746755764544e-05, |
| "loss": 0.0041, |
| "step": 14170 |
| }, |
| { |
| "epoch": 71.61616161616162, |
| "grad_norm": 0.0786743015050888, |
| "learning_rate": 2.1425864693285635e-05, |
| "loss": 0.0056, |
| "step": 14180 |
| }, |
| { |
| "epoch": 71.66666666666667, |
| "grad_norm": 0.0525517575442791, |
| "learning_rate": 2.1358060751401547e-05, |
| "loss": 0.0051, |
| "step": 14190 |
| }, |
| { |
| "epoch": 71.71717171717172, |
| "grad_norm": 0.058683931827545166, |
| "learning_rate": 2.129033511548566e-05, |
| "loss": 0.0039, |
| "step": 14200 |
| }, |
| { |
| "epoch": 71.76767676767676, |
| "grad_norm": 0.04611467197537422, |
| "learning_rate": 2.1222687970697315e-05, |
| "loss": 0.0071, |
| "step": 14210 |
| }, |
| { |
| "epoch": 71.81818181818181, |
| "grad_norm": 0.08835970610380173, |
| "learning_rate": 2.1155119501981173e-05, |
| "loss": 0.0049, |
| "step": 14220 |
| }, |
| { |
| "epoch": 71.86868686868686, |
| "grad_norm": 0.09078796952962875, |
| "learning_rate": 2.1087629894066895e-05, |
| "loss": 0.0054, |
| "step": 14230 |
| }, |
| { |
| "epoch": 71.91919191919192, |
| "grad_norm": 0.07898732274770737, |
| "learning_rate": 2.1020219331468473e-05, |
| "loss": 0.0037, |
| "step": 14240 |
| }, |
| { |
| "epoch": 71.96969696969697, |
| "grad_norm": 0.08723940700292587, |
| "learning_rate": 2.095288799848379e-05, |
| "loss": 0.0059, |
| "step": 14250 |
| }, |
| { |
| "epoch": 72.02020202020202, |
| "grad_norm": 0.08722518384456635, |
| "learning_rate": 2.088563607919417e-05, |
| "loss": 0.0047, |
| "step": 14260 |
| }, |
| { |
| "epoch": 72.07070707070707, |
| "grad_norm": 0.08295470476150513, |
| "learning_rate": 2.0818463757463786e-05, |
| "loss": 0.0045, |
| "step": 14270 |
| }, |
| { |
| "epoch": 72.12121212121212, |
| "grad_norm": 0.0689222663640976, |
| "learning_rate": 2.0751371216939175e-05, |
| "loss": 0.0039, |
| "step": 14280 |
| }, |
| { |
| "epoch": 72.17171717171718, |
| "grad_norm": 0.06622111797332764, |
| "learning_rate": 2.068435864104882e-05, |
| "loss": 0.0051, |
| "step": 14290 |
| }, |
| { |
| "epoch": 72.22222222222223, |
| "grad_norm": 0.046118929982185364, |
| "learning_rate": 2.0617426213002506e-05, |
| "loss": 0.005, |
| "step": 14300 |
| }, |
| { |
| "epoch": 72.27272727272727, |
| "grad_norm": 0.0494387149810791, |
| "learning_rate": 2.055057411579097e-05, |
| "loss": 0.0045, |
| "step": 14310 |
| }, |
| { |
| "epoch": 72.32323232323232, |
| "grad_norm": 0.10006352514028549, |
| "learning_rate": 2.0483802532185286e-05, |
| "loss": 0.0054, |
| "step": 14320 |
| }, |
| { |
| "epoch": 72.37373737373737, |
| "grad_norm": 0.07914059609174728, |
| "learning_rate": 2.041711164473638e-05, |
| "loss": 0.0058, |
| "step": 14330 |
| }, |
| { |
| "epoch": 72.42424242424242, |
| "grad_norm": 0.07188300788402557, |
| "learning_rate": 2.0350501635774637e-05, |
| "loss": 0.0048, |
| "step": 14340 |
| }, |
| { |
| "epoch": 72.47474747474747, |
| "grad_norm": 0.0841880738735199, |
| "learning_rate": 2.0283972687409247e-05, |
| "loss": 0.0054, |
| "step": 14350 |
| }, |
| { |
| "epoch": 72.52525252525253, |
| "grad_norm": 0.07978257536888123, |
| "learning_rate": 2.021752498152784e-05, |
| "loss": 0.0036, |
| "step": 14360 |
| }, |
| { |
| "epoch": 72.57575757575758, |
| "grad_norm": 0.06919857859611511, |
| "learning_rate": 2.015115869979589e-05, |
| "loss": 0.0069, |
| "step": 14370 |
| }, |
| { |
| "epoch": 72.62626262626263, |
| "grad_norm": 0.06726115942001343, |
| "learning_rate": 2.0084874023656265e-05, |
| "loss": 0.0042, |
| "step": 14380 |
| }, |
| { |
| "epoch": 72.67676767676768, |
| "grad_norm": 0.0693756714463234, |
| "learning_rate": 2.001867113432877e-05, |
| "loss": 0.0039, |
| "step": 14390 |
| }, |
| { |
| "epoch": 72.72727272727273, |
| "grad_norm": 0.054609593003988266, |
| "learning_rate": 1.995255021280954e-05, |
| "loss": 0.0051, |
| "step": 14400 |
| }, |
| { |
| "epoch": 72.77777777777777, |
| "grad_norm": 0.05328965187072754, |
| "learning_rate": 1.9886511439870688e-05, |
| "loss": 0.0049, |
| "step": 14410 |
| }, |
| { |
| "epoch": 72.82828282828282, |
| "grad_norm": 0.032446861267089844, |
| "learning_rate": 1.9820554996059675e-05, |
| "loss": 0.003, |
| "step": 14420 |
| }, |
| { |
| "epoch": 72.87878787878788, |
| "grad_norm": 0.0658450797200203, |
| "learning_rate": 1.9754681061698893e-05, |
| "loss": 0.0043, |
| "step": 14430 |
| }, |
| { |
| "epoch": 72.92929292929293, |
| "grad_norm": 0.05227955803275108, |
| "learning_rate": 1.9688889816885185e-05, |
| "loss": 0.0047, |
| "step": 14440 |
| }, |
| { |
| "epoch": 72.97979797979798, |
| "grad_norm": 0.06745505332946777, |
| "learning_rate": 1.962318144148928e-05, |
| "loss": 0.0063, |
| "step": 14450 |
| }, |
| { |
| "epoch": 73.03030303030303, |
| "grad_norm": 0.04939642548561096, |
| "learning_rate": 1.955755611515539e-05, |
| "loss": 0.0054, |
| "step": 14460 |
| }, |
| { |
| "epoch": 73.08080808080808, |
| "grad_norm": 0.055355317890644073, |
| "learning_rate": 1.9492014017300642e-05, |
| "loss": 0.0047, |
| "step": 14470 |
| }, |
| { |
| "epoch": 73.13131313131314, |
| "grad_norm": 0.049474820494651794, |
| "learning_rate": 1.942655532711461e-05, |
| "loss": 0.0047, |
| "step": 14480 |
| }, |
| { |
| "epoch": 73.18181818181819, |
| "grad_norm": 0.06670935451984406, |
| "learning_rate": 1.9361180223558882e-05, |
| "loss": 0.0041, |
| "step": 14490 |
| }, |
| { |
| "epoch": 73.23232323232324, |
| "grad_norm": 0.04916433244943619, |
| "learning_rate": 1.929588888536647e-05, |
| "loss": 0.0071, |
| "step": 14500 |
| }, |
| { |
| "epoch": 73.28282828282828, |
| "grad_norm": 0.05686993896961212, |
| "learning_rate": 1.9230681491041425e-05, |
| "loss": 0.0035, |
| "step": 14510 |
| }, |
| { |
| "epoch": 73.33333333333333, |
| "grad_norm": 0.09180948138237, |
| "learning_rate": 1.9165558218858264e-05, |
| "loss": 0.0036, |
| "step": 14520 |
| }, |
| { |
| "epoch": 73.38383838383838, |
| "grad_norm": 0.058741386979818344, |
| "learning_rate": 1.9100519246861505e-05, |
| "loss": 0.0041, |
| "step": 14530 |
| }, |
| { |
| "epoch": 73.43434343434343, |
| "grad_norm": 0.0724206417798996, |
| "learning_rate": 1.9035564752865248e-05, |
| "loss": 0.0048, |
| "step": 14540 |
| }, |
| { |
| "epoch": 73.48484848484848, |
| "grad_norm": 0.07343342900276184, |
| "learning_rate": 1.897069491445258e-05, |
| "loss": 0.0051, |
| "step": 14550 |
| }, |
| { |
| "epoch": 73.53535353535354, |
| "grad_norm": 0.06606832891702652, |
| "learning_rate": 1.890590990897515e-05, |
| "loss": 0.005, |
| "step": 14560 |
| }, |
| { |
| "epoch": 73.58585858585859, |
| "grad_norm": 0.05549941211938858, |
| "learning_rate": 1.884120991355272e-05, |
| "loss": 0.0046, |
| "step": 14570 |
| }, |
| { |
| "epoch": 73.63636363636364, |
| "grad_norm": 0.06467105448246002, |
| "learning_rate": 1.8776595105072576e-05, |
| "loss": 0.0044, |
| "step": 14580 |
| }, |
| { |
| "epoch": 73.68686868686869, |
| "grad_norm": 0.05910492315888405, |
| "learning_rate": 1.8712065660189166e-05, |
| "loss": 0.0059, |
| "step": 14590 |
| }, |
| { |
| "epoch": 73.73737373737374, |
| "grad_norm": 0.06877965480089188, |
| "learning_rate": 1.8647621755323513e-05, |
| "loss": 0.0052, |
| "step": 14600 |
| }, |
| { |
| "epoch": 73.78787878787878, |
| "grad_norm": 0.08204954117536545, |
| "learning_rate": 1.858326356666278e-05, |
| "loss": 0.0052, |
| "step": 14610 |
| }, |
| { |
| "epoch": 73.83838383838383, |
| "grad_norm": 0.06020423024892807, |
| "learning_rate": 1.851899127015983e-05, |
| "loss": 0.0045, |
| "step": 14620 |
| }, |
| { |
| "epoch": 73.88888888888889, |
| "grad_norm": 0.059846315532922745, |
| "learning_rate": 1.8454805041532626e-05, |
| "loss": 0.0048, |
| "step": 14630 |
| }, |
| { |
| "epoch": 73.93939393939394, |
| "grad_norm": 0.06199897453188896, |
| "learning_rate": 1.8390705056263906e-05, |
| "loss": 0.004, |
| "step": 14640 |
| }, |
| { |
| "epoch": 73.98989898989899, |
| "grad_norm": 0.07288287580013275, |
| "learning_rate": 1.832669148960057e-05, |
| "loss": 0.0062, |
| "step": 14650 |
| }, |
| { |
| "epoch": 74.04040404040404, |
| "grad_norm": 0.047384828329086304, |
| "learning_rate": 1.8262764516553233e-05, |
| "loss": 0.0042, |
| "step": 14660 |
| }, |
| { |
| "epoch": 74.0909090909091, |
| "grad_norm": 0.06429743766784668, |
| "learning_rate": 1.8198924311895843e-05, |
| "loss": 0.0056, |
| "step": 14670 |
| }, |
| { |
| "epoch": 74.14141414141415, |
| "grad_norm": 0.05965222790837288, |
| "learning_rate": 1.813517105016505e-05, |
| "loss": 0.0043, |
| "step": 14680 |
| }, |
| { |
| "epoch": 74.1919191919192, |
| "grad_norm": 0.052977193146944046, |
| "learning_rate": 1.8071504905659888e-05, |
| "loss": 0.0034, |
| "step": 14690 |
| }, |
| { |
| "epoch": 74.24242424242425, |
| "grad_norm": 0.049651678651571274, |
| "learning_rate": 1.800792605244109e-05, |
| "loss": 0.0037, |
| "step": 14700 |
| }, |
| { |
| "epoch": 74.29292929292929, |
| "grad_norm": 0.06719347834587097, |
| "learning_rate": 1.7944434664330844e-05, |
| "loss": 0.0038, |
| "step": 14710 |
| }, |
| { |
| "epoch": 74.34343434343434, |
| "grad_norm": 0.06231442466378212, |
| "learning_rate": 1.7881030914912212e-05, |
| "loss": 0.0037, |
| "step": 14720 |
| }, |
| { |
| "epoch": 74.39393939393939, |
| "grad_norm": 0.050820283591747284, |
| "learning_rate": 1.7817714977528577e-05, |
| "loss": 0.0038, |
| "step": 14730 |
| }, |
| { |
| "epoch": 74.44444444444444, |
| "grad_norm": 0.05880574882030487, |
| "learning_rate": 1.7754487025283332e-05, |
| "loss": 0.0045, |
| "step": 14740 |
| }, |
| { |
| "epoch": 74.4949494949495, |
| "grad_norm": 0.0927504226565361, |
| "learning_rate": 1.7691347231039275e-05, |
| "loss": 0.0059, |
| "step": 14750 |
| }, |
| { |
| "epoch": 74.54545454545455, |
| "grad_norm": 0.08144530653953552, |
| "learning_rate": 1.7628295767418164e-05, |
| "loss": 0.0051, |
| "step": 14760 |
| }, |
| { |
| "epoch": 74.5959595959596, |
| "grad_norm": 0.06355073302984238, |
| "learning_rate": 1.7565332806800333e-05, |
| "loss": 0.0038, |
| "step": 14770 |
| }, |
| { |
| "epoch": 74.64646464646465, |
| "grad_norm": 0.06839146465063095, |
| "learning_rate": 1.750245852132408e-05, |
| "loss": 0.0031, |
| "step": 14780 |
| }, |
| { |
| "epoch": 74.6969696969697, |
| "grad_norm": 0.06102769821882248, |
| "learning_rate": 1.7439673082885323e-05, |
| "loss": 0.005, |
| "step": 14790 |
| }, |
| { |
| "epoch": 74.74747474747475, |
| "grad_norm": 0.055848948657512665, |
| "learning_rate": 1.7376976663137047e-05, |
| "loss": 0.005, |
| "step": 14800 |
| }, |
| { |
| "epoch": 74.79797979797979, |
| "grad_norm": 0.07152023166418076, |
| "learning_rate": 1.7314369433488853e-05, |
| "loss": 0.0041, |
| "step": 14810 |
| }, |
| { |
| "epoch": 74.84848484848484, |
| "grad_norm": 0.04546472057700157, |
| "learning_rate": 1.7251851565106548e-05, |
| "loss": 0.0038, |
| "step": 14820 |
| }, |
| { |
| "epoch": 74.8989898989899, |
| "grad_norm": 0.07095196098089218, |
| "learning_rate": 1.7189423228911574e-05, |
| "loss": 0.0047, |
| "step": 14830 |
| }, |
| { |
| "epoch": 74.94949494949495, |
| "grad_norm": 0.04196056351065636, |
| "learning_rate": 1.7127084595580606e-05, |
| "loss": 0.0045, |
| "step": 14840 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 0.06862890720367432, |
| "learning_rate": 1.706483583554513e-05, |
| "loss": 0.0042, |
| "step": 14850 |
| }, |
| { |
| "epoch": 75.05050505050505, |
| "grad_norm": 0.05502363666892052, |
| "learning_rate": 1.700267711899083e-05, |
| "loss": 0.0033, |
| "step": 14860 |
| }, |
| { |
| "epoch": 75.1010101010101, |
| "grad_norm": 0.06139271333813667, |
| "learning_rate": 1.69406086158573e-05, |
| "loss": 0.0044, |
| "step": 14870 |
| }, |
| { |
| "epoch": 75.15151515151516, |
| "grad_norm": 0.05764559656381607, |
| "learning_rate": 1.6878630495837455e-05, |
| "loss": 0.0038, |
| "step": 14880 |
| }, |
| { |
| "epoch": 75.20202020202021, |
| "grad_norm": 0.04205452650785446, |
| "learning_rate": 1.681674292837707e-05, |
| "loss": 0.0047, |
| "step": 14890 |
| }, |
| { |
| "epoch": 75.25252525252525, |
| "grad_norm": 0.0718129351735115, |
| "learning_rate": 1.6754946082674444e-05, |
| "loss": 0.006, |
| "step": 14900 |
| }, |
| { |
| "epoch": 75.3030303030303, |
| "grad_norm": 0.07275405526161194, |
| "learning_rate": 1.6693240127679748e-05, |
| "loss": 0.0046, |
| "step": 14910 |
| }, |
| { |
| "epoch": 75.35353535353535, |
| "grad_norm": 0.05961213633418083, |
| "learning_rate": 1.663162523209475e-05, |
| "loss": 0.0034, |
| "step": 14920 |
| }, |
| { |
| "epoch": 75.4040404040404, |
| "grad_norm": 0.06537947803735733, |
| "learning_rate": 1.6570101564372193e-05, |
| "loss": 0.0037, |
| "step": 14930 |
| }, |
| { |
| "epoch": 75.45454545454545, |
| "grad_norm": 0.07248780876398087, |
| "learning_rate": 1.650866929271543e-05, |
| "loss": 0.004, |
| "step": 14940 |
| }, |
| { |
| "epoch": 75.5050505050505, |
| "grad_norm": 0.04667038843035698, |
| "learning_rate": 1.644732858507797e-05, |
| "loss": 0.0057, |
| "step": 14950 |
| }, |
| { |
| "epoch": 75.55555555555556, |
| "grad_norm": 0.04714682325720787, |
| "learning_rate": 1.6386079609162943e-05, |
| "loss": 0.0041, |
| "step": 14960 |
| }, |
| { |
| "epoch": 75.60606060606061, |
| "grad_norm": 0.05778916925191879, |
| "learning_rate": 1.6324922532422742e-05, |
| "loss": 0.0034, |
| "step": 14970 |
| }, |
| { |
| "epoch": 75.65656565656566, |
| "grad_norm": 0.03641088306903839, |
| "learning_rate": 1.6263857522058434e-05, |
| "loss": 0.0036, |
| "step": 14980 |
| }, |
| { |
| "epoch": 75.70707070707071, |
| "grad_norm": 0.06227831542491913, |
| "learning_rate": 1.6202884745019443e-05, |
| "loss": 0.0054, |
| "step": 14990 |
| }, |
| { |
| "epoch": 75.75757575757575, |
| "grad_norm": 0.055802613496780396, |
| "learning_rate": 1.614200436800304e-05, |
| "loss": 0.0034, |
| "step": 15000 |
| }, |
| { |
| "epoch": 75.8080808080808, |
| "grad_norm": 0.0685703307390213, |
| "learning_rate": 1.6081216557453814e-05, |
| "loss": 0.003, |
| "step": 15010 |
| }, |
| { |
| "epoch": 75.85858585858585, |
| "grad_norm": 0.06024642661213875, |
| "learning_rate": 1.6020521479563367e-05, |
| "loss": 0.0032, |
| "step": 15020 |
| }, |
| { |
| "epoch": 75.9090909090909, |
| "grad_norm": 0.05856112018227577, |
| "learning_rate": 1.5959919300269654e-05, |
| "loss": 0.0034, |
| "step": 15030 |
| }, |
| { |
| "epoch": 75.95959595959596, |
| "grad_norm": 0.05291834473609924, |
| "learning_rate": 1.5899410185256764e-05, |
| "loss": 0.004, |
| "step": 15040 |
| }, |
| { |
| "epoch": 76.01010101010101, |
| "grad_norm": 0.07722914218902588, |
| "learning_rate": 1.583899429995431e-05, |
| "loss": 0.004, |
| "step": 15050 |
| }, |
| { |
| "epoch": 76.06060606060606, |
| "grad_norm": 0.10629014670848846, |
| "learning_rate": 1.5778671809536993e-05, |
| "loss": 0.0051, |
| "step": 15060 |
| }, |
| { |
| "epoch": 76.11111111111111, |
| "grad_norm": 0.06335756927728653, |
| "learning_rate": 1.5718442878924246e-05, |
| "loss": 0.0042, |
| "step": 15070 |
| }, |
| { |
| "epoch": 76.16161616161617, |
| "grad_norm": 0.050389233976602554, |
| "learning_rate": 1.5658307672779593e-05, |
| "loss": 0.0036, |
| "step": 15080 |
| }, |
| { |
| "epoch": 76.21212121212122, |
| "grad_norm": 0.05184765160083771, |
| "learning_rate": 1.5598266355510427e-05, |
| "loss": 0.0031, |
| "step": 15090 |
| }, |
| { |
| "epoch": 76.26262626262626, |
| "grad_norm": 0.06405451148748398, |
| "learning_rate": 1.553831909126744e-05, |
| "loss": 0.0043, |
| "step": 15100 |
| }, |
| { |
| "epoch": 76.31313131313131, |
| "grad_norm": 0.05169626697897911, |
| "learning_rate": 1.5478466043944135e-05, |
| "loss": 0.0053, |
| "step": 15110 |
| }, |
| { |
| "epoch": 76.36363636363636, |
| "grad_norm": 0.05367683619260788, |
| "learning_rate": 1.5418707377176468e-05, |
| "loss": 0.0039, |
| "step": 15120 |
| }, |
| { |
| "epoch": 76.41414141414141, |
| "grad_norm": 0.04693097621202469, |
| "learning_rate": 1.535904325434233e-05, |
| "loss": 0.004, |
| "step": 15130 |
| }, |
| { |
| "epoch": 76.46464646464646, |
| "grad_norm": 0.07248338311910629, |
| "learning_rate": 1.529947383856118e-05, |
| "loss": 0.0044, |
| "step": 15140 |
| }, |
| { |
| "epoch": 76.51515151515152, |
| "grad_norm": 0.06776734441518784, |
| "learning_rate": 1.5239999292693524e-05, |
| "loss": 0.0045, |
| "step": 15150 |
| }, |
| { |
| "epoch": 76.56565656565657, |
| "grad_norm": 0.03950027376413345, |
| "learning_rate": 1.5180619779340505e-05, |
| "loss": 0.0052, |
| "step": 15160 |
| }, |
| { |
| "epoch": 76.61616161616162, |
| "grad_norm": 0.05872876197099686, |
| "learning_rate": 1.5121335460843428e-05, |
| "loss": 0.0048, |
| "step": 15170 |
| }, |
| { |
| "epoch": 76.66666666666667, |
| "grad_norm": 0.07150712609291077, |
| "learning_rate": 1.5062146499283347e-05, |
| "loss": 0.0054, |
| "step": 15180 |
| }, |
| { |
| "epoch": 76.71717171717172, |
| "grad_norm": 0.06075382977724075, |
| "learning_rate": 1.5003053056480643e-05, |
| "loss": 0.0038, |
| "step": 15190 |
| }, |
| { |
| "epoch": 76.76767676767676, |
| "grad_norm": 0.0625942051410675, |
| "learning_rate": 1.4944055293994551e-05, |
| "loss": 0.005, |
| "step": 15200 |
| }, |
| { |
| "epoch": 76.81818181818181, |
| "grad_norm": 0.04936752840876579, |
| "learning_rate": 1.4885153373122656e-05, |
| "loss": 0.004, |
| "step": 15210 |
| }, |
| { |
| "epoch": 76.86868686868686, |
| "grad_norm": 0.053812943398952484, |
| "learning_rate": 1.482634745490059e-05, |
| "loss": 0.0046, |
| "step": 15220 |
| }, |
| { |
| "epoch": 76.91919191919192, |
| "grad_norm": 0.034780919551849365, |
| "learning_rate": 1.4767637700101466e-05, |
| "loss": 0.0037, |
| "step": 15230 |
| }, |
| { |
| "epoch": 76.96969696969697, |
| "grad_norm": 0.07412702590227127, |
| "learning_rate": 1.4709024269235528e-05, |
| "loss": 0.0035, |
| "step": 15240 |
| }, |
| { |
| "epoch": 77.02020202020202, |
| "grad_norm": 0.06261882185935974, |
| "learning_rate": 1.4650507322549684e-05, |
| "loss": 0.004, |
| "step": 15250 |
| }, |
| { |
| "epoch": 77.07070707070707, |
| "grad_norm": 0.07539445161819458, |
| "learning_rate": 1.4592087020026972e-05, |
| "loss": 0.0055, |
| "step": 15260 |
| }, |
| { |
| "epoch": 77.12121212121212, |
| "grad_norm": 0.04201270267367363, |
| "learning_rate": 1.4533763521386318e-05, |
| "loss": 0.005, |
| "step": 15270 |
| }, |
| { |
| "epoch": 77.17171717171718, |
| "grad_norm": 0.04661370441317558, |
| "learning_rate": 1.44755369860819e-05, |
| "loss": 0.0039, |
| "step": 15280 |
| }, |
| { |
| "epoch": 77.22222222222223, |
| "grad_norm": 0.047967035323381424, |
| "learning_rate": 1.441740757330287e-05, |
| "loss": 0.0041, |
| "step": 15290 |
| }, |
| { |
| "epoch": 77.27272727272727, |
| "grad_norm": 0.07083559781312943, |
| "learning_rate": 1.4359375441972844e-05, |
| "loss": 0.0044, |
| "step": 15300 |
| }, |
| { |
| "epoch": 77.32323232323232, |
| "grad_norm": 0.04990070313215256, |
| "learning_rate": 1.4301440750749395e-05, |
| "loss": 0.0033, |
| "step": 15310 |
| }, |
| { |
| "epoch": 77.37373737373737, |
| "grad_norm": 0.047141920775175095, |
| "learning_rate": 1.4243603658023808e-05, |
| "loss": 0.0038, |
| "step": 15320 |
| }, |
| { |
| "epoch": 77.42424242424242, |
| "grad_norm": 0.04210389032959938, |
| "learning_rate": 1.4185864321920444e-05, |
| "loss": 0.0031, |
| "step": 15330 |
| }, |
| { |
| "epoch": 77.47474747474747, |
| "grad_norm": 0.06798895448446274, |
| "learning_rate": 1.4128222900296485e-05, |
| "loss": 0.0051, |
| "step": 15340 |
| }, |
| { |
| "epoch": 77.52525252525253, |
| "grad_norm": 0.061257921159267426, |
| "learning_rate": 1.407067955074135e-05, |
| "loss": 0.0043, |
| "step": 15350 |
| }, |
| { |
| "epoch": 77.57575757575758, |
| "grad_norm": 0.06226058304309845, |
| "learning_rate": 1.4013234430576356e-05, |
| "loss": 0.0034, |
| "step": 15360 |
| }, |
| { |
| "epoch": 77.62626262626263, |
| "grad_norm": 0.050137437880039215, |
| "learning_rate": 1.3955887696854286e-05, |
| "loss": 0.0028, |
| "step": 15370 |
| }, |
| { |
| "epoch": 77.67676767676768, |
| "grad_norm": 0.05346724018454552, |
| "learning_rate": 1.38986395063589e-05, |
| "loss": 0.0033, |
| "step": 15380 |
| }, |
| { |
| "epoch": 77.72727272727273, |
| "grad_norm": 0.05710911378264427, |
| "learning_rate": 1.3841490015604597e-05, |
| "loss": 0.0041, |
| "step": 15390 |
| }, |
| { |
| "epoch": 77.77777777777777, |
| "grad_norm": 0.037551987916231155, |
| "learning_rate": 1.3784439380835879e-05, |
| "loss": 0.005, |
| "step": 15400 |
| }, |
| { |
| "epoch": 77.82828282828282, |
| "grad_norm": 0.05128645896911621, |
| "learning_rate": 1.3727487758026986e-05, |
| "loss": 0.0035, |
| "step": 15410 |
| }, |
| { |
| "epoch": 77.87878787878788, |
| "grad_norm": 0.0609634704887867, |
| "learning_rate": 1.3670635302881525e-05, |
| "loss": 0.0039, |
| "step": 15420 |
| }, |
| { |
| "epoch": 77.92929292929293, |
| "grad_norm": 0.056504521518945694, |
| "learning_rate": 1.3613882170831888e-05, |
| "loss": 0.0054, |
| "step": 15430 |
| }, |
| { |
| "epoch": 77.97979797979798, |
| "grad_norm": 0.06024637073278427, |
| "learning_rate": 1.355722851703901e-05, |
| "loss": 0.0025, |
| "step": 15440 |
| }, |
| { |
| "epoch": 78.03030303030303, |
| "grad_norm": 0.04463696479797363, |
| "learning_rate": 1.3500674496391814e-05, |
| "loss": 0.0034, |
| "step": 15450 |
| }, |
| { |
| "epoch": 78.08080808080808, |
| "grad_norm": 0.06066044420003891, |
| "learning_rate": 1.3444220263506795e-05, |
| "loss": 0.0052, |
| "step": 15460 |
| }, |
| { |
| "epoch": 78.13131313131314, |
| "grad_norm": 0.06282877922058105, |
| "learning_rate": 1.3387865972727714e-05, |
| "loss": 0.0051, |
| "step": 15470 |
| }, |
| { |
| "epoch": 78.18181818181819, |
| "grad_norm": 0.07366183400154114, |
| "learning_rate": 1.3331611778125036e-05, |
| "loss": 0.0029, |
| "step": 15480 |
| }, |
| { |
| "epoch": 78.23232323232324, |
| "grad_norm": 0.03769391030073166, |
| "learning_rate": 1.3275457833495564e-05, |
| "loss": 0.0036, |
| "step": 15490 |
| }, |
| { |
| "epoch": 78.28282828282828, |
| "grad_norm": 0.03883744403719902, |
| "learning_rate": 1.3219404292362065e-05, |
| "loss": 0.004, |
| "step": 15500 |
| }, |
| { |
| "epoch": 78.33333333333333, |
| "grad_norm": 0.052609916776418686, |
| "learning_rate": 1.3163451307972751e-05, |
| "loss": 0.0039, |
| "step": 15510 |
| }, |
| { |
| "epoch": 78.38383838383838, |
| "grad_norm": 0.05125768110156059, |
| "learning_rate": 1.3107599033300977e-05, |
| "loss": 0.0048, |
| "step": 15520 |
| }, |
| { |
| "epoch": 78.43434343434343, |
| "grad_norm": 0.03518494963645935, |
| "learning_rate": 1.305184762104471e-05, |
| "loss": 0.0051, |
| "step": 15530 |
| }, |
| { |
| "epoch": 78.48484848484848, |
| "grad_norm": 0.041943952441215515, |
| "learning_rate": 1.2996197223626178e-05, |
| "loss": 0.0032, |
| "step": 15540 |
| }, |
| { |
| "epoch": 78.53535353535354, |
| "grad_norm": 0.07465706765651703, |
| "learning_rate": 1.2940647993191457e-05, |
| "loss": 0.004, |
| "step": 15550 |
| }, |
| { |
| "epoch": 78.58585858585859, |
| "grad_norm": 0.05254758521914482, |
| "learning_rate": 1.2885200081610005e-05, |
| "loss": 0.004, |
| "step": 15560 |
| }, |
| { |
| "epoch": 78.63636363636364, |
| "grad_norm": 0.05680657923221588, |
| "learning_rate": 1.2829853640474316e-05, |
| "loss": 0.004, |
| "step": 15570 |
| }, |
| { |
| "epoch": 78.68686868686869, |
| "grad_norm": 0.05936729907989502, |
| "learning_rate": 1.2774608821099438e-05, |
| "loss": 0.0045, |
| "step": 15580 |
| }, |
| { |
| "epoch": 78.73737373737374, |
| "grad_norm": 0.041997797787189484, |
| "learning_rate": 1.2719465774522577e-05, |
| "loss": 0.0043, |
| "step": 15590 |
| }, |
| { |
| "epoch": 78.78787878787878, |
| "grad_norm": 0.04186940938234329, |
| "learning_rate": 1.2664424651502755e-05, |
| "loss": 0.0032, |
| "step": 15600 |
| }, |
| { |
| "epoch": 78.83838383838383, |
| "grad_norm": 0.044853802770376205, |
| "learning_rate": 1.260948560252026e-05, |
| "loss": 0.005, |
| "step": 15610 |
| }, |
| { |
| "epoch": 78.88888888888889, |
| "grad_norm": 0.11562099307775497, |
| "learning_rate": 1.2554648777776396e-05, |
| "loss": 0.0041, |
| "step": 15620 |
| }, |
| { |
| "epoch": 78.93939393939394, |
| "grad_norm": 0.04681319370865822, |
| "learning_rate": 1.2499914327192919e-05, |
| "loss": 0.0037, |
| "step": 15630 |
| }, |
| { |
| "epoch": 78.98989898989899, |
| "grad_norm": 0.044238701462745667, |
| "learning_rate": 1.2445282400411722e-05, |
| "loss": 0.0042, |
| "step": 15640 |
| }, |
| { |
| "epoch": 79.04040404040404, |
| "grad_norm": 0.07145074754953384, |
| "learning_rate": 1.2390753146794437e-05, |
| "loss": 0.0033, |
| "step": 15650 |
| }, |
| { |
| "epoch": 79.0909090909091, |
| "grad_norm": 0.05898287147283554, |
| "learning_rate": 1.2336326715421925e-05, |
| "loss": 0.0042, |
| "step": 15660 |
| }, |
| { |
| "epoch": 79.14141414141415, |
| "grad_norm": 0.06610440462827682, |
| "learning_rate": 1.2282003255094005e-05, |
| "loss": 0.0038, |
| "step": 15670 |
| }, |
| { |
| "epoch": 79.1919191919192, |
| "grad_norm": 0.047698404639959335, |
| "learning_rate": 1.2227782914328928e-05, |
| "loss": 0.005, |
| "step": 15680 |
| }, |
| { |
| "epoch": 79.24242424242425, |
| "grad_norm": 0.043358251452445984, |
| "learning_rate": 1.2173665841363018e-05, |
| "loss": 0.0034, |
| "step": 15690 |
| }, |
| { |
| "epoch": 79.29292929292929, |
| "grad_norm": 0.0439022034406662, |
| "learning_rate": 1.211965218415032e-05, |
| "loss": 0.0042, |
| "step": 15700 |
| }, |
| { |
| "epoch": 79.34343434343434, |
| "grad_norm": 0.07258440554141998, |
| "learning_rate": 1.2065742090362082e-05, |
| "loss": 0.0043, |
| "step": 15710 |
| }, |
| { |
| "epoch": 79.39393939393939, |
| "grad_norm": 0.06255728751420975, |
| "learning_rate": 1.2011935707386457e-05, |
| "loss": 0.0043, |
| "step": 15720 |
| }, |
| { |
| "epoch": 79.44444444444444, |
| "grad_norm": 0.05151816084980965, |
| "learning_rate": 1.1958233182328044e-05, |
| "loss": 0.0035, |
| "step": 15730 |
| }, |
| { |
| "epoch": 79.4949494949495, |
| "grad_norm": 0.051262177526950836, |
| "learning_rate": 1.1904634662007474e-05, |
| "loss": 0.005, |
| "step": 15740 |
| }, |
| { |
| "epoch": 79.54545454545455, |
| "grad_norm": 0.07386596500873566, |
| "learning_rate": 1.1851140292961088e-05, |
| "loss": 0.0043, |
| "step": 15750 |
| }, |
| { |
| "epoch": 79.5959595959596, |
| "grad_norm": 0.054496970027685165, |
| "learning_rate": 1.1797750221440424e-05, |
| "loss": 0.003, |
| "step": 15760 |
| }, |
| { |
| "epoch": 79.64646464646465, |
| "grad_norm": 0.03256992623209953, |
| "learning_rate": 1.1744464593411897e-05, |
| "loss": 0.0044, |
| "step": 15770 |
| }, |
| { |
| "epoch": 79.6969696969697, |
| "grad_norm": 0.04662999510765076, |
| "learning_rate": 1.1691283554556399e-05, |
| "loss": 0.0048, |
| "step": 15780 |
| }, |
| { |
| "epoch": 79.74747474747475, |
| "grad_norm": 0.04938117414712906, |
| "learning_rate": 1.1638207250268834e-05, |
| "loss": 0.0045, |
| "step": 15790 |
| }, |
| { |
| "epoch": 79.79797979797979, |
| "grad_norm": 0.040971651673316956, |
| "learning_rate": 1.158523582565782e-05, |
| "loss": 0.0036, |
| "step": 15800 |
| }, |
| { |
| "epoch": 79.84848484848484, |
| "grad_norm": 0.041988521814346313, |
| "learning_rate": 1.1532369425545192e-05, |
| "loss": 0.0028, |
| "step": 15810 |
| }, |
| { |
| "epoch": 79.8989898989899, |
| "grad_norm": 0.06723316013813019, |
| "learning_rate": 1.1479608194465662e-05, |
| "loss": 0.0048, |
| "step": 15820 |
| }, |
| { |
| "epoch": 79.94949494949495, |
| "grad_norm": 0.05386148765683174, |
| "learning_rate": 1.1426952276666442e-05, |
| "loss": 0.0041, |
| "step": 15830 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 0.0681806206703186, |
| "learning_rate": 1.1374401816106778e-05, |
| "loss": 0.003, |
| "step": 15840 |
| }, |
| { |
| "epoch": 80.05050505050505, |
| "grad_norm": 0.048018619418144226, |
| "learning_rate": 1.1321956956457646e-05, |
| "loss": 0.0041, |
| "step": 15850 |
| }, |
| { |
| "epoch": 80.1010101010101, |
| "grad_norm": 0.05383714288473129, |
| "learning_rate": 1.1269617841101277e-05, |
| "loss": 0.0041, |
| "step": 15860 |
| }, |
| { |
| "epoch": 80.15151515151516, |
| "grad_norm": 0.05390230566263199, |
| "learning_rate": 1.1217384613130804e-05, |
| "loss": 0.0047, |
| "step": 15870 |
| }, |
| { |
| "epoch": 80.20202020202021, |
| "grad_norm": 0.07060696184635162, |
| "learning_rate": 1.11652574153499e-05, |
| "loss": 0.0038, |
| "step": 15880 |
| }, |
| { |
| "epoch": 80.25252525252525, |
| "grad_norm": 0.05693084001541138, |
| "learning_rate": 1.1113236390272303e-05, |
| "loss": 0.0059, |
| "step": 15890 |
| }, |
| { |
| "epoch": 80.3030303030303, |
| "grad_norm": 0.05802145600318909, |
| "learning_rate": 1.106132168012155e-05, |
| "loss": 0.0046, |
| "step": 15900 |
| }, |
| { |
| "epoch": 80.35353535353535, |
| "grad_norm": 0.044204868376255035, |
| "learning_rate": 1.1009513426830448e-05, |
| "loss": 0.0036, |
| "step": 15910 |
| }, |
| { |
| "epoch": 80.4040404040404, |
| "grad_norm": 0.04770777374505997, |
| "learning_rate": 1.0957811772040777e-05, |
| "loss": 0.0056, |
| "step": 15920 |
| }, |
| { |
| "epoch": 80.45454545454545, |
| "grad_norm": 0.0569189190864563, |
| "learning_rate": 1.0906216857102913e-05, |
| "loss": 0.0057, |
| "step": 15930 |
| }, |
| { |
| "epoch": 80.5050505050505, |
| "grad_norm": 0.05206622555851936, |
| "learning_rate": 1.0854728823075355e-05, |
| "loss": 0.0034, |
| "step": 15940 |
| }, |
| { |
| "epoch": 80.55555555555556, |
| "grad_norm": 0.038848914206027985, |
| "learning_rate": 1.0803347810724452e-05, |
| "loss": 0.0039, |
| "step": 15950 |
| }, |
| { |
| "epoch": 80.60606060606061, |
| "grad_norm": 0.10459477454423904, |
| "learning_rate": 1.0752073960523911e-05, |
| "loss": 0.0039, |
| "step": 15960 |
| }, |
| { |
| "epoch": 80.65656565656566, |
| "grad_norm": 0.05412621423602104, |
| "learning_rate": 1.070090741265447e-05, |
| "loss": 0.0042, |
| "step": 15970 |
| }, |
| { |
| "epoch": 80.70707070707071, |
| "grad_norm": 0.052498795092105865, |
| "learning_rate": 1.0649848307003547e-05, |
| "loss": 0.0045, |
| "step": 15980 |
| }, |
| { |
| "epoch": 80.75757575757575, |
| "grad_norm": 0.04982329159975052, |
| "learning_rate": 1.0598896783164757e-05, |
| "loss": 0.0034, |
| "step": 15990 |
| }, |
| { |
| "epoch": 80.8080808080808, |
| "grad_norm": 0.04683833196759224, |
| "learning_rate": 1.0548052980437645e-05, |
| "loss": 0.0041, |
| "step": 16000 |
| }, |
| { |
| "epoch": 80.85858585858585, |
| "grad_norm": 0.052911657840013504, |
| "learning_rate": 1.049731703782722e-05, |
| "loss": 0.0054, |
| "step": 16010 |
| }, |
| { |
| "epoch": 80.9090909090909, |
| "grad_norm": 0.08569244295358658, |
| "learning_rate": 1.0446689094043587e-05, |
| "loss": 0.0054, |
| "step": 16020 |
| }, |
| { |
| "epoch": 80.95959595959596, |
| "grad_norm": 0.07661054283380508, |
| "learning_rate": 1.039616928750165e-05, |
| "loss": 0.0034, |
| "step": 16030 |
| }, |
| { |
| "epoch": 81.01010101010101, |
| "grad_norm": 0.036184243857860565, |
| "learning_rate": 1.0345757756320612e-05, |
| "loss": 0.0034, |
| "step": 16040 |
| }, |
| { |
| "epoch": 81.06060606060606, |
| "grad_norm": 0.09585819393396378, |
| "learning_rate": 1.0295454638323666e-05, |
| "loss": 0.0041, |
| "step": 16050 |
| }, |
| { |
| "epoch": 81.11111111111111, |
| "grad_norm": 0.04465373232960701, |
| "learning_rate": 1.0245260071037632e-05, |
| "loss": 0.0048, |
| "step": 16060 |
| }, |
| { |
| "epoch": 81.16161616161617, |
| "grad_norm": 0.05570686236023903, |
| "learning_rate": 1.0195174191692518e-05, |
| "loss": 0.0046, |
| "step": 16070 |
| }, |
| { |
| "epoch": 81.21212121212122, |
| "grad_norm": 0.04545486718416214, |
| "learning_rate": 1.014519713722124e-05, |
| "loss": 0.006, |
| "step": 16080 |
| }, |
| { |
| "epoch": 81.26262626262626, |
| "grad_norm": 0.05401284620165825, |
| "learning_rate": 1.0095329044259132e-05, |
| "loss": 0.0053, |
| "step": 16090 |
| }, |
| { |
| "epoch": 81.31313131313131, |
| "grad_norm": 0.05565112456679344, |
| "learning_rate": 1.004557004914365e-05, |
| "loss": 0.0059, |
| "step": 16100 |
| }, |
| { |
| "epoch": 81.36363636363636, |
| "grad_norm": 0.037602294236421585, |
| "learning_rate": 9.995920287914007e-06, |
| "loss": 0.0045, |
| "step": 16110 |
| }, |
| { |
| "epoch": 81.41414141414141, |
| "grad_norm": 0.05396126210689545, |
| "learning_rate": 9.946379896310737e-06, |
| "loss": 0.0056, |
| "step": 16120 |
| }, |
| { |
| "epoch": 81.46464646464646, |
| "grad_norm": 0.05958091467618942, |
| "learning_rate": 9.896949009775396e-06, |
| "loss": 0.0031, |
| "step": 16130 |
| }, |
| { |
| "epoch": 81.51515151515152, |
| "grad_norm": 0.04834005981683731, |
| "learning_rate": 9.847627763450134e-06, |
| "loss": 0.0033, |
| "step": 16140 |
| }, |
| { |
| "epoch": 81.56565656565657, |
| "grad_norm": 0.0648161768913269, |
| "learning_rate": 9.798416292177337e-06, |
| "loss": 0.0043, |
| "step": 16150 |
| }, |
| { |
| "epoch": 81.61616161616162, |
| "grad_norm": 0.04166547209024429, |
| "learning_rate": 9.74931473049932e-06, |
| "loss": 0.0035, |
| "step": 16160 |
| }, |
| { |
| "epoch": 81.66666666666667, |
| "grad_norm": 0.09897361695766449, |
| "learning_rate": 9.700323212657847e-06, |
| "loss": 0.0035, |
| "step": 16170 |
| }, |
| { |
| "epoch": 81.71717171717172, |
| "grad_norm": 0.03798495978116989, |
| "learning_rate": 9.65144187259388e-06, |
| "loss": 0.0048, |
| "step": 16180 |
| }, |
| { |
| "epoch": 81.76767676767676, |
| "grad_norm": 0.04609782621264458, |
| "learning_rate": 9.602670843947132e-06, |
| "loss": 0.0029, |
| "step": 16190 |
| }, |
| { |
| "epoch": 81.81818181818181, |
| "grad_norm": 0.0476488433778286, |
| "learning_rate": 9.554010260055713e-06, |
| "loss": 0.0047, |
| "step": 16200 |
| }, |
| { |
| "epoch": 81.86868686868686, |
| "grad_norm": 0.05336550995707512, |
| "learning_rate": 9.505460253955834e-06, |
| "loss": 0.005, |
| "step": 16210 |
| }, |
| { |
| "epoch": 81.91919191919192, |
| "grad_norm": 0.04684247076511383, |
| "learning_rate": 9.457020958381324e-06, |
| "loss": 0.0035, |
| "step": 16220 |
| }, |
| { |
| "epoch": 81.96969696969697, |
| "grad_norm": 0.048000678420066833, |
| "learning_rate": 9.408692505763395e-06, |
| "loss": 0.0037, |
| "step": 16230 |
| }, |
| { |
| "epoch": 82.02020202020202, |
| "grad_norm": 0.07258518040180206, |
| "learning_rate": 9.360475028230181e-06, |
| "loss": 0.0034, |
| "step": 16240 |
| }, |
| { |
| "epoch": 82.07070707070707, |
| "grad_norm": 0.04257355257868767, |
| "learning_rate": 9.312368657606412e-06, |
| "loss": 0.0054, |
| "step": 16250 |
| }, |
| { |
| "epoch": 82.12121212121212, |
| "grad_norm": 0.051619768142700195, |
| "learning_rate": 9.264373525413096e-06, |
| "loss": 0.0041, |
| "step": 16260 |
| }, |
| { |
| "epoch": 82.17171717171718, |
| "grad_norm": 0.0474412739276886, |
| "learning_rate": 9.216489762867058e-06, |
| "loss": 0.0042, |
| "step": 16270 |
| }, |
| { |
| "epoch": 82.22222222222223, |
| "grad_norm": 0.053956326097249985, |
| "learning_rate": 9.168717500880708e-06, |
| "loss": 0.0046, |
| "step": 16280 |
| }, |
| { |
| "epoch": 82.27272727272727, |
| "grad_norm": 0.0470103919506073, |
| "learning_rate": 9.121056870061574e-06, |
| "loss": 0.0043, |
| "step": 16290 |
| }, |
| { |
| "epoch": 82.32323232323232, |
| "grad_norm": 0.03350234404206276, |
| "learning_rate": 9.073508000711983e-06, |
| "loss": 0.0041, |
| "step": 16300 |
| }, |
| { |
| "epoch": 82.37373737373737, |
| "grad_norm": 0.06173461303114891, |
| "learning_rate": 9.026071022828758e-06, |
| "loss": 0.0055, |
| "step": 16310 |
| }, |
| { |
| "epoch": 82.42424242424242, |
| "grad_norm": 0.04139098897576332, |
| "learning_rate": 8.978746066102771e-06, |
| "loss": 0.0058, |
| "step": 16320 |
| }, |
| { |
| "epoch": 82.47474747474747, |
| "grad_norm": 0.03952089324593544, |
| "learning_rate": 8.931533259918634e-06, |
| "loss": 0.0036, |
| "step": 16330 |
| }, |
| { |
| "epoch": 82.52525252525253, |
| "grad_norm": 0.051731888204813004, |
| "learning_rate": 8.884432733354382e-06, |
| "loss": 0.0023, |
| "step": 16340 |
| }, |
| { |
| "epoch": 82.57575757575758, |
| "grad_norm": 0.08133986592292786, |
| "learning_rate": 8.837444615181029e-06, |
| "loss": 0.0043, |
| "step": 16350 |
| }, |
| { |
| "epoch": 82.62626262626263, |
| "grad_norm": 0.04177246615290642, |
| "learning_rate": 8.790569033862323e-06, |
| "loss": 0.0028, |
| "step": 16360 |
| }, |
| { |
| "epoch": 82.67676767676768, |
| "grad_norm": 0.041377052664756775, |
| "learning_rate": 8.7438061175543e-06, |
| "loss": 0.0044, |
| "step": 16370 |
| }, |
| { |
| "epoch": 82.72727272727273, |
| "grad_norm": 0.07852531969547272, |
| "learning_rate": 8.697155994104978e-06, |
| "loss": 0.0046, |
| "step": 16380 |
| }, |
| { |
| "epoch": 82.77777777777777, |
| "grad_norm": 0.05860913544893265, |
| "learning_rate": 8.650618791054033e-06, |
| "loss": 0.0045, |
| "step": 16390 |
| }, |
| { |
| "epoch": 82.82828282828282, |
| "grad_norm": 0.041265297681093216, |
| "learning_rate": 8.604194635632373e-06, |
| "loss": 0.0042, |
| "step": 16400 |
| }, |
| { |
| "epoch": 82.87878787878788, |
| "grad_norm": 0.08484966307878494, |
| "learning_rate": 8.557883654761906e-06, |
| "loss": 0.0051, |
| "step": 16410 |
| }, |
| { |
| "epoch": 82.92929292929293, |
| "grad_norm": 0.059632230550050735, |
| "learning_rate": 8.511685975055061e-06, |
| "loss": 0.0052, |
| "step": 16420 |
| }, |
| { |
| "epoch": 82.97979797979798, |
| "grad_norm": 0.043834228068590164, |
| "learning_rate": 8.46560172281452e-06, |
| "loss": 0.0032, |
| "step": 16430 |
| }, |
| { |
| "epoch": 83.03030303030303, |
| "grad_norm": 0.03874203935265541, |
| "learning_rate": 8.419631024032893e-06, |
| "loss": 0.0036, |
| "step": 16440 |
| }, |
| { |
| "epoch": 83.08080808080808, |
| "grad_norm": 0.054539117962121964, |
| "learning_rate": 8.373774004392293e-06, |
| "loss": 0.0056, |
| "step": 16450 |
| }, |
| { |
| "epoch": 83.13131313131314, |
| "grad_norm": 0.034922268241643906, |
| "learning_rate": 8.32803078926409e-06, |
| "loss": 0.0033, |
| "step": 16460 |
| }, |
| { |
| "epoch": 83.18181818181819, |
| "grad_norm": 0.042134761810302734, |
| "learning_rate": 8.282401503708454e-06, |
| "loss": 0.0056, |
| "step": 16470 |
| }, |
| { |
| "epoch": 83.23232323232324, |
| "grad_norm": 0.05206553265452385, |
| "learning_rate": 8.23688627247412e-06, |
| "loss": 0.0031, |
| "step": 16480 |
| }, |
| { |
| "epoch": 83.28282828282828, |
| "grad_norm": 0.05045735463500023, |
| "learning_rate": 8.191485219998007e-06, |
| "loss": 0.0034, |
| "step": 16490 |
| }, |
| { |
| "epoch": 83.33333333333333, |
| "grad_norm": 0.037639982998371124, |
| "learning_rate": 8.146198470404843e-06, |
| "loss": 0.0029, |
| "step": 16500 |
| }, |
| { |
| "epoch": 83.38383838383838, |
| "grad_norm": 0.03712478280067444, |
| "learning_rate": 8.101026147506897e-06, |
| "loss": 0.0053, |
| "step": 16510 |
| }, |
| { |
| "epoch": 83.43434343434343, |
| "grad_norm": 0.05099721625447273, |
| "learning_rate": 8.05596837480353e-06, |
| "loss": 0.0047, |
| "step": 16520 |
| }, |
| { |
| "epoch": 83.48484848484848, |
| "grad_norm": 0.0648672804236412, |
| "learning_rate": 8.011025275480998e-06, |
| "loss": 0.0056, |
| "step": 16530 |
| }, |
| { |
| "epoch": 83.53535353535354, |
| "grad_norm": 0.03885408118367195, |
| "learning_rate": 7.966196972412027e-06, |
| "loss": 0.0038, |
| "step": 16540 |
| }, |
| { |
| "epoch": 83.58585858585859, |
| "grad_norm": 0.03967776149511337, |
| "learning_rate": 7.92148358815547e-06, |
| "loss": 0.0048, |
| "step": 16550 |
| }, |
| { |
| "epoch": 83.63636363636364, |
| "grad_norm": 0.03749620169401169, |
| "learning_rate": 7.87688524495604e-06, |
| "loss": 0.0041, |
| "step": 16560 |
| }, |
| { |
| "epoch": 83.68686868686869, |
| "grad_norm": 0.05034166947007179, |
| "learning_rate": 7.83240206474386e-06, |
| "loss": 0.0039, |
| "step": 16570 |
| }, |
| { |
| "epoch": 83.73737373737374, |
| "grad_norm": 0.049059975892305374, |
| "learning_rate": 7.788034169134272e-06, |
| "loss": 0.0053, |
| "step": 16580 |
| }, |
| { |
| "epoch": 83.78787878787878, |
| "grad_norm": 0.0636754184961319, |
| "learning_rate": 7.743781679427414e-06, |
| "loss": 0.0049, |
| "step": 16590 |
| }, |
| { |
| "epoch": 83.83838383838383, |
| "grad_norm": 0.06673651933670044, |
| "learning_rate": 7.699644716607895e-06, |
| "loss": 0.004, |
| "step": 16600 |
| }, |
| { |
| "epoch": 83.88888888888889, |
| "grad_norm": 0.055378515273332596, |
| "learning_rate": 7.655623401344486e-06, |
| "loss": 0.0047, |
| "step": 16610 |
| }, |
| { |
| "epoch": 83.93939393939394, |
| "grad_norm": 0.10101229697465897, |
| "learning_rate": 7.611717853989775e-06, |
| "loss": 0.0047, |
| "step": 16620 |
| }, |
| { |
| "epoch": 83.98989898989899, |
| "grad_norm": 0.050026051700115204, |
| "learning_rate": 7.567928194579854e-06, |
| "loss": 0.0031, |
| "step": 16630 |
| }, |
| { |
| "epoch": 84.04040404040404, |
| "grad_norm": 0.031481221318244934, |
| "learning_rate": 7.524254542833997e-06, |
| "loss": 0.004, |
| "step": 16640 |
| }, |
| { |
| "epoch": 84.0909090909091, |
| "grad_norm": 0.0495365634560585, |
| "learning_rate": 7.480697018154286e-06, |
| "loss": 0.0036, |
| "step": 16650 |
| }, |
| { |
| "epoch": 84.14141414141415, |
| "grad_norm": 0.06161488965153694, |
| "learning_rate": 7.437255739625332e-06, |
| "loss": 0.0031, |
| "step": 16660 |
| }, |
| { |
| "epoch": 84.1919191919192, |
| "grad_norm": 0.034171752631664276, |
| "learning_rate": 7.393930826013923e-06, |
| "loss": 0.0036, |
| "step": 16670 |
| }, |
| { |
| "epoch": 84.24242424242425, |
| "grad_norm": 0.033337194472551346, |
| "learning_rate": 7.350722395768722e-06, |
| "loss": 0.0034, |
| "step": 16680 |
| }, |
| { |
| "epoch": 84.29292929292929, |
| "grad_norm": 0.05273110046982765, |
| "learning_rate": 7.307630567019963e-06, |
| "loss": 0.0043, |
| "step": 16690 |
| }, |
| { |
| "epoch": 84.34343434343434, |
| "grad_norm": 0.0554899163544178, |
| "learning_rate": 7.264655457579e-06, |
| "loss": 0.0032, |
| "step": 16700 |
| }, |
| { |
| "epoch": 84.39393939393939, |
| "grad_norm": 0.045129161328077316, |
| "learning_rate": 7.221797184938184e-06, |
| "loss": 0.004, |
| "step": 16710 |
| }, |
| { |
| "epoch": 84.44444444444444, |
| "grad_norm": 0.04948750510811806, |
| "learning_rate": 7.179055866270373e-06, |
| "loss": 0.0029, |
| "step": 16720 |
| }, |
| { |
| "epoch": 84.4949494949495, |
| "grad_norm": 0.04577599838376045, |
| "learning_rate": 7.136431618428707e-06, |
| "loss": 0.0042, |
| "step": 16730 |
| }, |
| { |
| "epoch": 84.54545454545455, |
| "grad_norm": 0.06439365446567535, |
| "learning_rate": 7.09392455794628e-06, |
| "loss": 0.0033, |
| "step": 16740 |
| }, |
| { |
| "epoch": 84.5959595959596, |
| "grad_norm": 0.044175464659929276, |
| "learning_rate": 7.051534801035725e-06, |
| "loss": 0.0053, |
| "step": 16750 |
| }, |
| { |
| "epoch": 84.64646464646465, |
| "grad_norm": 0.03742539882659912, |
| "learning_rate": 7.00926246358905e-06, |
| "loss": 0.0031, |
| "step": 16760 |
| }, |
| { |
| "epoch": 84.6969696969697, |
| "grad_norm": 0.06943278759717941, |
| "learning_rate": 6.967107661177191e-06, |
| "loss": 0.0049, |
| "step": 16770 |
| }, |
| { |
| "epoch": 84.74747474747475, |
| "grad_norm": 0.07032317668199539, |
| "learning_rate": 6.925070509049786e-06, |
| "loss": 0.003, |
| "step": 16780 |
| }, |
| { |
| "epoch": 84.79797979797979, |
| "grad_norm": 0.04955672845244408, |
| "learning_rate": 6.883151122134812e-06, |
| "loss": 0.004, |
| "step": 16790 |
| }, |
| { |
| "epoch": 84.84848484848484, |
| "grad_norm": 0.0520283542573452, |
| "learning_rate": 6.8413496150382394e-06, |
| "loss": 0.0054, |
| "step": 16800 |
| }, |
| { |
| "epoch": 84.8989898989899, |
| "grad_norm": 0.048698846250772476, |
| "learning_rate": 6.7996661020438165e-06, |
| "loss": 0.0036, |
| "step": 16810 |
| }, |
| { |
| "epoch": 84.94949494949495, |
| "grad_norm": 0.042392052710056305, |
| "learning_rate": 6.758100697112662e-06, |
| "loss": 0.0055, |
| "step": 16820 |
| }, |
| { |
| "epoch": 85.0, |
| "grad_norm": 0.04165013134479523, |
| "learning_rate": 6.716653513883026e-06, |
| "loss": 0.0036, |
| "step": 16830 |
| }, |
| { |
| "epoch": 85.05050505050505, |
| "grad_norm": 0.06501394510269165, |
| "learning_rate": 6.675324665669913e-06, |
| "loss": 0.0053, |
| "step": 16840 |
| }, |
| { |
| "epoch": 85.1010101010101, |
| "grad_norm": 0.06017445772886276, |
| "learning_rate": 6.634114265464803e-06, |
| "loss": 0.0038, |
| "step": 16850 |
| }, |
| { |
| "epoch": 85.15151515151516, |
| "grad_norm": 0.045219965279102325, |
| "learning_rate": 6.59302242593538e-06, |
| "loss": 0.0053, |
| "step": 16860 |
| }, |
| { |
| "epoch": 85.20202020202021, |
| "grad_norm": 0.05543389171361923, |
| "learning_rate": 6.552049259425141e-06, |
| "loss": 0.0035, |
| "step": 16870 |
| }, |
| { |
| "epoch": 85.25252525252525, |
| "grad_norm": 0.06010183319449425, |
| "learning_rate": 6.511194877953181e-06, |
| "loss": 0.0042, |
| "step": 16880 |
| }, |
| { |
| "epoch": 85.3030303030303, |
| "grad_norm": 0.05636962130665779, |
| "learning_rate": 6.470459393213813e-06, |
| "loss": 0.0049, |
| "step": 16890 |
| }, |
| { |
| "epoch": 85.35353535353535, |
| "grad_norm": 0.04674657806754112, |
| "learning_rate": 6.429842916576279e-06, |
| "loss": 0.0043, |
| "step": 16900 |
| }, |
| { |
| "epoch": 85.4040404040404, |
| "grad_norm": 0.05739723518490791, |
| "learning_rate": 6.389345559084503e-06, |
| "loss": 0.0038, |
| "step": 16910 |
| }, |
| { |
| "epoch": 85.45454545454545, |
| "grad_norm": 0.037810686975717545, |
| "learning_rate": 6.348967431456682e-06, |
| "loss": 0.0051, |
| "step": 16920 |
| }, |
| { |
| "epoch": 85.5050505050505, |
| "grad_norm": 0.04868736118078232, |
| "learning_rate": 6.30870864408511e-06, |
| "loss": 0.0043, |
| "step": 16930 |
| }, |
| { |
| "epoch": 85.55555555555556, |
| "grad_norm": 0.04640132561326027, |
| "learning_rate": 6.268569307035754e-06, |
| "loss": 0.004, |
| "step": 16940 |
| }, |
| { |
| "epoch": 85.60606060606061, |
| "grad_norm": 0.03748789802193642, |
| "learning_rate": 6.228549530048022e-06, |
| "loss": 0.005, |
| "step": 16950 |
| }, |
| { |
| "epoch": 85.65656565656566, |
| "grad_norm": 0.061714909970760345, |
| "learning_rate": 6.1886494225344814e-06, |
| "loss": 0.0042, |
| "step": 16960 |
| }, |
| { |
| "epoch": 85.70707070707071, |
| "grad_norm": 0.03985877335071564, |
| "learning_rate": 6.148869093580479e-06, |
| "loss": 0.0034, |
| "step": 16970 |
| }, |
| { |
| "epoch": 85.75757575757575, |
| "grad_norm": 0.04133319854736328, |
| "learning_rate": 6.109208651943921e-06, |
| "loss": 0.0038, |
| "step": 16980 |
| }, |
| { |
| "epoch": 85.8080808080808, |
| "grad_norm": 0.06771542131900787, |
| "learning_rate": 6.069668206054946e-06, |
| "loss": 0.0044, |
| "step": 16990 |
| }, |
| { |
| "epoch": 85.85858585858585, |
| "grad_norm": 0.04606661573052406, |
| "learning_rate": 6.0302478640156145e-06, |
| "loss": 0.0035, |
| "step": 17000 |
| }, |
| { |
| "epoch": 85.9090909090909, |
| "grad_norm": 0.07141992449760437, |
| "learning_rate": 5.990947733599644e-06, |
| "loss": 0.0043, |
| "step": 17010 |
| }, |
| { |
| "epoch": 85.95959595959596, |
| "grad_norm": 0.03340751677751541, |
| "learning_rate": 5.951767922252105e-06, |
| "loss": 0.0035, |
| "step": 17020 |
| }, |
| { |
| "epoch": 86.01010101010101, |
| "grad_norm": 0.05523008480668068, |
| "learning_rate": 5.912708537089068e-06, |
| "loss": 0.0039, |
| "step": 17030 |
| }, |
| { |
| "epoch": 86.06060606060606, |
| "grad_norm": 0.03810710459947586, |
| "learning_rate": 5.873769684897434e-06, |
| "loss": 0.004, |
| "step": 17040 |
| }, |
| { |
| "epoch": 86.11111111111111, |
| "grad_norm": 0.06234141066670418, |
| "learning_rate": 5.834951472134514e-06, |
| "loss": 0.0048, |
| "step": 17050 |
| }, |
| { |
| "epoch": 86.16161616161617, |
| "grad_norm": 0.03600474074482918, |
| "learning_rate": 5.796254004927832e-06, |
| "loss": 0.0044, |
| "step": 17060 |
| }, |
| { |
| "epoch": 86.21212121212122, |
| "grad_norm": 0.049893636256456375, |
| "learning_rate": 5.757677389074806e-06, |
| "loss": 0.004, |
| "step": 17070 |
| }, |
| { |
| "epoch": 86.26262626262626, |
| "grad_norm": 0.08444990962743759, |
| "learning_rate": 5.719221730042385e-06, |
| "loss": 0.0045, |
| "step": 17080 |
| }, |
| { |
| "epoch": 86.31313131313131, |
| "grad_norm": 0.03251152113080025, |
| "learning_rate": 5.680887132966911e-06, |
| "loss": 0.0034, |
| "step": 17090 |
| }, |
| { |
| "epoch": 86.36363636363636, |
| "grad_norm": 0.04604816436767578, |
| "learning_rate": 5.642673702653683e-06, |
| "loss": 0.0039, |
| "step": 17100 |
| }, |
| { |
| "epoch": 86.41414141414141, |
| "grad_norm": 0.031586963683366776, |
| "learning_rate": 5.604581543576781e-06, |
| "loss": 0.0037, |
| "step": 17110 |
| }, |
| { |
| "epoch": 86.46464646464646, |
| "grad_norm": 0.05464068055152893, |
| "learning_rate": 5.566610759878704e-06, |
| "loss": 0.0042, |
| "step": 17120 |
| }, |
| { |
| "epoch": 86.51515151515152, |
| "grad_norm": 0.07352510094642639, |
| "learning_rate": 5.528761455370119e-06, |
| "loss": 0.0052, |
| "step": 17130 |
| }, |
| { |
| "epoch": 86.56565656565657, |
| "grad_norm": 0.042281001806259155, |
| "learning_rate": 5.491033733529594e-06, |
| "loss": 0.0034, |
| "step": 17140 |
| }, |
| { |
| "epoch": 86.61616161616162, |
| "grad_norm": 0.05766304209828377, |
| "learning_rate": 5.453427697503255e-06, |
| "loss": 0.0044, |
| "step": 17150 |
| }, |
| { |
| "epoch": 86.66666666666667, |
| "grad_norm": 0.05599381402134895, |
| "learning_rate": 5.415943450104599e-06, |
| "loss": 0.0054, |
| "step": 17160 |
| }, |
| { |
| "epoch": 86.71717171717172, |
| "grad_norm": 0.03706614300608635, |
| "learning_rate": 5.378581093814111e-06, |
| "loss": 0.0028, |
| "step": 17170 |
| }, |
| { |
| "epoch": 86.76767676767676, |
| "grad_norm": 0.04778679460287094, |
| "learning_rate": 5.3413407307790375e-06, |
| "loss": 0.0035, |
| "step": 17180 |
| }, |
| { |
| "epoch": 86.81818181818181, |
| "grad_norm": 0.051025185734033585, |
| "learning_rate": 5.30422246281313e-06, |
| "loss": 0.004, |
| "step": 17190 |
| }, |
| { |
| "epoch": 86.86868686868686, |
| "grad_norm": 0.05140963941812515, |
| "learning_rate": 5.267226391396296e-06, |
| "loss": 0.0047, |
| "step": 17200 |
| }, |
| { |
| "epoch": 86.91919191919192, |
| "grad_norm": 0.04682416468858719, |
| "learning_rate": 5.2303526176744e-06, |
| "loss": 0.003, |
| "step": 17210 |
| }, |
| { |
| "epoch": 86.96969696969697, |
| "grad_norm": 0.053372856229543686, |
| "learning_rate": 5.193601242458929e-06, |
| "loss": 0.0047, |
| "step": 17220 |
| }, |
| { |
| "epoch": 87.02020202020202, |
| "grad_norm": 0.049070846289396286, |
| "learning_rate": 5.156972366226714e-06, |
| "loss": 0.0044, |
| "step": 17230 |
| }, |
| { |
| "epoch": 87.07070707070707, |
| "grad_norm": 0.05205696076154709, |
| "learning_rate": 5.120466089119735e-06, |
| "loss": 0.0032, |
| "step": 17240 |
| }, |
| { |
| "epoch": 87.12121212121212, |
| "grad_norm": 0.0382784828543663, |
| "learning_rate": 5.084082510944749e-06, |
| "loss": 0.0039, |
| "step": 17250 |
| }, |
| { |
| "epoch": 87.17171717171718, |
| "grad_norm": 0.0661512240767479, |
| "learning_rate": 5.047821731173058e-06, |
| "loss": 0.004, |
| "step": 17260 |
| }, |
| { |
| "epoch": 87.22222222222223, |
| "grad_norm": 0.03276047110557556, |
| "learning_rate": 5.011683848940274e-06, |
| "loss": 0.004, |
| "step": 17270 |
| }, |
| { |
| "epoch": 87.27272727272727, |
| "grad_norm": 0.03832295164465904, |
| "learning_rate": 4.975668963045954e-06, |
| "loss": 0.006, |
| "step": 17280 |
| }, |
| { |
| "epoch": 87.32323232323232, |
| "grad_norm": 0.044937312602996826, |
| "learning_rate": 4.9397771719534525e-06, |
| "loss": 0.0039, |
| "step": 17290 |
| }, |
| { |
| "epoch": 87.37373737373737, |
| "grad_norm": 0.039545830339193344, |
| "learning_rate": 4.904008573789548e-06, |
| "loss": 0.0044, |
| "step": 17300 |
| }, |
| { |
| "epoch": 87.42424242424242, |
| "grad_norm": 0.03510792553424835, |
| "learning_rate": 4.8683632663442005e-06, |
| "loss": 0.0046, |
| "step": 17310 |
| }, |
| { |
| "epoch": 87.47474747474747, |
| "grad_norm": 0.04066254571080208, |
| "learning_rate": 4.832841347070343e-06, |
| "loss": 0.003, |
| "step": 17320 |
| }, |
| { |
| "epoch": 87.52525252525253, |
| "grad_norm": 0.054364513605833054, |
| "learning_rate": 4.797442913083539e-06, |
| "loss": 0.0044, |
| "step": 17330 |
| }, |
| { |
| "epoch": 87.57575757575758, |
| "grad_norm": 0.0527929849922657, |
| "learning_rate": 4.7621680611617596e-06, |
| "loss": 0.0046, |
| "step": 17340 |
| }, |
| { |
| "epoch": 87.62626262626263, |
| "grad_norm": 0.035776108503341675, |
| "learning_rate": 4.727016887745095e-06, |
| "loss": 0.0037, |
| "step": 17350 |
| }, |
| { |
| "epoch": 87.67676767676768, |
| "grad_norm": 0.055232495069503784, |
| "learning_rate": 4.691989488935511e-06, |
| "loss": 0.0047, |
| "step": 17360 |
| }, |
| { |
| "epoch": 87.72727272727273, |
| "grad_norm": 0.055246442556381226, |
| "learning_rate": 4.657085960496588e-06, |
| "loss": 0.0048, |
| "step": 17370 |
| }, |
| { |
| "epoch": 87.77777777777777, |
| "grad_norm": 0.04053269326686859, |
| "learning_rate": 4.6223063978532265e-06, |
| "loss": 0.0064, |
| "step": 17380 |
| }, |
| { |
| "epoch": 87.82828282828282, |
| "grad_norm": 0.04018593207001686, |
| "learning_rate": 4.587650896091439e-06, |
| "loss": 0.005, |
| "step": 17390 |
| }, |
| { |
| "epoch": 87.87878787878788, |
| "grad_norm": 0.07063507288694382, |
| "learning_rate": 4.553119549958035e-06, |
| "loss": 0.0036, |
| "step": 17400 |
| }, |
| { |
| "epoch": 87.92929292929293, |
| "grad_norm": 0.06845667958259583, |
| "learning_rate": 4.518712453860385e-06, |
| "loss": 0.0047, |
| "step": 17410 |
| }, |
| { |
| "epoch": 87.97979797979798, |
| "grad_norm": 0.05673421919345856, |
| "learning_rate": 4.484429701866205e-06, |
| "loss": 0.0045, |
| "step": 17420 |
| }, |
| { |
| "epoch": 88.03030303030303, |
| "grad_norm": 0.040732745081186295, |
| "learning_rate": 4.4502713877031975e-06, |
| "loss": 0.0038, |
| "step": 17430 |
| }, |
| { |
| "epoch": 88.08080808080808, |
| "grad_norm": 0.052769098430871964, |
| "learning_rate": 4.416237604758911e-06, |
| "loss": 0.0032, |
| "step": 17440 |
| }, |
| { |
| "epoch": 88.13131313131314, |
| "grad_norm": 0.054453298449516296, |
| "learning_rate": 4.3823284460804025e-06, |
| "loss": 0.0042, |
| "step": 17450 |
| }, |
| { |
| "epoch": 88.18181818181819, |
| "grad_norm": 0.10970568656921387, |
| "learning_rate": 4.348544004374011e-06, |
| "loss": 0.0034, |
| "step": 17460 |
| }, |
| { |
| "epoch": 88.23232323232324, |
| "grad_norm": 0.0543520450592041, |
| "learning_rate": 4.314884372005123e-06, |
| "loss": 0.0038, |
| "step": 17470 |
| }, |
| { |
| "epoch": 88.28282828282828, |
| "grad_norm": 0.044143691658973694, |
| "learning_rate": 4.281349640997867e-06, |
| "loss": 0.0047, |
| "step": 17480 |
| }, |
| { |
| "epoch": 88.33333333333333, |
| "grad_norm": 0.0406656451523304, |
| "learning_rate": 4.247939903034942e-06, |
| "loss": 0.0039, |
| "step": 17490 |
| }, |
| { |
| "epoch": 88.38383838383838, |
| "grad_norm": 0.05508514493703842, |
| "learning_rate": 4.214655249457284e-06, |
| "loss": 0.0046, |
| "step": 17500 |
| }, |
| { |
| "epoch": 88.43434343434343, |
| "grad_norm": 0.06542282551527023, |
| "learning_rate": 4.181495771263855e-06, |
| "loss": 0.0029, |
| "step": 17510 |
| }, |
| { |
| "epoch": 88.48484848484848, |
| "grad_norm": 0.05241219326853752, |
| "learning_rate": 4.148461559111427e-06, |
| "loss": 0.005, |
| "step": 17520 |
| }, |
| { |
| "epoch": 88.53535353535354, |
| "grad_norm": 0.04970612749457359, |
| "learning_rate": 4.115552703314252e-06, |
| "loss": 0.0034, |
| "step": 17530 |
| }, |
| { |
| "epoch": 88.58585858585859, |
| "grad_norm": 0.055072735995054245, |
| "learning_rate": 4.082769293843886e-06, |
| "loss": 0.0038, |
| "step": 17540 |
| }, |
| { |
| "epoch": 88.63636363636364, |
| "grad_norm": 0.03332855924963951, |
| "learning_rate": 4.050111420328939e-06, |
| "loss": 0.0045, |
| "step": 17550 |
| }, |
| { |
| "epoch": 88.68686868686869, |
| "grad_norm": 0.042254358530044556, |
| "learning_rate": 4.017579172054764e-06, |
| "loss": 0.0029, |
| "step": 17560 |
| }, |
| { |
| "epoch": 88.73737373737374, |
| "grad_norm": 0.06204305961728096, |
| "learning_rate": 3.985172637963308e-06, |
| "loss": 0.0038, |
| "step": 17570 |
| }, |
| { |
| "epoch": 88.78787878787878, |
| "grad_norm": 0.04189785197377205, |
| "learning_rate": 3.952891906652784e-06, |
| "loss": 0.0054, |
| "step": 17580 |
| }, |
| { |
| "epoch": 88.83838383838383, |
| "grad_norm": 0.03866136819124222, |
| "learning_rate": 3.920737066377478e-06, |
| "loss": 0.0047, |
| "step": 17590 |
| }, |
| { |
| "epoch": 88.88888888888889, |
| "grad_norm": 0.060296591371297836, |
| "learning_rate": 3.888708205047509e-06, |
| "loss": 0.0027, |
| "step": 17600 |
| }, |
| { |
| "epoch": 88.93939393939394, |
| "grad_norm": 0.05010818690061569, |
| "learning_rate": 3.856805410228542e-06, |
| "loss": 0.0038, |
| "step": 17610 |
| }, |
| { |
| "epoch": 88.98989898989899, |
| "grad_norm": 0.033823516219854355, |
| "learning_rate": 3.82502876914162e-06, |
| "loss": 0.0034, |
| "step": 17620 |
| }, |
| { |
| "epoch": 89.04040404040404, |
| "grad_norm": 0.034181877970695496, |
| "learning_rate": 3.7933783686628586e-06, |
| "loss": 0.0034, |
| "step": 17630 |
| }, |
| { |
| "epoch": 89.0909090909091, |
| "grad_norm": 0.05725046247243881, |
| "learning_rate": 3.7618542953232306e-06, |
| "loss": 0.0035, |
| "step": 17640 |
| }, |
| { |
| "epoch": 89.14141414141415, |
| "grad_norm": 0.04720688983798027, |
| "learning_rate": 3.7304566353083658e-06, |
| "loss": 0.0031, |
| "step": 17650 |
| }, |
| { |
| "epoch": 89.1919191919192, |
| "grad_norm": 0.05363806337118149, |
| "learning_rate": 3.6991854744582555e-06, |
| "loss": 0.0038, |
| "step": 17660 |
| }, |
| { |
| "epoch": 89.24242424242425, |
| "grad_norm": 0.04350375011563301, |
| "learning_rate": 3.6680408982670777e-06, |
| "loss": 0.0028, |
| "step": 17670 |
| }, |
| { |
| "epoch": 89.29292929292929, |
| "grad_norm": 0.03218059986829758, |
| "learning_rate": 3.637022991882899e-06, |
| "loss": 0.005, |
| "step": 17680 |
| }, |
| { |
| "epoch": 89.34343434343434, |
| "grad_norm": 0.03996923193335533, |
| "learning_rate": 3.606131840107485e-06, |
| "loss": 0.0038, |
| "step": 17690 |
| }, |
| { |
| "epoch": 89.39393939393939, |
| "grad_norm": 0.03196267783641815, |
| "learning_rate": 3.575367527396084e-06, |
| "loss": 0.0039, |
| "step": 17700 |
| }, |
| { |
| "epoch": 89.44444444444444, |
| "grad_norm": 0.04089348763227463, |
| "learning_rate": 3.5447301378571386e-06, |
| "loss": 0.006, |
| "step": 17710 |
| }, |
| { |
| "epoch": 89.4949494949495, |
| "grad_norm": 0.052284859120845795, |
| "learning_rate": 3.514219755252113e-06, |
| "loss": 0.0038, |
| "step": 17720 |
| }, |
| { |
| "epoch": 89.54545454545455, |
| "grad_norm": 0.03968391567468643, |
| "learning_rate": 3.4838364629952213e-06, |
| "loss": 0.0032, |
| "step": 17730 |
| }, |
| { |
| "epoch": 89.5959595959596, |
| "grad_norm": 0.04206349700689316, |
| "learning_rate": 3.4535803441532123e-06, |
| "loss": 0.0037, |
| "step": 17740 |
| }, |
| { |
| "epoch": 89.64646464646465, |
| "grad_norm": 0.049555979669094086, |
| "learning_rate": 3.4234514814451836e-06, |
| "loss": 0.0056, |
| "step": 17750 |
| }, |
| { |
| "epoch": 89.6969696969697, |
| "grad_norm": 0.051781896501779556, |
| "learning_rate": 3.393449957242273e-06, |
| "loss": 0.0038, |
| "step": 17760 |
| }, |
| { |
| "epoch": 89.74747474747475, |
| "grad_norm": 0.03432776778936386, |
| "learning_rate": 3.363575853567524e-06, |
| "loss": 0.0042, |
| "step": 17770 |
| }, |
| { |
| "epoch": 89.79797979797979, |
| "grad_norm": 0.03770219162106514, |
| "learning_rate": 3.3338292520955826e-06, |
| "loss": 0.0043, |
| "step": 17780 |
| }, |
| { |
| "epoch": 89.84848484848484, |
| "grad_norm": 0.06619658321142197, |
| "learning_rate": 3.304210234152516e-06, |
| "loss": 0.0045, |
| "step": 17790 |
| }, |
| { |
| "epoch": 89.8989898989899, |
| "grad_norm": 0.03524979576468468, |
| "learning_rate": 3.2747188807155993e-06, |
| "loss": 0.006, |
| "step": 17800 |
| }, |
| { |
| "epoch": 89.94949494949495, |
| "grad_norm": 0.027555925771594048, |
| "learning_rate": 3.2453552724130643e-06, |
| "loss": 0.0046, |
| "step": 17810 |
| }, |
| { |
| "epoch": 90.0, |
| "grad_norm": 0.060094453394412994, |
| "learning_rate": 3.216119489523889e-06, |
| "loss": 0.0032, |
| "step": 17820 |
| }, |
| { |
| "epoch": 90.05050505050505, |
| "grad_norm": 0.04735299199819565, |
| "learning_rate": 3.1870116119775917e-06, |
| "loss": 0.004, |
| "step": 17830 |
| }, |
| { |
| "epoch": 90.1010101010101, |
| "grad_norm": 0.02756795473396778, |
| "learning_rate": 3.158031719353999e-06, |
| "loss": 0.0029, |
| "step": 17840 |
| }, |
| { |
| "epoch": 90.15151515151516, |
| "grad_norm": 0.04132930934429169, |
| "learning_rate": 3.1291798908830273e-06, |
| "loss": 0.0036, |
| "step": 17850 |
| }, |
| { |
| "epoch": 90.20202020202021, |
| "grad_norm": 0.03491568565368652, |
| "learning_rate": 3.1004562054444853e-06, |
| "loss": 0.004, |
| "step": 17860 |
| }, |
| { |
| "epoch": 90.25252525252525, |
| "grad_norm": 0.04508006200194359, |
| "learning_rate": 3.071860741567806e-06, |
| "loss": 0.0051, |
| "step": 17870 |
| }, |
| { |
| "epoch": 90.3030303030303, |
| "grad_norm": 0.04414810240268707, |
| "learning_rate": 3.04339357743193e-06, |
| "loss": 0.0036, |
| "step": 17880 |
| }, |
| { |
| "epoch": 90.35353535353535, |
| "grad_norm": 0.04808623343706131, |
| "learning_rate": 3.0150547908649628e-06, |
| "loss": 0.0036, |
| "step": 17890 |
| }, |
| { |
| "epoch": 90.4040404040404, |
| "grad_norm": 0.0406995452940464, |
| "learning_rate": 2.9868444593440957e-06, |
| "loss": 0.0035, |
| "step": 17900 |
| }, |
| { |
| "epoch": 90.45454545454545, |
| "grad_norm": 0.03358862176537514, |
| "learning_rate": 2.9587626599952846e-06, |
| "loss": 0.0044, |
| "step": 17910 |
| }, |
| { |
| "epoch": 90.5050505050505, |
| "grad_norm": 0.04659715294837952, |
| "learning_rate": 2.930809469593082e-06, |
| "loss": 0.005, |
| "step": 17920 |
| }, |
| { |
| "epoch": 90.55555555555556, |
| "grad_norm": 0.07087094336748123, |
| "learning_rate": 2.9029849645604733e-06, |
| "loss": 0.0053, |
| "step": 17930 |
| }, |
| { |
| "epoch": 90.60606060606061, |
| "grad_norm": 0.03274569287896156, |
| "learning_rate": 2.8752892209685632e-06, |
| "loss": 0.004, |
| "step": 17940 |
| }, |
| { |
| "epoch": 90.65656565656566, |
| "grad_norm": 0.04843807592988014, |
| "learning_rate": 2.847722314536483e-06, |
| "loss": 0.0057, |
| "step": 17950 |
| }, |
| { |
| "epoch": 90.70707070707071, |
| "grad_norm": 0.04069873318076134, |
| "learning_rate": 2.820284320631078e-06, |
| "loss": 0.0036, |
| "step": 17960 |
| }, |
| { |
| "epoch": 90.75757575757575, |
| "grad_norm": 0.049800556153059006, |
| "learning_rate": 2.792975314266788e-06, |
| "loss": 0.0035, |
| "step": 17970 |
| }, |
| { |
| "epoch": 90.8080808080808, |
| "grad_norm": 0.020762933418154716, |
| "learning_rate": 2.7657953701054007e-06, |
| "loss": 0.0033, |
| "step": 17980 |
| }, |
| { |
| "epoch": 90.85858585858585, |
| "grad_norm": 0.0408884696662426, |
| "learning_rate": 2.7387445624558306e-06, |
| "loss": 0.0029, |
| "step": 17990 |
| }, |
| { |
| "epoch": 90.9090909090909, |
| "grad_norm": 0.0442095510661602, |
| "learning_rate": 2.7118229652739747e-06, |
| "loss": 0.005, |
| "step": 18000 |
| }, |
| { |
| "epoch": 90.95959595959596, |
| "grad_norm": 0.050296369940042496, |
| "learning_rate": 2.6850306521624236e-06, |
| "loss": 0.004, |
| "step": 18010 |
| }, |
| { |
| "epoch": 91.01010101010101, |
| "grad_norm": 0.019173599779605865, |
| "learning_rate": 2.6583676963703507e-06, |
| "loss": 0.0031, |
| "step": 18020 |
| }, |
| { |
| "epoch": 91.06060606060606, |
| "grad_norm": 0.031140927225351334, |
| "learning_rate": 2.631834170793268e-06, |
| "loss": 0.0051, |
| "step": 18030 |
| }, |
| { |
| "epoch": 91.11111111111111, |
| "grad_norm": 0.03578264266252518, |
| "learning_rate": 2.6054301479728036e-06, |
| "loss": 0.0032, |
| "step": 18040 |
| }, |
| { |
| "epoch": 91.16161616161617, |
| "grad_norm": 0.04770519956946373, |
| "learning_rate": 2.579155700096575e-06, |
| "loss": 0.0044, |
| "step": 18050 |
| }, |
| { |
| "epoch": 91.21212121212122, |
| "grad_norm": 0.042660024017095566, |
| "learning_rate": 2.5530108989978873e-06, |
| "loss": 0.0034, |
| "step": 18060 |
| }, |
| { |
| "epoch": 91.26262626262626, |
| "grad_norm": 0.04649681597948074, |
| "learning_rate": 2.5269958161556416e-06, |
| "loss": 0.0027, |
| "step": 18070 |
| }, |
| { |
| "epoch": 91.31313131313131, |
| "grad_norm": 0.05673650652170181, |
| "learning_rate": 2.5011105226940888e-06, |
| "loss": 0.0035, |
| "step": 18080 |
| }, |
| { |
| "epoch": 91.36363636363636, |
| "grad_norm": 0.03204884007573128, |
| "learning_rate": 2.4753550893826248e-06, |
| "loss": 0.0036, |
| "step": 18090 |
| }, |
| { |
| "epoch": 91.41414141414141, |
| "grad_norm": 0.03134565055370331, |
| "learning_rate": 2.4497295866356296e-06, |
| "loss": 0.0028, |
| "step": 18100 |
| }, |
| { |
| "epoch": 91.46464646464646, |
| "grad_norm": 0.04258838668465614, |
| "learning_rate": 2.424234084512228e-06, |
| "loss": 0.0036, |
| "step": 18110 |
| }, |
| { |
| "epoch": 91.51515151515152, |
| "grad_norm": 0.03241683542728424, |
| "learning_rate": 2.3988686527161687e-06, |
| "loss": 0.005, |
| "step": 18120 |
| }, |
| { |
| "epoch": 91.56565656565657, |
| "grad_norm": 0.03781206160783768, |
| "learning_rate": 2.373633360595573e-06, |
| "loss": 0.0042, |
| "step": 18130 |
| }, |
| { |
| "epoch": 91.61616161616162, |
| "grad_norm": 0.049089618027210236, |
| "learning_rate": 2.3485282771427585e-06, |
| "loss": 0.0049, |
| "step": 18140 |
| }, |
| { |
| "epoch": 91.66666666666667, |
| "grad_norm": 0.04013260826468468, |
| "learning_rate": 2.3235534709940665e-06, |
| "loss": 0.0048, |
| "step": 18150 |
| }, |
| { |
| "epoch": 91.71717171717172, |
| "grad_norm": 0.06884553283452988, |
| "learning_rate": 2.2987090104296617e-06, |
| "loss": 0.0036, |
| "step": 18160 |
| }, |
| { |
| "epoch": 91.76767676767676, |
| "grad_norm": 0.0281562190502882, |
| "learning_rate": 2.273994963373355e-06, |
| "loss": 0.0043, |
| "step": 18170 |
| }, |
| { |
| "epoch": 91.81818181818181, |
| "grad_norm": 0.04102921485900879, |
| "learning_rate": 2.249411397392409e-06, |
| "loss": 0.0032, |
| "step": 18180 |
| }, |
| { |
| "epoch": 91.86868686868686, |
| "grad_norm": 0.047293271869421005, |
| "learning_rate": 2.2249583796973506e-06, |
| "loss": 0.0038, |
| "step": 18190 |
| }, |
| { |
| "epoch": 91.91919191919192, |
| "grad_norm": 0.03195156529545784, |
| "learning_rate": 2.200635977141796e-06, |
| "loss": 0.0041, |
| "step": 18200 |
| }, |
| { |
| "epoch": 91.96969696969697, |
| "grad_norm": 0.06988223642110825, |
| "learning_rate": 2.17644425622226e-06, |
| "loss": 0.004, |
| "step": 18210 |
| }, |
| { |
| "epoch": 92.02020202020202, |
| "grad_norm": 0.03746530041098595, |
| "learning_rate": 2.152383283077991e-06, |
| "loss": 0.0042, |
| "step": 18220 |
| }, |
| { |
| "epoch": 92.07070707070707, |
| "grad_norm": 0.07839635014533997, |
| "learning_rate": 2.128453123490781e-06, |
| "loss": 0.0037, |
| "step": 18230 |
| }, |
| { |
| "epoch": 92.12121212121212, |
| "grad_norm": 0.03648047521710396, |
| "learning_rate": 2.1046538428847462e-06, |
| "loss": 0.0036, |
| "step": 18240 |
| }, |
| { |
| "epoch": 92.17171717171718, |
| "grad_norm": 0.031017137691378593, |
| "learning_rate": 2.0809855063262273e-06, |
| "loss": 0.0048, |
| "step": 18250 |
| }, |
| { |
| "epoch": 92.22222222222223, |
| "grad_norm": 0.02425781637430191, |
| "learning_rate": 2.057448178523558e-06, |
| "loss": 0.0028, |
| "step": 18260 |
| }, |
| { |
| "epoch": 92.27272727272727, |
| "grad_norm": 0.02781876176595688, |
| "learning_rate": 2.034041923826885e-06, |
| "loss": 0.0037, |
| "step": 18270 |
| }, |
| { |
| "epoch": 92.32323232323232, |
| "grad_norm": 0.05620908737182617, |
| "learning_rate": 2.0107668062280204e-06, |
| "loss": 0.0044, |
| "step": 18280 |
| }, |
| { |
| "epoch": 92.37373737373737, |
| "grad_norm": 0.03351902216672897, |
| "learning_rate": 1.9876228893602357e-06, |
| "loss": 0.0039, |
| "step": 18290 |
| }, |
| { |
| "epoch": 92.42424242424242, |
| "grad_norm": 0.07220311462879181, |
| "learning_rate": 1.9646102364981266e-06, |
| "loss": 0.0043, |
| "step": 18300 |
| }, |
| { |
| "epoch": 92.47474747474747, |
| "grad_norm": 0.034759800881147385, |
| "learning_rate": 1.9417289105574053e-06, |
| "loss": 0.0042, |
| "step": 18310 |
| }, |
| { |
| "epoch": 92.52525252525253, |
| "grad_norm": 0.07266608625650406, |
| "learning_rate": 1.9189789740947427e-06, |
| "loss": 0.0042, |
| "step": 18320 |
| }, |
| { |
| "epoch": 92.57575757575758, |
| "grad_norm": 0.03637060150504112, |
| "learning_rate": 1.896360489307597e-06, |
| "loss": 0.0036, |
| "step": 18330 |
| }, |
| { |
| "epoch": 92.62626262626263, |
| "grad_norm": 0.04083196818828583, |
| "learning_rate": 1.8738735180340362e-06, |
| "loss": 0.0039, |
| "step": 18340 |
| }, |
| { |
| "epoch": 92.67676767676768, |
| "grad_norm": 0.024212179705500603, |
| "learning_rate": 1.8515181217525824e-06, |
| "loss": 0.0034, |
| "step": 18350 |
| }, |
| { |
| "epoch": 92.72727272727273, |
| "grad_norm": 0.054545819759368896, |
| "learning_rate": 1.8292943615820457e-06, |
| "loss": 0.005, |
| "step": 18360 |
| }, |
| { |
| "epoch": 92.77777777777777, |
| "grad_norm": 0.025757793337106705, |
| "learning_rate": 1.8072022982813296e-06, |
| "loss": 0.0044, |
| "step": 18370 |
| }, |
| { |
| "epoch": 92.82828282828282, |
| "grad_norm": 0.03443967178463936, |
| "learning_rate": 1.7852419922492925e-06, |
| "loss": 0.0042, |
| "step": 18380 |
| }, |
| { |
| "epoch": 92.87878787878788, |
| "grad_norm": 0.05914749950170517, |
| "learning_rate": 1.763413503524569e-06, |
| "loss": 0.0029, |
| "step": 18390 |
| }, |
| { |
| "epoch": 92.92929292929293, |
| "grad_norm": 0.035037267953157425, |
| "learning_rate": 1.7417168917854165e-06, |
| "loss": 0.004, |
| "step": 18400 |
| }, |
| { |
| "epoch": 92.97979797979798, |
| "grad_norm": 0.037610623985528946, |
| "learning_rate": 1.720152216349552e-06, |
| "loss": 0.0037, |
| "step": 18410 |
| }, |
| { |
| "epoch": 93.03030303030303, |
| "grad_norm": 0.060395658016204834, |
| "learning_rate": 1.6987195361739595e-06, |
| "loss": 0.0029, |
| "step": 18420 |
| }, |
| { |
| "epoch": 93.08080808080808, |
| "grad_norm": 0.05216611921787262, |
| "learning_rate": 1.6774189098547832e-06, |
| "loss": 0.0037, |
| "step": 18430 |
| }, |
| { |
| "epoch": 93.13131313131314, |
| "grad_norm": 0.035125941038131714, |
| "learning_rate": 1.6562503956271069e-06, |
| "loss": 0.0031, |
| "step": 18440 |
| }, |
| { |
| "epoch": 93.18181818181819, |
| "grad_norm": 0.050738029181957245, |
| "learning_rate": 1.6352140513648417e-06, |
| "loss": 0.0049, |
| "step": 18450 |
| }, |
| { |
| "epoch": 93.23232323232324, |
| "grad_norm": 0.038712430745363235, |
| "learning_rate": 1.6143099345805712e-06, |
| "loss": 0.0032, |
| "step": 18460 |
| }, |
| { |
| "epoch": 93.28282828282828, |
| "grad_norm": 0.02295299619436264, |
| "learning_rate": 1.5935381024253293e-06, |
| "loss": 0.0046, |
| "step": 18470 |
| }, |
| { |
| "epoch": 93.33333333333333, |
| "grad_norm": 0.031058920547366142, |
| "learning_rate": 1.572898611688517e-06, |
| "loss": 0.0041, |
| "step": 18480 |
| }, |
| { |
| "epoch": 93.38383838383838, |
| "grad_norm": 0.06300924718379974, |
| "learning_rate": 1.5523915187977133e-06, |
| "loss": 0.0033, |
| "step": 18490 |
| }, |
| { |
| "epoch": 93.43434343434343, |
| "grad_norm": 0.04438111558556557, |
| "learning_rate": 1.532016879818532e-06, |
| "loss": 0.0035, |
| "step": 18500 |
| }, |
| { |
| "epoch": 93.48484848484848, |
| "grad_norm": 0.056030288338661194, |
| "learning_rate": 1.51177475045447e-06, |
| "loss": 0.0032, |
| "step": 18510 |
| }, |
| { |
| "epoch": 93.53535353535354, |
| "grad_norm": 0.0393751822412014, |
| "learning_rate": 1.4916651860467035e-06, |
| "loss": 0.0043, |
| "step": 18520 |
| }, |
| { |
| "epoch": 93.58585858585859, |
| "grad_norm": 0.04453909769654274, |
| "learning_rate": 1.471688241574043e-06, |
| "loss": 0.0044, |
| "step": 18530 |
| }, |
| { |
| "epoch": 93.63636363636364, |
| "grad_norm": 0.024743949994444847, |
| "learning_rate": 1.451843971652672e-06, |
| "loss": 0.003, |
| "step": 18540 |
| }, |
| { |
| "epoch": 93.68686868686869, |
| "grad_norm": 0.04220050573348999, |
| "learning_rate": 1.432132430536076e-06, |
| "loss": 0.0046, |
| "step": 18550 |
| }, |
| { |
| "epoch": 93.73737373737374, |
| "grad_norm": 0.06438975036144257, |
| "learning_rate": 1.412553672114869e-06, |
| "loss": 0.0062, |
| "step": 18560 |
| }, |
| { |
| "epoch": 93.78787878787878, |
| "grad_norm": 0.06011359021067619, |
| "learning_rate": 1.3931077499166056e-06, |
| "loss": 0.0032, |
| "step": 18570 |
| }, |
| { |
| "epoch": 93.83838383838383, |
| "grad_norm": 0.04411096125841141, |
| "learning_rate": 1.3737947171057085e-06, |
| "loss": 0.0032, |
| "step": 18580 |
| }, |
| { |
| "epoch": 93.88888888888889, |
| "grad_norm": 0.03725773096084595, |
| "learning_rate": 1.3546146264832582e-06, |
| "loss": 0.0042, |
| "step": 18590 |
| }, |
| { |
| "epoch": 93.93939393939394, |
| "grad_norm": 0.06034037843346596, |
| "learning_rate": 1.3355675304869086e-06, |
| "loss": 0.0024, |
| "step": 18600 |
| }, |
| { |
| "epoch": 93.98989898989899, |
| "grad_norm": 0.03476404771208763, |
| "learning_rate": 1.3166534811906827e-06, |
| "loss": 0.0031, |
| "step": 18610 |
| }, |
| { |
| "epoch": 94.04040404040404, |
| "grad_norm": 0.04701095446944237, |
| "learning_rate": 1.2978725303048666e-06, |
| "loss": 0.0038, |
| "step": 18620 |
| }, |
| { |
| "epoch": 94.0909090909091, |
| "grad_norm": 0.03897915035486221, |
| "learning_rate": 1.2792247291758762e-06, |
| "loss": 0.005, |
| "step": 18630 |
| }, |
| { |
| "epoch": 94.14141414141415, |
| "grad_norm": 0.03873452544212341, |
| "learning_rate": 1.2607101287860635e-06, |
| "loss": 0.0035, |
| "step": 18640 |
| }, |
| { |
| "epoch": 94.1919191919192, |
| "grad_norm": 0.04186253622174263, |
| "learning_rate": 1.2423287797536654e-06, |
| "loss": 0.0037, |
| "step": 18650 |
| }, |
| { |
| "epoch": 94.24242424242425, |
| "grad_norm": 0.03364429250359535, |
| "learning_rate": 1.2240807323325776e-06, |
| "loss": 0.0038, |
| "step": 18660 |
| }, |
| { |
| "epoch": 94.29292929292929, |
| "grad_norm": 0.046862516552209854, |
| "learning_rate": 1.205966036412254e-06, |
| "loss": 0.0031, |
| "step": 18670 |
| }, |
| { |
| "epoch": 94.34343434343434, |
| "grad_norm": 0.053212303668260574, |
| "learning_rate": 1.1879847415175949e-06, |
| "loss": 0.0037, |
| "step": 18680 |
| }, |
| { |
| "epoch": 94.39393939393939, |
| "grad_norm": 0.05949003994464874, |
| "learning_rate": 1.1701368968087712e-06, |
| "loss": 0.005, |
| "step": 18690 |
| }, |
| { |
| "epoch": 94.44444444444444, |
| "grad_norm": 0.03779060021042824, |
| "learning_rate": 1.1524225510811116e-06, |
| "loss": 0.005, |
| "step": 18700 |
| }, |
| { |
| "epoch": 94.4949494949495, |
| "grad_norm": 0.04807324707508087, |
| "learning_rate": 1.1348417527649535e-06, |
| "loss": 0.0042, |
| "step": 18710 |
| }, |
| { |
| "epoch": 94.54545454545455, |
| "grad_norm": 0.05160413309931755, |
| "learning_rate": 1.1173945499255268e-06, |
| "loss": 0.0038, |
| "step": 18720 |
| }, |
| { |
| "epoch": 94.5959595959596, |
| "grad_norm": 0.050292376428842545, |
| "learning_rate": 1.1000809902628307e-06, |
| "loss": 0.0044, |
| "step": 18730 |
| }, |
| { |
| "epoch": 94.64646464646465, |
| "grad_norm": 0.03146674484014511, |
| "learning_rate": 1.082901121111468e-06, |
| "loss": 0.0057, |
| "step": 18740 |
| }, |
| { |
| "epoch": 94.6969696969697, |
| "grad_norm": 0.049421388655900955, |
| "learning_rate": 1.0658549894405456e-06, |
| "loss": 0.004, |
| "step": 18750 |
| }, |
| { |
| "epoch": 94.74747474747475, |
| "grad_norm": 0.04492296278476715, |
| "learning_rate": 1.0489426418535342e-06, |
| "loss": 0.0041, |
| "step": 18760 |
| }, |
| { |
| "epoch": 94.79797979797979, |
| "grad_norm": 0.04480140283703804, |
| "learning_rate": 1.0321641245881474e-06, |
| "loss": 0.0044, |
| "step": 18770 |
| }, |
| { |
| "epoch": 94.84848484848484, |
| "grad_norm": 0.041882362216711044, |
| "learning_rate": 1.015519483516214e-06, |
| "loss": 0.0039, |
| "step": 18780 |
| }, |
| { |
| "epoch": 94.8989898989899, |
| "grad_norm": 0.07387326657772064, |
| "learning_rate": 9.990087641435443e-07, |
| "loss": 0.0025, |
| "step": 18790 |
| }, |
| { |
| "epoch": 94.94949494949495, |
| "grad_norm": 0.05954978987574577, |
| "learning_rate": 9.826320116098132e-07, |
| "loss": 0.0038, |
| "step": 18800 |
| }, |
| { |
| "epoch": 95.0, |
| "grad_norm": 0.04798886179924011, |
| "learning_rate": 9.663892706884447e-07, |
| "loss": 0.0048, |
| "step": 18810 |
| }, |
| { |
| "epoch": 95.05050505050505, |
| "grad_norm": 0.03563932329416275, |
| "learning_rate": 9.502805857864616e-07, |
| "loss": 0.0043, |
| "step": 18820 |
| }, |
| { |
| "epoch": 95.1010101010101, |
| "grad_norm": 0.028271649032831192, |
| "learning_rate": 9.34306000944396e-07, |
| "loss": 0.0053, |
| "step": 18830 |
| }, |
| { |
| "epoch": 95.15151515151516, |
| "grad_norm": 0.02527770586311817, |
| "learning_rate": 9.184655598361624e-07, |
| "loss": 0.0039, |
| "step": 18840 |
| }, |
| { |
| "epoch": 95.20202020202021, |
| "grad_norm": 0.04958255961537361, |
| "learning_rate": 9.027593057689076e-07, |
| "loss": 0.0028, |
| "step": 18850 |
| }, |
| { |
| "epoch": 95.25252525252525, |
| "grad_norm": 0.03436202555894852, |
| "learning_rate": 8.871872816829441e-07, |
| "loss": 0.0034, |
| "step": 18860 |
| }, |
| { |
| "epoch": 95.3030303030303, |
| "grad_norm": 0.03816872462630272, |
| "learning_rate": 8.717495301515777e-07, |
| "loss": 0.0058, |
| "step": 18870 |
| }, |
| { |
| "epoch": 95.35353535353535, |
| "grad_norm": 0.03201194107532501, |
| "learning_rate": 8.564460933810415e-07, |
| "loss": 0.0035, |
| "step": 18880 |
| }, |
| { |
| "epoch": 95.4040404040404, |
| "grad_norm": 0.029647523537278175, |
| "learning_rate": 8.412770132103453e-07, |
| "loss": 0.0031, |
| "step": 18890 |
| }, |
| { |
| "epoch": 95.45454545454545, |
| "grad_norm": 0.02744038961827755, |
| "learning_rate": 8.262423311111711e-07, |
| "loss": 0.003, |
| "step": 18900 |
| }, |
| { |
| "epoch": 95.5050505050505, |
| "grad_norm": 0.049508027732372284, |
| "learning_rate": 8.113420881877665e-07, |
| "loss": 0.0035, |
| "step": 18910 |
| }, |
| { |
| "epoch": 95.55555555555556, |
| "grad_norm": 0.03543351590633392, |
| "learning_rate": 7.965763251768288e-07, |
| "loss": 0.0033, |
| "step": 18920 |
| }, |
| { |
| "epoch": 95.60606060606061, |
| "grad_norm": 0.02505590207874775, |
| "learning_rate": 7.819450824473995e-07, |
| "loss": 0.0035, |
| "step": 18930 |
| }, |
| { |
| "epoch": 95.65656565656566, |
| "grad_norm": 0.026979682967066765, |
| "learning_rate": 7.674484000007198e-07, |
| "loss": 0.0033, |
| "step": 18940 |
| }, |
| { |
| "epoch": 95.70707070707071, |
| "grad_norm": 0.0367315448820591, |
| "learning_rate": 7.530863174701752e-07, |
| "loss": 0.0031, |
| "step": 18950 |
| }, |
| { |
| "epoch": 95.75757575757575, |
| "grad_norm": 0.07017456740140915, |
| "learning_rate": 7.38858874121151e-07, |
| "loss": 0.0032, |
| "step": 18960 |
| }, |
| { |
| "epoch": 95.8080808080808, |
| "grad_norm": 0.04139334708452225, |
| "learning_rate": 7.247661088509328e-07, |
| "loss": 0.0035, |
| "step": 18970 |
| }, |
| { |
| "epoch": 95.85858585858585, |
| "grad_norm": 0.04143497720360756, |
| "learning_rate": 7.108080601886002e-07, |
| "loss": 0.0039, |
| "step": 18980 |
| }, |
| { |
| "epoch": 95.9090909090909, |
| "grad_norm": 0.045695606619119644, |
| "learning_rate": 6.969847662949336e-07, |
| "loss": 0.0037, |
| "step": 18990 |
| }, |
| { |
| "epoch": 95.95959595959596, |
| "grad_norm": 0.05012314021587372, |
| "learning_rate": 6.832962649622798e-07, |
| "loss": 0.0044, |
| "step": 19000 |
| }, |
| { |
| "epoch": 96.01010101010101, |
| "grad_norm": 0.04472542926669121, |
| "learning_rate": 6.697425936144863e-07, |
| "loss": 0.0042, |
| "step": 19010 |
| }, |
| { |
| "epoch": 96.06060606060606, |
| "grad_norm": 0.039515864104032516, |
| "learning_rate": 6.563237893067731e-07, |
| "loss": 0.0032, |
| "step": 19020 |
| }, |
| { |
| "epoch": 96.11111111111111, |
| "grad_norm": 0.05459969490766525, |
| "learning_rate": 6.430398887256328e-07, |
| "loss": 0.0042, |
| "step": 19030 |
| }, |
| { |
| "epoch": 96.16161616161617, |
| "grad_norm": 0.03233601152896881, |
| "learning_rate": 6.298909281887478e-07, |
| "loss": 0.0041, |
| "step": 19040 |
| }, |
| { |
| "epoch": 96.21212121212122, |
| "grad_norm": 0.0407043993473053, |
| "learning_rate": 6.168769436448673e-07, |
| "loss": 0.0037, |
| "step": 19050 |
| }, |
| { |
| "epoch": 96.26262626262626, |
| "grad_norm": 0.017194926738739014, |
| "learning_rate": 6.03997970673742e-07, |
| "loss": 0.0044, |
| "step": 19060 |
| }, |
| { |
| "epoch": 96.31313131313131, |
| "grad_norm": 0.04515859857201576, |
| "learning_rate": 5.912540444859782e-07, |
| "loss": 0.0036, |
| "step": 19070 |
| }, |
| { |
| "epoch": 96.36363636363636, |
| "grad_norm": 0.050143543630838394, |
| "learning_rate": 5.786451999229837e-07, |
| "loss": 0.0039, |
| "step": 19080 |
| }, |
| { |
| "epoch": 96.41414141414141, |
| "grad_norm": 0.03943491727113724, |
| "learning_rate": 5.661714714568722e-07, |
| "loss": 0.0027, |
| "step": 19090 |
| }, |
| { |
| "epoch": 96.46464646464646, |
| "grad_norm": 0.043478913605213165, |
| "learning_rate": 5.538328931903259e-07, |
| "loss": 0.004, |
| "step": 19100 |
| }, |
| { |
| "epoch": 96.51515151515152, |
| "grad_norm": 0.07941284775733948, |
| "learning_rate": 5.416294988565551e-07, |
| "loss": 0.0046, |
| "step": 19110 |
| }, |
| { |
| "epoch": 96.56565656565657, |
| "grad_norm": 0.02984599582850933, |
| "learning_rate": 5.29561321819172e-07, |
| "loss": 0.0039, |
| "step": 19120 |
| }, |
| { |
| "epoch": 96.61616161616162, |
| "grad_norm": 0.0289442241191864, |
| "learning_rate": 5.176283950721061e-07, |
| "loss": 0.0034, |
| "step": 19130 |
| }, |
| { |
| "epoch": 96.66666666666667, |
| "grad_norm": 0.04183840751647949, |
| "learning_rate": 5.058307512395332e-07, |
| "loss": 0.0035, |
| "step": 19140 |
| }, |
| { |
| "epoch": 96.71717171717172, |
| "grad_norm": 0.051290154457092285, |
| "learning_rate": 4.941684225757526e-07, |
| "loss": 0.0029, |
| "step": 19150 |
| }, |
| { |
| "epoch": 96.76767676767676, |
| "grad_norm": 0.03290151432156563, |
| "learning_rate": 4.826414409651314e-07, |
| "loss": 0.0038, |
| "step": 19160 |
| }, |
| { |
| "epoch": 96.81818181818181, |
| "grad_norm": 0.021863160654902458, |
| "learning_rate": 4.712498379219943e-07, |
| "loss": 0.0042, |
| "step": 19170 |
| }, |
| { |
| "epoch": 96.86868686868686, |
| "grad_norm": 0.06561406701803207, |
| "learning_rate": 4.599936445905506e-07, |
| "loss": 0.0027, |
| "step": 19180 |
| }, |
| { |
| "epoch": 96.91919191919192, |
| "grad_norm": 0.04439042508602142, |
| "learning_rate": 4.4887289174480594e-07, |
| "loss": 0.0034, |
| "step": 19190 |
| }, |
| { |
| "epoch": 96.96969696969697, |
| "grad_norm": 0.05049925297498703, |
| "learning_rate": 4.378876097884621e-07, |
| "loss": 0.0036, |
| "step": 19200 |
| }, |
| { |
| "epoch": 97.02020202020202, |
| "grad_norm": 0.03010014072060585, |
| "learning_rate": 4.2703782875487264e-07, |
| "loss": 0.0039, |
| "step": 19210 |
| }, |
| { |
| "epoch": 97.07070707070707, |
| "grad_norm": 0.04316074773669243, |
| "learning_rate": 4.163235783069208e-07, |
| "loss": 0.0039, |
| "step": 19220 |
| }, |
| { |
| "epoch": 97.12121212121212, |
| "grad_norm": 0.04912576079368591, |
| "learning_rate": 4.057448877369585e-07, |
| "loss": 0.0034, |
| "step": 19230 |
| }, |
| { |
| "epoch": 97.17171717171718, |
| "grad_norm": 0.02317500114440918, |
| "learning_rate": 3.9530178596672295e-07, |
| "loss": 0.0044, |
| "step": 19240 |
| }, |
| { |
| "epoch": 97.22222222222223, |
| "grad_norm": 0.03432612121105194, |
| "learning_rate": 3.849943015472479e-07, |
| "loss": 0.005, |
| "step": 19250 |
| }, |
| { |
| "epoch": 97.27272727272727, |
| "grad_norm": 0.028366610407829285, |
| "learning_rate": 3.748224626588137e-07, |
| "loss": 0.003, |
| "step": 19260 |
| }, |
| { |
| "epoch": 97.32323232323232, |
| "grad_norm": 0.04143473133444786, |
| "learning_rate": 3.647862971108307e-07, |
| "loss": 0.0039, |
| "step": 19270 |
| }, |
| { |
| "epoch": 97.37373737373737, |
| "grad_norm": 0.07116958498954773, |
| "learning_rate": 3.5488583234179473e-07, |
| "loss": 0.0037, |
| "step": 19280 |
| }, |
| { |
| "epoch": 97.42424242424242, |
| "grad_norm": 0.029003122821450233, |
| "learning_rate": 3.4512109541920413e-07, |
| "loss": 0.0033, |
| "step": 19290 |
| }, |
| { |
| "epoch": 97.47474747474747, |
| "grad_norm": 0.02539253793656826, |
| "learning_rate": 3.354921130394706e-07, |
| "loss": 0.0044, |
| "step": 19300 |
| }, |
| { |
| "epoch": 97.52525252525253, |
| "grad_norm": 0.04416859522461891, |
| "learning_rate": 3.259989115278639e-07, |
| "loss": 0.0035, |
| "step": 19310 |
| }, |
| { |
| "epoch": 97.57575757575758, |
| "grad_norm": 0.016116200014948845, |
| "learning_rate": 3.1664151683843403e-07, |
| "loss": 0.0033, |
| "step": 19320 |
| }, |
| { |
| "epoch": 97.62626262626263, |
| "grad_norm": 0.024798711761832237, |
| "learning_rate": 3.074199545539447e-07, |
| "loss": 0.0034, |
| "step": 19330 |
| }, |
| { |
| "epoch": 97.67676767676768, |
| "grad_norm": 0.036049872636795044, |
| "learning_rate": 2.983342498857955e-07, |
| "loss": 0.0037, |
| "step": 19340 |
| }, |
| { |
| "epoch": 97.72727272727273, |
| "grad_norm": 0.03611943870782852, |
| "learning_rate": 2.893844276739499e-07, |
| "loss": 0.0056, |
| "step": 19350 |
| }, |
| { |
| "epoch": 97.77777777777777, |
| "grad_norm": 0.030951781198382378, |
| "learning_rate": 2.8057051238688514e-07, |
| "loss": 0.0028, |
| "step": 19360 |
| }, |
| { |
| "epoch": 97.82828282828282, |
| "grad_norm": 0.044782210141420364, |
| "learning_rate": 2.71892528121509e-07, |
| "loss": 0.0034, |
| "step": 19370 |
| }, |
| { |
| "epoch": 97.87878787878788, |
| "grad_norm": 0.04859697073698044, |
| "learning_rate": 2.633504986030988e-07, |
| "loss": 0.0064, |
| "step": 19380 |
| }, |
| { |
| "epoch": 97.92929292929293, |
| "grad_norm": 0.02026473544538021, |
| "learning_rate": 2.549444471852347e-07, |
| "loss": 0.0041, |
| "step": 19390 |
| }, |
| { |
| "epoch": 97.97979797979798, |
| "grad_norm": 0.0452253557741642, |
| "learning_rate": 2.4667439684974423e-07, |
| "loss": 0.0036, |
| "step": 19400 |
| }, |
| { |
| "epoch": 98.03030303030303, |
| "grad_norm": 0.028337610885500908, |
| "learning_rate": 2.3854037020662467e-07, |
| "loss": 0.0031, |
| "step": 19410 |
| }, |
| { |
| "epoch": 98.08080808080808, |
| "grad_norm": 0.0540512390434742, |
| "learning_rate": 2.3054238949399288e-07, |
| "loss": 0.0038, |
| "step": 19420 |
| }, |
| { |
| "epoch": 98.13131313131314, |
| "grad_norm": 0.0500851608812809, |
| "learning_rate": 2.2268047657802993e-07, |
| "loss": 0.0049, |
| "step": 19430 |
| }, |
| { |
| "epoch": 98.18181818181819, |
| "grad_norm": 0.07143998891115189, |
| "learning_rate": 2.149546529529034e-07, |
| "loss": 0.0036, |
| "step": 19440 |
| }, |
| { |
| "epoch": 98.23232323232324, |
| "grad_norm": 0.03580980375409126, |
| "learning_rate": 2.0736493974071736e-07, |
| "loss": 0.004, |
| "step": 19450 |
| }, |
| { |
| "epoch": 98.28282828282828, |
| "grad_norm": 0.03180670365691185, |
| "learning_rate": 1.9991135769145686e-07, |
| "loss": 0.0037, |
| "step": 19460 |
| }, |
| { |
| "epoch": 98.33333333333333, |
| "grad_norm": 0.04010605439543724, |
| "learning_rate": 1.9259392718293245e-07, |
| "loss": 0.0038, |
| "step": 19470 |
| }, |
| { |
| "epoch": 98.38383838383838, |
| "grad_norm": 0.0340183861553669, |
| "learning_rate": 1.8541266822072467e-07, |
| "loss": 0.0034, |
| "step": 19480 |
| }, |
| { |
| "epoch": 98.43434343434343, |
| "grad_norm": 0.05510501563549042, |
| "learning_rate": 1.7836760043811184e-07, |
| "loss": 0.0043, |
| "step": 19490 |
| }, |
| { |
| "epoch": 98.48484848484848, |
| "grad_norm": 0.049830302596092224, |
| "learning_rate": 1.7145874309604792e-07, |
| "loss": 0.0051, |
| "step": 19500 |
| }, |
| { |
| "epoch": 98.53535353535354, |
| "grad_norm": 0.03263877332210541, |
| "learning_rate": 1.6468611508308474e-07, |
| "loss": 0.0036, |
| "step": 19510 |
| }, |
| { |
| "epoch": 98.58585858585859, |
| "grad_norm": 0.025438496842980385, |
| "learning_rate": 1.5804973491532204e-07, |
| "loss": 0.004, |
| "step": 19520 |
| }, |
| { |
| "epoch": 98.63636363636364, |
| "grad_norm": 0.03540460765361786, |
| "learning_rate": 1.5154962073637424e-07, |
| "loss": 0.0044, |
| "step": 19530 |
| }, |
| { |
| "epoch": 98.68686868686869, |
| "grad_norm": 0.042209696024656296, |
| "learning_rate": 1.4518579031730372e-07, |
| "loss": 0.0039, |
| "step": 19540 |
| }, |
| { |
| "epoch": 98.73737373737374, |
| "grad_norm": 0.04924866929650307, |
| "learning_rate": 1.389582610565876e-07, |
| "loss": 0.004, |
| "step": 19550 |
| }, |
| { |
| "epoch": 98.78787878787878, |
| "grad_norm": 0.04095659777522087, |
| "learning_rate": 1.3286704998003995e-07, |
| "loss": 0.005, |
| "step": 19560 |
| }, |
| { |
| "epoch": 98.83838383838383, |
| "grad_norm": 0.029495500028133392, |
| "learning_rate": 1.2691217374080632e-07, |
| "loss": 0.0034, |
| "step": 19570 |
| }, |
| { |
| "epoch": 98.88888888888889, |
| "grad_norm": 0.049884043633937836, |
| "learning_rate": 1.2109364861929705e-07, |
| "loss": 0.0037, |
| "step": 19580 |
| }, |
| { |
| "epoch": 98.93939393939394, |
| "grad_norm": 0.02595648355782032, |
| "learning_rate": 1.1541149052312628e-07, |
| "loss": 0.0039, |
| "step": 19590 |
| }, |
| { |
| "epoch": 98.98989898989899, |
| "grad_norm": 0.10371682792901993, |
| "learning_rate": 1.0986571498710074e-07, |
| "loss": 0.0048, |
| "step": 19600 |
| }, |
| { |
| "epoch": 99.04040404040404, |
| "grad_norm": 0.033988844603300095, |
| "learning_rate": 1.0445633717316438e-07, |
| "loss": 0.0033, |
| "step": 19610 |
| }, |
| { |
| "epoch": 99.0909090909091, |
| "grad_norm": 0.04691971838474274, |
| "learning_rate": 9.918337187034277e-08, |
| "loss": 0.0044, |
| "step": 19620 |
| }, |
| { |
| "epoch": 99.14141414141415, |
| "grad_norm": 0.024039743468165398, |
| "learning_rate": 9.404683349472643e-08, |
| "loss": 0.0047, |
| "step": 19630 |
| }, |
| { |
| "epoch": 99.1919191919192, |
| "grad_norm": 0.03623565286397934, |
| "learning_rate": 8.904673608940983e-08, |
| "loss": 0.0047, |
| "step": 19640 |
| }, |
| { |
| "epoch": 99.24242424242425, |
| "grad_norm": 0.031936511397361755, |
| "learning_rate": 8.418309332447471e-08, |
| "loss": 0.0043, |
| "step": 19650 |
| }, |
| { |
| "epoch": 99.29292929292929, |
| "grad_norm": 0.06535791605710983, |
| "learning_rate": 7.945591849692902e-08, |
| "loss": 0.005, |
| "step": 19660 |
| }, |
| { |
| "epoch": 99.34343434343434, |
| "grad_norm": 0.03242163732647896, |
| "learning_rate": 7.486522453069578e-08, |
| "loss": 0.003, |
| "step": 19670 |
| }, |
| { |
| "epoch": 99.39393939393939, |
| "grad_norm": 0.057140789926052094, |
| "learning_rate": 7.041102397655208e-08, |
| "loss": 0.0045, |
| "step": 19680 |
| }, |
| { |
| "epoch": 99.44444444444444, |
| "grad_norm": 0.053853847086429596, |
| "learning_rate": 6.609332901210685e-08, |
| "loss": 0.0042, |
| "step": 19690 |
| }, |
| { |
| "epoch": 99.4949494949495, |
| "grad_norm": 0.02903430350124836, |
| "learning_rate": 6.191215144178419e-08, |
| "loss": 0.0034, |
| "step": 19700 |
| }, |
| { |
| "epoch": 99.54545454545455, |
| "grad_norm": 0.052556391805410385, |
| "learning_rate": 5.786750269675678e-08, |
| "loss": 0.0034, |
| "step": 19710 |
| }, |
| { |
| "epoch": 99.5959595959596, |
| "grad_norm": 0.037251293659210205, |
| "learning_rate": 5.395939383494031e-08, |
| "loss": 0.0033, |
| "step": 19720 |
| }, |
| { |
| "epoch": 99.64646464646465, |
| "grad_norm": 0.04870278388261795, |
| "learning_rate": 5.018783554095463e-08, |
| "loss": 0.0043, |
| "step": 19730 |
| }, |
| { |
| "epoch": 99.6969696969697, |
| "grad_norm": 0.03333054855465889, |
| "learning_rate": 4.655283812610156e-08, |
| "loss": 0.0038, |
| "step": 19740 |
| }, |
| { |
| "epoch": 99.74747474747475, |
| "grad_norm": 0.050699446350336075, |
| "learning_rate": 4.305441152831491e-08, |
| "loss": 0.0049, |
| "step": 19750 |
| }, |
| { |
| "epoch": 99.79797979797979, |
| "grad_norm": 0.034104641526937485, |
| "learning_rate": 3.9692565312171584e-08, |
| "loss": 0.0036, |
| "step": 19760 |
| }, |
| { |
| "epoch": 99.84848484848484, |
| "grad_norm": 0.031108910217881203, |
| "learning_rate": 3.6467308668824975e-08, |
| "loss": 0.0031, |
| "step": 19770 |
| }, |
| { |
| "epoch": 99.8989898989899, |
| "grad_norm": 0.05221113562583923, |
| "learning_rate": 3.3378650416004964e-08, |
| "loss": 0.0052, |
| "step": 19780 |
| }, |
| { |
| "epoch": 99.94949494949495, |
| "grad_norm": 0.03935597464442253, |
| "learning_rate": 3.042659899797906e-08, |
| "loss": 0.0038, |
| "step": 19790 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 0.04355981573462486, |
| "learning_rate": 2.76111624855524e-08, |
| "loss": 0.0036, |
| "step": 19800 |
| }, |
| { |
| "epoch": 100.05050505050505, |
| "grad_norm": 0.023957056924700737, |
| "learning_rate": 2.4932348576017784e-08, |
| "loss": 0.0055, |
| "step": 19810 |
| }, |
| { |
| "epoch": 100.1010101010101, |
| "grad_norm": 0.02798941545188427, |
| "learning_rate": 2.239016459314458e-08, |
| "loss": 0.0025, |
| "step": 19820 |
| }, |
| { |
| "epoch": 100.15151515151516, |
| "grad_norm": 0.052029967308044434, |
| "learning_rate": 1.9984617487173174e-08, |
| "loss": 0.0052, |
| "step": 19830 |
| }, |
| { |
| "epoch": 100.20202020202021, |
| "grad_norm": 0.047520752996206284, |
| "learning_rate": 1.7715713834776105e-08, |
| "loss": 0.0032, |
| "step": 19840 |
| }, |
| { |
| "epoch": 100.25252525252525, |
| "grad_norm": 0.039928119629621506, |
| "learning_rate": 1.5583459839046964e-08, |
| "loss": 0.0044, |
| "step": 19850 |
| }, |
| { |
| "epoch": 100.3030303030303, |
| "grad_norm": 0.05859793722629547, |
| "learning_rate": 1.3587861329489304e-08, |
| "loss": 0.003, |
| "step": 19860 |
| }, |
| { |
| "epoch": 100.35353535353535, |
| "grad_norm": 0.05113937705755234, |
| "learning_rate": 1.1728923761994415e-08, |
| "loss": 0.0037, |
| "step": 19870 |
| }, |
| { |
| "epoch": 100.4040404040404, |
| "grad_norm": 0.03595266863703728, |
| "learning_rate": 1.0006652218819135e-08, |
| "loss": 0.0035, |
| "step": 19880 |
| }, |
| { |
| "epoch": 100.45454545454545, |
| "grad_norm": 0.04748938977718353, |
| "learning_rate": 8.421051408596947e-09, |
| "loss": 0.0025, |
| "step": 19890 |
| }, |
| { |
| "epoch": 100.5050505050505, |
| "grad_norm": 0.027514083310961723, |
| "learning_rate": 6.972125666299123e-09, |
| "loss": 0.0036, |
| "step": 19900 |
| }, |
| { |
| "epoch": 100.55555555555556, |
| "grad_norm": 0.05337878316640854, |
| "learning_rate": 5.659878953229169e-09, |
| "loss": 0.0025, |
| "step": 19910 |
| }, |
| { |
| "epoch": 100.60606060606061, |
| "grad_norm": 0.03915149345993996, |
| "learning_rate": 4.48431485701728e-09, |
| "loss": 0.0037, |
| "step": 19920 |
| }, |
| { |
| "epoch": 100.65656565656566, |
| "grad_norm": 0.04100440442562103, |
| "learning_rate": 3.4454365916203322e-09, |
| "loss": 0.0034, |
| "step": 19930 |
| }, |
| { |
| "epoch": 100.70707070707071, |
| "grad_norm": 0.02787337265908718, |
| "learning_rate": 2.5432469972830332e-09, |
| "loss": 0.0042, |
| "step": 19940 |
| }, |
| { |
| "epoch": 100.75757575757575, |
| "grad_norm": 0.04069006070494652, |
| "learning_rate": 1.7777485405601203e-09, |
| "loss": 0.0038, |
| "step": 19950 |
| }, |
| { |
| "epoch": 100.8080808080808, |
| "grad_norm": 0.03178541734814644, |
| "learning_rate": 1.1489433142941597e-09, |
| "loss": 0.0037, |
| "step": 19960 |
| }, |
| { |
| "epoch": 100.85858585858585, |
| "grad_norm": 0.013675125315785408, |
| "learning_rate": 6.568330376210963e-10, |
| "loss": 0.0037, |
| "step": 19970 |
| }, |
| { |
| "epoch": 100.9090909090909, |
| "grad_norm": 0.04246926307678223, |
| "learning_rate": 3.0141905594249787e-10, |
| "loss": 0.0029, |
| "step": 19980 |
| }, |
| { |
| "epoch": 100.95959595959596, |
| "grad_norm": 0.03863817825913429, |
| "learning_rate": 8.270234094776008e-11, |
| "loss": 0.005, |
| "step": 19990 |
| }, |
| { |
| "epoch": 101.01010101010101, |
| "grad_norm": 0.033880241215229034, |
| "learning_rate": 6.834906085551041e-13, |
| "loss": 0.0044, |
| "step": 20000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 102, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 60, |
| "trial_name": null, |
| "trial_params": null |
| } |