| { | |
| "best_metric": 0.5851995594482614, | |
| "best_model_checkpoint": "vit-base-patch16-224-vit-base-patch16\\checkpoint-16086", | |
| "epoch": 2.9995804391403667, | |
| "eval_steps": 500, | |
| "global_step": 16086, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.107520198881293e-07, | |
| "loss": 7.9302, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 6.215040397762586e-07, | |
| "loss": 7.9301, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.32256059664388e-07, | |
| "loss": 7.93, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.2430080795525172e-06, | |
| "loss": 7.9296, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.5537600994406465e-06, | |
| "loss": 7.9293, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.864512119328776e-06, | |
| "loss": 7.9286, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.175264139216905e-06, | |
| "loss": 7.9281, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.4860161591050345e-06, | |
| "loss": 7.9278, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.7967681789931635e-06, | |
| "loss": 7.926, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.107520198881293e-06, | |
| "loss": 7.9253, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.418272218769422e-06, | |
| "loss": 7.9242, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.729024238657552e-06, | |
| "loss": 7.9225, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.0397762585456806e-06, | |
| "loss": 7.9207, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.35052827843381e-06, | |
| "loss": 7.9191, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.6612802983219395e-06, | |
| "loss": 7.917, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.972032318210069e-06, | |
| "loss": 7.9149, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.282784338098198e-06, | |
| "loss": 7.9116, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.593536357986327e-06, | |
| "loss": 7.9083, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 5.9042883778744565e-06, | |
| "loss": 7.9065, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.215040397762586e-06, | |
| "loss": 7.9019, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.5257924176507155e-06, | |
| "loss": 7.8957, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.836544437538844e-06, | |
| "loss": 7.8927, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.1472964574269735e-06, | |
| "loss": 7.8868, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.458048477315104e-06, | |
| "loss": 7.8831, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 7.768800497203232e-06, | |
| "loss": 7.8775, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.079552517091361e-06, | |
| "loss": 7.8719, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.39030453697949e-06, | |
| "loss": 7.8652, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.70105655686762e-06, | |
| "loss": 7.8573, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.01180857675575e-06, | |
| "loss": 7.8496, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.322560596643879e-06, | |
| "loss": 7.8454, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.633312616532007e-06, | |
| "loss": 7.8381, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.944064636420138e-06, | |
| "loss": 7.8295, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.0254816656308266e-05, | |
| "loss": 7.8215, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.0565568676196395e-05, | |
| "loss": 7.811, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.0876320696084526e-05, | |
| "loss": 7.8033, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.1187072715972654e-05, | |
| "loss": 7.7903, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.1497824735860784e-05, | |
| "loss": 7.7835, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.1808576755748913e-05, | |
| "loss": 7.7719, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.2119328775637043e-05, | |
| "loss": 7.7589, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.2430080795525172e-05, | |
| "loss": 7.7576, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.27408328154133e-05, | |
| "loss": 7.7456, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.3051584835301431e-05, | |
| "loss": 7.7309, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.3362336855189559e-05, | |
| "loss": 7.724, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.3673088875077688e-05, | |
| "loss": 7.7139, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.398384089496582e-05, | |
| "loss": 7.7025, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.4294592914853947e-05, | |
| "loss": 7.6857, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.4605344934742077e-05, | |
| "loss": 7.6762, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.4916096954630208e-05, | |
| "loss": 7.6618, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.5226848974518334e-05, | |
| "loss": 7.6498, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.5537600994406463e-05, | |
| "loss": 7.6415, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.5848353014294593e-05, | |
| "loss": 7.6319, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.6159105034182722e-05, | |
| "loss": 7.6112, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.646985705407085e-05, | |
| "loss": 7.6032, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.678060907395898e-05, | |
| "loss": 7.5935, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.709136109384711e-05, | |
| "loss": 7.5846, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.740211311373524e-05, | |
| "loss": 7.5674, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.771286513362337e-05, | |
| "loss": 7.5512, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.80236171535115e-05, | |
| "loss": 7.541, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.833436917339963e-05, | |
| "loss": 7.5262, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.8645121193287758e-05, | |
| "loss": 7.5138, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.8955873213175887e-05, | |
| "loss": 7.4991, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9266625233064014e-05, | |
| "loss": 7.4734, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9577377252952146e-05, | |
| "loss": 7.4607, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9888129272840276e-05, | |
| "loss": 7.4621, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.0198881292728402e-05, | |
| "loss": 7.4499, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.050963331261653e-05, | |
| "loss": 7.4228, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.0820385332504664e-05, | |
| "loss": 7.4174, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.113113735239279e-05, | |
| "loss": 7.3955, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.144188937228092e-05, | |
| "loss": 7.387, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.1752641392169053e-05, | |
| "loss": 7.3643, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.206339341205718e-05, | |
| "loss": 7.3718, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.2374145431945308e-05, | |
| "loss": 7.3485, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.2684897451833438e-05, | |
| "loss": 7.3414, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.2995649471721567e-05, | |
| "loss": 7.3117, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.3306401491609697e-05, | |
| "loss": 7.2965, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.3617153511497826e-05, | |
| "loss": 7.3053, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.3927905531385956e-05, | |
| "loss": 7.2712, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.4238657551274085e-05, | |
| "loss": 7.2515, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.4549409571162214e-05, | |
| "loss": 7.2411, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.4860161591050344e-05, | |
| "loss": 7.2357, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.5170913610938473e-05, | |
| "loss": 7.2151, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.54816656308266e-05, | |
| "loss": 7.1943, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.579241765071473e-05, | |
| "loss": 7.1974, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.6103169670602862e-05, | |
| "loss": 7.2105, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.6413921690490988e-05, | |
| "loss": 7.156, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.6724673710379117e-05, | |
| "loss": 7.1484, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.703542573026725e-05, | |
| "loss": 7.1293, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.7346177750155376e-05, | |
| "loss": 7.1283, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.7656929770043506e-05, | |
| "loss": 7.1159, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.796768178993164e-05, | |
| "loss": 7.0768, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8278433809819765e-05, | |
| "loss": 7.0734, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8589185829707894e-05, | |
| "loss": 7.068, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8899937849596027e-05, | |
| "loss": 7.0497, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.9210689869484153e-05, | |
| "loss": 7.0373, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.9521441889372283e-05, | |
| "loss": 7.0332, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.9832193909260415e-05, | |
| "loss": 7.0304, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.014294592914854e-05, | |
| "loss": 7.0186, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.0453697949036668e-05, | |
| "loss": 6.9994, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.0764449968924804e-05, | |
| "loss": 6.988, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.1075201988812927e-05, | |
| "loss": 6.9575, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.1385954008701056e-05, | |
| "loss": 6.9407, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.1696706028589185e-05, | |
| "loss": 6.9424, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.2007458048477315e-05, | |
| "loss": 6.9267, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.2318210068365444e-05, | |
| "loss": 6.9348, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.2628962088253574e-05, | |
| "loss": 6.9101, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.29397141081417e-05, | |
| "loss": 6.8636, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.325046612802983e-05, | |
| "loss": 6.8813, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.356121814791796e-05, | |
| "loss": 6.8706, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.387197016780609e-05, | |
| "loss": 6.8722, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.418272218769422e-05, | |
| "loss": 6.8492, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.449347420758235e-05, | |
| "loss": 6.8014, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.480422622747048e-05, | |
| "loss": 6.8025, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.511497824735861e-05, | |
| "loss": 6.8108, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.542573026724674e-05, | |
| "loss": 6.8155, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.573648228713487e-05, | |
| "loss": 6.7615, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.6047234307023e-05, | |
| "loss": 6.7385, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.635798632691113e-05, | |
| "loss": 6.7358, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.666873834679926e-05, | |
| "loss": 6.7388, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.6979490366687386e-05, | |
| "loss": 6.7352, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.7290242386575516e-05, | |
| "loss": 6.7093, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.7600994406463645e-05, | |
| "loss": 6.7211, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.7911746426351775e-05, | |
| "loss": 6.6963, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8222498446239904e-05, | |
| "loss": 6.6921, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.853325046612803e-05, | |
| "loss": 6.6363, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.884400248601616e-05, | |
| "loss": 6.6501, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.915475450590429e-05, | |
| "loss": 6.6199, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.9465506525792415e-05, | |
| "loss": 6.5996, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.977625854568055e-05, | |
| "loss": 6.6274, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.008701056556868e-05, | |
| "loss": 6.5743, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.0397762585456804e-05, | |
| "loss": 6.5633, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.070851460534494e-05, | |
| "loss": 6.5602, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.101926662523306e-05, | |
| "loss": 6.5558, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.133001864512119e-05, | |
| "loss": 6.5785, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.164077066500933e-05, | |
| "loss": 6.5247, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.195152268489745e-05, | |
| "loss": 6.5409, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.226227470478558e-05, | |
| "loss": 6.4857, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.257302672467372e-05, | |
| "loss": 6.5344, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.288377874456184e-05, | |
| "loss": 6.5113, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.319453076444997e-05, | |
| "loss": 6.4928, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.3505282784338105e-05, | |
| "loss": 6.4826, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.381603480422623e-05, | |
| "loss": 6.4621, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.412678682411436e-05, | |
| "loss": 6.4211, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.4437538844002494e-05, | |
| "loss": 6.4753, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.4748290863890616e-05, | |
| "loss": 6.4343, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.5059042883778746e-05, | |
| "loss": 6.4252, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.5369794903666875e-05, | |
| "loss": 6.3907, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.5680546923555005e-05, | |
| "loss": 6.383, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5991298943443134e-05, | |
| "loss": 6.378, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.6302050963331264e-05, | |
| "loss": 6.3726, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.661280298321939e-05, | |
| "loss": 6.3738, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.692355500310752e-05, | |
| "loss": 6.3397, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.723430702299565e-05, | |
| "loss": 6.351, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.754505904288378e-05, | |
| "loss": 6.3207, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.785581106277191e-05, | |
| "loss": 6.3161, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.816656308266004e-05, | |
| "loss": 6.3103, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.847731510254817e-05, | |
| "loss": 6.291, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.87880671224363e-05, | |
| "loss": 6.2851, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.909881914232443e-05, | |
| "loss": 6.2605, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.940957116221256e-05, | |
| "loss": 6.2434, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.972032318210069e-05, | |
| "loss": 6.2739, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.999654624576915e-05, | |
| "loss": 6.2309, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.996200870346067e-05, | |
| "loss": 6.2764, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.992747116115217e-05, | |
| "loss": 6.1911, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.989293361884368e-05, | |
| "loss": 6.193, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.98583960765352e-05, | |
| "loss": 6.1912, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.982385853422671e-05, | |
| "loss": 6.1854, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.978932099191822e-05, | |
| "loss": 6.1694, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.9754783449609724e-05, | |
| "loss": 6.1337, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.972024590730124e-05, | |
| "loss": 6.1062, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.968570836499275e-05, | |
| "loss": 6.132, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.965117082268426e-05, | |
| "loss": 6.178, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.961663328037577e-05, | |
| "loss": 6.1392, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.958209573806728e-05, | |
| "loss": 6.1331, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.954755819575879e-05, | |
| "loss": 6.0742, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.95130206534503e-05, | |
| "loss": 6.0837, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.947848311114181e-05, | |
| "loss": 6.0774, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.944394556883332e-05, | |
| "loss": 6.0711, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.9409408026524834e-05, | |
| "loss": 6.0922, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.9374870484216344e-05, | |
| "loss": 6.041, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.9340332941907854e-05, | |
| "loss": 6.0081, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.930579539959937e-05, | |
| "loss": 5.9962, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.9271257857290875e-05, | |
| "loss": 6.0346, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.9236720314982385e-05, | |
| "loss": 6.0221, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.92021827726739e-05, | |
| "loss": 6.0185, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.916764523036541e-05, | |
| "loss": 5.9659, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.913310768805692e-05, | |
| "loss": 5.9443, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.9098570145748426e-05, | |
| "loss": 5.9759, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.9064032603439943e-05, | |
| "loss": 5.973, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.9029495061131454e-05, | |
| "loss": 5.9397, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.8994957518822964e-05, | |
| "loss": 5.9718, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.8960419976514474e-05, | |
| "loss": 5.8958, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.8925882434205985e-05, | |
| "loss": 5.9235, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.8891344891897495e-05, | |
| "loss": 5.8631, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.8856807349589005e-05, | |
| "loss": 5.8879, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.8822269807280516e-05, | |
| "loss": 5.9051, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.8787732264972026e-05, | |
| "loss": 5.8506, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.8753194722663536e-05, | |
| "loss": 5.8902, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.8718657180355047e-05, | |
| "loss": 5.8543, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.868411963804656e-05, | |
| "loss": 5.8779, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.8649582095738074e-05, | |
| "loss": 5.859, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.861504455342958e-05, | |
| "loss": 5.8649, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.858050701112109e-05, | |
| "loss": 5.8307, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.8545969468812605e-05, | |
| "loss": 5.7816, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.8511431926504115e-05, | |
| "loss": 5.7834, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.847689438419562e-05, | |
| "loss": 5.8585, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.844235684188713e-05, | |
| "loss": 5.758, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.8407819299578646e-05, | |
| "loss": 5.7945, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.8373281757270157e-05, | |
| "loss": 5.7647, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.833874421496167e-05, | |
| "loss": 5.7845, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.830420667265318e-05, | |
| "loss": 5.7382, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.826966913034469e-05, | |
| "loss": 5.7743, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.82351315880362e-05, | |
| "loss": 5.7199, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.820059404572771e-05, | |
| "loss": 5.7205, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.816605650341922e-05, | |
| "loss": 5.7001, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.813151896111073e-05, | |
| "loss": 5.7475, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.809698141880224e-05, | |
| "loss": 5.735, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.806244387649375e-05, | |
| "loss": 5.7608, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.8027906334185266e-05, | |
| "loss": 5.6663, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.799336879187678e-05, | |
| "loss": 5.6878, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.795883124956828e-05, | |
| "loss": 5.6914, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.792429370725979e-05, | |
| "loss": 5.6306, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.788975616495131e-05, | |
| "loss": 5.6178, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.785521862264282e-05, | |
| "loss": 5.679, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.782068108033432e-05, | |
| "loss": 5.6543, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.778614353802583e-05, | |
| "loss": 5.6911, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.775160599571735e-05, | |
| "loss": 5.6754, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.771706845340886e-05, | |
| "loss": 5.6252, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.768253091110037e-05, | |
| "loss": 5.6094, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.764799336879188e-05, | |
| "loss": 5.599, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.761345582648339e-05, | |
| "loss": 5.6413, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.75789182841749e-05, | |
| "loss": 5.6193, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.754438074186641e-05, | |
| "loss": 5.5898, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.750984319955792e-05, | |
| "loss": 5.572, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.747530565724943e-05, | |
| "loss": 5.6013, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.744076811494094e-05, | |
| "loss": 5.5543, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.740623057263245e-05, | |
| "loss": 5.5415, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.737169303032397e-05, | |
| "loss": 5.5246, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.733715548801548e-05, | |
| "loss": 5.5657, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.730261794570698e-05, | |
| "loss": 5.5453, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.7268080403398493e-05, | |
| "loss": 5.5467, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.723354286109001e-05, | |
| "loss": 5.5455, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.719900531878152e-05, | |
| "loss": 5.5253, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.7164467776473024e-05, | |
| "loss": 5.4831, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.7129930234164535e-05, | |
| "loss": 5.4704, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.709539269185605e-05, | |
| "loss": 5.4801, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.706085514954756e-05, | |
| "loss": 5.48, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.702631760723907e-05, | |
| "loss": 5.5388, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.699178006493058e-05, | |
| "loss": 5.4883, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.695724252262209e-05, | |
| "loss": 5.4321, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.69227049803136e-05, | |
| "loss": 5.4297, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.6888167438005114e-05, | |
| "loss": 5.4174, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.6853629895696624e-05, | |
| "loss": 5.5185, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.6819092353388134e-05, | |
| "loss": 5.4269, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.6784554811079645e-05, | |
| "loss": 5.4688, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.6750017268771155e-05, | |
| "loss": 5.384, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.671547972646267e-05, | |
| "loss": 5.4662, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.668094218415418e-05, | |
| "loss": 5.4473, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.6646404641845686e-05, | |
| "loss": 5.4024, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.6611867099537196e-05, | |
| "loss": 5.3608, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.657732955722871e-05, | |
| "loss": 5.4895, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.6542792014920224e-05, | |
| "loss": 5.3868, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.650825447261173e-05, | |
| "loss": 5.3681, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.647371693030324e-05, | |
| "loss": 5.4223, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.6439179387994755e-05, | |
| "loss": 5.412, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.6404641845686265e-05, | |
| "loss": 5.3381, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.6370104303377775e-05, | |
| "loss": 5.3195, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.6335566761069285e-05, | |
| "loss": 5.3945, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.6301029218760796e-05, | |
| "loss": 5.3316, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.6266491676452306e-05, | |
| "loss": 5.3232, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.6231954134143816e-05, | |
| "loss": 5.3246, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.619741659183533e-05, | |
| "loss": 5.3445, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.616287904952684e-05, | |
| "loss": 5.2847, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.612834150721835e-05, | |
| "loss": 5.2795, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.609380396490986e-05, | |
| "loss": 5.2559, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.6059266422601375e-05, | |
| "loss": 5.3091, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.6024728880292885e-05, | |
| "loss": 5.2441, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.599019133798439e-05, | |
| "loss": 5.2534, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.59556537956759e-05, | |
| "loss": 5.2869, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.5921116253367416e-05, | |
| "loss": 5.2629, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.5886578711058926e-05, | |
| "loss": 5.2835, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.585204116875043e-05, | |
| "loss": 5.2437, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.581750362644194e-05, | |
| "loss": 5.2736, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.578296608413346e-05, | |
| "loss": 5.2331, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.574842854182497e-05, | |
| "loss": 5.2059, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.571389099951648e-05, | |
| "loss": 5.2348, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.567935345720799e-05, | |
| "loss": 5.2183, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.56448159148995e-05, | |
| "loss": 5.1723, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.561027837259101e-05, | |
| "loss": 5.206, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.557574083028252e-05, | |
| "loss": 5.276, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.554120328797403e-05, | |
| "loss": 5.1271, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.550666574566554e-05, | |
| "loss": 5.1887, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.547212820335705e-05, | |
| "loss": 5.2678, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.543759066104856e-05, | |
| "loss": 5.2341, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.540305311874008e-05, | |
| "loss": 5.2136, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.536851557643158e-05, | |
| "loss": 5.1565, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.533397803412309e-05, | |
| "loss": 5.1883, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.52994404918146e-05, | |
| "loss": 5.187, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.526490294950612e-05, | |
| "loss": 5.145, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.523036540719763e-05, | |
| "loss": 5.1121, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.519582786488913e-05, | |
| "loss": 5.063, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.516129032258064e-05, | |
| "loss": 5.157, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.512675278027216e-05, | |
| "loss": 5.1123, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.509221523796367e-05, | |
| "loss": 5.115, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.505767769565518e-05, | |
| "loss": 5.1296, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.502314015334669e-05, | |
| "loss": 5.1624, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.49886026110382e-05, | |
| "loss": 5.0889, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.495406506872971e-05, | |
| "loss": 5.0914, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.491952752642122e-05, | |
| "loss": 5.1042, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.488498998411273e-05, | |
| "loss": 5.0769, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.485045244180424e-05, | |
| "loss": 5.0678, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.481591489949575e-05, | |
| "loss": 5.0928, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.478137735718726e-05, | |
| "loss": 5.0958, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.474683981487878e-05, | |
| "loss": 5.0713, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.4712302272570284e-05, | |
| "loss": 5.0995, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.4677764730261794e-05, | |
| "loss": 5.0845, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.4643227187953304e-05, | |
| "loss": 5.0856, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.460868964564482e-05, | |
| "loss": 5.0638, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.457415210333633e-05, | |
| "loss": 5.0594, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.4539614561027835e-05, | |
| "loss": 5.0941, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.4505077018719346e-05, | |
| "loss": 5.0457, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.447053947641086e-05, | |
| "loss": 4.9635, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.443600193410237e-05, | |
| "loss": 4.9817, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.4401464391793883e-05, | |
| "loss": 4.989, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.4366926849485394e-05, | |
| "loss": 4.9672, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.4332389307176904e-05, | |
| "loss": 5.008, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.4297851764868414e-05, | |
| "loss": 4.9655, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.4263314222559925e-05, | |
| "loss": 4.9778, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.4228776680251435e-05, | |
| "loss": 4.9593, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.4194239137942945e-05, | |
| "loss": 4.9961, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.4159701595634456e-05, | |
| "loss": 4.9818, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.4125164053325966e-05, | |
| "loss": 4.913, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.409062651101748e-05, | |
| "loss": 4.9754, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.4056088968708987e-05, | |
| "loss": 4.9515, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.40215514264005e-05, | |
| "loss": 4.9495, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.398701388409201e-05, | |
| "loss": 4.9268, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.3952476341783524e-05, | |
| "loss": 4.9985, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.3917938799475035e-05, | |
| "loss": 4.9254, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.388340125716654e-05, | |
| "loss": 4.9554, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.384886371485805e-05, | |
| "loss": 4.9328, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.3814326172549566e-05, | |
| "loss": 4.9389, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.3779788630241076e-05, | |
| "loss": 4.9226, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.3745251087932586e-05, | |
| "loss": 4.9134, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.3710713545624097e-05, | |
| "loss": 4.9435, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.367617600331561e-05, | |
| "loss": 4.9202, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.364163846100712e-05, | |
| "loss": 4.9282, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.360710091869863e-05, | |
| "loss": 4.8598, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.357256337639014e-05, | |
| "loss": 4.9474, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.353802583408165e-05, | |
| "loss": 4.8958, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.350348829177316e-05, | |
| "loss": 4.8854, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.346895074946467e-05, | |
| "loss": 4.8184, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.3434413207156186e-05, | |
| "loss": 4.9108, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.339987566484769e-05, | |
| "loss": 4.8647, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.33653381225392e-05, | |
| "loss": 4.8807, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.333080058023071e-05, | |
| "loss": 4.8766, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.329626303792223e-05, | |
| "loss": 4.8508, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.326172549561374e-05, | |
| "loss": 4.8532, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.322718795330524e-05, | |
| "loss": 4.8687, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.319265041099675e-05, | |
| "loss": 4.7944, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.315811286868827e-05, | |
| "loss": 4.7731, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.312357532637978e-05, | |
| "loss": 4.8183, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.308903778407129e-05, | |
| "loss": 4.7874, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.30545002417628e-05, | |
| "loss": 4.7327, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.301996269945431e-05, | |
| "loss": 4.8096, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.298542515714582e-05, | |
| "loss": 4.738, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.295088761483733e-05, | |
| "loss": 4.8368, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.291635007252884e-05, | |
| "loss": 4.757, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.288181253022035e-05, | |
| "loss": 4.789, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.284727498791186e-05, | |
| "loss": 4.7926, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.281273744560337e-05, | |
| "loss": 4.7196, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.277819990329489e-05, | |
| "loss": 4.7615, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.274366236098639e-05, | |
| "loss": 4.8161, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.27091248186779e-05, | |
| "loss": 4.7221, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.267458727636941e-05, | |
| "loss": 4.7104, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.264004973406093e-05, | |
| "loss": 4.6939, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.260551219175244e-05, | |
| "loss": 4.7584, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.2570974649443944e-05, | |
| "loss": 4.7131, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.2536437107135454e-05, | |
| "loss": 4.7359, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.250189956482697e-05, | |
| "loss": 4.7093, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.246736202251848e-05, | |
| "loss": 4.6965, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.243282448020999e-05, | |
| "loss": 4.7546, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.23982869379015e-05, | |
| "loss": 4.6808, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.236374939559301e-05, | |
| "loss": 4.7021, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.232921185328452e-05, | |
| "loss": 4.6742, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.229467431097603e-05, | |
| "loss": 4.7074, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.226013676866754e-05, | |
| "loss": 4.719, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.2225599226359054e-05, | |
| "loss": 4.6518, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.2191061684050564e-05, | |
| "loss": 4.6734, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.2156524141742074e-05, | |
| "loss": 4.686, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.212198659943359e-05, | |
| "loss": 4.6587, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.2087449057125095e-05, | |
| "loss": 4.6642, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.2052911514816605e-05, | |
| "loss": 4.6555, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.2018373972508116e-05, | |
| "loss": 4.6315, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.198383643019963e-05, | |
| "loss": 4.6435, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.194929888789114e-05, | |
| "loss": 4.6456, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.1914761345582646e-05, | |
| "loss": 4.5384, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.188022380327416e-05, | |
| "loss": 4.6354, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.1845686260965674e-05, | |
| "loss": 4.5797, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.1811148718657184e-05, | |
| "loss": 4.6615, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.1776611176348694e-05, | |
| "loss": 4.6493, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.1742073634040205e-05, | |
| "loss": 4.5619, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.1707536091731715e-05, | |
| "loss": 4.5834, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.1672998549423225e-05, | |
| "loss": 4.6102, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.1638461007114736e-05, | |
| "loss": 4.6063, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.1603923464806246e-05, | |
| "loss": 4.5329, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.1569385922497756e-05, | |
| "loss": 4.6316, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.153484838018927e-05, | |
| "loss": 4.6018, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.150031083788078e-05, | |
| "loss": 4.5185, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.1465773295572294e-05, | |
| "loss": 4.572, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.14312357532638e-05, | |
| "loss": 4.5646, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.139669821095531e-05, | |
| "loss": 4.603, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.136216066864682e-05, | |
| "loss": 4.5372, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.1327623126338335e-05, | |
| "loss": 4.5963, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.1293085584029846e-05, | |
| "loss": 4.5808, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.125854804172135e-05, | |
| "loss": 4.497, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.122401049941286e-05, | |
| "loss": 4.5251, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.118947295710438e-05, | |
| "loss": 4.6056, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.115493541479589e-05, | |
| "loss": 4.5351, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.11203978724874e-05, | |
| "loss": 4.5328, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.108586033017891e-05, | |
| "loss": 4.5216, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.105132278787042e-05, | |
| "loss": 4.4807, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.101678524556193e-05, | |
| "loss": 4.4105, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.098224770325344e-05, | |
| "loss": 4.5167, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.094771016094495e-05, | |
| "loss": 4.5025, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.091317261863646e-05, | |
| "loss": 4.4726, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.087863507632797e-05, | |
| "loss": 4.5453, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.084409753401948e-05, | |
| "loss": 4.5499, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.0809559991711e-05, | |
| "loss": 4.477, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.07750224494025e-05, | |
| "loss": 4.4173, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.074048490709401e-05, | |
| "loss": 4.4168, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.070594736478552e-05, | |
| "loss": 4.4963, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.067140982247704e-05, | |
| "loss": 4.4329, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.063687228016855e-05, | |
| "loss": 4.4016, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.060233473786005e-05, | |
| "loss": 4.4456, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.056779719555156e-05, | |
| "loss": 4.5099, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.053325965324308e-05, | |
| "loss": 4.5314, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.049872211093459e-05, | |
| "loss": 4.3918, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.04641845686261e-05, | |
| "loss": 4.3786, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.042964702631761e-05, | |
| "loss": 4.3641, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.039510948400912e-05, | |
| "loss": 4.3287, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.036057194170063e-05, | |
| "loss": 4.3898, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.032603439939214e-05, | |
| "loss": 4.4319, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.029149685708365e-05, | |
| "loss": 4.4166, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.025695931477516e-05, | |
| "loss": 4.4221, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.022242177246667e-05, | |
| "loss": 4.4518, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.018788423015818e-05, | |
| "loss": 4.433, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.01533466878497e-05, | |
| "loss": 4.4263, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.01188091455412e-05, | |
| "loss": 4.44, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.0084271603232713e-05, | |
| "loss": 4.3684, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.0049734060924224e-05, | |
| "loss": 4.3712, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.001519651861574e-05, | |
| "loss": 4.3684, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.998065897630725e-05, | |
| "loss": 4.3534, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.9946121433998755e-05, | |
| "loss": 4.355, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.9911583891690265e-05, | |
| "loss": 4.3857, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.987704634938178e-05, | |
| "loss": 4.3869, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.984250880707329e-05, | |
| "loss": 4.3584, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.98079712647648e-05, | |
| "loss": 4.2255, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.977343372245631e-05, | |
| "loss": 4.3284, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.9738896180147823e-05, | |
| "loss": 4.3396, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.9704358637839334e-05, | |
| "loss": 4.3761, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.9669821095530844e-05, | |
| "loss": 4.3291, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.9635283553222354e-05, | |
| "loss": 4.3493, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.9600746010913865e-05, | |
| "loss": 4.3123, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.9566208468605375e-05, | |
| "loss": 4.3874, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.9531670926296885e-05, | |
| "loss": 4.3719, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.94971333839884e-05, | |
| "loss": 4.3051, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.9462595841679906e-05, | |
| "loss": 4.3216, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.9428058299371416e-05, | |
| "loss": 4.4278, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.9393520757062927e-05, | |
| "loss": 4.3334, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.9358983214754444e-05, | |
| "loss": 4.2799, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.9324445672445954e-05, | |
| "loss": 4.3025, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.928990813013746e-05, | |
| "loss": 4.2286, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.9255370587828975e-05, | |
| "loss": 4.286, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.9220833045520485e-05, | |
| "loss": 4.2102, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.9186295503211995e-05, | |
| "loss": 4.2735, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.91517579609035e-05, | |
| "loss": 4.3194, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.9117220418595016e-05, | |
| "loss": 4.2928, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.9082682876286526e-05, | |
| "loss": 4.348, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.9048145333978037e-05, | |
| "loss": 4.3026, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.901360779166955e-05, | |
| "loss": 4.2144, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.897907024936106e-05, | |
| "loss": 4.2993, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.894453270705257e-05, | |
| "loss": 4.2313, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.890999516474408e-05, | |
| "loss": 4.2205, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.887545762243559e-05, | |
| "loss": 4.2033, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.8840920080127105e-05, | |
| "loss": 4.2286, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.880638253781861e-05, | |
| "loss": 4.2882, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.877184499551012e-05, | |
| "loss": 4.2705, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.873730745320163e-05, | |
| "loss": 4.2006, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.8702769910893146e-05, | |
| "loss": 4.2344, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.866823236858466e-05, | |
| "loss": 4.2227, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.863369482627616e-05, | |
| "loss": 4.2481, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.859915728396768e-05, | |
| "loss": 4.2321, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.856461974165919e-05, | |
| "loss": 4.2588, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.85300821993507e-05, | |
| "loss": 4.1625, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.84955446570422e-05, | |
| "loss": 4.1685, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.846100711473372e-05, | |
| "loss": 4.2126, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.842646957242523e-05, | |
| "loss": 4.1852, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.839193203011674e-05, | |
| "loss": 4.0762, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.835739448780825e-05, | |
| "loss": 4.1486, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.832285694549976e-05, | |
| "loss": 4.2086, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.828831940319127e-05, | |
| "loss": 4.1741, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.825378186088278e-05, | |
| "loss": 4.1473, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.821924431857429e-05, | |
| "loss": 4.1852, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.818470677626581e-05, | |
| "loss": 4.126, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.815016923395731e-05, | |
| "loss": 4.2465, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.811563169164882e-05, | |
| "loss": 4.0662, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.808109414934033e-05, | |
| "loss": 4.1311, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.804655660703185e-05, | |
| "loss": 4.1458, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.801201906472336e-05, | |
| "loss": 4.1106, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.797748152241486e-05, | |
| "loss": 4.1293, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.794294398010638e-05, | |
| "loss": 4.0843, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.790840643779789e-05, | |
| "loss": 4.1247, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.78738688954894e-05, | |
| "loss": 4.1126, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.7839331353180904e-05, | |
| "loss": 4.0693, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.780479381087242e-05, | |
| "loss": 4.0749, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.777025626856393e-05, | |
| "loss": 4.1138, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.773571872625544e-05, | |
| "loss": 4.1342, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.770118118394695e-05, | |
| "loss": 4.2315, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.766664364163846e-05, | |
| "loss": 4.0744, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.763210609932997e-05, | |
| "loss": 4.1098, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.759756855702148e-05, | |
| "loss": 4.1641, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.7563031014712994e-05, | |
| "loss": 4.1207, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.752849347240451e-05, | |
| "loss": 4.0232, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.7493955930096014e-05, | |
| "loss": 4.1027, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.7459418387787525e-05, | |
| "loss": 4.1394, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.7424880845479035e-05, | |
| "loss": 4.0756, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.739034330317055e-05, | |
| "loss": 4.1204, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.735580576086206e-05, | |
| "loss": 4.1218, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.7321268218553566e-05, | |
| "loss": 4.0515, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.728673067624508e-05, | |
| "loss": 4.109, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.725219313393659e-05, | |
| "loss": 3.9909, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.7217655591628104e-05, | |
| "loss": 4.0671, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.718311804931961e-05, | |
| "loss": 4.1067, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.7148580507011124e-05, | |
| "loss": 4.0092, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.7114042964702634e-05, | |
| "loss": 4.0346, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.7079505422394145e-05, | |
| "loss": 4.0381, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.7044967880085655e-05, | |
| "loss": 4.0308, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.24909529553679133, | |
| "eval_loss": 3.6947672367095947, | |
| "eval_runtime": 8621.8692, | |
| "eval_samples_per_second": 8.846, | |
| "eval_steps_per_second": 0.277, | |
| "step": 5362 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.7010430337777165e-05, | |
| "loss": 4.0648, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.6975892795468676e-05, | |
| "loss": 3.9987, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.6941355253160186e-05, | |
| "loss": 3.9417, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.6906817710851696e-05, | |
| "loss": 3.9734, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.6872280168543213e-05, | |
| "loss": 3.9119, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.683774262623472e-05, | |
| "loss": 3.9818, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.680320508392623e-05, | |
| "loss": 3.9722, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.676866754161774e-05, | |
| "loss": 3.9544, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.6734129999309255e-05, | |
| "loss": 3.9963, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.6699592457000765e-05, | |
| "loss": 3.9356, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.666505491469227e-05, | |
| "loss": 3.9639, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.6630517372383786e-05, | |
| "loss": 3.9783, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.6595979830075296e-05, | |
| "loss": 3.9439, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.6561442287766806e-05, | |
| "loss": 3.9195, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.652690474545831e-05, | |
| "loss": 3.8927, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.649236720314983e-05, | |
| "loss": 3.9244, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.645782966084134e-05, | |
| "loss": 3.9266, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.642329211853285e-05, | |
| "loss": 3.9866, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.638875457622436e-05, | |
| "loss": 3.8888, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.635421703391587e-05, | |
| "loss": 3.8811, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.631967949160738e-05, | |
| "loss": 3.959, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.628514194929889e-05, | |
| "loss": 4.0576, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.62506044069904e-05, | |
| "loss": 3.9046, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.6216066864681916e-05, | |
| "loss": 3.8684, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.618152932237342e-05, | |
| "loss": 3.9167, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.614699178006493e-05, | |
| "loss": 3.8899, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.611245423775644e-05, | |
| "loss": 3.9039, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.607791669544796e-05, | |
| "loss": 3.8306, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.604337915313946e-05, | |
| "loss": 3.8872, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.600884161083097e-05, | |
| "loss": 3.8607, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.597430406852249e-05, | |
| "loss": 3.9271, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.5939766526214e-05, | |
| "loss": 4.006, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.590522898390551e-05, | |
| "loss": 3.901, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.587069144159701e-05, | |
| "loss": 3.8321, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.583615389928853e-05, | |
| "loss": 3.8744, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.580161635698004e-05, | |
| "loss": 3.9017, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.576707881467155e-05, | |
| "loss": 3.8878, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.573254127236306e-05, | |
| "loss": 3.832, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.569800373005457e-05, | |
| "loss": 3.831, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.566346618774608e-05, | |
| "loss": 3.9113, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.562892864543759e-05, | |
| "loss": 3.796, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.55943911031291e-05, | |
| "loss": 3.8043, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.555985356082062e-05, | |
| "loss": 3.9136, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.552531601851212e-05, | |
| "loss": 3.7756, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.549077847620363e-05, | |
| "loss": 3.7959, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.545624093389514e-05, | |
| "loss": 3.7942, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.542170339158666e-05, | |
| "loss": 3.8605, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.5387165849278164e-05, | |
| "loss": 3.7954, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.5352628306969674e-05, | |
| "loss": 3.8062, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.531809076466119e-05, | |
| "loss": 3.8087, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.52835532223527e-05, | |
| "loss": 3.801, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.524901568004421e-05, | |
| "loss": 3.7685, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.5214478137735715e-05, | |
| "loss": 3.8729, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.517994059542723e-05, | |
| "loss": 3.8502, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.514540305311874e-05, | |
| "loss": 3.8123, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.511086551081025e-05, | |
| "loss": 3.7957, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.5076327968501763e-05, | |
| "loss": 3.7741, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.5041790426193274e-05, | |
| "loss": 3.7132, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.5007252883884784e-05, | |
| "loss": 3.7215, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.4972715341576294e-05, | |
| "loss": 3.773, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.4938177799267805e-05, | |
| "loss": 3.7669, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.490364025695932e-05, | |
| "loss": 3.7389, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.4869102714650825e-05, | |
| "loss": 3.748, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.4834565172342336e-05, | |
| "loss": 3.7029, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.4800027630033846e-05, | |
| "loss": 3.8851, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.476549008772536e-05, | |
| "loss": 3.8138, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.4730952545416867e-05, | |
| "loss": 3.7595, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.469641500310838e-05, | |
| "loss": 3.7483, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.4661877460799894e-05, | |
| "loss": 3.7428, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.4627339918491404e-05, | |
| "loss": 3.7077, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.4592802376182915e-05, | |
| "loss": 3.7716, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.455826483387442e-05, | |
| "loss": 3.7343, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.4523727291565935e-05, | |
| "loss": 3.7992, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.4489189749257446e-05, | |
| "loss": 3.6995, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.4454652206948956e-05, | |
| "loss": 3.7807, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.4420114664640466e-05, | |
| "loss": 3.7395, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.4385577122331976e-05, | |
| "loss": 3.8304, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.435103958002349e-05, | |
| "loss": 3.6993, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.4316502037715e-05, | |
| "loss": 3.6915, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.428196449540651e-05, | |
| "loss": 3.6993, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.4247426953098025e-05, | |
| "loss": 3.7521, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.421288941078953e-05, | |
| "loss": 3.6159, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.417835186848104e-05, | |
| "loss": 3.8004, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.414381432617255e-05, | |
| "loss": 3.7197, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.4109276783864066e-05, | |
| "loss": 3.6887, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.407473924155557e-05, | |
| "loss": 3.6608, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.404020169924708e-05, | |
| "loss": 3.6339, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.40056641569386e-05, | |
| "loss": 3.7329, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.397112661463011e-05, | |
| "loss": 3.7651, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.393658907232162e-05, | |
| "loss": 3.731, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.390205153001312e-05, | |
| "loss": 3.6192, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.386751398770464e-05, | |
| "loss": 3.6153, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.383297644539615e-05, | |
| "loss": 3.6365, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.379843890308766e-05, | |
| "loss": 3.6716, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.376390136077917e-05, | |
| "loss": 3.6605, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.372936381847068e-05, | |
| "loss": 3.7046, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.369482627616219e-05, | |
| "loss": 3.6256, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.36602887338537e-05, | |
| "loss": 3.6081, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.362575119154521e-05, | |
| "loss": 3.6484, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.359121364923673e-05, | |
| "loss": 3.6968, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.355667610692823e-05, | |
| "loss": 3.5797, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.352213856461974e-05, | |
| "loss": 3.6661, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.348760102231125e-05, | |
| "loss": 3.6663, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.345306348000277e-05, | |
| "loss": 3.5707, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.341852593769427e-05, | |
| "loss": 3.5989, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.338398839538578e-05, | |
| "loss": 3.6403, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.33494508530773e-05, | |
| "loss": 3.6176, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.331491331076881e-05, | |
| "loss": 3.5832, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.328037576846032e-05, | |
| "loss": 3.6051, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.3245838226151824e-05, | |
| "loss": 3.6246, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.321130068384334e-05, | |
| "loss": 3.6141, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.317676314153485e-05, | |
| "loss": 3.5999, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.314222559922636e-05, | |
| "loss": 3.6628, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.310768805691787e-05, | |
| "loss": 3.651, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.307315051460938e-05, | |
| "loss": 3.5738, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.303861297230089e-05, | |
| "loss": 3.5479, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.30040754299924e-05, | |
| "loss": 3.5372, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.296953788768391e-05, | |
| "loss": 3.6088, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.293500034537542e-05, | |
| "loss": 3.5082, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.2900462803066934e-05, | |
| "loss": 3.5443, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.2865925260758444e-05, | |
| "loss": 3.4684, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.2831387718449954e-05, | |
| "loss": 3.5836, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.279685017614147e-05, | |
| "loss": 3.5457, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.2762312633832975e-05, | |
| "loss": 3.5694, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.2727775091524485e-05, | |
| "loss": 3.4931, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.2693237549216e-05, | |
| "loss": 3.6173, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.265870000690751e-05, | |
| "loss": 3.5387, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.262416246459902e-05, | |
| "loss": 3.552, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.2589624922290526e-05, | |
| "loss": 3.5856, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.2555087379982044e-05, | |
| "loss": 3.5191, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.2520549837673554e-05, | |
| "loss": 3.4632, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.2486012295365064e-05, | |
| "loss": 3.557, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.2451474753056574e-05, | |
| "loss": 3.4683, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.2416937210748085e-05, | |
| "loss": 3.6178, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.2382399668439595e-05, | |
| "loss": 3.509, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.2347862126131105e-05, | |
| "loss": 3.5511, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.2313324583822616e-05, | |
| "loss": 3.5375, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.2278787041514126e-05, | |
| "loss": 3.5648, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.2244249499205636e-05, | |
| "loss": 3.4806, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.220971195689715e-05, | |
| "loss": 3.5598, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.217517441458866e-05, | |
| "loss": 3.5497, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.2140636872280174e-05, | |
| "loss": 3.5869, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.210609932997168e-05, | |
| "loss": 3.46, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.207156178766319e-05, | |
| "loss": 3.4238, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.2037024245354705e-05, | |
| "loss": 3.5371, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.2002486703046215e-05, | |
| "loss": 3.5355, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.1967949160737726e-05, | |
| "loss": 3.481, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.193341161842923e-05, | |
| "loss": 3.3692, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.1898874076120746e-05, | |
| "loss": 3.4681, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.1864336533812257e-05, | |
| "loss": 3.4593, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.182979899150377e-05, | |
| "loss": 3.4291, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.179526144919528e-05, | |
| "loss": 3.452, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.176072390688679e-05, | |
| "loss": 3.4585, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.17261863645783e-05, | |
| "loss": 3.4393, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.169164882226981e-05, | |
| "loss": 3.472, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.165711127996132e-05, | |
| "loss": 3.4524, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.162257373765283e-05, | |
| "loss": 3.4865, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.158803619534434e-05, | |
| "loss": 3.442, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.155349865303585e-05, | |
| "loss": 3.4376, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.151896111072736e-05, | |
| "loss": 3.4278, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.148442356841888e-05, | |
| "loss": 3.342, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.144988602611038e-05, | |
| "loss": 3.4077, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.141534848380189e-05, | |
| "loss": 3.3748, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.138081094149341e-05, | |
| "loss": 3.3983, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.134627339918492e-05, | |
| "loss": 3.4114, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.131173585687643e-05, | |
| "loss": 3.5379, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.127719831456793e-05, | |
| "loss": 3.446, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.124266077225945e-05, | |
| "loss": 3.3867, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.120812322995096e-05, | |
| "loss": 3.434, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.117358568764247e-05, | |
| "loss": 3.425, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.113904814533398e-05, | |
| "loss": 3.4585, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.110451060302549e-05, | |
| "loss": 3.4087, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.1069973060717e-05, | |
| "loss": 3.4151, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.103543551840851e-05, | |
| "loss": 3.3507, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.100089797610002e-05, | |
| "loss": 3.5211, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.096636043379153e-05, | |
| "loss": 3.3704, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.093182289148304e-05, | |
| "loss": 3.4302, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.089728534917455e-05, | |
| "loss": 3.4675, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.086274780686606e-05, | |
| "loss": 3.4799, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.082821026455758e-05, | |
| "loss": 3.4777, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.079367272224908e-05, | |
| "loss": 3.4224, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.0759135179940593e-05, | |
| "loss": 3.3939, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.072459763763211e-05, | |
| "loss": 3.4045, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.069006009532362e-05, | |
| "loss": 3.3775, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.065552255301513e-05, | |
| "loss": 3.3832, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.0620985010706635e-05, | |
| "loss": 3.3987, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.058644746839815e-05, | |
| "loss": 3.3736, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.055190992608966e-05, | |
| "loss": 3.4009, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.0517372383781172e-05, | |
| "loss": 3.4189, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.0482834841472686e-05, | |
| "loss": 3.3918, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.044829729916419e-05, | |
| "loss": 3.38, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.0413759756855703e-05, | |
| "loss": 3.3672, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.0379222214547214e-05, | |
| "loss": 3.3829, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.0344684672238727e-05, | |
| "loss": 3.3583, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.0310147129930234e-05, | |
| "loss": 3.2762, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.0275609587621745e-05, | |
| "loss": 3.2542, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.024107204531326e-05, | |
| "loss": 3.2616, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.020653450300477e-05, | |
| "loss": 3.4247, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.017199696069628e-05, | |
| "loss": 3.2569, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.0137459418387786e-05, | |
| "loss": 3.2765, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.01029218760793e-05, | |
| "loss": 3.4377, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.006838433377081e-05, | |
| "loss": 3.289, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.0033846791462324e-05, | |
| "loss": 3.4032, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.9999309249153834e-05, | |
| "loss": 3.3145, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.996477170684534e-05, | |
| "loss": 3.3723, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.993023416453685e-05, | |
| "loss": 3.3719, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.9895696622228365e-05, | |
| "loss": 3.3268, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.9861159079919875e-05, | |
| "loss": 3.4391, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.982662153761139e-05, | |
| "loss": 3.2366, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.9792083995302892e-05, | |
| "loss": 3.3724, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.9757546452994406e-05, | |
| "loss": 3.3473, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.9723008910685916e-05, | |
| "loss": 3.2653, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.968847136837743e-05, | |
| "loss": 3.2872, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.9653933826068937e-05, | |
| "loss": 3.2932, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.9619396283760447e-05, | |
| "loss": 3.2541, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.958485874145196e-05, | |
| "loss": 3.3233, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.955032119914347e-05, | |
| "loss": 3.3025, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.9515783656834982e-05, | |
| "loss": 3.2965, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.948124611452649e-05, | |
| "loss": 3.2564, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.9446708572218002e-05, | |
| "loss": 3.3116, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.9412171029909513e-05, | |
| "loss": 3.2414, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.9377633487601026e-05, | |
| "loss": 3.3608, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.9343095945292537e-05, | |
| "loss": 3.3019, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.9308558402984044e-05, | |
| "loss": 3.3488, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.9274020860675554e-05, | |
| "loss": 3.3367, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.9239483318367068e-05, | |
| "loss": 3.2071, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.9204945776058578e-05, | |
| "loss": 3.2963, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.9170408233750085e-05, | |
| "loss": 3.2445, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.9135870691441595e-05, | |
| "loss": 3.3087, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.910133314913311e-05, | |
| "loss": 3.3239, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.906679560682462e-05, | |
| "loss": 3.2231, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.9032258064516133e-05, | |
| "loss": 3.3682, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.899772052220764e-05, | |
| "loss": 3.1886, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.896318297989915e-05, | |
| "loss": 3.285, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.8928645437590664e-05, | |
| "loss": 3.1984, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.8894107895282174e-05, | |
| "loss": 3.2885, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.8859570352973685e-05, | |
| "loss": 3.2339, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.882503281066519e-05, | |
| "loss": 3.3014, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.8790495268356705e-05, | |
| "loss": 3.3142, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.8755957726048215e-05, | |
| "loss": 3.2698, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.872142018373973e-05, | |
| "loss": 3.2825, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.868688264143124e-05, | |
| "loss": 3.2214, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.8652345099122746e-05, | |
| "loss": 3.1402, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.8617807556814257e-05, | |
| "loss": 3.2267, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.858327001450577e-05, | |
| "loss": 3.185, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.854873247219728e-05, | |
| "loss": 3.1691, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.8514194929888788e-05, | |
| "loss": 3.1357, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.8479657387580298e-05, | |
| "loss": 3.2274, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.844511984527181e-05, | |
| "loss": 3.2324, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.8410582302963322e-05, | |
| "loss": 3.2761, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.8376044760654836e-05, | |
| "loss": 3.2883, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.8341507218346343e-05, | |
| "loss": 3.2104, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.8306969676037853e-05, | |
| "loss": 3.2581, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.8272432133729367e-05, | |
| "loss": 3.265, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.8237894591420877e-05, | |
| "loss": 3.2577, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.8203357049112387e-05, | |
| "loss": 3.2683, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.8168819506803894e-05, | |
| "loss": 3.1734, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.8134281964495408e-05, | |
| "loss": 3.2041, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.8099744422186918e-05, | |
| "loss": 3.1578, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.8065206879878432e-05, | |
| "loss": 3.2432, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.8030669337569942e-05, | |
| "loss": 3.2978, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.799613179526145e-05, | |
| "loss": 3.201, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.796159425295296e-05, | |
| "loss": 3.2955, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.7927056710644473e-05, | |
| "loss": 3.206, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.7892519168335984e-05, | |
| "loss": 3.1621, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.785798162602749e-05, | |
| "loss": 3.2604, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.7823444083719004e-05, | |
| "loss": 3.2261, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.7788906541410514e-05, | |
| "loss": 3.1247, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.7754368999102025e-05, | |
| "loss": 3.1877, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.771983145679354e-05, | |
| "loss": 3.1831, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.7685293914485045e-05, | |
| "loss": 3.177, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.7650756372176556e-05, | |
| "loss": 3.1021, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.761621882986807e-05, | |
| "loss": 3.1824, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.758168128755958e-05, | |
| "loss": 3.1609, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.754714374525109e-05, | |
| "loss": 3.2185, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.7512606202942597e-05, | |
| "loss": 3.1663, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.747806866063411e-05, | |
| "loss": 3.1356, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.744353111832562e-05, | |
| "loss": 3.1245, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.7408993576017135e-05, | |
| "loss": 3.1758, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.7374456033708645e-05, | |
| "loss": 3.0987, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.7339918491400152e-05, | |
| "loss": 3.0779, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.7305380949091662e-05, | |
| "loss": 3.1655, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.7270843406783176e-05, | |
| "loss": 3.1217, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.7236305864474686e-05, | |
| "loss": 3.1073, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.7201768322166193e-05, | |
| "loss": 3.1061, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.7167230779857707e-05, | |
| "loss": 3.1527, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.7132693237549217e-05, | |
| "loss": 3.1558, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.7098155695240728e-05, | |
| "loss": 3.1518, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.706361815293224e-05, | |
| "loss": 3.1196, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.7029080610623748e-05, | |
| "loss": 3.1993, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.699454306831526e-05, | |
| "loss": 3.1277, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.6960005526006772e-05, | |
| "loss": 3.1874, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.6925467983698282e-05, | |
| "loss": 3.1914, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.6890930441389793e-05, | |
| "loss": 3.0947, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.68563928990813e-05, | |
| "loss": 3.0878, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.6821855356772813e-05, | |
| "loss": 3.1066, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.6787317814464324e-05, | |
| "loss": 3.1181, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6752780272155837e-05, | |
| "loss": 3.1704, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6718242729847348e-05, | |
| "loss": 3.1091, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6683705187538855e-05, | |
| "loss": 3.0995, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6649167645230365e-05, | |
| "loss": 3.1144, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.661463010292188e-05, | |
| "loss": 3.0199, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.658009256061339e-05, | |
| "loss": 3.118, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.6545555018304896e-05, | |
| "loss": 3.1443, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.651101747599641e-05, | |
| "loss": 3.1003, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.647647993368792e-05, | |
| "loss": 3.1032, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.644194239137943e-05, | |
| "loss": 3.0726, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.6407404849070944e-05, | |
| "loss": 3.1226, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.637286730676245e-05, | |
| "loss": 3.1293, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.633832976445396e-05, | |
| "loss": 2.9997, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.6303792222145475e-05, | |
| "loss": 3.0414, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.6269254679836985e-05, | |
| "loss": 3.11, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.6234717137528496e-05, | |
| "loss": 3.1564, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.6200179595220002e-05, | |
| "loss": 3.0275, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.6165642052911516e-05, | |
| "loss": 3.1224, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.6131104510603027e-05, | |
| "loss": 3.089, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.609656696829454e-05, | |
| "loss": 2.9951, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.6062029425986044e-05, | |
| "loss": 3.0263, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.6027491883677557e-05, | |
| "loss": 3.0849, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.5992954341369068e-05, | |
| "loss": 3.0331, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.595841679906058e-05, | |
| "loss": 3.0678, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.5923879256752092e-05, | |
| "loss": 3.0979, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.58893417144436e-05, | |
| "loss": 3.1191, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.5854804172135112e-05, | |
| "loss": 3.0135, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5820266629826623e-05, | |
| "loss": 3.0578, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5785729087518133e-05, | |
| "loss": 3.1627, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5751191545209647e-05, | |
| "loss": 3.0275, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5716654002901154e-05, | |
| "loss": 2.9919, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5682116460592664e-05, | |
| "loss": 3.0629, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5647578918284178e-05, | |
| "loss": 3.0972, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.5613041375975688e-05, | |
| "loss": 3.1405, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.5578503833667202e-05, | |
| "loss": 3.0783, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.5543966291358705e-05, | |
| "loss": 3.0817, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.550942874905022e-05, | |
| "loss": 3.0429, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.547489120674173e-05, | |
| "loss": 3.0268, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.5440353664433243e-05, | |
| "loss": 3.0695, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.5405816122124747e-05, | |
| "loss": 3.0673, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.537127857981626e-05, | |
| "loss": 2.9537, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.533674103750777e-05, | |
| "loss": 2.982, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.5302203495199284e-05, | |
| "loss": 3.0885, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.5267665952890795e-05, | |
| "loss": 3.0187, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.52331284105823e-05, | |
| "loss": 2.9979, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.5198590868273815e-05, | |
| "loss": 2.998, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.5164053325965326e-05, | |
| "loss": 2.9708, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.5129515783656836e-05, | |
| "loss": 2.9856, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.509497824134835e-05, | |
| "loss": 3.0117, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.5060440699039856e-05, | |
| "loss": 3.0323, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.5025903156731367e-05, | |
| "loss": 3.0227, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.499136561442288e-05, | |
| "loss": 3.0074, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.4956828072114387e-05, | |
| "loss": 3.0091, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.49222905298059e-05, | |
| "loss": 3.1571, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.488775298749741e-05, | |
| "loss": 3.0275, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.4853215445188922e-05, | |
| "loss": 3.0307, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.4818677902880432e-05, | |
| "loss": 2.9366, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.4784140360571942e-05, | |
| "loss": 2.9924, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.4749602818263453e-05, | |
| "loss": 3.0313, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.4715065275954966e-05, | |
| "loss": 2.9134, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.4680527733646473e-05, | |
| "loss": 2.9929, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.4645990191337987e-05, | |
| "loss": 2.9983, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.4611452649029494e-05, | |
| "loss": 2.9756, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.4576915106721008e-05, | |
| "loss": 2.9688, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.4542377564412518e-05, | |
| "loss": 2.9359, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.4507840022104028e-05, | |
| "loss": 3.0948, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.447330247979554e-05, | |
| "loss": 2.9085, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.443876493748705e-05, | |
| "loss": 3.053, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.4404227395178563e-05, | |
| "loss": 3.0007, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.436968985287007e-05, | |
| "loss": 2.9837, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.4335152310561583e-05, | |
| "loss": 2.9764, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.430061476825309e-05, | |
| "loss": 2.955, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.4266077225944604e-05, | |
| "loss": 2.9645, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.4231539683636114e-05, | |
| "loss": 3.0081, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.4197002141327624e-05, | |
| "loss": 3.0566, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.4162464599019135e-05, | |
| "loss": 2.8261, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.4127927056710645e-05, | |
| "loss": 2.9972, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.4093389514402155e-05, | |
| "loss": 2.9635, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.4058851972093666e-05, | |
| "loss": 2.9686, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.4024314429785176e-05, | |
| "loss": 2.952, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.398977688747669e-05, | |
| "loss": 2.9526, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.3955239345168197e-05, | |
| "loss": 3.0147, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.392070180285971e-05, | |
| "loss": 2.8927, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.388616426055122e-05, | |
| "loss": 2.8888, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.385162671824273e-05, | |
| "loss": 2.9943, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.381708917593424e-05, | |
| "loss": 2.8836, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.378255163362575e-05, | |
| "loss": 3.0489, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.3748014091317265e-05, | |
| "loss": 3.009, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.3713476549008772e-05, | |
| "loss": 2.8603, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.3678939006700286e-05, | |
| "loss": 2.9036, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.3644401464391793e-05, | |
| "loss": 2.9626, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.3609863922083307e-05, | |
| "loss": 2.9827, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.3575326379774817e-05, | |
| "loss": 3.0024, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.3540788837466327e-05, | |
| "loss": 2.9592, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.3506251295157838e-05, | |
| "loss": 2.9028, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.3471713752849348e-05, | |
| "loss": 2.8719, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.3437176210540858e-05, | |
| "loss": 2.9314, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.340263866823237e-05, | |
| "loss": 3.0222, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.336810112592388e-05, | |
| "loss": 2.9664, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.3333563583615393e-05, | |
| "loss": 2.8377, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.32990260413069e-05, | |
| "loss": 2.9357, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.3264488498998413e-05, | |
| "loss": 2.8697, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.3229950956689923e-05, | |
| "loss": 2.909, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.3195413414381434e-05, | |
| "loss": 2.9791, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.3160875872072944e-05, | |
| "loss": 2.9093, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.3126338329764454e-05, | |
| "loss": 2.843, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.3091800787455968e-05, | |
| "loss": 2.889, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.3057263245147475e-05, | |
| "loss": 2.8633, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.302272570283899e-05, | |
| "loss": 2.9043, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.2988188160530496e-05, | |
| "loss": 2.8618, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.295365061822201e-05, | |
| "loss": 2.8755, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.291911307591352e-05, | |
| "loss": 2.8721, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.288457553360503e-05, | |
| "loss": 2.926, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.285003799129654e-05, | |
| "loss": 2.8687, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.281550044898805e-05, | |
| "loss": 2.9855, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.278096290667956e-05, | |
| "loss": 2.7932, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.274642536437107e-05, | |
| "loss": 2.8963, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.271188782206258e-05, | |
| "loss": 2.886, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.2677350279754095e-05, | |
| "loss": 2.8421, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.2642812737445606e-05, | |
| "loss": 2.9048, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.2608275195137116e-05, | |
| "loss": 2.8581, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.2573737652828626e-05, | |
| "loss": 2.8165, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.2539200110520137e-05, | |
| "loss": 2.8677, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.2504662568211647e-05, | |
| "loss": 2.7541, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.2470125025903157e-05, | |
| "loss": 2.907, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.243558748359467e-05, | |
| "loss": 2.9712, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.2401049941286178e-05, | |
| "loss": 2.945, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.236651239897769e-05, | |
| "loss": 2.9733, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.23319748566692e-05, | |
| "loss": 2.7933, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.2297437314360712e-05, | |
| "loss": 2.9037, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.2262899772052222e-05, | |
| "loss": 2.8159, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.2228362229743733e-05, | |
| "loss": 2.9867, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.2193824687435243e-05, | |
| "loss": 2.7537, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.2159287145126753e-05, | |
| "loss": 2.859, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.2124749602818264e-05, | |
| "loss": 2.8736, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.2090212060509774e-05, | |
| "loss": 2.8086, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.2055674518201284e-05, | |
| "loss": 2.913, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.2021136975892798e-05, | |
| "loss": 2.7818, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.198659943358431e-05, | |
| "loss": 2.8732, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.195206189127582e-05, | |
| "loss": 2.8173, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.191752434896733e-05, | |
| "loss": 2.8845, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.188298680665884e-05, | |
| "loss": 2.8686, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.184844926435035e-05, | |
| "loss": 2.9331, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.181391172204186e-05, | |
| "loss": 2.8525, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.1779374179733374e-05, | |
| "loss": 2.8304, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.174483663742488e-05, | |
| "loss": 2.7739, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.1710299095116394e-05, | |
| "loss": 2.9298, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.16757615528079e-05, | |
| "loss": 2.824, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.1641224010499415e-05, | |
| "loss": 2.7872, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.1606686468190925e-05, | |
| "loss": 2.7732, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.1572148925882436e-05, | |
| "loss": 2.7426, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.1537611383573946e-05, | |
| "loss": 2.82, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.1503073841265456e-05, | |
| "loss": 2.7538, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.1468536298956967e-05, | |
| "loss": 2.7856, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.1433998756648477e-05, | |
| "loss": 2.7411, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.1399461214339987e-05, | |
| "loss": 2.7934, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.13649236720315e-05, | |
| "loss": 2.8427, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.133038612972301e-05, | |
| "loss": 2.7685, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.129584858741452e-05, | |
| "loss": 2.7284, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.1261311045106032e-05, | |
| "loss": 2.8034, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.1226773502797542e-05, | |
| "loss": 2.8175, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 2.1192235960489052e-05, | |
| "loss": 2.7895, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 2.1157698418180563e-05, | |
| "loss": 2.8621, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 2.1123160875872076e-05, | |
| "loss": 2.7962, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 2.1088623333563583e-05, | |
| "loss": 2.7863, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 2.1054085791255097e-05, | |
| "loss": 2.7884, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 2.1019548248946604e-05, | |
| "loss": 2.7566, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 2.0985010706638118e-05, | |
| "loss": 2.8012, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 2.0950473164329625e-05, | |
| "loss": 2.8358, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 2.091593562202114e-05, | |
| "loss": 2.8367, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 2.088139807971265e-05, | |
| "loss": 2.7646, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 2.084686053740416e-05, | |
| "loss": 2.8934, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.081232299509567e-05, | |
| "loss": 2.8152, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.077778545278718e-05, | |
| "loss": 2.7449, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.074324791047869e-05, | |
| "loss": 2.7978, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.0708710368170204e-05, | |
| "loss": 2.7717, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.0674172825861714e-05, | |
| "loss": 2.8873, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.0639635283553224e-05, | |
| "loss": 2.8055, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.0605097741244735e-05, | |
| "loss": 2.6969, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.0570560198936245e-05, | |
| "loss": 2.7373, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.0536022656627755e-05, | |
| "loss": 2.7297, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.0501485114319265e-05, | |
| "loss": 2.824, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.046694757201078e-05, | |
| "loss": 2.6395, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.0432410029702286e-05, | |
| "loss": 2.7764, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.03978724873938e-05, | |
| "loss": 2.7993, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.0363334945085307e-05, | |
| "loss": 2.7092, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.032879740277682e-05, | |
| "loss": 2.8668, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.0294259860468327e-05, | |
| "loss": 2.7189, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.025972231815984e-05, | |
| "loss": 2.6979, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.022518477585135e-05, | |
| "loss": 2.7583, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.0190647233542862e-05, | |
| "loss": 2.8106, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.0156109691234372e-05, | |
| "loss": 2.7257, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.0121572148925882e-05, | |
| "loss": 2.6996, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 2.0087034606617393e-05, | |
| "loss": 2.6771, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 2.0052497064308906e-05, | |
| "loss": 2.8394, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 2.0017959522000417e-05, | |
| "loss": 2.7741, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.9983421979691927e-05, | |
| "loss": 2.7705, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.9948884437383437e-05, | |
| "loss": 2.7212, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.9914346895074948e-05, | |
| "loss": 2.773, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.9879809352766458e-05, | |
| "loss": 2.7534, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.9845271810457968e-05, | |
| "loss": 2.7153, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.9810734268149482e-05, | |
| "loss": 2.7342, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.977619672584099e-05, | |
| "loss": 2.7553, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.9741659183532503e-05, | |
| "loss": 2.7939, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.970712164122401e-05, | |
| "loss": 2.7801, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9672584098915523e-05, | |
| "loss": 2.786, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.963804655660703e-05, | |
| "loss": 2.7158, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9603509014298544e-05, | |
| "loss": 2.7806, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9568971471990054e-05, | |
| "loss": 2.7648, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9534433929681564e-05, | |
| "loss": 2.8309, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.9499896387373075e-05, | |
| "loss": 2.6813, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.9465358845064585e-05, | |
| "loss": 2.6845, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.9430821302756095e-05, | |
| "loss": 2.7858, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.939628376044761e-05, | |
| "loss": 2.8503, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.936174621813912e-05, | |
| "loss": 2.7545, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.932720867583063e-05, | |
| "loss": 2.6919, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.929267113352214e-05, | |
| "loss": 2.8175, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.925813359121365e-05, | |
| "loss": 2.8181, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.922359604890516e-05, | |
| "loss": 2.7865, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.918905850659667e-05, | |
| "loss": 2.8738, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.9154520964288185e-05, | |
| "loss": 2.7771, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.911998342197969e-05, | |
| "loss": 2.6842, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.9085445879671205e-05, | |
| "loss": 2.7889, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.9050908337362712e-05, | |
| "loss": 2.7784, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.9016370795054226e-05, | |
| "loss": 2.746, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.8981833252745733e-05, | |
| "loss": 2.733, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.8947295710437247e-05, | |
| "loss": 2.8084, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.8912758168128757e-05, | |
| "loss": 2.6748, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.8878220625820267e-05, | |
| "loss": 2.6682, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.8843683083511778e-05, | |
| "loss": 2.7535, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.8809145541203288e-05, | |
| "loss": 2.8174, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.8774607998894798e-05, | |
| "loss": 2.7326, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.874007045658631e-05, | |
| "loss": 2.6306, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.8705532914277822e-05, | |
| "loss": 2.6619, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.8670995371969333e-05, | |
| "loss": 2.6543, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.8636457829660843e-05, | |
| "loss": 2.7638, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8601920287352353e-05, | |
| "loss": 2.7623, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8567382745043863e-05, | |
| "loss": 2.7837, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8532845202735374e-05, | |
| "loss": 2.694, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5199166098494782, | |
| "eval_loss": 2.258553981781006, | |
| "eval_runtime": 8350.0446, | |
| "eval_samples_per_second": 9.134, | |
| "eval_steps_per_second": 0.286, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8498307660426887e-05, | |
| "loss": 2.7086, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8463770118118394e-05, | |
| "loss": 2.5868, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8429232575809908e-05, | |
| "loss": 2.6158, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.8394695033501415e-05, | |
| "loss": 2.5816, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.836015749119293e-05, | |
| "loss": 2.6675, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.8325619948884436e-05, | |
| "loss": 2.6007, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.829108240657595e-05, | |
| "loss": 2.7189, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.825654486426746e-05, | |
| "loss": 2.6269, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.822200732195897e-05, | |
| "loss": 2.6914, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.818746977965048e-05, | |
| "loss": 2.6807, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.815293223734199e-05, | |
| "loss": 2.6789, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.81183946950335e-05, | |
| "loss": 2.6979, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.808385715272501e-05, | |
| "loss": 2.7043, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.8049319610416525e-05, | |
| "loss": 2.5784, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.8014782068108035e-05, | |
| "loss": 2.765, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.7980244525799546e-05, | |
| "loss": 2.7079, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.7945706983491056e-05, | |
| "loss": 2.5952, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.7911169441182566e-05, | |
| "loss": 2.5811, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.7876631898874077e-05, | |
| "loss": 2.5979, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.784209435656559e-05, | |
| "loss": 2.5244, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.7807556814257097e-05, | |
| "loss": 2.5945, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.777301927194861e-05, | |
| "loss": 2.6323, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.7738481729640118e-05, | |
| "loss": 2.7229, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.770394418733163e-05, | |
| "loss": 2.6336, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.766940664502314e-05, | |
| "loss": 2.7559, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.7634869102714652e-05, | |
| "loss": 2.5932, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.7600331560406162e-05, | |
| "loss": 2.6705, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.7565794018097673e-05, | |
| "loss": 2.6559, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.7531256475789183e-05, | |
| "loss": 2.6464, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.7496718933480693e-05, | |
| "loss": 2.612, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.7462181391172207e-05, | |
| "loss": 2.6383, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.7427643848863714e-05, | |
| "loss": 2.6744, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.7393106306555228e-05, | |
| "loss": 2.6917, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.7358568764246738e-05, | |
| "loss": 2.6124, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.732403122193825e-05, | |
| "loss": 2.7056, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.728949367962976e-05, | |
| "loss": 2.666, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.725495613732127e-05, | |
| "loss": 2.5992, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.722041859501278e-05, | |
| "loss": 2.6046, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.718588105270429e-05, | |
| "loss": 2.5876, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.71513435103958e-05, | |
| "loss": 2.5939, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.7116805968087314e-05, | |
| "loss": 2.6956, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.708226842577882e-05, | |
| "loss": 2.6503, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.7047730883470334e-05, | |
| "loss": 2.606, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.701319334116184e-05, | |
| "loss": 2.6121, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.6978655798853355e-05, | |
| "loss": 2.5824, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.6944118256544865e-05, | |
| "loss": 2.5693, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.6909580714236376e-05, | |
| "loss": 2.6992, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.6875043171927886e-05, | |
| "loss": 2.6964, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.6840505629619396e-05, | |
| "loss": 2.5641, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.680596808731091e-05, | |
| "loss": 2.687, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.6771430545002417e-05, | |
| "loss": 2.7424, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.673689300269393e-05, | |
| "loss": 2.5703, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.670235546038544e-05, | |
| "loss": 2.6801, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.666781791807695e-05, | |
| "loss": 2.6199, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.663328037576846e-05, | |
| "loss": 2.5185, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.6598742833459972e-05, | |
| "loss": 2.6805, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6564205291151482e-05, | |
| "loss": 2.6054, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6529667748842992e-05, | |
| "loss": 2.5373, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6495130206534503e-05, | |
| "loss": 2.6074, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6460592664226016e-05, | |
| "loss": 2.5797, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6426055121917523e-05, | |
| "loss": 2.6493, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6391517579609037e-05, | |
| "loss": 2.5966, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.6356980037300544e-05, | |
| "loss": 2.6321, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.6322442494992058e-05, | |
| "loss": 2.6245, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.6287904952683568e-05, | |
| "loss": 2.6626, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.6253367410375078e-05, | |
| "loss": 2.6061, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.621882986806659e-05, | |
| "loss": 2.5854, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.61842923257581e-05, | |
| "loss": 2.6, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.6149754783449613e-05, | |
| "loss": 2.5424, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.611521724114112e-05, | |
| "loss": 2.6849, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.6080679698832633e-05, | |
| "loss": 2.5638, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.6046142156524144e-05, | |
| "loss": 2.6181, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.6011604614215654e-05, | |
| "loss": 2.5725, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.5977067071907164e-05, | |
| "loss": 2.5491, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.5942529529598675e-05, | |
| "loss": 2.6698, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.5907991987290185e-05, | |
| "loss": 2.6591, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.5873454444981695e-05, | |
| "loss": 2.669, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.5838916902673205e-05, | |
| "loss": 2.6449, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.580437936036472e-05, | |
| "loss": 2.6878, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.5769841818056226e-05, | |
| "loss": 2.5862, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.573530427574774e-05, | |
| "loss": 2.6372, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.5700766733439247e-05, | |
| "loss": 2.5786, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.566622919113076e-05, | |
| "loss": 2.525, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.563169164882227e-05, | |
| "loss": 2.5454, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.559715410651378e-05, | |
| "loss": 2.5995, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.556261656420529e-05, | |
| "loss": 2.5934, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.5528079021896802e-05, | |
| "loss": 2.5854, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.5493541479588315e-05, | |
| "loss": 2.4892, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.5459003937279822e-05, | |
| "loss": 2.5199, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.5424466394971336e-05, | |
| "loss": 2.6195, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.5389928852662846e-05, | |
| "loss": 2.5539, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.5355391310354357e-05, | |
| "loss": 2.4715, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.5320853768045867e-05, | |
| "loss": 2.636, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.5286316225737377e-05, | |
| "loss": 2.6113, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.5251778683428888e-05, | |
| "loss": 2.6125, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.5217241141120398e-05, | |
| "loss": 2.6328, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.518270359881191e-05, | |
| "loss": 2.6832, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.5148166056503422e-05, | |
| "loss": 2.6303, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.511362851419493e-05, | |
| "loss": 2.5381, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.5079090971886443e-05, | |
| "loss": 2.5419, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.5044553429577951e-05, | |
| "loss": 2.5489, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.5010015887269463e-05, | |
| "loss": 2.5821, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.4975478344960972e-05, | |
| "loss": 2.5817, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.4940940802652484e-05, | |
| "loss": 2.5954, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.4906403260343996e-05, | |
| "loss": 2.5759, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.4871865718035504e-05, | |
| "loss": 2.4825, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.4837328175727016e-05, | |
| "loss": 2.6348, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.4802790633418525e-05, | |
| "loss": 2.5029, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.4768253091110037e-05, | |
| "loss": 2.5852, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.4733715548801549e-05, | |
| "loss": 2.5978, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.4699178006493058e-05, | |
| "loss": 2.5382, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.466464046418457e-05, | |
| "loss": 2.5815, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.463010292187608e-05, | |
| "loss": 2.5992, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.459556537956759e-05, | |
| "loss": 2.5691, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.45610278372591e-05, | |
| "loss": 2.6351, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4526490294950613e-05, | |
| "loss": 2.572, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4491952752642125e-05, | |
| "loss": 2.5904, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4457415210333633e-05, | |
| "loss": 2.5471, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4422877668025145e-05, | |
| "loss": 2.4937, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4388340125716654e-05, | |
| "loss": 2.4822, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4353802583408166e-05, | |
| "loss": 2.5416, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.4319265041099675e-05, | |
| "loss": 2.4873, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.4284727498791187e-05, | |
| "loss": 2.4911, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.4250189956482699e-05, | |
| "loss": 2.4459, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.4215652414174207e-05, | |
| "loss": 2.499, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.418111487186572e-05, | |
| "loss": 2.4586, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.4146577329557228e-05, | |
| "loss": 2.5339, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.411203978724874e-05, | |
| "loss": 2.5919, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.407750224494025e-05, | |
| "loss": 2.5695, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.404296470263176e-05, | |
| "loss": 2.5624, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.4008427160323273e-05, | |
| "loss": 2.5149, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3973889618014783e-05, | |
| "loss": 2.6278, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3939352075706293e-05, | |
| "loss": 2.539, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3904814533397803e-05, | |
| "loss": 2.4993, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3870276991089315e-05, | |
| "loss": 2.576, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3835739448780827e-05, | |
| "loss": 2.5318, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3801201906472336e-05, | |
| "loss": 2.5886, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.3766664364163848e-05, | |
| "loss": 2.4935, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.3732126821855357e-05, | |
| "loss": 2.4726, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.3697589279546869e-05, | |
| "loss": 2.5471, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.3663051737238377e-05, | |
| "loss": 2.5019, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.362851419492989e-05, | |
| "loss": 2.5326, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.3593976652621401e-05, | |
| "loss": 2.5554, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.355943911031291e-05, | |
| "loss": 2.5263, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.3524901568004422e-05, | |
| "loss": 2.4685, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.349036402569593e-05, | |
| "loss": 2.5834, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.3455826483387443e-05, | |
| "loss": 2.4834, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.3421288941078953e-05, | |
| "loss": 2.5581, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.3386751398770463e-05, | |
| "loss": 2.4237, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.3352213856461975e-05, | |
| "loss": 2.5372, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.3317676314153486e-05, | |
| "loss": 2.5476, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.3283138771844998e-05, | |
| "loss": 2.5046, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.3248601229536506e-05, | |
| "loss": 2.5236, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.3214063687228018e-05, | |
| "loss": 2.4282, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.317952614491953e-05, | |
| "loss": 2.5041, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.3144988602611039e-05, | |
| "loss": 2.5305, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.3110451060302551e-05, | |
| "loss": 2.5322, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.307591351799406e-05, | |
| "loss": 2.443, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.3041375975685571e-05, | |
| "loss": 2.5025, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.300683843337708e-05, | |
| "loss": 2.6194, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.2972300891068592e-05, | |
| "loss": 2.5422, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.2937763348760104e-05, | |
| "loss": 2.487, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.2903225806451613e-05, | |
| "loss": 2.5176, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.2868688264143125e-05, | |
| "loss": 2.4831, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.2834150721834633e-05, | |
| "loss": 2.4596, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.2799613179526145e-05, | |
| "loss": 2.5229, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.2765075637217656e-05, | |
| "loss": 2.4673, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.2730538094909166e-05, | |
| "loss": 2.4803, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.2696000552600678e-05, | |
| "loss": 2.4534, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.2661463010292188e-05, | |
| "loss": 2.4959, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.26269254679837e-05, | |
| "loss": 2.523, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.2592387925675209e-05, | |
| "loss": 2.5406, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.2557850383366721e-05, | |
| "loss": 2.4748, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.252331284105823e-05, | |
| "loss": 2.5003, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.2488775298749742e-05, | |
| "loss": 2.5308, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.2454237756441252e-05, | |
| "loss": 2.5162, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.2419700214132762e-05, | |
| "loss": 2.4429, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.2385162671824274e-05, | |
| "loss": 2.5054, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.2350625129515785e-05, | |
| "loss": 2.427, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.2316087587207295e-05, | |
| "loss": 2.4848, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.2281550044898805e-05, | |
| "loss": 2.4365, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.2247012502590316e-05, | |
| "loss": 2.5547, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.2212474960281826e-05, | |
| "loss": 2.5358, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.2177937417973338e-05, | |
| "loss": 2.4882, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.2143399875664848e-05, | |
| "loss": 2.5398, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.2108862333356358e-05, | |
| "loss": 2.426, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.2074324791047869e-05, | |
| "loss": 2.5536, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.203978724873938e-05, | |
| "loss": 2.4513, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.2005249706430891e-05, | |
| "loss": 2.5256, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.1970712164122401e-05, | |
| "loss": 2.5244, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.1936174621813913e-05, | |
| "loss": 2.5293, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.1901637079505424e-05, | |
| "loss": 2.4588, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.1867099537196934e-05, | |
| "loss": 2.4325, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.1832561994888444e-05, | |
| "loss": 2.4101, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.1798024452579955e-05, | |
| "loss": 2.4884, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.1763486910271465e-05, | |
| "loss": 2.5393, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.1728949367962977e-05, | |
| "loss": 2.527, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.1694411825654487e-05, | |
| "loss": 2.5476, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.1659874283345998e-05, | |
| "loss": 2.5215, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.1625336741037508e-05, | |
| "loss": 2.4547, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.1590799198729018e-05, | |
| "loss": 2.3536, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1556261656420529e-05, | |
| "loss": 2.4102, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1521724114112039e-05, | |
| "loss": 2.5389, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1487186571803551e-05, | |
| "loss": 2.4363, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1452649029495061e-05, | |
| "loss": 2.5441, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1418111487186573e-05, | |
| "loss": 2.4774, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1383573944878084e-05, | |
| "loss": 2.4745, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.1349036402569594e-05, | |
| "loss": 2.4728, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.1314498860261104e-05, | |
| "loss": 2.5576, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.1279961317952616e-05, | |
| "loss": 2.5435, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.1245423775644127e-05, | |
| "loss": 2.4142, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.1210886233335637e-05, | |
| "loss": 2.4979, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.1176348691027147e-05, | |
| "loss": 2.582, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.1141811148718657e-05, | |
| "loss": 2.5088, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.1107273606410168e-05, | |
| "loss": 2.4736, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.107273606410168e-05, | |
| "loss": 2.5025, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.103819852179319e-05, | |
| "loss": 2.3659, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.10036609794847e-05, | |
| "loss": 2.4489, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.096912343717621e-05, | |
| "loss": 2.4468, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.0934585894867721e-05, | |
| "loss": 2.4301, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.0900048352559231e-05, | |
| "loss": 2.5407, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.0865510810250742e-05, | |
| "loss": 2.5414, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.0830973267942254e-05, | |
| "loss": 2.5377, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.0796435725633764e-05, | |
| "loss": 2.3887, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.0761898183325276e-05, | |
| "loss": 2.4716, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.0727360641016786e-05, | |
| "loss": 2.3875, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.0692823098708297e-05, | |
| "loss": 2.4645, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.0658285556399807e-05, | |
| "loss": 2.4588, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.0623748014091319e-05, | |
| "loss": 2.4394, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.058921047178283e-05, | |
| "loss": 2.4167, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.055467292947434e-05, | |
| "loss": 2.4317, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.052013538716585e-05, | |
| "loss": 2.46, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.048559784485736e-05, | |
| "loss": 2.5175, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.045106030254887e-05, | |
| "loss": 2.3986, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0416522760240381e-05, | |
| "loss": 2.4969, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0381985217931893e-05, | |
| "loss": 2.3781, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0347447675623403e-05, | |
| "loss": 2.4569, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0312910133314914e-05, | |
| "loss": 2.3831, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0278372591006424e-05, | |
| "loss": 2.4431, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.0243835048697934e-05, | |
| "loss": 2.5117, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.0209297506389444e-05, | |
| "loss": 2.5629, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.0174759964080956e-05, | |
| "loss": 2.437, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.0140222421772467e-05, | |
| "loss": 2.5089, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.0105684879463979e-05, | |
| "loss": 2.5891, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.0071147337155489e-05, | |
| "loss": 2.4592, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.0036609794847e-05, | |
| "loss": 2.4947, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.000207225253851e-05, | |
| "loss": 2.3656, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.96753471023002e-06, | |
| "loss": 2.3021, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.932997167921532e-06, | |
| "loss": 2.5248, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.898459625613042e-06, | |
| "loss": 2.3746, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.863922083304553e-06, | |
| "loss": 2.4837, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.829384540996063e-06, | |
| "loss": 2.3482, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.794846998687573e-06, | |
| "loss": 2.3631, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.760309456379084e-06, | |
| "loss": 2.3923, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.725771914070596e-06, | |
| "loss": 2.465, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 9.691234371762106e-06, | |
| "loss": 2.5215, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 9.656696829453616e-06, | |
| "loss": 2.4189, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 9.622159287145127e-06, | |
| "loss": 2.4592, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 9.587621744836637e-06, | |
| "loss": 2.5317, | |
| "step": 13310 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 9.553084202528149e-06, | |
| "loss": 2.5719, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 9.51854666021966e-06, | |
| "loss": 2.4286, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 9.48400911791117e-06, | |
| "loss": 2.3491, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 9.449471575602682e-06, | |
| "loss": 2.5172, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 9.414934033294192e-06, | |
| "loss": 2.4466, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 9.380396490985702e-06, | |
| "loss": 2.4197, | |
| "step": 13370 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 9.345858948677212e-06, | |
| "loss": 2.5182, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.311321406368723e-06, | |
| "loss": 2.5291, | |
| "step": 13390 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.276783864060235e-06, | |
| "loss": 2.5132, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.242246321751745e-06, | |
| "loss": 2.4365, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.207708779443255e-06, | |
| "loss": 2.5625, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.173171237134766e-06, | |
| "loss": 2.4663, | |
| "step": 13430 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 9.138633694826276e-06, | |
| "loss": 2.5276, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 9.104096152517786e-06, | |
| "loss": 2.4061, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 9.069558610209298e-06, | |
| "loss": 2.4691, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 9.035021067900809e-06, | |
| "loss": 2.4038, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 9.000483525592319e-06, | |
| "loss": 2.4281, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.96594598328383e-06, | |
| "loss": 2.4752, | |
| "step": 13490 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.93140844097534e-06, | |
| "loss": 2.3682, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.896870898666852e-06, | |
| "loss": 2.4398, | |
| "step": 13510 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.862333356358362e-06, | |
| "loss": 2.4714, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.827795814049874e-06, | |
| "loss": 2.4158, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.793258271741384e-06, | |
| "loss": 2.4668, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 8.758720729432895e-06, | |
| "loss": 2.3691, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 8.724183187124405e-06, | |
| "loss": 2.3835, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 8.689645644815915e-06, | |
| "loss": 2.4404, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 8.655108102507426e-06, | |
| "loss": 2.4065, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 8.620570560198938e-06, | |
| "loss": 2.4134, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 8.586033017890448e-06, | |
| "loss": 2.3334, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 8.551495475581958e-06, | |
| "loss": 2.4672, | |
| "step": 13610 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 8.516957933273469e-06, | |
| "loss": 2.3859, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 8.482420390964979e-06, | |
| "loss": 2.4169, | |
| "step": 13630 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 8.44788284865649e-06, | |
| "loss": 2.4103, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.413345306348e-06, | |
| "loss": 2.492, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.378807764039511e-06, | |
| "loss": 2.4176, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.344270221731022e-06, | |
| "loss": 2.4864, | |
| "step": 13670 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.309732679422532e-06, | |
| "loss": 2.412, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.275195137114042e-06, | |
| "loss": 2.2933, | |
| "step": 13690 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.240657594805554e-06, | |
| "loss": 2.3668, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 8.206120052497065e-06, | |
| "loss": 2.4555, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 8.171582510188577e-06, | |
| "loss": 2.3584, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 8.137044967880087e-06, | |
| "loss": 2.3607, | |
| "step": 13730 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 8.102507425571597e-06, | |
| "loss": 2.4085, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 8.067969883263108e-06, | |
| "loss": 2.3381, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 8.033432340954618e-06, | |
| "loss": 2.4618, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.998894798646128e-06, | |
| "loss": 2.3399, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.96435725633764e-06, | |
| "loss": 2.4523, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.92981971402915e-06, | |
| "loss": 2.387, | |
| "step": 13790 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.895282171720661e-06, | |
| "loss": 2.3244, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.860744629412171e-06, | |
| "loss": 2.4542, | |
| "step": 13810 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.826207087103682e-06, | |
| "loss": 2.4163, | |
| "step": 13820 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.791669544795192e-06, | |
| "loss": 2.4133, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.757132002486702e-06, | |
| "loss": 2.5119, | |
| "step": 13840 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.722594460178214e-06, | |
| "loss": 2.398, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.688056917869725e-06, | |
| "loss": 2.3494, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 7.653519375561235e-06, | |
| "loss": 2.3333, | |
| "step": 13870 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 7.618981833252746e-06, | |
| "loss": 2.3259, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 7.584444290944256e-06, | |
| "loss": 2.3831, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 7.549906748635767e-06, | |
| "loss": 2.3466, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 7.515369206327279e-06, | |
| "loss": 2.3265, | |
| "step": 13910 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.480831664018789e-06, | |
| "loss": 2.5178, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.446294121710299e-06, | |
| "loss": 2.434, | |
| "step": 13930 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.4117565794018105e-06, | |
| "loss": 2.373, | |
| "step": 13940 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.377219037093321e-06, | |
| "loss": 2.4323, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.342681494784831e-06, | |
| "loss": 2.4116, | |
| "step": 13960 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 7.308143952476341e-06, | |
| "loss": 2.4483, | |
| "step": 13970 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 7.273606410167853e-06, | |
| "loss": 2.316, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 7.239068867859364e-06, | |
| "loss": 2.4437, | |
| "step": 13990 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 7.204531325550874e-06, | |
| "loss": 2.2788, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 7.169993783242384e-06, | |
| "loss": 2.3947, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 7.1354562409338955e-06, | |
| "loss": 2.3904, | |
| "step": 14020 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 7.100918698625406e-06, | |
| "loss": 2.3955, | |
| "step": 14030 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 7.066381156316918e-06, | |
| "loss": 2.3703, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 7.031843614008428e-06, | |
| "loss": 2.3459, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.9973060716999385e-06, | |
| "loss": 2.3317, | |
| "step": 14060 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.962768529391449e-06, | |
| "loss": 2.4926, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.928230987082959e-06, | |
| "loss": 2.3021, | |
| "step": 14080 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.8936934447744694e-06, | |
| "loss": 2.4277, | |
| "step": 14090 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.85915590246598e-06, | |
| "loss": 2.4472, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.824618360157492e-06, | |
| "loss": 2.3815, | |
| "step": 14110 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.790080817849002e-06, | |
| "loss": 2.4714, | |
| "step": 14120 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.755543275540513e-06, | |
| "loss": 2.3671, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.7210057332320235e-06, | |
| "loss": 2.3541, | |
| "step": 14140 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.686468190923534e-06, | |
| "loss": 2.3399, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.651930648615044e-06, | |
| "loss": 2.4081, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.617393106306556e-06, | |
| "loss": 2.4018, | |
| "step": 14170 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.5828555639980665e-06, | |
| "loss": 2.4288, | |
| "step": 14180 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.548318021689577e-06, | |
| "loss": 2.3876, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.513780479381087e-06, | |
| "loss": 2.3145, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.479242937072598e-06, | |
| "loss": 2.4065, | |
| "step": 14210 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.444705394764109e-06, | |
| "loss": 2.4055, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.410167852455621e-06, | |
| "loss": 2.3118, | |
| "step": 14230 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.375630310147131e-06, | |
| "loss": 2.36, | |
| "step": 14240 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.341092767838641e-06, | |
| "loss": 2.3578, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.3065552255301516e-06, | |
| "loss": 2.3756, | |
| "step": 14260 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.272017683221662e-06, | |
| "loss": 2.4196, | |
| "step": 14270 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.237480140913173e-06, | |
| "loss": 2.4409, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.202942598604683e-06, | |
| "loss": 2.3316, | |
| "step": 14290 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 6.1684050562961945e-06, | |
| "loss": 2.306, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 6.133867513987706e-06, | |
| "loss": 2.3856, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 6.099329971679216e-06, | |
| "loss": 2.3944, | |
| "step": 14320 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 6.064792429370726e-06, | |
| "loss": 2.4568, | |
| "step": 14330 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 6.030254887062237e-06, | |
| "loss": 2.3327, | |
| "step": 14340 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.995717344753748e-06, | |
| "loss": 2.3498, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.961179802445258e-06, | |
| "loss": 2.4181, | |
| "step": 14360 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.926642260136768e-06, | |
| "loss": 2.398, | |
| "step": 14370 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.89210471782828e-06, | |
| "loss": 2.4317, | |
| "step": 14380 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.85756717551979e-06, | |
| "loss": 2.4137, | |
| "step": 14390 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.823029633211301e-06, | |
| "loss": 2.4382, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.788492090902812e-06, | |
| "loss": 2.3696, | |
| "step": 14410 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.7539545485943225e-06, | |
| "loss": 2.4141, | |
| "step": 14420 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.719417006285833e-06, | |
| "loss": 2.3535, | |
| "step": 14430 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.684879463977344e-06, | |
| "loss": 2.408, | |
| "step": 14440 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.650341921668854e-06, | |
| "loss": 2.354, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.615804379360365e-06, | |
| "loss": 2.4954, | |
| "step": 14460 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.581266837051876e-06, | |
| "loss": 2.4316, | |
| "step": 14470 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.546729294743386e-06, | |
| "loss": 2.3022, | |
| "step": 14480 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.512191752434897e-06, | |
| "loss": 2.2843, | |
| "step": 14490 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.477654210126408e-06, | |
| "loss": 2.4304, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5.443116667817919e-06, | |
| "loss": 2.2816, | |
| "step": 14510 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5.408579125509429e-06, | |
| "loss": 2.3768, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5.374041583200939e-06, | |
| "loss": 2.4476, | |
| "step": 14530 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5.3395040408924506e-06, | |
| "loss": 2.439, | |
| "step": 14540 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5.304966498583961e-06, | |
| "loss": 2.3349, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.270428956275471e-06, | |
| "loss": 2.542, | |
| "step": 14560 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.235891413966982e-06, | |
| "loss": 2.3421, | |
| "step": 14570 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.201353871658493e-06, | |
| "loss": 2.3747, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.166816329350004e-06, | |
| "loss": 2.3729, | |
| "step": 14590 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.132278787041515e-06, | |
| "loss": 2.3492, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.097741244733025e-06, | |
| "loss": 2.2954, | |
| "step": 14610 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 5.063203702424536e-06, | |
| "loss": 2.3548, | |
| "step": 14620 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 5.028666160116046e-06, | |
| "loss": 2.2816, | |
| "step": 14630 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.994128617807557e-06, | |
| "loss": 2.4028, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.959591075499067e-06, | |
| "loss": 2.3788, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.925053533190578e-06, | |
| "loss": 2.4394, | |
| "step": 14660 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.890515990882089e-06, | |
| "loss": 2.2951, | |
| "step": 14670 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.8559784485736e-06, | |
| "loss": 2.2777, | |
| "step": 14680 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.82144090626511e-06, | |
| "loss": 2.3185, | |
| "step": 14690 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.7869033639566215e-06, | |
| "loss": 2.4007, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.752365821648132e-06, | |
| "loss": 2.3751, | |
| "step": 14710 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.717828279339642e-06, | |
| "loss": 2.3223, | |
| "step": 14720 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.683290737031153e-06, | |
| "loss": 2.3298, | |
| "step": 14730 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.648753194722664e-06, | |
| "loss": 2.3654, | |
| "step": 14740 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.614215652414174e-06, | |
| "loss": 2.4975, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.579678110105685e-06, | |
| "loss": 2.4081, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.545140567797196e-06, | |
| "loss": 2.3517, | |
| "step": 14770 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.510603025488707e-06, | |
| "loss": 2.4114, | |
| "step": 14780 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.476065483180217e-06, | |
| "loss": 2.2955, | |
| "step": 14790 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.441527940871728e-06, | |
| "loss": 2.2924, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.406990398563238e-06, | |
| "loss": 2.3289, | |
| "step": 14810 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.372452856254749e-06, | |
| "loss": 2.4274, | |
| "step": 14820 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.33791531394626e-06, | |
| "loss": 2.2682, | |
| "step": 14830 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.30337777163777e-06, | |
| "loss": 2.3307, | |
| "step": 14840 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.2688402293292805e-06, | |
| "loss": 2.3643, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.234302687020792e-06, | |
| "loss": 2.4233, | |
| "step": 14860 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.199765144712303e-06, | |
| "loss": 2.4175, | |
| "step": 14870 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.165227602403813e-06, | |
| "loss": 2.3321, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 4.130690060095324e-06, | |
| "loss": 2.4373, | |
| "step": 14890 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 4.096152517786835e-06, | |
| "loss": 2.3562, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 4.061614975478345e-06, | |
| "loss": 2.3348, | |
| "step": 14910 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 4.027077433169856e-06, | |
| "loss": 2.3721, | |
| "step": 14920 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.992539890861366e-06, | |
| "loss": 2.3379, | |
| "step": 14930 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.958002348552877e-06, | |
| "loss": 2.3698, | |
| "step": 14940 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.923464806244388e-06, | |
| "loss": 2.4562, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.888927263935899e-06, | |
| "loss": 2.4839, | |
| "step": 14960 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.854389721627409e-06, | |
| "loss": 2.3623, | |
| "step": 14970 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.81985217931892e-06, | |
| "loss": 2.3421, | |
| "step": 14980 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.785314637010431e-06, | |
| "loss": 2.3138, | |
| "step": 14990 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.750777094701941e-06, | |
| "loss": 2.3843, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.7162395523934515e-06, | |
| "loss": 2.3591, | |
| "step": 15010 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.6817020100849626e-06, | |
| "loss": 2.3636, | |
| "step": 15020 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.6471644677764734e-06, | |
| "loss": 2.4267, | |
| "step": 15030 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.6126269254679837e-06, | |
| "loss": 2.373, | |
| "step": 15040 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.578089383159495e-06, | |
| "loss": 2.5028, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.543551840851005e-06, | |
| "loss": 2.3799, | |
| "step": 15060 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.509014298542516e-06, | |
| "loss": 2.4805, | |
| "step": 15070 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.474476756234026e-06, | |
| "loss": 2.2899, | |
| "step": 15080 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.4399392139255374e-06, | |
| "loss": 2.3221, | |
| "step": 15090 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.4054016716170477e-06, | |
| "loss": 2.3465, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.3708641293085584e-06, | |
| "loss": 2.3942, | |
| "step": 15110 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.3363265870000696e-06, | |
| "loss": 2.4674, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.30178904469158e-06, | |
| "loss": 2.4301, | |
| "step": 15130 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.2672515023830902e-06, | |
| "loss": 2.4088, | |
| "step": 15140 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.2327139600746014e-06, | |
| "loss": 2.3602, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.198176417766112e-06, | |
| "loss": 2.3013, | |
| "step": 15160 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.1636388754576224e-06, | |
| "loss": 2.3958, | |
| "step": 15170 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.1291013331491336e-06, | |
| "loss": 2.3123, | |
| "step": 15180 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.094563790840644e-06, | |
| "loss": 2.3162, | |
| "step": 15190 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.0600262485321547e-06, | |
| "loss": 2.3614, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 3.0254887062236654e-06, | |
| "loss": 2.3247, | |
| "step": 15210 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.990951163915176e-06, | |
| "loss": 2.3145, | |
| "step": 15220 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.9564136216066864e-06, | |
| "loss": 2.3681, | |
| "step": 15230 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.921876079298197e-06, | |
| "loss": 2.2981, | |
| "step": 15240 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.887338536989708e-06, | |
| "loss": 2.346, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.8528009946812187e-06, | |
| "loss": 2.4164, | |
| "step": 15260 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.8182634523727294e-06, | |
| "loss": 2.4174, | |
| "step": 15270 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.7837259100642397e-06, | |
| "loss": 2.3599, | |
| "step": 15280 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.7491883677557505e-06, | |
| "loss": 2.278, | |
| "step": 15290 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.7146508254472616e-06, | |
| "loss": 2.3583, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.680113283138772e-06, | |
| "loss": 2.4094, | |
| "step": 15310 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.6455757408302827e-06, | |
| "loss": 2.4016, | |
| "step": 15320 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.6110381985217934e-06, | |
| "loss": 2.3733, | |
| "step": 15330 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.5765006562133037e-06, | |
| "loss": 2.3657, | |
| "step": 15340 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.541963113904815e-06, | |
| "loss": 2.3585, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.507425571596325e-06, | |
| "loss": 2.3034, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.472888029287836e-06, | |
| "loss": 2.399, | |
| "step": 15370 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.4383504869793467e-06, | |
| "loss": 2.3786, | |
| "step": 15380 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.4038129446708574e-06, | |
| "loss": 2.3629, | |
| "step": 15390 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.369275402362368e-06, | |
| "loss": 2.3671, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.334737860053879e-06, | |
| "loss": 2.2268, | |
| "step": 15410 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.300200317745389e-06, | |
| "loss": 2.3121, | |
| "step": 15420 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.2656627754369e-06, | |
| "loss": 2.3288, | |
| "step": 15430 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.2311252331284107e-06, | |
| "loss": 2.3856, | |
| "step": 15440 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.1965876908199214e-06, | |
| "loss": 2.3113, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.162050148511432e-06, | |
| "loss": 2.2406, | |
| "step": 15460 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.1275126062029425e-06, | |
| "loss": 2.353, | |
| "step": 15470 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.0929750638944532e-06, | |
| "loss": 2.3918, | |
| "step": 15480 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.058437521585964e-06, | |
| "loss": 2.4184, | |
| "step": 15490 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.0238999792774747e-06, | |
| "loss": 2.3828, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.9893624369689854e-06, | |
| "loss": 2.4124, | |
| "step": 15510 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.9548248946604957e-06, | |
| "loss": 2.3288, | |
| "step": 15520 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.920287352352007e-06, | |
| "loss": 2.2583, | |
| "step": 15530 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.8857498100435174e-06, | |
| "loss": 2.4088, | |
| "step": 15540 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.851212267735028e-06, | |
| "loss": 2.2754, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.8166747254265387e-06, | |
| "loss": 2.3621, | |
| "step": 15560 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.7821371831180492e-06, | |
| "loss": 2.3717, | |
| "step": 15570 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.74759964080956e-06, | |
| "loss": 2.4536, | |
| "step": 15580 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.713062098501071e-06, | |
| "loss": 2.3785, | |
| "step": 15590 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.6785245561925812e-06, | |
| "loss": 2.2907, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.6439870138840922e-06, | |
| "loss": 2.3086, | |
| "step": 15610 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.609449471575603e-06, | |
| "loss": 2.358, | |
| "step": 15620 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.5749119292671134e-06, | |
| "loss": 2.3363, | |
| "step": 15630 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.5403743869586242e-06, | |
| "loss": 2.3688, | |
| "step": 15640 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.5058368446501347e-06, | |
| "loss": 2.3412, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.4712993023416455e-06, | |
| "loss": 2.3828, | |
| "step": 15660 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.436761760033156e-06, | |
| "loss": 2.2863, | |
| "step": 15670 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.402224217724667e-06, | |
| "loss": 2.5111, | |
| "step": 15680 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.3676866754161775e-06, | |
| "loss": 2.4224, | |
| "step": 15690 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.3331491331076882e-06, | |
| "loss": 2.4222, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.2986115907991987e-06, | |
| "loss": 2.3345, | |
| "step": 15710 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.2640740484907095e-06, | |
| "loss": 2.2492, | |
| "step": 15720 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.2295365061822202e-06, | |
| "loss": 2.4186, | |
| "step": 15730 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.1949989638737307e-06, | |
| "loss": 2.328, | |
| "step": 15740 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.1604614215652415e-06, | |
| "loss": 2.3137, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.1259238792567522e-06, | |
| "loss": 2.3326, | |
| "step": 15760 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.091386336948263e-06, | |
| "loss": 2.3862, | |
| "step": 15770 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0568487946397735e-06, | |
| "loss": 2.3108, | |
| "step": 15780 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.022311252331284e-06, | |
| "loss": 2.26, | |
| "step": 15790 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 9.87773710022795e-07, | |
| "loss": 2.4293, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 9.532361677143055e-07, | |
| "loss": 2.3243, | |
| "step": 15810 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 9.186986254058161e-07, | |
| "loss": 2.3935, | |
| "step": 15820 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.841610830973267e-07, | |
| "loss": 2.3129, | |
| "step": 15830 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.496235407888376e-07, | |
| "loss": 2.3353, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 8.150859984803482e-07, | |
| "loss": 2.2725, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.805484561718589e-07, | |
| "loss": 2.378, | |
| "step": 15860 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.460109138633696e-07, | |
| "loss": 2.3231, | |
| "step": 15870 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.114733715548802e-07, | |
| "loss": 2.3203, | |
| "step": 15880 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.769358292463909e-07, | |
| "loss": 2.3593, | |
| "step": 15890 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.423982869379015e-07, | |
| "loss": 2.3439, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 6.078607446294122e-07, | |
| "loss": 2.3775, | |
| "step": 15910 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.733232023209229e-07, | |
| "loss": 2.3511, | |
| "step": 15920 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.387856600124336e-07, | |
| "loss": 2.3449, | |
| "step": 15930 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.042481177039442e-07, | |
| "loss": 2.3427, | |
| "step": 15940 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.697105753954549e-07, | |
| "loss": 2.2693, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 4.3517303308696555e-07, | |
| "loss": 2.3398, | |
| "step": 15960 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 4.006354907784763e-07, | |
| "loss": 2.2743, | |
| "step": 15970 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.6609794846998687e-07, | |
| "loss": 2.2086, | |
| "step": 15980 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.3156040616149755e-07, | |
| "loss": 2.3393, | |
| "step": 15990 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.9702286385300824e-07, | |
| "loss": 2.3861, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.624853215445189e-07, | |
| "loss": 2.2871, | |
| "step": 16010 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.2794777923602959e-07, | |
| "loss": 2.3119, | |
| "step": 16020 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.9341023692754024e-07, | |
| "loss": 2.385, | |
| "step": 16030 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.5887269461905093e-07, | |
| "loss": 2.2872, | |
| "step": 16040 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.243351523105616e-07, | |
| "loss": 2.3681, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 8.979761000207225e-08, | |
| "loss": 2.3491, | |
| "step": 16060 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 5.526006769358292e-08, | |
| "loss": 2.3968, | |
| "step": 16070 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.0722525385093597e-08, | |
| "loss": 2.4475, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.5851995594482614, | |
| "eval_loss": 1.9156352281570435, | |
| "eval_runtime": 8294.1564, | |
| "eval_samples_per_second": 9.195, | |
| "eval_steps_per_second": 0.287, | |
| "step": 16086 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 16086, | |
| "total_flos": 1.6352487334240263e+20, | |
| "train_loss": 3.7719367721047283, | |
| "train_runtime": 628821.6026, | |
| "train_samples_per_second": 3.275, | |
| "train_steps_per_second": 0.026 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 16086, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 1.6352487334240263e+20, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |