| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.08267797093601252, |
| "eval_steps": 10000, |
| "global_step": 20001, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 4.133691862207516e-06, |
| "grad_norm": 71.12261039322922, |
| "learning_rate": 8.264462809917355e-09, |
| "loss": 7.6413, |
| "step": 1 |
| }, |
| { |
| "epoch": 4.1336918622075155e-05, |
| "grad_norm": 67.75271488374239, |
| "learning_rate": 8.264462809917357e-08, |
| "loss": 7.6837, |
| "step": 10 |
| }, |
| { |
| "epoch": 8.267383724415031e-05, |
| "grad_norm": 68.70872130093693, |
| "learning_rate": 1.6528925619834713e-07, |
| "loss": 7.6852, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00012401075586622546, |
| "grad_norm": 62.98947446015298, |
| "learning_rate": 2.4793388429752067e-07, |
| "loss": 7.6376, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00016534767448830062, |
| "grad_norm": 54.98248131444704, |
| "learning_rate": 3.3057851239669426e-07, |
| "loss": 7.4517, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.00020668459311037578, |
| "grad_norm": 42.7313362524158, |
| "learning_rate": 4.132231404958678e-07, |
| "loss": 7.1676, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0002480215117324509, |
| "grad_norm": 38.42250005234289, |
| "learning_rate": 4.958677685950413e-07, |
| "loss": 6.7673, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0002893584303545261, |
| "grad_norm": 26.17988841617355, |
| "learning_rate": 5.78512396694215e-07, |
| "loss": 6.1421, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00033069534897660124, |
| "grad_norm": 20.709699960482357, |
| "learning_rate": 6.611570247933885e-07, |
| "loss": 5.7812, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0003720322675986764, |
| "grad_norm": 18.83787744791924, |
| "learning_rate": 7.438016528925621e-07, |
| "loss": 5.4004, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.00041336918622075157, |
| "grad_norm": 19.12503087164104, |
| "learning_rate": 8.264462809917356e-07, |
| "loss": 5.0367, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0004547061048428267, |
| "grad_norm": 14.34978763505406, |
| "learning_rate": 9.090909090909091e-07, |
| "loss": 4.7081, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0004960430234649018, |
| "grad_norm": 13.492660464865253, |
| "learning_rate": 9.917355371900827e-07, |
| "loss": 4.5718, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.000537379942086977, |
| "grad_norm": 14.420928714044905, |
| "learning_rate": 1.0743801652892562e-06, |
| "loss": 4.4218, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0005787168607090522, |
| "grad_norm": 12.495448321891903, |
| "learning_rate": 1.15702479338843e-06, |
| "loss": 4.166, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0006200537793311273, |
| "grad_norm": 9.801331301009261, |
| "learning_rate": 1.2396694214876035e-06, |
| "loss": 4.0983, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0006613906979532025, |
| "grad_norm": 11.207849241885746, |
| "learning_rate": 1.322314049586777e-06, |
| "loss": 3.8569, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0007027276165752776, |
| "grad_norm": 9.679145248918651, |
| "learning_rate": 1.4049586776859506e-06, |
| "loss": 3.8265, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0007440645351973528, |
| "grad_norm": 11.74286989793758, |
| "learning_rate": 1.4876033057851241e-06, |
| "loss": 3.7614, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0007854014538194279, |
| "grad_norm": 12.757634031418831, |
| "learning_rate": 1.5702479338842977e-06, |
| "loss": 3.7239, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0008267383724415031, |
| "grad_norm": 10.441179098409538, |
| "learning_rate": 1.6528925619834712e-06, |
| "loss": 3.5734, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0008680752910635782, |
| "grad_norm": 9.812592859538713, |
| "learning_rate": 1.7355371900826448e-06, |
| "loss": 3.567, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0009094122096856533, |
| "grad_norm": 13.93844427924458, |
| "learning_rate": 1.8181818181818183e-06, |
| "loss": 3.4882, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0009507491283077286, |
| "grad_norm": 10.098507514325146, |
| "learning_rate": 1.900826446280992e-06, |
| "loss": 3.4818, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0009920860469298037, |
| "grad_norm": 9.58855528934084, |
| "learning_rate": 1.9834710743801654e-06, |
| "loss": 3.4219, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0010334229655518789, |
| "grad_norm": 10.682635147410519, |
| "learning_rate": 2.066115702479339e-06, |
| "loss": 3.4406, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.001074759884173954, |
| "grad_norm": 8.223333896011393, |
| "learning_rate": 2.1487603305785124e-06, |
| "loss": 3.3402, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.001116096802796029, |
| "grad_norm": 8.984512948693363, |
| "learning_rate": 2.231404958677686e-06, |
| "loss": 3.3059, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0011574337214181043, |
| "grad_norm": 9.341193028741104, |
| "learning_rate": 2.31404958677686e-06, |
| "loss": 3.1823, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0011987706400401795, |
| "grad_norm": 10.3056509420508, |
| "learning_rate": 2.3966942148760335e-06, |
| "loss": 3.2817, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0012401075586622545, |
| "grad_norm": 6.768688690699956, |
| "learning_rate": 2.479338842975207e-06, |
| "loss": 3.1747, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0012814444772843298, |
| "grad_norm": 6.809665393368199, |
| "learning_rate": 2.56198347107438e-06, |
| "loss": 3.1865, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.001322781395906405, |
| "grad_norm": 6.996784607657393, |
| "learning_rate": 2.644628099173554e-06, |
| "loss": 3.1331, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.0013641183145284802, |
| "grad_norm": 9.752811655395744, |
| "learning_rate": 2.7272727272727272e-06, |
| "loss": 3.1656, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0014054552331505552, |
| "grad_norm": 8.381106520516422, |
| "learning_rate": 2.809917355371901e-06, |
| "loss": 3.1045, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0014467921517726304, |
| "grad_norm": 9.529557732528744, |
| "learning_rate": 2.8925619834710743e-06, |
| "loss": 3.0388, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0014881290703947056, |
| "grad_norm": 7.141802881599084, |
| "learning_rate": 2.9752066115702483e-06, |
| "loss": 3.1119, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0015294659890167806, |
| "grad_norm": 8.784456968597924, |
| "learning_rate": 3.0578512396694214e-06, |
| "loss": 2.9281, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0015708029076388558, |
| "grad_norm": 7.19486645716449, |
| "learning_rate": 3.1404958677685953e-06, |
| "loss": 2.9817, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.001612139826260931, |
| "grad_norm": 8.694417122337162, |
| "learning_rate": 3.2231404958677685e-06, |
| "loss": 3.0107, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0016534767448830063, |
| "grad_norm": 8.323745687899638, |
| "learning_rate": 3.3057851239669424e-06, |
| "loss": 3.0367, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0016948136635050813, |
| "grad_norm": 7.289283064256704, |
| "learning_rate": 3.388429752066116e-06, |
| "loss": 2.9071, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0017361505821271565, |
| "grad_norm": 8.436424184415285, |
| "learning_rate": 3.4710743801652895e-06, |
| "loss": 2.8761, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.0017774875007492317, |
| "grad_norm": 8.284395953073583, |
| "learning_rate": 3.553719008264463e-06, |
| "loss": 2.8742, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.0018188244193713067, |
| "grad_norm": 7.765622556565918, |
| "learning_rate": 3.6363636363636366e-06, |
| "loss": 2.9028, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.001860161337993382, |
| "grad_norm": 8.06677494412333, |
| "learning_rate": 3.71900826446281e-06, |
| "loss": 2.885, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0019014982566154571, |
| "grad_norm": 6.847325183046608, |
| "learning_rate": 3.801652892561984e-06, |
| "loss": 2.8491, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0019428351752375323, |
| "grad_norm": 6.363354378607742, |
| "learning_rate": 3.884297520661157e-06, |
| "loss": 2.8626, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.0019841720938596073, |
| "grad_norm": 6.752545926601506, |
| "learning_rate": 3.966942148760331e-06, |
| "loss": 2.8759, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.0020255090124816828, |
| "grad_norm": 6.408269470963144, |
| "learning_rate": 4.049586776859504e-06, |
| "loss": 2.8132, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0020668459311037578, |
| "grad_norm": 10.174350278228932, |
| "learning_rate": 4.132231404958678e-06, |
| "loss": 2.7726, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0021081828497258328, |
| "grad_norm": 7.3668788002558045, |
| "learning_rate": 4.214876033057851e-06, |
| "loss": 2.781, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.002149519768347908, |
| "grad_norm": 6.065779568736328, |
| "learning_rate": 4.297520661157025e-06, |
| "loss": 2.7566, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.002190856686969983, |
| "grad_norm": 7.249820467027506, |
| "learning_rate": 4.3801652892561984e-06, |
| "loss": 2.7745, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.002232193605592058, |
| "grad_norm": 6.6147066822580305, |
| "learning_rate": 4.462809917355372e-06, |
| "loss": 2.7287, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.0022735305242141336, |
| "grad_norm": 6.730411628057589, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 2.7263, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0023148674428362086, |
| "grad_norm": 5.90842778355055, |
| "learning_rate": 4.62809917355372e-06, |
| "loss": 2.7113, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.0023562043614582836, |
| "grad_norm": 5.290690112865889, |
| "learning_rate": 4.710743801652893e-06, |
| "loss": 2.7407, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.002397541280080359, |
| "grad_norm": 6.913494491726395, |
| "learning_rate": 4.793388429752067e-06, |
| "loss": 2.7089, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.002438878198702434, |
| "grad_norm": 6.999126848562726, |
| "learning_rate": 4.87603305785124e-06, |
| "loss": 2.7074, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.002480215117324509, |
| "grad_norm": 7.325247968940623, |
| "learning_rate": 4.958677685950414e-06, |
| "loss": 2.6561, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0025215520359465845, |
| "grad_norm": 5.841708656820878, |
| "learning_rate": 5.041322314049587e-06, |
| "loss": 2.5882, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0025628889545686595, |
| "grad_norm": 6.0353216317971725, |
| "learning_rate": 5.12396694214876e-06, |
| "loss": 2.6469, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.002604225873190735, |
| "grad_norm": 7.544181254798358, |
| "learning_rate": 5.206611570247935e-06, |
| "loss": 2.6267, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.00264556279181281, |
| "grad_norm": 6.608459353317291, |
| "learning_rate": 5.289256198347108e-06, |
| "loss": 2.5662, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.002686899710434885, |
| "grad_norm": 6.839416904552874, |
| "learning_rate": 5.371900826446281e-06, |
| "loss": 2.6888, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0027282366290569604, |
| "grad_norm": 6.668329679339745, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 2.5642, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0027695735476790354, |
| "grad_norm": 7.132958283503685, |
| "learning_rate": 5.537190082644629e-06, |
| "loss": 2.5651, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.0028109104663011104, |
| "grad_norm": 6.277307177686086, |
| "learning_rate": 5.619834710743802e-06, |
| "loss": 2.5659, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.002852247384923186, |
| "grad_norm": 6.128689798291957, |
| "learning_rate": 5.702479338842976e-06, |
| "loss": 2.5826, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.002893584303545261, |
| "grad_norm": 6.5950769294424125, |
| "learning_rate": 5.785123966942149e-06, |
| "loss": 2.5845, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.002934921222167336, |
| "grad_norm": 6.419190212095196, |
| "learning_rate": 5.867768595041323e-06, |
| "loss": 2.5336, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.0029762581407894112, |
| "grad_norm": 8.4242870632546, |
| "learning_rate": 5.9504132231404965e-06, |
| "loss": 2.5085, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.0030175950594114862, |
| "grad_norm": 7.690590337257814, |
| "learning_rate": 6.03305785123967e-06, |
| "loss": 2.6229, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0030589319780335612, |
| "grad_norm": 6.501607766316929, |
| "learning_rate": 6.115702479338843e-06, |
| "loss": 2.5214, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.0031002688966556367, |
| "grad_norm": 6.318891494759645, |
| "learning_rate": 6.198347107438017e-06, |
| "loss": 2.5001, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.0031416058152777117, |
| "grad_norm": 6.549087764929742, |
| "learning_rate": 6.280991735537191e-06, |
| "loss": 2.4692, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.003182942733899787, |
| "grad_norm": 6.139353182718512, |
| "learning_rate": 6.363636363636364e-06, |
| "loss": 2.4862, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.003224279652521862, |
| "grad_norm": 6.927572304151442, |
| "learning_rate": 6.446280991735537e-06, |
| "loss": 2.5114, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.003265616571143937, |
| "grad_norm": 6.193127375797419, |
| "learning_rate": 6.528925619834712e-06, |
| "loss": 2.5163, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.0033069534897660125, |
| "grad_norm": 6.624590490134376, |
| "learning_rate": 6.611570247933885e-06, |
| "loss": 2.4314, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0033482904083880875, |
| "grad_norm": 6.821779483779539, |
| "learning_rate": 6.694214876033058e-06, |
| "loss": 2.5099, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0033896273270101625, |
| "grad_norm": 7.545681159342933, |
| "learning_rate": 6.776859504132232e-06, |
| "loss": 2.431, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.003430964245632238, |
| "grad_norm": 7.4575512413535785, |
| "learning_rate": 6.859504132231406e-06, |
| "loss": 2.5166, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.003472301164254313, |
| "grad_norm": 5.330361729769768, |
| "learning_rate": 6.942148760330579e-06, |
| "loss": 2.4147, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.003513638082876388, |
| "grad_norm": 9.642946599111673, |
| "learning_rate": 7.0247933884297525e-06, |
| "loss": 2.416, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.0035549750014984634, |
| "grad_norm": 5.81243704909108, |
| "learning_rate": 7.107438016528926e-06, |
| "loss": 2.4712, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.0035963119201205384, |
| "grad_norm": 8.181150357128717, |
| "learning_rate": 7.1900826446281005e-06, |
| "loss": 2.4275, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.0036376488387426134, |
| "grad_norm": 6.783789830926592, |
| "learning_rate": 7.272727272727273e-06, |
| "loss": 2.4283, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.003678985757364689, |
| "grad_norm": 6.537482232422147, |
| "learning_rate": 7.355371900826447e-06, |
| "loss": 2.4064, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.003720322675986764, |
| "grad_norm": 5.51502652262759, |
| "learning_rate": 7.43801652892562e-06, |
| "loss": 2.3804, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.0037616595946088393, |
| "grad_norm": 5.643663413025215, |
| "learning_rate": 7.520661157024795e-06, |
| "loss": 2.4225, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0038029965132309143, |
| "grad_norm": 5.698077553184173, |
| "learning_rate": 7.603305785123968e-06, |
| "loss": 2.3767, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.0038443334318529893, |
| "grad_norm": 7.7844289388382695, |
| "learning_rate": 7.685950413223142e-06, |
| "loss": 2.3515, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.0038856703504750647, |
| "grad_norm": 7.037093549799256, |
| "learning_rate": 7.768595041322314e-06, |
| "loss": 2.34, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.00392700726909714, |
| "grad_norm": 6.080619495201754, |
| "learning_rate": 7.851239669421489e-06, |
| "loss": 2.3174, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.003968344187719215, |
| "grad_norm": 5.880580728396556, |
| "learning_rate": 7.933884297520661e-06, |
| "loss": 2.3706, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.00400968110634129, |
| "grad_norm": 5.661072675024262, |
| "learning_rate": 8.016528925619836e-06, |
| "loss": 2.3481, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.0040510180249633656, |
| "grad_norm": 7.201611884034939, |
| "learning_rate": 8.099173553719009e-06, |
| "loss": 2.3667, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.00409235494358544, |
| "grad_norm": 6.275874199218544, |
| "learning_rate": 8.181818181818183e-06, |
| "loss": 2.3169, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.0041336918622075156, |
| "grad_norm": 5.267583094894874, |
| "learning_rate": 8.264462809917356e-06, |
| "loss": 2.3757, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.004175028780829591, |
| "grad_norm": 5.377757889968936, |
| "learning_rate": 8.34710743801653e-06, |
| "loss": 2.3631, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.0042163656994516656, |
| "grad_norm": 6.0201100161095225, |
| "learning_rate": 8.429752066115703e-06, |
| "loss": 2.2818, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.004257702618073741, |
| "grad_norm": 6.579057670565248, |
| "learning_rate": 8.512396694214877e-06, |
| "loss": 2.3313, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.004299039536695816, |
| "grad_norm": 7.27660719754988, |
| "learning_rate": 8.59504132231405e-06, |
| "loss": 2.3288, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.004340376455317891, |
| "grad_norm": 6.144262724026651, |
| "learning_rate": 8.677685950413224e-06, |
| "loss": 2.2981, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.004381713373939966, |
| "grad_norm": 5.672033241927713, |
| "learning_rate": 8.760330578512397e-06, |
| "loss": 2.3059, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.004423050292562042, |
| "grad_norm": 6.00241597585073, |
| "learning_rate": 8.842975206611571e-06, |
| "loss": 2.389, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.004464387211184116, |
| "grad_norm": 5.649027277167286, |
| "learning_rate": 8.925619834710744e-06, |
| "loss": 2.3371, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.004505724129806192, |
| "grad_norm": 5.927371810838215, |
| "learning_rate": 9.008264462809918e-06, |
| "loss": 2.3133, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.004547061048428267, |
| "grad_norm": 5.662885075294936, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 2.2779, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.004588397967050342, |
| "grad_norm": 5.654705250045512, |
| "learning_rate": 9.173553719008265e-06, |
| "loss": 2.2234, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.004629734885672417, |
| "grad_norm": 6.241923587474726, |
| "learning_rate": 9.25619834710744e-06, |
| "loss": 2.2626, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.004671071804294493, |
| "grad_norm": 5.741893435201511, |
| "learning_rate": 9.338842975206613e-06, |
| "loss": 2.3012, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.004712408722916567, |
| "grad_norm": 6.034507786920065, |
| "learning_rate": 9.421487603305785e-06, |
| "loss": 2.2682, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.004753745641538643, |
| "grad_norm": 7.410349347874194, |
| "learning_rate": 9.50413223140496e-06, |
| "loss": 2.2796, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.004795082560160718, |
| "grad_norm": 5.992102424922784, |
| "learning_rate": 9.586776859504134e-06, |
| "loss": 2.2112, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.004836419478782793, |
| "grad_norm": 5.453311998034154, |
| "learning_rate": 9.669421487603307e-06, |
| "loss": 2.1744, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.004877756397404868, |
| "grad_norm": 5.521988823358632, |
| "learning_rate": 9.75206611570248e-06, |
| "loss": 2.2984, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.004919093316026944, |
| "grad_norm": 6.153893937530345, |
| "learning_rate": 9.834710743801654e-06, |
| "loss": 2.2443, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.004960430234649018, |
| "grad_norm": 6.5507135206490315, |
| "learning_rate": 9.917355371900828e-06, |
| "loss": 2.245, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.005001767153271094, |
| "grad_norm": 8.209761096607327, |
| "learning_rate": 1e-05, |
| "loss": 2.1959, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.005043104071893169, |
| "grad_norm": 4.986264712575914, |
| "learning_rate": 1.0082644628099174e-05, |
| "loss": 2.1612, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.0050844409905152444, |
| "grad_norm": 6.3381969120868895, |
| "learning_rate": 1.0165289256198348e-05, |
| "loss": 2.187, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.005125777909137319, |
| "grad_norm": 5.750067641203542, |
| "learning_rate": 1.024793388429752e-05, |
| "loss": 2.2004, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.0051671148277593944, |
| "grad_norm": 5.826539821613237, |
| "learning_rate": 1.0330578512396693e-05, |
| "loss": 2.1668, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.00520845174638147, |
| "grad_norm": 6.296936925085496, |
| "learning_rate": 1.041322314049587e-05, |
| "loss": 2.1807, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.0052497886650035444, |
| "grad_norm": 5.812866932063289, |
| "learning_rate": 1.0495867768595042e-05, |
| "loss": 2.209, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.00529112558362562, |
| "grad_norm": 5.808144224407848, |
| "learning_rate": 1.0578512396694216e-05, |
| "loss": 2.1807, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.005332462502247695, |
| "grad_norm": 7.460856083590218, |
| "learning_rate": 1.0661157024793389e-05, |
| "loss": 2.2229, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.00537379942086977, |
| "grad_norm": 6.980089322389665, |
| "learning_rate": 1.0743801652892562e-05, |
| "loss": 2.175, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.005415136339491845, |
| "grad_norm": 5.57740557557049, |
| "learning_rate": 1.0826446280991736e-05, |
| "loss": 2.159, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.005456473258113921, |
| "grad_norm": 6.647266714783434, |
| "learning_rate": 1.0909090909090909e-05, |
| "loss": 2.1214, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.005497810176735995, |
| "grad_norm": 6.128334205497799, |
| "learning_rate": 1.0991735537190083e-05, |
| "loss": 2.1792, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.005539147095358071, |
| "grad_norm": 6.483094766646449, |
| "learning_rate": 1.1074380165289258e-05, |
| "loss": 2.2472, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.005580484013980146, |
| "grad_norm": 5.359049945838656, |
| "learning_rate": 1.1157024793388432e-05, |
| "loss": 2.182, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.005621820932602221, |
| "grad_norm": 6.6553587609192695, |
| "learning_rate": 1.1239669421487605e-05, |
| "loss": 2.1918, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.005663157851224296, |
| "grad_norm": 6.105297642757683, |
| "learning_rate": 1.1322314049586777e-05, |
| "loss": 2.1221, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.005704494769846372, |
| "grad_norm": 5.22407946250878, |
| "learning_rate": 1.1404958677685952e-05, |
| "loss": 2.0898, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.005745831688468446, |
| "grad_norm": 5.695260375861287, |
| "learning_rate": 1.1487603305785125e-05, |
| "loss": 2.2, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.005787168607090522, |
| "grad_norm": 5.834677547053157, |
| "learning_rate": 1.1570247933884297e-05, |
| "loss": 2.1191, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.005828505525712597, |
| "grad_norm": 7.863484441598645, |
| "learning_rate": 1.1652892561983472e-05, |
| "loss": 2.1255, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.005869842444334672, |
| "grad_norm": 5.295752440326079, |
| "learning_rate": 1.1735537190082646e-05, |
| "loss": 2.1535, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.005911179362956747, |
| "grad_norm": 6.925687761354192, |
| "learning_rate": 1.181818181818182e-05, |
| "loss": 2.0618, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.0059525162815788225, |
| "grad_norm": 4.89230568395151, |
| "learning_rate": 1.1900826446280993e-05, |
| "loss": 2.1745, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.005993853200200897, |
| "grad_norm": 5.795044632849597, |
| "learning_rate": 1.1983471074380166e-05, |
| "loss": 2.1352, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.0060351901188229725, |
| "grad_norm": 6.43513153980254, |
| "learning_rate": 1.206611570247934e-05, |
| "loss": 2.102, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.006076527037445048, |
| "grad_norm": 6.46737354415826, |
| "learning_rate": 1.2148760330578513e-05, |
| "loss": 2.0934, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.0061178639560671225, |
| "grad_norm": 6.202005592277405, |
| "learning_rate": 1.2231404958677686e-05, |
| "loss": 2.1482, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.006159200874689198, |
| "grad_norm": 5.8071883971926725, |
| "learning_rate": 1.231404958677686e-05, |
| "loss": 2.0553, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.006200537793311273, |
| "grad_norm": 6.2092050955251334, |
| "learning_rate": 1.2396694214876034e-05, |
| "loss": 2.0836, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.006241874711933349, |
| "grad_norm": 4.486772138898485, |
| "learning_rate": 1.2479338842975209e-05, |
| "loss": 2.1622, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.006283211630555423, |
| "grad_norm": 5.229981562060858, |
| "learning_rate": 1.2561983471074381e-05, |
| "loss": 2.0854, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.006324548549177499, |
| "grad_norm": 5.604269574061805, |
| "learning_rate": 1.2644628099173554e-05, |
| "loss": 2.1604, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.006365885467799574, |
| "grad_norm": 5.361170550183367, |
| "learning_rate": 1.2727272727272728e-05, |
| "loss": 2.063, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.006407222386421649, |
| "grad_norm": 7.20018526125173, |
| "learning_rate": 1.2809917355371901e-05, |
| "loss": 2.0935, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.006448559305043724, |
| "grad_norm": 5.379224740428811, |
| "learning_rate": 1.2892561983471074e-05, |
| "loss": 2.1291, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.0064898962236658, |
| "grad_norm": 4.967695885199312, |
| "learning_rate": 1.2975206611570248e-05, |
| "loss": 2.102, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.006531233142287874, |
| "grad_norm": 6.039676530986522, |
| "learning_rate": 1.3057851239669424e-05, |
| "loss": 2.1134, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.00657257006090995, |
| "grad_norm": 5.459189111144024, |
| "learning_rate": 1.3140495867768597e-05, |
| "loss": 2.0302, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.006613906979532025, |
| "grad_norm": 6.143839950859222, |
| "learning_rate": 1.322314049586777e-05, |
| "loss": 2.0978, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.0066552438981541, |
| "grad_norm": 5.6756704061902825, |
| "learning_rate": 1.3305785123966944e-05, |
| "loss": 2.1592, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.006696580816776175, |
| "grad_norm": 4.994981957965064, |
| "learning_rate": 1.3388429752066117e-05, |
| "loss": 2.1035, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.0067379177353982505, |
| "grad_norm": 5.853251459350967, |
| "learning_rate": 1.347107438016529e-05, |
| "loss": 2.0701, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.006779254654020325, |
| "grad_norm": 5.4681573607696965, |
| "learning_rate": 1.3553719008264464e-05, |
| "loss": 2.089, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.0068205915726424005, |
| "grad_norm": 5.848581304068256, |
| "learning_rate": 1.3636363636363637e-05, |
| "loss": 2.0819, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.006861928491264476, |
| "grad_norm": 5.481243900559041, |
| "learning_rate": 1.3719008264462813e-05, |
| "loss": 2.0284, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.0069032654098865505, |
| "grad_norm": 5.699959566604993, |
| "learning_rate": 1.3801652892561985e-05, |
| "loss": 2.0622, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.006944602328508626, |
| "grad_norm": 4.996648526388665, |
| "learning_rate": 1.3884297520661158e-05, |
| "loss": 2.0071, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.006985939247130701, |
| "grad_norm": 5.126923483054136, |
| "learning_rate": 1.3966942148760332e-05, |
| "loss": 2.0631, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.007027276165752776, |
| "grad_norm": 5.883552104251492, |
| "learning_rate": 1.4049586776859505e-05, |
| "loss": 2.0791, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.007068613084374851, |
| "grad_norm": 4.929514966855435, |
| "learning_rate": 1.4132231404958678e-05, |
| "loss": 2.0364, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.007109950002996927, |
| "grad_norm": 5.301129760644346, |
| "learning_rate": 1.4214876033057852e-05, |
| "loss": 2.0373, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.007151286921619001, |
| "grad_norm": 5.523739748516145, |
| "learning_rate": 1.4297520661157025e-05, |
| "loss": 2.1079, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.007192623840241077, |
| "grad_norm": 5.7887756838227755, |
| "learning_rate": 1.4380165289256201e-05, |
| "loss": 2.049, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.007233960758863152, |
| "grad_norm": 5.2452853088604865, |
| "learning_rate": 1.4462809917355374e-05, |
| "loss": 2.0686, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.007275297677485227, |
| "grad_norm": 4.454384214370969, |
| "learning_rate": 1.4545454545454546e-05, |
| "loss": 2.0124, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.007316634596107302, |
| "grad_norm": 6.397338503442304, |
| "learning_rate": 1.462809917355372e-05, |
| "loss": 2.0152, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.007357971514729378, |
| "grad_norm": 6.554144037150873, |
| "learning_rate": 1.4710743801652893e-05, |
| "loss": 1.9976, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.007399308433351453, |
| "grad_norm": 4.973940426595748, |
| "learning_rate": 1.4793388429752066e-05, |
| "loss": 2.0085, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.007440645351973528, |
| "grad_norm": 5.776375204519037, |
| "learning_rate": 1.487603305785124e-05, |
| "loss": 2.0339, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.007481982270595603, |
| "grad_norm": 5.472367097758556, |
| "learning_rate": 1.4958677685950413e-05, |
| "loss": 2.0016, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.0075233191892176785, |
| "grad_norm": 4.850880114898939, |
| "learning_rate": 1.504132231404959e-05, |
| "loss": 1.9952, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.007564656107839753, |
| "grad_norm": 4.825492061262016, |
| "learning_rate": 1.5123966942148762e-05, |
| "loss": 2.0149, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.0076059930264618285, |
| "grad_norm": 6.317700924322252, |
| "learning_rate": 1.5206611570247936e-05, |
| "loss": 1.9765, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.007647329945083904, |
| "grad_norm": 5.831048263887902, |
| "learning_rate": 1.528925619834711e-05, |
| "loss": 1.9669, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.0076886668637059785, |
| "grad_norm": 5.190457786334756, |
| "learning_rate": 1.5371900826446283e-05, |
| "loss": 2.0342, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.007730003782328054, |
| "grad_norm": 5.752029895196757, |
| "learning_rate": 1.5454545454545454e-05, |
| "loss": 2.0606, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.007771340700950129, |
| "grad_norm": 5.005855197604682, |
| "learning_rate": 1.553719008264463e-05, |
| "loss": 2.0764, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.007812677619572205, |
| "grad_norm": 5.362161895494138, |
| "learning_rate": 1.5619834710743803e-05, |
| "loss": 2.0373, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.00785401453819428, |
| "grad_norm": 5.589239650428267, |
| "learning_rate": 1.5702479338842978e-05, |
| "loss": 2.0536, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.007895351456816354, |
| "grad_norm": 5.38085836484136, |
| "learning_rate": 1.5785123966942152e-05, |
| "loss": 2.0357, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.00793668837543843, |
| "grad_norm": 6.4123494555744065, |
| "learning_rate": 1.5867768595041323e-05, |
| "loss": 1.9992, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.007978025294060505, |
| "grad_norm": 5.369699763052158, |
| "learning_rate": 1.5950413223140497e-05, |
| "loss": 1.9645, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.00801936221268258, |
| "grad_norm": 6.655726980448801, |
| "learning_rate": 1.6033057851239672e-05, |
| "loss": 2.0023, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.008060699131304656, |
| "grad_norm": 5.150395460216213, |
| "learning_rate": 1.6115702479338843e-05, |
| "loss": 1.9953, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.008102036049926731, |
| "grad_norm": 5.534796132616727, |
| "learning_rate": 1.6198347107438017e-05, |
| "loss": 1.964, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.008143372968548805, |
| "grad_norm": 5.0714233165065075, |
| "learning_rate": 1.628099173553719e-05, |
| "loss": 1.9942, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.00818470988717088, |
| "grad_norm": 5.370096628339807, |
| "learning_rate": 1.6363636363636366e-05, |
| "loss": 1.9938, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.008226046805792956, |
| "grad_norm": 4.816680798680657, |
| "learning_rate": 1.644628099173554e-05, |
| "loss": 1.9023, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.008267383724415031, |
| "grad_norm": 5.910326143029371, |
| "learning_rate": 1.652892561983471e-05, |
| "loss": 1.9366, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.008308720643037107, |
| "grad_norm": 5.364682793090204, |
| "learning_rate": 1.6611570247933886e-05, |
| "loss": 1.9835, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.008350057561659182, |
| "grad_norm": 6.171717096992393, |
| "learning_rate": 1.669421487603306e-05, |
| "loss": 1.9641, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.008391394480281256, |
| "grad_norm": 4.794750763380389, |
| "learning_rate": 1.677685950413223e-05, |
| "loss": 1.9405, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.008432731398903331, |
| "grad_norm": 6.3363242673070745, |
| "learning_rate": 1.6859504132231405e-05, |
| "loss": 1.9717, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.008474068317525407, |
| "grad_norm": 5.10756576497978, |
| "learning_rate": 1.694214876033058e-05, |
| "loss": 1.9312, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.008515405236147482, |
| "grad_norm": 5.5429121513722945, |
| "learning_rate": 1.7024793388429754e-05, |
| "loss": 1.9692, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.008556742154769557, |
| "grad_norm": 5.053921879606705, |
| "learning_rate": 1.710743801652893e-05, |
| "loss": 1.9187, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.008598079073391633, |
| "grad_norm": 5.246682645264326, |
| "learning_rate": 1.71900826446281e-05, |
| "loss": 2.0086, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.008639415992013707, |
| "grad_norm": 4.651358563124329, |
| "learning_rate": 1.7272727272727274e-05, |
| "loss": 1.9676, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.008680752910635782, |
| "grad_norm": 5.254574557184252, |
| "learning_rate": 1.735537190082645e-05, |
| "loss": 1.9193, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.008722089829257857, |
| "grad_norm": 5.5559516380514316, |
| "learning_rate": 1.743801652892562e-05, |
| "loss": 1.9123, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.008763426747879933, |
| "grad_norm": 5.714609535718523, |
| "learning_rate": 1.7520661157024794e-05, |
| "loss": 1.9831, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.008804763666502008, |
| "grad_norm": 4.664121459414757, |
| "learning_rate": 1.7603305785123968e-05, |
| "loss": 1.9606, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.008846100585124084, |
| "grad_norm": 4.9060858638182685, |
| "learning_rate": 1.7685950413223143e-05, |
| "loss": 1.9535, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.008887437503746157, |
| "grad_norm": 4.997171967559315, |
| "learning_rate": 1.7768595041322317e-05, |
| "loss": 1.9243, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.008928774422368233, |
| "grad_norm": 4.60645777188567, |
| "learning_rate": 1.7851239669421488e-05, |
| "loss": 1.9291, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.008970111340990308, |
| "grad_norm": 4.2131519608354004, |
| "learning_rate": 1.7933884297520662e-05, |
| "loss": 1.9105, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.009011448259612384, |
| "grad_norm": 5.51444703850531, |
| "learning_rate": 1.8016528925619837e-05, |
| "loss": 1.9502, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.00905278517823446, |
| "grad_norm": 5.2003808089855825, |
| "learning_rate": 1.809917355371901e-05, |
| "loss": 1.9436, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.009094122096856535, |
| "grad_norm": 4.240179682087964, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 1.9194, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.00913545901547861, |
| "grad_norm": 4.582501074244312, |
| "learning_rate": 1.8264462809917356e-05, |
| "loss": 1.9145, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.009176795934100684, |
| "grad_norm": 5.362083861786352, |
| "learning_rate": 1.834710743801653e-05, |
| "loss": 1.9165, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.009218132852722759, |
| "grad_norm": 5.06281875114174, |
| "learning_rate": 1.8429752066115705e-05, |
| "loss": 1.977, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.009259469771344835, |
| "grad_norm": 4.661496047656461, |
| "learning_rate": 1.851239669421488e-05, |
| "loss": 1.8892, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.00930080668996691, |
| "grad_norm": 4.735532406310298, |
| "learning_rate": 1.859504132231405e-05, |
| "loss": 1.8689, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.009342143608588985, |
| "grad_norm": 4.445771479719063, |
| "learning_rate": 1.8677685950413225e-05, |
| "loss": 1.9301, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.00938348052721106, |
| "grad_norm": 5.100109904664726, |
| "learning_rate": 1.87603305785124e-05, |
| "loss": 1.9309, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.009424817445833135, |
| "grad_norm": 6.115469535323335, |
| "learning_rate": 1.884297520661157e-05, |
| "loss": 1.918, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.00946615436445521, |
| "grad_norm": 4.890019742506766, |
| "learning_rate": 1.8925619834710745e-05, |
| "loss": 1.945, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.009507491283077285, |
| "grad_norm": 5.023798525711054, |
| "learning_rate": 1.900826446280992e-05, |
| "loss": 1.9004, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.00954882820169936, |
| "grad_norm": 5.652810624249754, |
| "learning_rate": 1.9090909090909094e-05, |
| "loss": 1.9339, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.009590165120321436, |
| "grad_norm": 6.04847384963266, |
| "learning_rate": 1.9173553719008268e-05, |
| "loss": 1.8687, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.009631502038943512, |
| "grad_norm": 5.332733823425359, |
| "learning_rate": 1.925619834710744e-05, |
| "loss": 1.8938, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.009672838957565585, |
| "grad_norm": 4.814331448709232, |
| "learning_rate": 1.9338842975206613e-05, |
| "loss": 1.8752, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.00971417587618766, |
| "grad_norm": 5.558977499560294, |
| "learning_rate": 1.9421487603305788e-05, |
| "loss": 1.8835, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.009755512794809736, |
| "grad_norm": 5.554940177569548, |
| "learning_rate": 1.950413223140496e-05, |
| "loss": 1.857, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.009796849713431812, |
| "grad_norm": 4.970502489086558, |
| "learning_rate": 1.9586776859504133e-05, |
| "loss": 1.8553, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.009838186632053887, |
| "grad_norm": 4.044099915606779, |
| "learning_rate": 1.9669421487603307e-05, |
| "loss": 1.924, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.009879523550675963, |
| "grad_norm": 4.880726953238654, |
| "learning_rate": 1.9752066115702482e-05, |
| "loss": 1.9785, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.009920860469298036, |
| "grad_norm": 5.457077789861094, |
| "learning_rate": 1.9834710743801656e-05, |
| "loss": 1.8585, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.009962197387920112, |
| "grad_norm": 4.608586390817221, |
| "learning_rate": 1.9917355371900827e-05, |
| "loss": 1.8861, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.010003534306542187, |
| "grad_norm": 4.5178969512670335, |
| "learning_rate": 2e-05, |
| "loss": 1.8936, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.010044871225164263, |
| "grad_norm": 5.722004352525454, |
| "learning_rate": 1.999999991396395e-05, |
| "loss": 1.8616, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.010086208143786338, |
| "grad_norm": 4.99862696301366, |
| "learning_rate": 1.9999999655855794e-05, |
| "loss": 1.8865, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.010127545062408413, |
| "grad_norm": 5.204994732642035, |
| "learning_rate": 1.9999999225675543e-05, |
| "loss": 1.8602, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.010168881981030489, |
| "grad_norm": 4.1143956012846505, |
| "learning_rate": 1.9999998623423198e-05, |
| "loss": 1.9101, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.010210218899652563, |
| "grad_norm": 5.535771463118041, |
| "learning_rate": 1.9999997849098773e-05, |
| "loss": 1.8596, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.010251555818274638, |
| "grad_norm": 5.020430211409416, |
| "learning_rate": 1.999999690270228e-05, |
| "loss": 1.8393, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.010292892736896713, |
| "grad_norm": 5.571737674116448, |
| "learning_rate": 1.999999578423374e-05, |
| "loss": 1.8987, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.010334229655518789, |
| "grad_norm": 4.336614412280944, |
| "learning_rate": 1.9999994493693165e-05, |
| "loss": 1.9194, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.010375566574140864, |
| "grad_norm": 4.815853586635344, |
| "learning_rate": 1.999999303108058e-05, |
| "loss": 1.8332, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.01041690349276294, |
| "grad_norm": 4.559920874208704, |
| "learning_rate": 1.9999991396396014e-05, |
| "loss": 1.8818, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.010458240411385013, |
| "grad_norm": 5.23388810362715, |
| "learning_rate": 1.9999989589639487e-05, |
| "loss": 1.8302, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.010499577330007089, |
| "grad_norm": 5.000301577463503, |
| "learning_rate": 1.999998761081104e-05, |
| "loss": 1.8657, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.010540914248629164, |
| "grad_norm": 4.448533864801871, |
| "learning_rate": 1.9999985459910698e-05, |
| "loss": 1.8762, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.01058225116725124, |
| "grad_norm": 4.715035035883112, |
| "learning_rate": 1.9999983136938504e-05, |
| "loss": 1.8511, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.010623588085873315, |
| "grad_norm": 4.025529484549816, |
| "learning_rate": 1.9999980641894497e-05, |
| "loss": 1.8458, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.01066492500449539, |
| "grad_norm": 4.754000032727581, |
| "learning_rate": 1.9999977974778715e-05, |
| "loss": 1.8714, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.010706261923117464, |
| "grad_norm": 4.930978688660729, |
| "learning_rate": 1.999997513559121e-05, |
| "loss": 1.8656, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.01074759884173954, |
| "grad_norm": 4.4132729377261475, |
| "learning_rate": 1.9999972124332028e-05, |
| "loss": 1.9383, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.010788935760361615, |
| "grad_norm": 4.4540551253199325, |
| "learning_rate": 1.9999968941001225e-05, |
| "loss": 1.8426, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.01083027267898369, |
| "grad_norm": 4.797250042059473, |
| "learning_rate": 1.999996558559885e-05, |
| "loss": 1.8634, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.010871609597605766, |
| "grad_norm": 5.456931710111963, |
| "learning_rate": 1.999996205812496e-05, |
| "loss": 1.825, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.010912946516227841, |
| "grad_norm": 4.377649523138056, |
| "learning_rate": 1.999995835857962e-05, |
| "loss": 1.8545, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.010954283434849915, |
| "grad_norm": 4.5317328844732145, |
| "learning_rate": 1.9999954486962893e-05, |
| "loss": 1.8774, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.01099562035347199, |
| "grad_norm": 4.709498283906347, |
| "learning_rate": 1.9999950443274847e-05, |
| "loss": 1.8083, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.011036957272094066, |
| "grad_norm": 4.592327748002219, |
| "learning_rate": 1.9999946227515547e-05, |
| "loss": 1.792, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.011078294190716141, |
| "grad_norm": 5.036724935294618, |
| "learning_rate": 1.999994183968507e-05, |
| "loss": 1.8779, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.011119631109338217, |
| "grad_norm": 6.694409030503598, |
| "learning_rate": 1.999993727978349e-05, |
| "loss": 1.8573, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.011160968027960292, |
| "grad_norm": 5.018981836140064, |
| "learning_rate": 1.9999932547810883e-05, |
| "loss": 1.8726, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.011202304946582366, |
| "grad_norm": 4.451864521641474, |
| "learning_rate": 1.9999927643767332e-05, |
| "loss": 1.8193, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.011243641865204441, |
| "grad_norm": 5.036890897058994, |
| "learning_rate": 1.999992256765292e-05, |
| "loss": 1.8594, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.011284978783826517, |
| "grad_norm": 4.7545987502372755, |
| "learning_rate": 1.999991731946774e-05, |
| "loss": 1.9158, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.011326315702448592, |
| "grad_norm": 3.9156550800432783, |
| "learning_rate": 1.999991189921188e-05, |
| "loss": 1.8166, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.011367652621070668, |
| "grad_norm": 4.622686377530181, |
| "learning_rate": 1.999990630688543e-05, |
| "loss": 1.8426, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.011408989539692743, |
| "grad_norm": 4.176720366120709, |
| "learning_rate": 1.9999900542488487e-05, |
| "loss": 1.8701, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.011450326458314819, |
| "grad_norm": 4.588055146989058, |
| "learning_rate": 1.999989460602115e-05, |
| "loss": 1.8474, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.011491663376936892, |
| "grad_norm": 4.7632605353618604, |
| "learning_rate": 1.9999888497483523e-05, |
| "loss": 1.7611, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.011533000295558968, |
| "grad_norm": 5.168047411415939, |
| "learning_rate": 1.9999882216875714e-05, |
| "loss": 1.8297, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.011574337214181043, |
| "grad_norm": 5.6032261833368215, |
| "learning_rate": 1.9999875764197824e-05, |
| "loss": 1.8273, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.011615674132803119, |
| "grad_norm": 4.836606306201456, |
| "learning_rate": 1.9999869139449965e-05, |
| "loss": 1.8067, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.011657011051425194, |
| "grad_norm": 5.371156408385522, |
| "learning_rate": 1.9999862342632258e-05, |
| "loss": 1.7726, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.01169834797004727, |
| "grad_norm": 4.715562111195242, |
| "learning_rate": 1.9999855373744813e-05, |
| "loss": 1.8257, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.011739684888669343, |
| "grad_norm": 4.672226047314074, |
| "learning_rate": 1.9999848232787753e-05, |
| "loss": 1.807, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.011781021807291419, |
| "grad_norm": 5.305211095175868, |
| "learning_rate": 1.9999840919761202e-05, |
| "loss": 1.8398, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.011822358725913494, |
| "grad_norm": 4.973800529744849, |
| "learning_rate": 1.9999833434665282e-05, |
| "loss": 1.8028, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.01186369564453557, |
| "grad_norm": 4.580336749750151, |
| "learning_rate": 1.9999825777500127e-05, |
| "loss": 1.7559, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.011905032563157645, |
| "grad_norm": 5.203176556084249, |
| "learning_rate": 1.999981794826586e-05, |
| "loss": 1.8375, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.01194636948177972, |
| "grad_norm": 5.810430258629928, |
| "learning_rate": 1.9999809946962627e-05, |
| "loss": 1.8126, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.011987706400401794, |
| "grad_norm": 5.7480488342439955, |
| "learning_rate": 1.9999801773590556e-05, |
| "loss": 1.8228, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.01202904331902387, |
| "grad_norm": 4.946108636349945, |
| "learning_rate": 1.9999793428149793e-05, |
| "loss": 1.7801, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.012070380237645945, |
| "grad_norm": 4.686375909907021, |
| "learning_rate": 1.9999784910640484e-05, |
| "loss": 1.7595, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.01211171715626802, |
| "grad_norm": 5.2352360374303135, |
| "learning_rate": 1.9999776221062767e-05, |
| "loss": 1.8413, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.012153054074890096, |
| "grad_norm": 4.509401680547479, |
| "learning_rate": 1.99997673594168e-05, |
| "loss": 1.7973, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.012194390993512171, |
| "grad_norm": 4.614511294927466, |
| "learning_rate": 1.9999758325702728e-05, |
| "loss": 1.8206, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.012235727912134245, |
| "grad_norm": 6.185921445660834, |
| "learning_rate": 1.9999749119920714e-05, |
| "loss": 1.8462, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.01227706483075632, |
| "grad_norm": 4.174924494562577, |
| "learning_rate": 1.999973974207091e-05, |
| "loss": 1.8036, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.012318401749378396, |
| "grad_norm": 4.836665186633954, |
| "learning_rate": 1.9999730192153483e-05, |
| "loss": 1.8517, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.012359738668000471, |
| "grad_norm": 5.384960643252126, |
| "learning_rate": 1.999972047016859e-05, |
| "loss": 1.8159, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.012401075586622547, |
| "grad_norm": 5.021462883098841, |
| "learning_rate": 1.9999710576116403e-05, |
| "loss": 1.7985, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.012442412505244622, |
| "grad_norm": 5.427243361920921, |
| "learning_rate": 1.99997005099971e-05, |
| "loss": 1.7765, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.012483749423866698, |
| "grad_norm": 4.491526906719712, |
| "learning_rate": 1.999969027181084e-05, |
| "loss": 1.8183, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.012525086342488771, |
| "grad_norm": 4.559161527860771, |
| "learning_rate": 1.9999679861557804e-05, |
| "loss": 1.724, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.012566423261110847, |
| "grad_norm": 4.161439649907769, |
| "learning_rate": 1.9999669279238173e-05, |
| "loss": 1.7683, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.012607760179732922, |
| "grad_norm": 4.56366674854018, |
| "learning_rate": 1.999965852485213e-05, |
| "loss": 1.786, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.012649097098354998, |
| "grad_norm": 5.245429404102266, |
| "learning_rate": 1.999964759839986e-05, |
| "loss": 1.7822, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.012690434016977073, |
| "grad_norm": 4.6630532976211825, |
| "learning_rate": 1.9999636499881548e-05, |
| "loss": 1.7466, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.012731770935599148, |
| "grad_norm": 4.060133282127772, |
| "learning_rate": 1.9999625229297385e-05, |
| "loss": 1.795, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.012773107854221222, |
| "grad_norm": 3.9035653210065644, |
| "learning_rate": 1.9999613786647568e-05, |
| "loss": 1.7644, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.012814444772843298, |
| "grad_norm": 4.309261522039182, |
| "learning_rate": 1.9999602171932292e-05, |
| "loss": 1.7843, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.012855781691465373, |
| "grad_norm": 4.849350440475919, |
| "learning_rate": 1.999959038515176e-05, |
| "loss": 1.7703, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.012897118610087448, |
| "grad_norm": 4.275025549538348, |
| "learning_rate": 1.999957842630617e-05, |
| "loss": 1.7714, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.012938455528709524, |
| "grad_norm": 5.173686081149218, |
| "learning_rate": 1.9999566295395728e-05, |
| "loss": 1.7638, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.0129797924473316, |
| "grad_norm": 4.908518222034618, |
| "learning_rate": 1.999955399242065e-05, |
| "loss": 1.8008, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.013021129365953673, |
| "grad_norm": 4.14921313541257, |
| "learning_rate": 1.9999541517381137e-05, |
| "loss": 1.7741, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.013062466284575748, |
| "grad_norm": 4.543722393877187, |
| "learning_rate": 1.9999528870277412e-05, |
| "loss": 1.7949, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.013103803203197824, |
| "grad_norm": 4.410976439510873, |
| "learning_rate": 1.9999516051109688e-05, |
| "loss": 1.7547, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.0131451401218199, |
| "grad_norm": 5.705194883861194, |
| "learning_rate": 1.9999503059878188e-05, |
| "loss": 1.7513, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.013186477040441975, |
| "grad_norm": 4.65186813408292, |
| "learning_rate": 1.999948989658313e-05, |
| "loss": 1.7664, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.01322781395906405, |
| "grad_norm": 5.20074413082596, |
| "learning_rate": 1.9999476561224754e-05, |
| "loss": 1.7545, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.013269150877686124, |
| "grad_norm": 3.9975643391331745, |
| "learning_rate": 1.9999463053803275e-05, |
| "loss": 1.7175, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.0133104877963082, |
| "grad_norm": 4.798261065065511, |
| "learning_rate": 1.9999449374318934e-05, |
| "loss": 1.7464, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.013351824714930275, |
| "grad_norm": 4.858469902498838, |
| "learning_rate": 1.9999435522771963e-05, |
| "loss": 1.7568, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.01339316163355235, |
| "grad_norm": 4.236662694907985, |
| "learning_rate": 1.99994214991626e-05, |
| "loss": 1.7335, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.013434498552174426, |
| "grad_norm": 4.6188062645939585, |
| "learning_rate": 1.9999407303491085e-05, |
| "loss": 1.7529, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.013475835470796501, |
| "grad_norm": 4.77521754475989, |
| "learning_rate": 1.9999392935757668e-05, |
| "loss": 1.7734, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.013517172389418576, |
| "grad_norm": 6.027108769658543, |
| "learning_rate": 1.999937839596259e-05, |
| "loss": 1.8136, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.01355850930804065, |
| "grad_norm": 4.163761649197771, |
| "learning_rate": 1.9999363684106105e-05, |
| "loss": 1.7085, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.013599846226662726, |
| "grad_norm": 3.916493440655603, |
| "learning_rate": 1.9999348800188466e-05, |
| "loss": 1.7815, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.013641183145284801, |
| "grad_norm": 4.397530066361572, |
| "learning_rate": 1.9999333744209924e-05, |
| "loss": 1.7759, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.013682520063906876, |
| "grad_norm": 4.839462185241853, |
| "learning_rate": 1.9999318516170747e-05, |
| "loss": 1.7548, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.013723856982528952, |
| "grad_norm": 4.759012044819632, |
| "learning_rate": 1.999930311607119e-05, |
| "loss": 1.7815, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.013765193901151027, |
| "grad_norm": 4.1799473470272295, |
| "learning_rate": 1.9999287543911522e-05, |
| "loss": 1.7907, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.013806530819773101, |
| "grad_norm": 4.454633377063746, |
| "learning_rate": 1.9999271799692006e-05, |
| "loss": 1.7579, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.013847867738395176, |
| "grad_norm": 4.997867503776301, |
| "learning_rate": 1.999925588341292e-05, |
| "loss": 1.7335, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.013889204657017252, |
| "grad_norm": 4.345433706332678, |
| "learning_rate": 1.999923979507453e-05, |
| "loss": 1.7124, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.013930541575639327, |
| "grad_norm": 4.531985231521044, |
| "learning_rate": 1.999922353467712e-05, |
| "loss": 1.758, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.013971878494261403, |
| "grad_norm": 4.399801080955952, |
| "learning_rate": 1.9999207102220962e-05, |
| "loss": 1.7065, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.014013215412883478, |
| "grad_norm": 5.059800651377326, |
| "learning_rate": 1.999919049770635e-05, |
| "loss": 1.693, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.014054552331505552, |
| "grad_norm": 4.260000291303237, |
| "learning_rate": 1.9999173721133557e-05, |
| "loss": 1.7488, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.014095889250127627, |
| "grad_norm": 4.5331171056345605, |
| "learning_rate": 1.999915677250288e-05, |
| "loss": 1.7046, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.014137226168749703, |
| "grad_norm": 4.187185061547482, |
| "learning_rate": 1.999913965181461e-05, |
| "loss": 1.7013, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.014178563087371778, |
| "grad_norm": 4.429191188349303, |
| "learning_rate": 1.999912235906904e-05, |
| "loss": 1.7127, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.014219900005993854, |
| "grad_norm": 4.083346883074332, |
| "learning_rate": 1.9999104894266466e-05, |
| "loss": 1.7571, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.014261236924615929, |
| "grad_norm": 4.424685185794806, |
| "learning_rate": 1.999908725740719e-05, |
| "loss": 1.7563, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.014302573843238003, |
| "grad_norm": 4.285454928033791, |
| "learning_rate": 1.9999069448491516e-05, |
| "loss": 1.7547, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.014343910761860078, |
| "grad_norm": 4.424718664433471, |
| "learning_rate": 1.999905146751975e-05, |
| "loss": 1.7374, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.014385247680482154, |
| "grad_norm": 4.075077717271962, |
| "learning_rate": 1.99990333144922e-05, |
| "loss": 1.7661, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.014426584599104229, |
| "grad_norm": 4.538474712661468, |
| "learning_rate": 1.999901498940918e-05, |
| "loss": 1.7118, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.014467921517726304, |
| "grad_norm": 4.435775318213515, |
| "learning_rate": 1.9998996492271007e-05, |
| "loss": 1.7368, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.01450925843634838, |
| "grad_norm": 4.545629750120307, |
| "learning_rate": 1.9998977823077998e-05, |
| "loss": 1.7335, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.014550595354970454, |
| "grad_norm": 4.359608762821868, |
| "learning_rate": 1.9998958981830473e-05, |
| "loss": 1.7318, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.014591932273592529, |
| "grad_norm": 4.453842525389737, |
| "learning_rate": 1.9998939968528754e-05, |
| "loss": 1.7499, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.014633269192214604, |
| "grad_norm": 5.088846901725583, |
| "learning_rate": 1.9998920783173172e-05, |
| "loss": 1.7555, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.01467460611083668, |
| "grad_norm": 4.1590343693668395, |
| "learning_rate": 1.9998901425764057e-05, |
| "loss": 1.7386, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.014715943029458755, |
| "grad_norm": 4.181140524329235, |
| "learning_rate": 1.9998881896301744e-05, |
| "loss": 1.6455, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.01475727994808083, |
| "grad_norm": 4.228896471972448, |
| "learning_rate": 1.999886219478656e-05, |
| "loss": 1.7282, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.014798616866702906, |
| "grad_norm": 3.9899831004408526, |
| "learning_rate": 1.9998842321218855e-05, |
| "loss": 1.7201, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.01483995378532498, |
| "grad_norm": 3.9178031007246408, |
| "learning_rate": 1.9998822275598964e-05, |
| "loss": 1.6812, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.014881290703947055, |
| "grad_norm": 4.3808976089497484, |
| "learning_rate": 1.9998802057927236e-05, |
| "loss": 1.7175, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.01492262762256913, |
| "grad_norm": 3.9780395209303197, |
| "learning_rate": 1.9998781668204015e-05, |
| "loss": 1.7351, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.014963964541191206, |
| "grad_norm": 5.378812354806347, |
| "learning_rate": 1.9998761106429655e-05, |
| "loss": 1.7092, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.015005301459813282, |
| "grad_norm": 3.9422939515447246, |
| "learning_rate": 1.999874037260451e-05, |
| "loss": 1.7261, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.015046638378435357, |
| "grad_norm": 4.442422033504748, |
| "learning_rate": 1.9998719466728934e-05, |
| "loss": 1.7027, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.01508797529705743, |
| "grad_norm": 4.102984271689072, |
| "learning_rate": 1.9998698388803288e-05, |
| "loss": 1.6741, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.015129312215679506, |
| "grad_norm": 3.9608491048290615, |
| "learning_rate": 1.9998677138827934e-05, |
| "loss": 1.7542, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.015170649134301582, |
| "grad_norm": 4.561629575046756, |
| "learning_rate": 1.999865571680324e-05, |
| "loss": 1.6785, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.015211986052923657, |
| "grad_norm": 4.4640127715057885, |
| "learning_rate": 1.9998634122729573e-05, |
| "loss": 1.7, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.015253322971545732, |
| "grad_norm": 3.8935864417828188, |
| "learning_rate": 1.9998612356607303e-05, |
| "loss": 1.6939, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.015294659890167808, |
| "grad_norm": 5.011277234521909, |
| "learning_rate": 1.9998590418436808e-05, |
| "loss": 1.7019, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.015335996808789882, |
| "grad_norm": 4.107354033282244, |
| "learning_rate": 1.9998568308218465e-05, |
| "loss": 1.6637, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.015377333727411957, |
| "grad_norm": 5.3480918453617905, |
| "learning_rate": 1.999854602595265e-05, |
| "loss": 1.7322, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.015418670646034032, |
| "grad_norm": 4.443648241332512, |
| "learning_rate": 1.9998523571639752e-05, |
| "loss": 1.6794, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.015460007564656108, |
| "grad_norm": 3.4677507775480025, |
| "learning_rate": 1.999850094528015e-05, |
| "loss": 1.6943, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.015501344483278183, |
| "grad_norm": 4.306434811794374, |
| "learning_rate": 1.9998478146874244e-05, |
| "loss": 1.6996, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.015542681401900259, |
| "grad_norm": 5.783322294479809, |
| "learning_rate": 1.9998455176422423e-05, |
| "loss": 1.7071, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.015584018320522332, |
| "grad_norm": 5.907422561947855, |
| "learning_rate": 1.999843203392507e-05, |
| "loss": 1.7736, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.01562535523914441, |
| "grad_norm": 4.114299347318918, |
| "learning_rate": 1.9998408719382602e-05, |
| "loss": 1.7068, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.015666692157766483, |
| "grad_norm": 4.897826252389082, |
| "learning_rate": 1.999838523279541e-05, |
| "loss": 1.6542, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.01570802907638856, |
| "grad_norm": 4.387711122090114, |
| "learning_rate": 1.9998361574163897e-05, |
| "loss": 1.7202, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.015749365995010634, |
| "grad_norm": 4.249935651772863, |
| "learning_rate": 1.999833774348847e-05, |
| "loss": 1.6871, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.015790702913632708, |
| "grad_norm": 4.961734747800958, |
| "learning_rate": 1.9998313740769547e-05, |
| "loss": 1.7012, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.015832039832254785, |
| "grad_norm": 4.247988360660198, |
| "learning_rate": 1.9998289566007535e-05, |
| "loss": 1.684, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.01587337675087686, |
| "grad_norm": 5.434305269506113, |
| "learning_rate": 1.999826521920285e-05, |
| "loss": 1.7673, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.015914713669498936, |
| "grad_norm": 4.617133742171007, |
| "learning_rate": 1.999824070035591e-05, |
| "loss": 1.6622, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.01595605058812101, |
| "grad_norm": 3.70746479523289, |
| "learning_rate": 1.9998216009467136e-05, |
| "loss": 1.6647, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.015997387506743083, |
| "grad_norm": 4.604026510578186, |
| "learning_rate": 1.999819114653696e-05, |
| "loss": 1.6772, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.01603872442536516, |
| "grad_norm": 3.8606213125382642, |
| "learning_rate": 1.9998166111565804e-05, |
| "loss": 1.694, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.016080061343987234, |
| "grad_norm": 5.0244652420608045, |
| "learning_rate": 1.99981409045541e-05, |
| "loss": 1.7797, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.01612139826260931, |
| "grad_norm": 4.707739461519922, |
| "learning_rate": 1.999811552550228e-05, |
| "loss": 1.7159, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.016162735181231385, |
| "grad_norm": 3.9677147576335043, |
| "learning_rate": 1.9998089974410782e-05, |
| "loss": 1.6708, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.016204072099853462, |
| "grad_norm": 4.311084704937728, |
| "learning_rate": 1.9998064251280048e-05, |
| "loss": 1.7109, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.016245409018475536, |
| "grad_norm": 3.9457174661249534, |
| "learning_rate": 1.999803835611052e-05, |
| "loss": 1.6713, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.01628674593709761, |
| "grad_norm": 3.947531059176682, |
| "learning_rate": 1.999801228890264e-05, |
| "loss": 1.6796, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.016328082855719687, |
| "grad_norm": 4.14663907999712, |
| "learning_rate": 1.9997986049656858e-05, |
| "loss": 1.6452, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.01636941977434176, |
| "grad_norm": 3.897276226226099, |
| "learning_rate": 1.9997959638373626e-05, |
| "loss": 1.6507, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.016410756692963838, |
| "grad_norm": 3.778326978683171, |
| "learning_rate": 1.9997933055053402e-05, |
| "loss": 1.7378, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.01645209361158591, |
| "grad_norm": 4.014730222130603, |
| "learning_rate": 1.9997906299696635e-05, |
| "loss": 1.6651, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.016493430530207985, |
| "grad_norm": 3.8164751076978223, |
| "learning_rate": 1.9997879372303797e-05, |
| "loss": 1.7007, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.016534767448830062, |
| "grad_norm": 3.922371704332535, |
| "learning_rate": 1.999785227287534e-05, |
| "loss": 1.7161, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.016576104367452136, |
| "grad_norm": 3.934785675300376, |
| "learning_rate": 1.9997825001411738e-05, |
| "loss": 1.6704, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.016617441286074213, |
| "grad_norm": 4.564033996587743, |
| "learning_rate": 1.9997797557913455e-05, |
| "loss": 1.6918, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.016658778204696287, |
| "grad_norm": 4.4245567390274365, |
| "learning_rate": 1.9997769942380968e-05, |
| "loss": 1.7143, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.016700115123318364, |
| "grad_norm": 3.8624198473379874, |
| "learning_rate": 1.9997742154814744e-05, |
| "loss": 1.7298, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.016741452041940438, |
| "grad_norm": 4.010446146589402, |
| "learning_rate": 1.9997714195215275e-05, |
| "loss": 1.6851, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.01678278896056251, |
| "grad_norm": 4.139527737935189, |
| "learning_rate": 1.9997686063583028e-05, |
| "loss": 1.6597, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.01682412587918459, |
| "grad_norm": 3.617422879629344, |
| "learning_rate": 1.9997657759918498e-05, |
| "loss": 1.7078, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.016865462797806662, |
| "grad_norm": 4.492323213426353, |
| "learning_rate": 1.9997629284222165e-05, |
| "loss": 1.6521, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.01690679971642874, |
| "grad_norm": 5.007903819964739, |
| "learning_rate": 1.999760063649452e-05, |
| "loss": 1.6694, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.016948136635050813, |
| "grad_norm": 4.960862868620129, |
| "learning_rate": 1.999757181673606e-05, |
| "loss": 1.68, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.01698947355367289, |
| "grad_norm": 5.878432740559922, |
| "learning_rate": 1.9997542824947276e-05, |
| "loss": 1.6736, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.017030810472294964, |
| "grad_norm": 4.440326929426054, |
| "learning_rate": 1.999751366112867e-05, |
| "loss": 1.6335, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.017072147390917038, |
| "grad_norm": 4.263618522816504, |
| "learning_rate": 1.999748432528074e-05, |
| "loss": 1.7186, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.017113484309539115, |
| "grad_norm": 4.292363992231819, |
| "learning_rate": 1.9997454817403996e-05, |
| "loss": 1.6416, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.01715482122816119, |
| "grad_norm": 4.013314862106662, |
| "learning_rate": 1.9997425137498944e-05, |
| "loss": 1.723, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.017196158146783266, |
| "grad_norm": 4.07382683143937, |
| "learning_rate": 1.999739528556609e-05, |
| "loss": 1.6604, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.01723749506540534, |
| "grad_norm": 4.533516304139438, |
| "learning_rate": 1.9997365261605957e-05, |
| "loss": 1.6683, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.017278831984027413, |
| "grad_norm": 5.114666733039835, |
| "learning_rate": 1.999733506561905e-05, |
| "loss": 1.6925, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.01732016890264949, |
| "grad_norm": 3.895641699630939, |
| "learning_rate": 1.99973046976059e-05, |
| "loss": 1.6743, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.017361505821271564, |
| "grad_norm": 3.9125805892169465, |
| "learning_rate": 1.9997274157567025e-05, |
| "loss": 1.6823, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.01740284273989364, |
| "grad_norm": 4.530982763817902, |
| "learning_rate": 1.999724344550295e-05, |
| "loss": 1.666, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.017444179658515715, |
| "grad_norm": 4.806928145874966, |
| "learning_rate": 1.9997212561414198e-05, |
| "loss": 1.7254, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.017485516577137792, |
| "grad_norm": 3.9697720655534483, |
| "learning_rate": 1.999718150530131e-05, |
| "loss": 1.6241, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.017526853495759866, |
| "grad_norm": 4.257480914158059, |
| "learning_rate": 1.9997150277164815e-05, |
| "loss": 1.6346, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.01756819041438194, |
| "grad_norm": 3.799531767116148, |
| "learning_rate": 1.999711887700525e-05, |
| "loss": 1.6296, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.017609527333004017, |
| "grad_norm": 3.802902072405634, |
| "learning_rate": 1.999708730482316e-05, |
| "loss": 1.6296, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.01765086425162609, |
| "grad_norm": 5.118064089629252, |
| "learning_rate": 1.9997055560619082e-05, |
| "loss": 1.643, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.017692201170248167, |
| "grad_norm": 4.227158901611068, |
| "learning_rate": 1.9997023644393567e-05, |
| "loss": 1.6698, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.01773353808887024, |
| "grad_norm": 4.238927562799819, |
| "learning_rate": 1.9996991556147166e-05, |
| "loss": 1.653, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.017774875007492315, |
| "grad_norm": 4.204830304370112, |
| "learning_rate": 1.9996959295880423e-05, |
| "loss": 1.6844, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.017816211926114392, |
| "grad_norm": 4.097133417277415, |
| "learning_rate": 1.99969268635939e-05, |
| "loss": 1.6212, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.017857548844736466, |
| "grad_norm": 4.65335395814053, |
| "learning_rate": 1.999689425928815e-05, |
| "loss": 1.6882, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.017898885763358543, |
| "grad_norm": 4.112571966210029, |
| "learning_rate": 1.999686148296374e-05, |
| "loss": 1.6929, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.017940222681980617, |
| "grad_norm": 5.088602258322444, |
| "learning_rate": 1.999682853462123e-05, |
| "loss": 1.6648, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.017981559600602694, |
| "grad_norm": 3.9480572889086147, |
| "learning_rate": 1.9996795414261186e-05, |
| "loss": 1.5896, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.018022896519224767, |
| "grad_norm": 4.8104711694243, |
| "learning_rate": 1.9996762121884186e-05, |
| "loss": 1.6709, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.01806423343784684, |
| "grad_norm": 5.388396623467715, |
| "learning_rate": 1.999672865749079e-05, |
| "loss": 1.6716, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.01810557035646892, |
| "grad_norm": 4.279793170082693, |
| "learning_rate": 1.9996695021081584e-05, |
| "loss": 1.632, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.018146907275090992, |
| "grad_norm": 4.624743597271427, |
| "learning_rate": 1.999666121265714e-05, |
| "loss": 1.6054, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.01818824419371307, |
| "grad_norm": 4.133320200289432, |
| "learning_rate": 1.9996627232218048e-05, |
| "loss": 1.6418, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.018229581112335143, |
| "grad_norm": 4.0963463496824986, |
| "learning_rate": 1.9996593079764884e-05, |
| "loss": 1.6683, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.01827091803095722, |
| "grad_norm": 4.03547359932741, |
| "learning_rate": 1.9996558755298238e-05, |
| "loss": 1.5996, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.018312254949579294, |
| "grad_norm": 4.156363997210419, |
| "learning_rate": 1.9996524258818706e-05, |
| "loss": 1.6471, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.018353591868201367, |
| "grad_norm": 4.075479637959615, |
| "learning_rate": 1.9996489590326874e-05, |
| "loss": 1.5989, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.018394928786823445, |
| "grad_norm": 4.63601174765512, |
| "learning_rate": 1.9996454749823345e-05, |
| "loss": 1.6642, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.018436265705445518, |
| "grad_norm": 3.760851338042477, |
| "learning_rate": 1.9996419737308715e-05, |
| "loss": 1.6579, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.018477602624067595, |
| "grad_norm": 3.979536768168784, |
| "learning_rate": 1.9996384552783588e-05, |
| "loss": 1.6006, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.01851893954268967, |
| "grad_norm": 4.246767902971398, |
| "learning_rate": 1.9996349196248563e-05, |
| "loss": 1.6715, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.018560276461311743, |
| "grad_norm": 4.26779353731614, |
| "learning_rate": 1.999631366770426e-05, |
| "loss": 1.6859, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.01860161337993382, |
| "grad_norm": 4.049582440808523, |
| "learning_rate": 1.9996277967151283e-05, |
| "loss": 1.6882, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.018642950298555894, |
| "grad_norm": 4.066185344313316, |
| "learning_rate": 1.9996242094590248e-05, |
| "loss": 1.6601, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.01868428721717797, |
| "grad_norm": 3.7309702600230494, |
| "learning_rate": 1.9996206050021768e-05, |
| "loss": 1.6453, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.018725624135800045, |
| "grad_norm": 4.307728435051617, |
| "learning_rate": 1.9996169833446473e-05, |
| "loss": 1.6728, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.01876696105442212, |
| "grad_norm": 3.892468749865279, |
| "learning_rate": 1.9996133444864974e-05, |
| "loss": 1.6996, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.018808297973044195, |
| "grad_norm": 4.172694653615993, |
| "learning_rate": 1.999609688427791e-05, |
| "loss": 1.6519, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.01884963489166627, |
| "grad_norm": 4.211392128772361, |
| "learning_rate": 1.9996060151685895e-05, |
| "loss": 1.6096, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.018890971810288346, |
| "grad_norm": 4.728429773380645, |
| "learning_rate": 1.9996023247089576e-05, |
| "loss": 1.6217, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.01893230872891042, |
| "grad_norm": 3.7603074265755745, |
| "learning_rate": 1.999598617048958e-05, |
| "loss": 1.617, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.018973645647532497, |
| "grad_norm": 4.5264911357846165, |
| "learning_rate": 1.9995948921886547e-05, |
| "loss": 1.6009, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.01901498256615457, |
| "grad_norm": 4.285402551531064, |
| "learning_rate": 1.999591150128112e-05, |
| "loss": 1.6666, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.019056319484776648, |
| "grad_norm": 4.528562163332608, |
| "learning_rate": 1.9995873908673936e-05, |
| "loss": 1.6967, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.01909765640339872, |
| "grad_norm": 4.331142545150304, |
| "learning_rate": 1.999583614406565e-05, |
| "loss": 1.6387, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.019138993322020795, |
| "grad_norm": 4.277497333006759, |
| "learning_rate": 1.9995798207456906e-05, |
| "loss": 1.6407, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.019180330240642873, |
| "grad_norm": 4.236531733677237, |
| "learning_rate": 1.999576009884836e-05, |
| "loss": 1.6528, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.019221667159264946, |
| "grad_norm": 4.1527404087837825, |
| "learning_rate": 1.9995721818240664e-05, |
| "loss": 1.6386, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.019263004077887023, |
| "grad_norm": 4.21734134516066, |
| "learning_rate": 1.999568336563448e-05, |
| "loss": 1.6531, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.019304340996509097, |
| "grad_norm": 4.010277949791672, |
| "learning_rate": 1.999564474103047e-05, |
| "loss": 1.6125, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.01934567791513117, |
| "grad_norm": 4.974363400314765, |
| "learning_rate": 1.99956059444293e-05, |
| "loss": 1.6562, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.019387014833753248, |
| "grad_norm": 3.461845715262989, |
| "learning_rate": 1.999556697583163e-05, |
| "loss": 1.6715, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.01942835175237532, |
| "grad_norm": 4.501289760535044, |
| "learning_rate": 1.999552783523814e-05, |
| "loss": 1.6276, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.0194696886709974, |
| "grad_norm": 3.980526992455661, |
| "learning_rate": 1.99954885226495e-05, |
| "loss": 1.6512, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.019511025589619473, |
| "grad_norm": 4.754361998561602, |
| "learning_rate": 1.9995449038066385e-05, |
| "loss": 1.6563, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.01955236250824155, |
| "grad_norm": 3.962924389993788, |
| "learning_rate": 1.9995409381489473e-05, |
| "loss": 1.5921, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.019593699426863623, |
| "grad_norm": 4.230038259640959, |
| "learning_rate": 1.999536955291945e-05, |
| "loss": 1.6266, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.019635036345485697, |
| "grad_norm": 3.4637303252434863, |
| "learning_rate": 1.9995329552356996e-05, |
| "loss": 1.5613, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.019676373264107774, |
| "grad_norm": 4.180047059414082, |
| "learning_rate": 1.999528937980281e-05, |
| "loss": 1.6358, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.019717710182729848, |
| "grad_norm": 4.407688478601427, |
| "learning_rate": 1.9995249035257572e-05, |
| "loss": 1.6276, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.019759047101351925, |
| "grad_norm": 5.682179019619738, |
| "learning_rate": 1.999520851872198e-05, |
| "loss": 1.6339, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.019800384019974, |
| "grad_norm": 5.80296950656401, |
| "learning_rate": 1.9995167830196732e-05, |
| "loss": 1.6735, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.019841720938596073, |
| "grad_norm": 4.788010741660107, |
| "learning_rate": 1.999512696968253e-05, |
| "loss": 1.6183, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.01988305785721815, |
| "grad_norm": 3.2823877198029683, |
| "learning_rate": 1.9995085937180075e-05, |
| "loss": 1.6314, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.019924394775840223, |
| "grad_norm": 4.513204723991569, |
| "learning_rate": 1.9995044732690074e-05, |
| "loss": 1.6558, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.0199657316944623, |
| "grad_norm": 3.710887033971277, |
| "learning_rate": 1.999500335621323e-05, |
| "loss": 1.6339, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.020007068613084374, |
| "grad_norm": 3.914180149814728, |
| "learning_rate": 1.9994961807750264e-05, |
| "loss": 1.6263, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.02004840553170645, |
| "grad_norm": 4.149254446951243, |
| "learning_rate": 1.999492008730189e-05, |
| "loss": 1.6276, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.020089742450328525, |
| "grad_norm": 3.8520876610172756, |
| "learning_rate": 1.9994878194868817e-05, |
| "loss": 1.6168, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.0201310793689506, |
| "grad_norm": 4.315135033151227, |
| "learning_rate": 1.9994836130451777e-05, |
| "loss": 1.6799, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.020172416287572676, |
| "grad_norm": 4.299172694880712, |
| "learning_rate": 1.9994793894051483e-05, |
| "loss": 1.6094, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.02021375320619475, |
| "grad_norm": 3.9099719074716974, |
| "learning_rate": 1.999475148566867e-05, |
| "loss": 1.6002, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.020255090124816827, |
| "grad_norm": 3.621204913700773, |
| "learning_rate": 1.9994708905304066e-05, |
| "loss": 1.627, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.0202964270434389, |
| "grad_norm": 4.002608239997497, |
| "learning_rate": 1.9994666152958403e-05, |
| "loss": 1.6377, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.020337763962060978, |
| "grad_norm": 3.509839578650558, |
| "learning_rate": 1.9994623228632413e-05, |
| "loss": 1.6498, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.02037910088068305, |
| "grad_norm": 3.948041169756955, |
| "learning_rate": 1.9994580132326843e-05, |
| "loss": 1.6605, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.020420437799305125, |
| "grad_norm": 3.7588684802290713, |
| "learning_rate": 1.9994536864042428e-05, |
| "loss": 1.6845, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.020461774717927202, |
| "grad_norm": 4.867688920782023, |
| "learning_rate": 1.999449342377991e-05, |
| "loss": 1.5575, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.020503111636549276, |
| "grad_norm": 4.235921275935457, |
| "learning_rate": 1.9994449811540044e-05, |
| "loss": 1.6329, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.020544448555171353, |
| "grad_norm": 5.353004787701509, |
| "learning_rate": 1.9994406027323578e-05, |
| "loss": 1.5961, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.020585785473793427, |
| "grad_norm": 4.49092979482084, |
| "learning_rate": 1.999436207113126e-05, |
| "loss": 1.6152, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.0206271223924155, |
| "grad_norm": 4.786632872232947, |
| "learning_rate": 1.9994317942963856e-05, |
| "loss": 1.5889, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.020668459311037578, |
| "grad_norm": 3.7616100197105324, |
| "learning_rate": 1.999427364282212e-05, |
| "loss": 1.6428, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.02070979622965965, |
| "grad_norm": 4.922026489251745, |
| "learning_rate": 1.999422917070681e-05, |
| "loss": 1.6404, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.02075113314828173, |
| "grad_norm": 4.51143708824428, |
| "learning_rate": 1.9994184526618698e-05, |
| "loss": 1.6532, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.020792470066903802, |
| "grad_norm": 4.104589032058005, |
| "learning_rate": 1.999413971055855e-05, |
| "loss": 1.6071, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.02083380698552588, |
| "grad_norm": 4.89262784656072, |
| "learning_rate": 1.999409472252714e-05, |
| "loss": 1.6516, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.020875143904147953, |
| "grad_norm": 3.6347037714340122, |
| "learning_rate": 1.9994049562525235e-05, |
| "loss": 1.5681, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.020916480822770027, |
| "grad_norm": 3.986687295644655, |
| "learning_rate": 1.9994004230553616e-05, |
| "loss": 1.6061, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.020957817741392104, |
| "grad_norm": 5.1196884550128825, |
| "learning_rate": 1.999395872661307e-05, |
| "loss": 1.646, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.020999154660014178, |
| "grad_norm": 4.073313251564883, |
| "learning_rate": 1.9993913050704362e-05, |
| "loss": 1.5632, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.021040491578636255, |
| "grad_norm": 3.773829349198683, |
| "learning_rate": 1.99938672028283e-05, |
| "loss": 1.596, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.02108182849725833, |
| "grad_norm": 5.707286361857388, |
| "learning_rate": 1.9993821182985655e-05, |
| "loss": 1.587, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.021123165415880402, |
| "grad_norm": 4.135913165404502, |
| "learning_rate": 1.9993774991177227e-05, |
| "loss": 1.6229, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.02116450233450248, |
| "grad_norm": 4.538213401615244, |
| "learning_rate": 1.9993728627403814e-05, |
| "loss": 1.5913, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.021205839253124553, |
| "grad_norm": 4.103580788767663, |
| "learning_rate": 1.9993682091666206e-05, |
| "loss": 1.6532, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.02124717617174663, |
| "grad_norm": 3.6711472807654064, |
| "learning_rate": 1.9993635383965205e-05, |
| "loss": 1.5746, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.021288513090368704, |
| "grad_norm": 5.277279072305559, |
| "learning_rate": 1.9993588504301623e-05, |
| "loss": 1.597, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.02132985000899078, |
| "grad_norm": 3.646653216373581, |
| "learning_rate": 1.9993541452676257e-05, |
| "loss": 1.6045, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.021371186927612855, |
| "grad_norm": 4.454553625669168, |
| "learning_rate": 1.999349422908992e-05, |
| "loss": 1.6168, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.02141252384623493, |
| "grad_norm": 4.408940295701244, |
| "learning_rate": 1.999344683354343e-05, |
| "loss": 1.5688, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.021453860764857006, |
| "grad_norm": 4.30626191840598, |
| "learning_rate": 1.9993399266037593e-05, |
| "loss": 1.5743, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.02149519768347908, |
| "grad_norm": 3.674456985901954, |
| "learning_rate": 1.999335152657323e-05, |
| "loss": 1.5872, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.021536534602101157, |
| "grad_norm": 3.641790233464658, |
| "learning_rate": 1.9993303615151168e-05, |
| "loss": 1.5612, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.02157787152072323, |
| "grad_norm": 4.165728119210956, |
| "learning_rate": 1.9993255531772225e-05, |
| "loss": 1.59, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.021619208439345308, |
| "grad_norm": 3.8319777859342246, |
| "learning_rate": 1.9993207276437235e-05, |
| "loss": 1.5912, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.02166054535796738, |
| "grad_norm": 3.9855756729463168, |
| "learning_rate": 1.999315884914702e-05, |
| "loss": 1.58, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.021701882276589455, |
| "grad_norm": 3.8011477722676807, |
| "learning_rate": 1.999311024990242e-05, |
| "loss": 1.6003, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.021743219195211532, |
| "grad_norm": 3.985198206647649, |
| "learning_rate": 1.9993061478704275e-05, |
| "loss": 1.5986, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.021784556113833606, |
| "grad_norm": 3.9838081605823636, |
| "learning_rate": 1.9993012535553412e-05, |
| "loss": 1.6166, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.021825893032455683, |
| "grad_norm": 3.9996617755784043, |
| "learning_rate": 1.999296342045068e-05, |
| "loss": 1.5792, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.021867229951077757, |
| "grad_norm": 5.892962480457768, |
| "learning_rate": 1.9992914133396926e-05, |
| "loss": 1.6053, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.02190856686969983, |
| "grad_norm": 4.427789486632826, |
| "learning_rate": 1.9992864674392994e-05, |
| "loss": 1.6374, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.021949903788321908, |
| "grad_norm": 4.488482688049845, |
| "learning_rate": 1.9992815043439736e-05, |
| "loss": 1.6198, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.02199124070694398, |
| "grad_norm": 3.9697984164903035, |
| "learning_rate": 1.999276524053801e-05, |
| "loss": 1.6112, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.02203257762556606, |
| "grad_norm": 4.708237856178089, |
| "learning_rate": 1.9992715265688666e-05, |
| "loss": 1.569, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.022073914544188132, |
| "grad_norm": 4.180089792931872, |
| "learning_rate": 1.999266511889257e-05, |
| "loss": 1.564, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.02211525146281021, |
| "grad_norm": 4.540705844431402, |
| "learning_rate": 1.9992614800150582e-05, |
| "loss": 1.6062, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.022156588381432283, |
| "grad_norm": 3.6164199548569256, |
| "learning_rate": 1.999256430946357e-05, |
| "loss": 1.614, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.022197925300054357, |
| "grad_norm": 3.815681996528154, |
| "learning_rate": 1.9992513646832398e-05, |
| "loss": 1.5836, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.022239262218676434, |
| "grad_norm": 4.806439757203068, |
| "learning_rate": 1.9992462812257943e-05, |
| "loss": 1.6162, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.022280599137298508, |
| "grad_norm": 4.354139965343947, |
| "learning_rate": 1.999241180574108e-05, |
| "loss": 1.5888, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.022321936055920585, |
| "grad_norm": 4.126817858976234, |
| "learning_rate": 1.999236062728268e-05, |
| "loss": 1.5879, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.02236327297454266, |
| "grad_norm": 4.47607737943672, |
| "learning_rate": 1.9992309276883632e-05, |
| "loss": 1.6099, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.022404609893164732, |
| "grad_norm": 5.610066619695038, |
| "learning_rate": 1.9992257754544814e-05, |
| "loss": 1.593, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.02244594681178681, |
| "grad_norm": 4.2928973652861675, |
| "learning_rate": 1.9992206060267114e-05, |
| "loss": 1.5793, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.022487283730408883, |
| "grad_norm": 3.8921859700664325, |
| "learning_rate": 1.9992154194051422e-05, |
| "loss": 1.608, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.02252862064903096, |
| "grad_norm": 3.677731550454947, |
| "learning_rate": 1.999210215589863e-05, |
| "loss": 1.6151, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.022569957567653034, |
| "grad_norm": 4.200629201423265, |
| "learning_rate": 1.9992049945809632e-05, |
| "loss": 1.6246, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.02261129448627511, |
| "grad_norm": 4.064480908765512, |
| "learning_rate": 1.9991997563785332e-05, |
| "loss": 1.5607, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.022652631404897185, |
| "grad_norm": 3.5486537855524176, |
| "learning_rate": 1.9991945009826623e-05, |
| "loss": 1.5906, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.02269396832351926, |
| "grad_norm": 4.0698465101707, |
| "learning_rate": 1.9991892283934415e-05, |
| "loss": 1.5864, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.022735305242141336, |
| "grad_norm": 3.698399389749536, |
| "learning_rate": 1.9991839386109615e-05, |
| "loss": 1.593, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.02277664216076341, |
| "grad_norm": 4.854255782396672, |
| "learning_rate": 1.9991786316353134e-05, |
| "loss": 1.5961, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.022817979079385486, |
| "grad_norm": 3.5841353274799244, |
| "learning_rate": 1.9991733074665884e-05, |
| "loss": 1.5638, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.02285931599800756, |
| "grad_norm": 4.188646894537988, |
| "learning_rate": 1.9991679661048774e-05, |
| "loss": 1.5605, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.022900652916629637, |
| "grad_norm": 3.646293980881599, |
| "learning_rate": 1.9991626075502736e-05, |
| "loss": 1.5672, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.02294198983525171, |
| "grad_norm": 3.513345408175718, |
| "learning_rate": 1.999157231802868e-05, |
| "loss": 1.5228, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.022983326753873785, |
| "grad_norm": 4.22409759900443, |
| "learning_rate": 1.999151838862754e-05, |
| "loss": 1.5742, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.023024663672495862, |
| "grad_norm": 3.9606510772786674, |
| "learning_rate": 1.999146428730024e-05, |
| "loss": 1.5898, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.023066000591117936, |
| "grad_norm": 4.723833314885466, |
| "learning_rate": 1.9991410014047713e-05, |
| "loss": 1.6293, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.023107337509740013, |
| "grad_norm": 3.79738622812003, |
| "learning_rate": 1.999135556887089e-05, |
| "loss": 1.5347, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.023148674428362086, |
| "grad_norm": 3.5876021705924277, |
| "learning_rate": 1.9991300951770712e-05, |
| "loss": 1.5639, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.02319001134698416, |
| "grad_norm": 4.466727344043237, |
| "learning_rate": 1.9991246162748116e-05, |
| "loss": 1.5821, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.023231348265606237, |
| "grad_norm": 4.027485882579859, |
| "learning_rate": 1.999119120180404e-05, |
| "loss": 1.5641, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.02327268518422831, |
| "grad_norm": 4.698907728867797, |
| "learning_rate": 1.9991136068939436e-05, |
| "loss": 1.5717, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.023314022102850388, |
| "grad_norm": 3.9675562129009534, |
| "learning_rate": 1.9991080764155254e-05, |
| "loss": 1.5984, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.023355359021472462, |
| "grad_norm": 4.469330328433558, |
| "learning_rate": 1.9991025287452442e-05, |
| "loss": 1.5836, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.02339669594009454, |
| "grad_norm": 4.315359559691392, |
| "learning_rate": 1.9990969638831955e-05, |
| "loss": 1.5456, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.023438032858716613, |
| "grad_norm": 3.67958327218992, |
| "learning_rate": 1.9990913818294753e-05, |
| "loss": 1.6191, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.023479369777338686, |
| "grad_norm": 4.491956126894857, |
| "learning_rate": 1.9990857825841793e-05, |
| "loss": 1.5808, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.023520706695960764, |
| "grad_norm": 3.79674314266457, |
| "learning_rate": 1.999080166147404e-05, |
| "loss": 1.5183, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.023562043614582837, |
| "grad_norm": 4.5968252548890245, |
| "learning_rate": 1.999074532519246e-05, |
| "loss": 1.5757, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.023603380533204914, |
| "grad_norm": 3.5990834672231284, |
| "learning_rate": 1.9990688816998025e-05, |
| "loss": 1.6086, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.023644717451826988, |
| "grad_norm": 4.344410017466151, |
| "learning_rate": 1.99906321368917e-05, |
| "loss": 1.6113, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.023686054370449065, |
| "grad_norm": 3.7938891603257603, |
| "learning_rate": 1.9990575284874473e-05, |
| "loss": 1.6365, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.02372739128907114, |
| "grad_norm": 3.562057149121525, |
| "learning_rate": 1.999051826094731e-05, |
| "loss": 1.5485, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.023768728207693213, |
| "grad_norm": 4.081479989742111, |
| "learning_rate": 1.99904610651112e-05, |
| "loss": 1.5689, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.02381006512631529, |
| "grad_norm": 3.759485760858795, |
| "learning_rate": 1.999040369736712e-05, |
| "loss": 1.564, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.023851402044937364, |
| "grad_norm": 4.032363621919849, |
| "learning_rate": 1.9990346157716064e-05, |
| "loss": 1.6025, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.02389273896355944, |
| "grad_norm": 3.6432323322843403, |
| "learning_rate": 1.999028844615902e-05, |
| "loss": 1.5271, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.023934075882181514, |
| "grad_norm": 3.802770545609017, |
| "learning_rate": 1.9990230562696983e-05, |
| "loss": 1.5967, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.023975412800803588, |
| "grad_norm": 3.795072573463222, |
| "learning_rate": 1.9990172507330943e-05, |
| "loss": 1.5247, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.024016749719425665, |
| "grad_norm": 4.366382080210575, |
| "learning_rate": 1.99901142800619e-05, |
| "loss": 1.5781, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.02405808663804774, |
| "grad_norm": 3.9097914526353605, |
| "learning_rate": 1.9990055880890864e-05, |
| "loss": 1.6034, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.024099423556669816, |
| "grad_norm": 4.123926255872013, |
| "learning_rate": 1.9989997309818833e-05, |
| "loss": 1.5464, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.02414076047529189, |
| "grad_norm": 4.446493191993532, |
| "learning_rate": 1.9989938566846812e-05, |
| "loss": 1.5586, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.024182097393913967, |
| "grad_norm": 3.7337639849714233, |
| "learning_rate": 1.998987965197582e-05, |
| "loss": 1.5479, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.02422343431253604, |
| "grad_norm": 4.7444952313768525, |
| "learning_rate": 1.9989820565206865e-05, |
| "loss": 1.5808, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.024264771231158114, |
| "grad_norm": 4.247725775065283, |
| "learning_rate": 1.9989761306540966e-05, |
| "loss": 1.523, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.02430610814978019, |
| "grad_norm": 3.995186643530754, |
| "learning_rate": 1.998970187597914e-05, |
| "loss": 1.5785, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.024347445068402265, |
| "grad_norm": 4.816092056889684, |
| "learning_rate": 1.9989642273522416e-05, |
| "loss": 1.5746, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.024388781987024342, |
| "grad_norm": 4.290367502884436, |
| "learning_rate": 1.9989582499171813e-05, |
| "loss": 1.6119, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.024430118905646416, |
| "grad_norm": 3.5513668937922236, |
| "learning_rate": 1.9989522552928365e-05, |
| "loss": 1.5162, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.02447145582426849, |
| "grad_norm": 3.6198772954827665, |
| "learning_rate": 1.9989462434793096e-05, |
| "loss": 1.5323, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.024512792742890567, |
| "grad_norm": 3.852832040089439, |
| "learning_rate": 1.9989402144767046e-05, |
| "loss": 1.5311, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.02455412966151264, |
| "grad_norm": 3.9519174433535325, |
| "learning_rate": 1.9989341682851254e-05, |
| "loss": 1.5429, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.024595466580134718, |
| "grad_norm": 4.87353052847372, |
| "learning_rate": 1.9989281049046755e-05, |
| "loss": 1.6002, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.02463680349875679, |
| "grad_norm": 3.3803857370087225, |
| "learning_rate": 1.9989220243354595e-05, |
| "loss": 1.5793, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.02467814041737887, |
| "grad_norm": 3.8766963938819075, |
| "learning_rate": 1.998915926577582e-05, |
| "loss": 1.533, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.024719477336000942, |
| "grad_norm": 4.09044180490663, |
| "learning_rate": 1.998909811631148e-05, |
| "loss": 1.565, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.024760814254623016, |
| "grad_norm": 4.537575506546124, |
| "learning_rate": 1.998903679496263e-05, |
| "loss": 1.5523, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.024802151173245093, |
| "grad_norm": 3.4086201638465803, |
| "learning_rate": 1.9988975301730317e-05, |
| "loss": 1.5467, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.024843488091867167, |
| "grad_norm": 3.7512592579174244, |
| "learning_rate": 1.9988913636615608e-05, |
| "loss": 1.6148, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.024884825010489244, |
| "grad_norm": 3.9606857815229035, |
| "learning_rate": 1.9988851799619557e-05, |
| "loss": 1.5529, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.024926161929111318, |
| "grad_norm": 4.87271131926588, |
| "learning_rate": 1.9988789790743235e-05, |
| "loss": 1.624, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.024967498847733395, |
| "grad_norm": 4.562111872082575, |
| "learning_rate": 1.9988727609987705e-05, |
| "loss": 1.5954, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.02500883576635547, |
| "grad_norm": 4.160920766227917, |
| "learning_rate": 1.9988665257354035e-05, |
| "loss": 1.5745, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.025050172684977542, |
| "grad_norm": 3.7976329240225284, |
| "learning_rate": 1.9988602732843296e-05, |
| "loss": 1.539, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.02509150960359962, |
| "grad_norm": 3.9348324977710347, |
| "learning_rate": 1.9988540036456575e-05, |
| "loss": 1.5802, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.025132846522221693, |
| "grad_norm": 3.3649859713246313, |
| "learning_rate": 1.998847716819494e-05, |
| "loss": 1.5349, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.02517418344084377, |
| "grad_norm": 5.035730829505278, |
| "learning_rate": 1.998841412805948e-05, |
| "loss": 1.5522, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.025215520359465844, |
| "grad_norm": 4.38089533529463, |
| "learning_rate": 1.9988350916051272e-05, |
| "loss": 1.5696, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.025256857278087918, |
| "grad_norm": 4.0458062619048185, |
| "learning_rate": 1.9988287532171408e-05, |
| "loss": 1.582, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.025298194196709995, |
| "grad_norm": 5.197316936196237, |
| "learning_rate": 1.9988223976420983e-05, |
| "loss": 1.5685, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.02533953111533207, |
| "grad_norm": 3.701848060366763, |
| "learning_rate": 1.998816024880108e-05, |
| "loss": 1.568, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.025380868033954146, |
| "grad_norm": 4.576812131388496, |
| "learning_rate": 1.9988096349312808e-05, |
| "loss": 1.5925, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.02542220495257622, |
| "grad_norm": 3.626416937979281, |
| "learning_rate": 1.998803227795726e-05, |
| "loss": 1.6026, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.025463541871198297, |
| "grad_norm": 3.7415000301009016, |
| "learning_rate": 1.9987968034735535e-05, |
| "loss": 1.5632, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.02550487878982037, |
| "grad_norm": 4.093809033114078, |
| "learning_rate": 1.9987903619648745e-05, |
| "loss": 1.5442, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.025546215708442444, |
| "grad_norm": 3.782350165490308, |
| "learning_rate": 1.9987839032697995e-05, |
| "loss": 1.5423, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.02558755262706452, |
| "grad_norm": 3.1897173529667695, |
| "learning_rate": 1.9987774273884398e-05, |
| "loss": 1.5332, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.025628889545686595, |
| "grad_norm": 3.9224918301369276, |
| "learning_rate": 1.9987709343209066e-05, |
| "loss": 1.5133, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.025670226464308672, |
| "grad_norm": 3.830850927059349, |
| "learning_rate": 1.9987644240673118e-05, |
| "loss": 1.555, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.025711563382930746, |
| "grad_norm": 4.0209145103807575, |
| "learning_rate": 1.9987578966277678e-05, |
| "loss": 1.5114, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.02575290030155282, |
| "grad_norm": 3.936778500441379, |
| "learning_rate": 1.998751352002386e-05, |
| "loss": 1.5257, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.025794237220174897, |
| "grad_norm": 3.7293623909553313, |
| "learning_rate": 1.9987447901912794e-05, |
| "loss": 1.5694, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.02583557413879697, |
| "grad_norm": 3.5818490617695575, |
| "learning_rate": 1.9987382111945614e-05, |
| "loss": 1.5531, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.025876911057419048, |
| "grad_norm": 3.6430367540575928, |
| "learning_rate": 1.998731615012345e-05, |
| "loss": 1.5434, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.02591824797604112, |
| "grad_norm": 4.589782090699277, |
| "learning_rate": 1.998725001644743e-05, |
| "loss": 1.557, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.0259595848946632, |
| "grad_norm": 4.283265289643292, |
| "learning_rate": 1.99871837109187e-05, |
| "loss": 1.5624, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.026000921813285272, |
| "grad_norm": 4.946779268912546, |
| "learning_rate": 1.99871172335384e-05, |
| "loss": 1.5124, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.026042258731907346, |
| "grad_norm": 3.5038967204373694, |
| "learning_rate": 1.998705058430767e-05, |
| "loss": 1.5683, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.026083595650529423, |
| "grad_norm": 3.8770191196485886, |
| "learning_rate": 1.998698376322766e-05, |
| "loss": 1.5435, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.026124932569151497, |
| "grad_norm": 4.439160488934939, |
| "learning_rate": 1.998691677029952e-05, |
| "loss": 1.5295, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.026166269487773574, |
| "grad_norm": 4.092904098781107, |
| "learning_rate": 1.99868496055244e-05, |
| "loss": 1.55, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.026207606406395648, |
| "grad_norm": 4.303539009198583, |
| "learning_rate": 1.9986782268903457e-05, |
| "loss": 1.5484, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.026248943325017725, |
| "grad_norm": 3.9078194955949916, |
| "learning_rate": 1.9986714760437853e-05, |
| "loss": 1.5827, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.0262902802436398, |
| "grad_norm": 4.342380780259694, |
| "learning_rate": 1.9986647080128746e-05, |
| "loss": 1.557, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.026331617162261872, |
| "grad_norm": 3.985596918279314, |
| "learning_rate": 1.99865792279773e-05, |
| "loss": 1.578, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.02637295408088395, |
| "grad_norm": 3.653720676962278, |
| "learning_rate": 1.9986511203984683e-05, |
| "loss": 1.5668, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.026414290999506023, |
| "grad_norm": 3.751487721843964, |
| "learning_rate": 1.998644300815207e-05, |
| "loss": 1.5305, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.0264556279181281, |
| "grad_norm": 4.009883655861525, |
| "learning_rate": 1.9986374640480627e-05, |
| "loss": 1.5495, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.026496964836750174, |
| "grad_norm": 3.8266120037819396, |
| "learning_rate": 1.9986306100971533e-05, |
| "loss": 1.5255, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.026538301755372248, |
| "grad_norm": 3.6427176903376384, |
| "learning_rate": 1.9986237389625974e-05, |
| "loss": 1.5525, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.026579638673994325, |
| "grad_norm": 4.099974786255079, |
| "learning_rate": 1.998616850644512e-05, |
| "loss": 1.5424, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.0266209755926164, |
| "grad_norm": 3.8198383445190793, |
| "learning_rate": 1.998609945143017e-05, |
| "loss": 1.567, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.026662312511238476, |
| "grad_norm": 4.473728987789235, |
| "learning_rate": 1.9986030224582302e-05, |
| "loss": 1.4823, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.02670364942986055, |
| "grad_norm": 4.534257895704366, |
| "learning_rate": 1.998596082590271e-05, |
| "loss": 1.5712, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.026744986348482627, |
| "grad_norm": 3.644766820996961, |
| "learning_rate": 1.998589125539259e-05, |
| "loss": 1.4863, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.0267863232671047, |
| "grad_norm": 3.5999094373664526, |
| "learning_rate": 1.9985821513053137e-05, |
| "loss": 1.5326, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.026827660185726774, |
| "grad_norm": 4.1425167904001565, |
| "learning_rate": 1.9985751598885552e-05, |
| "loss": 1.5378, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.02686899710434885, |
| "grad_norm": 4.493755521920297, |
| "learning_rate": 1.998568151289104e-05, |
| "loss": 1.559, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.026910334022970925, |
| "grad_norm": 3.8756107746153896, |
| "learning_rate": 1.9985611255070806e-05, |
| "loss": 1.556, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.026951670941593002, |
| "grad_norm": 4.038084156630909, |
| "learning_rate": 1.9985540825426055e-05, |
| "loss": 1.5645, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.026993007860215076, |
| "grad_norm": 3.5809330138239392, |
| "learning_rate": 1.9985470223958e-05, |
| "loss": 1.5548, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.027034344778837153, |
| "grad_norm": 3.8676960332502963, |
| "learning_rate": 1.998539945066786e-05, |
| "loss": 1.5276, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.027075681697459227, |
| "grad_norm": 3.4449297103173593, |
| "learning_rate": 1.9985328505556852e-05, |
| "loss": 1.5651, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.0271170186160813, |
| "grad_norm": 4.288962654330007, |
| "learning_rate": 1.9985257388626196e-05, |
| "loss": 1.4996, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.027158355534703377, |
| "grad_norm": 4.128501380292175, |
| "learning_rate": 1.9985186099877112e-05, |
| "loss": 1.5419, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.02719969245332545, |
| "grad_norm": 4.575002926290647, |
| "learning_rate": 1.998511463931083e-05, |
| "loss": 1.5515, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.02724102937194753, |
| "grad_norm": 3.821068699345836, |
| "learning_rate": 1.998504300692858e-05, |
| "loss": 1.5233, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.027282366290569602, |
| "grad_norm": 3.2424897263365016, |
| "learning_rate": 1.9984971202731596e-05, |
| "loss": 1.5479, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.027323703209191676, |
| "grad_norm": 4.834170577555974, |
| "learning_rate": 1.9984899226721107e-05, |
| "loss": 1.5502, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.027365040127813753, |
| "grad_norm": 3.764169873278093, |
| "learning_rate": 1.998482707889836e-05, |
| "loss": 1.5891, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.027406377046435827, |
| "grad_norm": 3.414782018354158, |
| "learning_rate": 1.998475475926459e-05, |
| "loss": 1.5159, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.027447713965057904, |
| "grad_norm": 4.666184759190313, |
| "learning_rate": 1.9984682267821046e-05, |
| "loss": 1.5628, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.027489050883679977, |
| "grad_norm": 3.5574578914802943, |
| "learning_rate": 1.998460960456897e-05, |
| "loss": 1.5315, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.027530387802302055, |
| "grad_norm": 4.817382615755327, |
| "learning_rate": 1.9984536769509615e-05, |
| "loss": 1.5081, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.02757172472092413, |
| "grad_norm": 4.458867619168575, |
| "learning_rate": 1.998446376264424e-05, |
| "loss": 1.5099, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.027613061639546202, |
| "grad_norm": 4.929644851290023, |
| "learning_rate": 1.9984390583974093e-05, |
| "loss": 1.5122, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.02765439855816828, |
| "grad_norm": 4.625479741961043, |
| "learning_rate": 1.9984317233500435e-05, |
| "loss": 1.5516, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.027695735476790353, |
| "grad_norm": 4.116997057727521, |
| "learning_rate": 1.9984243711224535e-05, |
| "loss": 1.5376, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.02773707239541243, |
| "grad_norm": 3.5829728345047314, |
| "learning_rate": 1.998417001714765e-05, |
| "loss": 1.5175, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.027778409314034504, |
| "grad_norm": 3.9734979789101996, |
| "learning_rate": 1.9984096151271048e-05, |
| "loss": 1.4871, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.027819746232656577, |
| "grad_norm": 3.430905345503222, |
| "learning_rate": 1.9984022113596003e-05, |
| "loss": 1.5413, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.027861083151278655, |
| "grad_norm": 4.2373775185116465, |
| "learning_rate": 1.998394790412379e-05, |
| "loss": 1.508, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.027902420069900728, |
| "grad_norm": 3.973156898260741, |
| "learning_rate": 1.9983873522855684e-05, |
| "loss": 1.5283, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.027943756988522805, |
| "grad_norm": 3.714285928818181, |
| "learning_rate": 1.9983798969792966e-05, |
| "loss": 1.5362, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.02798509390714488, |
| "grad_norm": 4.251009623472971, |
| "learning_rate": 1.9983724244936916e-05, |
| "loss": 1.5282, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.028026430825766956, |
| "grad_norm": 3.886447728722872, |
| "learning_rate": 1.9983649348288825e-05, |
| "loss": 1.5719, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.02806776774438903, |
| "grad_norm": 3.5572049346515757, |
| "learning_rate": 1.9983574279849977e-05, |
| "loss": 1.5302, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.028109104663011104, |
| "grad_norm": 3.688224659646708, |
| "learning_rate": 1.9983499039621667e-05, |
| "loss": 1.5132, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.02815044158163318, |
| "grad_norm": 3.986436014630721, |
| "learning_rate": 1.998342362760519e-05, |
| "loss": 1.5045, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.028191778500255255, |
| "grad_norm": 4.042321521286428, |
| "learning_rate": 1.998334804380184e-05, |
| "loss": 1.52, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.02823311541887733, |
| "grad_norm": 3.936708692378881, |
| "learning_rate": 1.9983272288212917e-05, |
| "loss": 1.5208, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.028274452337499405, |
| "grad_norm": 4.692602969518199, |
| "learning_rate": 1.998319636083973e-05, |
| "loss": 1.5667, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.028315789256121483, |
| "grad_norm": 3.9313878236010598, |
| "learning_rate": 1.9983120261683582e-05, |
| "loss": 1.5831, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.028357126174743556, |
| "grad_norm": 3.551615629888668, |
| "learning_rate": 1.9983043990745784e-05, |
| "loss": 1.5308, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.02839846309336563, |
| "grad_norm": 4.6846872186437905, |
| "learning_rate": 1.9982967548027645e-05, |
| "loss": 1.4921, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.028439800011987707, |
| "grad_norm": 4.130277420309701, |
| "learning_rate": 1.9982890933530482e-05, |
| "loss": 1.4943, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.02848113693060978, |
| "grad_norm": 4.84212625045545, |
| "learning_rate": 1.9982814147255617e-05, |
| "loss": 1.5353, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.028522473849231858, |
| "grad_norm": 4.484005829649726, |
| "learning_rate": 1.9982737189204367e-05, |
| "loss": 1.5051, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.02856381076785393, |
| "grad_norm": 4.60797848842926, |
| "learning_rate": 1.998266005937806e-05, |
| "loss": 1.5816, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.028605147686476005, |
| "grad_norm": 4.6416638061065685, |
| "learning_rate": 1.998258275777802e-05, |
| "loss": 1.4904, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.028646484605098083, |
| "grad_norm": 3.477343383643086, |
| "learning_rate": 1.9982505284405574e-05, |
| "loss": 1.4904, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.028687821523720156, |
| "grad_norm": 3.6088868644511427, |
| "learning_rate": 1.9982427639262065e-05, |
| "loss": 1.5314, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.028729158442342233, |
| "grad_norm": 3.5925096362200333, |
| "learning_rate": 1.9982349822348816e-05, |
| "loss": 1.5714, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.028770495360964307, |
| "grad_norm": 3.2379164637124855, |
| "learning_rate": 1.9982271833667178e-05, |
| "loss": 1.538, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.028811832279586384, |
| "grad_norm": 4.70836437147594, |
| "learning_rate": 1.9982193673218487e-05, |
| "loss": 1.5221, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.028853169198208458, |
| "grad_norm": 4.481706228912118, |
| "learning_rate": 1.9982115341004088e-05, |
| "loss": 1.5313, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.02889450611683053, |
| "grad_norm": 4.687318727733607, |
| "learning_rate": 1.9982036837025332e-05, |
| "loss": 1.5051, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.02893584303545261, |
| "grad_norm": 4.598444922768698, |
| "learning_rate": 1.998195816128357e-05, |
| "loss": 1.4934, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.028977179954074683, |
| "grad_norm": 3.9891444926266555, |
| "learning_rate": 1.9981879313780145e-05, |
| "loss": 1.5511, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.02901851687269676, |
| "grad_norm": 4.416153022094333, |
| "learning_rate": 1.998180029451643e-05, |
| "loss": 1.5097, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.029059853791318833, |
| "grad_norm": 3.798723984502823, |
| "learning_rate": 1.9981721103493775e-05, |
| "loss": 1.4997, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.029101190709940907, |
| "grad_norm": 3.8640966990883223, |
| "learning_rate": 1.9981641740713545e-05, |
| "loss": 1.54, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.029142527628562984, |
| "grad_norm": 3.9923946800397436, |
| "learning_rate": 1.9981562206177104e-05, |
| "loss": 1.5511, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.029183864547185058, |
| "grad_norm": 3.451062899533455, |
| "learning_rate": 1.998148249988582e-05, |
| "loss": 1.5094, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.029225201465807135, |
| "grad_norm": 3.608872589804411, |
| "learning_rate": 1.998140262184107e-05, |
| "loss": 1.5162, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.02926653838442921, |
| "grad_norm": 4.033124276978576, |
| "learning_rate": 1.998132257204422e-05, |
| "loss": 1.4959, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.029307875303051286, |
| "grad_norm": 3.6841192145575397, |
| "learning_rate": 1.9981242350496656e-05, |
| "loss": 1.5223, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.02934921222167336, |
| "grad_norm": 3.5915932028439226, |
| "learning_rate": 1.9981161957199754e-05, |
| "loss": 1.5257, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.029390549140295433, |
| "grad_norm": 3.870756947521686, |
| "learning_rate": 1.9981081392154898e-05, |
| "loss": 1.4904, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.02943188605891751, |
| "grad_norm": 4.22816037060308, |
| "learning_rate": 1.9981000655363473e-05, |
| "loss": 1.4982, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.029473222977539584, |
| "grad_norm": 3.808092113157194, |
| "learning_rate": 1.9980919746826872e-05, |
| "loss": 1.519, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.02951455989616166, |
| "grad_norm": 3.709300764256846, |
| "learning_rate": 1.9980838666546483e-05, |
| "loss": 1.5533, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.029555896814783735, |
| "grad_norm": 3.4732849314384127, |
| "learning_rate": 1.9980757414523704e-05, |
| "loss": 1.5633, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.029597233733405812, |
| "grad_norm": 3.713987413842907, |
| "learning_rate": 1.998067599075993e-05, |
| "loss": 1.5201, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.029638570652027886, |
| "grad_norm": 4.113428203135297, |
| "learning_rate": 1.9980594395256564e-05, |
| "loss": 1.4594, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.02967990757064996, |
| "grad_norm": 2.9935199155014285, |
| "learning_rate": 1.9980512628015014e-05, |
| "loss": 1.4986, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.029721244489272037, |
| "grad_norm": 4.691195443226011, |
| "learning_rate": 1.998043068903668e-05, |
| "loss": 1.5357, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.02976258140789411, |
| "grad_norm": 3.4891344421625647, |
| "learning_rate": 1.9980348578322973e-05, |
| "loss": 1.5306, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.029803918326516188, |
| "grad_norm": 5.006688866087417, |
| "learning_rate": 1.9980266295875313e-05, |
| "loss": 1.512, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.02984525524513826, |
| "grad_norm": 4.053771198338998, |
| "learning_rate": 1.9980183841695107e-05, |
| "loss": 1.4794, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.029886592163760335, |
| "grad_norm": 3.352224471660883, |
| "learning_rate": 1.998010121578378e-05, |
| "loss": 1.4914, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.029927929082382412, |
| "grad_norm": 3.864235630776693, |
| "learning_rate": 1.998001841814275e-05, |
| "loss": 1.5253, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.029969266001004486, |
| "grad_norm": 3.706315048662037, |
| "learning_rate": 1.997993544877344e-05, |
| "loss": 1.5509, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.030010602919626563, |
| "grad_norm": 3.5465275464245676, |
| "learning_rate": 1.9979852307677285e-05, |
| "loss": 1.5605, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.030051939838248637, |
| "grad_norm": 4.129168530718907, |
| "learning_rate": 1.997976899485571e-05, |
| "loss": 1.5204, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.030093276756870714, |
| "grad_norm": 3.500344444775805, |
| "learning_rate": 1.997968551031015e-05, |
| "loss": 1.4853, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.030134613675492788, |
| "grad_norm": 3.930695708227359, |
| "learning_rate": 1.9979601854042044e-05, |
| "loss": 1.5186, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.03017595059411486, |
| "grad_norm": 3.8784171251524846, |
| "learning_rate": 1.9979518026052826e-05, |
| "loss": 1.5031, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.03021728751273694, |
| "grad_norm": 4.264401345696811, |
| "learning_rate": 1.997943402634394e-05, |
| "loss": 1.5175, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.030258624431359012, |
| "grad_norm": 3.924430485404916, |
| "learning_rate": 1.9979349854916836e-05, |
| "loss": 1.5415, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.03029996134998109, |
| "grad_norm": 5.108018679538745, |
| "learning_rate": 1.9979265511772958e-05, |
| "loss": 1.4635, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.030341298268603163, |
| "grad_norm": 3.5308579618903253, |
| "learning_rate": 1.997918099691376e-05, |
| "loss": 1.5076, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.03038263518722524, |
| "grad_norm": 5.35750207475576, |
| "learning_rate": 1.997909631034069e-05, |
| "loss": 1.5426, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.030423972105847314, |
| "grad_norm": 3.4906620060732645, |
| "learning_rate": 1.9979011452055216e-05, |
| "loss": 1.5012, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.030465309024469388, |
| "grad_norm": 3.059087960421475, |
| "learning_rate": 1.9978926422058788e-05, |
| "loss": 1.5542, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.030506645943091465, |
| "grad_norm": 3.9845949063681227, |
| "learning_rate": 1.9978841220352875e-05, |
| "loss": 1.546, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.03054798286171354, |
| "grad_norm": 3.8610381846741815, |
| "learning_rate": 1.9978755846938943e-05, |
| "loss": 1.5437, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.030589319780335616, |
| "grad_norm": 3.2351748039531154, |
| "learning_rate": 1.9978670301818456e-05, |
| "loss": 1.4819, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.03063065669895769, |
| "grad_norm": 3.3661408607769823, |
| "learning_rate": 1.997858458499289e-05, |
| "loss": 1.4698, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.030671993617579763, |
| "grad_norm": 3.882697263150638, |
| "learning_rate": 1.997849869646372e-05, |
| "loss": 1.5216, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.03071333053620184, |
| "grad_norm": 3.574753801928485, |
| "learning_rate": 1.9978412636232425e-05, |
| "loss": 1.4803, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.030754667454823914, |
| "grad_norm": 4.443698443398163, |
| "learning_rate": 1.997832640430048e-05, |
| "loss": 1.5006, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.03079600437344599, |
| "grad_norm": 4.180214183413532, |
| "learning_rate": 1.9978240000669377e-05, |
| "loss": 1.4823, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.030837341292068065, |
| "grad_norm": 4.271849788381411, |
| "learning_rate": 1.9978153425340596e-05, |
| "loss": 1.4888, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.030878678210690142, |
| "grad_norm": 3.638691493855987, |
| "learning_rate": 1.9978066678315634e-05, |
| "loss": 1.5171, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.030920015129312216, |
| "grad_norm": 4.31218852799806, |
| "learning_rate": 1.9977979759595972e-05, |
| "loss": 1.5313, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.03096135204793429, |
| "grad_norm": 4.188985873406339, |
| "learning_rate": 1.9977892669183115e-05, |
| "loss": 1.5333, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.031002688966556367, |
| "grad_norm": 4.091422365669455, |
| "learning_rate": 1.9977805407078563e-05, |
| "loss": 1.5104, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.03104402588517844, |
| "grad_norm": 3.7904969438772995, |
| "learning_rate": 1.997771797328381e-05, |
| "loss": 1.4961, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.031085362803800518, |
| "grad_norm": 4.393169555680857, |
| "learning_rate": 1.9977630367800366e-05, |
| "loss": 1.4876, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.03112669972242259, |
| "grad_norm": 3.3377888267921163, |
| "learning_rate": 1.9977542590629736e-05, |
| "loss": 1.5107, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.031168036641044665, |
| "grad_norm": 3.6979315229218512, |
| "learning_rate": 1.9977454641773432e-05, |
| "loss": 1.4984, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.031209373559666742, |
| "grad_norm": 4.422235985099495, |
| "learning_rate": 1.9977366521232966e-05, |
| "loss": 1.5166, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.03125071047828882, |
| "grad_norm": 4.357975123788466, |
| "learning_rate": 1.9977278229009854e-05, |
| "loss": 1.5133, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.03129204739691089, |
| "grad_norm": 3.618227619392518, |
| "learning_rate": 1.997718976510562e-05, |
| "loss": 1.4872, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.03133338431553297, |
| "grad_norm": 3.6903691829248175, |
| "learning_rate": 1.9977101129521778e-05, |
| "loss": 1.4968, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.031374721234155044, |
| "grad_norm": 3.325924769769221, |
| "learning_rate": 1.997701232225986e-05, |
| "loss": 1.484, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.03141605815277712, |
| "grad_norm": 3.244599139580768, |
| "learning_rate": 1.9976923343321388e-05, |
| "loss": 1.501, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.03145739507139919, |
| "grad_norm": 3.9512458600612224, |
| "learning_rate": 1.9976834192707898e-05, |
| "loss": 1.5146, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.03149873199002127, |
| "grad_norm": 6.319637912227645, |
| "learning_rate": 1.9976744870420925e-05, |
| "loss": 1.5232, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.031540068908643346, |
| "grad_norm": 3.385728813673924, |
| "learning_rate": 1.9976655376462003e-05, |
| "loss": 1.4964, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.031581405827265416, |
| "grad_norm": 3.5621843155206365, |
| "learning_rate": 1.997656571083267e-05, |
| "loss": 1.4948, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.03162274274588749, |
| "grad_norm": 3.5127469264785334, |
| "learning_rate": 1.9976475873534476e-05, |
| "loss": 1.4788, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.03166407966450957, |
| "grad_norm": 5.123261236833159, |
| "learning_rate": 1.9976385864568958e-05, |
| "loss": 1.4906, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.03170541658313164, |
| "grad_norm": 3.0872059709849378, |
| "learning_rate": 1.997629568393767e-05, |
| "loss": 1.5013, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.03174675350175372, |
| "grad_norm": 3.516937874543627, |
| "learning_rate": 1.9976205331642165e-05, |
| "loss": 1.4802, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.031788090420375795, |
| "grad_norm": 3.7194230277933684, |
| "learning_rate": 1.9976114807683996e-05, |
| "loss": 1.4776, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.03182942733899787, |
| "grad_norm": 3.6171028748264016, |
| "learning_rate": 1.9976024112064718e-05, |
| "loss": 1.4867, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.03187076425761994, |
| "grad_norm": 4.74734081777917, |
| "learning_rate": 1.9975933244785894e-05, |
| "loss": 1.5321, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.03191210117624202, |
| "grad_norm": 3.4998159594245912, |
| "learning_rate": 1.997584220584909e-05, |
| "loss": 1.4555, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.031953438094864096, |
| "grad_norm": 4.0664157482197245, |
| "learning_rate": 1.9975750995255865e-05, |
| "loss": 1.4982, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.03199477501348617, |
| "grad_norm": 4.448956391697098, |
| "learning_rate": 1.9975659613007797e-05, |
| "loss": 1.4877, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.032036111932108244, |
| "grad_norm": 3.9116050665163056, |
| "learning_rate": 1.9975568059106455e-05, |
| "loss": 1.51, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.03207744885073032, |
| "grad_norm": 3.8835007718258874, |
| "learning_rate": 1.9975476333553416e-05, |
| "loss": 1.5245, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.0321187857693524, |
| "grad_norm": 3.8364373709477215, |
| "learning_rate": 1.9975384436350254e-05, |
| "loss": 1.467, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.03216012268797447, |
| "grad_norm": 3.848935245597496, |
| "learning_rate": 1.9975292367498556e-05, |
| "loss": 1.4999, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.032201459606596546, |
| "grad_norm": 3.309302500010883, |
| "learning_rate": 1.99752001269999e-05, |
| "loss": 1.513, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.03224279652521862, |
| "grad_norm": 3.417545265677783, |
| "learning_rate": 1.9975107714855875e-05, |
| "loss": 1.5138, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.03228413344384069, |
| "grad_norm": 4.218881911775881, |
| "learning_rate": 1.9975015131068078e-05, |
| "loss": 1.4763, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.03232547036246277, |
| "grad_norm": 3.581778505125333, |
| "learning_rate": 1.997492237563809e-05, |
| "loss": 1.5195, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.03236680728108485, |
| "grad_norm": 3.7604938020140226, |
| "learning_rate": 1.997482944856752e-05, |
| "loss": 1.4933, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.032408144199706924, |
| "grad_norm": 4.37155325952897, |
| "learning_rate": 1.997473634985796e-05, |
| "loss": 1.4557, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.032449481118328995, |
| "grad_norm": 3.3738873014096153, |
| "learning_rate": 1.9974643079511008e-05, |
| "loss": 1.5323, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.03249081803695107, |
| "grad_norm": 4.587428592575524, |
| "learning_rate": 1.9974549637528276e-05, |
| "loss": 1.5129, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.03253215495557315, |
| "grad_norm": 3.7141451702858252, |
| "learning_rate": 1.997445602391137e-05, |
| "loss": 1.5176, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.03257349187419522, |
| "grad_norm": 3.585481088159076, |
| "learning_rate": 1.9974362238661903e-05, |
| "loss": 1.5109, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.032614828792817296, |
| "grad_norm": 3.4290626560881936, |
| "learning_rate": 1.9974268281781484e-05, |
| "loss": 1.4477, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.032656165711439374, |
| "grad_norm": 3.612891113663602, |
| "learning_rate": 1.9974174153271728e-05, |
| "loss": 1.4229, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.03269750263006145, |
| "grad_norm": 3.487845093010647, |
| "learning_rate": 1.9974079853134266e-05, |
| "loss": 1.5321, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.03273883954868352, |
| "grad_norm": 5.587764895917656, |
| "learning_rate": 1.9973985381370707e-05, |
| "loss": 1.4645, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.0327801764673056, |
| "grad_norm": 3.5490448514590494, |
| "learning_rate": 1.9973890737982684e-05, |
| "loss": 1.5374, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.032821513385927675, |
| "grad_norm": 3.94930169954439, |
| "learning_rate": 1.9973795922971827e-05, |
| "loss": 1.4661, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.032862850304549746, |
| "grad_norm": 3.494031781745132, |
| "learning_rate": 1.9973700936339763e-05, |
| "loss": 1.4291, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.03290418722317182, |
| "grad_norm": 3.0746272722997414, |
| "learning_rate": 1.9973605778088126e-05, |
| "loss": 1.5493, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.0329455241417939, |
| "grad_norm": 3.6416998354027736, |
| "learning_rate": 1.9973510448218558e-05, |
| "loss": 1.4471, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.03298686106041597, |
| "grad_norm": 3.8385152763033883, |
| "learning_rate": 1.99734149467327e-05, |
| "loss": 1.5182, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.03302819797903805, |
| "grad_norm": 3.5463908529630985, |
| "learning_rate": 1.9973319273632187e-05, |
| "loss": 1.4848, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.033069534897660124, |
| "grad_norm": 4.494540529736681, |
| "learning_rate": 1.9973223428918677e-05, |
| "loss": 1.4656, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.0331108718162822, |
| "grad_norm": 4.138487839617815, |
| "learning_rate": 1.997312741259381e-05, |
| "loss": 1.4533, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.03315220873490427, |
| "grad_norm": 3.2836571982439957, |
| "learning_rate": 1.9973031224659238e-05, |
| "loss": 1.4637, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.03319354565352635, |
| "grad_norm": 4.101602166042781, |
| "learning_rate": 1.9972934865116622e-05, |
| "loss": 1.4656, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.033234882572148426, |
| "grad_norm": 4.372499340047587, |
| "learning_rate": 1.9972838333967615e-05, |
| "loss": 1.5377, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.033276219490770496, |
| "grad_norm": 3.984477694550147, |
| "learning_rate": 1.997274163121388e-05, |
| "loss": 1.5216, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.033317556409392574, |
| "grad_norm": 3.484892612102319, |
| "learning_rate": 1.9972644756857087e-05, |
| "loss": 1.459, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.03335889332801465, |
| "grad_norm": 3.473953984247227, |
| "learning_rate": 1.9972547710898894e-05, |
| "loss": 1.4889, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.03340023024663673, |
| "grad_norm": 3.6982560350258384, |
| "learning_rate": 1.9972450493340973e-05, |
| "loss": 1.4529, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.0334415671652588, |
| "grad_norm": 4.338255434805353, |
| "learning_rate": 1.9972353104185e-05, |
| "loss": 1.4906, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.033482904083880875, |
| "grad_norm": 3.2105273217876897, |
| "learning_rate": 1.9972255543432644e-05, |
| "loss": 1.4846, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.03352424100250295, |
| "grad_norm": 3.7063825752894037, |
| "learning_rate": 1.997215781108559e-05, |
| "loss": 1.4354, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.03356557792112502, |
| "grad_norm": 3.606082322653538, |
| "learning_rate": 1.997205990714552e-05, |
| "loss": 1.5067, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.0336069148397471, |
| "grad_norm": 3.8291971667898643, |
| "learning_rate": 1.9971961831614116e-05, |
| "loss": 1.4619, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.03364825175836918, |
| "grad_norm": 3.331721665794406, |
| "learning_rate": 1.997186358449307e-05, |
| "loss": 1.4484, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.033689588676991254, |
| "grad_norm": 4.417828154129124, |
| "learning_rate": 1.9971765165784065e-05, |
| "loss": 1.508, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.033730925595613324, |
| "grad_norm": 3.782991542711988, |
| "learning_rate": 1.9971666575488798e-05, |
| "loss": 1.3925, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.0337722625142354, |
| "grad_norm": 4.079542042860949, |
| "learning_rate": 1.997156781360897e-05, |
| "loss": 1.4996, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.03381359943285748, |
| "grad_norm": 4.1626484536651995, |
| "learning_rate": 1.9971468880146273e-05, |
| "loss": 1.5178, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.03385493635147955, |
| "grad_norm": 4.122058968257828, |
| "learning_rate": 1.9971369775102417e-05, |
| "loss": 1.4267, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.033896273270101626, |
| "grad_norm": 4.017697912034419, |
| "learning_rate": 1.9971270498479097e-05, |
| "loss": 1.5129, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.0339376101887237, |
| "grad_norm": 3.3575071407879977, |
| "learning_rate": 1.997117105027803e-05, |
| "loss": 1.4796, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.03397894710734578, |
| "grad_norm": 3.9246891678069598, |
| "learning_rate": 1.9971071430500924e-05, |
| "loss": 1.5052, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.03402028402596785, |
| "grad_norm": 4.075111279351767, |
| "learning_rate": 1.9970971639149493e-05, |
| "loss": 1.4606, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.03406162094458993, |
| "grad_norm": 4.564662754708322, |
| "learning_rate": 1.997087167622546e-05, |
| "loss": 1.5111, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.034102957863212005, |
| "grad_norm": 3.753297984489193, |
| "learning_rate": 1.9970771541730536e-05, |
| "loss": 1.4899, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.034144294781834075, |
| "grad_norm": 3.913489388979073, |
| "learning_rate": 1.997067123566645e-05, |
| "loss": 1.4796, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.03418563170045615, |
| "grad_norm": 3.440711906522703, |
| "learning_rate": 1.9970570758034924e-05, |
| "loss": 1.5184, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.03422696861907823, |
| "grad_norm": 3.7595940155205363, |
| "learning_rate": 1.997047010883769e-05, |
| "loss": 1.4901, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.0342683055377003, |
| "grad_norm": 4.387403962431217, |
| "learning_rate": 1.9970369288076478e-05, |
| "loss": 1.4553, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.03430964245632238, |
| "grad_norm": 3.7034201439850594, |
| "learning_rate": 1.9970268295753022e-05, |
| "loss": 1.4534, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.034350979374944454, |
| "grad_norm": 4.0052763549673225, |
| "learning_rate": 1.9970167131869064e-05, |
| "loss": 1.4539, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.03439231629356653, |
| "grad_norm": 4.079349801665228, |
| "learning_rate": 1.9970065796426342e-05, |
| "loss": 1.4698, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.0344336532121886, |
| "grad_norm": 3.9322937150085875, |
| "learning_rate": 1.99699642894266e-05, |
| "loss": 1.4318, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.03447499013081068, |
| "grad_norm": 3.834658918473933, |
| "learning_rate": 1.9969862610871586e-05, |
| "loss": 1.4687, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.034516327049432756, |
| "grad_norm": 3.3637972502778912, |
| "learning_rate": 1.9969760760763045e-05, |
| "loss": 1.4661, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.034557663968054826, |
| "grad_norm": 3.5654841117273026, |
| "learning_rate": 1.9969658739102733e-05, |
| "loss": 1.4302, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.0345990008866769, |
| "grad_norm": 3.7270409908212194, |
| "learning_rate": 1.9969556545892405e-05, |
| "loss": 1.4447, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.03464033780529898, |
| "grad_norm": 3.914859995823126, |
| "learning_rate": 1.996945418113382e-05, |
| "loss": 1.4519, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.03468167472392106, |
| "grad_norm": 4.5791406660012095, |
| "learning_rate": 1.9969351644828742e-05, |
| "loss": 1.5204, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.03472301164254313, |
| "grad_norm": 3.6418617205879817, |
| "learning_rate": 1.9969248936978932e-05, |
| "loss": 1.4943, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.034764348561165205, |
| "grad_norm": 3.4609359361113534, |
| "learning_rate": 1.9969146057586156e-05, |
| "loss": 1.4799, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.03480568547978728, |
| "grad_norm": 3.9925889106780987, |
| "learning_rate": 1.9969043006652186e-05, |
| "loss": 1.4687, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.03484702239840935, |
| "grad_norm": 3.2054804086428548, |
| "learning_rate": 1.9968939784178794e-05, |
| "loss": 1.4816, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.03488835931703143, |
| "grad_norm": 3.349889124617882, |
| "learning_rate": 1.996883639016776e-05, |
| "loss": 1.4577, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.03492969623565351, |
| "grad_norm": 3.5789857070884086, |
| "learning_rate": 1.996873282462086e-05, |
| "loss": 1.5172, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.034971033154275584, |
| "grad_norm": 3.4989039985130272, |
| "learning_rate": 1.9968629087539876e-05, |
| "loss": 1.4852, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.035012370072897654, |
| "grad_norm": 3.3811833684889154, |
| "learning_rate": 1.9968525178926595e-05, |
| "loss": 1.4594, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.03505370699151973, |
| "grad_norm": 3.4385931194022223, |
| "learning_rate": 1.9968421098782803e-05, |
| "loss": 1.4595, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.03509504391014181, |
| "grad_norm": 4.16413923561225, |
| "learning_rate": 1.9968316847110292e-05, |
| "loss": 1.4963, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.03513638082876388, |
| "grad_norm": 4.289305042774894, |
| "learning_rate": 1.9968212423910855e-05, |
| "loss": 1.4551, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.035177717747385956, |
| "grad_norm": 5.453654756696216, |
| "learning_rate": 1.9968107829186287e-05, |
| "loss": 1.4885, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.03521905466600803, |
| "grad_norm": 3.6797118668666795, |
| "learning_rate": 1.996800306293839e-05, |
| "loss": 1.4984, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.03526039158463011, |
| "grad_norm": 3.2371854818646995, |
| "learning_rate": 1.9967898125168973e-05, |
| "loss": 1.4481, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.03530172850325218, |
| "grad_norm": 3.238508861502653, |
| "learning_rate": 1.9967793015879828e-05, |
| "loss": 1.4562, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.03534306542187426, |
| "grad_norm": 3.5415115005606177, |
| "learning_rate": 1.9967687735072776e-05, |
| "loss": 1.476, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.035384402340496335, |
| "grad_norm": 3.843042698193225, |
| "learning_rate": 1.9967582282749622e-05, |
| "loss": 1.4751, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.035425739259118405, |
| "grad_norm": 3.5779391668735006, |
| "learning_rate": 1.9967476658912184e-05, |
| "loss": 1.4804, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.03546707617774048, |
| "grad_norm": 4.949952686769368, |
| "learning_rate": 1.9967370863562276e-05, |
| "loss": 1.4245, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.03550841309636256, |
| "grad_norm": 3.8134978579481924, |
| "learning_rate": 1.996726489670172e-05, |
| "loss": 1.494, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.03554975001498463, |
| "grad_norm": 4.098567290916666, |
| "learning_rate": 1.996715875833234e-05, |
| "loss": 1.4339, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.03559108693360671, |
| "grad_norm": 3.4443466301897074, |
| "learning_rate": 1.9967052448455962e-05, |
| "loss": 1.4808, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.035632423852228784, |
| "grad_norm": 3.939242075931307, |
| "learning_rate": 1.9966945967074416e-05, |
| "loss": 1.4884, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.03567376077085086, |
| "grad_norm": 3.3941498280577975, |
| "learning_rate": 1.996683931418953e-05, |
| "loss": 1.4635, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.03571509768947293, |
| "grad_norm": 3.911248054251368, |
| "learning_rate": 1.996673248980315e-05, |
| "loss": 1.4785, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.03575643460809501, |
| "grad_norm": 4.0383619484944155, |
| "learning_rate": 1.99666254939171e-05, |
| "loss": 1.4334, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.035797771526717086, |
| "grad_norm": 3.21818266356431, |
| "learning_rate": 1.996651832653323e-05, |
| "loss": 1.5279, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.035839108445339156, |
| "grad_norm": 4.068360221073268, |
| "learning_rate": 1.9966410987653383e-05, |
| "loss": 1.5073, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.03588044536396123, |
| "grad_norm": 5.64416307388456, |
| "learning_rate": 1.9966303477279404e-05, |
| "loss": 1.4595, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.03592178228258331, |
| "grad_norm": 4.456991706091006, |
| "learning_rate": 1.9966195795413145e-05, |
| "loss": 1.5152, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.03596311920120539, |
| "grad_norm": 3.541237309488241, |
| "learning_rate": 1.9966087942056457e-05, |
| "loss": 1.4773, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.03600445611982746, |
| "grad_norm": 3.5306668816992186, |
| "learning_rate": 1.9965979917211196e-05, |
| "loss": 1.4838, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.036045793038449535, |
| "grad_norm": 4.051613339547623, |
| "learning_rate": 1.9965871720879223e-05, |
| "loss": 1.463, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.03608712995707161, |
| "grad_norm": 3.6129954404785, |
| "learning_rate": 1.9965763353062394e-05, |
| "loss": 1.4479, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.03612846687569368, |
| "grad_norm": 4.2384349637413825, |
| "learning_rate": 1.9965654813762582e-05, |
| "loss": 1.4928, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.03616980379431576, |
| "grad_norm": 4.343148391315392, |
| "learning_rate": 1.9965546102981652e-05, |
| "loss": 1.4418, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.03621114071293784, |
| "grad_norm": 3.9477945474327276, |
| "learning_rate": 1.996543722072147e-05, |
| "loss": 1.4417, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.036252477631559914, |
| "grad_norm": 3.912481869555381, |
| "learning_rate": 1.9965328166983916e-05, |
| "loss": 1.4877, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.036293814550181984, |
| "grad_norm": 4.391935734682612, |
| "learning_rate": 1.9965218941770857e-05, |
| "loss": 1.4335, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.03633515146880406, |
| "grad_norm": 4.493537291846412, |
| "learning_rate": 1.9965109545084185e-05, |
| "loss": 1.4919, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.03637648838742614, |
| "grad_norm": 2.93026955700472, |
| "learning_rate": 1.9964999976925775e-05, |
| "loss": 1.4304, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.03641782530604821, |
| "grad_norm": 3.6053506467813032, |
| "learning_rate": 1.9964890237297512e-05, |
| "loss": 1.4635, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.036459162224670286, |
| "grad_norm": 3.5234834011018648, |
| "learning_rate": 1.9964780326201286e-05, |
| "loss": 1.4981, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.03650049914329236, |
| "grad_norm": 3.750450253620856, |
| "learning_rate": 1.996467024363899e-05, |
| "loss": 1.4627, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.03654183606191444, |
| "grad_norm": 3.666723051780572, |
| "learning_rate": 1.9964559989612516e-05, |
| "loss": 1.4514, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.03658317298053651, |
| "grad_norm": 3.3239044375214633, |
| "learning_rate": 1.996444956412376e-05, |
| "loss": 1.4972, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.03662450989915859, |
| "grad_norm": 3.8599698199624064, |
| "learning_rate": 1.9964338967174625e-05, |
| "loss": 1.5057, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.036665846817780665, |
| "grad_norm": 4.132699231086706, |
| "learning_rate": 1.9964228198767012e-05, |
| "loss": 1.4519, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.036707183736402735, |
| "grad_norm": 3.0714085451165745, |
| "learning_rate": 1.9964117258902828e-05, |
| "loss": 1.434, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.03674852065502481, |
| "grad_norm": 3.8796486291954904, |
| "learning_rate": 1.9964006147583982e-05, |
| "loss": 1.4505, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.03678985757364689, |
| "grad_norm": 3.832002897416075, |
| "learning_rate": 1.9963894864812383e-05, |
| "loss": 1.4526, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.03683119449226896, |
| "grad_norm": 4.887224283091199, |
| "learning_rate": 1.9963783410589948e-05, |
| "loss": 1.4644, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.036872531410891037, |
| "grad_norm": 4.158724114940273, |
| "learning_rate": 1.99636717849186e-05, |
| "loss": 1.4417, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.036913868329513114, |
| "grad_norm": 3.81771878130769, |
| "learning_rate": 1.9963559987800253e-05, |
| "loss": 1.508, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.03695520524813519, |
| "grad_norm": 3.5553407292065207, |
| "learning_rate": 1.9963448019236834e-05, |
| "loss": 1.383, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.03699654216675726, |
| "grad_norm": 5.03061141095772, |
| "learning_rate": 1.9963335879230264e-05, |
| "loss": 1.4293, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.03703787908537934, |
| "grad_norm": 3.183233332739406, |
| "learning_rate": 1.996322356778248e-05, |
| "loss": 1.4355, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.037079216004001415, |
| "grad_norm": 3.555732688914675, |
| "learning_rate": 1.996311108489541e-05, |
| "loss": 1.4338, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.037120552922623486, |
| "grad_norm": 3.696220192021282, |
| "learning_rate": 1.9962998430570994e-05, |
| "loss": 1.4883, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.03716188984124556, |
| "grad_norm": 4.796096029475931, |
| "learning_rate": 1.9962885604811168e-05, |
| "loss": 1.4901, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.03720322675986764, |
| "grad_norm": 5.814203236754815, |
| "learning_rate": 1.996277260761787e-05, |
| "loss": 1.4053, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.03724456367848972, |
| "grad_norm": 3.3287110492970795, |
| "learning_rate": 1.996265943899305e-05, |
| "loss": 1.4202, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.03728590059711179, |
| "grad_norm": 3.877230681858091, |
| "learning_rate": 1.996254609893865e-05, |
| "loss": 1.4297, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.037327237515733865, |
| "grad_norm": 3.48844533397734, |
| "learning_rate": 1.9962432587456622e-05, |
| "loss": 1.4652, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.03736857443435594, |
| "grad_norm": 3.5520610987897943, |
| "learning_rate": 1.9962318904548923e-05, |
| "loss": 1.4807, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.03740991135297801, |
| "grad_norm": 3.181838391240591, |
| "learning_rate": 1.9962205050217504e-05, |
| "loss": 1.4757, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.03745124827160009, |
| "grad_norm": 3.7425531387998907, |
| "learning_rate": 1.996209102446433e-05, |
| "loss": 1.4331, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.037492585190222166, |
| "grad_norm": 3.663633392520708, |
| "learning_rate": 1.9961976827291358e-05, |
| "loss": 1.4718, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.03753392210884424, |
| "grad_norm": 4.833995454604731, |
| "learning_rate": 1.9961862458700554e-05, |
| "loss": 1.4217, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.037575259027466314, |
| "grad_norm": 3.6290459016542216, |
| "learning_rate": 1.9961747918693887e-05, |
| "loss": 1.4848, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.03761659594608839, |
| "grad_norm": 3.585806885070931, |
| "learning_rate": 1.9961633207273325e-05, |
| "loss": 1.4358, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.03765793286471047, |
| "grad_norm": 3.4952003665857134, |
| "learning_rate": 1.9961518324440847e-05, |
| "loss": 1.3939, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.03769926978333254, |
| "grad_norm": 3.279719203181294, |
| "learning_rate": 1.9961403270198424e-05, |
| "loss": 1.4808, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.037740606701954615, |
| "grad_norm": 3.2692766545796528, |
| "learning_rate": 1.9961288044548043e-05, |
| "loss": 1.3822, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.03778194362057669, |
| "grad_norm": 3.6490123739235623, |
| "learning_rate": 1.996117264749168e-05, |
| "loss": 1.4485, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.03782328053919877, |
| "grad_norm": 4.464763724322134, |
| "learning_rate": 1.996105707903132e-05, |
| "loss": 1.4795, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.03786461745782084, |
| "grad_norm": 3.529618572994803, |
| "learning_rate": 1.9960941339168963e-05, |
| "loss": 1.4452, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.03790595437644292, |
| "grad_norm": 3.949852891089842, |
| "learning_rate": 1.9960825427906587e-05, |
| "loss": 1.4866, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.037947291295064994, |
| "grad_norm": 6.3198129841396735, |
| "learning_rate": 1.9960709345246192e-05, |
| "loss": 1.4661, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.037988628213687065, |
| "grad_norm": 4.3016466998403775, |
| "learning_rate": 1.9960593091189776e-05, |
| "loss": 1.4575, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.03802996513230914, |
| "grad_norm": 3.218809542898574, |
| "learning_rate": 1.996047666573934e-05, |
| "loss": 1.4385, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.03807130205093122, |
| "grad_norm": 3.507546904929844, |
| "learning_rate": 1.9960360068896884e-05, |
| "loss": 1.456, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.038112638969553296, |
| "grad_norm": 3.2658287561416866, |
| "learning_rate": 1.9960243300664418e-05, |
| "loss": 1.4937, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.038153975888175366, |
| "grad_norm": 3.9657257849748078, |
| "learning_rate": 1.996012636104395e-05, |
| "loss": 1.4743, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.03819531280679744, |
| "grad_norm": 3.7419945345055865, |
| "learning_rate": 1.996000925003749e-05, |
| "loss": 1.4645, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.03823664972541952, |
| "grad_norm": 3.717998186266208, |
| "learning_rate": 1.9959891967647055e-05, |
| "loss": 1.4304, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.03827798664404159, |
| "grad_norm": 4.122611974230224, |
| "learning_rate": 1.9959774513874666e-05, |
| "loss": 1.4396, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.03831932356266367, |
| "grad_norm": 4.081766829152903, |
| "learning_rate": 1.9959656888722338e-05, |
| "loss": 1.4296, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.038360660481285745, |
| "grad_norm": 3.4327932618086807, |
| "learning_rate": 1.99595390921921e-05, |
| "loss": 1.479, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.038401997399907815, |
| "grad_norm": 4.251866528393302, |
| "learning_rate": 1.9959421124285976e-05, |
| "loss": 1.4399, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.03844333431852989, |
| "grad_norm": 4.132921022210262, |
| "learning_rate": 1.9959302985006e-05, |
| "loss": 1.4366, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.03848467123715197, |
| "grad_norm": 4.791211851168452, |
| "learning_rate": 1.9959184674354198e-05, |
| "loss": 1.4838, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.03852600815577405, |
| "grad_norm": 3.000007579210258, |
| "learning_rate": 1.995906619233261e-05, |
| "loss": 1.4541, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.03856734507439612, |
| "grad_norm": 5.059959210643911, |
| "learning_rate": 1.9958947538943278e-05, |
| "loss": 1.5233, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.038608681993018194, |
| "grad_norm": 3.20842711732194, |
| "learning_rate": 1.9958828714188236e-05, |
| "loss": 1.4718, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.03865001891164027, |
| "grad_norm": 3.796018357701994, |
| "learning_rate": 1.9958709718069532e-05, |
| "loss": 1.4522, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.03869135583026234, |
| "grad_norm": 3.9321479256125347, |
| "learning_rate": 1.995859055058922e-05, |
| "loss": 1.5065, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.03873269274888442, |
| "grad_norm": 3.254019632954085, |
| "learning_rate": 1.9958471211749342e-05, |
| "loss": 1.4114, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.038774029667506496, |
| "grad_norm": 3.3308294037697896, |
| "learning_rate": 1.9958351701551953e-05, |
| "loss": 1.4285, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.03881536658612857, |
| "grad_norm": 4.08834871777043, |
| "learning_rate": 1.9958232019999114e-05, |
| "loss": 1.4295, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.03885670350475064, |
| "grad_norm": 3.441069579264666, |
| "learning_rate": 1.995811216709288e-05, |
| "loss": 1.4472, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.03889804042337272, |
| "grad_norm": 3.426532775633606, |
| "learning_rate": 1.995799214283531e-05, |
| "loss": 1.4566, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.0389393773419948, |
| "grad_norm": 3.399689817601649, |
| "learning_rate": 1.9957871947228476e-05, |
| "loss": 1.4642, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.03898071426061687, |
| "grad_norm": 3.388140389856613, |
| "learning_rate": 1.995775158027445e-05, |
| "loss": 1.4593, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.039022051179238945, |
| "grad_norm": 3.325888034679041, |
| "learning_rate": 1.9957631041975292e-05, |
| "loss": 1.473, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.03906338809786102, |
| "grad_norm": 5.25058506424265, |
| "learning_rate": 1.995751033233308e-05, |
| "loss": 1.4085, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.0391047250164831, |
| "grad_norm": 3.8257776442726135, |
| "learning_rate": 1.9957389451349898e-05, |
| "loss": 1.4926, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.03914606193510517, |
| "grad_norm": 3.9914355755037514, |
| "learning_rate": 1.9957268399027815e-05, |
| "loss": 1.4433, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.03918739885372725, |
| "grad_norm": 4.259453650516103, |
| "learning_rate": 1.9957147175368923e-05, |
| "loss": 1.4435, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.039228735772349324, |
| "grad_norm": 3.4057039561381974, |
| "learning_rate": 1.99570257803753e-05, |
| "loss": 1.4021, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.039270072690971394, |
| "grad_norm": 3.9702568689341735, |
| "learning_rate": 1.9956904214049044e-05, |
| "loss": 1.3975, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.03931140960959347, |
| "grad_norm": 4.162984306124767, |
| "learning_rate": 1.995678247639224e-05, |
| "loss": 1.4269, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.03935274652821555, |
| "grad_norm": 3.4623660466543216, |
| "learning_rate": 1.9956660567406984e-05, |
| "loss": 1.4812, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.039394083446837626, |
| "grad_norm": 3.9487634862208663, |
| "learning_rate": 1.9956538487095375e-05, |
| "loss": 1.3904, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.039435420365459696, |
| "grad_norm": 3.940768474272943, |
| "learning_rate": 1.9956416235459514e-05, |
| "loss": 1.4627, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.03947675728408177, |
| "grad_norm": 3.7240510688214488, |
| "learning_rate": 1.9956293812501503e-05, |
| "loss": 1.4714, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.03951809420270385, |
| "grad_norm": 3.544248199002313, |
| "learning_rate": 1.995617121822345e-05, |
| "loss": 1.4418, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.03955943112132592, |
| "grad_norm": 3.7941720521427453, |
| "learning_rate": 1.9956048452627463e-05, |
| "loss": 1.398, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.039600768039948, |
| "grad_norm": 3.231769382614049, |
| "learning_rate": 1.9955925515715656e-05, |
| "loss": 1.4323, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.039642104958570075, |
| "grad_norm": 3.4504677343753585, |
| "learning_rate": 1.9955802407490144e-05, |
| "loss": 1.4508, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.039683441877192145, |
| "grad_norm": 4.608743387499926, |
| "learning_rate": 1.9955679127953046e-05, |
| "loss": 1.4849, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.03972477879581422, |
| "grad_norm": 3.2583619571782223, |
| "learning_rate": 1.995555567710648e-05, |
| "loss": 1.4528, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.0397661157144363, |
| "grad_norm": 3.592600847545303, |
| "learning_rate": 1.9955432054952573e-05, |
| "loss": 1.4222, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.03980745263305838, |
| "grad_norm": 3.935340478064598, |
| "learning_rate": 1.9955308261493457e-05, |
| "loss": 1.4243, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.03984878955168045, |
| "grad_norm": 3.7051161334020075, |
| "learning_rate": 1.995518429673125e-05, |
| "loss": 1.4487, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.039890126470302524, |
| "grad_norm": 3.6647142900939977, |
| "learning_rate": 1.9955060160668095e-05, |
| "loss": 1.4458, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.0399314633889246, |
| "grad_norm": 4.428497991354939, |
| "learning_rate": 1.9954935853306124e-05, |
| "loss": 1.4721, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.03997280030754667, |
| "grad_norm": 3.2958564393103056, |
| "learning_rate": 1.9954811374647474e-05, |
| "loss": 1.4394, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.04001413722616875, |
| "grad_norm": 3.10104196973718, |
| "learning_rate": 1.9954686724694297e-05, |
| "loss": 1.4361, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.040055474144790826, |
| "grad_norm": 3.957938872776804, |
| "learning_rate": 1.9954561903448727e-05, |
| "loss": 1.4602, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.0400968110634129, |
| "grad_norm": 3.760794840185392, |
| "learning_rate": 1.9954436910912914e-05, |
| "loss": 1.4285, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.04013814798203497, |
| "grad_norm": 3.421396807117046, |
| "learning_rate": 1.9954311747089012e-05, |
| "loss": 1.4774, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.04017948490065705, |
| "grad_norm": 3.91789094802535, |
| "learning_rate": 1.9954186411979175e-05, |
| "loss": 1.4021, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.04022082181927913, |
| "grad_norm": 3.081464490088515, |
| "learning_rate": 1.9954060905585556e-05, |
| "loss": 1.4219, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.0402621587379012, |
| "grad_norm": 3.3381107703512507, |
| "learning_rate": 1.9953935227910316e-05, |
| "loss": 1.4632, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.040303495656523275, |
| "grad_norm": 3.8300980744875828, |
| "learning_rate": 1.995380937895562e-05, |
| "loss": 1.4322, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.04034483257514535, |
| "grad_norm": 3.4534661824404633, |
| "learning_rate": 1.995368335872363e-05, |
| "loss": 1.4436, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.04038616949376743, |
| "grad_norm": 3.983712880561037, |
| "learning_rate": 1.995355716721652e-05, |
| "loss": 1.4598, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.0404275064123895, |
| "grad_norm": 3.840919795852268, |
| "learning_rate": 1.995343080443645e-05, |
| "loss": 1.4456, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.04046884333101158, |
| "grad_norm": 3.901157368681076, |
| "learning_rate": 1.9953304270385607e-05, |
| "loss": 1.4525, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.040510180249633654, |
| "grad_norm": 3.189808091891606, |
| "learning_rate": 1.9953177565066163e-05, |
| "loss": 1.4462, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.040551517168255724, |
| "grad_norm": 4.025890961267514, |
| "learning_rate": 1.9953050688480293e-05, |
| "loss": 1.443, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.0405928540868778, |
| "grad_norm": 3.3710964799433007, |
| "learning_rate": 1.995292364063019e-05, |
| "loss": 1.4262, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.04063419100549988, |
| "grad_norm": 3.883950857165337, |
| "learning_rate": 1.9952796421518034e-05, |
| "loss": 1.4174, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.040675527924121956, |
| "grad_norm": 3.474777308443348, |
| "learning_rate": 1.995266903114602e-05, |
| "loss": 1.4654, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.040716864842744026, |
| "grad_norm": 3.116715119287666, |
| "learning_rate": 1.995254146951633e-05, |
| "loss": 1.3727, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.0407582017613661, |
| "grad_norm": 3.9762493552410203, |
| "learning_rate": 1.9952413736631165e-05, |
| "loss": 1.4567, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.04079953867998818, |
| "grad_norm": 2.9468825909120033, |
| "learning_rate": 1.9952285832492726e-05, |
| "loss": 1.4422, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.04084087559861025, |
| "grad_norm": 3.5348361015353444, |
| "learning_rate": 1.995215775710321e-05, |
| "loss": 1.3795, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.04088221251723233, |
| "grad_norm": 3.276927230678387, |
| "learning_rate": 1.995202951046482e-05, |
| "loss": 1.4218, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.040923549435854405, |
| "grad_norm": 3.606741214717579, |
| "learning_rate": 1.9951901092579763e-05, |
| "loss": 1.4364, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.040964886354476475, |
| "grad_norm": 4.3260733895333425, |
| "learning_rate": 1.9951772503450252e-05, |
| "loss": 1.4398, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.04100622327309855, |
| "grad_norm": 3.1621905456493544, |
| "learning_rate": 1.9951643743078496e-05, |
| "loss": 1.4397, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.04104756019172063, |
| "grad_norm": 3.4412582079657623, |
| "learning_rate": 1.9951514811466713e-05, |
| "loss": 1.4036, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.041088897110342706, |
| "grad_norm": 4.873896899838307, |
| "learning_rate": 1.995138570861712e-05, |
| "loss": 1.4263, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.04113023402896478, |
| "grad_norm": 4.331814069124075, |
| "learning_rate": 1.9951256434531943e-05, |
| "loss": 1.4817, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.041171570947586854, |
| "grad_norm": 3.9349360259135926, |
| "learning_rate": 1.9951126989213398e-05, |
| "loss": 1.4483, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.04121290786620893, |
| "grad_norm": 3.3613448968668864, |
| "learning_rate": 1.995099737266372e-05, |
| "loss": 1.4229, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.041254244784831, |
| "grad_norm": 3.549433934959654, |
| "learning_rate": 1.9950867584885132e-05, |
| "loss": 1.4283, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.04129558170345308, |
| "grad_norm": 3.5652364655273208, |
| "learning_rate": 1.995073762587987e-05, |
| "loss": 1.4642, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.041336918622075156, |
| "grad_norm": 4.029695967481624, |
| "learning_rate": 1.995060749565018e-05, |
| "loss": 1.3657, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.041336918622075156, |
| "eval_loss": 1.736175537109375, |
| "eval_runtime": 393.8494, |
| "eval_samples_per_second": 10.4, |
| "eval_steps_per_second": 2.6, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.04137825554069723, |
| "grad_norm": 3.414046152937389, |
| "learning_rate": 1.9950477194198287e-05, |
| "loss": 1.3957, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.0414195924593193, |
| "grad_norm": 5.320606616740586, |
| "learning_rate": 1.9950346721526443e-05, |
| "loss": 1.4508, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.04146092937794138, |
| "grad_norm": 3.9807925522216423, |
| "learning_rate": 1.9950216077636886e-05, |
| "loss": 1.3943, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.04150226629656346, |
| "grad_norm": 3.501083066632413, |
| "learning_rate": 1.9950085262531868e-05, |
| "loss": 1.4352, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.04154360321518553, |
| "grad_norm": 3.771268569637735, |
| "learning_rate": 1.994995427621364e-05, |
| "loss": 1.452, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.041584940133807605, |
| "grad_norm": 3.8515101224909216, |
| "learning_rate": 1.9949823118684454e-05, |
| "loss": 1.4306, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.04162627705242968, |
| "grad_norm": 3.7934745333554782, |
| "learning_rate": 1.9949691789946567e-05, |
| "loss": 1.4805, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.04166761397105176, |
| "grad_norm": 3.396103842576295, |
| "learning_rate": 1.9949560290002245e-05, |
| "loss": 1.4516, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.04170895088967383, |
| "grad_norm": 3.4268415637061085, |
| "learning_rate": 1.994942861885374e-05, |
| "loss": 1.4143, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.041750287808295906, |
| "grad_norm": 3.4582505595292203, |
| "learning_rate": 1.9949296776503324e-05, |
| "loss": 1.3815, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.041791624726917984, |
| "grad_norm": 3.5395990026077304, |
| "learning_rate": 1.994916476295327e-05, |
| "loss": 1.4449, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.041832961645540054, |
| "grad_norm": 3.4229281128115403, |
| "learning_rate": 1.9949032578205834e-05, |
| "loss": 1.4526, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.04187429856416213, |
| "grad_norm": 3.983206436567361, |
| "learning_rate": 1.994890022226331e-05, |
| "loss": 1.4463, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.04191563548278421, |
| "grad_norm": 3.668734425155437, |
| "learning_rate": 1.9948767695127964e-05, |
| "loss": 1.419, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.041956972401406285, |
| "grad_norm": 3.3634372280714517, |
| "learning_rate": 1.9948634996802078e-05, |
| "loss": 1.4329, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.041998309320028356, |
| "grad_norm": 4.062775728402737, |
| "learning_rate": 1.9948502127287936e-05, |
| "loss": 1.4361, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.04203964623865043, |
| "grad_norm": 3.4149660693597084, |
| "learning_rate": 1.9948369086587823e-05, |
| "loss": 1.4725, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.04208098315727251, |
| "grad_norm": 3.6916128915527313, |
| "learning_rate": 1.9948235874704035e-05, |
| "loss": 1.4732, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.04212232007589458, |
| "grad_norm": 3.9231999868924206, |
| "learning_rate": 1.9948102491638853e-05, |
| "loss": 1.4558, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.04216365699451666, |
| "grad_norm": 4.846870150341976, |
| "learning_rate": 1.9947968937394583e-05, |
| "loss": 1.4455, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.042204993913138734, |
| "grad_norm": 3.426175390236964, |
| "learning_rate": 1.9947835211973517e-05, |
| "loss": 1.3997, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.042246330831760805, |
| "grad_norm": 3.7909997652306258, |
| "learning_rate": 1.9947701315377954e-05, |
| "loss": 1.4361, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.04228766775038288, |
| "grad_norm": 3.535939765317278, |
| "learning_rate": 1.9947567247610206e-05, |
| "loss": 1.4449, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.04232900466900496, |
| "grad_norm": 3.3731810089302523, |
| "learning_rate": 1.9947433008672572e-05, |
| "loss": 1.4193, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.042370341587627036, |
| "grad_norm": 3.9292291070435077, |
| "learning_rate": 1.9947298598567364e-05, |
| "loss": 1.4657, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.042411678506249106, |
| "grad_norm": 3.369066359531392, |
| "learning_rate": 1.99471640172969e-05, |
| "loss": 1.4509, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.042453015424871184, |
| "grad_norm": 3.6668982318612495, |
| "learning_rate": 1.994702926486349e-05, |
| "loss": 1.3931, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.04249435234349326, |
| "grad_norm": 3.2034209344506097, |
| "learning_rate": 1.9946894341269453e-05, |
| "loss": 1.4217, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.04253568926211533, |
| "grad_norm": 4.400853617662863, |
| "learning_rate": 1.9946759246517113e-05, |
| "loss": 1.4544, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.04257702618073741, |
| "grad_norm": 3.1712083272819473, |
| "learning_rate": 1.9946623980608792e-05, |
| "loss": 1.4813, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.042618363099359485, |
| "grad_norm": 3.5677581184867395, |
| "learning_rate": 1.994648854354682e-05, |
| "loss": 1.4321, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.04265970001798156, |
| "grad_norm": 3.38462741867337, |
| "learning_rate": 1.9946352935333528e-05, |
| "loss": 1.3907, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.04270103693660363, |
| "grad_norm": 3.6690520985143054, |
| "learning_rate": 1.994621715597125e-05, |
| "loss": 1.453, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.04274237385522571, |
| "grad_norm": 3.628541207308318, |
| "learning_rate": 1.9946081205462315e-05, |
| "loss": 1.4224, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.04278371077384779, |
| "grad_norm": 3.573675637942579, |
| "learning_rate": 1.994594508380907e-05, |
| "loss": 1.4409, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.04282504769246986, |
| "grad_norm": 3.9512735810988584, |
| "learning_rate": 1.9945808791013857e-05, |
| "loss": 1.4116, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.042866384611091934, |
| "grad_norm": 3.2189804332946936, |
| "learning_rate": 1.994567232707902e-05, |
| "loss": 1.4239, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.04290772152971401, |
| "grad_norm": 3.277452726822312, |
| "learning_rate": 1.9945535692006903e-05, |
| "loss": 1.419, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.04294905844833609, |
| "grad_norm": 3.2409307004738594, |
| "learning_rate": 1.994539888579986e-05, |
| "loss": 1.412, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.04299039536695816, |
| "grad_norm": 5.955091940094864, |
| "learning_rate": 1.9945261908460248e-05, |
| "loss": 1.4001, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.043031732285580236, |
| "grad_norm": 3.626590483447886, |
| "learning_rate": 1.9945124759990424e-05, |
| "loss": 1.4598, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.04307306920420231, |
| "grad_norm": 3.4065599382832197, |
| "learning_rate": 1.9944987440392742e-05, |
| "loss": 1.3991, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.043114406122824384, |
| "grad_norm": 3.3228235524159824, |
| "learning_rate": 1.994484994966957e-05, |
| "loss": 1.4069, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.04315574304144646, |
| "grad_norm": 4.069300982498486, |
| "learning_rate": 1.9944712287823275e-05, |
| "loss": 1.4376, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.04319707996006854, |
| "grad_norm": 3.668990751455877, |
| "learning_rate": 1.9944574454856216e-05, |
| "loss": 1.4185, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.043238416878690615, |
| "grad_norm": 3.189803317003545, |
| "learning_rate": 1.9944436450770775e-05, |
| "loss": 1.3998, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.043279753797312685, |
| "grad_norm": 3.7817594340150924, |
| "learning_rate": 1.9944298275569328e-05, |
| "loss": 1.4494, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.04332109071593476, |
| "grad_norm": 4.780904235096889, |
| "learning_rate": 1.9944159929254245e-05, |
| "loss": 1.4616, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.04336242763455684, |
| "grad_norm": 4.010329780152807, |
| "learning_rate": 1.9944021411827905e-05, |
| "loss": 1.4532, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.04340376455317891, |
| "grad_norm": 4.209509632753131, |
| "learning_rate": 1.9943882723292704e-05, |
| "loss": 1.4622, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.04344510147180099, |
| "grad_norm": 3.228687583673167, |
| "learning_rate": 1.9943743863651017e-05, |
| "loss": 1.4053, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.043486438390423064, |
| "grad_norm": 3.288729771085999, |
| "learning_rate": 1.994360483290523e-05, |
| "loss": 1.4192, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.043527775309045134, |
| "grad_norm": 4.41078023777337, |
| "learning_rate": 1.994346563105775e-05, |
| "loss": 1.4098, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.04356911222766721, |
| "grad_norm": 3.1815816594140487, |
| "learning_rate": 1.9943326258110963e-05, |
| "loss": 1.4676, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.04361044914628929, |
| "grad_norm": 3.554730176042178, |
| "learning_rate": 1.994318671406727e-05, |
| "loss": 1.4262, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.043651786064911366, |
| "grad_norm": 4.564103408690964, |
| "learning_rate": 1.9943046998929073e-05, |
| "loss": 1.4104, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.043693122983533436, |
| "grad_norm": 3.5454961573863994, |
| "learning_rate": 1.994290711269877e-05, |
| "loss": 1.4235, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.04373445990215551, |
| "grad_norm": 3.6248317766975857, |
| "learning_rate": 1.9942767055378775e-05, |
| "loss": 1.3733, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.04377579682077759, |
| "grad_norm": 3.2489128741687123, |
| "learning_rate": 1.9942626826971493e-05, |
| "loss": 1.4456, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.04381713373939966, |
| "grad_norm": 3.5799361135868057, |
| "learning_rate": 1.994248642747934e-05, |
| "loss": 1.4071, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.04385847065802174, |
| "grad_norm": 3.4391607635624033, |
| "learning_rate": 1.9942345856904727e-05, |
| "loss": 1.388, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.043899807576643815, |
| "grad_norm": 4.900926633402934, |
| "learning_rate": 1.994220511525008e-05, |
| "loss": 1.4214, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.04394114449526589, |
| "grad_norm": 3.1998682814537807, |
| "learning_rate": 1.994206420251782e-05, |
| "loss": 1.4392, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.04398248141388796, |
| "grad_norm": 3.512730762072939, |
| "learning_rate": 1.9941923118710366e-05, |
| "loss": 1.3833, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.04402381833251004, |
| "grad_norm": 3.5959575986075354, |
| "learning_rate": 1.9941781863830153e-05, |
| "loss": 1.4666, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.04406515525113212, |
| "grad_norm": 4.116993239444605, |
| "learning_rate": 1.9941640437879603e-05, |
| "loss": 1.417, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.04410649216975419, |
| "grad_norm": 4.587080576933717, |
| "learning_rate": 1.9941498840861153e-05, |
| "loss": 1.3558, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.044147829088376264, |
| "grad_norm": 3.923348655449712, |
| "learning_rate": 1.9941357072777245e-05, |
| "loss": 1.403, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.04418916600699834, |
| "grad_norm": 3.3131343328753884, |
| "learning_rate": 1.9941215133630312e-05, |
| "loss": 1.414, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.04423050292562042, |
| "grad_norm": 3.815180569497117, |
| "learning_rate": 1.9941073023422796e-05, |
| "loss": 1.4567, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.04427183984424249, |
| "grad_norm": 3.0191885771803264, |
| "learning_rate": 1.994093074215715e-05, |
| "loss": 1.4257, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.044313176762864566, |
| "grad_norm": 3.4376292652965494, |
| "learning_rate": 1.994078828983581e-05, |
| "loss": 1.4118, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.04435451368148664, |
| "grad_norm": 3.5106899932837643, |
| "learning_rate": 1.994064566646124e-05, |
| "loss": 1.4159, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.04439585060010871, |
| "grad_norm": 3.6846637686102413, |
| "learning_rate": 1.9940502872035888e-05, |
| "loss": 1.3948, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.04443718751873079, |
| "grad_norm": 3.657265133747329, |
| "learning_rate": 1.9940359906562207e-05, |
| "loss": 1.4087, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.04447852443735287, |
| "grad_norm": 4.430332521129557, |
| "learning_rate": 1.9940216770042666e-05, |
| "loss": 1.3989, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.044519861355974945, |
| "grad_norm": 4.43254812995105, |
| "learning_rate": 1.994007346247972e-05, |
| "loss": 1.3781, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.044561198274597015, |
| "grad_norm": 3.547905857131194, |
| "learning_rate": 1.9939929983875837e-05, |
| "loss": 1.443, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.04460253519321909, |
| "grad_norm": 4.225199610421922, |
| "learning_rate": 1.9939786334233492e-05, |
| "loss": 1.3992, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.04464387211184117, |
| "grad_norm": 3.2850031799014494, |
| "learning_rate": 1.993964251355515e-05, |
| "loss": 1.39, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.04468520903046324, |
| "grad_norm": 3.576860893151518, |
| "learning_rate": 1.993949852184329e-05, |
| "loss": 1.4019, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.04472654594908532, |
| "grad_norm": 4.14729049725031, |
| "learning_rate": 1.9939354359100385e-05, |
| "loss": 1.407, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.044767882867707394, |
| "grad_norm": 3.6785935387585447, |
| "learning_rate": 1.9939210025328915e-05, |
| "loss": 1.4188, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.044809219786329464, |
| "grad_norm": 3.475380301816819, |
| "learning_rate": 1.993906552053137e-05, |
| "loss": 1.4146, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.04485055670495154, |
| "grad_norm": 3.3679721828323217, |
| "learning_rate": 1.9938920844710235e-05, |
| "loss": 1.4208, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.04489189362357362, |
| "grad_norm": 3.679471702118622, |
| "learning_rate": 1.9938775997867995e-05, |
| "loss": 1.4209, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.044933230542195696, |
| "grad_norm": 3.8980049289176377, |
| "learning_rate": 1.9938630980007147e-05, |
| "loss": 1.4121, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.044974567460817766, |
| "grad_norm": 3.7079901840906713, |
| "learning_rate": 1.9938485791130183e-05, |
| "loss": 1.3969, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.04501590437943984, |
| "grad_norm": 3.7675855531387996, |
| "learning_rate": 1.9938340431239603e-05, |
| "loss": 1.4012, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.04505724129806192, |
| "grad_norm": 3.3894112723434127, |
| "learning_rate": 1.9938194900337908e-05, |
| "loss": 1.4184, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.04509857821668399, |
| "grad_norm": 4.1568950335530825, |
| "learning_rate": 1.9938049198427604e-05, |
| "loss": 1.452, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.04513991513530607, |
| "grad_norm": 3.630087506411177, |
| "learning_rate": 1.9937903325511193e-05, |
| "loss": 1.4657, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.045181252053928145, |
| "grad_norm": 3.510575809020148, |
| "learning_rate": 1.9937757281591187e-05, |
| "loss": 1.4341, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.04522258897255022, |
| "grad_norm": 3.309825385197255, |
| "learning_rate": 1.9937611066670106e-05, |
| "loss": 1.3789, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.04526392589117229, |
| "grad_norm": 3.239522136091904, |
| "learning_rate": 1.9937464680750454e-05, |
| "loss": 1.4103, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.04530526280979437, |
| "grad_norm": 4.673675936224972, |
| "learning_rate": 1.9937318123834762e-05, |
| "loss": 1.3989, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.04534659972841645, |
| "grad_norm": 4.627358104306948, |
| "learning_rate": 1.9937171395925544e-05, |
| "loss": 1.4203, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.04538793664703852, |
| "grad_norm": 3.311365466265083, |
| "learning_rate": 1.9937024497025325e-05, |
| "loss": 1.389, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.045429273565660594, |
| "grad_norm": 4.134318195617502, |
| "learning_rate": 1.9936877427136637e-05, |
| "loss": 1.4224, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.04547061048428267, |
| "grad_norm": 3.006959002241816, |
| "learning_rate": 1.9936730186262007e-05, |
| "loss": 1.3988, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.04551194740290475, |
| "grad_norm": 4.01529741437254, |
| "learning_rate": 1.993658277440397e-05, |
| "loss": 1.4396, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.04555328432152682, |
| "grad_norm": 3.2748525540941507, |
| "learning_rate": 1.993643519156506e-05, |
| "loss": 1.3921, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.045594621240148896, |
| "grad_norm": 4.018973443549097, |
| "learning_rate": 1.9936287437747822e-05, |
| "loss": 1.3617, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.04563595815877097, |
| "grad_norm": 3.462150874636636, |
| "learning_rate": 1.993613951295479e-05, |
| "loss": 1.4075, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.04567729507739304, |
| "grad_norm": 3.6284928503493528, |
| "learning_rate": 1.9935991417188523e-05, |
| "loss": 1.3774, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.04571863199601512, |
| "grad_norm": 4.331109900085688, |
| "learning_rate": 1.9935843150451558e-05, |
| "loss": 1.4156, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.0457599689146372, |
| "grad_norm": 3.749722584209809, |
| "learning_rate": 1.9935694712746448e-05, |
| "loss": 1.4314, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.045801305833259275, |
| "grad_norm": 3.1035349095523266, |
| "learning_rate": 1.9935546104075746e-05, |
| "loss": 1.4167, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.045842642751881345, |
| "grad_norm": 4.134657657963317, |
| "learning_rate": 1.9935397324442015e-05, |
| "loss": 1.4377, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.04588397967050342, |
| "grad_norm": 3.378268647534537, |
| "learning_rate": 1.993524837384781e-05, |
| "loss": 1.4201, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.0459253165891255, |
| "grad_norm": 3.3061200097201615, |
| "learning_rate": 1.9935099252295694e-05, |
| "loss": 1.391, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.04596665350774757, |
| "grad_norm": 3.6514603716731133, |
| "learning_rate": 1.9934949959788237e-05, |
| "loss": 1.4423, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.046007990426369647, |
| "grad_norm": 3.2198262717397896, |
| "learning_rate": 1.9934800496328006e-05, |
| "loss": 1.4049, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.046049327344991724, |
| "grad_norm": 3.1555815125987197, |
| "learning_rate": 1.993465086191757e-05, |
| "loss": 1.4418, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.0460906642636138, |
| "grad_norm": 3.6391685610732476, |
| "learning_rate": 1.993450105655951e-05, |
| "loss": 1.3824, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.04613200118223587, |
| "grad_norm": 3.426206028662225, |
| "learning_rate": 1.9934351080256395e-05, |
| "loss": 1.3837, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.04617333810085795, |
| "grad_norm": 4.616699518929945, |
| "learning_rate": 1.9934200933010816e-05, |
| "loss": 1.3886, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.046214675019480025, |
| "grad_norm": 3.919463255914606, |
| "learning_rate": 1.993405061482535e-05, |
| "loss": 1.418, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.046256011938102096, |
| "grad_norm": 4.372063175345489, |
| "learning_rate": 1.9933900125702582e-05, |
| "loss": 1.3976, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.04629734885672417, |
| "grad_norm": 4.040676043168612, |
| "learning_rate": 1.9933749465645103e-05, |
| "loss": 1.4122, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.04633868577534625, |
| "grad_norm": 3.3730911260973815, |
| "learning_rate": 1.9933598634655512e-05, |
| "loss": 1.3707, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.04638002269396832, |
| "grad_norm": 3.6851619767350066, |
| "learning_rate": 1.9933447632736393e-05, |
| "loss": 1.398, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.0464213596125904, |
| "grad_norm": 4.001189833407079, |
| "learning_rate": 1.9933296459890355e-05, |
| "loss": 1.4071, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.046462696531212475, |
| "grad_norm": 3.3898995076664757, |
| "learning_rate": 1.993314511611999e-05, |
| "loss": 1.408, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.04650403344983455, |
| "grad_norm": 3.6996997277102146, |
| "learning_rate": 1.9932993601427912e-05, |
| "loss": 1.3975, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.04654537036845662, |
| "grad_norm": 3.313365690401916, |
| "learning_rate": 1.993284191581672e-05, |
| "loss": 1.408, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.0465867072870787, |
| "grad_norm": 4.633876867393197, |
| "learning_rate": 1.993269005928903e-05, |
| "loss": 1.396, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.046628044205700776, |
| "grad_norm": 3.5037620526852304, |
| "learning_rate": 1.993253803184745e-05, |
| "loss": 1.4024, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.046669381124322847, |
| "grad_norm": 3.081503642322238, |
| "learning_rate": 1.9932385833494597e-05, |
| "loss": 1.4109, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.046710718042944924, |
| "grad_norm": 3.547058360569091, |
| "learning_rate": 1.9932233464233092e-05, |
| "loss": 1.3796, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.046752054961567, |
| "grad_norm": 3.814887745242394, |
| "learning_rate": 1.9932080924065556e-05, |
| "loss": 1.4401, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.04679339188018908, |
| "grad_norm": 3.5004316252867085, |
| "learning_rate": 1.993192821299461e-05, |
| "loss": 1.452, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.04683472879881115, |
| "grad_norm": 3.449228750426351, |
| "learning_rate": 1.993177533102289e-05, |
| "loss": 1.3734, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.046876065717433225, |
| "grad_norm": 3.2964308484381784, |
| "learning_rate": 1.9931622278153024e-05, |
| "loss": 1.4018, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.0469174026360553, |
| "grad_norm": 2.8180078039607697, |
| "learning_rate": 1.993146905438764e-05, |
| "loss": 1.4081, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.04695873955467737, |
| "grad_norm": 3.2236402061866545, |
| "learning_rate": 1.9931315659729376e-05, |
| "loss": 1.4534, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.04700007647329945, |
| "grad_norm": 3.5082002531342473, |
| "learning_rate": 1.9931162094180874e-05, |
| "loss": 1.4173, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.04704141339192153, |
| "grad_norm": 3.284436648082263, |
| "learning_rate": 1.993100835774478e-05, |
| "loss": 1.4092, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.047082750310543604, |
| "grad_norm": 3.4816460403688314, |
| "learning_rate": 1.9930854450423736e-05, |
| "loss": 1.3913, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.047124087229165675, |
| "grad_norm": 4.14476906280773, |
| "learning_rate": 1.9930700372220387e-05, |
| "loss": 1.3703, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.04716542414778775, |
| "grad_norm": 4.375945137034217, |
| "learning_rate": 1.993054612313739e-05, |
| "loss": 1.4362, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.04720676106640983, |
| "grad_norm": 4.277168672236525, |
| "learning_rate": 1.993039170317739e-05, |
| "loss": 1.479, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.0472480979850319, |
| "grad_norm": 3.927053905964369, |
| "learning_rate": 1.9930237112343056e-05, |
| "loss": 1.3872, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.047289434903653976, |
| "grad_norm": 3.607150421833661, |
| "learning_rate": 1.9930082350637042e-05, |
| "loss": 1.3891, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.04733077182227605, |
| "grad_norm": 3.338415521714108, |
| "learning_rate": 1.992992741806201e-05, |
| "loss": 1.4211, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.04737210874089813, |
| "grad_norm": 3.629104841971202, |
| "learning_rate": 1.9929772314620627e-05, |
| "loss": 1.3425, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.0474134456595202, |
| "grad_norm": 3.099599716044088, |
| "learning_rate": 1.9929617040315563e-05, |
| "loss": 1.382, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.04745478257814228, |
| "grad_norm": 3.7158273713517893, |
| "learning_rate": 1.992946159514949e-05, |
| "loss": 1.4361, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.047496119496764355, |
| "grad_norm": 4.389528378421289, |
| "learning_rate": 1.992930597912508e-05, |
| "loss": 1.4435, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.047537456415386425, |
| "grad_norm": 3.669631509338215, |
| "learning_rate": 1.9929150192245016e-05, |
| "loss": 1.4321, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.0475787933340085, |
| "grad_norm": 3.4995914344254624, |
| "learning_rate": 1.992899423451197e-05, |
| "loss": 1.446, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.04762013025263058, |
| "grad_norm": 3.3809354325443017, |
| "learning_rate": 1.9928838105928635e-05, |
| "loss": 1.3941, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.04766146717125265, |
| "grad_norm": 3.1788524195701684, |
| "learning_rate": 1.9928681806497693e-05, |
| "loss": 1.4027, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.04770280408987473, |
| "grad_norm": 3.368111520244943, |
| "learning_rate": 1.9928525336221837e-05, |
| "loss": 1.4038, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.047744141008496804, |
| "grad_norm": 3.6045389016529636, |
| "learning_rate": 1.992836869510375e-05, |
| "loss": 1.4523, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.04778547792711888, |
| "grad_norm": 3.859885628963866, |
| "learning_rate": 1.9928211883146136e-05, |
| "loss": 1.4307, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.04782681484574095, |
| "grad_norm": 3.2925025525674756, |
| "learning_rate": 1.9928054900351693e-05, |
| "loss": 1.4473, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.04786815176436303, |
| "grad_norm": 4.075772971569745, |
| "learning_rate": 1.992789774672312e-05, |
| "loss": 1.4384, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.047909488682985106, |
| "grad_norm": 3.391288577095074, |
| "learning_rate": 1.9927740422263117e-05, |
| "loss": 1.4038, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.047950825601607176, |
| "grad_norm": 3.4783586083297626, |
| "learning_rate": 1.9927582926974402e-05, |
| "loss": 1.3911, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.04799216252022925, |
| "grad_norm": 3.468149363696469, |
| "learning_rate": 1.9927425260859673e-05, |
| "loss": 1.4123, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.04803349943885133, |
| "grad_norm": 3.464512963986536, |
| "learning_rate": 1.992726742392165e-05, |
| "loss": 1.3973, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.04807483635747341, |
| "grad_norm": 4.061746689943389, |
| "learning_rate": 1.992710941616305e-05, |
| "loss": 1.3841, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.04811617327609548, |
| "grad_norm": 4.139608234706919, |
| "learning_rate": 1.992695123758659e-05, |
| "loss": 1.3787, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.048157510194717555, |
| "grad_norm": 3.8447070353873025, |
| "learning_rate": 1.992679288819499e-05, |
| "loss": 1.3815, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.04819884711333963, |
| "grad_norm": 3.6946628562082693, |
| "learning_rate": 1.9926634367990973e-05, |
| "loss": 1.3788, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.0482401840319617, |
| "grad_norm": 3.3618354267056465, |
| "learning_rate": 1.992647567697727e-05, |
| "loss": 1.4063, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.04828152095058378, |
| "grad_norm": 3.495643041214259, |
| "learning_rate": 1.9926316815156617e-05, |
| "loss": 1.4348, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.04832285786920586, |
| "grad_norm": 3.552951920155812, |
| "learning_rate": 1.9926157782531735e-05, |
| "loss": 1.3604, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.048364194787827934, |
| "grad_norm": 3.2600229354667025, |
| "learning_rate": 1.9925998579105374e-05, |
| "loss": 1.3395, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.048405531706450004, |
| "grad_norm": 3.4189972893062635, |
| "learning_rate": 1.9925839204880263e-05, |
| "loss": 1.4291, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.04844686862507208, |
| "grad_norm": 3.2696931259943494, |
| "learning_rate": 1.9925679659859148e-05, |
| "loss": 1.3748, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.04848820554369416, |
| "grad_norm": 3.400522141333647, |
| "learning_rate": 1.9925519944044772e-05, |
| "loss": 1.4141, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.04852954246231623, |
| "grad_norm": 3.347216625916271, |
| "learning_rate": 1.9925360057439887e-05, |
| "loss": 1.4062, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.048570879380938306, |
| "grad_norm": 3.55815106093295, |
| "learning_rate": 1.9925200000047248e-05, |
| "loss": 1.4056, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.04861221629956038, |
| "grad_norm": 4.75921473200291, |
| "learning_rate": 1.99250397718696e-05, |
| "loss": 1.418, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.04865355321818246, |
| "grad_norm": 3.2697324056422588, |
| "learning_rate": 1.9924879372909703e-05, |
| "loss": 1.4015, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.04869489013680453, |
| "grad_norm": 3.590517133814017, |
| "learning_rate": 1.9924718803170324e-05, |
| "loss": 1.3738, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.04873622705542661, |
| "grad_norm": 3.579850493829701, |
| "learning_rate": 1.9924558062654215e-05, |
| "loss": 1.334, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.048777563974048685, |
| "grad_norm": 3.552170187760315, |
| "learning_rate": 1.9924397151364148e-05, |
| "loss": 1.4169, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.048818900892670755, |
| "grad_norm": 3.0842304871945037, |
| "learning_rate": 1.992423606930289e-05, |
| "loss": 1.3906, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.04886023781129283, |
| "grad_norm": 3.379537547677814, |
| "learning_rate": 1.9924074816473215e-05, |
| "loss": 1.4351, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.04890157472991491, |
| "grad_norm": 3.53218922925584, |
| "learning_rate": 1.9923913392877896e-05, |
| "loss": 1.4032, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.04894291164853698, |
| "grad_norm": 5.016060492627983, |
| "learning_rate": 1.992375179851971e-05, |
| "loss": 1.3918, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.04898424856715906, |
| "grad_norm": 3.540388411237351, |
| "learning_rate": 1.9923590033401443e-05, |
| "loss": 1.4196, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.049025585485781134, |
| "grad_norm": 3.3104883369223765, |
| "learning_rate": 1.9923428097525872e-05, |
| "loss": 1.4141, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.04906692240440321, |
| "grad_norm": 3.580413781862322, |
| "learning_rate": 1.9923265990895785e-05, |
| "loss": 1.4291, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.04910825932302528, |
| "grad_norm": 4.057142008160253, |
| "learning_rate": 1.9923103713513972e-05, |
| "loss": 1.4193, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.04914959624164736, |
| "grad_norm": 3.157870137031843, |
| "learning_rate": 1.9922941265383226e-05, |
| "loss": 1.3949, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.049190933160269436, |
| "grad_norm": 3.7453792892755255, |
| "learning_rate": 1.992277864650634e-05, |
| "loss": 1.373, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.049232270078891506, |
| "grad_norm": 4.17858858176331, |
| "learning_rate": 1.992261585688611e-05, |
| "loss": 1.3732, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.04927360699751358, |
| "grad_norm": 3.3040489276601157, |
| "learning_rate": 1.992245289652535e-05, |
| "loss": 1.373, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.04931494391613566, |
| "grad_norm": 3.0410499391716117, |
| "learning_rate": 1.992228976542685e-05, |
| "loss": 1.3839, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.04935628083475774, |
| "grad_norm": 3.541114095436553, |
| "learning_rate": 1.9922126463593422e-05, |
| "loss": 1.4006, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.04939761775337981, |
| "grad_norm": 3.9811902872742673, |
| "learning_rate": 1.992196299102788e-05, |
| "loss": 1.3546, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.049438954672001885, |
| "grad_norm": 3.1780875587126705, |
| "learning_rate": 1.9921799347733026e-05, |
| "loss": 1.3693, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.04948029159062396, |
| "grad_norm": 3.5586898768297415, |
| "learning_rate": 1.9921635533711687e-05, |
| "loss": 1.4215, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.04952162850924603, |
| "grad_norm": 2.8477978920610565, |
| "learning_rate": 1.9921471548966678e-05, |
| "loss": 1.4256, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.04956296542786811, |
| "grad_norm": 3.5714848436239275, |
| "learning_rate": 1.9921307393500822e-05, |
| "loss": 1.4358, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.04960430234649019, |
| "grad_norm": 3.746696171382991, |
| "learning_rate": 1.992114306731694e-05, |
| "loss": 1.4556, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.049645639265112264, |
| "grad_norm": 3.6562155551861353, |
| "learning_rate": 1.992097857041786e-05, |
| "loss": 1.404, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.049686976183734334, |
| "grad_norm": 3.2699664906016803, |
| "learning_rate": 1.9920813902806414e-05, |
| "loss": 1.3946, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.04972831310235641, |
| "grad_norm": 4.034724844068233, |
| "learning_rate": 1.992064906448544e-05, |
| "loss": 1.4321, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.04976965002097849, |
| "grad_norm": 3.4822925081412497, |
| "learning_rate": 1.9920484055457767e-05, |
| "loss": 1.4105, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.04981098693960056, |
| "grad_norm": 3.587853195823534, |
| "learning_rate": 1.9920318875726238e-05, |
| "loss": 1.3697, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.049852323858222636, |
| "grad_norm": 3.205239993732016, |
| "learning_rate": 1.9920153525293694e-05, |
| "loss": 1.3979, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.04989366077684471, |
| "grad_norm": 3.388133162074283, |
| "learning_rate": 1.991998800416298e-05, |
| "loss": 1.4023, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.04993499769546679, |
| "grad_norm": 4.2719394165918985, |
| "learning_rate": 1.9919822312336947e-05, |
| "loss": 1.3956, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.04997633461408886, |
| "grad_norm": 3.234674011512212, |
| "learning_rate": 1.9919656449818444e-05, |
| "loss": 1.4101, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.05001767153271094, |
| "grad_norm": 3.4860946302341107, |
| "learning_rate": 1.9919490416610327e-05, |
| "loss": 1.3802, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.050059008451333015, |
| "grad_norm": 3.3542639404969483, |
| "learning_rate": 1.9919324212715448e-05, |
| "loss": 1.3865, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.050100345369955085, |
| "grad_norm": 3.7877647269232293, |
| "learning_rate": 1.9919157838136668e-05, |
| "loss": 1.4198, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.05014168228857716, |
| "grad_norm": 3.6768245303830214, |
| "learning_rate": 1.9918991292876857e-05, |
| "loss": 1.392, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.05018301920719924, |
| "grad_norm": 3.0504339083555054, |
| "learning_rate": 1.9918824576938872e-05, |
| "loss": 1.3943, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.05022435612582131, |
| "grad_norm": 4.003605772511745, |
| "learning_rate": 1.9918657690325586e-05, |
| "loss": 1.3627, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.05026569304444339, |
| "grad_norm": 4.04766902184383, |
| "learning_rate": 1.9918490633039873e-05, |
| "loss": 1.3867, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.050307029963065464, |
| "grad_norm": 4.054619230075598, |
| "learning_rate": 1.99183234050846e-05, |
| "loss": 1.3558, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.05034836688168754, |
| "grad_norm": 3.3575853249481873, |
| "learning_rate": 1.9918156006462653e-05, |
| "loss": 1.3863, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.05038970380030961, |
| "grad_norm": 3.5267112119515582, |
| "learning_rate": 1.9917988437176908e-05, |
| "loss": 1.3705, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.05043104071893169, |
| "grad_norm": 3.528333383483439, |
| "learning_rate": 1.9917820697230247e-05, |
| "loss": 1.4441, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.050472377637553766, |
| "grad_norm": 3.233459986557015, |
| "learning_rate": 1.991765278662556e-05, |
| "loss": 1.3532, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.050513714556175836, |
| "grad_norm": 3.4949233408933034, |
| "learning_rate": 1.991748470536573e-05, |
| "loss": 1.3658, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.05055505147479791, |
| "grad_norm": 3.194405798297394, |
| "learning_rate": 1.9917316453453657e-05, |
| "loss": 1.397, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.05059638839341999, |
| "grad_norm": 3.6606193297707046, |
| "learning_rate": 1.9917148030892238e-05, |
| "loss": 1.4072, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.05063772531204207, |
| "grad_norm": 4.069430004373759, |
| "learning_rate": 1.9916979437684362e-05, |
| "loss": 1.4136, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.05067906223066414, |
| "grad_norm": 3.1603885636318956, |
| "learning_rate": 1.991681067383293e-05, |
| "loss": 1.4097, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.050720399149286215, |
| "grad_norm": 3.177326748991991, |
| "learning_rate": 1.9916641739340857e-05, |
| "loss": 1.4195, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.05076173606790829, |
| "grad_norm": 3.918139727949541, |
| "learning_rate": 1.991647263421104e-05, |
| "loss": 1.3876, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.05080307298653036, |
| "grad_norm": 3.9573038369404427, |
| "learning_rate": 1.9916303358446392e-05, |
| "loss": 1.3683, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.05084440990515244, |
| "grad_norm": 4.275806482199394, |
| "learning_rate": 1.9916133912049825e-05, |
| "loss": 1.4204, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.050885746823774516, |
| "grad_norm": 4.847335915383492, |
| "learning_rate": 1.9915964295024254e-05, |
| "loss": 1.4034, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.050927083742396594, |
| "grad_norm": 3.273054866007932, |
| "learning_rate": 1.99157945073726e-05, |
| "loss": 1.3968, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.050968420661018664, |
| "grad_norm": 3.7167200760785173, |
| "learning_rate": 1.9915624549097784e-05, |
| "loss": 1.3999, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.05100975757964074, |
| "grad_norm": 3.4590376733691337, |
| "learning_rate": 1.991545442020273e-05, |
| "loss": 1.4122, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.05105109449826282, |
| "grad_norm": 3.1847142656240255, |
| "learning_rate": 1.9915284120690362e-05, |
| "loss": 1.4239, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.05109243141688489, |
| "grad_norm": 3.3467985786610353, |
| "learning_rate": 1.991511365056362e-05, |
| "loss": 1.3844, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.051133768335506966, |
| "grad_norm": 4.094929119314472, |
| "learning_rate": 1.9914943009825425e-05, |
| "loss": 1.3577, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.05117510525412904, |
| "grad_norm": 3.521925167773648, |
| "learning_rate": 1.9914772198478723e-05, |
| "loss": 1.3954, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.05121644217275112, |
| "grad_norm": 3.3998170027309067, |
| "learning_rate": 1.9914601216526446e-05, |
| "loss": 1.4102, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.05125777909137319, |
| "grad_norm": 3.6457154728502035, |
| "learning_rate": 1.9914430063971542e-05, |
| "loss": 1.3666, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.05129911600999527, |
| "grad_norm": 3.133616222950282, |
| "learning_rate": 1.9914258740816956e-05, |
| "loss": 1.4071, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.051340452928617344, |
| "grad_norm": 3.5165895133634133, |
| "learning_rate": 1.9914087247065634e-05, |
| "loss": 1.4127, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.051381789847239415, |
| "grad_norm": 3.1317185826456795, |
| "learning_rate": 1.991391558272052e-05, |
| "loss": 1.3609, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.05142312676586149, |
| "grad_norm": 3.7493626264431605, |
| "learning_rate": 1.991374374778458e-05, |
| "loss": 1.3832, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.05146446368448357, |
| "grad_norm": 3.5662863496670196, |
| "learning_rate": 1.991357174226076e-05, |
| "loss": 1.3665, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.05150580060310564, |
| "grad_norm": 3.1448919796113035, |
| "learning_rate": 1.9913399566152033e-05, |
| "loss": 1.3965, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.051547137521727716, |
| "grad_norm": 3.3154750106704625, |
| "learning_rate": 1.991322721946135e-05, |
| "loss": 1.3873, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.051588474440349794, |
| "grad_norm": 3.688390777094577, |
| "learning_rate": 1.991305470219168e-05, |
| "loss": 1.3224, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.05162981135897187, |
| "grad_norm": 3.0270982188671907, |
| "learning_rate": 1.9912882014345988e-05, |
| "loss": 1.3551, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.05167114827759394, |
| "grad_norm": 3.7441081256974136, |
| "learning_rate": 1.9912709155927254e-05, |
| "loss": 1.3945, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.05171248519621602, |
| "grad_norm": 3.697861466581755, |
| "learning_rate": 1.9912536126938446e-05, |
| "loss": 1.3612, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.051753822114838095, |
| "grad_norm": 3.523721321551833, |
| "learning_rate": 1.9912362927382546e-05, |
| "loss": 1.3747, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.051795159033460166, |
| "grad_norm": 3.0061067921910727, |
| "learning_rate": 1.9912189557262528e-05, |
| "loss": 1.4086, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.05183649595208224, |
| "grad_norm": 4.0409382024057425, |
| "learning_rate": 1.991201601658138e-05, |
| "loss": 1.3849, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.05187783287070432, |
| "grad_norm": 3.7066972530256983, |
| "learning_rate": 1.9911842305342085e-05, |
| "loss": 1.3775, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.0519191697893264, |
| "grad_norm": 3.8641850919990737, |
| "learning_rate": 1.9911668423547635e-05, |
| "loss": 1.4056, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.05196050670794847, |
| "grad_norm": 2.8845765281029325, |
| "learning_rate": 1.9911494371201023e-05, |
| "loss": 1.3433, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.052001843626570544, |
| "grad_norm": 2.9182840148796996, |
| "learning_rate": 1.9911320148305235e-05, |
| "loss": 1.4146, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.05204318054519262, |
| "grad_norm": 3.471309021112678, |
| "learning_rate": 1.991114575486328e-05, |
| "loss": 1.3769, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.05208451746381469, |
| "grad_norm": 3.132099407416561, |
| "learning_rate": 1.9910971190878157e-05, |
| "loss": 1.4006, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.05212585438243677, |
| "grad_norm": 3.5078473659046554, |
| "learning_rate": 1.9910796456352863e-05, |
| "loss": 1.3608, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.052167191301058846, |
| "grad_norm": 3.420611210950219, |
| "learning_rate": 1.991062155129041e-05, |
| "loss": 1.3477, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.05220852821968092, |
| "grad_norm": 3.3602682043236425, |
| "learning_rate": 1.991044647569381e-05, |
| "loss": 1.3821, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.052249865138302994, |
| "grad_norm": 3.3934199487204326, |
| "learning_rate": 1.9910271229566067e-05, |
| "loss": 1.3672, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.05229120205692507, |
| "grad_norm": 3.3930743766477636, |
| "learning_rate": 1.9910095812910205e-05, |
| "loss": 1.3805, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.05233253897554715, |
| "grad_norm": 3.803471056919821, |
| "learning_rate": 1.9909920225729237e-05, |
| "loss": 1.357, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.05237387589416922, |
| "grad_norm": 2.9908606514422766, |
| "learning_rate": 1.990974446802619e-05, |
| "loss": 1.4148, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.052415212812791295, |
| "grad_norm": 3.472506553773665, |
| "learning_rate": 1.990956853980408e-05, |
| "loss": 1.4119, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.05245654973141337, |
| "grad_norm": 3.652498665098648, |
| "learning_rate": 1.9909392441065944e-05, |
| "loss": 1.3896, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.05249788665003545, |
| "grad_norm": 3.556198285804122, |
| "learning_rate": 1.9909216171814802e-05, |
| "loss": 1.3556, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.05253922356865752, |
| "grad_norm": 3.3341578506950187, |
| "learning_rate": 1.9909039732053695e-05, |
| "loss": 1.3875, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.0525805604872796, |
| "grad_norm": 3.623737574209396, |
| "learning_rate": 1.9908863121785656e-05, |
| "loss": 1.3699, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.052621897405901674, |
| "grad_norm": 3.068120426816953, |
| "learning_rate": 1.9908686341013723e-05, |
| "loss": 1.3504, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.052663234324523744, |
| "grad_norm": 3.5988757581859643, |
| "learning_rate": 1.990850938974094e-05, |
| "loss": 1.3506, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.05270457124314582, |
| "grad_norm": 3.4850198824984724, |
| "learning_rate": 1.990833226797035e-05, |
| "loss": 1.3949, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.0527459081617679, |
| "grad_norm": 3.3573178296822834, |
| "learning_rate": 1.9908154975705e-05, |
| "loss": 1.3766, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.052787245080389976, |
| "grad_norm": 3.5288003708700186, |
| "learning_rate": 1.990797751294795e-05, |
| "loss": 1.3915, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.052828581999012046, |
| "grad_norm": 3.0065181585529794, |
| "learning_rate": 1.990779987970224e-05, |
| "loss": 1.3943, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.05286991891763412, |
| "grad_norm": 3.8902210517557787, |
| "learning_rate": 1.9907622075970933e-05, |
| "loss": 1.4339, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.0529112558362562, |
| "grad_norm": 3.0754498963080317, |
| "learning_rate": 1.990744410175709e-05, |
| "loss": 1.3633, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.05295259275487827, |
| "grad_norm": 3.373819633563616, |
| "learning_rate": 1.990726595706377e-05, |
| "loss": 1.3729, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.05299392967350035, |
| "grad_norm": 3.2413750593238277, |
| "learning_rate": 1.990708764189404e-05, |
| "loss": 1.3611, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.053035266592122425, |
| "grad_norm": 3.3175842583387287, |
| "learning_rate": 1.990690915625097e-05, |
| "loss": 1.4386, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.053076603510744495, |
| "grad_norm": 4.421464949416987, |
| "learning_rate": 1.9906730500137626e-05, |
| "loss": 1.3825, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.05311794042936657, |
| "grad_norm": 3.7375473757828312, |
| "learning_rate": 1.9906551673557092e-05, |
| "loss": 1.3584, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.05315927734798865, |
| "grad_norm": 4.23504822699641, |
| "learning_rate": 1.9906372676512435e-05, |
| "loss": 1.3655, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.05320061426661073, |
| "grad_norm": 3.674354440233681, |
| "learning_rate": 1.9906193509006737e-05, |
| "loss": 1.3652, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.0532419511852328, |
| "grad_norm": 3.270440974926962, |
| "learning_rate": 1.9906014171043085e-05, |
| "loss": 1.408, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.053283288103854874, |
| "grad_norm": 3.4328461661592007, |
| "learning_rate": 1.9905834662624562e-05, |
| "loss": 1.3881, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.05332462502247695, |
| "grad_norm": 3.296815547244285, |
| "learning_rate": 1.9905654983754255e-05, |
| "loss": 1.3099, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.05336596194109902, |
| "grad_norm": 3.182778307558256, |
| "learning_rate": 1.9905475134435265e-05, |
| "loss": 1.3887, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.0534072988597211, |
| "grad_norm": 4.066051141098089, |
| "learning_rate": 1.9905295114670674e-05, |
| "loss": 1.3615, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.053448635778343176, |
| "grad_norm": 3.62599590443558, |
| "learning_rate": 1.9905114924463592e-05, |
| "loss": 1.3461, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.05348997269696525, |
| "grad_norm": 4.5391383467222735, |
| "learning_rate": 1.9904934563817106e-05, |
| "loss": 1.3543, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.05353130961558732, |
| "grad_norm": 4.04368698393749, |
| "learning_rate": 1.990475403273433e-05, |
| "loss": 1.3712, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.0535726465342094, |
| "grad_norm": 3.9811173894333836, |
| "learning_rate": 1.9904573331218365e-05, |
| "loss": 1.4334, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.05361398345283148, |
| "grad_norm": 3.499953327542098, |
| "learning_rate": 1.9904392459272326e-05, |
| "loss": 1.3871, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.05365532037145355, |
| "grad_norm": 3.226290735311431, |
| "learning_rate": 1.9904211416899322e-05, |
| "loss": 1.4122, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.053696657290075625, |
| "grad_norm": 3.566091958099414, |
| "learning_rate": 1.990403020410247e-05, |
| "loss": 1.4075, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.0537379942086977, |
| "grad_norm": 3.4558175186897513, |
| "learning_rate": 1.990384882088488e-05, |
| "loss": 1.4553, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.05377933112731978, |
| "grad_norm": 3.238909449520725, |
| "learning_rate": 1.9903667267249683e-05, |
| "loss": 1.3791, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.05382066804594185, |
| "grad_norm": 3.517722296765338, |
| "learning_rate": 1.9903485543199995e-05, |
| "loss": 1.3283, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.05386200496456393, |
| "grad_norm": 3.42397575432932, |
| "learning_rate": 1.9903303648738954e-05, |
| "loss": 1.3335, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.053903341883186004, |
| "grad_norm": 3.350334059229468, |
| "learning_rate": 1.990312158386968e-05, |
| "loss": 1.3806, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.053944678801808074, |
| "grad_norm": 3.051548296138219, |
| "learning_rate": 1.9902939348595307e-05, |
| "loss": 1.3885, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.05398601572043015, |
| "grad_norm": 3.276091159978694, |
| "learning_rate": 1.9902756942918976e-05, |
| "loss": 1.359, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.05402735263905223, |
| "grad_norm": 3.5296387125760185, |
| "learning_rate": 1.9902574366843824e-05, |
| "loss": 1.3625, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.054068689557674306, |
| "grad_norm": 3.567948297220875, |
| "learning_rate": 1.990239162037299e-05, |
| "loss": 1.351, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.054110026476296376, |
| "grad_norm": 3.1640186718240266, |
| "learning_rate": 1.9902208703509617e-05, |
| "loss": 1.3458, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.05415136339491845, |
| "grad_norm": 3.9025546167384495, |
| "learning_rate": 1.9902025616256854e-05, |
| "loss": 1.3588, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.05419270031354053, |
| "grad_norm": 3.501658240766089, |
| "learning_rate": 1.9901842358617854e-05, |
| "loss": 1.3624, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.0542340372321626, |
| "grad_norm": 3.923570308465845, |
| "learning_rate": 1.9901658930595774e-05, |
| "loss": 1.3294, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.05427537415078468, |
| "grad_norm": 3.0232913372852406, |
| "learning_rate": 1.990147533219376e-05, |
| "loss": 1.3855, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.054316711069406755, |
| "grad_norm": 3.670482250458697, |
| "learning_rate": 1.9901291563414977e-05, |
| "loss": 1.3337, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.054358047988028825, |
| "grad_norm": 3.790854501668152, |
| "learning_rate": 1.990110762426259e-05, |
| "loss": 1.366, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.0543993849066509, |
| "grad_norm": 3.1103384131591256, |
| "learning_rate": 1.9900923514739758e-05, |
| "loss": 1.3574, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.05444072182527298, |
| "grad_norm": 3.2207958459794845, |
| "learning_rate": 1.990073923484965e-05, |
| "loss": 1.3675, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.05448205874389506, |
| "grad_norm": 3.2588291421463023, |
| "learning_rate": 1.990055478459544e-05, |
| "loss": 1.3313, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.05452339566251713, |
| "grad_norm": 2.9426904447180506, |
| "learning_rate": 1.99003701639803e-05, |
| "loss": 1.3995, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.054564732581139204, |
| "grad_norm": 3.892827987664763, |
| "learning_rate": 1.990018537300741e-05, |
| "loss": 1.4035, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.05460606949976128, |
| "grad_norm": 3.765962575470102, |
| "learning_rate": 1.9900000411679946e-05, |
| "loss": 1.3823, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.05464740641838335, |
| "grad_norm": 3.031044142550962, |
| "learning_rate": 1.9899815280001093e-05, |
| "loss": 1.3907, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.05468874333700543, |
| "grad_norm": 3.401074997651561, |
| "learning_rate": 1.9899629977974033e-05, |
| "loss": 1.3724, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.054730080255627506, |
| "grad_norm": 3.4363592487014367, |
| "learning_rate": 1.9899444505601957e-05, |
| "loss": 1.4044, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.05477141717424958, |
| "grad_norm": 3.4008170147404924, |
| "learning_rate": 1.9899258862888055e-05, |
| "loss": 1.4329, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.05481275409287165, |
| "grad_norm": 3.3535448510349086, |
| "learning_rate": 1.9899073049835526e-05, |
| "loss": 1.3803, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.05485409101149373, |
| "grad_norm": 3.6491506303085677, |
| "learning_rate": 1.9898887066447564e-05, |
| "loss": 1.4061, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.05489542793011581, |
| "grad_norm": 3.2649595342568754, |
| "learning_rate": 1.9898700912727365e-05, |
| "loss": 1.3548, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.05493676484873788, |
| "grad_norm": 3.260685808424658, |
| "learning_rate": 1.9898514588678138e-05, |
| "loss": 1.3798, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.054978101767359955, |
| "grad_norm": 3.3068054059856964, |
| "learning_rate": 1.989832809430309e-05, |
| "loss": 1.3873, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.05501943868598203, |
| "grad_norm": 3.3289477651913844, |
| "learning_rate": 1.9898141429605428e-05, |
| "loss": 1.42, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.05506077560460411, |
| "grad_norm": 3.899358403289862, |
| "learning_rate": 1.9897954594588366e-05, |
| "loss": 1.3612, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.05510211252322618, |
| "grad_norm": 3.4534257185508768, |
| "learning_rate": 1.989776758925511e-05, |
| "loss": 1.4139, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.05514344944184826, |
| "grad_norm": 3.0896933369894555, |
| "learning_rate": 1.9897580413608888e-05, |
| "loss": 1.3455, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.055184786360470334, |
| "grad_norm": 3.6880673723268895, |
| "learning_rate": 1.9897393067652916e-05, |
| "loss": 1.3553, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.055226123279092404, |
| "grad_norm": 4.959844713915171, |
| "learning_rate": 1.989720555139042e-05, |
| "loss": 1.3744, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.05526746019771448, |
| "grad_norm": 3.7512130358105535, |
| "learning_rate": 1.9897017864824623e-05, |
| "loss": 1.3967, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.05530879711633656, |
| "grad_norm": 3.438280531829743, |
| "learning_rate": 1.989683000795876e-05, |
| "loss": 1.3199, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.055350134034958635, |
| "grad_norm": 3.191658504407269, |
| "learning_rate": 1.989664198079606e-05, |
| "loss": 1.3843, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.055391470953580706, |
| "grad_norm": 3.246301259794481, |
| "learning_rate": 1.989645378333976e-05, |
| "loss": 1.3823, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.05543280787220278, |
| "grad_norm": 3.209808944542668, |
| "learning_rate": 1.9896265415593096e-05, |
| "loss": 1.4023, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.05547414479082486, |
| "grad_norm": 3.8876392074458055, |
| "learning_rate": 1.989607687755931e-05, |
| "loss": 1.4021, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.05551548170944693, |
| "grad_norm": 4.6622959130132635, |
| "learning_rate": 1.9895888169241643e-05, |
| "loss": 1.3941, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.05555681862806901, |
| "grad_norm": 3.2829449802312243, |
| "learning_rate": 1.989569929064335e-05, |
| "loss": 1.3914, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.055598155546691085, |
| "grad_norm": 2.975464909103463, |
| "learning_rate": 1.989551024176768e-05, |
| "loss": 1.3393, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.055639492465313155, |
| "grad_norm": 3.3616145226127374, |
| "learning_rate": 1.9895321022617877e-05, |
| "loss": 1.3691, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.05568082938393523, |
| "grad_norm": 3.551441103147202, |
| "learning_rate": 1.9895131633197206e-05, |
| "loss": 1.3748, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.05572216630255731, |
| "grad_norm": 3.1368088044777838, |
| "learning_rate": 1.9894942073508924e-05, |
| "loss": 1.3341, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.055763503221179386, |
| "grad_norm": 2.8747175172948722, |
| "learning_rate": 1.989475234355629e-05, |
| "loss": 1.3951, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.055804840139801457, |
| "grad_norm": 3.0419841938845975, |
| "learning_rate": 1.989456244334257e-05, |
| "loss": 1.3325, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.055846177058423534, |
| "grad_norm": 3.6672894427510947, |
| "learning_rate": 1.9894372372871036e-05, |
| "loss": 1.3847, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.05588751397704561, |
| "grad_norm": 3.3319466434724325, |
| "learning_rate": 1.989418213214495e-05, |
| "loss": 1.4007, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.05592885089566768, |
| "grad_norm": 2.756361075189523, |
| "learning_rate": 1.9893991721167593e-05, |
| "loss": 1.3962, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.05597018781428976, |
| "grad_norm": 3.2588019424168384, |
| "learning_rate": 1.989380113994224e-05, |
| "loss": 1.409, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.056011524732911835, |
| "grad_norm": 3.992120970935661, |
| "learning_rate": 1.9893610388472162e-05, |
| "loss": 1.3642, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.05605286165153391, |
| "grad_norm": 3.0454737775385885, |
| "learning_rate": 1.9893419466760653e-05, |
| "loss": 1.3696, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.05609419857015598, |
| "grad_norm": 3.1960156109377507, |
| "learning_rate": 1.9893228374810993e-05, |
| "loss": 1.3611, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.05613553548877806, |
| "grad_norm": 3.3282542329613496, |
| "learning_rate": 1.989303711262647e-05, |
| "loss": 1.3541, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.05617687240740014, |
| "grad_norm": 4.271766406501802, |
| "learning_rate": 1.9892845680210374e-05, |
| "loss": 1.3033, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.05621820932602221, |
| "grad_norm": 3.4542215892482964, |
| "learning_rate": 1.9892654077566003e-05, |
| "loss": 1.3853, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.056259546244644285, |
| "grad_norm": 5.10198450683926, |
| "learning_rate": 1.9892462304696653e-05, |
| "loss": 1.3758, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.05630088316326636, |
| "grad_norm": 4.67424032198832, |
| "learning_rate": 1.989227036160562e-05, |
| "loss": 1.3395, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.05634222008188844, |
| "grad_norm": 3.574141696384299, |
| "learning_rate": 1.989207824829621e-05, |
| "loss": 1.3953, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.05638355700051051, |
| "grad_norm": 3.4219942324444244, |
| "learning_rate": 1.989188596477173e-05, |
| "loss": 1.3493, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.056424893919132586, |
| "grad_norm": 3.135032092895971, |
| "learning_rate": 1.9891693511035484e-05, |
| "loss": 1.4203, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.05646623083775466, |
| "grad_norm": 3.0856331845063387, |
| "learning_rate": 1.989150088709079e-05, |
| "loss": 1.2854, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.056507567756376734, |
| "grad_norm": 3.9267068067232, |
| "learning_rate": 1.9891308092940953e-05, |
| "loss": 1.3701, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.05654890467499881, |
| "grad_norm": 3.8306681224418395, |
| "learning_rate": 1.98911151285893e-05, |
| "loss": 1.4076, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.05659024159362089, |
| "grad_norm": 3.306837169963007, |
| "learning_rate": 1.9890921994039148e-05, |
| "loss": 1.3873, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.056631578512242965, |
| "grad_norm": 3.2434462335337297, |
| "learning_rate": 1.989072868929382e-05, |
| "loss": 1.3531, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.056672915430865035, |
| "grad_norm": 3.3740272090711856, |
| "learning_rate": 1.989053521435664e-05, |
| "loss": 1.3772, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.05671425234948711, |
| "grad_norm": 3.141984063033404, |
| "learning_rate": 1.989034156923094e-05, |
| "loss": 1.3983, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.05675558926810919, |
| "grad_norm": 2.9680517660305847, |
| "learning_rate": 1.989014775392005e-05, |
| "loss": 1.3651, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.05679692618673126, |
| "grad_norm": 3.418890084499366, |
| "learning_rate": 1.9889953768427313e-05, |
| "loss": 1.4157, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.05683826310535334, |
| "grad_norm": 3.7254805946590706, |
| "learning_rate": 1.9889759612756053e-05, |
| "loss": 1.3979, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.056879600023975414, |
| "grad_norm": 3.617034367391942, |
| "learning_rate": 1.9889565286909623e-05, |
| "loss": 1.3549, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.056920936942597485, |
| "grad_norm": 3.8592922160592646, |
| "learning_rate": 1.9889370790891364e-05, |
| "loss": 1.4008, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.05696227386121956, |
| "grad_norm": 3.510616141867297, |
| "learning_rate": 1.9889176124704616e-05, |
| "loss": 1.4071, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.05700361077984164, |
| "grad_norm": 3.5434621547794105, |
| "learning_rate": 1.9888981288352736e-05, |
| "loss": 1.3782, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.057044947698463716, |
| "grad_norm": 3.0056956686627117, |
| "learning_rate": 1.988878628183907e-05, |
| "loss": 1.352, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.057086284617085786, |
| "grad_norm": 2.876862066794774, |
| "learning_rate": 1.9888591105166984e-05, |
| "loss": 1.3451, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.05712762153570786, |
| "grad_norm": 3.6994718345443776, |
| "learning_rate": 1.9888395758339823e-05, |
| "loss": 1.3711, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.05716895845432994, |
| "grad_norm": 3.7019952550478052, |
| "learning_rate": 1.988820024136096e-05, |
| "loss": 1.3542, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.05721029537295201, |
| "grad_norm": 3.149511937310367, |
| "learning_rate": 1.9888004554233757e-05, |
| "loss": 1.3498, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.05725163229157409, |
| "grad_norm": 3.3687656057902298, |
| "learning_rate": 1.9887808696961574e-05, |
| "loss": 1.3759, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.057292969210196165, |
| "grad_norm": 3.286956265957372, |
| "learning_rate": 1.988761266954779e-05, |
| "loss": 1.3416, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.05733430612881824, |
| "grad_norm": 3.8546233434442025, |
| "learning_rate": 1.988741647199577e-05, |
| "loss": 1.3601, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.05737564304744031, |
| "grad_norm": 4.043137550642432, |
| "learning_rate": 1.98872201043089e-05, |
| "loss": 1.4019, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.05741697996606239, |
| "grad_norm": 4.6033780589634805, |
| "learning_rate": 1.988702356649055e-05, |
| "loss": 1.406, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.05745831688468447, |
| "grad_norm": 3.4533366026357135, |
| "learning_rate": 1.9886826858544103e-05, |
| "loss": 1.3579, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.05749965380330654, |
| "grad_norm": 4.2918223423561725, |
| "learning_rate": 1.9886629980472945e-05, |
| "loss": 1.3238, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.057540990721928614, |
| "grad_norm": 3.1786603013459667, |
| "learning_rate": 1.988643293228047e-05, |
| "loss": 1.3815, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.05758232764055069, |
| "grad_norm": 4.187787674938507, |
| "learning_rate": 1.988623571397006e-05, |
| "loss": 1.3129, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.05762366455917277, |
| "grad_norm": 3.2066247160025956, |
| "learning_rate": 1.9886038325545112e-05, |
| "loss": 1.3604, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.05766500147779484, |
| "grad_norm": 4.137189558470061, |
| "learning_rate": 1.9885840767009023e-05, |
| "loss": 1.3683, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.057706338396416916, |
| "grad_norm": 3.21825868230931, |
| "learning_rate": 1.988564303836519e-05, |
| "loss": 1.3521, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.05774767531503899, |
| "grad_norm": 3.5331562264396097, |
| "learning_rate": 1.9885445139617018e-05, |
| "loss": 1.4079, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.05778901223366106, |
| "grad_norm": 3.1970430005062607, |
| "learning_rate": 1.9885247070767915e-05, |
| "loss": 1.3688, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.05783034915228314, |
| "grad_norm": 4.27476021372676, |
| "learning_rate": 1.988504883182128e-05, |
| "loss": 1.364, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.05787168607090522, |
| "grad_norm": 3.2156024105612278, |
| "learning_rate": 1.9884850422780534e-05, |
| "loss": 1.3814, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.057913022989527295, |
| "grad_norm": 3.6998253526421565, |
| "learning_rate": 1.9884651843649083e-05, |
| "loss": 1.3698, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.057954359908149365, |
| "grad_norm": 4.503662250473274, |
| "learning_rate": 1.988445309443035e-05, |
| "loss": 1.3564, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.05799569682677144, |
| "grad_norm": 3.762384040491392, |
| "learning_rate": 1.9884254175127754e-05, |
| "loss": 1.4119, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.05803703374539352, |
| "grad_norm": 3.285301388684364, |
| "learning_rate": 1.9884055085744713e-05, |
| "loss": 1.3501, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.05807837066401559, |
| "grad_norm": 4.336832986530797, |
| "learning_rate": 1.9883855826284656e-05, |
| "loss": 1.3662, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.05811970758263767, |
| "grad_norm": 3.4574445488885734, |
| "learning_rate": 1.9883656396751016e-05, |
| "loss": 1.3127, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.058161044501259744, |
| "grad_norm": 3.4098914920099883, |
| "learning_rate": 1.988345679714722e-05, |
| "loss": 1.391, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.058202381419881814, |
| "grad_norm": 3.6614081585424603, |
| "learning_rate": 1.98832570274767e-05, |
| "loss": 1.3659, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.05824371833850389, |
| "grad_norm": 3.276861233677139, |
| "learning_rate": 1.98830570877429e-05, |
| "loss": 1.3559, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.05828505525712597, |
| "grad_norm": 3.4536947240708997, |
| "learning_rate": 1.9882856977949257e-05, |
| "loss": 1.3779, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.058326392175748046, |
| "grad_norm": 3.242988394736396, |
| "learning_rate": 1.9882656698099213e-05, |
| "loss": 1.3353, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.058367729094370116, |
| "grad_norm": 3.033285353432935, |
| "learning_rate": 1.9882456248196216e-05, |
| "loss": 1.3831, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.05840906601299219, |
| "grad_norm": 2.9840093106321173, |
| "learning_rate": 1.9882255628243715e-05, |
| "loss": 1.399, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.05845040293161427, |
| "grad_norm": 3.424871961043564, |
| "learning_rate": 1.9882054838245158e-05, |
| "loss": 1.3774, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.05849173985023634, |
| "grad_norm": 4.206811562034524, |
| "learning_rate": 1.988185387820401e-05, |
| "loss": 1.3768, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.05853307676885842, |
| "grad_norm": 3.167196829826696, |
| "learning_rate": 1.9881652748123723e-05, |
| "loss": 1.3118, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.058574413687480495, |
| "grad_norm": 3.2647863004270583, |
| "learning_rate": 1.9881451448007752e-05, |
| "loss": 1.359, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.05861575060610257, |
| "grad_norm": 3.2249069204445506, |
| "learning_rate": 1.988124997785957e-05, |
| "loss": 1.3594, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.05865708752472464, |
| "grad_norm": 3.125833926204158, |
| "learning_rate": 1.9881048337682644e-05, |
| "loss": 1.3729, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.05869842444334672, |
| "grad_norm": 3.124857727130482, |
| "learning_rate": 1.9880846527480434e-05, |
| "loss": 1.3968, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.0587397613619688, |
| "grad_norm": 3.5324983045866416, |
| "learning_rate": 1.988064454725642e-05, |
| "loss": 1.3744, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.05878109828059087, |
| "grad_norm": 3.534173243273415, |
| "learning_rate": 1.9880442397014082e-05, |
| "loss": 1.3858, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.058822435199212944, |
| "grad_norm": 4.522930564227188, |
| "learning_rate": 1.9880240076756885e-05, |
| "loss": 1.3365, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.05886377211783502, |
| "grad_norm": 2.911847199383502, |
| "learning_rate": 1.9880037586488324e-05, |
| "loss": 1.3629, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.0589051090364571, |
| "grad_norm": 3.467653251925633, |
| "learning_rate": 1.9879834926211875e-05, |
| "loss": 1.3839, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.05894644595507917, |
| "grad_norm": 3.7166470032659618, |
| "learning_rate": 1.9879632095931024e-05, |
| "loss": 1.3358, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.058987782873701246, |
| "grad_norm": 3.7077022813203193, |
| "learning_rate": 1.987942909564927e-05, |
| "loss": 1.3474, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.05902911979232332, |
| "grad_norm": 3.1733151465540916, |
| "learning_rate": 1.9879225925370094e-05, |
| "loss": 1.3881, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.05907045671094539, |
| "grad_norm": 3.0349885556128378, |
| "learning_rate": 1.9879022585097005e-05, |
| "loss": 1.3686, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.05911179362956747, |
| "grad_norm": 3.2620109103700363, |
| "learning_rate": 1.9878819074833493e-05, |
| "loss": 1.3588, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.05915313054818955, |
| "grad_norm": 3.7133352574339873, |
| "learning_rate": 1.9878615394583062e-05, |
| "loss": 1.34, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.059194467466811625, |
| "grad_norm": 3.1624289931149114, |
| "learning_rate": 1.987841154434922e-05, |
| "loss": 1.3334, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.059235804385433695, |
| "grad_norm": 3.7047396553657643, |
| "learning_rate": 1.9878207524135468e-05, |
| "loss": 1.3375, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.05927714130405577, |
| "grad_norm": 3.3119519535402, |
| "learning_rate": 1.9878003333945325e-05, |
| "loss": 1.3537, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.05931847822267785, |
| "grad_norm": 3.3812187643072105, |
| "learning_rate": 1.98777989737823e-05, |
| "loss": 1.3374, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.05935981514129992, |
| "grad_norm": 3.299892572730851, |
| "learning_rate": 1.9877594443649902e-05, |
| "loss": 1.3704, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.059401152059922, |
| "grad_norm": 4.081245562156265, |
| "learning_rate": 1.9877389743551668e-05, |
| "loss": 1.3498, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.059442488978544074, |
| "grad_norm": 3.350158600172311, |
| "learning_rate": 1.9877184873491102e-05, |
| "loss": 1.3449, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.05948382589716615, |
| "grad_norm": 3.714541324538004, |
| "learning_rate": 1.9876979833471742e-05, |
| "loss": 1.3874, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.05952516281578822, |
| "grad_norm": 3.274100022702722, |
| "learning_rate": 1.9876774623497112e-05, |
| "loss": 1.3582, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.0595664997344103, |
| "grad_norm": 2.9329187414523425, |
| "learning_rate": 1.9876569243570742e-05, |
| "loss": 1.3901, |
| "step": 14410 |
| }, |
| { |
| "epoch": 0.059607836653032376, |
| "grad_norm": 3.73192299633757, |
| "learning_rate": 1.9876363693696166e-05, |
| "loss": 1.3898, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.059649173571654446, |
| "grad_norm": 3.2578219560804196, |
| "learning_rate": 1.987615797387692e-05, |
| "loss": 1.371, |
| "step": 14430 |
| }, |
| { |
| "epoch": 0.05969051049027652, |
| "grad_norm": 3.2376808673712807, |
| "learning_rate": 1.9875952084116548e-05, |
| "loss": 1.336, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.0597318474088986, |
| "grad_norm": 3.5969021652399253, |
| "learning_rate": 1.987574602441859e-05, |
| "loss": 1.3862, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.05977318432752067, |
| "grad_norm": 3.1882930317103844, |
| "learning_rate": 1.9875539794786593e-05, |
| "loss": 1.3734, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.05981452124614275, |
| "grad_norm": 3.147605709223492, |
| "learning_rate": 1.9875333395224102e-05, |
| "loss": 1.3739, |
| "step": 14470 |
| }, |
| { |
| "epoch": 0.059855858164764825, |
| "grad_norm": 3.4276761969558645, |
| "learning_rate": 1.9875126825734673e-05, |
| "loss": 1.3301, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.0598971950833869, |
| "grad_norm": 3.4226682382169997, |
| "learning_rate": 1.987492008632186e-05, |
| "loss": 1.3747, |
| "step": 14490 |
| }, |
| { |
| "epoch": 0.05993853200200897, |
| "grad_norm": 3.471596558825063, |
| "learning_rate": 1.987471317698922e-05, |
| "loss": 1.3349, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.05997986892063105, |
| "grad_norm": 3.676413482025792, |
| "learning_rate": 1.9874506097740308e-05, |
| "loss": 1.3963, |
| "step": 14510 |
| }, |
| { |
| "epoch": 0.060021205839253126, |
| "grad_norm": 3.2543359739910267, |
| "learning_rate": 1.9874298848578696e-05, |
| "loss": 1.3334, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.0600625427578752, |
| "grad_norm": 3.2372261281319377, |
| "learning_rate": 1.9874091429507943e-05, |
| "loss": 1.3367, |
| "step": 14530 |
| }, |
| { |
| "epoch": 0.060103879676497274, |
| "grad_norm": 3.3746909228055397, |
| "learning_rate": 1.987388384053162e-05, |
| "loss": 1.376, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.06014521659511935, |
| "grad_norm": 2.921092227637486, |
| "learning_rate": 1.9873676081653302e-05, |
| "loss": 1.3715, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.06018655351374143, |
| "grad_norm": 2.9872735994422417, |
| "learning_rate": 1.9873468152876563e-05, |
| "loss": 1.3457, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.0602278904323635, |
| "grad_norm": 3.713230272104003, |
| "learning_rate": 1.9873260054204978e-05, |
| "loss": 1.328, |
| "step": 14570 |
| }, |
| { |
| "epoch": 0.060269227350985576, |
| "grad_norm": 3.3242217719152496, |
| "learning_rate": 1.9873051785642134e-05, |
| "loss": 1.3433, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.06031056426960765, |
| "grad_norm": 3.3594706223759143, |
| "learning_rate": 1.9872843347191607e-05, |
| "loss": 1.4027, |
| "step": 14590 |
| }, |
| { |
| "epoch": 0.06035190118822972, |
| "grad_norm": 3.1573056170670846, |
| "learning_rate": 1.9872634738856987e-05, |
| "loss": 1.3798, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.0603932381068518, |
| "grad_norm": 3.076626392806199, |
| "learning_rate": 1.9872425960641863e-05, |
| "loss": 1.3581, |
| "step": 14610 |
| }, |
| { |
| "epoch": 0.06043457502547388, |
| "grad_norm": 3.468364476739333, |
| "learning_rate": 1.987221701254983e-05, |
| "loss": 1.3675, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.060475911944095954, |
| "grad_norm": 4.079855852909671, |
| "learning_rate": 1.987200789458448e-05, |
| "loss": 1.3197, |
| "step": 14630 |
| }, |
| { |
| "epoch": 0.060517248862718025, |
| "grad_norm": 4.960368430326063, |
| "learning_rate": 1.9871798606749415e-05, |
| "loss": 1.4018, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.0605585857813401, |
| "grad_norm": 3.1263141804205272, |
| "learning_rate": 1.9871589149048232e-05, |
| "loss": 1.4034, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.06059992269996218, |
| "grad_norm": 3.0030045708337876, |
| "learning_rate": 1.9871379521484538e-05, |
| "loss": 1.314, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.06064125961858425, |
| "grad_norm": 4.171309905380893, |
| "learning_rate": 1.987116972406194e-05, |
| "loss": 1.3454, |
| "step": 14670 |
| }, |
| { |
| "epoch": 0.060682596537206326, |
| "grad_norm": 3.678722232531344, |
| "learning_rate": 1.9870959756784044e-05, |
| "loss": 1.3644, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.060723933455828404, |
| "grad_norm": 3.5114052948471164, |
| "learning_rate": 1.987074961965447e-05, |
| "loss": 1.3446, |
| "step": 14690 |
| }, |
| { |
| "epoch": 0.06076527037445048, |
| "grad_norm": 3.3802619453865637, |
| "learning_rate": 1.987053931267683e-05, |
| "loss": 1.3603, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.06080660729307255, |
| "grad_norm": 3.239888006176567, |
| "learning_rate": 1.9870328835854743e-05, |
| "loss": 1.3263, |
| "step": 14710 |
| }, |
| { |
| "epoch": 0.06084794421169463, |
| "grad_norm": 2.7923482855439716, |
| "learning_rate": 1.9870118189191833e-05, |
| "loss": 1.3532, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.060889281130316705, |
| "grad_norm": 3.672374432583175, |
| "learning_rate": 1.9869907372691715e-05, |
| "loss": 1.3749, |
| "step": 14730 |
| }, |
| { |
| "epoch": 0.060930618048938776, |
| "grad_norm": 3.1371007241226017, |
| "learning_rate": 1.9869696386358032e-05, |
| "loss": 1.3529, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.06097195496756085, |
| "grad_norm": 3.5064852286924837, |
| "learning_rate": 1.9869485230194403e-05, |
| "loss": 1.3664, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.06101329188618293, |
| "grad_norm": 3.9587666778347073, |
| "learning_rate": 1.9869273904204465e-05, |
| "loss": 1.3847, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.061054628804805, |
| "grad_norm": 3.2129303491061973, |
| "learning_rate": 1.9869062408391855e-05, |
| "loss": 1.3625, |
| "step": 14770 |
| }, |
| { |
| "epoch": 0.06109596572342708, |
| "grad_norm": 3.1860777109810465, |
| "learning_rate": 1.9868850742760212e-05, |
| "loss": 1.3062, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.061137302642049154, |
| "grad_norm": 3.47772048599133, |
| "learning_rate": 1.9868638907313174e-05, |
| "loss": 1.3487, |
| "step": 14790 |
| }, |
| { |
| "epoch": 0.06117863956067123, |
| "grad_norm": 3.323868913803053, |
| "learning_rate": 1.9868426902054394e-05, |
| "loss": 1.3304, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.0612199764792933, |
| "grad_norm": 3.4796197385612735, |
| "learning_rate": 1.9868214726987513e-05, |
| "loss": 1.3143, |
| "step": 14810 |
| }, |
| { |
| "epoch": 0.06126131339791538, |
| "grad_norm": 3.8413485871243402, |
| "learning_rate": 1.9868002382116186e-05, |
| "loss": 1.3451, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.061302650316537456, |
| "grad_norm": 3.3376163822500278, |
| "learning_rate": 1.9867789867444066e-05, |
| "loss": 1.3486, |
| "step": 14830 |
| }, |
| { |
| "epoch": 0.061343987235159526, |
| "grad_norm": 3.3554967862457543, |
| "learning_rate": 1.9867577182974807e-05, |
| "loss": 1.3447, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.061385324153781604, |
| "grad_norm": 3.0553156261560757, |
| "learning_rate": 1.9867364328712074e-05, |
| "loss": 1.3436, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.06142666107240368, |
| "grad_norm": 3.9386923791793027, |
| "learning_rate": 1.9867151304659527e-05, |
| "loss": 1.3719, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.06146799799102576, |
| "grad_norm": 3.915463974315291, |
| "learning_rate": 1.986693811082083e-05, |
| "loss": 1.328, |
| "step": 14870 |
| }, |
| { |
| "epoch": 0.06150933490964783, |
| "grad_norm": 2.800179512440139, |
| "learning_rate": 1.986672474719965e-05, |
| "loss": 1.322, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.061550671828269905, |
| "grad_norm": 3.3411218270323664, |
| "learning_rate": 1.9866511213799665e-05, |
| "loss": 1.3899, |
| "step": 14890 |
| }, |
| { |
| "epoch": 0.06159200874689198, |
| "grad_norm": 3.323705843953213, |
| "learning_rate": 1.9866297510624544e-05, |
| "loss": 1.3615, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.06163334566551405, |
| "grad_norm": 4.754040048447529, |
| "learning_rate": 1.9866083637677963e-05, |
| "loss": 1.3726, |
| "step": 14910 |
| }, |
| { |
| "epoch": 0.06167468258413613, |
| "grad_norm": 3.45837680678418, |
| "learning_rate": 1.9865869594963607e-05, |
| "loss": 1.3519, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.06171601950275821, |
| "grad_norm": 3.183067903396876, |
| "learning_rate": 1.986565538248516e-05, |
| "loss": 1.3584, |
| "step": 14930 |
| }, |
| { |
| "epoch": 0.061757356421380284, |
| "grad_norm": 3.8419649238240656, |
| "learning_rate": 1.98654410002463e-05, |
| "loss": 1.3698, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.061798693340002354, |
| "grad_norm": 3.3977765733638168, |
| "learning_rate": 1.9865226448250725e-05, |
| "loss": 1.3702, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.06184003025862443, |
| "grad_norm": 3.5010199408218305, |
| "learning_rate": 1.9865011726502118e-05, |
| "loss": 1.3515, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.06188136717724651, |
| "grad_norm": 3.521969897290798, |
| "learning_rate": 1.9864796835004184e-05, |
| "loss": 1.3562, |
| "step": 14970 |
| }, |
| { |
| "epoch": 0.06192270409586858, |
| "grad_norm": 3.198260891262558, |
| "learning_rate": 1.986458177376061e-05, |
| "loss": 1.3265, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.061964041014490656, |
| "grad_norm": 4.203288617287408, |
| "learning_rate": 1.9864366542775104e-05, |
| "loss": 1.3445, |
| "step": 14990 |
| }, |
| { |
| "epoch": 0.06200537793311273, |
| "grad_norm": 3.0231024395080204, |
| "learning_rate": 1.9864151142051367e-05, |
| "loss": 1.3437, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.06204671485173481, |
| "grad_norm": 3.375301699368925, |
| "learning_rate": 1.9863935571593104e-05, |
| "loss": 1.3587, |
| "step": 15010 |
| }, |
| { |
| "epoch": 0.06208805177035688, |
| "grad_norm": 3.3942212627441433, |
| "learning_rate": 1.986371983140403e-05, |
| "loss": 1.3574, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.06212938868897896, |
| "grad_norm": 2.8229502868870906, |
| "learning_rate": 1.986350392148785e-05, |
| "loss": 1.3252, |
| "step": 15030 |
| }, |
| { |
| "epoch": 0.062170725607601035, |
| "grad_norm": 3.531378883228978, |
| "learning_rate": 1.9863287841848283e-05, |
| "loss": 1.3284, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.062212062526223105, |
| "grad_norm": 3.4111136482320057, |
| "learning_rate": 1.986307159248905e-05, |
| "loss": 1.3503, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.06225339944484518, |
| "grad_norm": 3.1924697026460525, |
| "learning_rate": 1.9862855173413864e-05, |
| "loss": 1.3316, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.06229473636346726, |
| "grad_norm": 3.673294835187914, |
| "learning_rate": 1.9862638584626456e-05, |
| "loss": 1.378, |
| "step": 15070 |
| }, |
| { |
| "epoch": 0.06233607328208933, |
| "grad_norm": 2.944502490931273, |
| "learning_rate": 1.9862421826130548e-05, |
| "loss": 1.3505, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.06237741020071141, |
| "grad_norm": 3.166457773127537, |
| "learning_rate": 1.9862204897929875e-05, |
| "loss": 1.3274, |
| "step": 15090 |
| }, |
| { |
| "epoch": 0.062418747119333484, |
| "grad_norm": 3.539140440652841, |
| "learning_rate": 1.9861987800028167e-05, |
| "loss": 1.3373, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.06246008403795556, |
| "grad_norm": 4.111976580182342, |
| "learning_rate": 1.986177053242916e-05, |
| "loss": 1.3795, |
| "step": 15110 |
| }, |
| { |
| "epoch": 0.06250142095657764, |
| "grad_norm": 3.518730561598167, |
| "learning_rate": 1.986155309513659e-05, |
| "loss": 1.3284, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.06254275787519971, |
| "grad_norm": 3.391425624844218, |
| "learning_rate": 1.9861335488154206e-05, |
| "loss": 1.3587, |
| "step": 15130 |
| }, |
| { |
| "epoch": 0.06258409479382178, |
| "grad_norm": 3.4961442083301097, |
| "learning_rate": 1.9861117711485743e-05, |
| "loss": 1.399, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.06262543171244386, |
| "grad_norm": 2.7302506739470784, |
| "learning_rate": 1.9860899765134953e-05, |
| "loss": 1.3654, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.06266676863106593, |
| "grad_norm": 4.411982690984872, |
| "learning_rate": 1.9860681649105585e-05, |
| "loss": 1.3409, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.062708105549688, |
| "grad_norm": 3.671422586435062, |
| "learning_rate": 1.9860463363401393e-05, |
| "loss": 1.3629, |
| "step": 15170 |
| }, |
| { |
| "epoch": 0.06274944246831009, |
| "grad_norm": 3.599702261798689, |
| "learning_rate": 1.9860244908026133e-05, |
| "loss": 1.3464, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.06279077938693216, |
| "grad_norm": 3.6962151021925598, |
| "learning_rate": 1.9860026282983568e-05, |
| "loss": 1.362, |
| "step": 15190 |
| }, |
| { |
| "epoch": 0.06283211630555424, |
| "grad_norm": 3.5740776610173284, |
| "learning_rate": 1.9859807488277453e-05, |
| "loss": 1.3657, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.06287345322417631, |
| "grad_norm": 3.437476924912017, |
| "learning_rate": 1.9859588523911554e-05, |
| "loss": 1.3384, |
| "step": 15210 |
| }, |
| { |
| "epoch": 0.06291479014279838, |
| "grad_norm": 3.2699157716406373, |
| "learning_rate": 1.9859369389889642e-05, |
| "loss": 1.3658, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.06295612706142047, |
| "grad_norm": 3.123305029392182, |
| "learning_rate": 1.9859150086215487e-05, |
| "loss": 1.352, |
| "step": 15230 |
| }, |
| { |
| "epoch": 0.06299746398004254, |
| "grad_norm": 3.2830928690839354, |
| "learning_rate": 1.985893061289286e-05, |
| "loss": 1.3799, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.06303880089866461, |
| "grad_norm": 3.664715842390073, |
| "learning_rate": 1.9858710969925547e-05, |
| "loss": 1.3669, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.06308013781728669, |
| "grad_norm": 3.2013150011378952, |
| "learning_rate": 1.985849115731731e-05, |
| "loss": 1.3403, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.06312147473590876, |
| "grad_norm": 3.1107330520735643, |
| "learning_rate": 1.9858271175071946e-05, |
| "loss": 1.348, |
| "step": 15270 |
| }, |
| { |
| "epoch": 0.06316281165453083, |
| "grad_norm": 3.188091096811774, |
| "learning_rate": 1.9858051023193234e-05, |
| "loss": 1.3219, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.06320414857315292, |
| "grad_norm": 3.1313626953852705, |
| "learning_rate": 1.9857830701684967e-05, |
| "loss": 1.3622, |
| "step": 15290 |
| }, |
| { |
| "epoch": 0.06324548549177499, |
| "grad_norm": 3.1702519850910127, |
| "learning_rate": 1.985761021055093e-05, |
| "loss": 1.3546, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.06328682241039706, |
| "grad_norm": 3.252596544143132, |
| "learning_rate": 1.9857389549794917e-05, |
| "loss": 1.2552, |
| "step": 15310 |
| }, |
| { |
| "epoch": 0.06332815932901914, |
| "grad_norm": 3.0313555049861374, |
| "learning_rate": 1.985716871942073e-05, |
| "loss": 1.4033, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.06336949624764121, |
| "grad_norm": 3.4443767622891115, |
| "learning_rate": 1.985694771943217e-05, |
| "loss": 1.3893, |
| "step": 15330 |
| }, |
| { |
| "epoch": 0.06341083316626328, |
| "grad_norm": 3.616984395155513, |
| "learning_rate": 1.9856726549833034e-05, |
| "loss": 1.3499, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.06345217008488536, |
| "grad_norm": 3.50828559499885, |
| "learning_rate": 1.985650521062713e-05, |
| "loss": 1.3298, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.06349350700350744, |
| "grad_norm": 2.8467914617151244, |
| "learning_rate": 1.9856283701818268e-05, |
| "loss": 1.3307, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.06353484392212952, |
| "grad_norm": 4.069010272907543, |
| "learning_rate": 1.9856062023410257e-05, |
| "loss": 1.3341, |
| "step": 15370 |
| }, |
| { |
| "epoch": 0.06357618084075159, |
| "grad_norm": 3.554716298604244, |
| "learning_rate": 1.985584017540691e-05, |
| "loss": 1.3366, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.06361751775937366, |
| "grad_norm": 3.2437195929255123, |
| "learning_rate": 1.985561815781205e-05, |
| "loss": 1.3241, |
| "step": 15390 |
| }, |
| { |
| "epoch": 0.06365885467799574, |
| "grad_norm": 3.0814014462588495, |
| "learning_rate": 1.9855395970629497e-05, |
| "loss": 1.3086, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.06370019159661781, |
| "grad_norm": 2.845186766341529, |
| "learning_rate": 1.985517361386307e-05, |
| "loss": 1.3302, |
| "step": 15410 |
| }, |
| { |
| "epoch": 0.06374152851523988, |
| "grad_norm": 3.594783956612939, |
| "learning_rate": 1.9854951087516598e-05, |
| "loss": 1.3374, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.06378286543386197, |
| "grad_norm": 2.8854977362042815, |
| "learning_rate": 1.9854728391593904e-05, |
| "loss": 1.3326, |
| "step": 15430 |
| }, |
| { |
| "epoch": 0.06382420235248404, |
| "grad_norm": 3.5259935395974784, |
| "learning_rate": 1.985450552609883e-05, |
| "loss": 1.3081, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.06386553927110611, |
| "grad_norm": 3.245306593778846, |
| "learning_rate": 1.9854282491035203e-05, |
| "loss": 1.3746, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.06390687618972819, |
| "grad_norm": 4.071308549266746, |
| "learning_rate": 1.9854059286406866e-05, |
| "loss": 1.3783, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.06394821310835026, |
| "grad_norm": 3.247822958885961, |
| "learning_rate": 1.9853835912217657e-05, |
| "loss": 1.3411, |
| "step": 15470 |
| }, |
| { |
| "epoch": 0.06398955002697233, |
| "grad_norm": 3.35088921885468, |
| "learning_rate": 1.9853612368471416e-05, |
| "loss": 1.3769, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.06403088694559442, |
| "grad_norm": 3.311416405062516, |
| "learning_rate": 1.9853388655171998e-05, |
| "loss": 1.3546, |
| "step": 15490 |
| }, |
| { |
| "epoch": 0.06407222386421649, |
| "grad_norm": 3.423751956404864, |
| "learning_rate": 1.985316477232325e-05, |
| "loss": 1.3082, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.06411356078283857, |
| "grad_norm": 3.5994672319763072, |
| "learning_rate": 1.9852940719929017e-05, |
| "loss": 1.308, |
| "step": 15510 |
| }, |
| { |
| "epoch": 0.06415489770146064, |
| "grad_norm": 6.092109341719527, |
| "learning_rate": 1.9852716497993164e-05, |
| "loss": 1.3333, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.06419623462008271, |
| "grad_norm": 3.4870247799249574, |
| "learning_rate": 1.985249210651954e-05, |
| "loss": 1.3755, |
| "step": 15530 |
| }, |
| { |
| "epoch": 0.0642375715387048, |
| "grad_norm": 3.1557901036816687, |
| "learning_rate": 1.9852267545512016e-05, |
| "loss": 1.3237, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.06427890845732687, |
| "grad_norm": 2.9168599988216655, |
| "learning_rate": 1.9852042814974448e-05, |
| "loss": 1.3333, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.06432024537594894, |
| "grad_norm": 3.658350430782456, |
| "learning_rate": 1.9851817914910707e-05, |
| "loss": 1.3157, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.06436158229457102, |
| "grad_norm": 2.98123289160538, |
| "learning_rate": 1.9851592845324664e-05, |
| "loss": 1.3461, |
| "step": 15570 |
| }, |
| { |
| "epoch": 0.06440291921319309, |
| "grad_norm": 3.2851248740018133, |
| "learning_rate": 1.9851367606220187e-05, |
| "loss": 1.3592, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.06444425613181516, |
| "grad_norm": 2.874060228764119, |
| "learning_rate": 1.9851142197601157e-05, |
| "loss": 1.3179, |
| "step": 15590 |
| }, |
| { |
| "epoch": 0.06448559305043725, |
| "grad_norm": 3.3595179685654286, |
| "learning_rate": 1.985091661947145e-05, |
| "loss": 1.358, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.06452692996905932, |
| "grad_norm": 4.343402000280312, |
| "learning_rate": 1.9850690871834945e-05, |
| "loss": 1.3387, |
| "step": 15610 |
| }, |
| { |
| "epoch": 0.06456826688768139, |
| "grad_norm": 4.4028919030557345, |
| "learning_rate": 1.985046495469553e-05, |
| "loss": 1.3405, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.06460960380630347, |
| "grad_norm": 3.3683021821213077, |
| "learning_rate": 1.9850238868057097e-05, |
| "loss": 1.3164, |
| "step": 15630 |
| }, |
| { |
| "epoch": 0.06465094072492554, |
| "grad_norm": 3.223018835339703, |
| "learning_rate": 1.9850012611923527e-05, |
| "loss": 1.2937, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.06469227764354761, |
| "grad_norm": 3.6479375571543584, |
| "learning_rate": 1.984978618629872e-05, |
| "loss": 1.3703, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.0647336145621697, |
| "grad_norm": 3.0694724692776107, |
| "learning_rate": 1.9849559591186566e-05, |
| "loss": 1.3239, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.06477495148079176, |
| "grad_norm": 3.595788633474915, |
| "learning_rate": 1.984933282659097e-05, |
| "loss": 1.2858, |
| "step": 15670 |
| }, |
| { |
| "epoch": 0.06481628839941385, |
| "grad_norm": 2.996464746206387, |
| "learning_rate": 1.984910589251583e-05, |
| "loss": 1.3677, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.06485762531803592, |
| "grad_norm": 2.978010213666374, |
| "learning_rate": 1.9848878788965053e-05, |
| "loss": 1.3612, |
| "step": 15690 |
| }, |
| { |
| "epoch": 0.06489896223665799, |
| "grad_norm": 4.107355827588679, |
| "learning_rate": 1.9848651515942545e-05, |
| "loss": 1.3473, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.06494029915528007, |
| "grad_norm": 3.405862899284087, |
| "learning_rate": 1.984842407345222e-05, |
| "loss": 1.329, |
| "step": 15710 |
| }, |
| { |
| "epoch": 0.06498163607390214, |
| "grad_norm": 3.56806926046013, |
| "learning_rate": 1.984819646149799e-05, |
| "loss": 1.3547, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.06502297299252421, |
| "grad_norm": 2.9244416633025234, |
| "learning_rate": 1.984796868008377e-05, |
| "loss": 1.3451, |
| "step": 15730 |
| }, |
| { |
| "epoch": 0.0650643099111463, |
| "grad_norm": 3.6605360363216133, |
| "learning_rate": 1.984774072921348e-05, |
| "loss": 1.3121, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.06510564682976837, |
| "grad_norm": 3.857082230167186, |
| "learning_rate": 1.9847512608891046e-05, |
| "loss": 1.3546, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.06514698374839044, |
| "grad_norm": 3.20861184076794, |
| "learning_rate": 1.9847284319120386e-05, |
| "loss": 1.3384, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.06518832066701252, |
| "grad_norm": 4.374471400346774, |
| "learning_rate": 1.9847055859905434e-05, |
| "loss": 1.3603, |
| "step": 15770 |
| }, |
| { |
| "epoch": 0.06522965758563459, |
| "grad_norm": 3.558128348565767, |
| "learning_rate": 1.984682723125012e-05, |
| "loss": 1.3307, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.06527099450425666, |
| "grad_norm": 3.740975960278226, |
| "learning_rate": 1.984659843315838e-05, |
| "loss": 1.3565, |
| "step": 15790 |
| }, |
| { |
| "epoch": 0.06531233142287875, |
| "grad_norm": 3.0884764960813254, |
| "learning_rate": 1.9846369465634146e-05, |
| "loss": 1.3371, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.06535366834150082, |
| "grad_norm": 3.0640927344766236, |
| "learning_rate": 1.9846140328681363e-05, |
| "loss": 1.3075, |
| "step": 15810 |
| }, |
| { |
| "epoch": 0.0653950052601229, |
| "grad_norm": 3.6774626803339285, |
| "learning_rate": 1.9845911022303973e-05, |
| "loss": 1.3647, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.06543634217874497, |
| "grad_norm": 2.9365431211187065, |
| "learning_rate": 1.9845681546505915e-05, |
| "loss": 1.3086, |
| "step": 15830 |
| }, |
| { |
| "epoch": 0.06547767909736704, |
| "grad_norm": 4.305431264385432, |
| "learning_rate": 1.9845451901291145e-05, |
| "loss": 1.3348, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.06551901601598913, |
| "grad_norm": 3.032533703820296, |
| "learning_rate": 1.9845222086663615e-05, |
| "loss": 1.3527, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.0655603529346112, |
| "grad_norm": 3.3387798006802782, |
| "learning_rate": 1.9844992102627273e-05, |
| "loss": 1.3249, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.06560168985323327, |
| "grad_norm": 3.3127539852292363, |
| "learning_rate": 1.9844761949186083e-05, |
| "loss": 1.3323, |
| "step": 15870 |
| }, |
| { |
| "epoch": 0.06564302677185535, |
| "grad_norm": 3.4862694527591307, |
| "learning_rate": 1.9844531626344003e-05, |
| "loss": 1.3224, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.06568436369047742, |
| "grad_norm": 3.215035735991411, |
| "learning_rate": 1.9844301134104996e-05, |
| "loss": 1.349, |
| "step": 15890 |
| }, |
| { |
| "epoch": 0.06572570060909949, |
| "grad_norm": 3.331575129362213, |
| "learning_rate": 1.9844070472473026e-05, |
| "loss": 1.3297, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.06576703752772158, |
| "grad_norm": 3.008247289759144, |
| "learning_rate": 1.9843839641452062e-05, |
| "loss": 1.368, |
| "step": 15910 |
| }, |
| { |
| "epoch": 0.06580837444634365, |
| "grad_norm": 3.314210218559566, |
| "learning_rate": 1.984360864104608e-05, |
| "loss": 1.3318, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.06584971136496572, |
| "grad_norm": 4.460695903393647, |
| "learning_rate": 1.9843377471259056e-05, |
| "loss": 1.363, |
| "step": 15930 |
| }, |
| { |
| "epoch": 0.0658910482835878, |
| "grad_norm": 3.591421453277731, |
| "learning_rate": 1.984314613209496e-05, |
| "loss": 1.3428, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.06593238520220987, |
| "grad_norm": 3.708262991759124, |
| "learning_rate": 1.984291462355778e-05, |
| "loss": 1.3564, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.06597372212083194, |
| "grad_norm": 3.1432924399561903, |
| "learning_rate": 1.9842682945651495e-05, |
| "loss": 1.3455, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.06601505903945402, |
| "grad_norm": 3.1161735074970216, |
| "learning_rate": 1.9842451098380096e-05, |
| "loss": 1.3514, |
| "step": 15970 |
| }, |
| { |
| "epoch": 0.0660563959580761, |
| "grad_norm": 2.8632026794139875, |
| "learning_rate": 1.984221908174757e-05, |
| "loss": 1.3446, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.06609773287669818, |
| "grad_norm": 3.934735081002441, |
| "learning_rate": 1.9841986895757907e-05, |
| "loss": 1.3298, |
| "step": 15990 |
| }, |
| { |
| "epoch": 0.06613906979532025, |
| "grad_norm": 4.324584533198296, |
| "learning_rate": 1.9841754540415102e-05, |
| "loss": 1.3537, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.06618040671394232, |
| "grad_norm": 3.6119231745645166, |
| "learning_rate": 1.9841522015723164e-05, |
| "loss": 1.3343, |
| "step": 16010 |
| }, |
| { |
| "epoch": 0.0662217436325644, |
| "grad_norm": 3.1193540137010887, |
| "learning_rate": 1.984128932168608e-05, |
| "loss": 1.3752, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.06626308055118647, |
| "grad_norm": 3.1351192552066762, |
| "learning_rate": 1.984105645830786e-05, |
| "loss": 1.3289, |
| "step": 16030 |
| }, |
| { |
| "epoch": 0.06630441746980854, |
| "grad_norm": 3.5610679476787737, |
| "learning_rate": 1.9840823425592512e-05, |
| "loss": 1.3543, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.06634575438843063, |
| "grad_norm": 3.5677181048737943, |
| "learning_rate": 1.984059022354404e-05, |
| "loss": 1.3483, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.0663870913070527, |
| "grad_norm": 3.6239317747912385, |
| "learning_rate": 1.9840356852166465e-05, |
| "loss": 1.3511, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.06642842822567477, |
| "grad_norm": 3.7952366513012636, |
| "learning_rate": 1.9840123311463803e-05, |
| "loss": 1.33, |
| "step": 16070 |
| }, |
| { |
| "epoch": 0.06646976514429685, |
| "grad_norm": 3.345456868567261, |
| "learning_rate": 1.9839889601440064e-05, |
| "loss": 1.3226, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.06651110206291892, |
| "grad_norm": 3.0646963347861194, |
| "learning_rate": 1.9839655722099277e-05, |
| "loss": 1.3142, |
| "step": 16090 |
| }, |
| { |
| "epoch": 0.06655243898154099, |
| "grad_norm": 3.1796454557601956, |
| "learning_rate": 1.9839421673445457e-05, |
| "loss": 1.3363, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.06659377590016308, |
| "grad_norm": 2.7604488860103453, |
| "learning_rate": 1.9839187455482646e-05, |
| "loss": 1.3453, |
| "step": 16110 |
| }, |
| { |
| "epoch": 0.06663511281878515, |
| "grad_norm": 3.1473856534090885, |
| "learning_rate": 1.9838953068214862e-05, |
| "loss": 1.3146, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.06667644973740723, |
| "grad_norm": 3.323185903362043, |
| "learning_rate": 1.983871851164614e-05, |
| "loss": 1.3013, |
| "step": 16130 |
| }, |
| { |
| "epoch": 0.0667177866560293, |
| "grad_norm": 3.0139283592638315, |
| "learning_rate": 1.9838483785780522e-05, |
| "loss": 1.3761, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.06675912357465137, |
| "grad_norm": 3.2332451066710535, |
| "learning_rate": 1.9838248890622043e-05, |
| "loss": 1.341, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.06680046049327346, |
| "grad_norm": 3.0517814224449826, |
| "learning_rate": 1.9838013826174745e-05, |
| "loss": 1.3003, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.06684179741189553, |
| "grad_norm": 4.164899927708623, |
| "learning_rate": 1.983777859244267e-05, |
| "loss": 1.3247, |
| "step": 16170 |
| }, |
| { |
| "epoch": 0.0668831343305176, |
| "grad_norm": 2.992166731140292, |
| "learning_rate": 1.983754318942987e-05, |
| "loss": 1.2762, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.06692447124913968, |
| "grad_norm": 3.774796705592006, |
| "learning_rate": 1.98373076171404e-05, |
| "loss": 1.3652, |
| "step": 16190 |
| }, |
| { |
| "epoch": 0.06696580816776175, |
| "grad_norm": 3.6727461962589296, |
| "learning_rate": 1.98370718755783e-05, |
| "loss": 1.3433, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.06700714508638382, |
| "grad_norm": 2.759830849179559, |
| "learning_rate": 1.983683596474764e-05, |
| "loss": 1.2917, |
| "step": 16210 |
| }, |
| { |
| "epoch": 0.0670484820050059, |
| "grad_norm": 4.142227423223113, |
| "learning_rate": 1.983659988465247e-05, |
| "loss": 1.3287, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.06708981892362798, |
| "grad_norm": 3.1361138389830305, |
| "learning_rate": 1.9836363635296856e-05, |
| "loss": 1.3526, |
| "step": 16230 |
| }, |
| { |
| "epoch": 0.06713115584225005, |
| "grad_norm": 4.684718067129147, |
| "learning_rate": 1.9836127216684864e-05, |
| "loss": 1.3398, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.06717249276087213, |
| "grad_norm": 3.9305271840952325, |
| "learning_rate": 1.9835890628820564e-05, |
| "loss": 1.3061, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.0672138296794942, |
| "grad_norm": 3.6945496877183754, |
| "learning_rate": 1.983565387170802e-05, |
| "loss": 1.3046, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.06725516659811627, |
| "grad_norm": 2.913410222912495, |
| "learning_rate": 1.983541694535131e-05, |
| "loss": 1.3439, |
| "step": 16270 |
| }, |
| { |
| "epoch": 0.06729650351673835, |
| "grad_norm": 3.4482083464660143, |
| "learning_rate": 1.9835179849754517e-05, |
| "loss": 1.3282, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.06733784043536042, |
| "grad_norm": 4.311927057182653, |
| "learning_rate": 1.983494258492171e-05, |
| "loss": 1.3156, |
| "step": 16290 |
| }, |
| { |
| "epoch": 0.06737917735398251, |
| "grad_norm": 3.5718375129378015, |
| "learning_rate": 1.9834705150856973e-05, |
| "loss": 1.3088, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.06742051427260458, |
| "grad_norm": 3.6069557479034704, |
| "learning_rate": 1.98344675475644e-05, |
| "loss": 1.3259, |
| "step": 16310 |
| }, |
| { |
| "epoch": 0.06746185119122665, |
| "grad_norm": 3.2551197753818393, |
| "learning_rate": 1.9834229775048076e-05, |
| "loss": 1.3389, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.06750318810984873, |
| "grad_norm": 3.4034457092581536, |
| "learning_rate": 1.9833991833312086e-05, |
| "loss": 1.3396, |
| "step": 16330 |
| }, |
| { |
| "epoch": 0.0675445250284708, |
| "grad_norm": 2.9661916406960858, |
| "learning_rate": 1.9833753722360534e-05, |
| "loss": 1.2989, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.06758586194709287, |
| "grad_norm": 5.279255386120152, |
| "learning_rate": 1.983351544219751e-05, |
| "loss": 1.3283, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.06762719886571496, |
| "grad_norm": 3.8221920564477028, |
| "learning_rate": 1.9833276992827117e-05, |
| "loss": 1.2918, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.06766853578433703, |
| "grad_norm": 3.2255728423614163, |
| "learning_rate": 1.9833038374253456e-05, |
| "loss": 1.327, |
| "step": 16370 |
| }, |
| { |
| "epoch": 0.0677098727029591, |
| "grad_norm": 3.2355832791038464, |
| "learning_rate": 1.9832799586480637e-05, |
| "loss": 1.3204, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.06775120962158118, |
| "grad_norm": 3.2467585154210155, |
| "learning_rate": 1.9832560629512767e-05, |
| "loss": 1.3338, |
| "step": 16390 |
| }, |
| { |
| "epoch": 0.06779254654020325, |
| "grad_norm": 4.988429472619141, |
| "learning_rate": 1.9832321503353954e-05, |
| "loss": 1.3876, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.06783388345882532, |
| "grad_norm": 3.446832489576954, |
| "learning_rate": 1.9832082208008317e-05, |
| "loss": 1.3233, |
| "step": 16410 |
| }, |
| { |
| "epoch": 0.0678752203774474, |
| "grad_norm": 3.605129460226094, |
| "learning_rate": 1.9831842743479975e-05, |
| "loss": 1.3386, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.06791655729606948, |
| "grad_norm": 4.275587631874799, |
| "learning_rate": 1.9831603109773044e-05, |
| "loss": 1.3613, |
| "step": 16430 |
| }, |
| { |
| "epoch": 0.06795789421469156, |
| "grad_norm": 4.1009269376864665, |
| "learning_rate": 1.983136330689165e-05, |
| "loss": 1.3662, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.06799923113331363, |
| "grad_norm": 2.9883770968491565, |
| "learning_rate": 1.983112333483992e-05, |
| "loss": 1.3304, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.0680405680519357, |
| "grad_norm": 2.9575768154010085, |
| "learning_rate": 1.983088319362198e-05, |
| "loss": 1.3075, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.06808190497055779, |
| "grad_norm": 3.1295830557654147, |
| "learning_rate": 1.9830642883241967e-05, |
| "loss": 1.3311, |
| "step": 16470 |
| }, |
| { |
| "epoch": 0.06812324188917986, |
| "grad_norm": 3.6574005023751774, |
| "learning_rate": 1.9830402403704008e-05, |
| "loss": 1.2925, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.06816457880780193, |
| "grad_norm": 3.080968609055009, |
| "learning_rate": 1.9830161755012255e-05, |
| "loss": 1.3156, |
| "step": 16490 |
| }, |
| { |
| "epoch": 0.06820591572642401, |
| "grad_norm": 3.0330618488937553, |
| "learning_rate": 1.9829920937170835e-05, |
| "loss": 1.3314, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.06824725264504608, |
| "grad_norm": 3.3116009543610665, |
| "learning_rate": 1.9829679950183895e-05, |
| "loss": 1.3034, |
| "step": 16510 |
| }, |
| { |
| "epoch": 0.06828858956366815, |
| "grad_norm": 3.783202270094679, |
| "learning_rate": 1.9829438794055584e-05, |
| "loss": 1.3313, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.06832992648229023, |
| "grad_norm": 3.969686559597743, |
| "learning_rate": 1.9829197468790054e-05, |
| "loss": 1.2911, |
| "step": 16530 |
| }, |
| { |
| "epoch": 0.0683712634009123, |
| "grad_norm": 3.580580495241376, |
| "learning_rate": 1.9828955974391455e-05, |
| "loss": 1.2912, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.06841260031953438, |
| "grad_norm": 2.9992747255679033, |
| "learning_rate": 1.982871431086394e-05, |
| "loss": 1.302, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.06845393723815646, |
| "grad_norm": 3.5773285600866256, |
| "learning_rate": 1.9828472478211673e-05, |
| "loss": 1.334, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.06849527415677853, |
| "grad_norm": 3.647217700504523, |
| "learning_rate": 1.982823047643881e-05, |
| "loss": 1.3137, |
| "step": 16570 |
| }, |
| { |
| "epoch": 0.0685366110754006, |
| "grad_norm": 3.7929582820880428, |
| "learning_rate": 1.982798830554952e-05, |
| "loss": 1.3598, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.06857794799402268, |
| "grad_norm": 3.3921261114157817, |
| "learning_rate": 1.982774596554796e-05, |
| "loss": 1.3159, |
| "step": 16590 |
| }, |
| { |
| "epoch": 0.06861928491264475, |
| "grad_norm": 3.948788035775027, |
| "learning_rate": 1.9827503456438314e-05, |
| "loss": 1.3487, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.06866062183126684, |
| "grad_norm": 3.4434803120058595, |
| "learning_rate": 1.9827260778224744e-05, |
| "loss": 1.3611, |
| "step": 16610 |
| }, |
| { |
| "epoch": 0.06870195874988891, |
| "grad_norm": 3.345318165402726, |
| "learning_rate": 1.9827017930911433e-05, |
| "loss": 1.3214, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.06874329566851098, |
| "grad_norm": 3.3238354847739675, |
| "learning_rate": 1.9826774914502554e-05, |
| "loss": 1.3415, |
| "step": 16630 |
| }, |
| { |
| "epoch": 0.06878463258713306, |
| "grad_norm": 3.3341319261133773, |
| "learning_rate": 1.9826531729002293e-05, |
| "loss": 1.2814, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.06882596950575513, |
| "grad_norm": 3.2557833923669937, |
| "learning_rate": 1.982628837441483e-05, |
| "loss": 1.3692, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.0688673064243772, |
| "grad_norm": 2.7035167555145545, |
| "learning_rate": 1.9826044850744358e-05, |
| "loss": 1.3045, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.06890864334299929, |
| "grad_norm": 3.2563919103242167, |
| "learning_rate": 1.9825801157995065e-05, |
| "loss": 1.2807, |
| "step": 16670 |
| }, |
| { |
| "epoch": 0.06894998026162136, |
| "grad_norm": 3.369904458766467, |
| "learning_rate": 1.9825557296171143e-05, |
| "loss": 1.2897, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.06899131718024343, |
| "grad_norm": 3.064586151511127, |
| "learning_rate": 1.982531326527679e-05, |
| "loss": 1.3332, |
| "step": 16690 |
| }, |
| { |
| "epoch": 0.06903265409886551, |
| "grad_norm": 3.0263447019462753, |
| "learning_rate": 1.9825069065316204e-05, |
| "loss": 1.2825, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.06907399101748758, |
| "grad_norm": 4.065735171694178, |
| "learning_rate": 1.9824824696293584e-05, |
| "loss": 1.2698, |
| "step": 16710 |
| }, |
| { |
| "epoch": 0.06911532793610965, |
| "grad_norm": 3.4683529768298262, |
| "learning_rate": 1.9824580158213142e-05, |
| "loss": 1.3135, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.06915666485473174, |
| "grad_norm": 2.955425396507506, |
| "learning_rate": 1.9824335451079083e-05, |
| "loss": 1.3571, |
| "step": 16730 |
| }, |
| { |
| "epoch": 0.0691980017733538, |
| "grad_norm": 3.099433278967918, |
| "learning_rate": 1.982409057489561e-05, |
| "loss": 1.3273, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.06923933869197589, |
| "grad_norm": 2.8197838698402093, |
| "learning_rate": 1.982384552966695e-05, |
| "loss": 1.3158, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.06928067561059796, |
| "grad_norm": 3.422610473722703, |
| "learning_rate": 1.982360031539731e-05, |
| "loss": 1.3072, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.06932201252922003, |
| "grad_norm": 3.654024548878873, |
| "learning_rate": 1.9823354932090913e-05, |
| "loss": 1.3174, |
| "step": 16770 |
| }, |
| { |
| "epoch": 0.06936334944784212, |
| "grad_norm": 2.69963783380391, |
| "learning_rate": 1.982310937975198e-05, |
| "loss": 1.301, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.06940468636646419, |
| "grad_norm": 3.119208859314543, |
| "learning_rate": 1.9822863658384736e-05, |
| "loss": 1.2915, |
| "step": 16790 |
| }, |
| { |
| "epoch": 0.06944602328508626, |
| "grad_norm": 3.983735003830943, |
| "learning_rate": 1.982261776799341e-05, |
| "loss": 1.3029, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.06948736020370834, |
| "grad_norm": 2.8833424065520292, |
| "learning_rate": 1.9822371708582236e-05, |
| "loss": 1.309, |
| "step": 16810 |
| }, |
| { |
| "epoch": 0.06952869712233041, |
| "grad_norm": 3.5651047449269138, |
| "learning_rate": 1.9822125480155442e-05, |
| "loss": 1.3408, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.06957003404095248, |
| "grad_norm": 2.9084817767691344, |
| "learning_rate": 1.982187908271727e-05, |
| "loss": 1.3217, |
| "step": 16830 |
| }, |
| { |
| "epoch": 0.06961137095957456, |
| "grad_norm": 3.38644936958733, |
| "learning_rate": 1.982163251627196e-05, |
| "loss": 1.3094, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.06965270787819663, |
| "grad_norm": 3.7040548837026384, |
| "learning_rate": 1.9821385780823748e-05, |
| "loss": 1.2973, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.0696940447968187, |
| "grad_norm": 3.1936456120357435, |
| "learning_rate": 1.982113887637689e-05, |
| "loss": 1.3362, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.06973538171544079, |
| "grad_norm": 3.676850292318552, |
| "learning_rate": 1.9820891802935623e-05, |
| "loss": 1.2947, |
| "step": 16870 |
| }, |
| { |
| "epoch": 0.06977671863406286, |
| "grad_norm": 3.484703646839946, |
| "learning_rate": 1.9820644560504207e-05, |
| "loss": 1.2488, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.06981805555268493, |
| "grad_norm": 3.4336203726483654, |
| "learning_rate": 1.9820397149086892e-05, |
| "loss": 1.3372, |
| "step": 16890 |
| }, |
| { |
| "epoch": 0.06985939247130701, |
| "grad_norm": 3.4895422854739437, |
| "learning_rate": 1.9820149568687937e-05, |
| "loss": 1.3434, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.06990072938992908, |
| "grad_norm": 3.5294671703855545, |
| "learning_rate": 1.98199018193116e-05, |
| "loss": 1.3299, |
| "step": 16910 |
| }, |
| { |
| "epoch": 0.06994206630855117, |
| "grad_norm": 4.136918538506862, |
| "learning_rate": 1.9819653900962153e-05, |
| "loss": 1.3082, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.06998340322717324, |
| "grad_norm": 3.1337022113010975, |
| "learning_rate": 1.981940581364385e-05, |
| "loss": 1.3131, |
| "step": 16930 |
| }, |
| { |
| "epoch": 0.07002474014579531, |
| "grad_norm": 3.3461906954820115, |
| "learning_rate": 1.9819157557360965e-05, |
| "loss": 1.3533, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.07006607706441739, |
| "grad_norm": 2.945652884357033, |
| "learning_rate": 1.981890913211777e-05, |
| "loss": 1.333, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.07010741398303946, |
| "grad_norm": 3.8269320260288375, |
| "learning_rate": 1.981866053791854e-05, |
| "loss": 1.3021, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.07014875090166153, |
| "grad_norm": 3.373539442185096, |
| "learning_rate": 1.9818411774767555e-05, |
| "loss": 1.3621, |
| "step": 16970 |
| }, |
| { |
| "epoch": 0.07019008782028362, |
| "grad_norm": 3.1789158671874724, |
| "learning_rate": 1.9818162842669087e-05, |
| "loss": 1.3357, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.07023142473890569, |
| "grad_norm": 3.379334004765994, |
| "learning_rate": 1.981791374162743e-05, |
| "loss": 1.3328, |
| "step": 16990 |
| }, |
| { |
| "epoch": 0.07027276165752776, |
| "grad_norm": 3.3263838295875794, |
| "learning_rate": 1.981766447164686e-05, |
| "loss": 1.3177, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.07031409857614984, |
| "grad_norm": 3.4846748393089744, |
| "learning_rate": 1.9817415032731676e-05, |
| "loss": 1.3088, |
| "step": 17010 |
| }, |
| { |
| "epoch": 0.07035543549477191, |
| "grad_norm": 3.1166768241545526, |
| "learning_rate": 1.9817165424886165e-05, |
| "loss": 1.3168, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.07039677241339398, |
| "grad_norm": 3.880062668262267, |
| "learning_rate": 1.9816915648114623e-05, |
| "loss": 1.3071, |
| "step": 17030 |
| }, |
| { |
| "epoch": 0.07043810933201607, |
| "grad_norm": 3.511807529042466, |
| "learning_rate": 1.9816665702421344e-05, |
| "loss": 1.3409, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.07047944625063814, |
| "grad_norm": 3.1993990782018256, |
| "learning_rate": 1.9816415587810636e-05, |
| "loss": 1.2918, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.07052078316926022, |
| "grad_norm": 2.9930648253290943, |
| "learning_rate": 1.98161653042868e-05, |
| "loss": 1.2926, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.07056212008788229, |
| "grad_norm": 3.0399828063698116, |
| "learning_rate": 1.981591485185414e-05, |
| "loss": 1.3183, |
| "step": 17070 |
| }, |
| { |
| "epoch": 0.07060345700650436, |
| "grad_norm": 3.233401985951665, |
| "learning_rate": 1.981566423051697e-05, |
| "loss": 1.3262, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.07064479392512645, |
| "grad_norm": 2.891852783093535, |
| "learning_rate": 1.9815413440279597e-05, |
| "loss": 1.2882, |
| "step": 17090 |
| }, |
| { |
| "epoch": 0.07068613084374852, |
| "grad_norm": 2.861582978848217, |
| "learning_rate": 1.9815162481146345e-05, |
| "loss": 1.3417, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.07072746776237059, |
| "grad_norm": 3.657647746254238, |
| "learning_rate": 1.981491135312152e-05, |
| "loss": 1.3239, |
| "step": 17110 |
| }, |
| { |
| "epoch": 0.07076880468099267, |
| "grad_norm": 3.422931095645904, |
| "learning_rate": 1.9814660056209454e-05, |
| "loss": 1.3471, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.07081014159961474, |
| "grad_norm": 3.233056855771981, |
| "learning_rate": 1.9814408590414466e-05, |
| "loss": 1.342, |
| "step": 17130 |
| }, |
| { |
| "epoch": 0.07085147851823681, |
| "grad_norm": 2.8978002673834706, |
| "learning_rate": 1.9814156955740885e-05, |
| "loss": 1.3526, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.0708928154368589, |
| "grad_norm": 3.1044166179845227, |
| "learning_rate": 1.981390515219304e-05, |
| "loss": 1.3338, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.07093415235548096, |
| "grad_norm": 3.4443852171387963, |
| "learning_rate": 1.9813653179775263e-05, |
| "loss": 1.2798, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.07097548927410303, |
| "grad_norm": 3.4197392236572925, |
| "learning_rate": 1.9813401038491893e-05, |
| "loss": 1.3278, |
| "step": 17170 |
| }, |
| { |
| "epoch": 0.07101682619272512, |
| "grad_norm": 2.8656903185698868, |
| "learning_rate": 1.9813148728347263e-05, |
| "loss": 1.3044, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.07105816311134719, |
| "grad_norm": 3.4951853787951457, |
| "learning_rate": 1.981289624934572e-05, |
| "loss": 1.3731, |
| "step": 17190 |
| }, |
| { |
| "epoch": 0.07109950002996926, |
| "grad_norm": 3.0021246003594646, |
| "learning_rate": 1.981264360149161e-05, |
| "loss": 1.2953, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.07114083694859134, |
| "grad_norm": 3.078524102988392, |
| "learning_rate": 1.981239078478927e-05, |
| "loss": 1.3536, |
| "step": 17210 |
| }, |
| { |
| "epoch": 0.07118217386721341, |
| "grad_norm": 3.6935463858453805, |
| "learning_rate": 1.981213779924306e-05, |
| "loss": 1.3386, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.0712235107858355, |
| "grad_norm": 3.2508773899525036, |
| "learning_rate": 1.9811884644857332e-05, |
| "loss": 1.2929, |
| "step": 17230 |
| }, |
| { |
| "epoch": 0.07126484770445757, |
| "grad_norm": 4.11272659457257, |
| "learning_rate": 1.9811631321636438e-05, |
| "loss": 1.3376, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.07130618462307964, |
| "grad_norm": 3.3031386708100734, |
| "learning_rate": 1.9811377829584738e-05, |
| "loss": 1.3078, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.07134752154170172, |
| "grad_norm": 3.721567213304264, |
| "learning_rate": 1.9811124168706598e-05, |
| "loss": 1.3135, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.07138885846032379, |
| "grad_norm": 3.3077256345455712, |
| "learning_rate": 1.981087033900638e-05, |
| "loss": 1.2727, |
| "step": 17270 |
| }, |
| { |
| "epoch": 0.07143019537894586, |
| "grad_norm": 3.1365945025331716, |
| "learning_rate": 1.9810616340488448e-05, |
| "loss": 1.3082, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.07147153229756795, |
| "grad_norm": 3.0095750827782095, |
| "learning_rate": 1.981036217315718e-05, |
| "loss": 1.3422, |
| "step": 17290 |
| }, |
| { |
| "epoch": 0.07151286921619002, |
| "grad_norm": 3.879458456856346, |
| "learning_rate": 1.9810107837016943e-05, |
| "loss": 1.3444, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.07155420613481209, |
| "grad_norm": 3.5419514306232145, |
| "learning_rate": 1.9809853332072118e-05, |
| "loss": 1.3495, |
| "step": 17310 |
| }, |
| { |
| "epoch": 0.07159554305343417, |
| "grad_norm": 3.309317975252579, |
| "learning_rate": 1.9809598658327084e-05, |
| "loss": 1.3249, |
| "step": 17320 |
| }, |
| { |
| "epoch": 0.07163687997205624, |
| "grad_norm": 3.0972043541501484, |
| "learning_rate": 1.9809343815786218e-05, |
| "loss": 1.3289, |
| "step": 17330 |
| }, |
| { |
| "epoch": 0.07167821689067831, |
| "grad_norm": 3.278786808325931, |
| "learning_rate": 1.9809088804453913e-05, |
| "loss": 1.2998, |
| "step": 17340 |
| }, |
| { |
| "epoch": 0.0717195538093004, |
| "grad_norm": 4.238835899003122, |
| "learning_rate": 1.9808833624334547e-05, |
| "loss": 1.3311, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.07176089072792247, |
| "grad_norm": 2.92277331992324, |
| "learning_rate": 1.980857827543252e-05, |
| "loss": 1.2915, |
| "step": 17360 |
| }, |
| { |
| "epoch": 0.07180222764654455, |
| "grad_norm": 3.376501582542177, |
| "learning_rate": 1.9808322757752227e-05, |
| "loss": 1.3324, |
| "step": 17370 |
| }, |
| { |
| "epoch": 0.07184356456516662, |
| "grad_norm": 2.9710666955071754, |
| "learning_rate": 1.9808067071298057e-05, |
| "loss": 1.28, |
| "step": 17380 |
| }, |
| { |
| "epoch": 0.07188490148378869, |
| "grad_norm": 3.651318543567687, |
| "learning_rate": 1.9807811216074412e-05, |
| "loss": 1.2827, |
| "step": 17390 |
| }, |
| { |
| "epoch": 0.07192623840241077, |
| "grad_norm": 4.002270792783564, |
| "learning_rate": 1.9807555192085697e-05, |
| "loss": 1.2869, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.07196757532103285, |
| "grad_norm": 3.7494249341770027, |
| "learning_rate": 1.9807298999336316e-05, |
| "loss": 1.368, |
| "step": 17410 |
| }, |
| { |
| "epoch": 0.07200891223965492, |
| "grad_norm": 3.533387599209301, |
| "learning_rate": 1.9807042637830677e-05, |
| "loss": 1.3093, |
| "step": 17420 |
| }, |
| { |
| "epoch": 0.072050249158277, |
| "grad_norm": 3.3204434550557096, |
| "learning_rate": 1.980678610757319e-05, |
| "loss": 1.3557, |
| "step": 17430 |
| }, |
| { |
| "epoch": 0.07209158607689907, |
| "grad_norm": 3.1416247569905935, |
| "learning_rate": 1.9806529408568274e-05, |
| "loss": 1.2784, |
| "step": 17440 |
| }, |
| { |
| "epoch": 0.07213292299552114, |
| "grad_norm": 3.117796981445201, |
| "learning_rate": 1.980627254082034e-05, |
| "loss": 1.2882, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.07217425991414322, |
| "grad_norm": 3.13339952835746, |
| "learning_rate": 1.9806015504333812e-05, |
| "loss": 1.3144, |
| "step": 17460 |
| }, |
| { |
| "epoch": 0.0722155968327653, |
| "grad_norm": 3.7609938207052074, |
| "learning_rate": 1.9805758299113115e-05, |
| "loss": 1.329, |
| "step": 17470 |
| }, |
| { |
| "epoch": 0.07225693375138736, |
| "grad_norm": 3.1345386262954533, |
| "learning_rate": 1.980550092516267e-05, |
| "loss": 1.3049, |
| "step": 17480 |
| }, |
| { |
| "epoch": 0.07229827067000945, |
| "grad_norm": 3.7469777473015573, |
| "learning_rate": 1.98052433824869e-05, |
| "loss": 1.3096, |
| "step": 17490 |
| }, |
| { |
| "epoch": 0.07233960758863152, |
| "grad_norm": 2.9933582431757073, |
| "learning_rate": 1.9804985671090252e-05, |
| "loss": 1.3274, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.07238094450725359, |
| "grad_norm": 3.182564552885168, |
| "learning_rate": 1.980472779097715e-05, |
| "loss": 1.2891, |
| "step": 17510 |
| }, |
| { |
| "epoch": 0.07242228142587567, |
| "grad_norm": 3.8813847623233744, |
| "learning_rate": 1.9804469742152035e-05, |
| "loss": 1.2941, |
| "step": 17520 |
| }, |
| { |
| "epoch": 0.07246361834449774, |
| "grad_norm": 3.388309470760254, |
| "learning_rate": 1.9804211524619345e-05, |
| "loss": 1.3129, |
| "step": 17530 |
| }, |
| { |
| "epoch": 0.07250495526311983, |
| "grad_norm": 3.0370861904609305, |
| "learning_rate": 1.9803953138383523e-05, |
| "loss": 1.2816, |
| "step": 17540 |
| }, |
| { |
| "epoch": 0.0725462921817419, |
| "grad_norm": 3.255739551897295, |
| "learning_rate": 1.980369458344902e-05, |
| "loss": 1.3048, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.07258762910036397, |
| "grad_norm": 3.2197652995956325, |
| "learning_rate": 1.9803435859820278e-05, |
| "loss": 1.3401, |
| "step": 17560 |
| }, |
| { |
| "epoch": 0.07262896601898605, |
| "grad_norm": 3.144102345108867, |
| "learning_rate": 1.9803176967501752e-05, |
| "loss": 1.3093, |
| "step": 17570 |
| }, |
| { |
| "epoch": 0.07267030293760812, |
| "grad_norm": 3.255959441623639, |
| "learning_rate": 1.98029179064979e-05, |
| "loss": 1.3107, |
| "step": 17580 |
| }, |
| { |
| "epoch": 0.07271163985623019, |
| "grad_norm": 3.300836279803978, |
| "learning_rate": 1.9802658676813177e-05, |
| "loss": 1.2793, |
| "step": 17590 |
| }, |
| { |
| "epoch": 0.07275297677485228, |
| "grad_norm": 3.1982961633057307, |
| "learning_rate": 1.980239927845204e-05, |
| "loss": 1.293, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.07279431369347435, |
| "grad_norm": 2.7735162621143066, |
| "learning_rate": 1.980213971141896e-05, |
| "loss": 1.2829, |
| "step": 17610 |
| }, |
| { |
| "epoch": 0.07283565061209642, |
| "grad_norm": 3.1127853241409524, |
| "learning_rate": 1.9801879975718397e-05, |
| "loss": 1.3038, |
| "step": 17620 |
| }, |
| { |
| "epoch": 0.0728769875307185, |
| "grad_norm": 4.4882400102517, |
| "learning_rate": 1.9801620071354823e-05, |
| "loss": 1.3252, |
| "step": 17630 |
| }, |
| { |
| "epoch": 0.07291832444934057, |
| "grad_norm": 3.0527302986554474, |
| "learning_rate": 1.980135999833271e-05, |
| "loss": 1.3324, |
| "step": 17640 |
| }, |
| { |
| "epoch": 0.07295966136796264, |
| "grad_norm": 2.6769380820391637, |
| "learning_rate": 1.9801099756656534e-05, |
| "loss": 1.3472, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.07300099828658473, |
| "grad_norm": 2.8240010897597796, |
| "learning_rate": 1.980083934633077e-05, |
| "loss": 1.3072, |
| "step": 17660 |
| }, |
| { |
| "epoch": 0.0730423352052068, |
| "grad_norm": 3.0678813341238387, |
| "learning_rate": 1.9800578767359905e-05, |
| "loss": 1.3385, |
| "step": 17670 |
| }, |
| { |
| "epoch": 0.07308367212382888, |
| "grad_norm": 3.9303518801684056, |
| "learning_rate": 1.9800318019748414e-05, |
| "loss": 1.3024, |
| "step": 17680 |
| }, |
| { |
| "epoch": 0.07312500904245095, |
| "grad_norm": 3.4138877073872367, |
| "learning_rate": 1.980005710350079e-05, |
| "loss": 1.3219, |
| "step": 17690 |
| }, |
| { |
| "epoch": 0.07316634596107302, |
| "grad_norm": 2.9612831417650574, |
| "learning_rate": 1.9799796018621523e-05, |
| "loss": 1.2972, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.0732076828796951, |
| "grad_norm": 3.5999340523181167, |
| "learning_rate": 1.9799534765115106e-05, |
| "loss": 1.2879, |
| "step": 17710 |
| }, |
| { |
| "epoch": 0.07324901979831717, |
| "grad_norm": 3.243493407552293, |
| "learning_rate": 1.9799273342986027e-05, |
| "loss": 1.3312, |
| "step": 17720 |
| }, |
| { |
| "epoch": 0.07329035671693925, |
| "grad_norm": 3.1251098781902678, |
| "learning_rate": 1.979901175223879e-05, |
| "loss": 1.283, |
| "step": 17730 |
| }, |
| { |
| "epoch": 0.07333169363556133, |
| "grad_norm": 3.016302925303861, |
| "learning_rate": 1.97987499928779e-05, |
| "loss": 1.2653, |
| "step": 17740 |
| }, |
| { |
| "epoch": 0.0733730305541834, |
| "grad_norm": 3.0676270506907373, |
| "learning_rate": 1.9798488064907854e-05, |
| "loss": 1.3463, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.07341436747280547, |
| "grad_norm": 3.33093266064901, |
| "learning_rate": 1.9798225968333162e-05, |
| "loss": 1.2925, |
| "step": 17760 |
| }, |
| { |
| "epoch": 0.07345570439142755, |
| "grad_norm": 2.9945442082263707, |
| "learning_rate": 1.9797963703158338e-05, |
| "loss": 1.3073, |
| "step": 17770 |
| }, |
| { |
| "epoch": 0.07349704131004962, |
| "grad_norm": 2.7596863662002145, |
| "learning_rate": 1.9797701269387886e-05, |
| "loss": 1.3014, |
| "step": 17780 |
| }, |
| { |
| "epoch": 0.0735383782286717, |
| "grad_norm": 3.533092084537244, |
| "learning_rate": 1.979743866702633e-05, |
| "loss": 1.2791, |
| "step": 17790 |
| }, |
| { |
| "epoch": 0.07357971514729378, |
| "grad_norm": 4.416928923354257, |
| "learning_rate": 1.9797175896078183e-05, |
| "loss": 1.3187, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.07362105206591585, |
| "grad_norm": 3.4889626149350983, |
| "learning_rate": 1.9796912956547968e-05, |
| "loss": 1.3279, |
| "step": 17810 |
| }, |
| { |
| "epoch": 0.07366238898453792, |
| "grad_norm": 2.8612849638284636, |
| "learning_rate": 1.979664984844021e-05, |
| "loss": 1.286, |
| "step": 17820 |
| }, |
| { |
| "epoch": 0.07370372590316, |
| "grad_norm": 3.48443371509397, |
| "learning_rate": 1.9796386571759437e-05, |
| "loss": 1.3143, |
| "step": 17830 |
| }, |
| { |
| "epoch": 0.07374506282178207, |
| "grad_norm": 3.744181699697567, |
| "learning_rate": 1.979612312651018e-05, |
| "loss": 1.3133, |
| "step": 17840 |
| }, |
| { |
| "epoch": 0.07378639974040416, |
| "grad_norm": 3.0686519126186056, |
| "learning_rate": 1.9795859512696974e-05, |
| "loss": 1.3136, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.07382773665902623, |
| "grad_norm": 3.067495520118401, |
| "learning_rate": 1.9795595730324347e-05, |
| "loss": 1.3026, |
| "step": 17860 |
| }, |
| { |
| "epoch": 0.0738690735776483, |
| "grad_norm": 3.39693600568867, |
| "learning_rate": 1.9795331779396846e-05, |
| "loss": 1.3045, |
| "step": 17870 |
| }, |
| { |
| "epoch": 0.07391041049627038, |
| "grad_norm": 3.593857826641827, |
| "learning_rate": 1.9795067659919008e-05, |
| "loss": 1.3278, |
| "step": 17880 |
| }, |
| { |
| "epoch": 0.07395174741489245, |
| "grad_norm": 3.5924096708123985, |
| "learning_rate": 1.9794803371895383e-05, |
| "loss": 1.2578, |
| "step": 17890 |
| }, |
| { |
| "epoch": 0.07399308433351452, |
| "grad_norm": 3.1548306467737546, |
| "learning_rate": 1.9794538915330514e-05, |
| "loss": 1.3145, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.0740344212521366, |
| "grad_norm": 3.3874308529909736, |
| "learning_rate": 1.979427429022895e-05, |
| "loss": 1.2635, |
| "step": 17910 |
| }, |
| { |
| "epoch": 0.07407575817075868, |
| "grad_norm": 3.574121943476516, |
| "learning_rate": 1.979400949659525e-05, |
| "loss": 1.3333, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.07411709508938075, |
| "grad_norm": 3.277207731814223, |
| "learning_rate": 1.9793744534433968e-05, |
| "loss": 1.3438, |
| "step": 17930 |
| }, |
| { |
| "epoch": 0.07415843200800283, |
| "grad_norm": 3.8968392073501152, |
| "learning_rate": 1.979347940374966e-05, |
| "loss": 1.313, |
| "step": 17940 |
| }, |
| { |
| "epoch": 0.0741997689266249, |
| "grad_norm": 3.083444751622717, |
| "learning_rate": 1.9793214104546895e-05, |
| "loss": 1.3146, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.07424110584524697, |
| "grad_norm": 3.2007909625237208, |
| "learning_rate": 1.9792948636830235e-05, |
| "loss": 1.2767, |
| "step": 17960 |
| }, |
| { |
| "epoch": 0.07428244276386906, |
| "grad_norm": 3.475285388439305, |
| "learning_rate": 1.979268300060424e-05, |
| "loss": 1.3113, |
| "step": 17970 |
| }, |
| { |
| "epoch": 0.07432377968249113, |
| "grad_norm": 3.0728058276568064, |
| "learning_rate": 1.9792417195873496e-05, |
| "loss": 1.2625, |
| "step": 17980 |
| }, |
| { |
| "epoch": 0.07436511660111321, |
| "grad_norm": 3.1462512846060595, |
| "learning_rate": 1.9792151222642565e-05, |
| "loss": 1.2669, |
| "step": 17990 |
| }, |
| { |
| "epoch": 0.07440645351973528, |
| "grad_norm": 3.451097468244895, |
| "learning_rate": 1.9791885080916026e-05, |
| "loss": 1.3536, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.07444779043835735, |
| "grad_norm": 2.9123675277858805, |
| "learning_rate": 1.979161877069846e-05, |
| "loss": 1.2859, |
| "step": 18010 |
| }, |
| { |
| "epoch": 0.07448912735697943, |
| "grad_norm": 3.6048473916322905, |
| "learning_rate": 1.9791352291994453e-05, |
| "loss": 1.2868, |
| "step": 18020 |
| }, |
| { |
| "epoch": 0.0745304642756015, |
| "grad_norm": 2.877236521609129, |
| "learning_rate": 1.9791085644808588e-05, |
| "loss": 1.3201, |
| "step": 18030 |
| }, |
| { |
| "epoch": 0.07457180119422357, |
| "grad_norm": 3.346518920497629, |
| "learning_rate": 1.9790818829145447e-05, |
| "loss": 1.2914, |
| "step": 18040 |
| }, |
| { |
| "epoch": 0.07461313811284566, |
| "grad_norm": 3.5346578512252416, |
| "learning_rate": 1.979055184500963e-05, |
| "loss": 1.2668, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.07465447503146773, |
| "grad_norm": 3.044418176627761, |
| "learning_rate": 1.9790284692405723e-05, |
| "loss": 1.2722, |
| "step": 18060 |
| }, |
| { |
| "epoch": 0.0746958119500898, |
| "grad_norm": 3.4743539555195206, |
| "learning_rate": 1.979001737133833e-05, |
| "loss": 1.3093, |
| "step": 18070 |
| }, |
| { |
| "epoch": 0.07473714886871188, |
| "grad_norm": 3.152139034733342, |
| "learning_rate": 1.978974988181205e-05, |
| "loss": 1.3282, |
| "step": 18080 |
| }, |
| { |
| "epoch": 0.07477848578733395, |
| "grad_norm": 3.844127465592703, |
| "learning_rate": 1.978948222383148e-05, |
| "loss": 1.3, |
| "step": 18090 |
| }, |
| { |
| "epoch": 0.07481982270595602, |
| "grad_norm": 3.312821234732469, |
| "learning_rate": 1.9789214397401233e-05, |
| "loss": 1.3007, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.07486115962457811, |
| "grad_norm": 2.587430778686902, |
| "learning_rate": 1.978894640252591e-05, |
| "loss": 1.2712, |
| "step": 18110 |
| }, |
| { |
| "epoch": 0.07490249654320018, |
| "grad_norm": 3.6471232765625867, |
| "learning_rate": 1.978867823921013e-05, |
| "loss": 1.3255, |
| "step": 18120 |
| }, |
| { |
| "epoch": 0.07494383346182225, |
| "grad_norm": 3.4191203190660966, |
| "learning_rate": 1.9788409907458502e-05, |
| "loss": 1.2588, |
| "step": 18130 |
| }, |
| { |
| "epoch": 0.07498517038044433, |
| "grad_norm": 3.3115429776293466, |
| "learning_rate": 1.9788141407275643e-05, |
| "loss": 1.2923, |
| "step": 18140 |
| }, |
| { |
| "epoch": 0.0750265072990664, |
| "grad_norm": 3.360103375582472, |
| "learning_rate": 1.9787872738666182e-05, |
| "loss": 1.3274, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.07506784421768849, |
| "grad_norm": 4.4310427169860205, |
| "learning_rate": 1.978760390163473e-05, |
| "loss": 1.3237, |
| "step": 18160 |
| }, |
| { |
| "epoch": 0.07510918113631056, |
| "grad_norm": 2.794577806503562, |
| "learning_rate": 1.9787334896185916e-05, |
| "loss": 1.3095, |
| "step": 18170 |
| }, |
| { |
| "epoch": 0.07515051805493263, |
| "grad_norm": 3.3585888834525384, |
| "learning_rate": 1.9787065722324374e-05, |
| "loss": 1.3199, |
| "step": 18180 |
| }, |
| { |
| "epoch": 0.07519185497355471, |
| "grad_norm": 3.060067450654041, |
| "learning_rate": 1.9786796380054733e-05, |
| "loss": 1.2532, |
| "step": 18190 |
| }, |
| { |
| "epoch": 0.07523319189217678, |
| "grad_norm": 3.124345955834219, |
| "learning_rate": 1.978652686938163e-05, |
| "loss": 1.2875, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.07527452881079885, |
| "grad_norm": 3.6658880497970254, |
| "learning_rate": 1.9786257190309695e-05, |
| "loss": 1.301, |
| "step": 18210 |
| }, |
| { |
| "epoch": 0.07531586572942094, |
| "grad_norm": 3.7099075204316088, |
| "learning_rate": 1.9785987342843573e-05, |
| "loss": 1.343, |
| "step": 18220 |
| }, |
| { |
| "epoch": 0.075357202648043, |
| "grad_norm": 3.26000011118403, |
| "learning_rate": 1.9785717326987914e-05, |
| "loss": 1.2782, |
| "step": 18230 |
| }, |
| { |
| "epoch": 0.07539853956666508, |
| "grad_norm": 3.133092069419479, |
| "learning_rate": 1.978544714274735e-05, |
| "loss": 1.3026, |
| "step": 18240 |
| }, |
| { |
| "epoch": 0.07543987648528716, |
| "grad_norm": 3.1564032343316453, |
| "learning_rate": 1.9785176790126542e-05, |
| "loss": 1.3207, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.07548121340390923, |
| "grad_norm": 3.284115672721678, |
| "learning_rate": 1.9784906269130137e-05, |
| "loss": 1.3117, |
| "step": 18260 |
| }, |
| { |
| "epoch": 0.0755225503225313, |
| "grad_norm": 3.1339318140995167, |
| "learning_rate": 1.9784635579762793e-05, |
| "loss": 1.305, |
| "step": 18270 |
| }, |
| { |
| "epoch": 0.07556388724115339, |
| "grad_norm": 4.379220492841367, |
| "learning_rate": 1.9784364722029165e-05, |
| "loss": 1.3408, |
| "step": 18280 |
| }, |
| { |
| "epoch": 0.07560522415977546, |
| "grad_norm": 3.9318661880011945, |
| "learning_rate": 1.978409369593391e-05, |
| "loss": 1.3053, |
| "step": 18290 |
| }, |
| { |
| "epoch": 0.07564656107839754, |
| "grad_norm": 3.3417710306838253, |
| "learning_rate": 1.97838225014817e-05, |
| "loss": 1.2781, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.07568789799701961, |
| "grad_norm": 3.54702678079395, |
| "learning_rate": 1.9783551138677197e-05, |
| "loss": 1.2998, |
| "step": 18310 |
| }, |
| { |
| "epoch": 0.07572923491564168, |
| "grad_norm": 3.4741727106428946, |
| "learning_rate": 1.978327960752507e-05, |
| "loss": 1.3003, |
| "step": 18320 |
| }, |
| { |
| "epoch": 0.07577057183426376, |
| "grad_norm": 3.2924386529485563, |
| "learning_rate": 1.9783007908029995e-05, |
| "loss": 1.3095, |
| "step": 18330 |
| }, |
| { |
| "epoch": 0.07581190875288583, |
| "grad_norm": 3.516437725795232, |
| "learning_rate": 1.978273604019664e-05, |
| "loss": 1.3088, |
| "step": 18340 |
| }, |
| { |
| "epoch": 0.0758532456715079, |
| "grad_norm": 3.5342954273687193, |
| "learning_rate": 1.9782464004029692e-05, |
| "loss": 1.3035, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.07589458259012999, |
| "grad_norm": 3.3786994777959523, |
| "learning_rate": 1.9782191799533824e-05, |
| "loss": 1.315, |
| "step": 18360 |
| }, |
| { |
| "epoch": 0.07593591950875206, |
| "grad_norm": 3.16392973516022, |
| "learning_rate": 1.9781919426713725e-05, |
| "loss": 1.3363, |
| "step": 18370 |
| }, |
| { |
| "epoch": 0.07597725642737413, |
| "grad_norm": 3.5807733324507693, |
| "learning_rate": 1.9781646885574078e-05, |
| "loss": 1.313, |
| "step": 18380 |
| }, |
| { |
| "epoch": 0.07601859334599621, |
| "grad_norm": 2.911047627936505, |
| "learning_rate": 1.978137417611958e-05, |
| "loss": 1.2943, |
| "step": 18390 |
| }, |
| { |
| "epoch": 0.07605993026461828, |
| "grad_norm": 3.519244637721149, |
| "learning_rate": 1.9781101298354913e-05, |
| "loss": 1.3013, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.07610126718324035, |
| "grad_norm": 2.80355546975897, |
| "learning_rate": 1.9780828252284778e-05, |
| "loss": 1.319, |
| "step": 18410 |
| }, |
| { |
| "epoch": 0.07614260410186244, |
| "grad_norm": 3.272390505391555, |
| "learning_rate": 1.9780555037913874e-05, |
| "loss": 1.3139, |
| "step": 18420 |
| }, |
| { |
| "epoch": 0.07618394102048451, |
| "grad_norm": 4.373312173753661, |
| "learning_rate": 1.9780281655246903e-05, |
| "loss": 1.332, |
| "step": 18430 |
| }, |
| { |
| "epoch": 0.07622527793910659, |
| "grad_norm": 3.4593509090352828, |
| "learning_rate": 1.9780008104288566e-05, |
| "loss": 1.3275, |
| "step": 18440 |
| }, |
| { |
| "epoch": 0.07626661485772866, |
| "grad_norm": 3.988419562725868, |
| "learning_rate": 1.9779734385043572e-05, |
| "loss": 1.2876, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.07630795177635073, |
| "grad_norm": 3.1901407057893127, |
| "learning_rate": 1.9779460497516633e-05, |
| "loss": 1.3077, |
| "step": 18460 |
| }, |
| { |
| "epoch": 0.07634928869497282, |
| "grad_norm": 3.392493661451085, |
| "learning_rate": 1.9779186441712456e-05, |
| "loss": 1.2853, |
| "step": 18470 |
| }, |
| { |
| "epoch": 0.07639062561359489, |
| "grad_norm": 3.2188126520970126, |
| "learning_rate": 1.9778912217635762e-05, |
| "loss": 1.3166, |
| "step": 18480 |
| }, |
| { |
| "epoch": 0.07643196253221696, |
| "grad_norm": 3.207853073897323, |
| "learning_rate": 1.9778637825291267e-05, |
| "loss": 1.2522, |
| "step": 18490 |
| }, |
| { |
| "epoch": 0.07647329945083904, |
| "grad_norm": 3.614534499504545, |
| "learning_rate": 1.9778363264683694e-05, |
| "loss": 1.2857, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.07651463636946111, |
| "grad_norm": 3.1137575271670634, |
| "learning_rate": 1.9778088535817765e-05, |
| "loss": 1.322, |
| "step": 18510 |
| }, |
| { |
| "epoch": 0.07655597328808318, |
| "grad_norm": 3.155733653317413, |
| "learning_rate": 1.977781363869821e-05, |
| "loss": 1.2775, |
| "step": 18520 |
| }, |
| { |
| "epoch": 0.07659731020670527, |
| "grad_norm": 4.553902503352338, |
| "learning_rate": 1.9777538573329757e-05, |
| "loss": 1.3102, |
| "step": 18530 |
| }, |
| { |
| "epoch": 0.07663864712532734, |
| "grad_norm": 3.421736022180327, |
| "learning_rate": 1.9777263339717143e-05, |
| "loss": 1.3101, |
| "step": 18540 |
| }, |
| { |
| "epoch": 0.0766799840439494, |
| "grad_norm": 3.0688727373371245, |
| "learning_rate": 1.97769879378651e-05, |
| "loss": 1.3116, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.07672132096257149, |
| "grad_norm": 3.7016289513150173, |
| "learning_rate": 1.977671236777837e-05, |
| "loss": 1.3266, |
| "step": 18560 |
| }, |
| { |
| "epoch": 0.07676265788119356, |
| "grad_norm": 2.918955840628811, |
| "learning_rate": 1.977643662946169e-05, |
| "loss": 1.335, |
| "step": 18570 |
| }, |
| { |
| "epoch": 0.07680399479981563, |
| "grad_norm": 2.710819215083447, |
| "learning_rate": 1.9776160722919808e-05, |
| "loss": 1.3241, |
| "step": 18580 |
| }, |
| { |
| "epoch": 0.07684533171843771, |
| "grad_norm": 4.690835156163024, |
| "learning_rate": 1.9775884648157473e-05, |
| "loss": 1.3112, |
| "step": 18590 |
| }, |
| { |
| "epoch": 0.07688666863705979, |
| "grad_norm": 2.8651231551210685, |
| "learning_rate": 1.9775608405179433e-05, |
| "loss": 1.2753, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.07692800555568187, |
| "grad_norm": 3.440907539318232, |
| "learning_rate": 1.9775331993990445e-05, |
| "loss": 1.3065, |
| "step": 18610 |
| }, |
| { |
| "epoch": 0.07696934247430394, |
| "grad_norm": 3.284791911253238, |
| "learning_rate": 1.977505541459526e-05, |
| "loss": 1.2491, |
| "step": 18620 |
| }, |
| { |
| "epoch": 0.07701067939292601, |
| "grad_norm": 3.9127411714280815, |
| "learning_rate": 1.977477866699864e-05, |
| "loss": 1.3486, |
| "step": 18630 |
| }, |
| { |
| "epoch": 0.0770520163115481, |
| "grad_norm": 3.6832173528899292, |
| "learning_rate": 1.9774501751205343e-05, |
| "loss": 1.26, |
| "step": 18640 |
| }, |
| { |
| "epoch": 0.07709335323017016, |
| "grad_norm": 3.037263148140778, |
| "learning_rate": 1.9774224667220145e-05, |
| "loss": 1.3066, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.07713469014879223, |
| "grad_norm": 3.9474254309978978, |
| "learning_rate": 1.97739474150478e-05, |
| "loss": 1.3526, |
| "step": 18660 |
| }, |
| { |
| "epoch": 0.07717602706741432, |
| "grad_norm": 3.2632360352472087, |
| "learning_rate": 1.977366999469309e-05, |
| "loss": 1.242, |
| "step": 18670 |
| }, |
| { |
| "epoch": 0.07721736398603639, |
| "grad_norm": 3.398609819663782, |
| "learning_rate": 1.977339240616078e-05, |
| "loss": 1.3083, |
| "step": 18680 |
| }, |
| { |
| "epoch": 0.07725870090465846, |
| "grad_norm": 2.8558120813505274, |
| "learning_rate": 1.977311464945565e-05, |
| "loss": 1.3002, |
| "step": 18690 |
| }, |
| { |
| "epoch": 0.07730003782328054, |
| "grad_norm": 3.3599778107706553, |
| "learning_rate": 1.9772836724582483e-05, |
| "loss": 1.3299, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.07734137474190261, |
| "grad_norm": 3.9628018743002658, |
| "learning_rate": 1.9772558631546054e-05, |
| "loss": 1.3115, |
| "step": 18710 |
| }, |
| { |
| "epoch": 0.07738271166052468, |
| "grad_norm": 3.239048590526076, |
| "learning_rate": 1.9772280370351155e-05, |
| "loss": 1.2683, |
| "step": 18720 |
| }, |
| { |
| "epoch": 0.07742404857914677, |
| "grad_norm": 3.0722778687848558, |
| "learning_rate": 1.977200194100257e-05, |
| "loss": 1.3484, |
| "step": 18730 |
| }, |
| { |
| "epoch": 0.07746538549776884, |
| "grad_norm": 3.1378953738889637, |
| "learning_rate": 1.9771723343505093e-05, |
| "loss": 1.353, |
| "step": 18740 |
| }, |
| { |
| "epoch": 0.07750672241639092, |
| "grad_norm": 2.9849102736288113, |
| "learning_rate": 1.9771444577863517e-05, |
| "loss": 1.3318, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.07754805933501299, |
| "grad_norm": 3.006210181091503, |
| "learning_rate": 1.9771165644082636e-05, |
| "loss": 1.3095, |
| "step": 18760 |
| }, |
| { |
| "epoch": 0.07758939625363506, |
| "grad_norm": 2.8083464465567074, |
| "learning_rate": 1.9770886542167252e-05, |
| "loss": 1.2896, |
| "step": 18770 |
| }, |
| { |
| "epoch": 0.07763073317225715, |
| "grad_norm": 3.262789038521565, |
| "learning_rate": 1.9770607272122168e-05, |
| "loss": 1.3333, |
| "step": 18780 |
| }, |
| { |
| "epoch": 0.07767207009087922, |
| "grad_norm": 3.421189746770822, |
| "learning_rate": 1.9770327833952187e-05, |
| "loss": 1.28, |
| "step": 18790 |
| }, |
| { |
| "epoch": 0.07771340700950129, |
| "grad_norm": 3.006884991049078, |
| "learning_rate": 1.977004822766212e-05, |
| "loss": 1.2982, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.07775474392812337, |
| "grad_norm": 2.8631010619173427, |
| "learning_rate": 1.976976845325678e-05, |
| "loss": 1.2924, |
| "step": 18810 |
| }, |
| { |
| "epoch": 0.07779608084674544, |
| "grad_norm": 3.4772737519626533, |
| "learning_rate": 1.9769488510740974e-05, |
| "loss": 1.2927, |
| "step": 18820 |
| }, |
| { |
| "epoch": 0.07783741776536751, |
| "grad_norm": 2.7187723773853967, |
| "learning_rate": 1.976920840011953e-05, |
| "loss": 1.2868, |
| "step": 18830 |
| }, |
| { |
| "epoch": 0.0778787546839896, |
| "grad_norm": 3.145525828551427, |
| "learning_rate": 1.9768928121397253e-05, |
| "loss": 1.2662, |
| "step": 18840 |
| }, |
| { |
| "epoch": 0.07792009160261167, |
| "grad_norm": 3.6834084868042205, |
| "learning_rate": 1.9768647674578978e-05, |
| "loss": 1.2916, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.07796142852123374, |
| "grad_norm": 3.2384861614856697, |
| "learning_rate": 1.976836705966953e-05, |
| "loss": 1.2719, |
| "step": 18860 |
| }, |
| { |
| "epoch": 0.07800276543985582, |
| "grad_norm": 3.06441372291841, |
| "learning_rate": 1.976808627667373e-05, |
| "loss": 1.3137, |
| "step": 18870 |
| }, |
| { |
| "epoch": 0.07804410235847789, |
| "grad_norm": 3.1960269763685565, |
| "learning_rate": 1.9767805325596417e-05, |
| "loss": 1.2943, |
| "step": 18880 |
| }, |
| { |
| "epoch": 0.07808543927709996, |
| "grad_norm": 3.6639857210827778, |
| "learning_rate": 1.976752420644242e-05, |
| "loss": 1.2634, |
| "step": 18890 |
| }, |
| { |
| "epoch": 0.07812677619572204, |
| "grad_norm": 2.8992259499910564, |
| "learning_rate": 1.976724291921658e-05, |
| "loss": 1.3205, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.07816811311434411, |
| "grad_norm": 3.8410116582710963, |
| "learning_rate": 1.9766961463923735e-05, |
| "loss": 1.2778, |
| "step": 18910 |
| }, |
| { |
| "epoch": 0.0782094500329662, |
| "grad_norm": 4.723075181703024, |
| "learning_rate": 1.976667984056873e-05, |
| "loss": 1.2998, |
| "step": 18920 |
| }, |
| { |
| "epoch": 0.07825078695158827, |
| "grad_norm": 3.371508438100944, |
| "learning_rate": 1.976639804915641e-05, |
| "loss": 1.2512, |
| "step": 18930 |
| }, |
| { |
| "epoch": 0.07829212387021034, |
| "grad_norm": 3.6639818461414677, |
| "learning_rate": 1.976611608969162e-05, |
| "loss": 1.2961, |
| "step": 18940 |
| }, |
| { |
| "epoch": 0.07833346078883242, |
| "grad_norm": 2.8877225304005525, |
| "learning_rate": 1.976583396217922e-05, |
| "loss": 1.3345, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.0783747977074545, |
| "grad_norm": 3.282148781915577, |
| "learning_rate": 1.9765551666624062e-05, |
| "loss": 1.3293, |
| "step": 18960 |
| }, |
| { |
| "epoch": 0.07841613462607656, |
| "grad_norm": 3.1290209387742007, |
| "learning_rate": 1.9765269203030996e-05, |
| "loss": 1.3202, |
| "step": 18970 |
| }, |
| { |
| "epoch": 0.07845747154469865, |
| "grad_norm": 2.9401948590630798, |
| "learning_rate": 1.9764986571404892e-05, |
| "loss": 1.2739, |
| "step": 18980 |
| }, |
| { |
| "epoch": 0.07849880846332072, |
| "grad_norm": 2.726413528264204, |
| "learning_rate": 1.9764703771750606e-05, |
| "loss": 1.3417, |
| "step": 18990 |
| }, |
| { |
| "epoch": 0.07854014538194279, |
| "grad_norm": 3.8708836373349693, |
| "learning_rate": 1.976442080407301e-05, |
| "loss": 1.2817, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.07858148230056487, |
| "grad_norm": 2.9568442793661225, |
| "learning_rate": 1.976413766837697e-05, |
| "loss": 1.3157, |
| "step": 19010 |
| }, |
| { |
| "epoch": 0.07862281921918694, |
| "grad_norm": 3.2014326193422895, |
| "learning_rate": 1.9763854364667355e-05, |
| "loss": 1.2734, |
| "step": 19020 |
| }, |
| { |
| "epoch": 0.07866415613780901, |
| "grad_norm": 3.8142983049855204, |
| "learning_rate": 1.9763570892949048e-05, |
| "loss": 1.3268, |
| "step": 19030 |
| }, |
| { |
| "epoch": 0.0787054930564311, |
| "grad_norm": 3.2687191971325555, |
| "learning_rate": 1.976328725322692e-05, |
| "loss": 1.2922, |
| "step": 19040 |
| }, |
| { |
| "epoch": 0.07874682997505317, |
| "grad_norm": 3.8385310533023276, |
| "learning_rate": 1.9763003445505854e-05, |
| "loss": 1.274, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.07878816689367525, |
| "grad_norm": 3.11002301324885, |
| "learning_rate": 1.9762719469790736e-05, |
| "loss": 1.3201, |
| "step": 19060 |
| }, |
| { |
| "epoch": 0.07882950381229732, |
| "grad_norm": 3.1064094109546385, |
| "learning_rate": 1.9762435326086446e-05, |
| "loss": 1.2524, |
| "step": 19070 |
| }, |
| { |
| "epoch": 0.07887084073091939, |
| "grad_norm": 2.8255870735206687, |
| "learning_rate": 1.976215101439788e-05, |
| "loss": 1.2982, |
| "step": 19080 |
| }, |
| { |
| "epoch": 0.07891217764954148, |
| "grad_norm": 3.041236956663884, |
| "learning_rate": 1.9761866534729926e-05, |
| "loss": 1.2784, |
| "step": 19090 |
| }, |
| { |
| "epoch": 0.07895351456816355, |
| "grad_norm": 3.0762050459561032, |
| "learning_rate": 1.976158188708748e-05, |
| "loss": 1.2615, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.07899485148678562, |
| "grad_norm": 4.057630290231405, |
| "learning_rate": 1.976129707147544e-05, |
| "loss": 1.2956, |
| "step": 19110 |
| }, |
| { |
| "epoch": 0.0790361884054077, |
| "grad_norm": 3.530641336009131, |
| "learning_rate": 1.976101208789871e-05, |
| "loss": 1.3015, |
| "step": 19120 |
| }, |
| { |
| "epoch": 0.07907752532402977, |
| "grad_norm": 3.457139153653439, |
| "learning_rate": 1.976072693636219e-05, |
| "loss": 1.3007, |
| "step": 19130 |
| }, |
| { |
| "epoch": 0.07911886224265184, |
| "grad_norm": 4.247767055366308, |
| "learning_rate": 1.9760441616870785e-05, |
| "loss": 1.3284, |
| "step": 19140 |
| }, |
| { |
| "epoch": 0.07916019916127393, |
| "grad_norm": 3.4313340038672995, |
| "learning_rate": 1.976015612942941e-05, |
| "loss": 1.3064, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.079201536079896, |
| "grad_norm": 3.908560338711229, |
| "learning_rate": 1.9759870474042973e-05, |
| "loss": 1.3116, |
| "step": 19160 |
| }, |
| { |
| "epoch": 0.07924287299851807, |
| "grad_norm": 3.745765592110947, |
| "learning_rate": 1.9759584650716395e-05, |
| "loss": 1.3737, |
| "step": 19170 |
| }, |
| { |
| "epoch": 0.07928420991714015, |
| "grad_norm": 3.078986622432472, |
| "learning_rate": 1.9759298659454588e-05, |
| "loss": 1.2788, |
| "step": 19180 |
| }, |
| { |
| "epoch": 0.07932554683576222, |
| "grad_norm": 2.987786079650764, |
| "learning_rate": 1.9759012500262474e-05, |
| "loss": 1.2834, |
| "step": 19190 |
| }, |
| { |
| "epoch": 0.07936688375438429, |
| "grad_norm": 3.1036456287250673, |
| "learning_rate": 1.975872617314498e-05, |
| "loss": 1.2831, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.07940822067300637, |
| "grad_norm": 3.865690690229234, |
| "learning_rate": 1.9758439678107033e-05, |
| "loss": 1.2922, |
| "step": 19210 |
| }, |
| { |
| "epoch": 0.07944955759162844, |
| "grad_norm": 3.3185016462394588, |
| "learning_rate": 1.9758153015153553e-05, |
| "loss": 1.3349, |
| "step": 19220 |
| }, |
| { |
| "epoch": 0.07949089451025053, |
| "grad_norm": 2.971387000152931, |
| "learning_rate": 1.975786618428949e-05, |
| "loss": 1.2411, |
| "step": 19230 |
| }, |
| { |
| "epoch": 0.0795322314288726, |
| "grad_norm": 3.1868412483457065, |
| "learning_rate": 1.9757579185519766e-05, |
| "loss": 1.3152, |
| "step": 19240 |
| }, |
| { |
| "epoch": 0.07957356834749467, |
| "grad_norm": 3.573887753524877, |
| "learning_rate": 1.9757292018849322e-05, |
| "loss": 1.32, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.07961490526611675, |
| "grad_norm": 2.6269840263774347, |
| "learning_rate": 1.9757004684283107e-05, |
| "loss": 1.3123, |
| "step": 19260 |
| }, |
| { |
| "epoch": 0.07965624218473882, |
| "grad_norm": 3.1839995568616546, |
| "learning_rate": 1.9756717181826054e-05, |
| "loss": 1.3305, |
| "step": 19270 |
| }, |
| { |
| "epoch": 0.0796975791033609, |
| "grad_norm": 3.5732944308856474, |
| "learning_rate": 1.9756429511483117e-05, |
| "loss": 1.298, |
| "step": 19280 |
| }, |
| { |
| "epoch": 0.07973891602198298, |
| "grad_norm": 3.612052010659264, |
| "learning_rate": 1.9756141673259247e-05, |
| "loss": 1.2797, |
| "step": 19290 |
| }, |
| { |
| "epoch": 0.07978025294060505, |
| "grad_norm": 3.129559274239735, |
| "learning_rate": 1.9755853667159392e-05, |
| "loss": 1.3242, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.07982158985922712, |
| "grad_norm": 3.2804938014945915, |
| "learning_rate": 1.9755565493188507e-05, |
| "loss": 1.2882, |
| "step": 19310 |
| }, |
| { |
| "epoch": 0.0798629267778492, |
| "grad_norm": 3.5207612414520444, |
| "learning_rate": 1.9755277151351558e-05, |
| "loss": 1.3292, |
| "step": 19320 |
| }, |
| { |
| "epoch": 0.07990426369647127, |
| "grad_norm": 3.2901806629006356, |
| "learning_rate": 1.9754988641653502e-05, |
| "loss": 1.2829, |
| "step": 19330 |
| }, |
| { |
| "epoch": 0.07994560061509334, |
| "grad_norm": 3.1031625175692876, |
| "learning_rate": 1.97546999640993e-05, |
| "loss": 1.2952, |
| "step": 19340 |
| }, |
| { |
| "epoch": 0.07998693753371543, |
| "grad_norm": 3.1014549071869606, |
| "learning_rate": 1.975441111869393e-05, |
| "loss": 1.2609, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.0800282744523375, |
| "grad_norm": 2.9394216851114217, |
| "learning_rate": 1.975412210544235e-05, |
| "loss": 1.2948, |
| "step": 19360 |
| }, |
| { |
| "epoch": 0.08006961137095958, |
| "grad_norm": 3.0263675235321545, |
| "learning_rate": 1.975383292434954e-05, |
| "loss": 1.3069, |
| "step": 19370 |
| }, |
| { |
| "epoch": 0.08011094828958165, |
| "grad_norm": 3.0848357434937124, |
| "learning_rate": 1.9753543575420477e-05, |
| "loss": 1.2747, |
| "step": 19380 |
| }, |
| { |
| "epoch": 0.08015228520820372, |
| "grad_norm": 2.9964068973521774, |
| "learning_rate": 1.9753254058660132e-05, |
| "loss": 1.3225, |
| "step": 19390 |
| }, |
| { |
| "epoch": 0.0801936221268258, |
| "grad_norm": 3.2536839847485224, |
| "learning_rate": 1.9752964374073494e-05, |
| "loss": 1.3448, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.08023495904544788, |
| "grad_norm": 3.37952149059762, |
| "learning_rate": 1.9752674521665546e-05, |
| "loss": 1.2845, |
| "step": 19410 |
| }, |
| { |
| "epoch": 0.08027629596406995, |
| "grad_norm": 3.0635418759023687, |
| "learning_rate": 1.9752384501441276e-05, |
| "loss": 1.3123, |
| "step": 19420 |
| }, |
| { |
| "epoch": 0.08031763288269203, |
| "grad_norm": 3.298468467101726, |
| "learning_rate": 1.9752094313405674e-05, |
| "loss": 1.2986, |
| "step": 19430 |
| }, |
| { |
| "epoch": 0.0803589698013141, |
| "grad_norm": 3.2473420562854507, |
| "learning_rate": 1.9751803957563735e-05, |
| "loss": 1.327, |
| "step": 19440 |
| }, |
| { |
| "epoch": 0.08040030671993617, |
| "grad_norm": 3.0843171834880483, |
| "learning_rate": 1.975151343392045e-05, |
| "loss": 1.304, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.08044164363855826, |
| "grad_norm": 3.0717834263814856, |
| "learning_rate": 1.9751222742480823e-05, |
| "loss": 1.3133, |
| "step": 19460 |
| }, |
| { |
| "epoch": 0.08048298055718033, |
| "grad_norm": 2.9934271457981603, |
| "learning_rate": 1.9750931883249852e-05, |
| "loss": 1.2674, |
| "step": 19470 |
| }, |
| { |
| "epoch": 0.0805243174758024, |
| "grad_norm": 3.1064293377071843, |
| "learning_rate": 1.9750640856232548e-05, |
| "loss": 1.2917, |
| "step": 19480 |
| }, |
| { |
| "epoch": 0.08056565439442448, |
| "grad_norm": 2.980738543973593, |
| "learning_rate": 1.975034966143391e-05, |
| "loss": 1.2978, |
| "step": 19490 |
| }, |
| { |
| "epoch": 0.08060699131304655, |
| "grad_norm": 3.383580434705728, |
| "learning_rate": 1.975005829885896e-05, |
| "loss": 1.2603, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.08064832823166862, |
| "grad_norm": 3.2830909352425564, |
| "learning_rate": 1.97497667685127e-05, |
| "loss": 1.3043, |
| "step": 19510 |
| }, |
| { |
| "epoch": 0.0806896651502907, |
| "grad_norm": 3.3969068546862036, |
| "learning_rate": 1.9749475070400157e-05, |
| "loss": 1.3338, |
| "step": 19520 |
| }, |
| { |
| "epoch": 0.08073100206891277, |
| "grad_norm": 3.0880094364724773, |
| "learning_rate": 1.974918320452634e-05, |
| "loss": 1.2351, |
| "step": 19530 |
| }, |
| { |
| "epoch": 0.08077233898753486, |
| "grad_norm": 3.4417279889999564, |
| "learning_rate": 1.974889117089628e-05, |
| "loss": 1.264, |
| "step": 19540 |
| }, |
| { |
| "epoch": 0.08081367590615693, |
| "grad_norm": 2.904756528303193, |
| "learning_rate": 1.9748598969514993e-05, |
| "loss": 1.2647, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.080855012824779, |
| "grad_norm": 2.7148091448555722, |
| "learning_rate": 1.9748306600387516e-05, |
| "loss": 1.2989, |
| "step": 19560 |
| }, |
| { |
| "epoch": 0.08089634974340108, |
| "grad_norm": 3.651161572746769, |
| "learning_rate": 1.9748014063518875e-05, |
| "loss": 1.3161, |
| "step": 19570 |
| }, |
| { |
| "epoch": 0.08093768666202315, |
| "grad_norm": 3.1157328327095866, |
| "learning_rate": 1.9747721358914106e-05, |
| "loss": 1.2713, |
| "step": 19580 |
| }, |
| { |
| "epoch": 0.08097902358064522, |
| "grad_norm": 3.131352709584711, |
| "learning_rate": 1.9747428486578243e-05, |
| "loss": 1.2904, |
| "step": 19590 |
| }, |
| { |
| "epoch": 0.08102036049926731, |
| "grad_norm": 2.942079614415163, |
| "learning_rate": 1.9747135446516327e-05, |
| "loss": 1.2857, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.08106169741788938, |
| "grad_norm": 2.8838500418296955, |
| "learning_rate": 1.9746842238733404e-05, |
| "loss": 1.3162, |
| "step": 19610 |
| }, |
| { |
| "epoch": 0.08110303433651145, |
| "grad_norm": 3.62285581087724, |
| "learning_rate": 1.9746548863234512e-05, |
| "loss": 1.3105, |
| "step": 19620 |
| }, |
| { |
| "epoch": 0.08114437125513353, |
| "grad_norm": 2.8759608727278927, |
| "learning_rate": 1.9746255320024702e-05, |
| "loss": 1.2757, |
| "step": 19630 |
| }, |
| { |
| "epoch": 0.0811857081737556, |
| "grad_norm": 3.2868321525430377, |
| "learning_rate": 1.974596160910903e-05, |
| "loss": 1.2808, |
| "step": 19640 |
| }, |
| { |
| "epoch": 0.08122704509237767, |
| "grad_norm": 2.9255021662463, |
| "learning_rate": 1.9745667730492543e-05, |
| "loss": 1.2982, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.08126838201099976, |
| "grad_norm": 3.6734905665007336, |
| "learning_rate": 1.97453736841803e-05, |
| "loss": 1.3469, |
| "step": 19660 |
| }, |
| { |
| "epoch": 0.08130971892962183, |
| "grad_norm": 2.9210368580920294, |
| "learning_rate": 1.974507947017736e-05, |
| "loss": 1.2624, |
| "step": 19670 |
| }, |
| { |
| "epoch": 0.08135105584824391, |
| "grad_norm": 3.5155014430588514, |
| "learning_rate": 1.974478508848879e-05, |
| "loss": 1.3032, |
| "step": 19680 |
| }, |
| { |
| "epoch": 0.08139239276686598, |
| "grad_norm": 3.1972640975511966, |
| "learning_rate": 1.9744490539119652e-05, |
| "loss": 1.2663, |
| "step": 19690 |
| }, |
| { |
| "epoch": 0.08143372968548805, |
| "grad_norm": 3.137196038716376, |
| "learning_rate": 1.9744195822075016e-05, |
| "loss": 1.2599, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.08147506660411014, |
| "grad_norm": 3.429683582295254, |
| "learning_rate": 1.974390093735995e-05, |
| "loss": 1.3125, |
| "step": 19710 |
| }, |
| { |
| "epoch": 0.0815164035227322, |
| "grad_norm": 3.4935153839614213, |
| "learning_rate": 1.974360588497953e-05, |
| "loss": 1.3021, |
| "step": 19720 |
| }, |
| { |
| "epoch": 0.08155774044135428, |
| "grad_norm": 3.693677622986634, |
| "learning_rate": 1.9743310664938836e-05, |
| "loss": 1.3154, |
| "step": 19730 |
| }, |
| { |
| "epoch": 0.08159907735997636, |
| "grad_norm": 3.1960119762782995, |
| "learning_rate": 1.9743015277242942e-05, |
| "loss": 1.2931, |
| "step": 19740 |
| }, |
| { |
| "epoch": 0.08164041427859843, |
| "grad_norm": 3.362501999299266, |
| "learning_rate": 1.9742719721896936e-05, |
| "loss": 1.2977, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.0816817511972205, |
| "grad_norm": 3.152254479818853, |
| "learning_rate": 1.97424239989059e-05, |
| "loss": 1.2571, |
| "step": 19760 |
| }, |
| { |
| "epoch": 0.08172308811584258, |
| "grad_norm": 3.78958825416846, |
| "learning_rate": 1.9742128108274926e-05, |
| "loss": 1.2903, |
| "step": 19770 |
| }, |
| { |
| "epoch": 0.08176442503446466, |
| "grad_norm": 3.2123145951877077, |
| "learning_rate": 1.9741832050009102e-05, |
| "loss": 1.282, |
| "step": 19780 |
| }, |
| { |
| "epoch": 0.08180576195308673, |
| "grad_norm": 3.1167331240538076, |
| "learning_rate": 1.9741535824113526e-05, |
| "loss": 1.2552, |
| "step": 19790 |
| }, |
| { |
| "epoch": 0.08184709887170881, |
| "grad_norm": 4.206288270940374, |
| "learning_rate": 1.974123943059329e-05, |
| "loss": 1.2597, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.08188843579033088, |
| "grad_norm": 3.553992295271698, |
| "learning_rate": 1.9740942869453504e-05, |
| "loss": 1.2908, |
| "step": 19810 |
| }, |
| { |
| "epoch": 0.08192977270895295, |
| "grad_norm": 3.521907001370512, |
| "learning_rate": 1.974064614069926e-05, |
| "loss": 1.297, |
| "step": 19820 |
| }, |
| { |
| "epoch": 0.08197110962757503, |
| "grad_norm": 3.360470421890721, |
| "learning_rate": 1.9740349244335665e-05, |
| "loss": 1.2882, |
| "step": 19830 |
| }, |
| { |
| "epoch": 0.0820124465461971, |
| "grad_norm": 2.9432002594333464, |
| "learning_rate": 1.9740052180367836e-05, |
| "loss": 1.252, |
| "step": 19840 |
| }, |
| { |
| "epoch": 0.08205378346481919, |
| "grad_norm": 3.252199526734451, |
| "learning_rate": 1.9739754948800874e-05, |
| "loss": 1.2805, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.08209512038344126, |
| "grad_norm": 3.1871490405890586, |
| "learning_rate": 1.9739457549639905e-05, |
| "loss": 1.2697, |
| "step": 19860 |
| }, |
| { |
| "epoch": 0.08213645730206333, |
| "grad_norm": 3.1800803475250166, |
| "learning_rate": 1.973915998289004e-05, |
| "loss": 1.3342, |
| "step": 19870 |
| }, |
| { |
| "epoch": 0.08217779422068541, |
| "grad_norm": 2.8783537685939478, |
| "learning_rate": 1.9738862248556395e-05, |
| "loss": 1.2471, |
| "step": 19880 |
| }, |
| { |
| "epoch": 0.08221913113930748, |
| "grad_norm": 2.7100348444687246, |
| "learning_rate": 1.9738564346644103e-05, |
| "loss": 1.2827, |
| "step": 19890 |
| }, |
| { |
| "epoch": 0.08226046805792955, |
| "grad_norm": 3.509524916127054, |
| "learning_rate": 1.973826627715828e-05, |
| "loss": 1.3025, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.08230180497655164, |
| "grad_norm": 3.3725272601095795, |
| "learning_rate": 1.9737968040104065e-05, |
| "loss": 1.3234, |
| "step": 19910 |
| }, |
| { |
| "epoch": 0.08234314189517371, |
| "grad_norm": 3.341475977428283, |
| "learning_rate": 1.9737669635486585e-05, |
| "loss": 1.3203, |
| "step": 19920 |
| }, |
| { |
| "epoch": 0.08238447881379578, |
| "grad_norm": 3.185315976662971, |
| "learning_rate": 1.9737371063310972e-05, |
| "loss": 1.2828, |
| "step": 19930 |
| }, |
| { |
| "epoch": 0.08242581573241786, |
| "grad_norm": 3.6059578502827945, |
| "learning_rate": 1.9737072323582366e-05, |
| "loss": 1.3272, |
| "step": 19940 |
| }, |
| { |
| "epoch": 0.08246715265103993, |
| "grad_norm": 3.486164979637442, |
| "learning_rate": 1.973677341630591e-05, |
| "loss": 1.2619, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.082508489569662, |
| "grad_norm": 3.3788628785271935, |
| "learning_rate": 1.9736474341486742e-05, |
| "loss": 1.2866, |
| "step": 19960 |
| }, |
| { |
| "epoch": 0.08254982648828409, |
| "grad_norm": 3.116863275035675, |
| "learning_rate": 1.973617509913001e-05, |
| "loss": 1.291, |
| "step": 19970 |
| }, |
| { |
| "epoch": 0.08259116340690616, |
| "grad_norm": 2.7964040776322796, |
| "learning_rate": 1.973587568924087e-05, |
| "loss": 1.2725, |
| "step": 19980 |
| }, |
| { |
| "epoch": 0.08263250032552824, |
| "grad_norm": 2.797740269032973, |
| "learning_rate": 1.9735576111824465e-05, |
| "loss": 1.2742, |
| "step": 19990 |
| }, |
| { |
| "epoch": 0.08267383724415031, |
| "grad_norm": 3.2432141472722162, |
| "learning_rate": 1.9735276366885956e-05, |
| "loss": 1.2947, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.08267383724415031, |
| "eval_loss": 1.5713279247283936, |
| "eval_runtime": 392.3898, |
| "eval_samples_per_second": 10.439, |
| "eval_steps_per_second": 2.61, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.08267797093601252, |
| "step": 20001, |
| "total_flos": 0.0, |
| "train_loss": 6.551032936291113e-05, |
| "train_runtime": 86.8457, |
| "train_samples_per_second": 14738.777, |
| "train_steps_per_second": 230.293 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|