| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8504847763225039, |
| "eval_steps": 500, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00042524238816125194, |
| "grad_norm": 3.2010223865509033, |
| "learning_rate": 9e-08, |
| "loss": 1.3874, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0008504847763225039, |
| "grad_norm": 1.5439364910125732, |
| "learning_rate": 1.9e-07, |
| "loss": 1.3901, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0012757271644837558, |
| "grad_norm": 1.5690912008285522, |
| "learning_rate": 2.9000000000000003e-07, |
| "loss": 1.3805, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0017009695526450078, |
| "grad_norm": 3.358100175857544, |
| "learning_rate": 3.8999999999999997e-07, |
| "loss": 1.3746, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0021262119408062595, |
| "grad_norm": 1.5554291009902954, |
| "learning_rate": 4.9e-07, |
| "loss": 1.3712, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0025514543289675115, |
| "grad_norm": 1.6682578325271606, |
| "learning_rate": 5.9e-07, |
| "loss": 1.3693, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0029766967171287635, |
| "grad_norm": 2.5594966411590576, |
| "learning_rate": 6.9e-07, |
| "loss": 1.3535, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0034019391052900155, |
| "grad_norm": 1.2641184329986572, |
| "learning_rate": 7.900000000000001e-07, |
| "loss": 1.3506, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.003827181493451267, |
| "grad_norm": 1.0074024200439453, |
| "learning_rate": 8.900000000000001e-07, |
| "loss": 1.3411, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.004252423881612519, |
| "grad_norm": 2.079498291015625, |
| "learning_rate": 9.9e-07, |
| "loss": 1.3332, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.004677666269773771, |
| "grad_norm": 1.5527578592300415, |
| "learning_rate": 1.0900000000000002e-06, |
| "loss": 1.3341, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.005102908657935023, |
| "grad_norm": 0.8216768503189087, |
| "learning_rate": 1.19e-06, |
| "loss": 1.3179, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.005528151046096275, |
| "grad_norm": 0.7347335815429688, |
| "learning_rate": 1.29e-06, |
| "loss": 1.3076, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.005953393434257527, |
| "grad_norm": 0.6095930933952332, |
| "learning_rate": 1.39e-06, |
| "loss": 1.3021, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.006378635822418779, |
| "grad_norm": 0.7857323288917542, |
| "learning_rate": 1.49e-06, |
| "loss": 1.3051, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.006803878210580031, |
| "grad_norm": 0.728410542011261, |
| "learning_rate": 1.59e-06, |
| "loss": 1.3004, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.007229120598741283, |
| "grad_norm": 1.132466197013855, |
| "learning_rate": 1.69e-06, |
| "loss": 1.2892, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.007654362986902534, |
| "grad_norm": 1.3443944454193115, |
| "learning_rate": 1.79e-06, |
| "loss": 1.2835, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.008079605375063786, |
| "grad_norm": 0.6804354786872864, |
| "learning_rate": 1.8900000000000001e-06, |
| "loss": 1.2874, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.008504847763225038, |
| "grad_norm": 0.4348722994327545, |
| "learning_rate": 1.99e-06, |
| "loss": 1.2857, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.00893009015138629, |
| "grad_norm": 0.6353126168251038, |
| "learning_rate": 2.09e-06, |
| "loss": 1.2806, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.009355332539547541, |
| "grad_norm": 0.5617169737815857, |
| "learning_rate": 2.1899999999999998e-06, |
| "loss": 1.279, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.009780574927708794, |
| "grad_norm": 0.665167510509491, |
| "learning_rate": 2.29e-06, |
| "loss": 1.2731, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.010205817315870046, |
| "grad_norm": 0.4070955514907837, |
| "learning_rate": 2.39e-06, |
| "loss": 1.271, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.010631059704031299, |
| "grad_norm": 0.6760728359222412, |
| "learning_rate": 2.4900000000000003e-06, |
| "loss": 1.265, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.01105630209219255, |
| "grad_norm": 0.4963316023349762, |
| "learning_rate": 2.59e-06, |
| "loss": 1.2628, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.011481544480353802, |
| "grad_norm": 0.42656829953193665, |
| "learning_rate": 2.69e-06, |
| "loss": 1.2602, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.011906786868515054, |
| "grad_norm": 0.3536563813686371, |
| "learning_rate": 2.79e-06, |
| "loss": 1.2481, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.012332029256676305, |
| "grad_norm": 0.4079868793487549, |
| "learning_rate": 2.8900000000000003e-06, |
| "loss": 1.2361, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.012757271644837557, |
| "grad_norm": 0.33841463923454285, |
| "learning_rate": 2.99e-06, |
| "loss": 1.2329, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.01318251403299881, |
| "grad_norm": 0.3516484797000885, |
| "learning_rate": 3.09e-06, |
| "loss": 1.2188, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.013607756421160062, |
| "grad_norm": 0.296055406332016, |
| "learning_rate": 3.19e-06, |
| "loss": 1.2077, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.014032998809321313, |
| "grad_norm": 0.2963598966598511, |
| "learning_rate": 3.29e-06, |
| "loss": 1.1939, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.014458241197482565, |
| "grad_norm": 0.3225858211517334, |
| "learning_rate": 3.39e-06, |
| "loss": 1.181, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.014883483585643818, |
| "grad_norm": 0.2994067072868347, |
| "learning_rate": 3.49e-06, |
| "loss": 1.1655, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.015308725973805068, |
| "grad_norm": 0.3013548254966736, |
| "learning_rate": 3.5900000000000004e-06, |
| "loss": 1.1578, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.015733968361966322, |
| "grad_norm": 0.4306448698043823, |
| "learning_rate": 3.6900000000000002e-06, |
| "loss": 1.1496, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.01615921075012757, |
| "grad_norm": 0.33329278230667114, |
| "learning_rate": 3.7899999999999997e-06, |
| "loss": 1.1427, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.016584453138288824, |
| "grad_norm": 0.30759066343307495, |
| "learning_rate": 3.890000000000001e-06, |
| "loss": 1.1413, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.017009695526450076, |
| "grad_norm": 0.28163671493530273, |
| "learning_rate": 3.99e-06, |
| "loss": 1.1257, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.01743493791461133, |
| "grad_norm": 0.3048485517501831, |
| "learning_rate": 4.09e-06, |
| "loss": 1.1293, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.01786018030277258, |
| "grad_norm": 0.3524611294269562, |
| "learning_rate": 4.19e-06, |
| "loss": 1.1192, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.018285422690933834, |
| "grad_norm": 0.33914950489997864, |
| "learning_rate": 4.29e-06, |
| "loss": 1.1145, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.018710665079095083, |
| "grad_norm": 0.33718347549438477, |
| "learning_rate": 4.39e-06, |
| "loss": 1.1122, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.019135907467256335, |
| "grad_norm": 0.3634999096393585, |
| "learning_rate": 4.49e-06, |
| "loss": 1.1153, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.019561149855417587, |
| "grad_norm": 0.43056294322013855, |
| "learning_rate": 4.59e-06, |
| "loss": 1.1115, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.01998639224357884, |
| "grad_norm": 0.3170914351940155, |
| "learning_rate": 4.69e-06, |
| "loss": 1.1015, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.020411634631740092, |
| "grad_norm": 0.4195287823677063, |
| "learning_rate": 4.790000000000001e-06, |
| "loss": 1.0997, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.020836877019901345, |
| "grad_norm": 0.3294726610183716, |
| "learning_rate": 4.890000000000001e-06, |
| "loss": 1.0998, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.021262119408062597, |
| "grad_norm": 0.312850683927536, |
| "learning_rate": 4.9900000000000005e-06, |
| "loss": 1.094, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.021687361796223846, |
| "grad_norm": 0.3543089032173157, |
| "learning_rate": 5.0899999999999995e-06, |
| "loss": 1.0897, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.0221126041843851, |
| "grad_norm": 0.39667844772338867, |
| "learning_rate": 5.1899999999999994e-06, |
| "loss": 1.0863, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.02253784657254635, |
| "grad_norm": 0.44174888730049133, |
| "learning_rate": 5.29e-06, |
| "loss": 1.0799, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.022963088960707603, |
| "grad_norm": 0.37127602100372314, |
| "learning_rate": 5.39e-06, |
| "loss": 1.0814, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.023388331348868856, |
| "grad_norm": 0.35916680097579956, |
| "learning_rate": 5.49e-06, |
| "loss": 1.079, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.023813573737030108, |
| "grad_norm": 0.41336843371391296, |
| "learning_rate": 5.59e-06, |
| "loss": 1.0728, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.02423881612519136, |
| "grad_norm": 0.5130095481872559, |
| "learning_rate": 5.690000000000001e-06, |
| "loss": 1.0743, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.02466405851335261, |
| "grad_norm": 0.2982211410999298, |
| "learning_rate": 5.7900000000000005e-06, |
| "loss": 1.0703, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.025089300901513862, |
| "grad_norm": 0.3798081874847412, |
| "learning_rate": 5.89e-06, |
| "loss": 1.0737, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.025514543289675114, |
| "grad_norm": 0.4531615972518921, |
| "learning_rate": 5.99e-06, |
| "loss": 1.0643, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.025939785677836367, |
| "grad_norm": 0.37526369094848633, |
| "learning_rate": 6.090000000000001e-06, |
| "loss": 1.0645, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.02636502806599762, |
| "grad_norm": 0.4011104106903076, |
| "learning_rate": 6.19e-06, |
| "loss": 1.0591, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.02679027045415887, |
| "grad_norm": 0.41057097911834717, |
| "learning_rate": 6.29e-06, |
| "loss": 1.0564, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.027215512842320124, |
| "grad_norm": 0.5642093420028687, |
| "learning_rate": 6.39e-06, |
| "loss": 1.0522, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.027640755230481373, |
| "grad_norm": 0.6502516269683838, |
| "learning_rate": 6.49e-06, |
| "loss": 1.0454, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.028065997618642625, |
| "grad_norm": 0.4717367887496948, |
| "learning_rate": 6.5900000000000004e-06, |
| "loss": 1.0386, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.028491240006803878, |
| "grad_norm": 0.6143516898155212, |
| "learning_rate": 6.69e-06, |
| "loss": 1.0228, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.02891648239496513, |
| "grad_norm": 0.46155494451522827, |
| "learning_rate": 6.79e-06, |
| "loss": 1.0089, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.029341724783126383, |
| "grad_norm": 0.7181910276412964, |
| "learning_rate": 6.89e-06, |
| "loss": 0.9919, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.029766967171287635, |
| "grad_norm": 0.49455365538597107, |
| "learning_rate": 6.990000000000001e-06, |
| "loss": 0.9628, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.030192209559448884, |
| "grad_norm": 0.5362220406532288, |
| "learning_rate": 7.090000000000001e-06, |
| "loss": 0.9337, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.030617451947610137, |
| "grad_norm": 0.5086848139762878, |
| "learning_rate": 7.19e-06, |
| "loss": 0.9057, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.03104269433577139, |
| "grad_norm": 0.5631227493286133, |
| "learning_rate": 7.29e-06, |
| "loss": 0.8713, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.031467936723932645, |
| "grad_norm": 0.6062225699424744, |
| "learning_rate": 7.3899999999999995e-06, |
| "loss": 0.838, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.03189317911209389, |
| "grad_norm": 0.7424901127815247, |
| "learning_rate": 7.49e-06, |
| "loss": 0.7954, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.03231842150025514, |
| "grad_norm": 0.8033110499382019, |
| "learning_rate": 7.59e-06, |
| "loss": 0.754, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.032743663888416395, |
| "grad_norm": 0.9407315850257874, |
| "learning_rate": 7.690000000000001e-06, |
| "loss": 0.7209, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.03316890627657765, |
| "grad_norm": 0.8432111144065857, |
| "learning_rate": 7.79e-06, |
| "loss": 0.6719, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.0335941486647389, |
| "grad_norm": 0.8872693181037903, |
| "learning_rate": 7.89e-06, |
| "loss": 0.6367, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.03401939105290015, |
| "grad_norm": 0.8927829265594482, |
| "learning_rate": 7.99e-06, |
| "loss": 0.6017, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.034444633441061405, |
| "grad_norm": 1.2115992307662964, |
| "learning_rate": 8.09e-06, |
| "loss": 0.5578, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.03486987582922266, |
| "grad_norm": 1.2473925352096558, |
| "learning_rate": 8.190000000000001e-06, |
| "loss": 0.5364, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.03529511821738391, |
| "grad_norm": 1.0221428871154785, |
| "learning_rate": 8.29e-06, |
| "loss": 0.5031, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.03572036060554516, |
| "grad_norm": 1.24955415725708, |
| "learning_rate": 8.390000000000001e-06, |
| "loss": 0.4787, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.036145602993706415, |
| "grad_norm": 1.1550233364105225, |
| "learning_rate": 8.49e-06, |
| "loss": 0.4508, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.03657084538186767, |
| "grad_norm": 1.9099974632263184, |
| "learning_rate": 8.59e-06, |
| "loss": 0.4327, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.03699608777002892, |
| "grad_norm": 1.241186499595642, |
| "learning_rate": 8.690000000000002e-06, |
| "loss": 0.411, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.037421330158190165, |
| "grad_norm": 1.373657464981079, |
| "learning_rate": 8.79e-06, |
| "loss": 0.3953, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.03784657254635142, |
| "grad_norm": 1.6695680618286133, |
| "learning_rate": 8.89e-06, |
| "loss": 0.374, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.03827181493451267, |
| "grad_norm": 1.3347766399383545, |
| "learning_rate": 8.99e-06, |
| "loss": 0.3563, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.03869705732267392, |
| "grad_norm": 1.2124155759811401, |
| "learning_rate": 9.09e-06, |
| "loss": 0.3237, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.039122299710835175, |
| "grad_norm": 1.1073696613311768, |
| "learning_rate": 9.19e-06, |
| "loss": 0.2997, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.03954754209899643, |
| "grad_norm": 1.322092056274414, |
| "learning_rate": 9.289999999999999e-06, |
| "loss": 0.2878, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.03997278448715768, |
| "grad_norm": 1.1476775407791138, |
| "learning_rate": 9.39e-06, |
| "loss": 0.263, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.04039802687531893, |
| "grad_norm": 1.3284542560577393, |
| "learning_rate": 9.49e-06, |
| "loss": 0.2456, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.040823269263480184, |
| "grad_norm": 1.3168091773986816, |
| "learning_rate": 9.59e-06, |
| "loss": 0.2359, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.04124851165164144, |
| "grad_norm": 1.368938684463501, |
| "learning_rate": 9.69e-06, |
| "loss": 0.2278, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.04167375403980269, |
| "grad_norm": 1.097208023071289, |
| "learning_rate": 9.79e-06, |
| "loss": 0.2119, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.04209899642796394, |
| "grad_norm": 1.1645355224609375, |
| "learning_rate": 9.89e-06, |
| "loss": 0.2169, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.042524238816125194, |
| "grad_norm": 1.1648592948913574, |
| "learning_rate": 9.990000000000001e-06, |
| "loss": 0.1849, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.04294948120428645, |
| "grad_norm": 1.443503975868225, |
| "learning_rate": 1.009e-05, |
| "loss": 0.1788, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.04337472359244769, |
| "grad_norm": 1.0813487768173218, |
| "learning_rate": 1.019e-05, |
| "loss": 0.1847, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.043799965980608944, |
| "grad_norm": 0.81873619556427, |
| "learning_rate": 1.0290000000000001e-05, |
| "loss": 0.1687, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.0442252083687702, |
| "grad_norm": 1.162904143333435, |
| "learning_rate": 1.039e-05, |
| "loss": 0.1576, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.04465045075693145, |
| "grad_norm": 1.271335244178772, |
| "learning_rate": 1.0490000000000001e-05, |
| "loss": 0.1553, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.0450756931450927, |
| "grad_norm": 1.254536509513855, |
| "learning_rate": 1.059e-05, |
| "loss": 0.15, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.045500935533253954, |
| "grad_norm": 1.403584599494934, |
| "learning_rate": 1.0690000000000001e-05, |
| "loss": 0.1537, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.04592617792141521, |
| "grad_norm": 1.571937918663025, |
| "learning_rate": 1.0790000000000002e-05, |
| "loss": 0.152, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.04635142030957646, |
| "grad_norm": 1.3833575248718262, |
| "learning_rate": 1.089e-05, |
| "loss": 0.144, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.04677666269773771, |
| "grad_norm": 1.6977688074111938, |
| "learning_rate": 1.099e-05, |
| "loss": 0.1393, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.047201905085898964, |
| "grad_norm": 1.1160330772399902, |
| "learning_rate": 1.1089999999999999e-05, |
| "loss": 0.1333, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.047627147474060216, |
| "grad_norm": 1.1706668138504028, |
| "learning_rate": 1.119e-05, |
| "loss": 0.136, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.04805238986222147, |
| "grad_norm": 1.2051717042922974, |
| "learning_rate": 1.129e-05, |
| "loss": 0.1402, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.04847763225038272, |
| "grad_norm": 1.4838896989822388, |
| "learning_rate": 1.139e-05, |
| "loss": 0.134, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.04890287463854397, |
| "grad_norm": 1.3389332294464111, |
| "learning_rate": 1.149e-05, |
| "loss": 0.1264, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.04932811702670522, |
| "grad_norm": 1.3594571352005005, |
| "learning_rate": 1.1589999999999999e-05, |
| "loss": 0.1326, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.04975335941486647, |
| "grad_norm": 1.2801322937011719, |
| "learning_rate": 1.169e-05, |
| "loss": 0.1312, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.050178601803027724, |
| "grad_norm": 1.1950969696044922, |
| "learning_rate": 1.179e-05, |
| "loss": 0.1254, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.050603844191188976, |
| "grad_norm": 1.2749476432800293, |
| "learning_rate": 1.189e-05, |
| "loss": 0.1225, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.05102908657935023, |
| "grad_norm": 1.4519540071487427, |
| "learning_rate": 1.199e-05, |
| "loss": 0.1312, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.05145432896751148, |
| "grad_norm": 1.6330214738845825, |
| "learning_rate": 1.2090000000000001e-05, |
| "loss": 0.1213, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.051879571355672734, |
| "grad_norm": 1.9503471851348877, |
| "learning_rate": 1.219e-05, |
| "loss": 0.1228, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.052304813743833986, |
| "grad_norm": 1.6446688175201416, |
| "learning_rate": 1.2290000000000001e-05, |
| "loss": 0.1251, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.05273005613199524, |
| "grad_norm": 1.630383014678955, |
| "learning_rate": 1.239e-05, |
| "loss": 0.1186, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.05315529852015649, |
| "grad_norm": 1.3865199089050293, |
| "learning_rate": 1.249e-05, |
| "loss": 0.1105, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.05358054090831774, |
| "grad_norm": 1.2496237754821777, |
| "learning_rate": 1.2590000000000001e-05, |
| "loss": 0.114, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.054005783296478996, |
| "grad_norm": 1.639675259590149, |
| "learning_rate": 1.269e-05, |
| "loss": 0.1152, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.05443102568464025, |
| "grad_norm": 1.650262475013733, |
| "learning_rate": 1.2790000000000001e-05, |
| "loss": 0.1226, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.054856268072801494, |
| "grad_norm": 1.3705108165740967, |
| "learning_rate": 1.289e-05, |
| "loss": 0.1128, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.055281510460962746, |
| "grad_norm": 1.304731845855713, |
| "learning_rate": 1.2990000000000001e-05, |
| "loss": 0.1096, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.055706752849124, |
| "grad_norm": 1.6657530069351196, |
| "learning_rate": 1.309e-05, |
| "loss": 0.1122, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.05613199523728525, |
| "grad_norm": 2.000190019607544, |
| "learning_rate": 1.3189999999999999e-05, |
| "loss": 0.1082, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.0565572376254465, |
| "grad_norm": 1.6935395002365112, |
| "learning_rate": 1.329e-05, |
| "loss": 0.119, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.056982480013607756, |
| "grad_norm": 1.2586040496826172, |
| "learning_rate": 1.339e-05, |
| "loss": 0.1117, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.05740772240176901, |
| "grad_norm": 1.3810162544250488, |
| "learning_rate": 1.349e-05, |
| "loss": 0.1085, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.05783296478993026, |
| "grad_norm": 1.6154093742370605, |
| "learning_rate": 1.359e-05, |
| "loss": 0.1098, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.05825820717809151, |
| "grad_norm": 1.2975713014602661, |
| "learning_rate": 1.369e-05, |
| "loss": 0.1055, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.058683449566252766, |
| "grad_norm": 1.3397157192230225, |
| "learning_rate": 1.379e-05, |
| "loss": 0.0988, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.05910869195441402, |
| "grad_norm": 1.884732961654663, |
| "learning_rate": 1.389e-05, |
| "loss": 0.106, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.05953393434257527, |
| "grad_norm": 1.21388840675354, |
| "learning_rate": 1.399e-05, |
| "loss": 0.0998, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.05995917673073652, |
| "grad_norm": 1.3441375494003296, |
| "learning_rate": 1.409e-05, |
| "loss": 0.0981, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.06038441911889777, |
| "grad_norm": 1.3364049196243286, |
| "learning_rate": 1.419e-05, |
| "loss": 0.0983, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.06080966150705902, |
| "grad_norm": 1.7252370119094849, |
| "learning_rate": 1.429e-05, |
| "loss": 0.1027, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.06123490389522027, |
| "grad_norm": 1.3132603168487549, |
| "learning_rate": 1.4390000000000001e-05, |
| "loss": 0.0971, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.061660146283381526, |
| "grad_norm": 1.9096121788024902, |
| "learning_rate": 1.449e-05, |
| "loss": 0.0992, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.06208538867154278, |
| "grad_norm": 1.864293098449707, |
| "learning_rate": 1.4590000000000001e-05, |
| "loss": 0.1079, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.06251063105970403, |
| "grad_norm": 1.2758187055587769, |
| "learning_rate": 1.469e-05, |
| "loss": 0.0952, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.06293587344786529, |
| "grad_norm": 1.5732557773590088, |
| "learning_rate": 1.479e-05, |
| "loss": 0.0985, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.06336111583602654, |
| "grad_norm": 1.8538833856582642, |
| "learning_rate": 1.4890000000000001e-05, |
| "loss": 0.0957, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.06378635822418778, |
| "grad_norm": 1.4448537826538086, |
| "learning_rate": 1.499e-05, |
| "loss": 0.0958, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.06421160061234904, |
| "grad_norm": 1.7205644845962524, |
| "learning_rate": 1.5090000000000001e-05, |
| "loss": 0.0997, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.06463684300051029, |
| "grad_norm": 1.3191324472427368, |
| "learning_rate": 1.519e-05, |
| "loss": 0.0895, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.06506208538867155, |
| "grad_norm": 1.219373345375061, |
| "learning_rate": 1.529e-05, |
| "loss": 0.0955, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.06548732777683279, |
| "grad_norm": 1.2836029529571533, |
| "learning_rate": 1.539e-05, |
| "loss": 0.0948, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.06591257016499405, |
| "grad_norm": 1.2800109386444092, |
| "learning_rate": 1.549e-05, |
| "loss": 0.0956, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.0663378125531553, |
| "grad_norm": 1.4099206924438477, |
| "learning_rate": 1.559e-05, |
| "loss": 0.0919, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.06676305494131655, |
| "grad_norm": 1.320432186126709, |
| "learning_rate": 1.569e-05, |
| "loss": 0.0947, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.0671882973294778, |
| "grad_norm": 1.7595242261886597, |
| "learning_rate": 1.579e-05, |
| "loss": 0.0877, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.06761353971763906, |
| "grad_norm": 1.6487762928009033, |
| "learning_rate": 1.589e-05, |
| "loss": 0.093, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.0680387821058003, |
| "grad_norm": 1.58949613571167, |
| "learning_rate": 1.599e-05, |
| "loss": 0.0949, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.06846402449396156, |
| "grad_norm": 1.098441243171692, |
| "learning_rate": 1.609e-05, |
| "loss": 0.0898, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.06888926688212281, |
| "grad_norm": 1.2733993530273438, |
| "learning_rate": 1.619e-05, |
| "loss": 0.0841, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.06931450927028406, |
| "grad_norm": 1.5060628652572632, |
| "learning_rate": 1.629e-05, |
| "loss": 0.0919, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.06973975165844531, |
| "grad_norm": 1.5991514921188354, |
| "learning_rate": 1.639e-05, |
| "loss": 0.0863, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.07016499404660656, |
| "grad_norm": 1.4756519794464111, |
| "learning_rate": 1.649e-05, |
| "loss": 0.0848, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.07059023643476782, |
| "grad_norm": 1.5085145235061646, |
| "learning_rate": 1.6590000000000002e-05, |
| "loss": 0.0834, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.07101547882292907, |
| "grad_norm": 1.3816215991973877, |
| "learning_rate": 1.669e-05, |
| "loss": 0.0861, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.07144072121109032, |
| "grad_norm": 1.3119925260543823, |
| "learning_rate": 1.679e-05, |
| "loss": 0.088, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.07186596359925157, |
| "grad_norm": 1.1059609651565552, |
| "learning_rate": 1.689e-05, |
| "loss": 0.0941, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.07229120598741283, |
| "grad_norm": 0.9836457371711731, |
| "learning_rate": 1.699e-05, |
| "loss": 0.0889, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.07271644837557407, |
| "grad_norm": 1.3157384395599365, |
| "learning_rate": 1.709e-05, |
| "loss": 0.0867, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.07314169076373533, |
| "grad_norm": 1.0749443769454956, |
| "learning_rate": 1.719e-05, |
| "loss": 0.0809, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.07356693315189658, |
| "grad_norm": 1.4055633544921875, |
| "learning_rate": 1.7290000000000002e-05, |
| "loss": 0.0825, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.07399217554005784, |
| "grad_norm": 1.156111240386963, |
| "learning_rate": 1.739e-05, |
| "loss": 0.0833, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.07441741792821908, |
| "grad_norm": 1.625030517578125, |
| "learning_rate": 1.749e-05, |
| "loss": 0.0866, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.07484266031638033, |
| "grad_norm": 1.5402555465698242, |
| "learning_rate": 1.7590000000000003e-05, |
| "loss": 0.0838, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.07526790270454159, |
| "grad_norm": 1.3374762535095215, |
| "learning_rate": 1.7690000000000002e-05, |
| "loss": 0.0889, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.07569314509270283, |
| "grad_norm": 1.5017690658569336, |
| "learning_rate": 1.779e-05, |
| "loss": 0.0863, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.0761183874808641, |
| "grad_norm": 1.2897818088531494, |
| "learning_rate": 1.7890000000000003e-05, |
| "loss": 0.0848, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.07654362986902534, |
| "grad_norm": 1.434987187385559, |
| "learning_rate": 1.7990000000000002e-05, |
| "loss": 0.0816, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.0769688722571866, |
| "grad_norm": 1.3573745489120483, |
| "learning_rate": 1.809e-05, |
| "loss": 0.0796, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.07739411464534784, |
| "grad_norm": 1.3428010940551758, |
| "learning_rate": 1.819e-05, |
| "loss": 0.0822, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.0778193570335091, |
| "grad_norm": 1.129830241203308, |
| "learning_rate": 1.8290000000000003e-05, |
| "loss": 0.0802, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.07824459942167035, |
| "grad_norm": 1.1881945133209229, |
| "learning_rate": 1.8390000000000002e-05, |
| "loss": 0.0851, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.07866984180983161, |
| "grad_norm": 1.3428897857666016, |
| "learning_rate": 1.8489999999999997e-05, |
| "loss": 0.0839, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.07909508419799285, |
| "grad_norm": 0.9840798377990723, |
| "learning_rate": 1.859e-05, |
| "loss": 0.0837, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.07952032658615411, |
| "grad_norm": 1.179263710975647, |
| "learning_rate": 1.869e-05, |
| "loss": 0.0824, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.07994556897431536, |
| "grad_norm": 1.192193865776062, |
| "learning_rate": 1.8789999999999998e-05, |
| "loss": 0.0794, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.08037081136247662, |
| "grad_norm": 1.5266687870025635, |
| "learning_rate": 1.889e-05, |
| "loss": 0.0857, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.08079605375063786, |
| "grad_norm": 1.2958351373672485, |
| "learning_rate": 1.899e-05, |
| "loss": 0.0787, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.08122129613879911, |
| "grad_norm": 1.245995044708252, |
| "learning_rate": 1.909e-05, |
| "loss": 0.0786, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.08164653852696037, |
| "grad_norm": 1.16083562374115, |
| "learning_rate": 1.919e-05, |
| "loss": 0.0789, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.08207178091512161, |
| "grad_norm": 1.0717693567276, |
| "learning_rate": 1.929e-05, |
| "loss": 0.0786, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.08249702330328287, |
| "grad_norm": 1.2903943061828613, |
| "learning_rate": 1.939e-05, |
| "loss": 0.0796, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.08292226569144412, |
| "grad_norm": 1.5063890218734741, |
| "learning_rate": 1.9489999999999998e-05, |
| "loss": 0.0792, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.08334750807960538, |
| "grad_norm": 1.065131664276123, |
| "learning_rate": 1.959e-05, |
| "loss": 0.0757, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.08377275046776662, |
| "grad_norm": 1.508479118347168, |
| "learning_rate": 1.969e-05, |
| "loss": 0.0801, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.08419799285592788, |
| "grad_norm": 1.4832401275634766, |
| "learning_rate": 1.979e-05, |
| "loss": 0.0781, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.08462323524408913, |
| "grad_norm": 1.1999212503433228, |
| "learning_rate": 1.989e-05, |
| "loss": 0.0772, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.08504847763225039, |
| "grad_norm": 1.1872233152389526, |
| "learning_rate": 1.999e-05, |
| "loss": 0.0753, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.08547372002041163, |
| "grad_norm": 1.603405237197876, |
| "learning_rate": 2.009e-05, |
| "loss": 0.0737, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.0858989624085729, |
| "grad_norm": 1.4420855045318604, |
| "learning_rate": 2.019e-05, |
| "loss": 0.0795, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.08632420479673414, |
| "grad_norm": 1.2586225271224976, |
| "learning_rate": 2.029e-05, |
| "loss": 0.0762, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.08674944718489538, |
| "grad_norm": 1.6337774991989136, |
| "learning_rate": 2.039e-05, |
| "loss": 0.0751, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.08717468957305664, |
| "grad_norm": 1.450543999671936, |
| "learning_rate": 2.0490000000000002e-05, |
| "loss": 0.0776, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.08759993196121789, |
| "grad_norm": 1.1475777626037598, |
| "learning_rate": 2.059e-05, |
| "loss": 0.0757, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.08802517434937915, |
| "grad_norm": 1.5073463916778564, |
| "learning_rate": 2.069e-05, |
| "loss": 0.0756, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.0884504167375404, |
| "grad_norm": 1.4093585014343262, |
| "learning_rate": 2.079e-05, |
| "loss": 0.0697, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.08887565912570165, |
| "grad_norm": 1.230241298675537, |
| "learning_rate": 2.089e-05, |
| "loss": 0.0782, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.0893009015138629, |
| "grad_norm": 1.3263798952102661, |
| "learning_rate": 2.099e-05, |
| "loss": 0.0736, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.08972614390202416, |
| "grad_norm": 1.136014461517334, |
| "learning_rate": 2.109e-05, |
| "loss": 0.0676, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.0901513862901854, |
| "grad_norm": 1.4707313776016235, |
| "learning_rate": 2.1190000000000002e-05, |
| "loss": 0.0701, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.09057662867834666, |
| "grad_norm": 1.4252678155899048, |
| "learning_rate": 2.129e-05, |
| "loss": 0.0767, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.09100187106650791, |
| "grad_norm": 1.2199596166610718, |
| "learning_rate": 2.139e-05, |
| "loss": 0.0693, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.09142711345466917, |
| "grad_norm": 1.658355474472046, |
| "learning_rate": 2.1490000000000003e-05, |
| "loss": 0.0709, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.09185235584283041, |
| "grad_norm": 1.2282954454421997, |
| "learning_rate": 2.159e-05, |
| "loss": 0.0688, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.09227759823099166, |
| "grad_norm": 1.3242515325546265, |
| "learning_rate": 2.169e-05, |
| "loss": 0.071, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.09270284061915292, |
| "grad_norm": 1.1989598274230957, |
| "learning_rate": 2.1790000000000003e-05, |
| "loss": 0.0683, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.09312808300731416, |
| "grad_norm": 1.3082703351974487, |
| "learning_rate": 2.1890000000000002e-05, |
| "loss": 0.0712, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.09355332539547542, |
| "grad_norm": 1.3433862924575806, |
| "learning_rate": 2.199e-05, |
| "loss": 0.0768, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.09397856778363667, |
| "grad_norm": 1.1946460008621216, |
| "learning_rate": 2.209e-05, |
| "loss": 0.0754, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.09440381017179793, |
| "grad_norm": 1.3874248266220093, |
| "learning_rate": 2.2190000000000003e-05, |
| "loss": 0.0784, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.09482905255995917, |
| "grad_norm": 1.2676492929458618, |
| "learning_rate": 2.2290000000000002e-05, |
| "loss": 0.0694, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.09525429494812043, |
| "grad_norm": 1.0633152723312378, |
| "learning_rate": 2.239e-05, |
| "loss": 0.0667, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.09567953733628168, |
| "grad_norm": 1.0461442470550537, |
| "learning_rate": 2.2490000000000003e-05, |
| "loss": 0.0707, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.09610477972444294, |
| "grad_norm": 1.5528920888900757, |
| "learning_rate": 2.2590000000000002e-05, |
| "loss": 0.0728, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.09653002211260418, |
| "grad_norm": 1.3866478204727173, |
| "learning_rate": 2.269e-05, |
| "loss": 0.0683, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.09695526450076544, |
| "grad_norm": 1.2420086860656738, |
| "learning_rate": 2.279e-05, |
| "loss": 0.0653, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.09738050688892669, |
| "grad_norm": 1.163353681564331, |
| "learning_rate": 2.289e-05, |
| "loss": 0.0687, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.09780574927708793, |
| "grad_norm": 1.2111841440200806, |
| "learning_rate": 2.299e-05, |
| "loss": 0.0687, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.09823099166524919, |
| "grad_norm": 1.3638079166412354, |
| "learning_rate": 2.309e-05, |
| "loss": 0.0721, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.09865623405341044, |
| "grad_norm": 1.3157620429992676, |
| "learning_rate": 2.319e-05, |
| "loss": 0.071, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.0990814764415717, |
| "grad_norm": 1.142834186553955, |
| "learning_rate": 2.329e-05, |
| "loss": 0.0736, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.09950671882973294, |
| "grad_norm": 1.247509241104126, |
| "learning_rate": 2.3389999999999998e-05, |
| "loss": 0.0648, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.0999319612178942, |
| "grad_norm": 1.134833574295044, |
| "learning_rate": 2.349e-05, |
| "loss": 0.0748, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.10035720360605545, |
| "grad_norm": 1.1281261444091797, |
| "learning_rate": 2.359e-05, |
| "loss": 0.0678, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.10078244599421671, |
| "grad_norm": 1.2668074369430542, |
| "learning_rate": 2.369e-05, |
| "loss": 0.0666, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.10120768838237795, |
| "grad_norm": 1.2871060371398926, |
| "learning_rate": 2.379e-05, |
| "loss": 0.0695, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.10163293077053921, |
| "grad_norm": 1.4086240530014038, |
| "learning_rate": 2.389e-05, |
| "loss": 0.0678, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.10205817315870046, |
| "grad_norm": 1.2249305248260498, |
| "learning_rate": 2.399e-05, |
| "loss": 0.0676, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.10248341554686172, |
| "grad_norm": 1.1750974655151367, |
| "learning_rate": 2.409e-05, |
| "loss": 0.0627, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.10290865793502296, |
| "grad_norm": 1.1746395826339722, |
| "learning_rate": 2.419e-05, |
| "loss": 0.0732, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.10333390032318422, |
| "grad_norm": 1.2999951839447021, |
| "learning_rate": 2.429e-05, |
| "loss": 0.067, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.10375914271134547, |
| "grad_norm": 1.4515321254730225, |
| "learning_rate": 2.439e-05, |
| "loss": 0.0708, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.10418438509950671, |
| "grad_norm": 1.1880756616592407, |
| "learning_rate": 2.449e-05, |
| "loss": 0.0691, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.10460962748766797, |
| "grad_norm": 2.154705047607422, |
| "learning_rate": 2.459e-05, |
| "loss": 0.0691, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.10503486987582922, |
| "grad_norm": 1.569284439086914, |
| "learning_rate": 2.469e-05, |
| "loss": 0.0696, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.10546011226399048, |
| "grad_norm": 1.4350764751434326, |
| "learning_rate": 2.479e-05, |
| "loss": 0.0627, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.10588535465215172, |
| "grad_norm": 1.3060920238494873, |
| "learning_rate": 2.489e-05, |
| "loss": 0.0736, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.10631059704031298, |
| "grad_norm": 1.4141494035720825, |
| "learning_rate": 2.499e-05, |
| "loss": 0.0669, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.10673583942847423, |
| "grad_norm": 1.221895456314087, |
| "learning_rate": 2.5090000000000002e-05, |
| "loss": 0.0631, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.10716108181663549, |
| "grad_norm": 1.24517023563385, |
| "learning_rate": 2.519e-05, |
| "loss": 0.0646, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.10758632420479673, |
| "grad_norm": 0.9323004484176636, |
| "learning_rate": 2.529e-05, |
| "loss": 0.0669, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.10801156659295799, |
| "grad_norm": 1.0457737445831299, |
| "learning_rate": 2.5390000000000003e-05, |
| "loss": 0.0638, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.10843680898111924, |
| "grad_norm": 1.4137825965881348, |
| "learning_rate": 2.549e-05, |
| "loss": 0.0683, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.1088620513692805, |
| "grad_norm": 1.292286992073059, |
| "learning_rate": 2.559e-05, |
| "loss": 0.0717, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.10928729375744174, |
| "grad_norm": 1.1481221914291382, |
| "learning_rate": 2.569e-05, |
| "loss": 0.0633, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.10971253614560299, |
| "grad_norm": 1.4091408252716064, |
| "learning_rate": 2.5790000000000002e-05, |
| "loss": 0.0633, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.11013777853376425, |
| "grad_norm": 1.1633602380752563, |
| "learning_rate": 2.589e-05, |
| "loss": 0.0604, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.11056302092192549, |
| "grad_norm": 1.198021650314331, |
| "learning_rate": 2.599e-05, |
| "loss": 0.0705, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.11098826331008675, |
| "grad_norm": 1.1614141464233398, |
| "learning_rate": 2.6090000000000003e-05, |
| "loss": 0.0679, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.111413505698248, |
| "grad_norm": 0.9087436199188232, |
| "learning_rate": 2.619e-05, |
| "loss": 0.065, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.11183874808640926, |
| "grad_norm": 1.429312825202942, |
| "learning_rate": 2.629e-05, |
| "loss": 0.065, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.1122639904745705, |
| "grad_norm": 1.4051202535629272, |
| "learning_rate": 2.6390000000000003e-05, |
| "loss": 0.0656, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.11268923286273176, |
| "grad_norm": 1.294622540473938, |
| "learning_rate": 2.6490000000000002e-05, |
| "loss": 0.0669, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.113114475250893, |
| "grad_norm": 1.2461621761322021, |
| "learning_rate": 2.659e-05, |
| "loss": 0.0664, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.11353971763905427, |
| "grad_norm": 1.217442274093628, |
| "learning_rate": 2.6690000000000004e-05, |
| "loss": 0.0641, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.11396496002721551, |
| "grad_norm": 1.3828129768371582, |
| "learning_rate": 2.6790000000000003e-05, |
| "loss": 0.0651, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.11439020241537677, |
| "grad_norm": 1.1948891878128052, |
| "learning_rate": 2.6890000000000002e-05, |
| "loss": 0.0643, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.11481544480353802, |
| "grad_norm": 1.2406245470046997, |
| "learning_rate": 2.699e-05, |
| "loss": 0.0622, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.11524068719169926, |
| "grad_norm": 1.248665452003479, |
| "learning_rate": 2.709e-05, |
| "loss": 0.0599, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.11566592957986052, |
| "grad_norm": 1.2043137550354004, |
| "learning_rate": 2.719e-05, |
| "loss": 0.0604, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.11609117196802177, |
| "grad_norm": 1.176339030265808, |
| "learning_rate": 2.7289999999999998e-05, |
| "loss": 0.0651, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.11651641435618303, |
| "grad_norm": 1.115503191947937, |
| "learning_rate": 2.739e-05, |
| "loss": 0.0591, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.11694165674434427, |
| "grad_norm": 1.1059077978134155, |
| "learning_rate": 2.749e-05, |
| "loss": 0.0581, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.11736689913250553, |
| "grad_norm": 1.0317000150680542, |
| "learning_rate": 2.759e-05, |
| "loss": 0.0616, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.11779214152066678, |
| "grad_norm": 0.9671021103858948, |
| "learning_rate": 2.769e-05, |
| "loss": 0.0639, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.11821738390882804, |
| "grad_norm": 1.3735092878341675, |
| "learning_rate": 2.779e-05, |
| "loss": 0.0586, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.11864262629698928, |
| "grad_norm": 1.3800747394561768, |
| "learning_rate": 2.789e-05, |
| "loss": 0.0606, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.11906786868515054, |
| "grad_norm": 1.1293871402740479, |
| "learning_rate": 2.799e-05, |
| "loss": 0.0659, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.11949311107331179, |
| "grad_norm": 1.056138038635254, |
| "learning_rate": 2.809e-05, |
| "loss": 0.063, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.11991835346147305, |
| "grad_norm": 1.1736313104629517, |
| "learning_rate": 2.819e-05, |
| "loss": 0.0634, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.12034359584963429, |
| "grad_norm": 1.3959088325500488, |
| "learning_rate": 2.829e-05, |
| "loss": 0.0597, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.12076883823779554, |
| "grad_norm": 1.186424970626831, |
| "learning_rate": 2.839e-05, |
| "loss": 0.0604, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.1211940806259568, |
| "grad_norm": 1.1499032974243164, |
| "learning_rate": 2.849e-05, |
| "loss": 0.0614, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.12161932301411804, |
| "grad_norm": 1.2873589992523193, |
| "learning_rate": 2.859e-05, |
| "loss": 0.0647, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.1220445654022793, |
| "grad_norm": 1.3243647813796997, |
| "learning_rate": 2.869e-05, |
| "loss": 0.0628, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.12246980779044055, |
| "grad_norm": 1.2491867542266846, |
| "learning_rate": 2.879e-05, |
| "loss": 0.0602, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.1228950501786018, |
| "grad_norm": 1.207425832748413, |
| "learning_rate": 2.889e-05, |
| "loss": 0.0609, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.12332029256676305, |
| "grad_norm": 1.0992892980575562, |
| "learning_rate": 2.8990000000000002e-05, |
| "loss": 0.0578, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.12374553495492431, |
| "grad_norm": 1.2509560585021973, |
| "learning_rate": 2.909e-05, |
| "loss": 0.0613, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.12417077734308556, |
| "grad_norm": 1.1020233631134033, |
| "learning_rate": 2.919e-05, |
| "loss": 0.0629, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.12459601973124682, |
| "grad_norm": 1.1341887712478638, |
| "learning_rate": 2.9290000000000002e-05, |
| "loss": 0.0622, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.12502126211940806, |
| "grad_norm": 0.9719606041908264, |
| "learning_rate": 2.939e-05, |
| "loss": 0.0657, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.1254465045075693, |
| "grad_norm": 1.160712718963623, |
| "learning_rate": 2.949e-05, |
| "loss": 0.0596, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.12587174689573058, |
| "grad_norm": 1.1883903741836548, |
| "learning_rate": 2.959e-05, |
| "loss": 0.0613, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.12629698928389183, |
| "grad_norm": 1.1179677248001099, |
| "learning_rate": 2.9690000000000002e-05, |
| "loss": 0.0608, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.12672223167205307, |
| "grad_norm": 0.936011791229248, |
| "learning_rate": 2.979e-05, |
| "loss": 0.0576, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.12714747406021432, |
| "grad_norm": 1.0857445001602173, |
| "learning_rate": 2.989e-05, |
| "loss": 0.0578, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.12757271644837556, |
| "grad_norm": 1.2470076084136963, |
| "learning_rate": 2.9990000000000003e-05, |
| "loss": 0.0591, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.12799795883653683, |
| "grad_norm": 1.279581069946289, |
| "learning_rate": 2.9999998154575393e-05, |
| "loss": 0.0609, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.12842320122469808, |
| "grad_norm": 1.257899522781372, |
| "learning_rate": 2.999999177533042e-05, |
| "loss": 0.0589, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.12884844361285933, |
| "grad_norm": 0.9201774001121521, |
| "learning_rate": 2.9999980839483992e-05, |
| "loss": 0.0583, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.12927368600102057, |
| "grad_norm": 0.919161319732666, |
| "learning_rate": 2.999996534703944e-05, |
| "loss": 0.0597, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.12969892838918184, |
| "grad_norm": 0.9568930268287659, |
| "learning_rate": 2.9999945298001468e-05, |
| "loss": 0.0563, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.1301241707773431, |
| "grad_norm": 0.7360648512840271, |
| "learning_rate": 2.9999920692376165e-05, |
| "loss": 0.0569, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.13054941316550434, |
| "grad_norm": 0.9083346128463745, |
| "learning_rate": 2.9999891530171002e-05, |
| "loss": 0.0576, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.13097465555366558, |
| "grad_norm": 1.020730972290039, |
| "learning_rate": 2.9999857811394845e-05, |
| "loss": 0.059, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.13139989794182685, |
| "grad_norm": 1.0232067108154297, |
| "learning_rate": 2.9999819536057935e-05, |
| "loss": 0.0596, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.1318251403299881, |
| "grad_norm": 1.2200008630752563, |
| "learning_rate": 2.9999776704171894e-05, |
| "loss": 0.0611, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.13225038271814935, |
| "grad_norm": 1.1772595643997192, |
| "learning_rate": 2.999972931574973e-05, |
| "loss": 0.06, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.1326756251063106, |
| "grad_norm": 1.0882384777069092, |
| "learning_rate": 2.9999677370805858e-05, |
| "loss": 0.0598, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.13310086749447184, |
| "grad_norm": 0.9506834745407104, |
| "learning_rate": 2.9999620869356034e-05, |
| "loss": 0.0577, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.1335261098826331, |
| "grad_norm": 0.8521194458007812, |
| "learning_rate": 2.9999559811417436e-05, |
| "loss": 0.0577, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.13395135227079435, |
| "grad_norm": 0.9100087285041809, |
| "learning_rate": 2.999949419700861e-05, |
| "loss": 0.0564, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.1343765946589556, |
| "grad_norm": 0.8859062194824219, |
| "learning_rate": 2.9999424026149484e-05, |
| "loss": 0.054, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.13480183704711685, |
| "grad_norm": 1.4700942039489746, |
| "learning_rate": 2.9999349298861376e-05, |
| "loss": 0.0568, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.13522707943527812, |
| "grad_norm": 0.989130437374115, |
| "learning_rate": 2.9999270015166983e-05, |
| "loss": 0.0547, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.13565232182343936, |
| "grad_norm": 1.364755392074585, |
| "learning_rate": 2.999918617509039e-05, |
| "loss": 0.058, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.1360775642116006, |
| "grad_norm": 1.0077078342437744, |
| "learning_rate": 2.9999097778657074e-05, |
| "loss": 0.0589, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.13650280659976186, |
| "grad_norm": 1.1901323795318604, |
| "learning_rate": 2.9999004825893878e-05, |
| "loss": 0.058, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.13692804898792313, |
| "grad_norm": 1.0837482213974, |
| "learning_rate": 2.9998907316829043e-05, |
| "loss": 0.0528, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.13735329137608437, |
| "grad_norm": 1.2312610149383545, |
| "learning_rate": 2.999880525149219e-05, |
| "loss": 0.0551, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.13777853376424562, |
| "grad_norm": 1.3543277978897095, |
| "learning_rate": 2.999869862991432e-05, |
| "loss": 0.0584, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.13820377615240687, |
| "grad_norm": 1.1511555910110474, |
| "learning_rate": 2.9998587452127822e-05, |
| "loss": 0.0531, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.1386290185405681, |
| "grad_norm": 1.1416348218917847, |
| "learning_rate": 2.999847171816647e-05, |
| "loss": 0.0587, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.13905426092872938, |
| "grad_norm": 1.1889604330062866, |
| "learning_rate": 2.999835142806543e-05, |
| "loss": 0.0578, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.13947950331689063, |
| "grad_norm": 0.9729725122451782, |
| "learning_rate": 2.9998226581861227e-05, |
| "loss": 0.0573, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.13990474570505188, |
| "grad_norm": 1.0449501276016235, |
| "learning_rate": 2.9998097179591793e-05, |
| "loss": 0.0658, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.14032998809321312, |
| "grad_norm": 1.0162328481674194, |
| "learning_rate": 2.9997963221296443e-05, |
| "loss": 0.0556, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.1407552304813744, |
| "grad_norm": 1.19861900806427, |
| "learning_rate": 2.9997824707015858e-05, |
| "loss": 0.0559, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.14118047286953564, |
| "grad_norm": 1.102773666381836, |
| "learning_rate": 2.9997681636792126e-05, |
| "loss": 0.0584, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.14160571525769688, |
| "grad_norm": 1.011214256286621, |
| "learning_rate": 2.9997534010668707e-05, |
| "loss": 0.0523, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.14203095764585813, |
| "grad_norm": 0.9323782324790955, |
| "learning_rate": 2.9997381828690435e-05, |
| "loss": 0.0559, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.1424562000340194, |
| "grad_norm": 0.9286249279975891, |
| "learning_rate": 2.9997225090903552e-05, |
| "loss": 0.0547, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.14288144242218065, |
| "grad_norm": 0.7139819860458374, |
| "learning_rate": 2.9997063797355664e-05, |
| "loss": 0.0553, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.1433066848103419, |
| "grad_norm": 0.8565438985824585, |
| "learning_rate": 2.9996897948095768e-05, |
| "loss": 0.0585, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.14373192719850314, |
| "grad_norm": 1.053774118423462, |
| "learning_rate": 2.9996727543174244e-05, |
| "loss": 0.056, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.14415716958666439, |
| "grad_norm": 1.0462929010391235, |
| "learning_rate": 2.9996552582642857e-05, |
| "loss": 0.0535, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.14458241197482566, |
| "grad_norm": 0.90887451171875, |
| "learning_rate": 2.9996373066554763e-05, |
| "loss": 0.0581, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.1450076543629869, |
| "grad_norm": 0.8000062704086304, |
| "learning_rate": 2.999618899496448e-05, |
| "loss": 0.0526, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.14543289675114815, |
| "grad_norm": 0.88075852394104, |
| "learning_rate": 2.9996000367927936e-05, |
| "loss": 0.053, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.1458581391393094, |
| "grad_norm": 0.8267508745193481, |
| "learning_rate": 2.9995807185502425e-05, |
| "loss": 0.054, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.14628338152747067, |
| "grad_norm": 1.1968902349472046, |
| "learning_rate": 2.9995609447746636e-05, |
| "loss": 0.0615, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.1467086239156319, |
| "grad_norm": 1.008314609527588, |
| "learning_rate": 2.999540715472063e-05, |
| "loss": 0.0553, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.14713386630379316, |
| "grad_norm": 1.1219818592071533, |
| "learning_rate": 2.999520030648586e-05, |
| "loss": 0.0592, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.1475591086919544, |
| "grad_norm": 1.2746617794036865, |
| "learning_rate": 2.9994988903105163e-05, |
| "loss": 0.0508, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.14798435108011568, |
| "grad_norm": 0.914639413356781, |
| "learning_rate": 2.999477294464276e-05, |
| "loss": 0.0569, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.14840959346827692, |
| "grad_norm": 1.115281581878662, |
| "learning_rate": 2.9994552431164247e-05, |
| "loss": 0.0524, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.14883483585643817, |
| "grad_norm": 0.9016542434692383, |
| "learning_rate": 2.9994327362736617e-05, |
| "loss": 0.0545, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.14926007824459941, |
| "grad_norm": 0.7672004699707031, |
| "learning_rate": 2.9994097739428233e-05, |
| "loss": 0.0507, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.14968532063276066, |
| "grad_norm": 1.0223740339279175, |
| "learning_rate": 2.9993863561308856e-05, |
| "loss": 0.0558, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.15011056302092193, |
| "grad_norm": 1.0547072887420654, |
| "learning_rate": 2.9993624828449616e-05, |
| "loss": 0.0543, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.15053580540908318, |
| "grad_norm": 0.882698655128479, |
| "learning_rate": 2.9993381540923037e-05, |
| "loss": 0.0525, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.15096104779724442, |
| "grad_norm": 0.8458632230758667, |
| "learning_rate": 2.9993133698803025e-05, |
| "loss": 0.0512, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.15138629018540567, |
| "grad_norm": 0.872350811958313, |
| "learning_rate": 2.9992881302164862e-05, |
| "loss": 0.0529, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.15181153257356694, |
| "grad_norm": 1.0274560451507568, |
| "learning_rate": 2.9992624351085226e-05, |
| "loss": 0.0533, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.1522367749617282, |
| "grad_norm": 1.1185356378555298, |
| "learning_rate": 2.9992362845642167e-05, |
| "loss": 0.057, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.15266201734988943, |
| "grad_norm": 0.9312054514884949, |
| "learning_rate": 2.9992096785915132e-05, |
| "loss": 0.0524, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.15308725973805068, |
| "grad_norm": 1.1811115741729736, |
| "learning_rate": 2.999182617198493e-05, |
| "loss": 0.0564, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.15351250212621195, |
| "grad_norm": 1.1230878829956055, |
| "learning_rate": 2.999155100393378e-05, |
| "loss": 0.0526, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.1539377445143732, |
| "grad_norm": 0.9790253043174744, |
| "learning_rate": 2.9991271281845256e-05, |
| "loss": 0.0515, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.15436298690253444, |
| "grad_norm": 0.8891139626502991, |
| "learning_rate": 2.9990987005804347e-05, |
| "loss": 0.0527, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.1547882292906957, |
| "grad_norm": 0.9285902976989746, |
| "learning_rate": 2.999069817589739e-05, |
| "loss": 0.0526, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.15521347167885693, |
| "grad_norm": 1.1737614870071411, |
| "learning_rate": 2.9990404792212142e-05, |
| "loss": 0.0551, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.1556387140670182, |
| "grad_norm": 1.1494216918945312, |
| "learning_rate": 2.9990106854837715e-05, |
| "loss": 0.0537, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.15606395645517945, |
| "grad_norm": 1.0738581418991089, |
| "learning_rate": 2.9989804363864615e-05, |
| "loss": 0.0562, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.1564891988433407, |
| "grad_norm": 0.920110821723938, |
| "learning_rate": 2.998949731938473e-05, |
| "loss": 0.0493, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.15691444123150194, |
| "grad_norm": 0.9473956227302551, |
| "learning_rate": 2.9989185721491338e-05, |
| "loss": 0.0501, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.15733968361966322, |
| "grad_norm": 1.0324159860610962, |
| "learning_rate": 2.998886957027909e-05, |
| "loss": 0.0523, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.15776492600782446, |
| "grad_norm": 0.9430623054504395, |
| "learning_rate": 2.998854886584402e-05, |
| "loss": 0.0528, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.1581901683959857, |
| "grad_norm": 0.9284719228744507, |
| "learning_rate": 2.9988223608283557e-05, |
| "loss": 0.0519, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.15861541078414695, |
| "grad_norm": 1.019679307937622, |
| "learning_rate": 2.9987893797696502e-05, |
| "loss": 0.0522, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.15904065317230823, |
| "grad_norm": 1.0338765382766724, |
| "learning_rate": 2.9987559434183046e-05, |
| "loss": 0.0501, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.15946589556046947, |
| "grad_norm": 1.1244757175445557, |
| "learning_rate": 2.998722051784476e-05, |
| "loss": 0.0555, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.15989113794863072, |
| "grad_norm": 1.0248645544052124, |
| "learning_rate": 2.9986877048784594e-05, |
| "loss": 0.0565, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.16031638033679196, |
| "grad_norm": 0.8432033061981201, |
| "learning_rate": 2.9986529027106885e-05, |
| "loss": 0.0542, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.16074162272495324, |
| "grad_norm": 0.8507984280586243, |
| "learning_rate": 2.998617645291735e-05, |
| "loss": 0.0483, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.16116686511311448, |
| "grad_norm": 0.8564496040344238, |
| "learning_rate": 2.9985819326323097e-05, |
| "loss": 0.0539, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.16159210750127573, |
| "grad_norm": 0.8069973587989807, |
| "learning_rate": 2.9985457647432613e-05, |
| "loss": 0.0529, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.16201734988943697, |
| "grad_norm": 0.7802913188934326, |
| "learning_rate": 2.998509141635576e-05, |
| "loss": 0.0482, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.16244259227759822, |
| "grad_norm": 0.8876798152923584, |
| "learning_rate": 2.99847206332038e-05, |
| "loss": 0.0506, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.1628678346657595, |
| "grad_norm": 1.1593446731567383, |
| "learning_rate": 2.9984345298089356e-05, |
| "loss": 0.0531, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.16329307705392074, |
| "grad_norm": 0.9372552037239075, |
| "learning_rate": 2.998396541112645e-05, |
| "loss": 0.0514, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.16371831944208198, |
| "grad_norm": 0.8006210327148438, |
| "learning_rate": 2.998358097243048e-05, |
| "loss": 0.051, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.16414356183024323, |
| "grad_norm": 0.8869373798370361, |
| "learning_rate": 2.998319198211823e-05, |
| "loss": 0.0502, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.1645688042184045, |
| "grad_norm": 1.0628048181533813, |
| "learning_rate": 2.998279844030786e-05, |
| "loss": 0.0521, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.16499404660656575, |
| "grad_norm": 1.1185886859893799, |
| "learning_rate": 2.9982400347118926e-05, |
| "loss": 0.0501, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.165419288994727, |
| "grad_norm": 1.170861005783081, |
| "learning_rate": 2.9981997702672353e-05, |
| "loss": 0.0492, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.16584453138288824, |
| "grad_norm": 0.9327141642570496, |
| "learning_rate": 2.9981590507090454e-05, |
| "loss": 0.0493, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.1662697737710495, |
| "grad_norm": 0.9895486831665039, |
| "learning_rate": 2.9981178760496927e-05, |
| "loss": 0.0552, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.16669501615921076, |
| "grad_norm": 0.8877111673355103, |
| "learning_rate": 2.9980762463016848e-05, |
| "loss": 0.0543, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.167120258547372, |
| "grad_norm": 0.7400316596031189, |
| "learning_rate": 2.9980341614776673e-05, |
| "loss": 0.0479, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.16754550093553325, |
| "grad_norm": 0.7988236546516418, |
| "learning_rate": 2.9979916215904247e-05, |
| "loss": 0.0499, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.1679707433236945, |
| "grad_norm": 0.8732298016548157, |
| "learning_rate": 2.99794862665288e-05, |
| "loss": 0.0477, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.16839598571185577, |
| "grad_norm": 1.0647722482681274, |
| "learning_rate": 2.9979051766780938e-05, |
| "loss": 0.0517, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.168821228100017, |
| "grad_norm": 0.8953602313995361, |
| "learning_rate": 2.9978612716792647e-05, |
| "loss": 0.0509, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.16924647048817826, |
| "grad_norm": 0.9406638145446777, |
| "learning_rate": 2.99781691166973e-05, |
| "loss": 0.0574, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.1696717128763395, |
| "grad_norm": 1.0174944400787354, |
| "learning_rate": 2.9977720966629646e-05, |
| "loss": 0.0515, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.17009695526450078, |
| "grad_norm": 0.9914764761924744, |
| "learning_rate": 2.997726826672583e-05, |
| "loss": 0.0507, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.17052219765266202, |
| "grad_norm": 0.9415300488471985, |
| "learning_rate": 2.9976811017123368e-05, |
| "loss": 0.0519, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.17094744004082327, |
| "grad_norm": 0.8257099986076355, |
| "learning_rate": 2.9976349217961158e-05, |
| "loss": 0.0501, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.1713726824289845, |
| "grad_norm": 1.0278548002243042, |
| "learning_rate": 2.9975882869379482e-05, |
| "loss": 0.0505, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.1717979248171458, |
| "grad_norm": 0.9726608991622925, |
| "learning_rate": 2.997541197152001e-05, |
| "loss": 0.0506, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.17222316720530703, |
| "grad_norm": 0.9818130731582642, |
| "learning_rate": 2.9974936524525774e-05, |
| "loss": 0.0464, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.17264840959346828, |
| "grad_norm": 0.796649694442749, |
| "learning_rate": 2.9974456528541222e-05, |
| "loss": 0.0503, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.17307365198162952, |
| "grad_norm": 0.9704950451850891, |
| "learning_rate": 2.9973971983712147e-05, |
| "loss": 0.0505, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.17349889436979077, |
| "grad_norm": 1.0106730461120605, |
| "learning_rate": 2.9973482890185753e-05, |
| "loss": 0.0491, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.17392413675795204, |
| "grad_norm": 1.1311920881271362, |
| "learning_rate": 2.9972989248110606e-05, |
| "loss": 0.0523, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.1743493791461133, |
| "grad_norm": 1.1980477571487427, |
| "learning_rate": 2.9972491057636665e-05, |
| "loss": 0.0498, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.17477462153427453, |
| "grad_norm": 0.9751189351081848, |
| "learning_rate": 2.9971988318915268e-05, |
| "loss": 0.0498, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.17519986392243578, |
| "grad_norm": 1.084840178489685, |
| "learning_rate": 2.997148103209913e-05, |
| "loss": 0.0486, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.17562510631059705, |
| "grad_norm": 0.8263355493545532, |
| "learning_rate": 2.9970969197342356e-05, |
| "loss": 0.0466, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.1760503486987583, |
| "grad_norm": 0.8387073874473572, |
| "learning_rate": 2.9970452814800422e-05, |
| "loss": 0.054, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.17647559108691954, |
| "grad_norm": 1.1868751049041748, |
| "learning_rate": 2.99699318846302e-05, |
| "loss": 0.0487, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.1769008334750808, |
| "grad_norm": 0.7838359475135803, |
| "learning_rate": 2.9969406406989927e-05, |
| "loss": 0.0507, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.17732607586324206, |
| "grad_norm": 1.2756154537200928, |
| "learning_rate": 2.996887638203923e-05, |
| "loss": 0.0484, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.1777513182514033, |
| "grad_norm": 0.7980268597602844, |
| "learning_rate": 2.9968341809939116e-05, |
| "loss": 0.0493, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.17817656063956455, |
| "grad_norm": 0.8188315629959106, |
| "learning_rate": 2.9967802690851985e-05, |
| "loss": 0.0457, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.1786018030277258, |
| "grad_norm": 0.8356110453605652, |
| "learning_rate": 2.9967259024941595e-05, |
| "loss": 0.0483, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.17902704541588704, |
| "grad_norm": 0.6958544254302979, |
| "learning_rate": 2.9966710812373097e-05, |
| "loss": 0.0473, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.17945228780404832, |
| "grad_norm": 0.8168766498565674, |
| "learning_rate": 2.996615805331303e-05, |
| "loss": 0.0466, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.17987753019220956, |
| "grad_norm": 0.8009299039840698, |
| "learning_rate": 2.99656007479293e-05, |
| "loss": 0.0453, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.1803027725803708, |
| "grad_norm": 0.9034857153892517, |
| "learning_rate": 2.9965038896391213e-05, |
| "loss": 0.0482, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.18072801496853205, |
| "grad_norm": 1.084303379058838, |
| "learning_rate": 2.9964472498869434e-05, |
| "loss": 0.0486, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.18115325735669333, |
| "grad_norm": 0.9920953512191772, |
| "learning_rate": 2.996390155553603e-05, |
| "loss": 0.0474, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.18157849974485457, |
| "grad_norm": 0.8993090391159058, |
| "learning_rate": 2.9963326066564426e-05, |
| "loss": 0.0465, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.18200374213301582, |
| "grad_norm": 0.8214529752731323, |
| "learning_rate": 2.996274603212945e-05, |
| "loss": 0.046, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.18242898452117706, |
| "grad_norm": 0.847908079624176, |
| "learning_rate": 2.9962161452407296e-05, |
| "loss": 0.0467, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.18285422690933834, |
| "grad_norm": 0.8442016839981079, |
| "learning_rate": 2.996157232757555e-05, |
| "loss": 0.0489, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.18327946929749958, |
| "grad_norm": 0.9253048300743103, |
| "learning_rate": 2.996097865781316e-05, |
| "loss": 0.0522, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.18370471168566083, |
| "grad_norm": 0.7988717555999756, |
| "learning_rate": 2.996038044330048e-05, |
| "loss": 0.0467, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.18412995407382207, |
| "grad_norm": 0.866479754447937, |
| "learning_rate": 2.9959777684219233e-05, |
| "loss": 0.0484, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.18455519646198332, |
| "grad_norm": 0.9718467593193054, |
| "learning_rate": 2.9959170380752508e-05, |
| "loss": 0.048, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.1849804388501446, |
| "grad_norm": 0.8483349084854126, |
| "learning_rate": 2.99585585330848e-05, |
| "loss": 0.0469, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.18540568123830584, |
| "grad_norm": 0.7021053433418274, |
| "learning_rate": 2.9957942141401967e-05, |
| "loss": 0.0451, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.18583092362646708, |
| "grad_norm": 0.8285357356071472, |
| "learning_rate": 2.995732120589125e-05, |
| "loss": 0.0483, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.18625616601462833, |
| "grad_norm": 1.0425705909729004, |
| "learning_rate": 2.9956695726741276e-05, |
| "loss": 0.0488, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.1866814084027896, |
| "grad_norm": 0.9305728673934937, |
| "learning_rate": 2.995606570414205e-05, |
| "loss": 0.0473, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.18710665079095085, |
| "grad_norm": 0.8360809683799744, |
| "learning_rate": 2.9955431138284955e-05, |
| "loss": 0.0491, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.1875318931791121, |
| "grad_norm": 0.9870768785476685, |
| "learning_rate": 2.9954792029362754e-05, |
| "loss": 0.0508, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.18795713556727334, |
| "grad_norm": 0.7025906443595886, |
| "learning_rate": 2.9954148377569598e-05, |
| "loss": 0.0447, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.1883823779554346, |
| "grad_norm": 0.8500348329544067, |
| "learning_rate": 2.9953500183101002e-05, |
| "loss": 0.048, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.18880762034359586, |
| "grad_norm": 0.8822534084320068, |
| "learning_rate": 2.9952847446153877e-05, |
| "loss": 0.047, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.1892328627317571, |
| "grad_norm": 0.9271929264068604, |
| "learning_rate": 2.9952190166926498e-05, |
| "loss": 0.049, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.18965810511991835, |
| "grad_norm": 0.8434593677520752, |
| "learning_rate": 2.9951528345618546e-05, |
| "loss": 0.046, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.1900833475080796, |
| "grad_norm": 0.7526440024375916, |
| "learning_rate": 2.995086198243105e-05, |
| "loss": 0.046, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.19050858989624087, |
| "grad_norm": 0.8926873803138733, |
| "learning_rate": 2.9950191077566434e-05, |
| "loss": 0.0487, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.1909338322844021, |
| "grad_norm": 0.8471497297286987, |
| "learning_rate": 2.9949515631228515e-05, |
| "loss": 0.0448, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.19135907467256336, |
| "grad_norm": 0.7868749499320984, |
| "learning_rate": 2.994883564362246e-05, |
| "loss": 0.0488, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.1917843170607246, |
| "grad_norm": 0.6812358498573303, |
| "learning_rate": 2.994815111495484e-05, |
| "loss": 0.0442, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.19220955944888587, |
| "grad_norm": 0.727590024471283, |
| "learning_rate": 2.9947462045433594e-05, |
| "loss": 0.0479, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.19263480183704712, |
| "grad_norm": 0.6835356950759888, |
| "learning_rate": 2.9946768435268045e-05, |
| "loss": 0.0474, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.19306004422520837, |
| "grad_norm": 0.7723318934440613, |
| "learning_rate": 2.9946070284668884e-05, |
| "loss": 0.0451, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.1934852866133696, |
| "grad_norm": 0.8470324277877808, |
| "learning_rate": 2.994536759384821e-05, |
| "loss": 0.0475, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.19391052900153088, |
| "grad_norm": 0.7473281025886536, |
| "learning_rate": 2.994466036301946e-05, |
| "loss": 0.0484, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.19433577138969213, |
| "grad_norm": 0.9247629046440125, |
| "learning_rate": 2.9943948592397494e-05, |
| "loss": 0.0446, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.19476101377785338, |
| "grad_norm": 0.8116459250450134, |
| "learning_rate": 2.994323228219851e-05, |
| "loss": 0.0501, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.19518625616601462, |
| "grad_norm": 0.8559114933013916, |
| "learning_rate": 2.9942511432640113e-05, |
| "loss": 0.0495, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.19561149855417587, |
| "grad_norm": 1.0107638835906982, |
| "learning_rate": 2.9941786043941278e-05, |
| "loss": 0.0466, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.19603674094233714, |
| "grad_norm": 0.9044751524925232, |
| "learning_rate": 2.9941056116322357e-05, |
| "loss": 0.0476, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.19646198333049839, |
| "grad_norm": 0.8107633590698242, |
| "learning_rate": 2.994032165000508e-05, |
| "loss": 0.047, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.19688722571865963, |
| "grad_norm": 0.9792898297309875, |
| "learning_rate": 2.9939582645212566e-05, |
| "loss": 0.0483, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.19731246810682088, |
| "grad_norm": 0.7129027247428894, |
| "learning_rate": 2.9938839102169303e-05, |
| "loss": 0.0437, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.19773771049498215, |
| "grad_norm": 0.6798853278160095, |
| "learning_rate": 2.993809102110116e-05, |
| "loss": 0.045, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.1981629528831434, |
| "grad_norm": 0.8705613613128662, |
| "learning_rate": 2.9937338402235373e-05, |
| "loss": 0.0459, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.19858819527130464, |
| "grad_norm": 0.9700260758399963, |
| "learning_rate": 2.993658124580058e-05, |
| "loss": 0.0472, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.19901343765946589, |
| "grad_norm": 1.0166648626327515, |
| "learning_rate": 2.9935819552026785e-05, |
| "loss": 0.0454, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.19943868004762716, |
| "grad_norm": 1.0023534297943115, |
| "learning_rate": 2.9935053321145368e-05, |
| "loss": 0.0501, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.1998639224357884, |
| "grad_norm": 0.820027768611908, |
| "learning_rate": 2.9934282553389088e-05, |
| "loss": 0.0458, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.20028916482394965, |
| "grad_norm": 0.881223201751709, |
| "learning_rate": 2.9933507248992084e-05, |
| "loss": 0.046, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.2007144072121109, |
| "grad_norm": 0.838016927242279, |
| "learning_rate": 2.9932727408189876e-05, |
| "loss": 0.0471, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.20113964960027217, |
| "grad_norm": 0.9668949246406555, |
| "learning_rate": 2.9931943031219356e-05, |
| "loss": 0.0446, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.20156489198843341, |
| "grad_norm": 0.762941300868988, |
| "learning_rate": 2.9931154118318803e-05, |
| "loss": 0.0439, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.20199013437659466, |
| "grad_norm": 0.6991223692893982, |
| "learning_rate": 2.9930360669727858e-05, |
| "loss": 0.0476, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.2024153767647559, |
| "grad_norm": 0.744959831237793, |
| "learning_rate": 2.992956268568755e-05, |
| "loss": 0.0473, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.20284061915291715, |
| "grad_norm": 0.8024082183837891, |
| "learning_rate": 2.99287601664403e-05, |
| "loss": 0.0487, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.20326586154107842, |
| "grad_norm": 0.9071053266525269, |
| "learning_rate": 2.992795311222988e-05, |
| "loss": 0.0484, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.20369110392923967, |
| "grad_norm": 0.9340217113494873, |
| "learning_rate": 2.9927141523301453e-05, |
| "loss": 0.0473, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.20411634631740092, |
| "grad_norm": 0.9845976829528809, |
| "learning_rate": 2.992632539990156e-05, |
| "loss": 0.0447, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.20454158870556216, |
| "grad_norm": 0.8842020034790039, |
| "learning_rate": 2.9925504742278117e-05, |
| "loss": 0.0492, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.20496683109372343, |
| "grad_norm": 0.9457699060440063, |
| "learning_rate": 2.9924679550680416e-05, |
| "loss": 0.0463, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.20539207348188468, |
| "grad_norm": 0.8074204921722412, |
| "learning_rate": 2.9923849825359133e-05, |
| "loss": 0.0467, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.20581731587004592, |
| "grad_norm": 0.7934054136276245, |
| "learning_rate": 2.9923015566566317e-05, |
| "loss": 0.0462, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.20624255825820717, |
| "grad_norm": 0.8365235924720764, |
| "learning_rate": 2.992217677455539e-05, |
| "loss": 0.0447, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.20666780064636844, |
| "grad_norm": 0.8392214775085449, |
| "learning_rate": 2.9921333449581153e-05, |
| "loss": 0.0481, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.2070930430345297, |
| "grad_norm": 0.9338740110397339, |
| "learning_rate": 2.9920485591899795e-05, |
| "loss": 0.0459, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.20751828542269093, |
| "grad_norm": 0.7019702196121216, |
| "learning_rate": 2.991963320176886e-05, |
| "loss": 0.0438, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.20794352781085218, |
| "grad_norm": 0.7933806777000427, |
| "learning_rate": 2.991877627944729e-05, |
| "loss": 0.0432, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.20836877019901343, |
| "grad_norm": 0.8409459590911865, |
| "learning_rate": 2.9917914825195393e-05, |
| "loss": 0.0464, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.2087940125871747, |
| "grad_norm": 0.7470780611038208, |
| "learning_rate": 2.991704883927486e-05, |
| "loss": 0.0427, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.20921925497533594, |
| "grad_norm": 0.7226709127426147, |
| "learning_rate": 2.991617832194875e-05, |
| "loss": 0.0491, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.2096444973634972, |
| "grad_norm": 0.7474321126937866, |
| "learning_rate": 2.9915303273481504e-05, |
| "loss": 0.0484, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.21006973975165844, |
| "grad_norm": 0.8669159412384033, |
| "learning_rate": 2.9914423694138942e-05, |
| "loss": 0.044, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.2104949821398197, |
| "grad_norm": 0.9365962147712708, |
| "learning_rate": 2.9913539584188253e-05, |
| "loss": 0.0487, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.21092022452798095, |
| "grad_norm": 0.7574573755264282, |
| "learning_rate": 2.9912650943898008e-05, |
| "loss": 0.0482, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.2113454669161422, |
| "grad_norm": 0.6261996626853943, |
| "learning_rate": 2.9911757773538148e-05, |
| "loss": 0.0443, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.21177070930430344, |
| "grad_norm": 0.9774627089500427, |
| "learning_rate": 2.9910860073380005e-05, |
| "loss": 0.046, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.21219595169246472, |
| "grad_norm": 0.8433800935745239, |
| "learning_rate": 2.9909957843696267e-05, |
| "loss": 0.0458, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.21262119408062596, |
| "grad_norm": 0.6524537801742554, |
| "learning_rate": 2.990905108476101e-05, |
| "loss": 0.0488, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2130464364687872, |
| "grad_norm": 0.6434529423713684, |
| "learning_rate": 2.9908139796849683e-05, |
| "loss": 0.0455, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.21347167885694845, |
| "grad_norm": 0.7861361503601074, |
| "learning_rate": 2.9907223980239114e-05, |
| "loss": 0.0461, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.2138969212451097, |
| "grad_norm": 0.5959680676460266, |
| "learning_rate": 2.99063036352075e-05, |
| "loss": 0.0442, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.21432216363327097, |
| "grad_norm": 0.6599105000495911, |
| "learning_rate": 2.9905378762034424e-05, |
| "loss": 0.0445, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.21474740602143222, |
| "grad_norm": 0.7693561315536499, |
| "learning_rate": 2.9904449361000833e-05, |
| "loss": 0.0441, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.21517264840959346, |
| "grad_norm": 0.8092666864395142, |
| "learning_rate": 2.9903515432389056e-05, |
| "loss": 0.0431, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.2155978907977547, |
| "grad_norm": 0.7702406644821167, |
| "learning_rate": 2.990257697648279e-05, |
| "loss": 0.0433, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.21602313318591598, |
| "grad_norm": 0.8427804708480835, |
| "learning_rate": 2.9901633993567125e-05, |
| "loss": 0.0417, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.21644837557407723, |
| "grad_norm": 0.7076109051704407, |
| "learning_rate": 2.99006864839285e-05, |
| "loss": 0.0489, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.21687361796223847, |
| "grad_norm": 0.6427155137062073, |
| "learning_rate": 2.989973444785476e-05, |
| "loss": 0.0482, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.21729886035039972, |
| "grad_norm": 0.7994742393493652, |
| "learning_rate": 2.989877788563509e-05, |
| "loss": 0.0471, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.217724102738561, |
| "grad_norm": 0.7038072943687439, |
| "learning_rate": 2.9897816797560085e-05, |
| "loss": 0.0429, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.21814934512672224, |
| "grad_norm": 0.8199881315231323, |
| "learning_rate": 2.989685118392169e-05, |
| "loss": 0.044, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.21857458751488348, |
| "grad_norm": 0.9012776017189026, |
| "learning_rate": 2.9895881045013232e-05, |
| "loss": 0.0482, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.21899982990304473, |
| "grad_norm": 0.9288299679756165, |
| "learning_rate": 2.9894906381129414e-05, |
| "loss": 0.0457, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.21942507229120597, |
| "grad_norm": 0.8141002058982849, |
| "learning_rate": 2.9893927192566316e-05, |
| "loss": 0.0465, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.21985031467936725, |
| "grad_norm": 0.8564704060554504, |
| "learning_rate": 2.989294347962139e-05, |
| "loss": 0.0433, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.2202755570675285, |
| "grad_norm": 0.7727344036102295, |
| "learning_rate": 2.989195524259346e-05, |
| "loss": 0.0444, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.22070079945568974, |
| "grad_norm": 0.9129493236541748, |
| "learning_rate": 2.9890962481782723e-05, |
| "loss": 0.0455, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.22112604184385098, |
| "grad_norm": 0.6133219003677368, |
| "learning_rate": 2.9889965197490757e-05, |
| "loss": 0.044, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.22155128423201226, |
| "grad_norm": 0.5847681760787964, |
| "learning_rate": 2.988896339002052e-05, |
| "loss": 0.0466, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.2219765266201735, |
| "grad_norm": 0.7327444553375244, |
| "learning_rate": 2.9887957059676315e-05, |
| "loss": 0.0444, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.22240176900833475, |
| "grad_norm": 0.6856175065040588, |
| "learning_rate": 2.9886946206763855e-05, |
| "loss": 0.0453, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.222827011396496, |
| "grad_norm": 0.8159507513046265, |
| "learning_rate": 2.9885930831590202e-05, |
| "loss": 0.0446, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.22325225378465727, |
| "grad_norm": 0.7875511646270752, |
| "learning_rate": 2.98849109344638e-05, |
| "loss": 0.044, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.2236774961728185, |
| "grad_norm": 0.7698711156845093, |
| "learning_rate": 2.9883886515694474e-05, |
| "loss": 0.0451, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.22410273856097976, |
| "grad_norm": 0.958143413066864, |
| "learning_rate": 2.988285757559341e-05, |
| "loss": 0.0452, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.224527980949141, |
| "grad_norm": 1.0015833377838135, |
| "learning_rate": 2.988182411447317e-05, |
| "loss": 0.045, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.22495322333730225, |
| "grad_norm": 0.7416315674781799, |
| "learning_rate": 2.98807861326477e-05, |
| "loss": 0.0457, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.22537846572546352, |
| "grad_norm": 0.6568078994750977, |
| "learning_rate": 2.9879743630432307e-05, |
| "loss": 0.0444, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.22580370811362477, |
| "grad_norm": 0.7507728338241577, |
| "learning_rate": 2.9878696608143677e-05, |
| "loss": 0.0419, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.226228950501786, |
| "grad_norm": 0.7239499688148499, |
| "learning_rate": 2.9877645066099862e-05, |
| "loss": 0.0431, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.22665419288994726, |
| "grad_norm": 0.6904134154319763, |
| "learning_rate": 2.9876589004620298e-05, |
| "loss": 0.0437, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.22707943527810853, |
| "grad_norm": 0.7209636569023132, |
| "learning_rate": 2.987552842402579e-05, |
| "loss": 0.0472, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.22750467766626978, |
| "grad_norm": 0.7524945735931396, |
| "learning_rate": 2.9874463324638515e-05, |
| "loss": 0.0423, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.22792992005443102, |
| "grad_norm": 0.8387914299964905, |
| "learning_rate": 2.9873393706782016e-05, |
| "loss": 0.0439, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.22835516244259227, |
| "grad_norm": 0.7968807220458984, |
| "learning_rate": 2.987231957078122e-05, |
| "loss": 0.0424, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.22878040483075354, |
| "grad_norm": 0.8148554563522339, |
| "learning_rate": 2.9871240916962414e-05, |
| "loss": 0.0416, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.2292056472189148, |
| "grad_norm": 0.6972390413284302, |
| "learning_rate": 2.9870157745653274e-05, |
| "loss": 0.0423, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.22963088960707603, |
| "grad_norm": 0.9805029630661011, |
| "learning_rate": 2.9869070057182838e-05, |
| "loss": 0.042, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.23005613199523728, |
| "grad_norm": 0.9213829636573792, |
| "learning_rate": 2.986797785188151e-05, |
| "loss": 0.045, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.23048137438339852, |
| "grad_norm": 0.8021851778030396, |
| "learning_rate": 2.9866881130081074e-05, |
| "loss": 0.0431, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.2309066167715598, |
| "grad_norm": 0.7615094184875488, |
| "learning_rate": 2.986577989211469e-05, |
| "loss": 0.0442, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.23133185915972104, |
| "grad_norm": 0.6527719497680664, |
| "learning_rate": 2.9864674138316887e-05, |
| "loss": 0.0434, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.2317571015478823, |
| "grad_norm": 0.7374159097671509, |
| "learning_rate": 2.9863563869023554e-05, |
| "loss": 0.0412, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.23218234393604353, |
| "grad_norm": 0.7521957159042358, |
| "learning_rate": 2.9862449084571972e-05, |
| "loss": 0.0418, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.2326075863242048, |
| "grad_norm": 0.7196958065032959, |
| "learning_rate": 2.9861329785300774e-05, |
| "loss": 0.0434, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.23303282871236605, |
| "grad_norm": 0.8486290574073792, |
| "learning_rate": 2.9860205971549978e-05, |
| "loss": 0.0423, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.2334580711005273, |
| "grad_norm": 0.8910712003707886, |
| "learning_rate": 2.9859077643660974e-05, |
| "loss": 0.0425, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.23388331348868854, |
| "grad_norm": 0.8362525701522827, |
| "learning_rate": 2.985794480197651e-05, |
| "loss": 0.0492, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.23430855587684982, |
| "grad_norm": 0.817209780216217, |
| "learning_rate": 2.9856807446840716e-05, |
| "loss": 0.0396, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.23473379826501106, |
| "grad_norm": 0.7105398178100586, |
| "learning_rate": 2.9855665578599093e-05, |
| "loss": 0.0427, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.2351590406531723, |
| "grad_norm": 0.8205658197402954, |
| "learning_rate": 2.9854519197598504e-05, |
| "loss": 0.0409, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.23558428304133355, |
| "grad_norm": 0.5791633725166321, |
| "learning_rate": 2.9853368304187197e-05, |
| "loss": 0.0423, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.2360095254294948, |
| "grad_norm": 0.6344013810157776, |
| "learning_rate": 2.9852212898714783e-05, |
| "loss": 0.0436, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.23643476781765607, |
| "grad_norm": 0.7818456888198853, |
| "learning_rate": 2.9851052981532233e-05, |
| "loss": 0.0437, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.23686001020581732, |
| "grad_norm": 0.6619543433189392, |
| "learning_rate": 2.9849888552991908e-05, |
| "loss": 0.0441, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.23728525259397856, |
| "grad_norm": 0.6021639704704285, |
| "learning_rate": 2.9848719613447535e-05, |
| "loss": 0.0496, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.2377104949821398, |
| "grad_norm": 0.6315738558769226, |
| "learning_rate": 2.9847546163254194e-05, |
| "loss": 0.0417, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.23813573737030108, |
| "grad_norm": 0.7588102221488953, |
| "learning_rate": 2.984636820276836e-05, |
| "loss": 0.0454, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.23856097975846233, |
| "grad_norm": 0.7127803564071655, |
| "learning_rate": 2.984518573234786e-05, |
| "loss": 0.0441, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.23898622214662357, |
| "grad_norm": 0.8134715557098389, |
| "learning_rate": 2.98439987523519e-05, |
| "loss": 0.0426, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.23941146453478482, |
| "grad_norm": 0.6682436466217041, |
| "learning_rate": 2.9842807263141052e-05, |
| "loss": 0.0429, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.2398367069229461, |
| "grad_norm": 0.7612809538841248, |
| "learning_rate": 2.9841611265077256e-05, |
| "loss": 0.0404, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.24026194931110734, |
| "grad_norm": 0.6678027510643005, |
| "learning_rate": 2.9840410758523832e-05, |
| "loss": 0.0426, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.24068719169926858, |
| "grad_norm": 0.5913005471229553, |
| "learning_rate": 2.9839205743845453e-05, |
| "loss": 0.0423, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.24111243408742983, |
| "grad_norm": 0.903803825378418, |
| "learning_rate": 2.983799622140818e-05, |
| "loss": 0.0451, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.24153767647559107, |
| "grad_norm": 0.6426815986633301, |
| "learning_rate": 2.9836782191579425e-05, |
| "loss": 0.0422, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.24196291886375235, |
| "grad_norm": 0.8089521527290344, |
| "learning_rate": 2.983556365472799e-05, |
| "loss": 0.0444, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.2423881612519136, |
| "grad_norm": 0.7508390545845032, |
| "learning_rate": 2.9834340611224022e-05, |
| "loss": 0.0403, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.24281340364007484, |
| "grad_norm": 0.6964208483695984, |
| "learning_rate": 2.9833113061439057e-05, |
| "loss": 0.042, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.24323864602823608, |
| "grad_norm": 0.8737379312515259, |
| "learning_rate": 2.9831881005745986e-05, |
| "loss": 0.0403, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.24366388841639736, |
| "grad_norm": 0.6892777681350708, |
| "learning_rate": 2.983064444451908e-05, |
| "loss": 0.0441, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.2440891308045586, |
| "grad_norm": 0.8702257871627808, |
| "learning_rate": 2.9829403378133974e-05, |
| "loss": 0.0443, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.24451437319271985, |
| "grad_norm": 0.7382299900054932, |
| "learning_rate": 2.9828157806967668e-05, |
| "loss": 0.0421, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.2449396155808811, |
| "grad_norm": 0.8885031938552856, |
| "learning_rate": 2.9826907731398533e-05, |
| "loss": 0.0416, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.24536485796904237, |
| "grad_norm": 0.5969104766845703, |
| "learning_rate": 2.9825653151806315e-05, |
| "loss": 0.0447, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.2457901003572036, |
| "grad_norm": 0.7052406668663025, |
| "learning_rate": 2.9824394068572114e-05, |
| "loss": 0.0427, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.24621534274536486, |
| "grad_norm": 0.5495277643203735, |
| "learning_rate": 2.9823130482078415e-05, |
| "loss": 0.0417, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.2466405851335261, |
| "grad_norm": 0.6981067657470703, |
| "learning_rate": 2.9821862392709054e-05, |
| "loss": 0.0431, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.24706582752168738, |
| "grad_norm": 0.9393956065177917, |
| "learning_rate": 2.982058980084925e-05, |
| "loss": 0.0441, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.24749106990984862, |
| "grad_norm": 0.6969560980796814, |
| "learning_rate": 2.9819312706885577e-05, |
| "loss": 0.0439, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.24791631229800987, |
| "grad_norm": 0.7812885642051697, |
| "learning_rate": 2.9818031111205986e-05, |
| "loss": 0.039, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.2483415546861711, |
| "grad_norm": 0.7769454121589661, |
| "learning_rate": 2.981674501419979e-05, |
| "loss": 0.0415, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.24876679707433236, |
| "grad_norm": 0.6432703137397766, |
| "learning_rate": 2.981545441625767e-05, |
| "loss": 0.0404, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.24919203946249363, |
| "grad_norm": 0.9023407697677612, |
| "learning_rate": 2.9814159317771682e-05, |
| "loss": 0.0432, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.24961728185065488, |
| "grad_norm": 0.9495925903320312, |
| "learning_rate": 2.9812859719135236e-05, |
| "loss": 0.0419, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.2500425242388161, |
| "grad_norm": 1.0032718181610107, |
| "learning_rate": 2.981155562074312e-05, |
| "loss": 0.0425, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.2504677666269774, |
| "grad_norm": 0.6281837224960327, |
| "learning_rate": 2.9810247022991483e-05, |
| "loss": 0.0387, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.2508930090151386, |
| "grad_norm": 0.7671382427215576, |
| "learning_rate": 2.9808933926277842e-05, |
| "loss": 0.0418, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.2513182514032999, |
| "grad_norm": 0.7822478413581848, |
| "learning_rate": 2.9807616331001078e-05, |
| "loss": 0.0418, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.25174349379146116, |
| "grad_norm": 0.7015408277511597, |
| "learning_rate": 2.9806294237561452e-05, |
| "loss": 0.0407, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.2521687361796224, |
| "grad_norm": 0.683319628238678, |
| "learning_rate": 2.980496764636057e-05, |
| "loss": 0.0439, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.25259397856778365, |
| "grad_norm": 0.6143163442611694, |
| "learning_rate": 2.9803636557801417e-05, |
| "loss": 0.042, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.25301922095594487, |
| "grad_norm": 0.8570902347564697, |
| "learning_rate": 2.980230097228835e-05, |
| "loss": 0.0438, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.25344446334410614, |
| "grad_norm": 0.6100302934646606, |
| "learning_rate": 2.9800960890227076e-05, |
| "loss": 0.0402, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.2538697057322674, |
| "grad_norm": 0.6212876439094543, |
| "learning_rate": 2.979961631202468e-05, |
| "loss": 0.0399, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.25429494812042863, |
| "grad_norm": 0.546836256980896, |
| "learning_rate": 2.9798267238089613e-05, |
| "loss": 0.0403, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.2547201905085899, |
| "grad_norm": 0.6100478172302246, |
| "learning_rate": 2.9796913668831684e-05, |
| "loss": 0.0404, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.2551454328967511, |
| "grad_norm": 0.5832479596138, |
| "learning_rate": 2.979555560466207e-05, |
| "loss": 0.0386, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.2555706752849124, |
| "grad_norm": 0.6781200766563416, |
| "learning_rate": 2.9794193045993317e-05, |
| "loss": 0.0425, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.25599591767307367, |
| "grad_norm": 0.5507116317749023, |
| "learning_rate": 2.9792825993239337e-05, |
| "loss": 0.0384, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.2564211600612349, |
| "grad_norm": 0.7260423898696899, |
| "learning_rate": 2.9791454446815404e-05, |
| "loss": 0.0438, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.25684640244939616, |
| "grad_norm": 0.6341103315353394, |
| "learning_rate": 2.9790078407138154e-05, |
| "loss": 0.0401, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.25727164483755743, |
| "grad_norm": 0.5935182571411133, |
| "learning_rate": 2.9788697874625588e-05, |
| "loss": 0.041, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.25769688722571865, |
| "grad_norm": 0.7649999260902405, |
| "learning_rate": 2.9787312849697088e-05, |
| "loss": 0.0416, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.2581221296138799, |
| "grad_norm": 0.7656498551368713, |
| "learning_rate": 2.978592333277338e-05, |
| "loss": 0.04, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.25854737200204114, |
| "grad_norm": 0.6227708458900452, |
| "learning_rate": 2.9784529324276557e-05, |
| "loss": 0.0441, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.2589726143902024, |
| "grad_norm": 0.6449046730995178, |
| "learning_rate": 2.9783130824630093e-05, |
| "loss": 0.0413, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.2593978567783637, |
| "grad_norm": 0.8230679035186768, |
| "learning_rate": 2.9781727834258806e-05, |
| "loss": 0.0409, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.2598230991665249, |
| "grad_norm": 0.810616672039032, |
| "learning_rate": 2.97803203535889e-05, |
| "loss": 0.0456, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.2602483415546862, |
| "grad_norm": 0.7623624205589294, |
| "learning_rate": 2.977890838304792e-05, |
| "loss": 0.0426, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.2606735839428474, |
| "grad_norm": 0.7329275012016296, |
| "learning_rate": 2.9777491923064782e-05, |
| "loss": 0.042, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.26109882633100867, |
| "grad_norm": 0.7706323266029358, |
| "learning_rate": 2.977607097406978e-05, |
| "loss": 0.0415, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.26152406871916994, |
| "grad_norm": 0.6100689172744751, |
| "learning_rate": 2.9774645536494552e-05, |
| "loss": 0.0384, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.26194931110733116, |
| "grad_norm": 0.516251802444458, |
| "learning_rate": 2.9773215610772117e-05, |
| "loss": 0.0401, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.26237455349549244, |
| "grad_norm": 0.7943555116653442, |
| "learning_rate": 2.9771781197336837e-05, |
| "loss": 0.0376, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.2627997958836537, |
| "grad_norm": 0.8178479671478271, |
| "learning_rate": 2.977034229662446e-05, |
| "loss": 0.0388, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.2632250382718149, |
| "grad_norm": 0.7358256578445435, |
| "learning_rate": 2.976889890907208e-05, |
| "loss": 0.0412, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.2636502806599762, |
| "grad_norm": 0.6371821761131287, |
| "learning_rate": 2.9767451035118164e-05, |
| "loss": 0.0398, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.2640755230481374, |
| "grad_norm": 0.549709141254425, |
| "learning_rate": 2.9765998675202536e-05, |
| "loss": 0.0416, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.2645007654362987, |
| "grad_norm": 0.6909655332565308, |
| "learning_rate": 2.9764541829766382e-05, |
| "loss": 0.0399, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.26492600782445996, |
| "grad_norm": 0.6212393641471863, |
| "learning_rate": 2.9763080499252253e-05, |
| "loss": 0.0377, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.2653512502126212, |
| "grad_norm": 0.5959421396255493, |
| "learning_rate": 2.9761614684104064e-05, |
| "loss": 0.0399, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.26577649260078245, |
| "grad_norm": 0.6034130454063416, |
| "learning_rate": 2.9760144384767095e-05, |
| "loss": 0.0393, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.2662017349889437, |
| "grad_norm": 0.6626703143119812, |
| "learning_rate": 2.9758669601687986e-05, |
| "loss": 0.0381, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.26662697737710495, |
| "grad_norm": 0.6859477758407593, |
| "learning_rate": 2.9757190335314722e-05, |
| "loss": 0.0406, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.2670522197652662, |
| "grad_norm": 0.7063243985176086, |
| "learning_rate": 2.975570658609668e-05, |
| "loss": 0.0391, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.26747746215342744, |
| "grad_norm": 0.5350667834281921, |
| "learning_rate": 2.9754218354484582e-05, |
| "loss": 0.0416, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.2679027045415887, |
| "grad_norm": 0.6627820134162903, |
| "learning_rate": 2.9752725640930504e-05, |
| "loss": 0.0386, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.26832794692975, |
| "grad_norm": 0.6602414846420288, |
| "learning_rate": 2.9751228445887905e-05, |
| "loss": 0.0391, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.2687531893179112, |
| "grad_norm": 0.6656312942504883, |
| "learning_rate": 2.9749726769811588e-05, |
| "loss": 0.0406, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.2691784317060725, |
| "grad_norm": 0.6035658121109009, |
| "learning_rate": 2.9748220613157717e-05, |
| "loss": 0.0396, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.2696036740942337, |
| "grad_norm": 0.589562177658081, |
| "learning_rate": 2.9746709976383832e-05, |
| "loss": 0.0377, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.27002891648239497, |
| "grad_norm": 0.5129275918006897, |
| "learning_rate": 2.974519485994882e-05, |
| "loss": 0.0451, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.27045415887055624, |
| "grad_norm": 0.6258460879325867, |
| "learning_rate": 2.9743675264312934e-05, |
| "loss": 0.0386, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.27087940125871746, |
| "grad_norm": 0.8174107670783997, |
| "learning_rate": 2.9742151189937784e-05, |
| "loss": 0.04, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.27130464364687873, |
| "grad_norm": 0.7886274456977844, |
| "learning_rate": 2.974062263728635e-05, |
| "loss": 0.0404, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.27172988603503995, |
| "grad_norm": 0.7022881507873535, |
| "learning_rate": 2.973908960682296e-05, |
| "loss": 0.0414, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.2721551284232012, |
| "grad_norm": 0.9441139698028564, |
| "learning_rate": 2.9737552099013313e-05, |
| "loss": 0.045, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.2725803708113625, |
| "grad_norm": 0.877943754196167, |
| "learning_rate": 2.9736010114324462e-05, |
| "loss": 0.042, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.2730056131995237, |
| "grad_norm": 0.851972758769989, |
| "learning_rate": 2.9734463653224813e-05, |
| "loss": 0.0423, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.273430855587685, |
| "grad_norm": 0.7306724190711975, |
| "learning_rate": 2.9732912716184155e-05, |
| "loss": 0.039, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.27385609797584626, |
| "grad_norm": 0.6330131888389587, |
| "learning_rate": 2.9731357303673607e-05, |
| "loss": 0.0411, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.2742813403640075, |
| "grad_norm": 0.8628656268119812, |
| "learning_rate": 2.9729797416165677e-05, |
| "loss": 0.0378, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.27470658275216875, |
| "grad_norm": 0.6638690233230591, |
| "learning_rate": 2.9728233054134202e-05, |
| "loss": 0.0425, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.27513182514032997, |
| "grad_norm": 0.607979953289032, |
| "learning_rate": 2.9726664218054405e-05, |
| "loss": 0.0359, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.27555706752849124, |
| "grad_norm": 0.6174076795578003, |
| "learning_rate": 2.9725090908402856e-05, |
| "loss": 0.0383, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.2759823099166525, |
| "grad_norm": 0.8638490438461304, |
| "learning_rate": 2.972351312565748e-05, |
| "loss": 0.0445, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.27640755230481373, |
| "grad_norm": 0.7757192850112915, |
| "learning_rate": 2.9721930870297567e-05, |
| "loss": 0.0422, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.276832794692975, |
| "grad_norm": 0.7784976363182068, |
| "learning_rate": 2.9720344142803766e-05, |
| "loss": 0.0427, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.2772580370811362, |
| "grad_norm": 0.7674238681793213, |
| "learning_rate": 2.9718752943658085e-05, |
| "loss": 0.0411, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.2776832794692975, |
| "grad_norm": 0.6946344971656799, |
| "learning_rate": 2.9717157273343885e-05, |
| "loss": 0.0434, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.27810852185745877, |
| "grad_norm": 0.8143453001976013, |
| "learning_rate": 2.971555713234589e-05, |
| "loss": 0.0381, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.27853376424562, |
| "grad_norm": 0.6152936220169067, |
| "learning_rate": 2.9713952521150176e-05, |
| "loss": 0.0413, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.27895900663378126, |
| "grad_norm": 0.5989219546318054, |
| "learning_rate": 2.971234344024419e-05, |
| "loss": 0.0399, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.27938424902194253, |
| "grad_norm": 0.6335272789001465, |
| "learning_rate": 2.971072989011672e-05, |
| "loss": 0.0403, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.27980949141010375, |
| "grad_norm": 0.5582631826400757, |
| "learning_rate": 2.9709111871257927e-05, |
| "loss": 0.0394, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.280234733798265, |
| "grad_norm": 0.5173953175544739, |
| "learning_rate": 2.9707489384159318e-05, |
| "loss": 0.0389, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.28065997618642624, |
| "grad_norm": 0.6251932382583618, |
| "learning_rate": 2.9705862429313763e-05, |
| "loss": 0.0412, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.2810852185745875, |
| "grad_norm": 0.5652943253517151, |
| "learning_rate": 2.9704231007215488e-05, |
| "loss": 0.0393, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.2815104609627488, |
| "grad_norm": 0.6986289620399475, |
| "learning_rate": 2.9702595118360077e-05, |
| "loss": 0.0424, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.28193570335091, |
| "grad_norm": 0.6257848739624023, |
| "learning_rate": 2.9700954763244465e-05, |
| "loss": 0.042, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.2823609457390713, |
| "grad_norm": 0.5983774662017822, |
| "learning_rate": 2.9699309942366953e-05, |
| "loss": 0.0409, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.2827861881272325, |
| "grad_norm": 0.6213322281837463, |
| "learning_rate": 2.969766065622719e-05, |
| "loss": 0.0416, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.28321143051539377, |
| "grad_norm": 0.6498160362243652, |
| "learning_rate": 2.96960069053262e-05, |
| "loss": 0.0384, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.28363667290355504, |
| "grad_norm": 0.6931298971176147, |
| "learning_rate": 2.9694348690166327e-05, |
| "loss": 0.04, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.28406191529171626, |
| "grad_norm": 0.5969164967536926, |
| "learning_rate": 2.969268601125131e-05, |
| "loss": 0.0375, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.28448715767987753, |
| "grad_norm": 0.7202589511871338, |
| "learning_rate": 2.969101886908622e-05, |
| "loss": 0.0388, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.2849124000680388, |
| "grad_norm": 0.709220826625824, |
| "learning_rate": 2.968934726417749e-05, |
| "loss": 0.0391, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.2853376424562, |
| "grad_norm": 0.7698434591293335, |
| "learning_rate": 2.968767119703291e-05, |
| "loss": 0.0385, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.2857628848443613, |
| "grad_norm": 0.6566237807273865, |
| "learning_rate": 2.9685990668161628e-05, |
| "loss": 0.0419, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.2861881272325225, |
| "grad_norm": 0.6004732847213745, |
| "learning_rate": 2.9684305678074137e-05, |
| "loss": 0.0388, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.2866133696206838, |
| "grad_norm": 0.736395537853241, |
| "learning_rate": 2.9682616227282304e-05, |
| "loss": 0.0405, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.28703861200884506, |
| "grad_norm": 0.6645581722259521, |
| "learning_rate": 2.968092231629933e-05, |
| "loss": 0.045, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.2874638543970063, |
| "grad_norm": 0.5871290564537048, |
| "learning_rate": 2.967922394563978e-05, |
| "loss": 0.0409, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.28788909678516755, |
| "grad_norm": 0.6334441304206848, |
| "learning_rate": 2.9677521115819585e-05, |
| "loss": 0.0416, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.28831433917332877, |
| "grad_norm": 0.5969275236129761, |
| "learning_rate": 2.967581382735601e-05, |
| "loss": 0.0387, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.28873958156149004, |
| "grad_norm": 0.7136366367340088, |
| "learning_rate": 2.9674102080767685e-05, |
| "loss": 0.0374, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.2891648239496513, |
| "grad_norm": 0.7302682399749756, |
| "learning_rate": 2.9672385876574597e-05, |
| "loss": 0.0393, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.28959006633781254, |
| "grad_norm": 0.665640652179718, |
| "learning_rate": 2.967066521529808e-05, |
| "loss": 0.0372, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.2900153087259738, |
| "grad_norm": 0.5665135383605957, |
| "learning_rate": 2.966894009746083e-05, |
| "loss": 0.0393, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.2904405511141351, |
| "grad_norm": 0.7281858921051025, |
| "learning_rate": 2.9667210523586888e-05, |
| "loss": 0.038, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.2908657935022963, |
| "grad_norm": 0.6732358336448669, |
| "learning_rate": 2.966547649420165e-05, |
| "loss": 0.0394, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.2912910358904576, |
| "grad_norm": 0.7290246486663818, |
| "learning_rate": 2.9663738009831877e-05, |
| "loss": 0.0377, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.2917162782786188, |
| "grad_norm": 0.6827811598777771, |
| "learning_rate": 2.966199507100567e-05, |
| "loss": 0.0398, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.29214152066678006, |
| "grad_norm": 0.46444040536880493, |
| "learning_rate": 2.9660247678252484e-05, |
| "loss": 0.0387, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.29256676305494134, |
| "grad_norm": 0.5850991606712341, |
| "learning_rate": 2.965849583210314e-05, |
| "loss": 0.0401, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.29299200544310255, |
| "grad_norm": 0.5107231140136719, |
| "learning_rate": 2.9656739533089794e-05, |
| "loss": 0.0372, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.2934172478312638, |
| "grad_norm": 0.6337826251983643, |
| "learning_rate": 2.9654978781745968e-05, |
| "loss": 0.0389, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.29384249021942505, |
| "grad_norm": 0.6117849349975586, |
| "learning_rate": 2.9653213578606534e-05, |
| "loss": 0.039, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.2942677326075863, |
| "grad_norm": 0.8882553577423096, |
| "learning_rate": 2.9651443924207704e-05, |
| "loss": 0.0398, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.2946929749957476, |
| "grad_norm": 0.7033731937408447, |
| "learning_rate": 2.964966981908706e-05, |
| "loss": 0.0391, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.2951182173839088, |
| "grad_norm": 0.6146150231361389, |
| "learning_rate": 2.964789126378353e-05, |
| "loss": 0.0377, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.2955434597720701, |
| "grad_norm": 0.7496012449264526, |
| "learning_rate": 2.9646108258837388e-05, |
| "loss": 0.0386, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.29596870216023136, |
| "grad_norm": 0.639735996723175, |
| "learning_rate": 2.9644320804790267e-05, |
| "loss": 0.0388, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.2963939445483926, |
| "grad_norm": 0.6214953064918518, |
| "learning_rate": 2.9642528902185142e-05, |
| "loss": 0.037, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.29681918693655385, |
| "grad_norm": 0.6856206059455872, |
| "learning_rate": 2.9640732551566354e-05, |
| "loss": 0.04, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.29724442932471506, |
| "grad_norm": 0.6917822957038879, |
| "learning_rate": 2.9638931753479578e-05, |
| "loss": 0.0362, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.29766967171287634, |
| "grad_norm": 0.752424955368042, |
| "learning_rate": 2.9637126508471858e-05, |
| "loss": 0.0382, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.2980949141010376, |
| "grad_norm": 0.6131302714347839, |
| "learning_rate": 2.9635316817091577e-05, |
| "loss": 0.0408, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.29852015648919883, |
| "grad_norm": 0.6820628046989441, |
| "learning_rate": 2.9633502679888467e-05, |
| "loss": 0.0367, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.2989453988773601, |
| "grad_norm": 0.5934157967567444, |
| "learning_rate": 2.963168409741362e-05, |
| "loss": 0.0365, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.2993706412655213, |
| "grad_norm": 0.6045647859573364, |
| "learning_rate": 2.9629861070219467e-05, |
| "loss": 0.0388, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.2997958836536826, |
| "grad_norm": 0.7339217662811279, |
| "learning_rate": 2.9628033598859807e-05, |
| "loss": 0.038, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.30022112604184387, |
| "grad_norm": 0.7751262784004211, |
| "learning_rate": 2.9626201683889764e-05, |
| "loss": 0.0351, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.3006463684300051, |
| "grad_norm": 0.710809051990509, |
| "learning_rate": 2.9624365325865835e-05, |
| "loss": 0.0419, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.30107161081816636, |
| "grad_norm": 0.7651996612548828, |
| "learning_rate": 2.962252452534585e-05, |
| "loss": 0.0381, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.30149685320632763, |
| "grad_norm": 0.8094403743743896, |
| "learning_rate": 2.9620679282889006e-05, |
| "loss": 0.0405, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.30192209559448885, |
| "grad_norm": 0.6429983973503113, |
| "learning_rate": 2.9618829599055833e-05, |
| "loss": 0.0386, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.3023473379826501, |
| "grad_norm": 0.5724627375602722, |
| "learning_rate": 2.961697547440821e-05, |
| "loss": 0.0394, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.30277258037081134, |
| "grad_norm": 0.5370784401893616, |
| "learning_rate": 2.9615116909509383e-05, |
| "loss": 0.0374, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.3031978227589726, |
| "grad_norm": 0.6105824708938599, |
| "learning_rate": 2.9613253904923924e-05, |
| "loss": 0.0392, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.3036230651471339, |
| "grad_norm": 0.6746649146080017, |
| "learning_rate": 2.9611386461217772e-05, |
| "loss": 0.0368, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.3040483075352951, |
| "grad_norm": 0.5744502544403076, |
| "learning_rate": 2.96095145789582e-05, |
| "loss": 0.0349, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.3044735499234564, |
| "grad_norm": 0.5895463228225708, |
| "learning_rate": 2.9607638258713845e-05, |
| "loss": 0.0395, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.3048987923116176, |
| "grad_norm": 0.7200367450714111, |
| "learning_rate": 2.960575750105468e-05, |
| "loss": 0.0393, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.30532403469977887, |
| "grad_norm": 0.8259462118148804, |
| "learning_rate": 2.9603872306552025e-05, |
| "loss": 0.0396, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.30574927708794014, |
| "grad_norm": 0.7393692135810852, |
| "learning_rate": 2.9601982675778562e-05, |
| "loss": 0.0409, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.30617451947610136, |
| "grad_norm": 0.7287253141403198, |
| "learning_rate": 2.9600088609308294e-05, |
| "loss": 0.0371, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.30659976186426263, |
| "grad_norm": 0.8086169958114624, |
| "learning_rate": 2.959819010771661e-05, |
| "loss": 0.042, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.3070250042524239, |
| "grad_norm": 0.8077754974365234, |
| "learning_rate": 2.959628717158021e-05, |
| "loss": 0.0371, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.3074502466405851, |
| "grad_norm": 0.6154179573059082, |
| "learning_rate": 2.959437980147716e-05, |
| "loss": 0.0387, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.3078754890287464, |
| "grad_norm": 0.6824501752853394, |
| "learning_rate": 2.959246799798687e-05, |
| "loss": 0.0408, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.3083007314169076, |
| "grad_norm": 0.6912359595298767, |
| "learning_rate": 2.9590551761690095e-05, |
| "loss": 0.0368, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.3087259738050689, |
| "grad_norm": 0.6031985282897949, |
| "learning_rate": 2.9588631093168936e-05, |
| "loss": 0.0361, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.30915121619323016, |
| "grad_norm": 0.7554166316986084, |
| "learning_rate": 2.9586705993006837e-05, |
| "loss": 0.0392, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.3095764585813914, |
| "grad_norm": 0.5785640478134155, |
| "learning_rate": 2.95847764617886e-05, |
| "loss": 0.0384, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.31000170096955265, |
| "grad_norm": 0.5476772785186768, |
| "learning_rate": 2.9582842500100364e-05, |
| "loss": 0.0386, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.31042694335771387, |
| "grad_norm": 0.5620119571685791, |
| "learning_rate": 2.958090410852961e-05, |
| "loss": 0.0361, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.31085218574587514, |
| "grad_norm": 0.5480359792709351, |
| "learning_rate": 2.9578961287665175e-05, |
| "loss": 0.0367, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.3112774281340364, |
| "grad_norm": 0.6803015470504761, |
| "learning_rate": 2.9577014038097238e-05, |
| "loss": 0.0405, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.31170267052219763, |
| "grad_norm": 0.6066681742668152, |
| "learning_rate": 2.9575062360417324e-05, |
| "loss": 0.036, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.3121279129103589, |
| "grad_norm": 0.7002130150794983, |
| "learning_rate": 2.957310625521829e-05, |
| "loss": 0.038, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.3125531552985202, |
| "grad_norm": 0.5165942907333374, |
| "learning_rate": 2.957114572309436e-05, |
| "loss": 0.036, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.3129783976866814, |
| "grad_norm": 0.6666249632835388, |
| "learning_rate": 2.956918076464109e-05, |
| "loss": 0.0464, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.31340364007484267, |
| "grad_norm": 0.6441757082939148, |
| "learning_rate": 2.9567211380455376e-05, |
| "loss": 0.0408, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.3138288824630039, |
| "grad_norm": 0.696550726890564, |
| "learning_rate": 2.9565237571135472e-05, |
| "loss": 0.0381, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.31425412485116516, |
| "grad_norm": 0.7473100423812866, |
| "learning_rate": 2.9563259337280967e-05, |
| "loss": 0.0402, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.31467936723932644, |
| "grad_norm": 0.7424477934837341, |
| "learning_rate": 2.9561276679492794e-05, |
| "loss": 0.0375, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.31510460962748765, |
| "grad_norm": 0.774580180644989, |
| "learning_rate": 2.9559289598373236e-05, |
| "loss": 0.0395, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.3155298520156489, |
| "grad_norm": 0.7219531536102295, |
| "learning_rate": 2.9557298094525913e-05, |
| "loss": 0.038, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.3159550944038102, |
| "grad_norm": 0.533015251159668, |
| "learning_rate": 2.955530216855579e-05, |
| "loss": 0.0364, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.3163803367919714, |
| "grad_norm": 0.5288422703742981, |
| "learning_rate": 2.955330182106918e-05, |
| "loss": 0.0372, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.3168055791801327, |
| "grad_norm": 0.5094618797302246, |
| "learning_rate": 2.9551297052673734e-05, |
| "loss": 0.0358, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.3172308215682939, |
| "grad_norm": 0.7423509359359741, |
| "learning_rate": 2.954928786397845e-05, |
| "loss": 0.0405, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.3176560639564552, |
| "grad_norm": 0.8391866087913513, |
| "learning_rate": 2.9547274255593665e-05, |
| "loss": 0.0359, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.31808130634461645, |
| "grad_norm": 0.7100056409835815, |
| "learning_rate": 2.9545256228131058e-05, |
| "loss": 0.0354, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.3185065487327777, |
| "grad_norm": 0.4895709156990051, |
| "learning_rate": 2.9543233782203653e-05, |
| "loss": 0.0357, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.31893179112093895, |
| "grad_norm": 0.7405874729156494, |
| "learning_rate": 2.954120691842582e-05, |
| "loss": 0.0385, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.31935703350910016, |
| "grad_norm": 1.0487643480300903, |
| "learning_rate": 2.9539175637413264e-05, |
| "loss": 0.0356, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.31978227589726144, |
| "grad_norm": 0.6350162029266357, |
| "learning_rate": 2.9537139939783033e-05, |
| "loss": 0.0392, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.3202075182854227, |
| "grad_norm": 0.4637593924999237, |
| "learning_rate": 2.9535099826153516e-05, |
| "loss": 0.0374, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.3206327606735839, |
| "grad_norm": 0.5955590009689331, |
| "learning_rate": 2.9533055297144454e-05, |
| "loss": 0.0349, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.3210580030617452, |
| "grad_norm": 0.599247932434082, |
| "learning_rate": 2.953100635337691e-05, |
| "loss": 0.0368, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.3214832454499065, |
| "grad_norm": 0.6445111036300659, |
| "learning_rate": 2.952895299547331e-05, |
| "loss": 0.0398, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.3219084878380677, |
| "grad_norm": 0.765993058681488, |
| "learning_rate": 2.9526895224057402e-05, |
| "loss": 0.0372, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.32233373022622896, |
| "grad_norm": 0.7807483673095703, |
| "learning_rate": 2.9524833039754284e-05, |
| "loss": 0.0365, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.3227589726143902, |
| "grad_norm": 0.4911043345928192, |
| "learning_rate": 2.9522766443190398e-05, |
| "loss": 0.0369, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.32318421500255146, |
| "grad_norm": 0.6260459423065186, |
| "learning_rate": 2.9520695434993516e-05, |
| "loss": 0.0379, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.32360945739071273, |
| "grad_norm": 0.4841105341911316, |
| "learning_rate": 2.9518620015792757e-05, |
| "loss": 0.0345, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.32403469977887395, |
| "grad_norm": 0.5489952564239502, |
| "learning_rate": 2.951654018621858e-05, |
| "loss": 0.0367, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.3244599421670352, |
| "grad_norm": 0.5816491842269897, |
| "learning_rate": 2.951445594690278e-05, |
| "loss": 0.0378, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.32488518455519644, |
| "grad_norm": 0.6203845739364624, |
| "learning_rate": 2.95123672984785e-05, |
| "loss": 0.0355, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.3253104269433577, |
| "grad_norm": 0.6808344721794128, |
| "learning_rate": 2.9510274241580207e-05, |
| "loss": 0.0379, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.325735669331519, |
| "grad_norm": 0.6007257103919983, |
| "learning_rate": 2.9508176776843726e-05, |
| "loss": 0.0398, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.3261609117196802, |
| "grad_norm": 0.6604802012443542, |
| "learning_rate": 2.95060749049062e-05, |
| "loss": 0.0364, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.3265861541078415, |
| "grad_norm": 0.7024301886558533, |
| "learning_rate": 2.9503968626406133e-05, |
| "loss": 0.0368, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.32701139649600275, |
| "grad_norm": 0.5574946999549866, |
| "learning_rate": 2.9501857941983354e-05, |
| "loss": 0.0361, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.32743663888416397, |
| "grad_norm": 0.6038932204246521, |
| "learning_rate": 2.9499742852279025e-05, |
| "loss": 0.0405, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.32786188127232524, |
| "grad_norm": 0.5846895575523376, |
| "learning_rate": 2.9497623357935666e-05, |
| "loss": 0.0385, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.32828712366048646, |
| "grad_norm": 0.5325697660446167, |
| "learning_rate": 2.9495499459597116e-05, |
| "loss": 0.0362, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.32871236604864773, |
| "grad_norm": 0.5643430948257446, |
| "learning_rate": 2.9493371157908563e-05, |
| "loss": 0.0359, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.329137608436809, |
| "grad_norm": 0.5376736521720886, |
| "learning_rate": 2.9491238453516524e-05, |
| "loss": 0.0358, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.3295628508249702, |
| "grad_norm": 0.5130335688591003, |
| "learning_rate": 2.9489101347068868e-05, |
| "loss": 0.0367, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.3299880932131315, |
| "grad_norm": 0.5577582120895386, |
| "learning_rate": 2.948695983921478e-05, |
| "loss": 0.0364, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.3304133356012927, |
| "grad_norm": 0.580887496471405, |
| "learning_rate": 2.9484813930604804e-05, |
| "loss": 0.0419, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.330838577989454, |
| "grad_norm": 0.5507401823997498, |
| "learning_rate": 2.9482663621890804e-05, |
| "loss": 0.0378, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.33126382037761526, |
| "grad_norm": 0.6311460137367249, |
| "learning_rate": 2.9480508913725986e-05, |
| "loss": 0.035, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.3316890627657765, |
| "grad_norm": 0.6687628626823425, |
| "learning_rate": 2.9478349806764895e-05, |
| "loss": 0.0363, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.33211430515393775, |
| "grad_norm": 0.5091450810432434, |
| "learning_rate": 2.9476186301663414e-05, |
| "loss": 0.0357, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.332539547542099, |
| "grad_norm": 0.6998031139373779, |
| "learning_rate": 2.9474018399078752e-05, |
| "loss": 0.0331, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.33296478993026024, |
| "grad_norm": 0.578231692314148, |
| "learning_rate": 2.947184609966947e-05, |
| "loss": 0.0378, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.3333900323184215, |
| "grad_norm": 0.7172752618789673, |
| "learning_rate": 2.9469669404095447e-05, |
| "loss": 0.0339, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.33381527470658273, |
| "grad_norm": 0.6786145567893982, |
| "learning_rate": 2.9467488313017908e-05, |
| "loss": 0.0349, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.334240517094744, |
| "grad_norm": 0.5483919382095337, |
| "learning_rate": 2.9465302827099412e-05, |
| "loss": 0.0354, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.3346657594829053, |
| "grad_norm": 0.4744562804698944, |
| "learning_rate": 2.9463112947003854e-05, |
| "loss": 0.0346, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.3350910018710665, |
| "grad_norm": 0.47334733605384827, |
| "learning_rate": 2.9460918673396455e-05, |
| "loss": 0.035, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.33551624425922777, |
| "grad_norm": 0.6834809184074402, |
| "learning_rate": 2.9458720006943784e-05, |
| "loss": 0.0376, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.335941486647389, |
| "grad_norm": 0.6203020215034485, |
| "learning_rate": 2.9456516948313736e-05, |
| "loss": 0.0367, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.33636672903555026, |
| "grad_norm": 0.6990686058998108, |
| "learning_rate": 2.9454309498175538e-05, |
| "loss": 0.0379, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.33679197142371153, |
| "grad_norm": 0.5946453213691711, |
| "learning_rate": 2.9452097657199762e-05, |
| "loss": 0.0362, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.33721721381187275, |
| "grad_norm": 0.698390007019043, |
| "learning_rate": 2.9449881426058303e-05, |
| "loss": 0.0371, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.337642456200034, |
| "grad_norm": 0.8408201932907104, |
| "learning_rate": 2.9447660805424388e-05, |
| "loss": 0.0387, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.3380676985881953, |
| "grad_norm": 0.7631927132606506, |
| "learning_rate": 2.94454357959726e-05, |
| "loss": 0.0382, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.3384929409763565, |
| "grad_norm": 0.7121431827545166, |
| "learning_rate": 2.9443206398378815e-05, |
| "loss": 0.0395, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.3389181833645178, |
| "grad_norm": 0.6551350951194763, |
| "learning_rate": 2.9440972613320282e-05, |
| "loss": 0.0401, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.339343425752679, |
| "grad_norm": 0.5990687608718872, |
| "learning_rate": 2.943873444147556e-05, |
| "loss": 0.0377, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.3397686681408403, |
| "grad_norm": 0.6591171622276306, |
| "learning_rate": 2.9436491883524547e-05, |
| "loss": 0.0369, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.34019391052900155, |
| "grad_norm": 0.6975916624069214, |
| "learning_rate": 2.9434244940148472e-05, |
| "loss": 0.0339, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.34061915291716277, |
| "grad_norm": 0.6451482176780701, |
| "learning_rate": 2.9431993612029903e-05, |
| "loss": 0.032, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.34104439530532404, |
| "grad_norm": 0.7803753018379211, |
| "learning_rate": 2.9429737899852728e-05, |
| "loss": 0.0349, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.34146963769348526, |
| "grad_norm": 0.8223358988761902, |
| "learning_rate": 2.9427477804302174e-05, |
| "loss": 0.0365, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.34189488008164653, |
| "grad_norm": 0.5982056856155396, |
| "learning_rate": 2.9425213326064797e-05, |
| "loss": 0.0364, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.3423201224698078, |
| "grad_norm": 0.6571147441864014, |
| "learning_rate": 2.942294446582849e-05, |
| "loss": 0.0352, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.342745364857969, |
| "grad_norm": 0.49468785524368286, |
| "learning_rate": 2.9420671224282468e-05, |
| "loss": 0.0362, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.3431706072461303, |
| "grad_norm": 0.5896933078765869, |
| "learning_rate": 2.9418393602117284e-05, |
| "loss": 0.0353, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.3435958496342916, |
| "grad_norm": 0.7391409873962402, |
| "learning_rate": 2.9416111600024816e-05, |
| "loss": 0.0383, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.3440210920224528, |
| "grad_norm": 0.7604628205299377, |
| "learning_rate": 2.9413825218698284e-05, |
| "loss": 0.0352, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.34444633441061406, |
| "grad_norm": 0.5802039504051208, |
| "learning_rate": 2.9411534458832222e-05, |
| "loss": 0.038, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.3448715767987753, |
| "grad_norm": 0.5897963047027588, |
| "learning_rate": 2.9409239321122513e-05, |
| "loss": 0.0334, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.34529681918693655, |
| "grad_norm": 0.6740944385528564, |
| "learning_rate": 2.940693980626634e-05, |
| "loss": 0.0361, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.3457220615750978, |
| "grad_norm": 0.6636224389076233, |
| "learning_rate": 2.9404635914962258e-05, |
| "loss": 0.035, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.34614730396325905, |
| "grad_norm": 0.5794883370399475, |
| "learning_rate": 2.9402327647910113e-05, |
| "loss": 0.0339, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.3465725463514203, |
| "grad_norm": 0.5011441707611084, |
| "learning_rate": 2.94000150058111e-05, |
| "loss": 0.036, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.34699778873958154, |
| "grad_norm": 0.637493371963501, |
| "learning_rate": 2.939769798936774e-05, |
| "loss": 0.0359, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.3474230311277428, |
| "grad_norm": 0.6575992107391357, |
| "learning_rate": 2.9395376599283878e-05, |
| "loss": 0.0357, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.3478482735159041, |
| "grad_norm": 0.710395872592926, |
| "learning_rate": 2.9393050836264697e-05, |
| "loss": 0.0363, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.3482735159040653, |
| "grad_norm": 0.6287087202072144, |
| "learning_rate": 2.9390720701016693e-05, |
| "loss": 0.0355, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.3486987582922266, |
| "grad_norm": 0.5436473488807678, |
| "learning_rate": 2.938838619424771e-05, |
| "loss": 0.0347, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.34912400068038785, |
| "grad_norm": 0.6140791177749634, |
| "learning_rate": 2.93860473166669e-05, |
| "loss": 0.0358, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.34954924306854906, |
| "grad_norm": 0.6407830119132996, |
| "learning_rate": 2.938370406898476e-05, |
| "loss": 0.0318, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.34997448545671034, |
| "grad_norm": 0.5551308393478394, |
| "learning_rate": 2.9381356451913103e-05, |
| "loss": 0.0349, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.35039972784487156, |
| "grad_norm": 0.649509072303772, |
| "learning_rate": 2.9379004466165072e-05, |
| "loss": 0.0342, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.35082497023303283, |
| "grad_norm": 0.5699142813682556, |
| "learning_rate": 2.937664811245514e-05, |
| "loss": 0.0366, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.3512502126211941, |
| "grad_norm": 0.6970945596694946, |
| "learning_rate": 2.9374287391499108e-05, |
| "loss": 0.0344, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.3516754550093553, |
| "grad_norm": 0.7967458367347717, |
| "learning_rate": 2.9371922304014093e-05, |
| "loss": 0.0353, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.3521006973975166, |
| "grad_norm": 0.6777560114860535, |
| "learning_rate": 2.9369552850718557e-05, |
| "loss": 0.0361, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.3525259397856778, |
| "grad_norm": 0.7028951644897461, |
| "learning_rate": 2.9367179032332262e-05, |
| "loss": 0.0386, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.3529511821738391, |
| "grad_norm": 0.6818631291389465, |
| "learning_rate": 2.936480084957633e-05, |
| "loss": 0.0329, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.35337642456200036, |
| "grad_norm": 0.7123552560806274, |
| "learning_rate": 2.9362418303173174e-05, |
| "loss": 0.0369, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.3538016669501616, |
| "grad_norm": 0.6206834316253662, |
| "learning_rate": 2.9360031393846558e-05, |
| "loss": 0.0349, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.35422690933832285, |
| "grad_norm": 0.5408953428268433, |
| "learning_rate": 2.9357640122321557e-05, |
| "loss": 0.0352, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.3546521517264841, |
| "grad_norm": 0.5239614844322205, |
| "learning_rate": 2.935524448932458e-05, |
| "loss": 0.0333, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.35507739411464534, |
| "grad_norm": 0.6259095668792725, |
| "learning_rate": 2.9352844495583356e-05, |
| "loss": 0.0361, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.3555026365028066, |
| "grad_norm": 0.5152319073677063, |
| "learning_rate": 2.9350440141826938e-05, |
| "loss": 0.0351, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.35592787889096783, |
| "grad_norm": 0.5210868120193481, |
| "learning_rate": 2.9348031428785705e-05, |
| "loss": 0.0318, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.3563531212791291, |
| "grad_norm": 0.49504750967025757, |
| "learning_rate": 2.9345618357191363e-05, |
| "loss": 0.0331, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.3567783636672904, |
| "grad_norm": 0.47331687808036804, |
| "learning_rate": 2.934320092777694e-05, |
| "loss": 0.035, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.3572036060554516, |
| "grad_norm": 0.3879975378513336, |
| "learning_rate": 2.9340779141276782e-05, |
| "loss": 0.034, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.35762884844361287, |
| "grad_norm": 0.4745931327342987, |
| "learning_rate": 2.933835299842657e-05, |
| "loss": 0.0374, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.3580540908317741, |
| "grad_norm": 0.6969006657600403, |
| "learning_rate": 2.93359224999633e-05, |
| "loss": 0.0328, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.35847933321993536, |
| "grad_norm": 0.5800720453262329, |
| "learning_rate": 2.933348764662529e-05, |
| "loss": 0.0334, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.35890457560809663, |
| "grad_norm": 0.6239498853683472, |
| "learning_rate": 2.933104843915219e-05, |
| "loss": 0.034, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.35932981799625785, |
| "grad_norm": 0.7754019498825073, |
| "learning_rate": 2.9328604878284963e-05, |
| "loss": 0.0406, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.3597550603844191, |
| "grad_norm": 0.543395459651947, |
| "learning_rate": 2.9326156964765892e-05, |
| "loss": 0.0338, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.3601803027725804, |
| "grad_norm": 0.702585756778717, |
| "learning_rate": 2.9323704699338603e-05, |
| "loss": 0.0349, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.3606055451607416, |
| "grad_norm": 0.6561062932014465, |
| "learning_rate": 2.932124808274802e-05, |
| "loss": 0.0343, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.3610307875489029, |
| "grad_norm": 0.5744190216064453, |
| "learning_rate": 2.93187871157404e-05, |
| "loss": 0.0358, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.3614560299370641, |
| "grad_norm": 0.6018226146697998, |
| "learning_rate": 2.931632179906332e-05, |
| "loss": 0.0381, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.3618812723252254, |
| "grad_norm": 0.543805718421936, |
| "learning_rate": 2.931385213346568e-05, |
| "loss": 0.0333, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.36230651471338665, |
| "grad_norm": 0.5329932570457458, |
| "learning_rate": 2.9311378119697694e-05, |
| "loss": 0.035, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.36273175710154787, |
| "grad_norm": 0.5564429759979248, |
| "learning_rate": 2.9308899758510903e-05, |
| "loss": 0.0333, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.36315699948970914, |
| "grad_norm": 0.6484778523445129, |
| "learning_rate": 2.9306417050658172e-05, |
| "loss": 0.0363, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.36358224187787036, |
| "grad_norm": 0.7013140320777893, |
| "learning_rate": 2.9303929996893687e-05, |
| "loss": 0.0338, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.36400748426603163, |
| "grad_norm": 0.7455886602401733, |
| "learning_rate": 2.9301438597972932e-05, |
| "loss": 0.0353, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.3644327266541929, |
| "grad_norm": 0.5060240030288696, |
| "learning_rate": 2.9298942854652744e-05, |
| "loss": 0.0399, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.3648579690423541, |
| "grad_norm": 0.44752374291419983, |
| "learning_rate": 2.9296442767691257e-05, |
| "loss": 0.0344, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.3652832114305154, |
| "grad_norm": 0.6356979608535767, |
| "learning_rate": 2.9293938337847936e-05, |
| "loss": 0.0348, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.36570845381867667, |
| "grad_norm": 0.6637558341026306, |
| "learning_rate": 2.9291429565883554e-05, |
| "loss": 0.0331, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.3661336962068379, |
| "grad_norm": 0.5308704376220703, |
| "learning_rate": 2.9288916452560214e-05, |
| "loss": 0.0359, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.36655893859499916, |
| "grad_norm": 0.6599563360214233, |
| "learning_rate": 2.9286398998641334e-05, |
| "loss": 0.0342, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.3669841809831604, |
| "grad_norm": 0.5241801738739014, |
| "learning_rate": 2.9283877204891645e-05, |
| "loss": 0.0366, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.36740942337132165, |
| "grad_norm": 0.6455984711647034, |
| "learning_rate": 2.9281351072077208e-05, |
| "loss": 0.0339, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.3678346657594829, |
| "grad_norm": 0.6421254277229309, |
| "learning_rate": 2.9278820600965393e-05, |
| "loss": 0.035, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.36825990814764414, |
| "grad_norm": 0.4384848475456238, |
| "learning_rate": 2.9276285792324887e-05, |
| "loss": 0.0329, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.3686851505358054, |
| "grad_norm": 0.6649608016014099, |
| "learning_rate": 2.9273746646925703e-05, |
| "loss": 0.0346, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.36911039292396663, |
| "grad_norm": 0.6458434462547302, |
| "learning_rate": 2.9271203165539166e-05, |
| "loss": 0.0348, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.3695356353121279, |
| "grad_norm": 0.6240953803062439, |
| "learning_rate": 2.9268655348937913e-05, |
| "loss": 0.0313, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.3699608777002892, |
| "grad_norm": 0.5618213415145874, |
| "learning_rate": 2.926610319789591e-05, |
| "loss": 0.0318, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.3703861200884504, |
| "grad_norm": 0.7894103527069092, |
| "learning_rate": 2.926354671318843e-05, |
| "loss": 0.0358, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.3708113624766117, |
| "grad_norm": 0.6642913222312927, |
| "learning_rate": 2.926098589559206e-05, |
| "loss": 0.0339, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.37123660486477295, |
| "grad_norm": 0.6823061108589172, |
| "learning_rate": 2.925842074588472e-05, |
| "loss": 0.0332, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.37166184725293416, |
| "grad_norm": 0.5889248251914978, |
| "learning_rate": 2.925585126484563e-05, |
| "loss": 0.0349, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.37208708964109544, |
| "grad_norm": 0.5245225429534912, |
| "learning_rate": 2.9253277453255326e-05, |
| "loss": 0.0307, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.37251233202925665, |
| "grad_norm": 0.5833045840263367, |
| "learning_rate": 2.925069931189567e-05, |
| "loss": 0.0319, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.3729375744174179, |
| "grad_norm": 0.5893930196762085, |
| "learning_rate": 2.924811684154983e-05, |
| "loss": 0.0347, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.3733628168055792, |
| "grad_norm": 0.5016536116600037, |
| "learning_rate": 2.9245530043002293e-05, |
| "loss": 0.0347, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.3737880591937404, |
| "grad_norm": 0.5982713103294373, |
| "learning_rate": 2.9242938917038863e-05, |
| "loss": 0.0336, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.3742133015819017, |
| "grad_norm": 0.48951834440231323, |
| "learning_rate": 2.9240343464446647e-05, |
| "loss": 0.03, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.3746385439700629, |
| "grad_norm": 0.5397206544876099, |
| "learning_rate": 2.923774368601409e-05, |
| "loss": 0.031, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.3750637863582242, |
| "grad_norm": 0.5121596455574036, |
| "learning_rate": 2.9235139582530918e-05, |
| "loss": 0.0342, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.37548902874638546, |
| "grad_norm": 0.5357693433761597, |
| "learning_rate": 2.9232531154788203e-05, |
| "loss": 0.0323, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.3759142711345467, |
| "grad_norm": 0.5267826914787292, |
| "learning_rate": 2.9229918403578313e-05, |
| "loss": 0.0353, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.37633951352270795, |
| "grad_norm": 0.5199102759361267, |
| "learning_rate": 2.922730132969493e-05, |
| "loss": 0.0344, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.3767647559108692, |
| "grad_norm": 0.5712948441505432, |
| "learning_rate": 2.922467993393305e-05, |
| "loss": 0.0327, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.37718999829903044, |
| "grad_norm": 0.5483639240264893, |
| "learning_rate": 2.9222054217088985e-05, |
| "loss": 0.0351, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.3776152406871917, |
| "grad_norm": 0.44184285402297974, |
| "learning_rate": 2.921942417996037e-05, |
| "loss": 0.0333, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.37804048307535293, |
| "grad_norm": 0.6121577024459839, |
| "learning_rate": 2.921678982334612e-05, |
| "loss": 0.0361, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.3784657254635142, |
| "grad_norm": 0.5963767766952515, |
| "learning_rate": 2.92141511480465e-05, |
| "loss": 0.0369, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.3788909678516755, |
| "grad_norm": 0.6200764775276184, |
| "learning_rate": 2.9211508154863066e-05, |
| "loss": 0.0323, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.3793162102398367, |
| "grad_norm": 0.5460836887359619, |
| "learning_rate": 2.9208860844598684e-05, |
| "loss": 0.0351, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.37974145262799797, |
| "grad_norm": 0.6484760642051697, |
| "learning_rate": 2.9206209218057543e-05, |
| "loss": 0.0369, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.3801666950161592, |
| "grad_norm": 0.7471164464950562, |
| "learning_rate": 2.9203553276045135e-05, |
| "loss": 0.032, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.38059193740432046, |
| "grad_norm": 0.47716355323791504, |
| "learning_rate": 2.9200893019368263e-05, |
| "loss": 0.0321, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.38101717979248173, |
| "grad_norm": 0.5212956666946411, |
| "learning_rate": 2.9198228448835045e-05, |
| "loss": 0.034, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.38144242218064295, |
| "grad_norm": 0.6043409109115601, |
| "learning_rate": 2.9195559565254908e-05, |
| "loss": 0.0345, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.3818676645688042, |
| "grad_norm": 0.5945512056350708, |
| "learning_rate": 2.919288636943858e-05, |
| "loss": 0.0324, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.3822929069569655, |
| "grad_norm": 0.5137171149253845, |
| "learning_rate": 2.9190208862198126e-05, |
| "loss": 0.036, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.3827181493451267, |
| "grad_norm": 0.5771226286888123, |
| "learning_rate": 2.9187527044346883e-05, |
| "loss": 0.0341, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.383143391733288, |
| "grad_norm": 0.7187480926513672, |
| "learning_rate": 2.918484091669953e-05, |
| "loss": 0.0345, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.3835686341214492, |
| "grad_norm": 0.6605693697929382, |
| "learning_rate": 2.9182150480072026e-05, |
| "loss": 0.0361, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.3839938765096105, |
| "grad_norm": 0.4109347462654114, |
| "learning_rate": 2.917945573528167e-05, |
| "loss": 0.035, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.38441911889777175, |
| "grad_norm": 0.5347878336906433, |
| "learning_rate": 2.9176756683147052e-05, |
| "loss": 0.032, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.38484436128593297, |
| "grad_norm": 0.5180054903030396, |
| "learning_rate": 2.9174053324488063e-05, |
| "loss": 0.0329, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.38526960367409424, |
| "grad_norm": 0.5243205428123474, |
| "learning_rate": 2.917134566012592e-05, |
| "loss": 0.0319, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.38569484606225546, |
| "grad_norm": 0.5306262373924255, |
| "learning_rate": 2.916863369088314e-05, |
| "loss": 0.033, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.38612008845041673, |
| "grad_norm": 0.46671026945114136, |
| "learning_rate": 2.9165917417583544e-05, |
| "loss": 0.0337, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.386545330838578, |
| "grad_norm": 0.5129649043083191, |
| "learning_rate": 2.916319684105227e-05, |
| "loss": 0.0325, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.3869705732267392, |
| "grad_norm": 0.5589383840560913, |
| "learning_rate": 2.916047196211575e-05, |
| "loss": 0.0314, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.3873958156149005, |
| "grad_norm": 0.5646084547042847, |
| "learning_rate": 2.915774278160173e-05, |
| "loss": 0.0298, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.38782105800306177, |
| "grad_norm": 0.5653718113899231, |
| "learning_rate": 2.9155009300339268e-05, |
| "loss": 0.0341, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.388246300391223, |
| "grad_norm": 0.6720633506774902, |
| "learning_rate": 2.9152271519158724e-05, |
| "loss": 0.0326, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.38867154277938426, |
| "grad_norm": 0.595745325088501, |
| "learning_rate": 2.9149529438891764e-05, |
| "loss": 0.03, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.3890967851675455, |
| "grad_norm": 0.6727629899978638, |
| "learning_rate": 2.9146783060371352e-05, |
| "loss": 0.0333, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.38952202755570675, |
| "grad_norm": 0.5202494859695435, |
| "learning_rate": 2.914403238443177e-05, |
| "loss": 0.0316, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.389947269943868, |
| "grad_norm": 0.46365416049957275, |
| "learning_rate": 2.9141277411908607e-05, |
| "loss": 0.0331, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.39037251233202924, |
| "grad_norm": 0.5766396522521973, |
| "learning_rate": 2.9138518143638744e-05, |
| "loss": 0.0317, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.3907977547201905, |
| "grad_norm": 0.5727835297584534, |
| "learning_rate": 2.9135754580460373e-05, |
| "loss": 0.0331, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.39122299710835173, |
| "grad_norm": 0.4820510447025299, |
| "learning_rate": 2.9132986723212992e-05, |
| "loss": 0.0331, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.391648239496513, |
| "grad_norm": 0.49070218205451965, |
| "learning_rate": 2.9130214572737403e-05, |
| "loss": 0.0338, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.3920734818846743, |
| "grad_norm": 0.48210710287094116, |
| "learning_rate": 2.9127438129875717e-05, |
| "loss": 0.0314, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.3924987242728355, |
| "grad_norm": 0.5722389817237854, |
| "learning_rate": 2.912465739547134e-05, |
| "loss": 0.0337, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.39292396666099677, |
| "grad_norm": 0.560624361038208, |
| "learning_rate": 2.9121872370368988e-05, |
| "loss": 0.0309, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.39334920904915804, |
| "grad_norm": 0.6490792632102966, |
| "learning_rate": 2.911908305541467e-05, |
| "loss": 0.0329, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.39377445143731926, |
| "grad_norm": 0.5507360100746155, |
| "learning_rate": 2.9116289451455717e-05, |
| "loss": 0.0356, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.39419969382548053, |
| "grad_norm": 0.761702299118042, |
| "learning_rate": 2.911349155934075e-05, |
| "loss": 0.0369, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.39462493621364175, |
| "grad_norm": 0.4436461925506592, |
| "learning_rate": 2.9110689379919687e-05, |
| "loss": 0.0321, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.395050178601803, |
| "grad_norm": 0.47194549441337585, |
| "learning_rate": 2.9107882914043767e-05, |
| "loss": 0.031, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.3954754209899643, |
| "grad_norm": 0.5178126096725464, |
| "learning_rate": 2.910507216256551e-05, |
| "loss": 0.0344, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.3959006633781255, |
| "grad_norm": 0.5823556184768677, |
| "learning_rate": 2.910225712633876e-05, |
| "loss": 0.0306, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.3963259057662868, |
| "grad_norm": 0.6231299042701721, |
| "learning_rate": 2.9099437806218637e-05, |
| "loss": 0.0306, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.396751148154448, |
| "grad_norm": 0.5591525435447693, |
| "learning_rate": 2.9096614203061584e-05, |
| "loss": 0.0324, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.3971763905426093, |
| "grad_norm": 0.5873363614082336, |
| "learning_rate": 2.9093786317725336e-05, |
| "loss": 0.0366, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.39760163293077055, |
| "grad_norm": 0.5950449705123901, |
| "learning_rate": 2.909095415106893e-05, |
| "loss": 0.0325, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.39802687531893177, |
| "grad_norm": 0.507709264755249, |
| "learning_rate": 2.9088117703952698e-05, |
| "loss": 0.0307, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.39845211770709305, |
| "grad_norm": 0.5411258935928345, |
| "learning_rate": 2.908527697723829e-05, |
| "loss": 0.0308, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.3988773600952543, |
| "grad_norm": 0.8558620810508728, |
| "learning_rate": 2.9082431971788635e-05, |
| "loss": 0.0303, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.39930260248341554, |
| "grad_norm": 0.5814989805221558, |
| "learning_rate": 2.9079582688467972e-05, |
| "loss": 0.0309, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.3997278448715768, |
| "grad_norm": 0.5498828291893005, |
| "learning_rate": 2.9076729128141833e-05, |
| "loss": 0.033, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.400153087259738, |
| "grad_norm": 0.6426417827606201, |
| "learning_rate": 2.9073871291677063e-05, |
| "loss": 0.0326, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.4005783296478993, |
| "grad_norm": 0.593120813369751, |
| "learning_rate": 2.9071009179941797e-05, |
| "loss": 0.0307, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.4010035720360606, |
| "grad_norm": 0.5962951183319092, |
| "learning_rate": 2.906814279380546e-05, |
| "loss": 0.0299, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.4014288144242218, |
| "grad_norm": 0.49784666299819946, |
| "learning_rate": 2.9065272134138795e-05, |
| "loss": 0.0302, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.40185405681238306, |
| "grad_norm": 0.5332371592521667, |
| "learning_rate": 2.9062397201813823e-05, |
| "loss": 0.0302, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.40227929920054434, |
| "grad_norm": 0.47895118594169617, |
| "learning_rate": 2.905951799770388e-05, |
| "loss": 0.0321, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.40270454158870556, |
| "grad_norm": 0.5205760598182678, |
| "learning_rate": 2.9056634522683586e-05, |
| "loss": 0.0297, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.40312978397686683, |
| "grad_norm": 0.6770009994506836, |
| "learning_rate": 2.9053746777628872e-05, |
| "loss": 0.0312, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.40355502636502805, |
| "grad_norm": 0.5336707830429077, |
| "learning_rate": 2.9050854763416952e-05, |
| "loss": 0.0288, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.4039802687531893, |
| "grad_norm": 0.5179953575134277, |
| "learning_rate": 2.9047958480926346e-05, |
| "loss": 0.0286, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.4044055111413506, |
| "grad_norm": 0.5594274401664734, |
| "learning_rate": 2.9045057931036874e-05, |
| "loss": 0.0314, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.4048307535295118, |
| "grad_norm": 0.5783358812332153, |
| "learning_rate": 2.9042153114629636e-05, |
| "loss": 0.0299, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.4052559959176731, |
| "grad_norm": 0.49959713220596313, |
| "learning_rate": 2.9039244032587043e-05, |
| "loss": 0.0266, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.4056812383058343, |
| "grad_norm": 0.5821396708488464, |
| "learning_rate": 2.90363306857928e-05, |
| "loss": 0.0317, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.4061064806939956, |
| "grad_norm": 0.6275321245193481, |
| "learning_rate": 2.90334130751319e-05, |
| "loss": 0.0303, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.40653172308215685, |
| "grad_norm": 0.5117196440696716, |
| "learning_rate": 2.903049120149064e-05, |
| "loss": 0.0279, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.40695696547031807, |
| "grad_norm": 0.4848882853984833, |
| "learning_rate": 2.9027565065756608e-05, |
| "loss": 0.03, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.40738220785847934, |
| "grad_norm": 0.5670190453529358, |
| "learning_rate": 2.9024634668818682e-05, |
| "loss": 0.0318, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.4078074502466406, |
| "grad_norm": 0.4296819269657135, |
| "learning_rate": 2.9021700011567045e-05, |
| "loss": 0.0311, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.40823269263480183, |
| "grad_norm": 0.4154737889766693, |
| "learning_rate": 2.9018761094893166e-05, |
| "loss": 0.0306, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.4086579350229631, |
| "grad_norm": 0.5340800881385803, |
| "learning_rate": 2.9015817919689808e-05, |
| "loss": 0.0282, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.4090831774111243, |
| "grad_norm": 0.5465659499168396, |
| "learning_rate": 2.9012870486851034e-05, |
| "loss": 0.0296, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.4095084197992856, |
| "grad_norm": 0.617377519607544, |
| "learning_rate": 2.9009918797272195e-05, |
| "loss": 0.0307, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.40993366218744687, |
| "grad_norm": 0.5425435900688171, |
| "learning_rate": 2.9006962851849933e-05, |
| "loss": 0.0279, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.4103589045756081, |
| "grad_norm": 0.6728000044822693, |
| "learning_rate": 2.9004002651482192e-05, |
| "loss": 0.0298, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.41078414696376936, |
| "grad_norm": 0.5605682730674744, |
| "learning_rate": 2.9001038197068198e-05, |
| "loss": 0.0291, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.4112093893519306, |
| "grad_norm": 0.9502558708190918, |
| "learning_rate": 2.899806948950848e-05, |
| "loss": 0.0311, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.41163463174009185, |
| "grad_norm": 0.5846401453018188, |
| "learning_rate": 2.8995096529704846e-05, |
| "loss": 0.0308, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.4120598741282531, |
| "grad_norm": 0.6297990083694458, |
| "learning_rate": 2.8992119318560403e-05, |
| "loss": 0.0296, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.41248511651641434, |
| "grad_norm": 0.49979668855667114, |
| "learning_rate": 2.8989137856979555e-05, |
| "loss": 0.0297, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.4129103589045756, |
| "grad_norm": 0.47791269421577454, |
| "learning_rate": 2.8986152145867983e-05, |
| "loss": 0.0309, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.4133356012927369, |
| "grad_norm": 0.5595065951347351, |
| "learning_rate": 2.8983162186132678e-05, |
| "loss": 0.0302, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.4137608436808981, |
| "grad_norm": 0.5153464078903198, |
| "learning_rate": 2.8980167978681905e-05, |
| "loss": 0.0292, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.4141860860690594, |
| "grad_norm": 0.5208001732826233, |
| "learning_rate": 2.897716952442522e-05, |
| "loss": 0.0326, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.4146113284572206, |
| "grad_norm": 0.491243451833725, |
| "learning_rate": 2.8974166824273482e-05, |
| "loss": 0.0324, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.41503657084538187, |
| "grad_norm": 0.45093971490859985, |
| "learning_rate": 2.897115987913883e-05, |
| "loss": 0.0284, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.41546181323354314, |
| "grad_norm": 0.4437636137008667, |
| "learning_rate": 2.896814868993469e-05, |
| "loss": 0.0298, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.41588705562170436, |
| "grad_norm": 0.7555100917816162, |
| "learning_rate": 2.896513325757579e-05, |
| "loss": 0.0356, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.41631229800986563, |
| "grad_norm": 0.6052760481834412, |
| "learning_rate": 2.896211358297813e-05, |
| "loss": 0.0317, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.41673754039802685, |
| "grad_norm": 0.6751075387001038, |
| "learning_rate": 2.8959089667059014e-05, |
| "loss": 0.0297, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.4171627827861881, |
| "grad_norm": 0.6517361998558044, |
| "learning_rate": 2.8956061510737027e-05, |
| "loss": 0.0278, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.4175880251743494, |
| "grad_norm": 0.4893549680709839, |
| "learning_rate": 2.8953029114932042e-05, |
| "loss": 0.0288, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.4180132675625106, |
| "grad_norm": 0.4782169759273529, |
| "learning_rate": 2.8949992480565214e-05, |
| "loss": 0.028, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.4184385099506719, |
| "grad_norm": 0.6266399621963501, |
| "learning_rate": 2.894695160855901e-05, |
| "loss": 0.0305, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.41886375233883316, |
| "grad_norm": 0.44131985306739807, |
| "learning_rate": 2.8943906499837146e-05, |
| "loss": 0.0328, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.4192889947269944, |
| "grad_norm": 0.7677248120307922, |
| "learning_rate": 2.8940857155324655e-05, |
| "loss": 0.0295, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.41971423711515565, |
| "grad_norm": 0.5036348104476929, |
| "learning_rate": 2.893780357594785e-05, |
| "loss": 0.0295, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.42013947950331687, |
| "grad_norm": 0.47260379791259766, |
| "learning_rate": 2.8934745762634326e-05, |
| "loss": 0.0307, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.42056472189147814, |
| "grad_norm": 0.4747141897678375, |
| "learning_rate": 2.8931683716312964e-05, |
| "loss": 0.0295, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.4209899642796394, |
| "grad_norm": 0.4908016324043274, |
| "learning_rate": 2.8928617437913932e-05, |
| "loss": 0.0305, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.42141520666780063, |
| "grad_norm": 0.6114606261253357, |
| "learning_rate": 2.8925546928368688e-05, |
| "loss": 0.0296, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.4218404490559619, |
| "grad_norm": 0.5824898481369019, |
| "learning_rate": 2.8922472188609968e-05, |
| "loss": 0.0329, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.4222656914441231, |
| "grad_norm": 0.5461505651473999, |
| "learning_rate": 2.8919393219571805e-05, |
| "loss": 0.0332, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.4226909338322844, |
| "grad_norm": 0.5584800839424133, |
| "learning_rate": 2.8916310022189495e-05, |
| "loss": 0.0289, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.42311617622044567, |
| "grad_norm": 0.5313165187835693, |
| "learning_rate": 2.891322259739964e-05, |
| "loss": 0.0301, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.4235414186086069, |
| "grad_norm": 0.46237021684646606, |
| "learning_rate": 2.891013094614012e-05, |
| "loss": 0.0268, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.42396666099676816, |
| "grad_norm": 0.45541614294052124, |
| "learning_rate": 2.8907035069350093e-05, |
| "loss": 0.0284, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.42439190338492944, |
| "grad_norm": 0.6681911945343018, |
| "learning_rate": 2.8903934967970007e-05, |
| "loss": 0.0298, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.42481714577309065, |
| "grad_norm": 0.5190245509147644, |
| "learning_rate": 2.8900830642941583e-05, |
| "loss": 0.0287, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.4252423881612519, |
| "grad_norm": 0.5697410702705383, |
| "learning_rate": 2.889772209520785e-05, |
| "loss": 0.0279, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.42566763054941315, |
| "grad_norm": 0.5547255873680115, |
| "learning_rate": 2.8894609325713087e-05, |
| "loss": 0.0273, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.4260928729375744, |
| "grad_norm": 0.5132760405540466, |
| "learning_rate": 2.889149233540288e-05, |
| "loss": 0.0289, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.4265181153257357, |
| "grad_norm": 0.5643351078033447, |
| "learning_rate": 2.8888371125224084e-05, |
| "loss": 0.029, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.4269433577138969, |
| "grad_norm": 0.43181145191192627, |
| "learning_rate": 2.8885245696124835e-05, |
| "loss": 0.0276, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.4273686001020582, |
| "grad_norm": 0.47269269824028015, |
| "learning_rate": 2.888211604905457e-05, |
| "loss": 0.0271, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.4277938424902194, |
| "grad_norm": 0.5423154234886169, |
| "learning_rate": 2.8878982184963986e-05, |
| "loss": 0.0273, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.4282190848783807, |
| "grad_norm": 0.4239635467529297, |
| "learning_rate": 2.8875844104805066e-05, |
| "loss": 0.0273, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.42864432726654195, |
| "grad_norm": 0.541154146194458, |
| "learning_rate": 2.887270180953107e-05, |
| "loss": 0.0313, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.42906956965470316, |
| "grad_norm": 0.528346836566925, |
| "learning_rate": 2.886955530009656e-05, |
| "loss": 0.0316, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.42949481204286444, |
| "grad_norm": 0.5038647055625916, |
| "learning_rate": 2.8866404577457352e-05, |
| "loss": 0.0319, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.4299200544310257, |
| "grad_norm": 0.5295943021774292, |
| "learning_rate": 2.8863249642570557e-05, |
| "loss": 0.0317, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.43034529681918693, |
| "grad_norm": 0.5857188701629639, |
| "learning_rate": 2.8860090496394552e-05, |
| "loss": 0.0296, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.4307705392073482, |
| "grad_norm": 0.43627995252609253, |
| "learning_rate": 2.8856927139889015e-05, |
| "loss": 0.0279, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.4311957815955094, |
| "grad_norm": 0.4689892530441284, |
| "learning_rate": 2.8853759574014878e-05, |
| "loss": 0.0278, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.4316210239836707, |
| "grad_norm": 0.42289119958877563, |
| "learning_rate": 2.8850587799734372e-05, |
| "loss": 0.0265, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.43204626637183197, |
| "grad_norm": 0.48674267530441284, |
| "learning_rate": 2.8847411818011e-05, |
| "loss": 0.0256, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.4324715087599932, |
| "grad_norm": 0.37926095724105835, |
| "learning_rate": 2.8844231629809534e-05, |
| "loss": 0.033, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.43289675114815446, |
| "grad_norm": 0.4297679364681244, |
| "learning_rate": 2.8841047236096037e-05, |
| "loss": 0.0273, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.4333219935363157, |
| "grad_norm": 0.4957326650619507, |
| "learning_rate": 2.8837858637837838e-05, |
| "loss": 0.0265, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.43374723592447695, |
| "grad_norm": 0.5052283406257629, |
| "learning_rate": 2.8834665836003556e-05, |
| "loss": 0.0275, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.4341724783126382, |
| "grad_norm": 0.49201151728630066, |
| "learning_rate": 2.8831468831563075e-05, |
| "loss": 0.0289, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.43459772070079944, |
| "grad_norm": 0.6140077710151672, |
| "learning_rate": 2.882826762548756e-05, |
| "loss": 0.0301, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.4350229630889607, |
| "grad_norm": 0.6304380297660828, |
| "learning_rate": 2.8825062218749456e-05, |
| "loss": 0.0271, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.435448205477122, |
| "grad_norm": 0.6733559966087341, |
| "learning_rate": 2.8821852612322477e-05, |
| "loss": 0.0288, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.4358734478652832, |
| "grad_norm": 0.5495876669883728, |
| "learning_rate": 2.8818638807181622e-05, |
| "loss": 0.0272, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.4362986902534445, |
| "grad_norm": 0.5263335108757019, |
| "learning_rate": 2.8815420804303154e-05, |
| "loss": 0.0291, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.4367239326416057, |
| "grad_norm": 0.5337483286857605, |
| "learning_rate": 2.8812198604664627e-05, |
| "loss": 0.028, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.43714917502976697, |
| "grad_norm": 0.4586282968521118, |
| "learning_rate": 2.8808972209244848e-05, |
| "loss": 0.0299, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.43757441741792824, |
| "grad_norm": 0.46955880522727966, |
| "learning_rate": 2.8805741619023922e-05, |
| "loss": 0.0288, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.43799965980608946, |
| "grad_norm": 0.5017499923706055, |
| "learning_rate": 2.880250683498321e-05, |
| "loss": 0.0291, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.43842490219425073, |
| "grad_norm": 0.5041956305503845, |
| "learning_rate": 2.879926785810535e-05, |
| "loss": 0.0284, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.43885014458241195, |
| "grad_norm": 0.43060925602912903, |
| "learning_rate": 2.8796024689374267e-05, |
| "loss": 0.0264, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.4392753869705732, |
| "grad_norm": 0.5580827593803406, |
| "learning_rate": 2.8792777329775148e-05, |
| "loss": 0.0291, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.4397006293587345, |
| "grad_norm": 0.38732340931892395, |
| "learning_rate": 2.878952578029445e-05, |
| "loss": 0.0268, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.4401258717468957, |
| "grad_norm": 0.5076254606246948, |
| "learning_rate": 2.8786270041919915e-05, |
| "loss": 0.0284, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.440551114135057, |
| "grad_norm": 0.39968782663345337, |
| "learning_rate": 2.8783010115640546e-05, |
| "loss": 0.026, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.44097635652321826, |
| "grad_norm": 0.4572618901729584, |
| "learning_rate": 2.877974600244662e-05, |
| "loss": 0.0267, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.4414015989113795, |
| "grad_norm": 0.4569093883037567, |
| "learning_rate": 2.8776477703329696e-05, |
| "loss": 0.0256, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.44182684129954075, |
| "grad_norm": 0.5506418943405151, |
| "learning_rate": 2.877320521928259e-05, |
| "loss": 0.0292, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.44225208368770197, |
| "grad_norm": 0.5269330143928528, |
| "learning_rate": 2.87699285512994e-05, |
| "loss": 0.0285, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.44267732607586324, |
| "grad_norm": 0.5772727727890015, |
| "learning_rate": 2.876664770037549e-05, |
| "loss": 0.0276, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.4431025684640245, |
| "grad_norm": 0.47063735127449036, |
| "learning_rate": 2.8763362667507495e-05, |
| "loss": 0.0276, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.44352781085218573, |
| "grad_norm": 0.4042012691497803, |
| "learning_rate": 2.8760073453693322e-05, |
| "loss": 0.0248, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.443953053240347, |
| "grad_norm": 0.5305367112159729, |
| "learning_rate": 2.8756780059932146e-05, |
| "loss": 0.0253, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.4443782956285082, |
| "grad_norm": 0.5205299258232117, |
| "learning_rate": 2.875348248722442e-05, |
| "loss": 0.0271, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.4448035380166695, |
| "grad_norm": 0.47778600454330444, |
| "learning_rate": 2.8750180736571848e-05, |
| "loss": 0.0286, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.44522878040483077, |
| "grad_norm": 0.6566385626792908, |
| "learning_rate": 2.8746874808977424e-05, |
| "loss": 0.0276, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.445654022792992, |
| "grad_norm": 0.6271282434463501, |
| "learning_rate": 2.87435647054454e-05, |
| "loss": 0.0275, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.44607926518115326, |
| "grad_norm": 0.4872870147228241, |
| "learning_rate": 2.8740250426981292e-05, |
| "loss": 0.0277, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.44650450756931453, |
| "grad_norm": 0.5417566299438477, |
| "learning_rate": 2.8736931974591895e-05, |
| "loss": 0.0323, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.44692974995747575, |
| "grad_norm": 0.48499515652656555, |
| "learning_rate": 2.8733609349285268e-05, |
| "loss": 0.0239, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.447354992345637, |
| "grad_norm": 0.5535517334938049, |
| "learning_rate": 2.8730282552070734e-05, |
| "loss": 0.0253, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.44778023473379824, |
| "grad_norm": 0.45956945419311523, |
| "learning_rate": 2.872695158395889e-05, |
| "loss": 0.0285, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.4482054771219595, |
| "grad_norm": 0.6099970936775208, |
| "learning_rate": 2.872361644596159e-05, |
| "loss": 0.0291, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.4486307195101208, |
| "grad_norm": 0.5071263909339905, |
| "learning_rate": 2.8720277139091972e-05, |
| "loss": 0.0272, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.449055961898282, |
| "grad_norm": 0.49766653776168823, |
| "learning_rate": 2.8716933664364417e-05, |
| "loss": 0.0262, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.4494812042864433, |
| "grad_norm": 0.4527025520801544, |
| "learning_rate": 2.871358602279459e-05, |
| "loss": 0.0272, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.4499064466746045, |
| "grad_norm": 0.47787800431251526, |
| "learning_rate": 2.871023421539942e-05, |
| "loss": 0.0256, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.45033168906276577, |
| "grad_norm": 0.6697449088096619, |
| "learning_rate": 2.8706878243197085e-05, |
| "loss": 0.0266, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.45075693145092705, |
| "grad_norm": 0.6518754363059998, |
| "learning_rate": 2.8703518107207052e-05, |
| "loss": 0.0271, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.45118217383908826, |
| "grad_norm": 0.4889909327030182, |
| "learning_rate": 2.870015380845004e-05, |
| "loss": 0.0249, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.45160741622724954, |
| "grad_norm": 0.5126760005950928, |
| "learning_rate": 2.8696785347948035e-05, |
| "loss": 0.0283, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.4520326586154108, |
| "grad_norm": 0.43273210525512695, |
| "learning_rate": 2.8693412726724282e-05, |
| "loss": 0.0276, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.452457901003572, |
| "grad_norm": 0.5461113452911377, |
| "learning_rate": 2.869003594580329e-05, |
| "loss": 0.0275, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.4528831433917333, |
| "grad_norm": 0.5773922801017761, |
| "learning_rate": 2.868665500621085e-05, |
| "loss": 0.0288, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.4533083857798945, |
| "grad_norm": 0.47226929664611816, |
| "learning_rate": 2.8683269908973992e-05, |
| "loss": 0.0254, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.4537336281680558, |
| "grad_norm": 0.4792795777320862, |
| "learning_rate": 2.867988065512102e-05, |
| "loss": 0.0261, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.45415887055621706, |
| "grad_norm": 0.5739585161209106, |
| "learning_rate": 2.8676487245681507e-05, |
| "loss": 0.0288, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.4545841129443783, |
| "grad_norm": 0.48658737540245056, |
| "learning_rate": 2.867308968168627e-05, |
| "loss": 0.0288, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.45500935533253956, |
| "grad_norm": 0.5436782240867615, |
| "learning_rate": 2.8669687964167404e-05, |
| "loss": 0.0249, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.4554345977207008, |
| "grad_norm": 0.49603909254074097, |
| "learning_rate": 2.8666282094158264e-05, |
| "loss": 0.0267, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.45585984010886205, |
| "grad_norm": 0.5125226974487305, |
| "learning_rate": 2.866287207269346e-05, |
| "loss": 0.0289, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.4562850824970233, |
| "grad_norm": 0.447691410779953, |
| "learning_rate": 2.8659457900808868e-05, |
| "loss": 0.0273, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.45671032488518454, |
| "grad_norm": 0.39385440945625305, |
| "learning_rate": 2.8656039579541628e-05, |
| "loss": 0.026, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.4571355672733458, |
| "grad_norm": 0.5474128127098083, |
| "learning_rate": 2.865261710993012e-05, |
| "loss": 0.0258, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.4575608096615071, |
| "grad_norm": 0.5764764547348022, |
| "learning_rate": 2.864919049301402e-05, |
| "loss": 0.0272, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.4579860520496683, |
| "grad_norm": 0.6347236037254333, |
| "learning_rate": 2.8645759729834227e-05, |
| "loss": 0.0238, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.4584112944378296, |
| "grad_norm": 0.5578680038452148, |
| "learning_rate": 2.864232482143293e-05, |
| "loss": 0.0244, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.4588365368259908, |
| "grad_norm": 0.51310133934021, |
| "learning_rate": 2.8638885768853548e-05, |
| "loss": 0.0287, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.45926177921415207, |
| "grad_norm": 0.5770761370658875, |
| "learning_rate": 2.8635442573140793e-05, |
| "loss": 0.028, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.45968702160231334, |
| "grad_norm": 0.47170472145080566, |
| "learning_rate": 2.8631995235340606e-05, |
| "loss": 0.028, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.46011226399047456, |
| "grad_norm": 0.3460540473461151, |
| "learning_rate": 2.8628543756500192e-05, |
| "loss": 0.0257, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.46053750637863583, |
| "grad_norm": 0.40834179520606995, |
| "learning_rate": 2.862508813766803e-05, |
| "loss": 0.0243, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.46096274876679705, |
| "grad_norm": 0.5245689749717712, |
| "learning_rate": 2.8621628379893837e-05, |
| "loss": 0.0255, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.4613879911549583, |
| "grad_norm": 0.4288097321987152, |
| "learning_rate": 2.861816448422861e-05, |
| "loss": 0.0263, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.4618132335431196, |
| "grad_norm": 0.5491788387298584, |
| "learning_rate": 2.8614696451724573e-05, |
| "loss": 0.0239, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.4622384759312808, |
| "grad_norm": 0.6117849349975586, |
| "learning_rate": 2.861122428343523e-05, |
| "loss": 0.0259, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.4626637183194421, |
| "grad_norm": 0.5702128410339355, |
| "learning_rate": 2.8607747980415333e-05, |
| "loss": 0.0269, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.46308896070760336, |
| "grad_norm": 0.663136899471283, |
| "learning_rate": 2.860426754372089e-05, |
| "loss": 0.0263, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.4635142030957646, |
| "grad_norm": 0.4230470061302185, |
| "learning_rate": 2.860078297440917e-05, |
| "loss": 0.0267, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.46393944548392585, |
| "grad_norm": 0.5185224413871765, |
| "learning_rate": 2.8597294273538687e-05, |
| "loss": 0.0279, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.46436468787208707, |
| "grad_norm": 0.509982705116272, |
| "learning_rate": 2.8593801442169223e-05, |
| "loss": 0.0264, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.46478993026024834, |
| "grad_norm": 0.4910407066345215, |
| "learning_rate": 2.85903044813618e-05, |
| "loss": 0.027, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.4652151726484096, |
| "grad_norm": 0.5604979395866394, |
| "learning_rate": 2.8586803392178715e-05, |
| "loss": 0.0263, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.46564041503657083, |
| "grad_norm": 0.49472281336784363, |
| "learning_rate": 2.858329817568349e-05, |
| "loss": 0.0247, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.4660656574247321, |
| "grad_norm": 0.5663736462593079, |
| "learning_rate": 2.8579788832940925e-05, |
| "loss": 0.0241, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.4664908998128933, |
| "grad_norm": 0.5932319760322571, |
| "learning_rate": 2.8576275365017073e-05, |
| "loss": 0.0285, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.4669161422010546, |
| "grad_norm": 0.5209352970123291, |
| "learning_rate": 2.857275777297922e-05, |
| "loss": 0.026, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.46734138458921587, |
| "grad_norm": 0.5121971368789673, |
| "learning_rate": 2.8569236057895924e-05, |
| "loss": 0.0273, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.4677666269773771, |
| "grad_norm": 0.5635631084442139, |
| "learning_rate": 2.8565710220836994e-05, |
| "loss": 0.0266, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.46819186936553836, |
| "grad_norm": 0.42411789298057556, |
| "learning_rate": 2.8562180262873474e-05, |
| "loss": 0.0255, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.46861711175369963, |
| "grad_norm": 0.49095433950424194, |
| "learning_rate": 2.8558646185077682e-05, |
| "loss": 0.0277, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.46904235414186085, |
| "grad_norm": 0.39923784136772156, |
| "learning_rate": 2.8555107988523177e-05, |
| "loss": 0.0232, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.4694675965300221, |
| "grad_norm": 0.5151148438453674, |
| "learning_rate": 2.8551565674284762e-05, |
| "loss": 0.0248, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.46989283891818334, |
| "grad_norm": 0.4995056390762329, |
| "learning_rate": 2.8548019243438503e-05, |
| "loss": 0.0249, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.4703180813063446, |
| "grad_norm": 0.39380818605422974, |
| "learning_rate": 2.8544468697061715e-05, |
| "loss": 0.028, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.4707433236945059, |
| "grad_norm": 0.5959387421607971, |
| "learning_rate": 2.854091403623296e-05, |
| "loss": 0.0238, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.4711685660826671, |
| "grad_norm": 0.4565434455871582, |
| "learning_rate": 2.8537355262032047e-05, |
| "loss": 0.0251, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.4715938084708284, |
| "grad_norm": 0.5781095623970032, |
| "learning_rate": 2.853379237554004e-05, |
| "loss": 0.0253, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.4720190508589896, |
| "grad_norm": 0.4889158010482788, |
| "learning_rate": 2.853022537783924e-05, |
| "loss": 0.0245, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.47244429324715087, |
| "grad_norm": 0.5270752906799316, |
| "learning_rate": 2.8526654270013223e-05, |
| "loss": 0.0264, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.47286953563531214, |
| "grad_norm": 0.5031289458274841, |
| "learning_rate": 2.8523079053146785e-05, |
| "loss": 0.0276, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.47329477802347336, |
| "grad_norm": 0.5778294801712036, |
| "learning_rate": 2.851949972832599e-05, |
| "loss": 0.0258, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.47372002041163463, |
| "grad_norm": 0.5123775601387024, |
| "learning_rate": 2.851591629663814e-05, |
| "loss": 0.0246, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.4741452627997959, |
| "grad_norm": 0.5043962001800537, |
| "learning_rate": 2.8512328759171783e-05, |
| "loss": 0.0226, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.4745705051879571, |
| "grad_norm": 0.41966044902801514, |
| "learning_rate": 2.8508737117016722e-05, |
| "loss": 0.0249, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.4749957475761184, |
| "grad_norm": 0.4980928897857666, |
| "learning_rate": 2.8505141371264005e-05, |
| "loss": 0.0256, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.4754209899642796, |
| "grad_norm": 0.43403160572052, |
| "learning_rate": 2.8501541523005918e-05, |
| "loss": 0.0236, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.4758462323524409, |
| "grad_norm": 0.41079947352409363, |
| "learning_rate": 2.8497937573336007e-05, |
| "loss": 0.0273, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.47627147474060216, |
| "grad_norm": 0.41930046677589417, |
| "learning_rate": 2.8494329523349053e-05, |
| "loss": 0.0277, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.4766967171287634, |
| "grad_norm": 0.356673002243042, |
| "learning_rate": 2.8490717374141088e-05, |
| "loss": 0.0272, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.47712195951692465, |
| "grad_norm": 0.5610805749893188, |
| "learning_rate": 2.8487101126809384e-05, |
| "loss": 0.0239, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.47754720190508587, |
| "grad_norm": 0.44382742047309875, |
| "learning_rate": 2.8483480782452463e-05, |
| "loss": 0.0299, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.47797244429324715, |
| "grad_norm": 0.45406636595726013, |
| "learning_rate": 2.8479856342170097e-05, |
| "loss": 0.0241, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.4783976866814084, |
| "grad_norm": 0.3877098262310028, |
| "learning_rate": 2.8476227807063283e-05, |
| "loss": 0.025, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.47882292906956964, |
| "grad_norm": 0.45907872915267944, |
| "learning_rate": 2.8472595178234284e-05, |
| "loss": 0.0246, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.4792481714577309, |
| "grad_norm": 0.4462440311908722, |
| "learning_rate": 2.846895845678659e-05, |
| "loss": 0.023, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.4796734138458922, |
| "grad_norm": 0.48364442586898804, |
| "learning_rate": 2.8465317643824945e-05, |
| "loss": 0.0232, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.4800986562340534, |
| "grad_norm": 0.5409104824066162, |
| "learning_rate": 2.8461672740455334e-05, |
| "loss": 0.0252, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.4805238986222147, |
| "grad_norm": 0.5658287405967712, |
| "learning_rate": 2.8458023747784976e-05, |
| "loss": 0.0232, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.4809491410103759, |
| "grad_norm": 0.3975183665752411, |
| "learning_rate": 2.8454370666922343e-05, |
| "loss": 0.0246, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.48137438339853716, |
| "grad_norm": 0.3229718804359436, |
| "learning_rate": 2.8450713498977145e-05, |
| "loss": 0.024, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.48179962578669844, |
| "grad_norm": 0.38045093417167664, |
| "learning_rate": 2.844705224506033e-05, |
| "loss": 0.0236, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.48222486817485966, |
| "grad_norm": 0.3866424560546875, |
| "learning_rate": 2.844338690628409e-05, |
| "loss": 0.0256, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.48265011056302093, |
| "grad_norm": 0.42420685291290283, |
| "learning_rate": 2.8439717483761863e-05, |
| "loss": 0.0254, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.48307535295118215, |
| "grad_norm": 0.5029059648513794, |
| "learning_rate": 2.843604397860832e-05, |
| "loss": 0.0234, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.4835005953393434, |
| "grad_norm": 0.47720468044281006, |
| "learning_rate": 2.8432366391939373e-05, |
| "loss": 0.0257, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.4839258377275047, |
| "grad_norm": 0.5582727193832397, |
| "learning_rate": 2.8428684724872176e-05, |
| "loss": 0.0254, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.4843510801156659, |
| "grad_norm": 0.36047664284706116, |
| "learning_rate": 2.8424998978525123e-05, |
| "loss": 0.0244, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.4847763225038272, |
| "grad_norm": 0.4934903383255005, |
| "learning_rate": 2.842130915401785e-05, |
| "loss": 0.0263, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.48520156489198846, |
| "grad_norm": 0.5368421673774719, |
| "learning_rate": 2.8417615252471226e-05, |
| "loss": 0.0272, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.4856268072801497, |
| "grad_norm": 0.5921639800071716, |
| "learning_rate": 2.841391727500735e-05, |
| "loss": 0.0225, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.48605204966831095, |
| "grad_norm": 0.6646623611450195, |
| "learning_rate": 2.8410215222749593e-05, |
| "loss": 0.0248, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.48647729205647217, |
| "grad_norm": 0.5030350089073181, |
| "learning_rate": 2.840650909682252e-05, |
| "loss": 0.0238, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.48690253444463344, |
| "grad_norm": 0.5150179862976074, |
| "learning_rate": 2.8402798898351963e-05, |
| "loss": 0.0253, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.4873277768327947, |
| "grad_norm": 0.5101315379142761, |
| "learning_rate": 2.8399084628464986e-05, |
| "loss": 0.0279, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.48775301922095593, |
| "grad_norm": 0.3990659713745117, |
| "learning_rate": 2.8395366288289876e-05, |
| "loss": 0.0234, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.4881782616091172, |
| "grad_norm": 0.4439125955104828, |
| "learning_rate": 2.839164387895617e-05, |
| "loss": 0.0233, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.4886035039972784, |
| "grad_norm": 0.5056343674659729, |
| "learning_rate": 2.8387917401594644e-05, |
| "loss": 0.023, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.4890287463854397, |
| "grad_norm": 0.38938045501708984, |
| "learning_rate": 2.8384186857337296e-05, |
| "loss": 0.0245, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.48945398877360097, |
| "grad_norm": 0.48951399326324463, |
| "learning_rate": 2.838045224731737e-05, |
| "loss": 0.0246, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.4898792311617622, |
| "grad_norm": 0.4403155446052551, |
| "learning_rate": 2.8376713572669348e-05, |
| "loss": 0.0248, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.49030447354992346, |
| "grad_norm": 0.5052725672721863, |
| "learning_rate": 2.8372970834528924e-05, |
| "loss": 0.028, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.49072971593808473, |
| "grad_norm": 0.4916299879550934, |
| "learning_rate": 2.836922403403306e-05, |
| "loss": 0.0265, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.49115495832624595, |
| "grad_norm": 0.4234394133090973, |
| "learning_rate": 2.8365473172319926e-05, |
| "loss": 0.0259, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.4915802007144072, |
| "grad_norm": 0.4441545307636261, |
| "learning_rate": 2.8361718250528936e-05, |
| "loss": 0.0256, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.49200544310256844, |
| "grad_norm": 0.49031588435173035, |
| "learning_rate": 2.835795926980074e-05, |
| "loss": 0.0233, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.4924306854907297, |
| "grad_norm": 0.6478508114814758, |
| "learning_rate": 2.8354196231277213e-05, |
| "loss": 0.0269, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.492855927878891, |
| "grad_norm": 0.5133887529373169, |
| "learning_rate": 2.835042913610147e-05, |
| "loss": 0.0256, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.4932811702670522, |
| "grad_norm": 0.35141903162002563, |
| "learning_rate": 2.8346657985417846e-05, |
| "loss": 0.0245, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.4937064126552135, |
| "grad_norm": 0.4691372811794281, |
| "learning_rate": 2.8342882780371932e-05, |
| "loss": 0.0275, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.49413165504337475, |
| "grad_norm": 0.4540161192417145, |
| "learning_rate": 2.833910352211053e-05, |
| "loss": 0.0292, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.49455689743153597, |
| "grad_norm": 0.5981717705726624, |
| "learning_rate": 2.8335320211781667e-05, |
| "loss": 0.0293, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.49498213981969724, |
| "grad_norm": 0.5212690234184265, |
| "learning_rate": 2.8331532850534628e-05, |
| "loss": 0.0259, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.49540738220785846, |
| "grad_norm": 0.47971609234809875, |
| "learning_rate": 2.8327741439519906e-05, |
| "loss": 0.0267, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.49583262459601973, |
| "grad_norm": 0.43671026825904846, |
| "learning_rate": 2.8323945979889233e-05, |
| "loss": 0.0241, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.496257866984181, |
| "grad_norm": 0.5726038217544556, |
| "learning_rate": 2.8320146472795572e-05, |
| "loss": 0.0245, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.4966831093723422, |
| "grad_norm": 0.41962766647338867, |
| "learning_rate": 2.831634291939311e-05, |
| "loss": 0.0233, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.4971083517605035, |
| "grad_norm": 0.49509820342063904, |
| "learning_rate": 2.8312535320837266e-05, |
| "loss": 0.0247, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.4975335941486647, |
| "grad_norm": 0.44511812925338745, |
| "learning_rate": 2.8308723678284687e-05, |
| "loss": 0.0264, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.497958836536826, |
| "grad_norm": 0.42542317509651184, |
| "learning_rate": 2.8304907992893254e-05, |
| "loss": 0.0238, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.49838407892498726, |
| "grad_norm": 0.5425060391426086, |
| "learning_rate": 2.8301088265822066e-05, |
| "loss": 0.0234, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.4988093213131485, |
| "grad_norm": 0.520889937877655, |
| "learning_rate": 2.829726449823146e-05, |
| "loss": 0.025, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.49923456370130975, |
| "grad_norm": 0.49344781041145325, |
| "learning_rate": 2.8293436691282993e-05, |
| "loss": 0.025, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.499659806089471, |
| "grad_norm": 0.41864946484565735, |
| "learning_rate": 2.828960484613945e-05, |
| "loss": 0.0239, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.5000850484776322, |
| "grad_norm": 0.34268102049827576, |
| "learning_rate": 2.828576896396485e-05, |
| "loss": 0.0228, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.5005102908657935, |
| "grad_norm": 0.37487515807151794, |
| "learning_rate": 2.828192904592443e-05, |
| "loss": 0.0239, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.5009355332539548, |
| "grad_norm": 0.4595807194709778, |
| "learning_rate": 2.8278085093184648e-05, |
| "loss": 0.0223, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.501360775642116, |
| "grad_norm": 0.5990208983421326, |
| "learning_rate": 2.827423710691321e-05, |
| "loss": 0.0233, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.5017860180302772, |
| "grad_norm": 0.7175838947296143, |
| "learning_rate": 2.827038508827902e-05, |
| "loss": 0.026, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.5022112604184386, |
| "grad_norm": 0.6296750903129578, |
| "learning_rate": 2.8266529038452223e-05, |
| "loss": 0.0258, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.5026365028065998, |
| "grad_norm": 0.5214424729347229, |
| "learning_rate": 2.826266895860419e-05, |
| "loss": 0.0242, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.503061745194761, |
| "grad_norm": 0.5066425800323486, |
| "learning_rate": 2.8258804849907508e-05, |
| "loss": 0.0257, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.5034869875829223, |
| "grad_norm": 0.5052611827850342, |
| "learning_rate": 2.825493671353599e-05, |
| "loss": 0.0244, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.5039122299710835, |
| "grad_norm": 0.4577106237411499, |
| "learning_rate": 2.8251064550664673e-05, |
| "loss": 0.024, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.5043374723592448, |
| "grad_norm": 0.5954710245132446, |
| "learning_rate": 2.8247188362469823e-05, |
| "loss": 0.0239, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.504762714747406, |
| "grad_norm": 0.4054025709629059, |
| "learning_rate": 2.824330815012892e-05, |
| "loss": 0.0265, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.5051879571355673, |
| "grad_norm": 0.6234331727027893, |
| "learning_rate": 2.8239423914820668e-05, |
| "loss": 0.0261, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.5056131995237285, |
| "grad_norm": 0.5679658651351929, |
| "learning_rate": 2.8235535657724997e-05, |
| "loss": 0.0241, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.5060384419118897, |
| "grad_norm": 0.5622302293777466, |
| "learning_rate": 2.823164338002306e-05, |
| "loss": 0.0244, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.5064636843000511, |
| "grad_norm": 0.6095677018165588, |
| "learning_rate": 2.8227747082897223e-05, |
| "loss": 0.0265, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.5068889266882123, |
| "grad_norm": 0.406110554933548, |
| "learning_rate": 2.8223846767531084e-05, |
| "loss": 0.0257, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.5073141690763735, |
| "grad_norm": 0.4320657551288605, |
| "learning_rate": 2.821994243510945e-05, |
| "loss": 0.0291, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.5077394114645348, |
| "grad_norm": 0.40571796894073486, |
| "learning_rate": 2.821603408681835e-05, |
| "loss": 0.0281, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.508164653852696, |
| "grad_norm": 0.44595959782600403, |
| "learning_rate": 2.8212121723845054e-05, |
| "loss": 0.0241, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.5085898962408573, |
| "grad_norm": 0.36468973755836487, |
| "learning_rate": 2.820820534737802e-05, |
| "loss": 0.0284, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.5090151386290186, |
| "grad_norm": 0.42328399419784546, |
| "learning_rate": 2.8204284958606946e-05, |
| "loss": 0.0237, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.5094403810171798, |
| "grad_norm": 0.4866717755794525, |
| "learning_rate": 2.8200360558722738e-05, |
| "loss": 0.0239, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.509865623405341, |
| "grad_norm": 0.4124220311641693, |
| "learning_rate": 2.819643214891753e-05, |
| "loss": 0.0231, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.5102908657935022, |
| "grad_norm": 0.5854120850563049, |
| "learning_rate": 2.8192499730384668e-05, |
| "loss": 0.0265, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.5107161081816636, |
| "grad_norm": 0.40588247776031494, |
| "learning_rate": 2.818856330431871e-05, |
| "loss": 0.0271, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.5111413505698248, |
| "grad_norm": 0.427511990070343, |
| "learning_rate": 2.8184622871915446e-05, |
| "loss": 0.0238, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.511566592957986, |
| "grad_norm": 0.4201594293117523, |
| "learning_rate": 2.8180678434371874e-05, |
| "loss": 0.0244, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.5119918353461473, |
| "grad_norm": 0.3546863794326782, |
| "learning_rate": 2.817672999288621e-05, |
| "loss": 0.0227, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.5124170777343086, |
| "grad_norm": 0.36736375093460083, |
| "learning_rate": 2.8172777548657886e-05, |
| "loss": 0.0242, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.5128423201224698, |
| "grad_norm": 0.5919041633605957, |
| "learning_rate": 2.8168821102887545e-05, |
| "loss": 0.0243, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.5132675625106311, |
| "grad_norm": 0.5262941718101501, |
| "learning_rate": 2.816486065677706e-05, |
| "loss": 0.0219, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.5136928048987923, |
| "grad_norm": 0.4629955291748047, |
| "learning_rate": 2.8160896211529506e-05, |
| "loss": 0.025, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.5141180472869535, |
| "grad_norm": 0.4440573751926422, |
| "learning_rate": 2.8156927768349175e-05, |
| "loss": 0.0228, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.5145432896751149, |
| "grad_norm": 0.34329354763031006, |
| "learning_rate": 2.815295532844158e-05, |
| "loss": 0.0237, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.5149685320632761, |
| "grad_norm": 0.42704781889915466, |
| "learning_rate": 2.8148978893013434e-05, |
| "loss": 0.0233, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.5153937744514373, |
| "grad_norm": 0.4133966565132141, |
| "learning_rate": 2.814499846327268e-05, |
| "loss": 0.0243, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.5158190168395985, |
| "grad_norm": 0.4624313712120056, |
| "learning_rate": 2.8141014040428468e-05, |
| "loss": 0.0241, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.5162442592277598, |
| "grad_norm": 0.5375241041183472, |
| "learning_rate": 2.8137025625691163e-05, |
| "loss": 0.0234, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.5166695016159211, |
| "grad_norm": 0.44683730602264404, |
| "learning_rate": 2.813303322027233e-05, |
| "loss": 0.0227, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.5170947440040823, |
| "grad_norm": 0.4118146598339081, |
| "learning_rate": 2.8129036825384763e-05, |
| "loss": 0.0248, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.5175199863922436, |
| "grad_norm": 0.4741264581680298, |
| "learning_rate": 2.8125036442242466e-05, |
| "loss": 0.0229, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.5179452287804048, |
| "grad_norm": 0.5060192942619324, |
| "learning_rate": 2.8121032072060635e-05, |
| "loss": 0.025, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.518370471168566, |
| "grad_norm": 0.44125476479530334, |
| "learning_rate": 2.811702371605571e-05, |
| "loss": 0.0231, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.5187957135567274, |
| "grad_norm": 0.45274418592453003, |
| "learning_rate": 2.8113011375445308e-05, |
| "loss": 0.0252, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.5192209559448886, |
| "grad_norm": 0.4670751392841339, |
| "learning_rate": 2.8108995051448284e-05, |
| "loss": 0.0266, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.5196461983330498, |
| "grad_norm": 0.41676709055900574, |
| "learning_rate": 2.8104974745284685e-05, |
| "loss": 0.0218, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.5200714407212111, |
| "grad_norm": 0.4893173575401306, |
| "learning_rate": 2.8100950458175775e-05, |
| "loss": 0.0254, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.5204966831093724, |
| "grad_norm": 0.49697211384773254, |
| "learning_rate": 2.8096922191344027e-05, |
| "loss": 0.0256, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.5209219254975336, |
| "grad_norm": 0.4751816987991333, |
| "learning_rate": 2.809288994601312e-05, |
| "loss": 0.0213, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.5213471678856948, |
| "grad_norm": 0.45922544598579407, |
| "learning_rate": 2.8088853723407946e-05, |
| "loss": 0.0231, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.5217724102738561, |
| "grad_norm": 0.568095326423645, |
| "learning_rate": 2.8084813524754602e-05, |
| "loss": 0.0265, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.5221976526620173, |
| "grad_norm": 0.5587965846061707, |
| "learning_rate": 2.8080769351280395e-05, |
| "loss": 0.0224, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.5226228950501786, |
| "grad_norm": 0.4851609468460083, |
| "learning_rate": 2.8076721204213835e-05, |
| "loss": 0.0217, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.5230481374383399, |
| "grad_norm": 0.5293141603469849, |
| "learning_rate": 2.8072669084784642e-05, |
| "loss": 0.0268, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.5234733798265011, |
| "grad_norm": 0.511161208152771, |
| "learning_rate": 2.8068612994223747e-05, |
| "loss": 0.0249, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.5238986222146623, |
| "grad_norm": 0.4113911986351013, |
| "learning_rate": 2.8064552933763284e-05, |
| "loss": 0.0242, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.5243238646028237, |
| "grad_norm": 0.4587953984737396, |
| "learning_rate": 2.8060488904636586e-05, |
| "loss": 0.0225, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.5247491069909849, |
| "grad_norm": 0.4895862340927124, |
| "learning_rate": 2.80564209080782e-05, |
| "loss": 0.0248, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.5251743493791461, |
| "grad_norm": 0.573306679725647, |
| "learning_rate": 2.8052348945323877e-05, |
| "loss": 0.0255, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.5255995917673074, |
| "grad_norm": 0.39072391390800476, |
| "learning_rate": 2.8048273017610574e-05, |
| "loss": 0.0234, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.5260248341554686, |
| "grad_norm": 0.5370450019836426, |
| "learning_rate": 2.804419312617645e-05, |
| "loss": 0.0247, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.5264500765436299, |
| "grad_norm": 0.5759648084640503, |
| "learning_rate": 2.8040109272260862e-05, |
| "loss": 0.0226, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.5268753189317911, |
| "grad_norm": 0.41309723258018494, |
| "learning_rate": 2.8036021457104384e-05, |
| "loss": 0.023, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.5273005613199524, |
| "grad_norm": 0.4322841465473175, |
| "learning_rate": 2.803192968194878e-05, |
| "loss": 0.0247, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.5277258037081136, |
| "grad_norm": 0.39251482486724854, |
| "learning_rate": 2.8027833948037032e-05, |
| "loss": 0.0198, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.5281510460962748, |
| "grad_norm": 0.5345434546470642, |
| "learning_rate": 2.802373425661331e-05, |
| "loss": 0.0235, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.5285762884844362, |
| "grad_norm": 0.4239960014820099, |
| "learning_rate": 2.8019630608922988e-05, |
| "loss": 0.025, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.5290015308725974, |
| "grad_norm": 0.38847601413726807, |
| "learning_rate": 2.801552300621265e-05, |
| "loss": 0.0227, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.5294267732607586, |
| "grad_norm": 0.3944767415523529, |
| "learning_rate": 2.8011411449730084e-05, |
| "loss": 0.0221, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.5298520156489199, |
| "grad_norm": 0.3829469084739685, |
| "learning_rate": 2.800729594072426e-05, |
| "loss": 0.0229, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.5302772580370811, |
| "grad_norm": 0.49871960282325745, |
| "learning_rate": 2.8003176480445373e-05, |
| "loss": 0.0252, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.5307025004252424, |
| "grad_norm": 0.479323148727417, |
| "learning_rate": 2.7999053070144793e-05, |
| "loss": 0.0222, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.5311277428134037, |
| "grad_norm": 0.4209604859352112, |
| "learning_rate": 2.7994925711075117e-05, |
| "loss": 0.0265, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.5315529852015649, |
| "grad_norm": 0.44869011640548706, |
| "learning_rate": 2.7990794404490118e-05, |
| "loss": 0.0224, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.5319782275897261, |
| "grad_norm": 0.4455290138721466, |
| "learning_rate": 2.798665915164478e-05, |
| "loss": 0.0247, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.5324034699778873, |
| "grad_norm": 0.4481496810913086, |
| "learning_rate": 2.7982519953795284e-05, |
| "loss": 0.0214, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.5328287123660487, |
| "grad_norm": 0.38179811835289, |
| "learning_rate": 2.7978376812199013e-05, |
| "loss": 0.0231, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.5332539547542099, |
| "grad_norm": 0.44815391302108765, |
| "learning_rate": 2.797422972811454e-05, |
| "loss": 0.0209, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.5336791971423711, |
| "grad_norm": 0.5632169842720032, |
| "learning_rate": 2.7970078702801635e-05, |
| "loss": 0.0227, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.5341044395305324, |
| "grad_norm": 0.4357597231864929, |
| "learning_rate": 2.7965923737521283e-05, |
| "loss": 0.0255, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.5345296819186937, |
| "grad_norm": 0.47203293442726135, |
| "learning_rate": 2.796176483353564e-05, |
| "loss": 0.0238, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.5349549243068549, |
| "grad_norm": 0.4765610694885254, |
| "learning_rate": 2.795760199210808e-05, |
| "loss": 0.0264, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.5353801666950162, |
| "grad_norm": 0.46927255392074585, |
| "learning_rate": 2.7953435214503157e-05, |
| "loss": 0.0208, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.5358054090831774, |
| "grad_norm": 0.48647135496139526, |
| "learning_rate": 2.7949264501986634e-05, |
| "loss": 0.022, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.5362306514713386, |
| "grad_norm": 0.39700639247894287, |
| "learning_rate": 2.794508985582546e-05, |
| "loss": 0.0193, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.5366558938595, |
| "grad_norm": 0.43265703320503235, |
| "learning_rate": 2.7940911277287777e-05, |
| "loss": 0.0216, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.5370811362476612, |
| "grad_norm": 0.5016583204269409, |
| "learning_rate": 2.793672876764294e-05, |
| "loss": 0.0249, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.5375063786358224, |
| "grad_norm": 0.4855128228664398, |
| "learning_rate": 2.7932542328161478e-05, |
| "loss": 0.0244, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.5379316210239836, |
| "grad_norm": 0.46733221411705017, |
| "learning_rate": 2.792835196011512e-05, |
| "loss": 0.0211, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.538356863412145, |
| "grad_norm": 0.44542211294174194, |
| "learning_rate": 2.792415766477679e-05, |
| "loss": 0.0234, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.5387821058003062, |
| "grad_norm": 0.3643878400325775, |
| "learning_rate": 2.79199594434206e-05, |
| "loss": 0.0219, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.5392073481884674, |
| "grad_norm": 0.3483383357524872, |
| "learning_rate": 2.7915757297321867e-05, |
| "loss": 0.0213, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.5396325905766287, |
| "grad_norm": 0.49537405371665955, |
| "learning_rate": 2.7911551227757085e-05, |
| "loss": 0.0249, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.5400578329647899, |
| "grad_norm": 0.40428227186203003, |
| "learning_rate": 2.790734123600395e-05, |
| "loss": 0.0217, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.5404830753529511, |
| "grad_norm": 0.4799230396747589, |
| "learning_rate": 2.7903127323341347e-05, |
| "loss": 0.0236, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.5409083177411125, |
| "grad_norm": 0.4571576416492462, |
| "learning_rate": 2.7898909491049353e-05, |
| "loss": 0.0255, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.5413335601292737, |
| "grad_norm": 0.38204875588417053, |
| "learning_rate": 2.789468774040923e-05, |
| "loss": 0.0264, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.5417588025174349, |
| "grad_norm": 0.5351446270942688, |
| "learning_rate": 2.7890462072703433e-05, |
| "loss": 0.0213, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.5421840449055962, |
| "grad_norm": 0.44234591722488403, |
| "learning_rate": 2.7886232489215617e-05, |
| "loss": 0.022, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.5426092872937575, |
| "grad_norm": 0.3830717206001282, |
| "learning_rate": 2.7881998991230606e-05, |
| "loss": 0.0241, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.5430345296819187, |
| "grad_norm": 0.44614824652671814, |
| "learning_rate": 2.7877761580034436e-05, |
| "loss": 0.0225, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.5434597720700799, |
| "grad_norm": 0.5328401327133179, |
| "learning_rate": 2.787352025691431e-05, |
| "loss": 0.0235, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.5438850144582412, |
| "grad_norm": 0.4961501657962799, |
| "learning_rate": 2.7869275023158642e-05, |
| "loss": 0.024, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.5443102568464024, |
| "grad_norm": 0.4256893992424011, |
| "learning_rate": 2.7865025880057014e-05, |
| "loss": 0.0254, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.5447354992345637, |
| "grad_norm": 0.40367719531059265, |
| "learning_rate": 2.78607728289002e-05, |
| "loss": 0.0214, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.545160741622725, |
| "grad_norm": 0.3512210249900818, |
| "learning_rate": 2.7856515870980176e-05, |
| "loss": 0.0231, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.5455859840108862, |
| "grad_norm": 0.5113899111747742, |
| "learning_rate": 2.785225500759008e-05, |
| "loss": 0.0227, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.5460112263990474, |
| "grad_norm": 0.42159557342529297, |
| "learning_rate": 2.7847990240024263e-05, |
| "loss": 0.0234, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.5464364687872088, |
| "grad_norm": 0.6201958060264587, |
| "learning_rate": 2.784372156957824e-05, |
| "loss": 0.0243, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.54686171117537, |
| "grad_norm": 0.5295283198356628, |
| "learning_rate": 2.7839448997548728e-05, |
| "loss": 0.0229, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.5472869535635312, |
| "grad_norm": 0.34550079703330994, |
| "learning_rate": 2.783517252523361e-05, |
| "loss": 0.0236, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.5477121959516925, |
| "grad_norm": 0.4696100354194641, |
| "learning_rate": 2.7830892153931975e-05, |
| "loss": 0.0224, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.5481374383398537, |
| "grad_norm": 0.5960995554924011, |
| "learning_rate": 2.7826607884944083e-05, |
| "loss": 0.024, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.548562680728015, |
| "grad_norm": 0.43218421936035156, |
| "learning_rate": 2.782231971957138e-05, |
| "loss": 0.021, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.5489879231161762, |
| "grad_norm": 0.3802073299884796, |
| "learning_rate": 2.78180276591165e-05, |
| "loss": 0.0212, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.5494131655043375, |
| "grad_norm": 0.5046935081481934, |
| "learning_rate": 2.781373170488326e-05, |
| "loss": 0.0243, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.5498384078924987, |
| "grad_norm": 0.3973044753074646, |
| "learning_rate": 2.7809431858176655e-05, |
| "loss": 0.0258, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.5502636502806599, |
| "grad_norm": 0.48014792799949646, |
| "learning_rate": 2.7805128120302865e-05, |
| "loss": 0.0206, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.5506888926688213, |
| "grad_norm": 0.444076269865036, |
| "learning_rate": 2.7800820492569254e-05, |
| "loss": 0.0252, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.5511141350569825, |
| "grad_norm": 0.37103697657585144, |
| "learning_rate": 2.779650897628436e-05, |
| "loss": 0.0221, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.5515393774451437, |
| "grad_norm": 1.3059979677200317, |
| "learning_rate": 2.7792193572757915e-05, |
| "loss": 0.0217, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.551964619833305, |
| "grad_norm": 0.48660945892333984, |
| "learning_rate": 2.7787874283300812e-05, |
| "loss": 0.0232, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.5523898622214662, |
| "grad_norm": 0.4161160886287689, |
| "learning_rate": 2.7783551109225155e-05, |
| "loss": 0.0233, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.5528151046096275, |
| "grad_norm": 0.5009236335754395, |
| "learning_rate": 2.7779224051844195e-05, |
| "loss": 0.0256, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.5532403469977888, |
| "grad_norm": 0.5999890565872192, |
| "learning_rate": 2.777489311247239e-05, |
| "loss": 0.0209, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.55366558938595, |
| "grad_norm": 0.42301124334335327, |
| "learning_rate": 2.7770558292425355e-05, |
| "loss": 0.0232, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.5540908317741112, |
| "grad_norm": 0.4028618037700653, |
| "learning_rate": 2.77662195930199e-05, |
| "loss": 0.0223, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.5545160741622724, |
| "grad_norm": 0.3619602620601654, |
| "learning_rate": 2.7761877015574005e-05, |
| "loss": 0.0238, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.5549413165504338, |
| "grad_norm": 0.3421330153942108, |
| "learning_rate": 2.775753056140683e-05, |
| "loss": 0.0249, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.555366558938595, |
| "grad_norm": 0.4019469618797302, |
| "learning_rate": 2.775318023183871e-05, |
| "loss": 0.0236, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.5557918013267562, |
| "grad_norm": 0.43583133816719055, |
| "learning_rate": 2.774882602819117e-05, |
| "loss": 0.0203, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.5562170437149175, |
| "grad_norm": 0.4878927171230316, |
| "learning_rate": 2.7744467951786892e-05, |
| "loss": 0.0235, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.5566422861030788, |
| "grad_norm": 0.3605678081512451, |
| "learning_rate": 2.7740106003949747e-05, |
| "loss": 0.0225, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.55706752849124, |
| "grad_norm": 0.43232229351997375, |
| "learning_rate": 2.7735740186004785e-05, |
| "loss": 0.0214, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.5574927708794013, |
| "grad_norm": 0.46627750992774963, |
| "learning_rate": 2.7731370499278222e-05, |
| "loss": 0.0237, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.5579180132675625, |
| "grad_norm": 0.405368447303772, |
| "learning_rate": 2.772699694509745e-05, |
| "loss": 0.0234, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.5583432556557237, |
| "grad_norm": 0.4305224120616913, |
| "learning_rate": 2.7722619524791046e-05, |
| "loss": 0.0235, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.5587684980438851, |
| "grad_norm": 0.4244700074195862, |
| "learning_rate": 2.7718238239688747e-05, |
| "loss": 0.0209, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.5591937404320463, |
| "grad_norm": 0.4601515531539917, |
| "learning_rate": 2.7713853091121484e-05, |
| "loss": 0.0211, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.5596189828202075, |
| "grad_norm": 0.4351855516433716, |
| "learning_rate": 2.7709464080421338e-05, |
| "loss": 0.0215, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.5600442252083687, |
| "grad_norm": 0.43480363488197327, |
| "learning_rate": 2.770507120892158e-05, |
| "loss": 0.0212, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.56046946759653, |
| "grad_norm": 0.3325427770614624, |
| "learning_rate": 2.7700674477956643e-05, |
| "loss": 0.021, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.5608947099846913, |
| "grad_norm": 0.4659954905509949, |
| "learning_rate": 2.7696273888862145e-05, |
| "loss": 0.023, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.5613199523728525, |
| "grad_norm": 0.4178023636341095, |
| "learning_rate": 2.7691869442974864e-05, |
| "loss": 0.0199, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.5617451947610138, |
| "grad_norm": 0.4274429678916931, |
| "learning_rate": 2.7687461141632754e-05, |
| "loss": 0.0212, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.562170437149175, |
| "grad_norm": 0.44344186782836914, |
| "learning_rate": 2.7683048986174945e-05, |
| "loss": 0.0219, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.5625956795373362, |
| "grad_norm": 0.4258417785167694, |
| "learning_rate": 2.7678632977941727e-05, |
| "loss": 0.0229, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.5630209219254976, |
| "grad_norm": 0.44375917315483093, |
| "learning_rate": 2.767421311827457e-05, |
| "loss": 0.021, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.5634461643136588, |
| "grad_norm": 0.38370025157928467, |
| "learning_rate": 2.7669789408516117e-05, |
| "loss": 0.0217, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.56387140670182, |
| "grad_norm": 0.4034651219844818, |
| "learning_rate": 2.766536185001016e-05, |
| "loss": 0.0248, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.5642966490899813, |
| "grad_norm": 0.3681996464729309, |
| "learning_rate": 2.7660930444101692e-05, |
| "loss": 0.0227, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.5647218914781426, |
| "grad_norm": 0.48673391342163086, |
| "learning_rate": 2.7656495192136842e-05, |
| "loss": 0.0225, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.5651471338663038, |
| "grad_norm": 0.34889769554138184, |
| "learning_rate": 2.7652056095462926e-05, |
| "loss": 0.0241, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.565572376254465, |
| "grad_norm": 0.39446333050727844, |
| "learning_rate": 2.764761315542843e-05, |
| "loss": 0.0231, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.5659976186426263, |
| "grad_norm": 0.42651620507240295, |
| "learning_rate": 2.7643166373383e-05, |
| "loss": 0.0207, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.5664228610307875, |
| "grad_norm": 0.44455501437187195, |
| "learning_rate": 2.763871575067744e-05, |
| "loss": 0.0263, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.5668481034189488, |
| "grad_norm": 0.40686893463134766, |
| "learning_rate": 2.763426128866375e-05, |
| "loss": 0.0227, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.5672733458071101, |
| "grad_norm": 0.6360083818435669, |
| "learning_rate": 2.7629802988695064e-05, |
| "loss": 0.0232, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.5676985881952713, |
| "grad_norm": 0.47458934783935547, |
| "learning_rate": 2.7625340852125703e-05, |
| "loss": 0.0207, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.5681238305834325, |
| "grad_norm": 0.518570601940155, |
| "learning_rate": 2.7620874880311148e-05, |
| "loss": 0.0242, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.5685490729715938, |
| "grad_norm": 0.36844056844711304, |
| "learning_rate": 2.7616405074608033e-05, |
| "loss": 0.0233, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.5689743153597551, |
| "grad_norm": 0.4950474202632904, |
| "learning_rate": 2.7611931436374173e-05, |
| "loss": 0.0223, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.5693995577479163, |
| "grad_norm": 0.3924030661582947, |
| "learning_rate": 2.7607453966968545e-05, |
| "loss": 0.0216, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.5698248001360776, |
| "grad_norm": 0.44798216223716736, |
| "learning_rate": 2.760297266775128e-05, |
| "loss": 0.0235, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.5702500425242388, |
| "grad_norm": 0.4270899295806885, |
| "learning_rate": 2.759848754008368e-05, |
| "loss": 0.0231, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.5706752849124, |
| "grad_norm": 0.38340136408805847, |
| "learning_rate": 2.759399858532821e-05, |
| "loss": 0.0238, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.5711005273005613, |
| "grad_norm": 0.38224777579307556, |
| "learning_rate": 2.75895058048485e-05, |
| "loss": 0.024, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.5715257696887226, |
| "grad_norm": 0.3722180128097534, |
| "learning_rate": 2.7585009200009326e-05, |
| "loss": 0.0231, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.5719510120768838, |
| "grad_norm": 0.4371410608291626, |
| "learning_rate": 2.7580508772176648e-05, |
| "loss": 0.021, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.572376254465045, |
| "grad_norm": 0.5341368317604065, |
| "learning_rate": 2.7576004522717577e-05, |
| "loss": 0.0227, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.5728014968532064, |
| "grad_norm": 0.4262879192829132, |
| "learning_rate": 2.7571496453000377e-05, |
| "loss": 0.0236, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.5732267392413676, |
| "grad_norm": 0.5993068814277649, |
| "learning_rate": 2.7566984564394487e-05, |
| "loss": 0.0242, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.5736519816295288, |
| "grad_norm": 0.5745378136634827, |
| "learning_rate": 2.7562468858270498e-05, |
| "loss": 0.0225, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.5740772240176901, |
| "grad_norm": 0.5052975416183472, |
| "learning_rate": 2.7557949336000165e-05, |
| "loss": 0.0233, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.5745024664058513, |
| "grad_norm": 0.463056743144989, |
| "learning_rate": 2.7553425998956395e-05, |
| "loss": 0.0227, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.5749277087940126, |
| "grad_norm": 0.5643817782402039, |
| "learning_rate": 2.7548898848513264e-05, |
| "loss": 0.0207, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.5753529511821739, |
| "grad_norm": 0.514615535736084, |
| "learning_rate": 2.754436788604599e-05, |
| "loss": 0.0209, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.5757781935703351, |
| "grad_norm": 0.40102750062942505, |
| "learning_rate": 2.7539833112930973e-05, |
| "loss": 0.0224, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.5762034359584963, |
| "grad_norm": 0.4101232588291168, |
| "learning_rate": 2.753529453054575e-05, |
| "loss": 0.0239, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.5766286783466575, |
| "grad_norm": 0.3861386775970459, |
| "learning_rate": 2.7530752140269028e-05, |
| "loss": 0.0213, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.5770539207348189, |
| "grad_norm": 0.45340144634246826, |
| "learning_rate": 2.7526205943480657e-05, |
| "loss": 0.023, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.5774791631229801, |
| "grad_norm": 0.39935538172721863, |
| "learning_rate": 2.7521655941561653e-05, |
| "loss": 0.0226, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.5779044055111413, |
| "grad_norm": 0.32109445333480835, |
| "learning_rate": 2.751710213589419e-05, |
| "loss": 0.0242, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.5783296478993026, |
| "grad_norm": 0.36769798398017883, |
| "learning_rate": 2.7512544527861597e-05, |
| "loss": 0.0231, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.5787548902874639, |
| "grad_norm": 0.4025172293186188, |
| "learning_rate": 2.7507983118848353e-05, |
| "loss": 0.0201, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.5791801326756251, |
| "grad_norm": 0.40505367517471313, |
| "learning_rate": 2.7503417910240085e-05, |
| "loss": 0.0205, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.5796053750637864, |
| "grad_norm": 0.32008427381515503, |
| "learning_rate": 2.7498848903423593e-05, |
| "loss": 0.022, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.5800306174519476, |
| "grad_norm": 0.35272738337516785, |
| "learning_rate": 2.7494276099786817e-05, |
| "loss": 0.0226, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.5804558598401088, |
| "grad_norm": 0.6926178336143494, |
| "learning_rate": 2.7489699500718854e-05, |
| "loss": 0.0248, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.5808811022282702, |
| "grad_norm": 0.4260108470916748, |
| "learning_rate": 2.7485119107609953e-05, |
| "loss": 0.021, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.5813063446164314, |
| "grad_norm": 0.32744520902633667, |
| "learning_rate": 2.748053492185152e-05, |
| "loss": 0.021, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.5817315870045926, |
| "grad_norm": 0.35791370272636414, |
| "learning_rate": 2.7475946944836103e-05, |
| "loss": 0.0236, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.5821568293927538, |
| "grad_norm": 0.3699380159378052, |
| "learning_rate": 2.7471355177957414e-05, |
| "loss": 0.0232, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.5825820717809151, |
| "grad_norm": 0.4098301827907562, |
| "learning_rate": 2.746675962261031e-05, |
| "loss": 0.0239, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.5830073141690764, |
| "grad_norm": 0.4216383099555969, |
| "learning_rate": 2.74621602801908e-05, |
| "loss": 0.0228, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.5834325565572376, |
| "grad_norm": 0.491955041885376, |
| "learning_rate": 2.745755715209604e-05, |
| "loss": 0.0206, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.5838577989453989, |
| "grad_norm": 0.4711659252643585, |
| "learning_rate": 2.7452950239724337e-05, |
| "loss": 0.026, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.5842830413335601, |
| "grad_norm": 0.4391963481903076, |
| "learning_rate": 2.7448339544475156e-05, |
| "loss": 0.021, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.5847082837217213, |
| "grad_norm": 0.5280522108078003, |
| "learning_rate": 2.7443725067749098e-05, |
| "loss": 0.025, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.5851335261098827, |
| "grad_norm": 0.465518981218338, |
| "learning_rate": 2.7439106810947923e-05, |
| "loss": 0.0236, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.5855587684980439, |
| "grad_norm": 0.5963212251663208, |
| "learning_rate": 2.7434484775474532e-05, |
| "loss": 0.0208, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.5859840108862051, |
| "grad_norm": 0.5224639773368835, |
| "learning_rate": 2.7429858962732984e-05, |
| "loss": 0.0198, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.5864092532743664, |
| "grad_norm": 0.6179777979850769, |
| "learning_rate": 2.7425229374128475e-05, |
| "loss": 0.0224, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.5868344956625277, |
| "grad_norm": 0.5582897067070007, |
| "learning_rate": 2.742059601106735e-05, |
| "loss": 0.0211, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.5872597380506889, |
| "grad_norm": 0.4970376789569855, |
| "learning_rate": 2.74159588749571e-05, |
| "loss": 0.0199, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.5876849804388501, |
| "grad_norm": 0.4438394010066986, |
| "learning_rate": 2.7411317967206373e-05, |
| "loss": 0.0223, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.5881102228270114, |
| "grad_norm": 0.40164047479629517, |
| "learning_rate": 2.7406673289224945e-05, |
| "loss": 0.0226, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.5885354652151726, |
| "grad_norm": 0.520200788974762, |
| "learning_rate": 2.740202484242376e-05, |
| "loss": 0.0242, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.5889607076033339, |
| "grad_norm": 0.40304872393608093, |
| "learning_rate": 2.7397372628214873e-05, |
| "loss": 0.0208, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.5893859499914952, |
| "grad_norm": 0.39076563715934753, |
| "learning_rate": 2.739271664801152e-05, |
| "loss": 0.0192, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.5898111923796564, |
| "grad_norm": 0.2574770152568817, |
| "learning_rate": 2.738805690322806e-05, |
| "loss": 0.0226, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.5902364347678176, |
| "grad_norm": 0.39357537031173706, |
| "learning_rate": 2.7383393395280004e-05, |
| "loss": 0.0213, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.590661677155979, |
| "grad_norm": 0.4306516945362091, |
| "learning_rate": 2.7378726125584e-05, |
| "loss": 0.0196, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.5910869195441402, |
| "grad_norm": 0.42869049310684204, |
| "learning_rate": 2.7374055095557832e-05, |
| "loss": 0.02, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.5915121619323014, |
| "grad_norm": 0.41680777072906494, |
| "learning_rate": 2.7369380306620446e-05, |
| "loss": 0.0237, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.5919374043204627, |
| "grad_norm": 0.36180999875068665, |
| "learning_rate": 2.7364701760191923e-05, |
| "loss": 0.0202, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.5923626467086239, |
| "grad_norm": 0.44238200783729553, |
| "learning_rate": 2.7360019457693468e-05, |
| "loss": 0.0195, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.5927878890967851, |
| "grad_norm": 0.3397182524204254, |
| "learning_rate": 2.7355333400547457e-05, |
| "loss": 0.02, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.5932131314849464, |
| "grad_norm": 0.4424580931663513, |
| "learning_rate": 2.7350643590177378e-05, |
| "loss": 0.0221, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.5936383738731077, |
| "grad_norm": 0.4530566334724426, |
| "learning_rate": 2.7345950028007878e-05, |
| "loss": 0.022, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.5940636162612689, |
| "grad_norm": 0.4371461570262909, |
| "learning_rate": 2.7341252715464736e-05, |
| "loss": 0.0231, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.5944888586494301, |
| "grad_norm": 0.41568294167518616, |
| "learning_rate": 2.7336551653974868e-05, |
| "loss": 0.0202, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.5949141010375915, |
| "grad_norm": 0.4857887327671051, |
| "learning_rate": 2.733184684496634e-05, |
| "loss": 0.0218, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.5953393434257527, |
| "grad_norm": 0.32679444551467896, |
| "learning_rate": 2.7327138289868337e-05, |
| "loss": 0.0189, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.5957645858139139, |
| "grad_norm": 0.4400253891944885, |
| "learning_rate": 2.7322425990111208e-05, |
| "loss": 0.0224, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.5961898282020752, |
| "grad_norm": 0.3958609402179718, |
| "learning_rate": 2.7317709947126416e-05, |
| "loss": 0.0223, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.5966150705902364, |
| "grad_norm": 0.3876691162586212, |
| "learning_rate": 2.731299016234657e-05, |
| "loss": 0.0222, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.5970403129783977, |
| "grad_norm": 0.48080649971961975, |
| "learning_rate": 2.730826663720542e-05, |
| "loss": 0.0194, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.597465555366559, |
| "grad_norm": 0.4989674687385559, |
| "learning_rate": 2.730353937313785e-05, |
| "loss": 0.0225, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.5978907977547202, |
| "grad_norm": 0.3855687379837036, |
| "learning_rate": 2.729880837157987e-05, |
| "loss": 0.0197, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.5983160401428814, |
| "grad_norm": 0.3866235315799713, |
| "learning_rate": 2.729407363396864e-05, |
| "loss": 0.0175, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.5987412825310426, |
| "grad_norm": 0.5367085933685303, |
| "learning_rate": 2.7289335161742444e-05, |
| "loss": 0.0196, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.599166524919204, |
| "grad_norm": 0.42395225167274475, |
| "learning_rate": 2.7284592956340707e-05, |
| "loss": 0.0193, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.5995917673073652, |
| "grad_norm": 0.43578681349754333, |
| "learning_rate": 2.727984701920399e-05, |
| "loss": 0.0201, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.6000170096955264, |
| "grad_norm": 0.5050670504570007, |
| "learning_rate": 2.7275097351773977e-05, |
| "loss": 0.0218, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.6004422520836877, |
| "grad_norm": 0.44479116797447205, |
| "learning_rate": 2.727034395549349e-05, |
| "loss": 0.0218, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.600867494471849, |
| "grad_norm": 0.4565848708152771, |
| "learning_rate": 2.726558683180649e-05, |
| "loss": 0.022, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.6012927368600102, |
| "grad_norm": 0.4360315799713135, |
| "learning_rate": 2.7260825982158067e-05, |
| "loss": 0.0201, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.6017179792481715, |
| "grad_norm": 0.361296683549881, |
| "learning_rate": 2.725606140799444e-05, |
| "loss": 0.0179, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.6021432216363327, |
| "grad_norm": 0.40813711285591125, |
| "learning_rate": 2.7251293110762957e-05, |
| "loss": 0.0212, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.6025684640244939, |
| "grad_norm": 0.3440335988998413, |
| "learning_rate": 2.7246521091912106e-05, |
| "loss": 0.022, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.6029937064126553, |
| "grad_norm": 0.40166258811950684, |
| "learning_rate": 2.7241745352891497e-05, |
| "loss": 0.0194, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.6034189488008165, |
| "grad_norm": 0.4059109687805176, |
| "learning_rate": 2.7236965895151874e-05, |
| "loss": 0.0219, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.6038441911889777, |
| "grad_norm": 0.4299823045730591, |
| "learning_rate": 2.7232182720145105e-05, |
| "loss": 0.0225, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.6042694335771389, |
| "grad_norm": 0.5142824053764343, |
| "learning_rate": 2.7227395829324208e-05, |
| "loss": 0.0229, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.6046946759653002, |
| "grad_norm": 0.36144012212753296, |
| "learning_rate": 2.7222605224143298e-05, |
| "loss": 0.0222, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.6051199183534615, |
| "grad_norm": 0.43536025285720825, |
| "learning_rate": 2.721781090605764e-05, |
| "loss": 0.0228, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.6055451607416227, |
| "grad_norm": 0.3937072455883026, |
| "learning_rate": 2.721301287652362e-05, |
| "loss": 0.0215, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.605970403129784, |
| "grad_norm": 0.3862617313861847, |
| "learning_rate": 2.7208211136998758e-05, |
| "loss": 0.0223, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.6063956455179452, |
| "grad_norm": 0.41137275099754333, |
| "learning_rate": 2.7203405688941693e-05, |
| "loss": 0.0209, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.6068208879061064, |
| "grad_norm": 0.46557578444480896, |
| "learning_rate": 2.719859653381219e-05, |
| "loss": 0.0221, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.6072461302942678, |
| "grad_norm": 0.4357418119907379, |
| "learning_rate": 2.719378367307114e-05, |
| "loss": 0.0191, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.607671372682429, |
| "grad_norm": 0.4667029082775116, |
| "learning_rate": 2.7188967108180574e-05, |
| "loss": 0.0198, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.6080966150705902, |
| "grad_norm": 0.3489953577518463, |
| "learning_rate": 2.7184146840603634e-05, |
| "loss": 0.0212, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.6085218574587515, |
| "grad_norm": 0.34787535667419434, |
| "learning_rate": 2.7179322871804584e-05, |
| "loss": 0.0231, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.6089470998469128, |
| "grad_norm": 0.39182159304618835, |
| "learning_rate": 2.717449520324882e-05, |
| "loss": 0.0216, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.609372342235074, |
| "grad_norm": 0.3901592791080475, |
| "learning_rate": 2.7169663836402864e-05, |
| "loss": 0.0187, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.6097975846232352, |
| "grad_norm": 0.35816362500190735, |
| "learning_rate": 2.7164828772734352e-05, |
| "loss": 0.022, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.6102228270113965, |
| "grad_norm": 0.35063478350639343, |
| "learning_rate": 2.7159990013712057e-05, |
| "loss": 0.0192, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.6106480693995577, |
| "grad_norm": 0.42145827412605286, |
| "learning_rate": 2.7155147560805863e-05, |
| "loss": 0.0224, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.611073311787719, |
| "grad_norm": 0.439269095659256, |
| "learning_rate": 2.7150301415486777e-05, |
| "loss": 0.0202, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.6114985541758803, |
| "grad_norm": 0.44516879320144653, |
| "learning_rate": 2.7145451579226932e-05, |
| "loss": 0.0207, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.6119237965640415, |
| "grad_norm": 0.4259718358516693, |
| "learning_rate": 2.7140598053499577e-05, |
| "loss": 0.0226, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.6123490389522027, |
| "grad_norm": 0.44744691252708435, |
| "learning_rate": 2.7135740839779087e-05, |
| "loss": 0.02, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.612774281340364, |
| "grad_norm": 0.5334987640380859, |
| "learning_rate": 2.7130879939540957e-05, |
| "loss": 0.0191, |
| "step": 14410 |
| }, |
| { |
| "epoch": 0.6131995237285253, |
| "grad_norm": 0.41623225808143616, |
| "learning_rate": 2.7126015354261798e-05, |
| "loss": 0.0186, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.6136247661166865, |
| "grad_norm": 0.44290414452552795, |
| "learning_rate": 2.712114708541934e-05, |
| "loss": 0.0212, |
| "step": 14430 |
| }, |
| { |
| "epoch": 0.6140500085048478, |
| "grad_norm": 0.4709611237049103, |
| "learning_rate": 2.7116275134492445e-05, |
| "loss": 0.0226, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.614475250893009, |
| "grad_norm": 0.46769437193870544, |
| "learning_rate": 2.7111399502961073e-05, |
| "loss": 0.0205, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.6149004932811702, |
| "grad_norm": 0.38859257102012634, |
| "learning_rate": 2.710652019230631e-05, |
| "loss": 0.0212, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.6153257356693315, |
| "grad_norm": 0.3308449685573578, |
| "learning_rate": 2.710163720401037e-05, |
| "loss": 0.0206, |
| "step": 14470 |
| }, |
| { |
| "epoch": 0.6157509780574928, |
| "grad_norm": 0.48803281784057617, |
| "learning_rate": 2.709675053955658e-05, |
| "loss": 0.0194, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.616176220445654, |
| "grad_norm": 0.4074990451335907, |
| "learning_rate": 2.7091860200429362e-05, |
| "loss": 0.0205, |
| "step": 14490 |
| }, |
| { |
| "epoch": 0.6166014628338152, |
| "grad_norm": 0.3850661814212799, |
| "learning_rate": 2.7086966188114285e-05, |
| "loss": 0.0224, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.6170267052219766, |
| "grad_norm": 0.380643755197525, |
| "learning_rate": 2.708206850409802e-05, |
| "loss": 0.0222, |
| "step": 14510 |
| }, |
| { |
| "epoch": 0.6174519476101378, |
| "grad_norm": 0.4678167700767517, |
| "learning_rate": 2.707716714986835e-05, |
| "loss": 0.0195, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.617877189998299, |
| "grad_norm": 0.39859825372695923, |
| "learning_rate": 2.707226212691418e-05, |
| "loss": 0.0206, |
| "step": 14530 |
| }, |
| { |
| "epoch": 0.6183024323864603, |
| "grad_norm": 0.4449918568134308, |
| "learning_rate": 2.7067353436725525e-05, |
| "loss": 0.0203, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.6187276747746215, |
| "grad_norm": 0.4654516279697418, |
| "learning_rate": 2.706244108079352e-05, |
| "loss": 0.0226, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.6191529171627828, |
| "grad_norm": 0.4559080898761749, |
| "learning_rate": 2.7057525060610396e-05, |
| "loss": 0.0226, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.6195781595509441, |
| "grad_norm": 0.34623491764068604, |
| "learning_rate": 2.7052605377669527e-05, |
| "loss": 0.0205, |
| "step": 14570 |
| }, |
| { |
| "epoch": 0.6200034019391053, |
| "grad_norm": 0.4986266791820526, |
| "learning_rate": 2.704768203346537e-05, |
| "loss": 0.0211, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.6204286443272665, |
| "grad_norm": 0.4883112907409668, |
| "learning_rate": 2.7042755029493513e-05, |
| "loss": 0.0218, |
| "step": 14590 |
| }, |
| { |
| "epoch": 0.6208538867154277, |
| "grad_norm": 0.5301306247711182, |
| "learning_rate": 2.703782436725065e-05, |
| "loss": 0.0219, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.6212791291035891, |
| "grad_norm": 0.42371174693107605, |
| "learning_rate": 2.7032890048234585e-05, |
| "loss": 0.022, |
| "step": 14610 |
| }, |
| { |
| "epoch": 0.6217043714917503, |
| "grad_norm": 0.5062300562858582, |
| "learning_rate": 2.702795207394423e-05, |
| "loss": 0.0199, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.6221296138799115, |
| "grad_norm": 0.48883020877838135, |
| "learning_rate": 2.702301044587962e-05, |
| "loss": 0.0205, |
| "step": 14630 |
| }, |
| { |
| "epoch": 0.6225548562680728, |
| "grad_norm": 0.5089893937110901, |
| "learning_rate": 2.7018065165541885e-05, |
| "loss": 0.0206, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.622980098656234, |
| "grad_norm": 0.48434340953826904, |
| "learning_rate": 2.701311623443327e-05, |
| "loss": 0.0211, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.6234053410443953, |
| "grad_norm": 0.5042490363121033, |
| "learning_rate": 2.700816365405713e-05, |
| "loss": 0.0194, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.6238305834325566, |
| "grad_norm": 0.4644983112812042, |
| "learning_rate": 2.7003207425917926e-05, |
| "loss": 0.0216, |
| "step": 14670 |
| }, |
| { |
| "epoch": 0.6242558258207178, |
| "grad_norm": 0.46126359701156616, |
| "learning_rate": 2.6998247551521238e-05, |
| "loss": 0.0211, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.624681068208879, |
| "grad_norm": 0.44964349269866943, |
| "learning_rate": 2.6993284032373732e-05, |
| "loss": 0.0212, |
| "step": 14690 |
| }, |
| { |
| "epoch": 0.6251063105970404, |
| "grad_norm": 0.3886130154132843, |
| "learning_rate": 2.6988316869983208e-05, |
| "loss": 0.0224, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.6255315529852016, |
| "grad_norm": 0.40591052174568176, |
| "learning_rate": 2.698334606585855e-05, |
| "loss": 0.0205, |
| "step": 14710 |
| }, |
| { |
| "epoch": 0.6259567953733628, |
| "grad_norm": 0.539798378944397, |
| "learning_rate": 2.6978371621509757e-05, |
| "loss": 0.0203, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.6263820377615241, |
| "grad_norm": 0.46444740891456604, |
| "learning_rate": 2.697339353844794e-05, |
| "loss": 0.0219, |
| "step": 14730 |
| }, |
| { |
| "epoch": 0.6268072801496853, |
| "grad_norm": 0.3508462607860565, |
| "learning_rate": 2.6968411818185303e-05, |
| "loss": 0.0202, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.6272325225378466, |
| "grad_norm": 0.45676133036613464, |
| "learning_rate": 2.6963426462235167e-05, |
| "loss": 0.0199, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.6276577649260078, |
| "grad_norm": 0.43334293365478516, |
| "learning_rate": 2.6958437472111947e-05, |
| "loss": 0.023, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.6280830073141691, |
| "grad_norm": 0.3671410083770752, |
| "learning_rate": 2.695344484933116e-05, |
| "loss": 0.0193, |
| "step": 14770 |
| }, |
| { |
| "epoch": 0.6285082497023303, |
| "grad_norm": 0.38691452145576477, |
| "learning_rate": 2.6948448595409443e-05, |
| "loss": 0.0205, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.6289334920904915, |
| "grad_norm": 0.4143933355808258, |
| "learning_rate": 2.694344871186453e-05, |
| "loss": 0.0201, |
| "step": 14790 |
| }, |
| { |
| "epoch": 0.6293587344786529, |
| "grad_norm": 0.27843642234802246, |
| "learning_rate": 2.6938445200215237e-05, |
| "loss": 0.0214, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.6297839768668141, |
| "grad_norm": 0.33850619196891785, |
| "learning_rate": 2.6933438061981513e-05, |
| "loss": 0.0198, |
| "step": 14810 |
| }, |
| { |
| "epoch": 0.6302092192549753, |
| "grad_norm": 0.3694206178188324, |
| "learning_rate": 2.6928427298684395e-05, |
| "loss": 0.02, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.6306344616431366, |
| "grad_norm": 0.37032657861709595, |
| "learning_rate": 2.6923412911846008e-05, |
| "loss": 0.0208, |
| "step": 14830 |
| }, |
| { |
| "epoch": 0.6310597040312979, |
| "grad_norm": 0.4800131022930145, |
| "learning_rate": 2.6918394902989604e-05, |
| "loss": 0.0209, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.6314849464194591, |
| "grad_norm": 0.4037657082080841, |
| "learning_rate": 2.6913373273639508e-05, |
| "loss": 0.0202, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.6319101888076204, |
| "grad_norm": 0.40070000290870667, |
| "learning_rate": 2.690834802532117e-05, |
| "loss": 0.0198, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.6323354311957816, |
| "grad_norm": 0.4060189425945282, |
| "learning_rate": 2.6903319159561125e-05, |
| "loss": 0.0219, |
| "step": 14870 |
| }, |
| { |
| "epoch": 0.6327606735839428, |
| "grad_norm": 0.4039824903011322, |
| "learning_rate": 2.6898286677887005e-05, |
| "loss": 0.0223, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.633185915972104, |
| "grad_norm": 0.40134313702583313, |
| "learning_rate": 2.689325058182755e-05, |
| "loss": 0.0209, |
| "step": 14890 |
| }, |
| { |
| "epoch": 0.6336111583602654, |
| "grad_norm": 0.45482826232910156, |
| "learning_rate": 2.6888210872912594e-05, |
| "loss": 0.0229, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.6340364007484266, |
| "grad_norm": 0.3572455048561096, |
| "learning_rate": 2.688316755267306e-05, |
| "loss": 0.0221, |
| "step": 14910 |
| }, |
| { |
| "epoch": 0.6344616431365878, |
| "grad_norm": 0.3394013047218323, |
| "learning_rate": 2.6878120622640988e-05, |
| "loss": 0.0213, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.6348868855247491, |
| "grad_norm": 0.37122642993927, |
| "learning_rate": 2.6873070084349486e-05, |
| "loss": 0.0216, |
| "step": 14930 |
| }, |
| { |
| "epoch": 0.6353121279129104, |
| "grad_norm": 0.3685668408870697, |
| "learning_rate": 2.6868015939332788e-05, |
| "loss": 0.0212, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.6357373703010716, |
| "grad_norm": 0.3652264475822449, |
| "learning_rate": 2.68629581891262e-05, |
| "loss": 0.0215, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.6361626126892329, |
| "grad_norm": 0.3909711539745331, |
| "learning_rate": 2.685789683526614e-05, |
| "loss": 0.0186, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.6365878550773941, |
| "grad_norm": 0.5734156370162964, |
| "learning_rate": 2.685283187929011e-05, |
| "loss": 0.0217, |
| "step": 14970 |
| }, |
| { |
| "epoch": 0.6370130974655553, |
| "grad_norm": 0.643765926361084, |
| "learning_rate": 2.6847763322736713e-05, |
| "loss": 0.0213, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.6374383398537167, |
| "grad_norm": 0.3826221823692322, |
| "learning_rate": 2.684269116714564e-05, |
| "loss": 0.0195, |
| "step": 14990 |
| }, |
| { |
| "epoch": 0.6378635822418779, |
| "grad_norm": 0.4413127303123474, |
| "learning_rate": 2.6837615414057677e-05, |
| "loss": 0.0205, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.6382888246300391, |
| "grad_norm": 0.5099561810493469, |
| "learning_rate": 2.6832536065014708e-05, |
| "loss": 0.0225, |
| "step": 15010 |
| }, |
| { |
| "epoch": 0.6387140670182003, |
| "grad_norm": 0.40210336446762085, |
| "learning_rate": 2.68274531215597e-05, |
| "loss": 0.0226, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.6391393094063617, |
| "grad_norm": 0.3939429521560669, |
| "learning_rate": 2.6822366585236716e-05, |
| "loss": 0.0194, |
| "step": 15030 |
| }, |
| { |
| "epoch": 0.6395645517945229, |
| "grad_norm": 0.33854517340660095, |
| "learning_rate": 2.6817276457590924e-05, |
| "loss": 0.0192, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.6399897941826841, |
| "grad_norm": 0.3618461787700653, |
| "learning_rate": 2.6812182740168555e-05, |
| "loss": 0.0211, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.6404150365708454, |
| "grad_norm": 0.35461387038230896, |
| "learning_rate": 2.6807085434516953e-05, |
| "loss": 0.0208, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.6408402789590066, |
| "grad_norm": 0.4645833373069763, |
| "learning_rate": 2.6801984542184544e-05, |
| "loss": 0.0229, |
| "step": 15070 |
| }, |
| { |
| "epoch": 0.6412655213471679, |
| "grad_norm": 0.40264660120010376, |
| "learning_rate": 2.6796880064720845e-05, |
| "loss": 0.0234, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.6416907637353292, |
| "grad_norm": 0.3904982805252075, |
| "learning_rate": 2.6791772003676462e-05, |
| "loss": 0.0235, |
| "step": 15090 |
| }, |
| { |
| "epoch": 0.6421160061234904, |
| "grad_norm": 0.4172634780406952, |
| "learning_rate": 2.6786660360603087e-05, |
| "loss": 0.0213, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.6425412485116516, |
| "grad_norm": 0.5049511790275574, |
| "learning_rate": 2.6781545137053503e-05, |
| "loss": 0.0229, |
| "step": 15110 |
| }, |
| { |
| "epoch": 0.642966490899813, |
| "grad_norm": 0.4754362106323242, |
| "learning_rate": 2.677642633458158e-05, |
| "loss": 0.0193, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.6433917332879742, |
| "grad_norm": 0.36239519715309143, |
| "learning_rate": 2.6771303954742274e-05, |
| "loss": 0.0206, |
| "step": 15130 |
| }, |
| { |
| "epoch": 0.6438169756761354, |
| "grad_norm": 0.4245750308036804, |
| "learning_rate": 2.6766177999091633e-05, |
| "loss": 0.0214, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.6442422180642966, |
| "grad_norm": 0.3785336911678314, |
| "learning_rate": 2.676104846918678e-05, |
| "loss": 0.0199, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.6446674604524579, |
| "grad_norm": 0.4175279140472412, |
| "learning_rate": 2.675591536658594e-05, |
| "loss": 0.0212, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.6450927028406191, |
| "grad_norm": 0.38691219687461853, |
| "learning_rate": 2.6750778692848405e-05, |
| "loss": 0.0216, |
| "step": 15170 |
| }, |
| { |
| "epoch": 0.6455179452287804, |
| "grad_norm": 0.49059873819351196, |
| "learning_rate": 2.6745638449534562e-05, |
| "loss": 0.0207, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.6459431876169417, |
| "grad_norm": 0.4424341320991516, |
| "learning_rate": 2.6740494638205888e-05, |
| "loss": 0.0238, |
| "step": 15190 |
| }, |
| { |
| "epoch": 0.6463684300051029, |
| "grad_norm": 0.3581426441669464, |
| "learning_rate": 2.6735347260424928e-05, |
| "loss": 0.0202, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.6467936723932641, |
| "grad_norm": 0.3555155098438263, |
| "learning_rate": 2.6730196317755326e-05, |
| "loss": 0.0197, |
| "step": 15210 |
| }, |
| { |
| "epoch": 0.6472189147814255, |
| "grad_norm": 0.33948877453804016, |
| "learning_rate": 2.6725041811761805e-05, |
| "loss": 0.0203, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.6476441571695867, |
| "grad_norm": 0.38188841938972473, |
| "learning_rate": 2.671988374401016e-05, |
| "loss": 0.0211, |
| "step": 15230 |
| }, |
| { |
| "epoch": 0.6480693995577479, |
| "grad_norm": 0.33603012561798096, |
| "learning_rate": 2.671472211606728e-05, |
| "loss": 0.0185, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.6484946419459092, |
| "grad_norm": 0.5087174773216248, |
| "learning_rate": 2.6709556929501128e-05, |
| "loss": 0.0212, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.6489198843340704, |
| "grad_norm": 0.3816879987716675, |
| "learning_rate": 2.670438818588076e-05, |
| "loss": 0.0208, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.6493451267222317, |
| "grad_norm": 0.4196940064430237, |
| "learning_rate": 2.6699215886776287e-05, |
| "loss": 0.0233, |
| "step": 15270 |
| }, |
| { |
| "epoch": 0.6497703691103929, |
| "grad_norm": 0.42934325337409973, |
| "learning_rate": 2.6694040033758933e-05, |
| "loss": 0.0217, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.6501956114985542, |
| "grad_norm": 0.49663063883781433, |
| "learning_rate": 2.6688860628400982e-05, |
| "loss": 0.0218, |
| "step": 15290 |
| }, |
| { |
| "epoch": 0.6506208538867154, |
| "grad_norm": 0.3750404417514801, |
| "learning_rate": 2.6683677672275797e-05, |
| "loss": 0.0212, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.6510460962748766, |
| "grad_norm": 0.4539925158023834, |
| "learning_rate": 2.6678491166957825e-05, |
| "loss": 0.0199, |
| "step": 15310 |
| }, |
| { |
| "epoch": 0.651471338663038, |
| "grad_norm": 0.32169193029403687, |
| "learning_rate": 2.6673301114022592e-05, |
| "loss": 0.019, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.6518965810511992, |
| "grad_norm": 0.5332807302474976, |
| "learning_rate": 2.666810751504669e-05, |
| "loss": 0.022, |
| "step": 15330 |
| }, |
| { |
| "epoch": 0.6523218234393604, |
| "grad_norm": 0.39766547083854675, |
| "learning_rate": 2.6662910371607807e-05, |
| "loss": 0.02, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.6527470658275217, |
| "grad_norm": 0.32298263907432556, |
| "learning_rate": 2.6657709685284696e-05, |
| "loss": 0.021, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.653172308215683, |
| "grad_norm": 0.4446607232093811, |
| "learning_rate": 2.6652505457657183e-05, |
| "loss": 0.0213, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6535975506038442, |
| "grad_norm": 0.36240512132644653, |
| "learning_rate": 2.664729769030618e-05, |
| "loss": 0.0212, |
| "step": 15370 |
| }, |
| { |
| "epoch": 0.6540227929920055, |
| "grad_norm": 0.31035521626472473, |
| "learning_rate": 2.6642086384813667e-05, |
| "loss": 0.0193, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.6544480353801667, |
| "grad_norm": 0.3434225618839264, |
| "learning_rate": 2.6636871542762703e-05, |
| "loss": 0.0219, |
| "step": 15390 |
| }, |
| { |
| "epoch": 0.6548732777683279, |
| "grad_norm": 0.37940528988838196, |
| "learning_rate": 2.6631653165737418e-05, |
| "loss": 0.021, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.6552985201564892, |
| "grad_norm": 0.4367035925388336, |
| "learning_rate": 2.662643125532302e-05, |
| "loss": 0.0175, |
| "step": 15410 |
| }, |
| { |
| "epoch": 0.6557237625446505, |
| "grad_norm": 0.3646489083766937, |
| "learning_rate": 2.6621205813105777e-05, |
| "loss": 0.0235, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.6561490049328117, |
| "grad_norm": 0.3998994827270508, |
| "learning_rate": 2.6615976840673056e-05, |
| "loss": 0.0204, |
| "step": 15430 |
| }, |
| { |
| "epoch": 0.6565742473209729, |
| "grad_norm": 0.510734498500824, |
| "learning_rate": 2.6610744339613265e-05, |
| "loss": 0.0239, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.6569994897091342, |
| "grad_norm": 0.40119668841362, |
| "learning_rate": 2.6605508311515916e-05, |
| "loss": 0.0179, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.6574247320972955, |
| "grad_norm": 0.36183375120162964, |
| "learning_rate": 2.6600268757971566e-05, |
| "loss": 0.0197, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.6578499744854567, |
| "grad_norm": 0.32286256551742554, |
| "learning_rate": 2.659502568057185e-05, |
| "loss": 0.0177, |
| "step": 15470 |
| }, |
| { |
| "epoch": 0.658275216873618, |
| "grad_norm": 0.3489496111869812, |
| "learning_rate": 2.658977908090949e-05, |
| "loss": 0.0211, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.6587004592617792, |
| "grad_norm": 0.35360145568847656, |
| "learning_rate": 2.6584528960578245e-05, |
| "loss": 0.0211, |
| "step": 15490 |
| }, |
| { |
| "epoch": 0.6591257016499404, |
| "grad_norm": 0.3279133439064026, |
| "learning_rate": 2.657927532117298e-05, |
| "loss": 0.0224, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.6595509440381018, |
| "grad_norm": 0.5307937860488892, |
| "learning_rate": 2.6574018164289605e-05, |
| "loss": 0.0206, |
| "step": 15510 |
| }, |
| { |
| "epoch": 0.659976186426263, |
| "grad_norm": 0.44229283928871155, |
| "learning_rate": 2.6568757491525103e-05, |
| "loss": 0.0212, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.6604014288144242, |
| "grad_norm": 0.4387955963611603, |
| "learning_rate": 2.6563493304477534e-05, |
| "loss": 0.0195, |
| "step": 15530 |
| }, |
| { |
| "epoch": 0.6608266712025854, |
| "grad_norm": 0.35147640109062195, |
| "learning_rate": 2.655822560474601e-05, |
| "loss": 0.0197, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.6612519135907468, |
| "grad_norm": 0.415713369846344, |
| "learning_rate": 2.655295439393073e-05, |
| "loss": 0.0202, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.661677155978908, |
| "grad_norm": 0.4200793504714966, |
| "learning_rate": 2.654767967363294e-05, |
| "loss": 0.0226, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.6621023983670692, |
| "grad_norm": 0.42018580436706543, |
| "learning_rate": 2.6542401445454967e-05, |
| "loss": 0.0215, |
| "step": 15570 |
| }, |
| { |
| "epoch": 0.6625276407552305, |
| "grad_norm": 0.4219094514846802, |
| "learning_rate": 2.6537119711000187e-05, |
| "loss": 0.0233, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.6629528831433917, |
| "grad_norm": 0.5176847577095032, |
| "learning_rate": 2.6531834471873068e-05, |
| "loss": 0.0192, |
| "step": 15590 |
| }, |
| { |
| "epoch": 0.663378125531553, |
| "grad_norm": 0.3487168550491333, |
| "learning_rate": 2.6526545729679115e-05, |
| "loss": 0.0204, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.6638033679197143, |
| "grad_norm": 0.4768315851688385, |
| "learning_rate": 2.6521253486024908e-05, |
| "loss": 0.0217, |
| "step": 15610 |
| }, |
| { |
| "epoch": 0.6642286103078755, |
| "grad_norm": 0.3240163326263428, |
| "learning_rate": 2.651595774251809e-05, |
| "loss": 0.0188, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.6646538526960367, |
| "grad_norm": 0.3336798846721649, |
| "learning_rate": 2.6510658500767378e-05, |
| "loss": 0.0191, |
| "step": 15630 |
| }, |
| { |
| "epoch": 0.665079095084198, |
| "grad_norm": 0.46676674485206604, |
| "learning_rate": 2.650535576238253e-05, |
| "loss": 0.0198, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.6655043374723593, |
| "grad_norm": 0.4120013117790222, |
| "learning_rate": 2.650004952897438e-05, |
| "loss": 0.0204, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.6659295798605205, |
| "grad_norm": 0.36168473958969116, |
| "learning_rate": 2.649473980215483e-05, |
| "loss": 0.02, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.6663548222486817, |
| "grad_norm": 0.34925559163093567, |
| "learning_rate": 2.648942658353683e-05, |
| "loss": 0.0192, |
| "step": 15670 |
| }, |
| { |
| "epoch": 0.666780064636843, |
| "grad_norm": 0.31256768107414246, |
| "learning_rate": 2.648410987473439e-05, |
| "loss": 0.0186, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.6672053070250042, |
| "grad_norm": 0.37509337067604065, |
| "learning_rate": 2.6478789677362596e-05, |
| "loss": 0.019, |
| "step": 15690 |
| }, |
| { |
| "epoch": 0.6676305494131655, |
| "grad_norm": 0.29366612434387207, |
| "learning_rate": 2.6473465993037572e-05, |
| "loss": 0.0191, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.6680557918013268, |
| "grad_norm": 0.3554866909980774, |
| "learning_rate": 2.6468138823376522e-05, |
| "loss": 0.0207, |
| "step": 15710 |
| }, |
| { |
| "epoch": 0.668481034189488, |
| "grad_norm": 0.39348745346069336, |
| "learning_rate": 2.6462808169997695e-05, |
| "loss": 0.0205, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.6689062765776492, |
| "grad_norm": 0.2861650288105011, |
| "learning_rate": 2.6457474034520402e-05, |
| "loss": 0.0203, |
| "step": 15730 |
| }, |
| { |
| "epoch": 0.6693315189658106, |
| "grad_norm": 0.4072084426879883, |
| "learning_rate": 2.6452136418565016e-05, |
| "loss": 0.0187, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.6697567613539718, |
| "grad_norm": 0.3620724678039551, |
| "learning_rate": 2.6446795323752962e-05, |
| "loss": 0.0216, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.670182003742133, |
| "grad_norm": 0.3886725902557373, |
| "learning_rate": 2.6441450751706725e-05, |
| "loss": 0.0194, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.6706072461302943, |
| "grad_norm": 0.41271156072616577, |
| "learning_rate": 2.6436102704049843e-05, |
| "loss": 0.021, |
| "step": 15770 |
| }, |
| { |
| "epoch": 0.6710324885184555, |
| "grad_norm": 0.4042252004146576, |
| "learning_rate": 2.6430751182406913e-05, |
| "loss": 0.0205, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.6714577309066168, |
| "grad_norm": 0.4653618335723877, |
| "learning_rate": 2.6425396188403585e-05, |
| "loss": 0.0229, |
| "step": 15790 |
| }, |
| { |
| "epoch": 0.671882973294778, |
| "grad_norm": 0.5367754697799683, |
| "learning_rate": 2.6420037723666565e-05, |
| "loss": 0.0233, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.6723082156829393, |
| "grad_norm": 0.4355956017971039, |
| "learning_rate": 2.641467578982361e-05, |
| "loss": 0.0197, |
| "step": 15810 |
| }, |
| { |
| "epoch": 0.6727334580711005, |
| "grad_norm": 0.4350462853908539, |
| "learning_rate": 2.6409310388503542e-05, |
| "loss": 0.0198, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.6731587004592617, |
| "grad_norm": 0.3870401680469513, |
| "learning_rate": 2.640394152133622e-05, |
| "loss": 0.0206, |
| "step": 15830 |
| }, |
| { |
| "epoch": 0.6735839428474231, |
| "grad_norm": 0.3935236632823944, |
| "learning_rate": 2.6398569189952573e-05, |
| "loss": 0.0194, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.6740091852355843, |
| "grad_norm": 0.4127284288406372, |
| "learning_rate": 2.6393193395984563e-05, |
| "loss": 0.02, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.6744344276237455, |
| "grad_norm": 0.4417366683483124, |
| "learning_rate": 2.6387814141065216e-05, |
| "loss": 0.0222, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.6748596700119068, |
| "grad_norm": 0.4162922203540802, |
| "learning_rate": 2.6382431426828615e-05, |
| "loss": 0.0185, |
| "step": 15870 |
| }, |
| { |
| "epoch": 0.675284912400068, |
| "grad_norm": 0.41200003027915955, |
| "learning_rate": 2.6377045254909885e-05, |
| "loss": 0.0207, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.6757101547882293, |
| "grad_norm": 0.4270523488521576, |
| "learning_rate": 2.6371655626945196e-05, |
| "loss": 0.0221, |
| "step": 15890 |
| }, |
| { |
| "epoch": 0.6761353971763906, |
| "grad_norm": 0.41679078340530396, |
| "learning_rate": 2.6366262544571778e-05, |
| "loss": 0.0198, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.6765606395645518, |
| "grad_norm": 0.4529988765716553, |
| "learning_rate": 2.636086600942791e-05, |
| "loss": 0.0191, |
| "step": 15910 |
| }, |
| { |
| "epoch": 0.676985881952713, |
| "grad_norm": 0.45074957609176636, |
| "learning_rate": 2.6355466023152913e-05, |
| "loss": 0.021, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.6774111243408742, |
| "grad_norm": 0.39274510741233826, |
| "learning_rate": 2.6350062587387164e-05, |
| "loss": 0.02, |
| "step": 15930 |
| }, |
| { |
| "epoch": 0.6778363667290356, |
| "grad_norm": 0.36425289511680603, |
| "learning_rate": 2.634465570377208e-05, |
| "loss": 0.0192, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.6782616091171968, |
| "grad_norm": 0.3857857882976532, |
| "learning_rate": 2.633924537395013e-05, |
| "loss": 0.02, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.678686851505358, |
| "grad_norm": 0.3799647092819214, |
| "learning_rate": 2.6333831599564835e-05, |
| "loss": 0.0183, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.6791120938935193, |
| "grad_norm": 0.42903515696525574, |
| "learning_rate": 2.632841438226075e-05, |
| "loss": 0.0221, |
| "step": 15970 |
| }, |
| { |
| "epoch": 0.6795373362816806, |
| "grad_norm": 0.5024873614311218, |
| "learning_rate": 2.6322993723683486e-05, |
| "loss": 0.021, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.6799625786698418, |
| "grad_norm": 0.435470849275589, |
| "learning_rate": 2.6317569625479695e-05, |
| "loss": 0.0208, |
| "step": 15990 |
| }, |
| { |
| "epoch": 0.6803878210580031, |
| "grad_norm": 0.43367549777030945, |
| "learning_rate": 2.6312142089297076e-05, |
| "loss": 0.0206, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.6808130634461643, |
| "grad_norm": 0.38560950756073, |
| "learning_rate": 2.6306711116784366e-05, |
| "loss": 0.0201, |
| "step": 16010 |
| }, |
| { |
| "epoch": 0.6812383058343255, |
| "grad_norm": 0.4164743423461914, |
| "learning_rate": 2.6301276709591358e-05, |
| "loss": 0.0193, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.6816635482224869, |
| "grad_norm": 0.36647531390190125, |
| "learning_rate": 2.6295838869368878e-05, |
| "loss": 0.0191, |
| "step": 16030 |
| }, |
| { |
| "epoch": 0.6820887906106481, |
| "grad_norm": 0.34206658601760864, |
| "learning_rate": 2.6290397597768803e-05, |
| "loss": 0.0189, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.6825140329988093, |
| "grad_norm": 0.40263527631759644, |
| "learning_rate": 2.6284952896444037e-05, |
| "loss": 0.0187, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.6829392753869705, |
| "grad_norm": 0.4216151237487793, |
| "learning_rate": 2.6279504767048547e-05, |
| "loss": 0.0211, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.6833645177751319, |
| "grad_norm": 0.41514572501182556, |
| "learning_rate": 2.6274053211237323e-05, |
| "loss": 0.0182, |
| "step": 16070 |
| }, |
| { |
| "epoch": 0.6837897601632931, |
| "grad_norm": 0.3836633861064911, |
| "learning_rate": 2.626859823066641e-05, |
| "loss": 0.0196, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.6842150025514543, |
| "grad_norm": 0.39318519830703735, |
| "learning_rate": 2.6263139826992886e-05, |
| "loss": 0.0237, |
| "step": 16090 |
| }, |
| { |
| "epoch": 0.6846402449396156, |
| "grad_norm": 0.42578408122062683, |
| "learning_rate": 2.625767800187487e-05, |
| "loss": 0.0172, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.6850654873277768, |
| "grad_norm": 0.3923208713531494, |
| "learning_rate": 2.6252212756971514e-05, |
| "loss": 0.0187, |
| "step": 16110 |
| }, |
| { |
| "epoch": 0.685490729715938, |
| "grad_norm": 0.3883177936077118, |
| "learning_rate": 2.6246744093943022e-05, |
| "loss": 0.0202, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.6859159721040994, |
| "grad_norm": 0.42425307631492615, |
| "learning_rate": 2.6241272014450625e-05, |
| "loss": 0.0195, |
| "step": 16130 |
| }, |
| { |
| "epoch": 0.6863412144922606, |
| "grad_norm": 0.3335045576095581, |
| "learning_rate": 2.6235796520156603e-05, |
| "loss": 0.0203, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.6867664568804218, |
| "grad_norm": 0.3852084279060364, |
| "learning_rate": 2.623031761272426e-05, |
| "loss": 0.0201, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.6871916992685831, |
| "grad_norm": 0.34301063418388367, |
| "learning_rate": 2.6224835293817948e-05, |
| "loss": 0.0189, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.6876169416567444, |
| "grad_norm": 0.401050329208374, |
| "learning_rate": 2.6219349565103044e-05, |
| "loss": 0.0199, |
| "step": 16170 |
| }, |
| { |
| "epoch": 0.6880421840449056, |
| "grad_norm": 0.44773104786872864, |
| "learning_rate": 2.6213860428245974e-05, |
| "loss": 0.0182, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.6884674264330668, |
| "grad_norm": 0.37949037551879883, |
| "learning_rate": 2.620836788491419e-05, |
| "loss": 0.0203, |
| "step": 16190 |
| }, |
| { |
| "epoch": 0.6888926688212281, |
| "grad_norm": 0.47246506810188293, |
| "learning_rate": 2.6202871936776183e-05, |
| "loss": 0.0207, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.6893179112093893, |
| "grad_norm": 0.44393685460090637, |
| "learning_rate": 2.6197372585501477e-05, |
| "loss": 0.0186, |
| "step": 16210 |
| }, |
| { |
| "epoch": 0.6897431535975506, |
| "grad_norm": 0.42799538373947144, |
| "learning_rate": 2.6191869832760626e-05, |
| "loss": 0.0196, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.6901683959857119, |
| "grad_norm": 0.39829087257385254, |
| "learning_rate": 2.618636368022523e-05, |
| "loss": 0.02, |
| "step": 16230 |
| }, |
| { |
| "epoch": 0.6905936383738731, |
| "grad_norm": 0.39561641216278076, |
| "learning_rate": 2.6180854129567902e-05, |
| "loss": 0.0178, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.6910188807620343, |
| "grad_norm": 0.4070194661617279, |
| "learning_rate": 2.6175341182462303e-05, |
| "loss": 0.0207, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.6914441231501957, |
| "grad_norm": 0.39801710844039917, |
| "learning_rate": 2.6169824840583124e-05, |
| "loss": 0.0185, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.6918693655383569, |
| "grad_norm": 0.3587666153907776, |
| "learning_rate": 2.6164305105606076e-05, |
| "loss": 0.0209, |
| "step": 16270 |
| }, |
| { |
| "epoch": 0.6922946079265181, |
| "grad_norm": 0.3414198160171509, |
| "learning_rate": 2.6158781979207918e-05, |
| "loss": 0.0196, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.6927198503146794, |
| "grad_norm": 0.372044175863266, |
| "learning_rate": 2.615325546306642e-05, |
| "loss": 0.0233, |
| "step": 16290 |
| }, |
| { |
| "epoch": 0.6931450927028406, |
| "grad_norm": 0.330537348985672, |
| "learning_rate": 2.61477255588604e-05, |
| "loss": 0.0213, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.6935703350910019, |
| "grad_norm": 0.3763916492462158, |
| "learning_rate": 2.614219226826969e-05, |
| "loss": 0.0196, |
| "step": 16310 |
| }, |
| { |
| "epoch": 0.6939955774791631, |
| "grad_norm": 0.4111920893192291, |
| "learning_rate": 2.6136655592975166e-05, |
| "loss": 0.0207, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.6944208198673244, |
| "grad_norm": 0.38545480370521545, |
| "learning_rate": 2.613111553465872e-05, |
| "loss": 0.0215, |
| "step": 16330 |
| }, |
| { |
| "epoch": 0.6948460622554856, |
| "grad_norm": 0.43266260623931885, |
| "learning_rate": 2.6125572095003276e-05, |
| "loss": 0.0191, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.6952713046436468, |
| "grad_norm": 0.4159199297428131, |
| "learning_rate": 2.6120025275692777e-05, |
| "loss": 0.0183, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.6956965470318082, |
| "grad_norm": 0.3651190996170044, |
| "learning_rate": 2.6114475078412212e-05, |
| "loss": 0.02, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.6961217894199694, |
| "grad_norm": 0.32506754994392395, |
| "learning_rate": 2.6108921504847575e-05, |
| "loss": 0.0177, |
| "step": 16370 |
| }, |
| { |
| "epoch": 0.6965470318081306, |
| "grad_norm": 0.413657009601593, |
| "learning_rate": 2.6103364556685902e-05, |
| "loss": 0.018, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.6969722741962919, |
| "grad_norm": 0.36810994148254395, |
| "learning_rate": 2.6097804235615242e-05, |
| "loss": 0.0208, |
| "step": 16390 |
| }, |
| { |
| "epoch": 0.6973975165844531, |
| "grad_norm": 0.5319944024085999, |
| "learning_rate": 2.6092240543324676e-05, |
| "loss": 0.02, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.6978227589726144, |
| "grad_norm": 0.3020700216293335, |
| "learning_rate": 2.6086673481504303e-05, |
| "loss": 0.0184, |
| "step": 16410 |
| }, |
| { |
| "epoch": 0.6982480013607757, |
| "grad_norm": 0.3940604031085968, |
| "learning_rate": 2.608110305184526e-05, |
| "loss": 0.0191, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.6986732437489369, |
| "grad_norm": 0.2802399694919586, |
| "learning_rate": 2.6075529256039687e-05, |
| "loss": 0.0207, |
| "step": 16430 |
| }, |
| { |
| "epoch": 0.6990984861370981, |
| "grad_norm": 0.29645195603370667, |
| "learning_rate": 2.6069952095780756e-05, |
| "loss": 0.0178, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.6995237285252593, |
| "grad_norm": 0.2838857173919678, |
| "learning_rate": 2.6064371572762667e-05, |
| "loss": 0.0193, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.6999489709134207, |
| "grad_norm": 0.38183513283729553, |
| "learning_rate": 2.605878768868063e-05, |
| "loss": 0.0178, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.7003742133015819, |
| "grad_norm": 0.4492323696613312, |
| "learning_rate": 2.605320044523088e-05, |
| "loss": 0.018, |
| "step": 16470 |
| }, |
| { |
| "epoch": 0.7007994556897431, |
| "grad_norm": 0.5879418849945068, |
| "learning_rate": 2.6047609844110683e-05, |
| "loss": 0.0213, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.7012246980779044, |
| "grad_norm": 0.5019773840904236, |
| "learning_rate": 2.604201588701831e-05, |
| "loss": 0.0232, |
| "step": 16490 |
| }, |
| { |
| "epoch": 0.7016499404660657, |
| "grad_norm": 0.3990536034107208, |
| "learning_rate": 2.6036418575653057e-05, |
| "loss": 0.0175, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.7020751828542269, |
| "grad_norm": 0.4245409369468689, |
| "learning_rate": 2.603081791171524e-05, |
| "loss": 0.0191, |
| "step": 16510 |
| }, |
| { |
| "epoch": 0.7025004252423882, |
| "grad_norm": 0.456036239862442, |
| "learning_rate": 2.602521389690619e-05, |
| "loss": 0.0181, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.7029256676305494, |
| "grad_norm": 0.47955575585365295, |
| "learning_rate": 2.601960653292827e-05, |
| "loss": 0.0184, |
| "step": 16530 |
| }, |
| { |
| "epoch": 0.7033509100187106, |
| "grad_norm": 0.38320642709732056, |
| "learning_rate": 2.6013995821484833e-05, |
| "loss": 0.019, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.703776152406872, |
| "grad_norm": 0.3216317594051361, |
| "learning_rate": 2.600838176428028e-05, |
| "loss": 0.0189, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.7042013947950332, |
| "grad_norm": 0.3859560489654541, |
| "learning_rate": 2.6002764363020004e-05, |
| "loss": 0.0177, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.7046266371831944, |
| "grad_norm": 0.5185313820838928, |
| "learning_rate": 2.5997143619410427e-05, |
| "loss": 0.0192, |
| "step": 16570 |
| }, |
| { |
| "epoch": 0.7050518795713556, |
| "grad_norm": 0.3257139027118683, |
| "learning_rate": 2.5991519535158976e-05, |
| "loss": 0.0188, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.705477121959517, |
| "grad_norm": 0.3514297902584076, |
| "learning_rate": 2.598589211197411e-05, |
| "loss": 0.0199, |
| "step": 16590 |
| }, |
| { |
| "epoch": 0.7059023643476782, |
| "grad_norm": 0.33370789885520935, |
| "learning_rate": 2.598026135156528e-05, |
| "loss": 0.0204, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.7063276067358394, |
| "grad_norm": 0.41825467348098755, |
| "learning_rate": 2.5974627255642974e-05, |
| "loss": 0.0185, |
| "step": 16610 |
| }, |
| { |
| "epoch": 0.7067528491240007, |
| "grad_norm": 0.34714779257774353, |
| "learning_rate": 2.5968989825918674e-05, |
| "loss": 0.0184, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.7071780915121619, |
| "grad_norm": 0.3500387370586395, |
| "learning_rate": 2.5963349064104887e-05, |
| "loss": 0.0197, |
| "step": 16630 |
| }, |
| { |
| "epoch": 0.7076033339003232, |
| "grad_norm": 0.5644680261611938, |
| "learning_rate": 2.5957704971915116e-05, |
| "loss": 0.0209, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.7080285762884845, |
| "grad_norm": 0.46666523814201355, |
| "learning_rate": 2.5952057551063905e-05, |
| "loss": 0.0193, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.7084538186766457, |
| "grad_norm": 0.41835102438926697, |
| "learning_rate": 2.594640680326678e-05, |
| "loss": 0.0192, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.7088790610648069, |
| "grad_norm": 0.5231054425239563, |
| "learning_rate": 2.594075273024029e-05, |
| "loss": 0.02, |
| "step": 16670 |
| }, |
| { |
| "epoch": 0.7093043034529682, |
| "grad_norm": 0.4124384820461273, |
| "learning_rate": 2.5935095333701994e-05, |
| "loss": 0.0204, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.7097295458411295, |
| "grad_norm": 0.297749787569046, |
| "learning_rate": 2.5929434615370462e-05, |
| "loss": 0.0235, |
| "step": 16690 |
| }, |
| { |
| "epoch": 0.7101547882292907, |
| "grad_norm": 0.3325209319591522, |
| "learning_rate": 2.592377057696527e-05, |
| "loss": 0.0191, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.7105800306174519, |
| "grad_norm": 0.3475107252597809, |
| "learning_rate": 2.5918103220206997e-05, |
| "loss": 0.0167, |
| "step": 16710 |
| }, |
| { |
| "epoch": 0.7110052730056132, |
| "grad_norm": 0.36572086811065674, |
| "learning_rate": 2.5912432546817247e-05, |
| "loss": 0.0183, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.7114305153937744, |
| "grad_norm": 0.3658443093299866, |
| "learning_rate": 2.5906758558518613e-05, |
| "loss": 0.0183, |
| "step": 16730 |
| }, |
| { |
| "epoch": 0.7118557577819357, |
| "grad_norm": 0.5162744522094727, |
| "learning_rate": 2.5901081257034706e-05, |
| "loss": 0.0178, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.712281000170097, |
| "grad_norm": 0.43483680486679077, |
| "learning_rate": 2.5895400644090138e-05, |
| "loss": 0.0207, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.7127062425582582, |
| "grad_norm": 0.43120190501213074, |
| "learning_rate": 2.5889716721410535e-05, |
| "loss": 0.0179, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.7131314849464194, |
| "grad_norm": 0.49665582180023193, |
| "learning_rate": 2.5884029490722515e-05, |
| "loss": 0.0196, |
| "step": 16770 |
| }, |
| { |
| "epoch": 0.7135567273345808, |
| "grad_norm": 0.555846095085144, |
| "learning_rate": 2.587833895375371e-05, |
| "loss": 0.0186, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.713981969722742, |
| "grad_norm": 0.31895700097084045, |
| "learning_rate": 2.587264511223276e-05, |
| "loss": 0.0183, |
| "step": 16790 |
| }, |
| { |
| "epoch": 0.7144072121109032, |
| "grad_norm": 0.3290864825248718, |
| "learning_rate": 2.5866947967889295e-05, |
| "loss": 0.0214, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.7148324544990645, |
| "grad_norm": 0.4324344992637634, |
| "learning_rate": 2.586124752245397e-05, |
| "loss": 0.02, |
| "step": 16810 |
| }, |
| { |
| "epoch": 0.7152576968872257, |
| "grad_norm": 0.4053352475166321, |
| "learning_rate": 2.585554377765842e-05, |
| "loss": 0.0214, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.715682939275387, |
| "grad_norm": 0.4539511501789093, |
| "learning_rate": 2.584983673523529e-05, |
| "loss": 0.0215, |
| "step": 16830 |
| }, |
| { |
| "epoch": 0.7161081816635482, |
| "grad_norm": 0.4536151885986328, |
| "learning_rate": 2.5844126396918236e-05, |
| "loss": 0.0183, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.7165334240517095, |
| "grad_norm": 0.4749014973640442, |
| "learning_rate": 2.5838412764441905e-05, |
| "loss": 0.0199, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.7169586664398707, |
| "grad_norm": 0.3392566442489624, |
| "learning_rate": 2.583269583954195e-05, |
| "loss": 0.0193, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.7173839088280319, |
| "grad_norm": 0.26803597807884216, |
| "learning_rate": 2.5826975623955017e-05, |
| "loss": 0.0187, |
| "step": 16870 |
| }, |
| { |
| "epoch": 0.7178091512161933, |
| "grad_norm": 0.37092941999435425, |
| "learning_rate": 2.5821252119418762e-05, |
| "loss": 0.0177, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.7182343936043545, |
| "grad_norm": 0.31138113141059875, |
| "learning_rate": 2.581552532767183e-05, |
| "loss": 0.0188, |
| "step": 16890 |
| }, |
| { |
| "epoch": 0.7186596359925157, |
| "grad_norm": 0.3887597322463989, |
| "learning_rate": 2.580979525045387e-05, |
| "loss": 0.0223, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.719084878380677, |
| "grad_norm": 0.3579878807067871, |
| "learning_rate": 2.580406188950553e-05, |
| "loss": 0.0181, |
| "step": 16910 |
| }, |
| { |
| "epoch": 0.7195101207688382, |
| "grad_norm": 0.3246123194694519, |
| "learning_rate": 2.5798325246568453e-05, |
| "loss": 0.0187, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.7199353631569995, |
| "grad_norm": 0.4029758870601654, |
| "learning_rate": 2.579258532338528e-05, |
| "loss": 0.019, |
| "step": 16930 |
| }, |
| { |
| "epoch": 0.7203606055451608, |
| "grad_norm": 0.3865378499031067, |
| "learning_rate": 2.5786842121699644e-05, |
| "loss": 0.0178, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.720785847933322, |
| "grad_norm": 0.3467353582382202, |
| "learning_rate": 2.5781095643256186e-05, |
| "loss": 0.0196, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.7212110903214832, |
| "grad_norm": 0.42222702503204346, |
| "learning_rate": 2.577534588980053e-05, |
| "loss": 0.0193, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.7216363327096444, |
| "grad_norm": 0.39148077368736267, |
| "learning_rate": 2.5769592863079298e-05, |
| "loss": 0.0176, |
| "step": 16970 |
| }, |
| { |
| "epoch": 0.7220615750978058, |
| "grad_norm": 0.3850536048412323, |
| "learning_rate": 2.576383656484011e-05, |
| "loss": 0.0199, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.722486817485967, |
| "grad_norm": 0.40393391251564026, |
| "learning_rate": 2.5758076996831572e-05, |
| "loss": 0.0218, |
| "step": 16990 |
| }, |
| { |
| "epoch": 0.7229120598741282, |
| "grad_norm": 0.39605194330215454, |
| "learning_rate": 2.5752314160803295e-05, |
| "loss": 0.02, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.7233373022622895, |
| "grad_norm": 0.39946261048316956, |
| "learning_rate": 2.5746548058505873e-05, |
| "loss": 0.0201, |
| "step": 17010 |
| }, |
| { |
| "epoch": 0.7237625446504508, |
| "grad_norm": 0.5638138651847839, |
| "learning_rate": 2.57407786916909e-05, |
| "loss": 0.021, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.724187787038612, |
| "grad_norm": 0.36596256494522095, |
| "learning_rate": 2.573500606211095e-05, |
| "loss": 0.0186, |
| "step": 17030 |
| }, |
| { |
| "epoch": 0.7246130294267733, |
| "grad_norm": 0.5323764085769653, |
| "learning_rate": 2.57292301715196e-05, |
| "loss": 0.0202, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.7250382718149345, |
| "grad_norm": 0.4261687099933624, |
| "learning_rate": 2.572345102167142e-05, |
| "loss": 0.018, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.7254635142030957, |
| "grad_norm": 0.44146978855133057, |
| "learning_rate": 2.571766861432195e-05, |
| "loss": 0.021, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.7258887565912571, |
| "grad_norm": 0.36145922541618347, |
| "learning_rate": 2.5711882951227738e-05, |
| "loss": 0.018, |
| "step": 17070 |
| }, |
| { |
| "epoch": 0.7263139989794183, |
| "grad_norm": 0.4039449393749237, |
| "learning_rate": 2.570609403414632e-05, |
| "loss": 0.0198, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.7267392413675795, |
| "grad_norm": 0.42143484950065613, |
| "learning_rate": 2.5700301864836212e-05, |
| "loss": 0.0195, |
| "step": 17090 |
| }, |
| { |
| "epoch": 0.7271644837557407, |
| "grad_norm": 0.5072124600410461, |
| "learning_rate": 2.569450644505692e-05, |
| "loss": 0.02, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.727589726143902, |
| "grad_norm": 0.418938547372818, |
| "learning_rate": 2.5688707776568943e-05, |
| "loss": 0.0213, |
| "step": 17110 |
| }, |
| { |
| "epoch": 0.7280149685320633, |
| "grad_norm": 0.3790704607963562, |
| "learning_rate": 2.5682905861133764e-05, |
| "loss": 0.0189, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.7284402109202245, |
| "grad_norm": 0.42760220170021057, |
| "learning_rate": 2.567710070051385e-05, |
| "loss": 0.0186, |
| "step": 17130 |
| }, |
| { |
| "epoch": 0.7288654533083858, |
| "grad_norm": 0.4262620210647583, |
| "learning_rate": 2.5671292296472652e-05, |
| "loss": 0.0205, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.729290695696547, |
| "grad_norm": 0.35412681102752686, |
| "learning_rate": 2.5665480650774616e-05, |
| "loss": 0.0194, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.7297159380847082, |
| "grad_norm": 0.461956262588501, |
| "learning_rate": 2.565966576518517e-05, |
| "loss": 0.0216, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.7301411804728696, |
| "grad_norm": 0.4576774537563324, |
| "learning_rate": 2.5653847641470706e-05, |
| "loss": 0.0194, |
| "step": 17170 |
| }, |
| { |
| "epoch": 0.7305664228610308, |
| "grad_norm": 0.4432138502597809, |
| "learning_rate": 2.564802628139863e-05, |
| "loss": 0.021, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.730991665249192, |
| "grad_norm": 0.343555212020874, |
| "learning_rate": 2.5642201686737318e-05, |
| "loss": 0.0201, |
| "step": 17190 |
| }, |
| { |
| "epoch": 0.7314169076373533, |
| "grad_norm": 0.49047133326530457, |
| "learning_rate": 2.563637385925612e-05, |
| "loss": 0.0186, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.7318421500255146, |
| "grad_norm": 0.40630829334259033, |
| "learning_rate": 2.5630542800725388e-05, |
| "loss": 0.02, |
| "step": 17210 |
| }, |
| { |
| "epoch": 0.7322673924136758, |
| "grad_norm": 0.5432057976722717, |
| "learning_rate": 2.562470851291643e-05, |
| "loss": 0.0218, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.732692634801837, |
| "grad_norm": 0.4478088617324829, |
| "learning_rate": 2.561887099760156e-05, |
| "loss": 0.019, |
| "step": 17230 |
| }, |
| { |
| "epoch": 0.7331178771899983, |
| "grad_norm": 0.3950343430042267, |
| "learning_rate": 2.5613030256554054e-05, |
| "loss": 0.0187, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.7335431195781595, |
| "grad_norm": 0.3851512670516968, |
| "learning_rate": 2.5607186291548174e-05, |
| "loss": 0.0194, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.7339683619663208, |
| "grad_norm": 0.38966938853263855, |
| "learning_rate": 2.5601339104359167e-05, |
| "loss": 0.0194, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.7343936043544821, |
| "grad_norm": 0.4339113235473633, |
| "learning_rate": 2.559548869676325e-05, |
| "loss": 0.0167, |
| "step": 17270 |
| }, |
| { |
| "epoch": 0.7348188467426433, |
| "grad_norm": 0.5083813667297363, |
| "learning_rate": 2.558963507053763e-05, |
| "loss": 0.0195, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.7352440891308045, |
| "grad_norm": 0.37444162368774414, |
| "learning_rate": 2.5583778227460476e-05, |
| "loss": 0.0171, |
| "step": 17290 |
| }, |
| { |
| "epoch": 0.7356693315189659, |
| "grad_norm": 0.3342028856277466, |
| "learning_rate": 2.5577918169310943e-05, |
| "loss": 0.0214, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.7360945739071271, |
| "grad_norm": 0.38764867186546326, |
| "learning_rate": 2.5572054897869167e-05, |
| "loss": 0.0192, |
| "step": 17310 |
| }, |
| { |
| "epoch": 0.7365198162952883, |
| "grad_norm": 0.457276314496994, |
| "learning_rate": 2.556618841491625e-05, |
| "loss": 0.0195, |
| "step": 17320 |
| }, |
| { |
| "epoch": 0.7369450586834496, |
| "grad_norm": 0.3450474441051483, |
| "learning_rate": 2.5560318722234276e-05, |
| "loss": 0.0195, |
| "step": 17330 |
| }, |
| { |
| "epoch": 0.7373703010716108, |
| "grad_norm": 0.3469228446483612, |
| "learning_rate": 2.5554445821606306e-05, |
| "loss": 0.0169, |
| "step": 17340 |
| }, |
| { |
| "epoch": 0.737795543459772, |
| "grad_norm": 0.45763877034187317, |
| "learning_rate": 2.554856971481637e-05, |
| "loss": 0.0183, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.7382207858479333, |
| "grad_norm": 0.44012799859046936, |
| "learning_rate": 2.554269040364947e-05, |
| "loss": 0.0173, |
| "step": 17360 |
| }, |
| { |
| "epoch": 0.7386460282360946, |
| "grad_norm": 0.40965408086776733, |
| "learning_rate": 2.553680788989159e-05, |
| "loss": 0.019, |
| "step": 17370 |
| }, |
| { |
| "epoch": 0.7390712706242558, |
| "grad_norm": 0.3658427596092224, |
| "learning_rate": 2.5530922175329677e-05, |
| "loss": 0.0174, |
| "step": 17380 |
| }, |
| { |
| "epoch": 0.739496513012417, |
| "grad_norm": 0.4212437570095062, |
| "learning_rate": 2.5525033261751667e-05, |
| "loss": 0.0184, |
| "step": 17390 |
| }, |
| { |
| "epoch": 0.7399217554005784, |
| "grad_norm": 0.3762251138687134, |
| "learning_rate": 2.5519141150946445e-05, |
| "loss": 0.0224, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.7403469977887396, |
| "grad_norm": 0.40899696946144104, |
| "learning_rate": 2.551324584470389e-05, |
| "loss": 0.0187, |
| "step": 17410 |
| }, |
| { |
| "epoch": 0.7407722401769008, |
| "grad_norm": 0.3483990728855133, |
| "learning_rate": 2.5507347344814825e-05, |
| "loss": 0.0186, |
| "step": 17420 |
| }, |
| { |
| "epoch": 0.7411974825650621, |
| "grad_norm": 0.3548679053783417, |
| "learning_rate": 2.5501445653071072e-05, |
| "loss": 0.0188, |
| "step": 17430 |
| }, |
| { |
| "epoch": 0.7416227249532233, |
| "grad_norm": 0.3296952247619629, |
| "learning_rate": 2.5495540771265406e-05, |
| "loss": 0.0168, |
| "step": 17440 |
| }, |
| { |
| "epoch": 0.7420479673413846, |
| "grad_norm": 0.3904550075531006, |
| "learning_rate": 2.5489632701191565e-05, |
| "loss": 0.0172, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.7424732097295459, |
| "grad_norm": 0.4135804772377014, |
| "learning_rate": 2.5483721444644276e-05, |
| "loss": 0.0202, |
| "step": 17460 |
| }, |
| { |
| "epoch": 0.7428984521177071, |
| "grad_norm": 0.44280630350112915, |
| "learning_rate": 2.5477807003419222e-05, |
| "loss": 0.0197, |
| "step": 17470 |
| }, |
| { |
| "epoch": 0.7433236945058683, |
| "grad_norm": 0.394596666097641, |
| "learning_rate": 2.547188937931305e-05, |
| "loss": 0.0182, |
| "step": 17480 |
| }, |
| { |
| "epoch": 0.7437489368940295, |
| "grad_norm": 0.37119102478027344, |
| "learning_rate": 2.5465968574123372e-05, |
| "loss": 0.0191, |
| "step": 17490 |
| }, |
| { |
| "epoch": 0.7441741792821909, |
| "grad_norm": 0.2509319484233856, |
| "learning_rate": 2.5460044589648787e-05, |
| "loss": 0.0172, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.7445994216703521, |
| "grad_norm": 0.3507034480571747, |
| "learning_rate": 2.5454117427688833e-05, |
| "loss": 0.0169, |
| "step": 17510 |
| }, |
| { |
| "epoch": 0.7450246640585133, |
| "grad_norm": 0.36366188526153564, |
| "learning_rate": 2.5448187090044033e-05, |
| "loss": 0.0209, |
| "step": 17520 |
| }, |
| { |
| "epoch": 0.7454499064466746, |
| "grad_norm": 0.37416818737983704, |
| "learning_rate": 2.544225357851586e-05, |
| "loss": 0.021, |
| "step": 17530 |
| }, |
| { |
| "epoch": 0.7458751488348359, |
| "grad_norm": 0.42603954672813416, |
| "learning_rate": 2.543631689490676e-05, |
| "loss": 0.0205, |
| "step": 17540 |
| }, |
| { |
| "epoch": 0.7463003912229971, |
| "grad_norm": 0.3621084988117218, |
| "learning_rate": 2.543037704102015e-05, |
| "loss": 0.0191, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.7467256336111584, |
| "grad_norm": 0.5240678787231445, |
| "learning_rate": 2.5424434018660386e-05, |
| "loss": 0.0205, |
| "step": 17560 |
| }, |
| { |
| "epoch": 0.7471508759993196, |
| "grad_norm": 0.2871054410934448, |
| "learning_rate": 2.541848782963281e-05, |
| "loss": 0.0217, |
| "step": 17570 |
| }, |
| { |
| "epoch": 0.7475761183874808, |
| "grad_norm": 0.4009586274623871, |
| "learning_rate": 2.5412538475743714e-05, |
| "loss": 0.0198, |
| "step": 17580 |
| }, |
| { |
| "epoch": 0.7480013607756422, |
| "grad_norm": 0.3869740068912506, |
| "learning_rate": 2.540658595880036e-05, |
| "loss": 0.0197, |
| "step": 17590 |
| }, |
| { |
| "epoch": 0.7484266031638034, |
| "grad_norm": 0.35922184586524963, |
| "learning_rate": 2.540063028061096e-05, |
| "loss": 0.0195, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.7488518455519646, |
| "grad_norm": 0.32907354831695557, |
| "learning_rate": 2.5394671442984692e-05, |
| "loss": 0.0178, |
| "step": 17610 |
| }, |
| { |
| "epoch": 0.7492770879401258, |
| "grad_norm": 0.4469134211540222, |
| "learning_rate": 2.5388709447731696e-05, |
| "loss": 0.0157, |
| "step": 17620 |
| }, |
| { |
| "epoch": 0.7497023303282871, |
| "grad_norm": 0.38923630118370056, |
| "learning_rate": 2.5382744296663068e-05, |
| "loss": 0.0205, |
| "step": 17630 |
| }, |
| { |
| "epoch": 0.7501275727164484, |
| "grad_norm": 0.5383104681968689, |
| "learning_rate": 2.5376775991590865e-05, |
| "loss": 0.0212, |
| "step": 17640 |
| }, |
| { |
| "epoch": 0.7505528151046096, |
| "grad_norm": 0.3489135205745697, |
| "learning_rate": 2.5370804534328097e-05, |
| "loss": 0.0205, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.7509780574927709, |
| "grad_norm": 0.3815806806087494, |
| "learning_rate": 2.5364829926688736e-05, |
| "loss": 0.0207, |
| "step": 17660 |
| }, |
| { |
| "epoch": 0.7514032998809321, |
| "grad_norm": 0.22493062913417816, |
| "learning_rate": 2.5358852170487713e-05, |
| "loss": 0.0166, |
| "step": 17670 |
| }, |
| { |
| "epoch": 0.7518285422690933, |
| "grad_norm": 0.39472290873527527, |
| "learning_rate": 2.5352871267540906e-05, |
| "loss": 0.0178, |
| "step": 17680 |
| }, |
| { |
| "epoch": 0.7522537846572547, |
| "grad_norm": 0.444985032081604, |
| "learning_rate": 2.534688721966516e-05, |
| "loss": 0.021, |
| "step": 17690 |
| }, |
| { |
| "epoch": 0.7526790270454159, |
| "grad_norm": 0.3079502284526825, |
| "learning_rate": 2.5340900028678276e-05, |
| "loss": 0.0192, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.7531042694335771, |
| "grad_norm": 0.27846136689186096, |
| "learning_rate": 2.5334909696398998e-05, |
| "loss": 0.0182, |
| "step": 17710 |
| }, |
| { |
| "epoch": 0.7535295118217384, |
| "grad_norm": 0.3500784635543823, |
| "learning_rate": 2.5328916224647027e-05, |
| "loss": 0.0178, |
| "step": 17720 |
| }, |
| { |
| "epoch": 0.7539547542098997, |
| "grad_norm": 0.4569058418273926, |
| "learning_rate": 2.532291961524303e-05, |
| "loss": 0.0185, |
| "step": 17730 |
| }, |
| { |
| "epoch": 0.7543799965980609, |
| "grad_norm": 0.34410810470581055, |
| "learning_rate": 2.531691987000861e-05, |
| "loss": 0.0191, |
| "step": 17740 |
| }, |
| { |
| "epoch": 0.7548052389862221, |
| "grad_norm": 0.38633546233177185, |
| "learning_rate": 2.5310916990766343e-05, |
| "loss": 0.0184, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.7552304813743834, |
| "grad_norm": 0.37940606474876404, |
| "learning_rate": 2.5304910979339734e-05, |
| "loss": 0.0174, |
| "step": 17760 |
| }, |
| { |
| "epoch": 0.7556557237625446, |
| "grad_norm": 0.3326164484024048, |
| "learning_rate": 2.5298901837553255e-05, |
| "loss": 0.0171, |
| "step": 17770 |
| }, |
| { |
| "epoch": 0.7560809661507059, |
| "grad_norm": 0.33612585067749023, |
| "learning_rate": 2.5292889567232326e-05, |
| "loss": 0.0189, |
| "step": 17780 |
| }, |
| { |
| "epoch": 0.7565062085388672, |
| "grad_norm": 0.3002769649028778, |
| "learning_rate": 2.528687417020331e-05, |
| "loss": 0.0178, |
| "step": 17790 |
| }, |
| { |
| "epoch": 0.7569314509270284, |
| "grad_norm": 0.5658273696899414, |
| "learning_rate": 2.5280855648293536e-05, |
| "loss": 0.0178, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.7573566933151896, |
| "grad_norm": 0.2587268352508545, |
| "learning_rate": 2.5274834003331266e-05, |
| "loss": 0.019, |
| "step": 17810 |
| }, |
| { |
| "epoch": 0.757781935703351, |
| "grad_norm": 0.3134019374847412, |
| "learning_rate": 2.5268809237145717e-05, |
| "loss": 0.0195, |
| "step": 17820 |
| }, |
| { |
| "epoch": 0.7582071780915122, |
| "grad_norm": 0.34614887833595276, |
| "learning_rate": 2.5262781351567052e-05, |
| "loss": 0.017, |
| "step": 17830 |
| }, |
| { |
| "epoch": 0.7586324204796734, |
| "grad_norm": 0.4725238084793091, |
| "learning_rate": 2.525675034842638e-05, |
| "loss": 0.0207, |
| "step": 17840 |
| }, |
| { |
| "epoch": 0.7590576628678347, |
| "grad_norm": 0.3761941194534302, |
| "learning_rate": 2.5250716229555774e-05, |
| "loss": 0.0185, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.7594829052559959, |
| "grad_norm": 0.35306262969970703, |
| "learning_rate": 2.524467899678823e-05, |
| "loss": 0.0188, |
| "step": 17860 |
| }, |
| { |
| "epoch": 0.7599081476441572, |
| "grad_norm": 0.4369204342365265, |
| "learning_rate": 2.52386386519577e-05, |
| "loss": 0.0205, |
| "step": 17870 |
| }, |
| { |
| "epoch": 0.7603333900323184, |
| "grad_norm": 0.35953933000564575, |
| "learning_rate": 2.5232595196899086e-05, |
| "loss": 0.0191, |
| "step": 17880 |
| }, |
| { |
| "epoch": 0.7607586324204797, |
| "grad_norm": 0.28581809997558594, |
| "learning_rate": 2.5226548633448224e-05, |
| "loss": 0.0175, |
| "step": 17890 |
| }, |
| { |
| "epoch": 0.7611838748086409, |
| "grad_norm": 0.3392051160335541, |
| "learning_rate": 2.5220498963441906e-05, |
| "loss": 0.0181, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.7616091171968021, |
| "grad_norm": 0.3997742533683777, |
| "learning_rate": 2.521444618871786e-05, |
| "loss": 0.019, |
| "step": 17910 |
| }, |
| { |
| "epoch": 0.7620343595849635, |
| "grad_norm": 0.5108124613761902, |
| "learning_rate": 2.5208390311114758e-05, |
| "loss": 0.0206, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.7624596019731247, |
| "grad_norm": 0.37691330909729004, |
| "learning_rate": 2.520233133247221e-05, |
| "loss": 0.0197, |
| "step": 17930 |
| }, |
| { |
| "epoch": 0.7628848443612859, |
| "grad_norm": 0.4904385507106781, |
| "learning_rate": 2.519626925463079e-05, |
| "loss": 0.0171, |
| "step": 17940 |
| }, |
| { |
| "epoch": 0.7633100867494472, |
| "grad_norm": 0.43569111824035645, |
| "learning_rate": 2.519020407943198e-05, |
| "loss": 0.0178, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.7637353291376084, |
| "grad_norm": 0.4185038208961487, |
| "learning_rate": 2.518413580871823e-05, |
| "loss": 0.0175, |
| "step": 17960 |
| }, |
| { |
| "epoch": 0.7641605715257697, |
| "grad_norm": 0.3519199788570404, |
| "learning_rate": 2.5178064444332922e-05, |
| "loss": 0.0186, |
| "step": 17970 |
| }, |
| { |
| "epoch": 0.764585813913931, |
| "grad_norm": 0.40214109420776367, |
| "learning_rate": 2.5171989988120368e-05, |
| "loss": 0.016, |
| "step": 17980 |
| }, |
| { |
| "epoch": 0.7650110563020922, |
| "grad_norm": 0.46126723289489746, |
| "learning_rate": 2.5165912441925832e-05, |
| "loss": 0.0169, |
| "step": 17990 |
| }, |
| { |
| "epoch": 0.7654362986902534, |
| "grad_norm": 0.30911028385162354, |
| "learning_rate": 2.515983180759551e-05, |
| "loss": 0.0172, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.7658615410784146, |
| "grad_norm": 0.4340876042842865, |
| "learning_rate": 2.515374808697654e-05, |
| "loss": 0.0189, |
| "step": 18010 |
| }, |
| { |
| "epoch": 0.766286783466576, |
| "grad_norm": 0.30016154050827026, |
| "learning_rate": 2.5147661281916996e-05, |
| "loss": 0.017, |
| "step": 18020 |
| }, |
| { |
| "epoch": 0.7667120258547372, |
| "grad_norm": 0.27429714798927307, |
| "learning_rate": 2.5141571394265892e-05, |
| "loss": 0.0183, |
| "step": 18030 |
| }, |
| { |
| "epoch": 0.7671372682428984, |
| "grad_norm": 0.4100990295410156, |
| "learning_rate": 2.513547842587317e-05, |
| "loss": 0.0185, |
| "step": 18040 |
| }, |
| { |
| "epoch": 0.7675625106310597, |
| "grad_norm": 0.38406017422676086, |
| "learning_rate": 2.5129382378589708e-05, |
| "loss": 0.0192, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.767987753019221, |
| "grad_norm": 0.4918137788772583, |
| "learning_rate": 2.512328325426733e-05, |
| "loss": 0.0169, |
| "step": 18060 |
| }, |
| { |
| "epoch": 0.7684129954073822, |
| "grad_norm": 0.46917903423309326, |
| "learning_rate": 2.5117181054758798e-05, |
| "loss": 0.0191, |
| "step": 18070 |
| }, |
| { |
| "epoch": 0.7688382377955435, |
| "grad_norm": 0.3494175374507904, |
| "learning_rate": 2.5111075781917783e-05, |
| "loss": 0.0184, |
| "step": 18080 |
| }, |
| { |
| "epoch": 0.7692634801837047, |
| "grad_norm": 0.3880108594894409, |
| "learning_rate": 2.510496743759892e-05, |
| "loss": 0.0198, |
| "step": 18090 |
| }, |
| { |
| "epoch": 0.7696887225718659, |
| "grad_norm": 0.28338170051574707, |
| "learning_rate": 2.509885602365775e-05, |
| "loss": 0.0171, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.7701139649600273, |
| "grad_norm": 0.3353395164012909, |
| "learning_rate": 2.509274154195076e-05, |
| "loss": 0.0183, |
| "step": 18110 |
| }, |
| { |
| "epoch": 0.7705392073481885, |
| "grad_norm": 0.3458663523197174, |
| "learning_rate": 2.5086623994335383e-05, |
| "loss": 0.0172, |
| "step": 18120 |
| }, |
| { |
| "epoch": 0.7709644497363497, |
| "grad_norm": 0.36026453971862793, |
| "learning_rate": 2.5080503382669953e-05, |
| "loss": 0.0199, |
| "step": 18130 |
| }, |
| { |
| "epoch": 0.7713896921245109, |
| "grad_norm": 0.38520294427871704, |
| "learning_rate": 2.507437970881376e-05, |
| "loss": 0.0222, |
| "step": 18140 |
| }, |
| { |
| "epoch": 0.7718149345126722, |
| "grad_norm": 0.4084884822368622, |
| "learning_rate": 2.5068252974627003e-05, |
| "loss": 0.0168, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.7722401769008335, |
| "grad_norm": 0.3372369408607483, |
| "learning_rate": 2.5062123181970834e-05, |
| "loss": 0.0189, |
| "step": 18160 |
| }, |
| { |
| "epoch": 0.7726654192889947, |
| "grad_norm": 0.36237940192222595, |
| "learning_rate": 2.5055990332707316e-05, |
| "loss": 0.0236, |
| "step": 18170 |
| }, |
| { |
| "epoch": 0.773090661677156, |
| "grad_norm": 0.395637571811676, |
| "learning_rate": 2.5049854428699444e-05, |
| "loss": 0.018, |
| "step": 18180 |
| }, |
| { |
| "epoch": 0.7735159040653172, |
| "grad_norm": 0.3573133051395416, |
| "learning_rate": 2.5043715471811158e-05, |
| "loss": 0.0191, |
| "step": 18190 |
| }, |
| { |
| "epoch": 0.7739411464534784, |
| "grad_norm": 0.48093611001968384, |
| "learning_rate": 2.5037573463907296e-05, |
| "loss": 0.0165, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.7743663888416398, |
| "grad_norm": 0.3498045802116394, |
| "learning_rate": 2.5031428406853637e-05, |
| "loss": 0.0181, |
| "step": 18210 |
| }, |
| { |
| "epoch": 0.774791631229801, |
| "grad_norm": 0.38417303562164307, |
| "learning_rate": 2.5025280302516897e-05, |
| "loss": 0.0204, |
| "step": 18220 |
| }, |
| { |
| "epoch": 0.7752168736179622, |
| "grad_norm": 0.3889990746974945, |
| "learning_rate": 2.5019129152764698e-05, |
| "loss": 0.0192, |
| "step": 18230 |
| }, |
| { |
| "epoch": 0.7756421160061235, |
| "grad_norm": 0.4350021481513977, |
| "learning_rate": 2.5012974959465612e-05, |
| "loss": 0.0178, |
| "step": 18240 |
| }, |
| { |
| "epoch": 0.7760673583942848, |
| "grad_norm": 0.3341920077800751, |
| "learning_rate": 2.5006817724489105e-05, |
| "loss": 0.0174, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.776492600782446, |
| "grad_norm": 0.3362509608268738, |
| "learning_rate": 2.500065744970559e-05, |
| "loss": 0.0176, |
| "step": 18260 |
| }, |
| { |
| "epoch": 0.7769178431706072, |
| "grad_norm": 0.3553008735179901, |
| "learning_rate": 2.499449413698639e-05, |
| "loss": 0.0189, |
| "step": 18270 |
| }, |
| { |
| "epoch": 0.7773430855587685, |
| "grad_norm": 0.3581770658493042, |
| "learning_rate": 2.4988327788203764e-05, |
| "loss": 0.0193, |
| "step": 18280 |
| }, |
| { |
| "epoch": 0.7777683279469297, |
| "grad_norm": 0.4561096131801605, |
| "learning_rate": 2.498215840523088e-05, |
| "loss": 0.019, |
| "step": 18290 |
| }, |
| { |
| "epoch": 0.778193570335091, |
| "grad_norm": 0.2999465763568878, |
| "learning_rate": 2.4975985989941837e-05, |
| "loss": 0.0189, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.7786188127232523, |
| "grad_norm": 0.36827707290649414, |
| "learning_rate": 2.4969810544211652e-05, |
| "loss": 0.0196, |
| "step": 18310 |
| }, |
| { |
| "epoch": 0.7790440551114135, |
| "grad_norm": 0.3937795162200928, |
| "learning_rate": 2.4963632069916258e-05, |
| "loss": 0.0171, |
| "step": 18320 |
| }, |
| { |
| "epoch": 0.7794692974995747, |
| "grad_norm": 0.37284278869628906, |
| "learning_rate": 2.495745056893252e-05, |
| "loss": 0.0204, |
| "step": 18330 |
| }, |
| { |
| "epoch": 0.779894539887736, |
| "grad_norm": 0.33898916840553284, |
| "learning_rate": 2.495126604313821e-05, |
| "loss": 0.0183, |
| "step": 18340 |
| }, |
| { |
| "epoch": 0.7803197822758973, |
| "grad_norm": 0.3953031003475189, |
| "learning_rate": 2.4945078494412023e-05, |
| "loss": 0.0176, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.7807450246640585, |
| "grad_norm": 0.3483516275882721, |
| "learning_rate": 2.493888792463357e-05, |
| "loss": 0.0173, |
| "step": 18360 |
| }, |
| { |
| "epoch": 0.7811702670522198, |
| "grad_norm": 0.3744226098060608, |
| "learning_rate": 2.4932694335683395e-05, |
| "loss": 0.0173, |
| "step": 18370 |
| }, |
| { |
| "epoch": 0.781595509440381, |
| "grad_norm": 0.3779127299785614, |
| "learning_rate": 2.4926497729442932e-05, |
| "loss": 0.0203, |
| "step": 18380 |
| }, |
| { |
| "epoch": 0.7820207518285422, |
| "grad_norm": 0.3308951258659363, |
| "learning_rate": 2.4920298107794555e-05, |
| "loss": 0.018, |
| "step": 18390 |
| }, |
| { |
| "epoch": 0.7824459942167035, |
| "grad_norm": 0.3206545412540436, |
| "learning_rate": 2.4914095472621544e-05, |
| "loss": 0.0184, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.7828712366048648, |
| "grad_norm": 0.5378426909446716, |
| "learning_rate": 2.4907889825808093e-05, |
| "loss": 0.0217, |
| "step": 18410 |
| }, |
| { |
| "epoch": 0.783296478993026, |
| "grad_norm": 0.3933923840522766, |
| "learning_rate": 2.4901681169239314e-05, |
| "loss": 0.0174, |
| "step": 18420 |
| }, |
| { |
| "epoch": 0.7837217213811872, |
| "grad_norm": 0.3201284110546112, |
| "learning_rate": 2.4895469504801236e-05, |
| "loss": 0.0204, |
| "step": 18430 |
| }, |
| { |
| "epoch": 0.7841469637693486, |
| "grad_norm": 0.44616612792015076, |
| "learning_rate": 2.4889254834380802e-05, |
| "loss": 0.0174, |
| "step": 18440 |
| }, |
| { |
| "epoch": 0.7845722061575098, |
| "grad_norm": 0.33565399050712585, |
| "learning_rate": 2.488303715986585e-05, |
| "loss": 0.0185, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.784997448545671, |
| "grad_norm": 0.2184840440750122, |
| "learning_rate": 2.4876816483145166e-05, |
| "loss": 0.0186, |
| "step": 18460 |
| }, |
| { |
| "epoch": 0.7854226909338323, |
| "grad_norm": 0.2786431312561035, |
| "learning_rate": 2.487059280610841e-05, |
| "loss": 0.0171, |
| "step": 18470 |
| }, |
| { |
| "epoch": 0.7858479333219935, |
| "grad_norm": 0.3800705671310425, |
| "learning_rate": 2.4864366130646178e-05, |
| "loss": 0.0165, |
| "step": 18480 |
| }, |
| { |
| "epoch": 0.7862731757101548, |
| "grad_norm": 0.3747217059135437, |
| "learning_rate": 2.485813645864997e-05, |
| "loss": 0.0182, |
| "step": 18490 |
| }, |
| { |
| "epoch": 0.7866984180983161, |
| "grad_norm": 0.3308454751968384, |
| "learning_rate": 2.4851903792012198e-05, |
| "loss": 0.0176, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.7871236604864773, |
| "grad_norm": 0.47512272000312805, |
| "learning_rate": 2.4845668132626173e-05, |
| "loss": 0.0207, |
| "step": 18510 |
| }, |
| { |
| "epoch": 0.7875489028746385, |
| "grad_norm": 0.2912342846393585, |
| "learning_rate": 2.483942948238613e-05, |
| "loss": 0.0201, |
| "step": 18520 |
| }, |
| { |
| "epoch": 0.7879741452627997, |
| "grad_norm": 0.38101184368133545, |
| "learning_rate": 2.4833187843187207e-05, |
| "loss": 0.0173, |
| "step": 18530 |
| }, |
| { |
| "epoch": 0.7883993876509611, |
| "grad_norm": 0.42462605237960815, |
| "learning_rate": 2.4826943216925448e-05, |
| "loss": 0.0204, |
| "step": 18540 |
| }, |
| { |
| "epoch": 0.7888246300391223, |
| "grad_norm": 0.4753773808479309, |
| "learning_rate": 2.4820695605497807e-05, |
| "loss": 0.018, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.7892498724272835, |
| "grad_norm": 0.5159977078437805, |
| "learning_rate": 2.481444501080214e-05, |
| "loss": 0.0182, |
| "step": 18560 |
| }, |
| { |
| "epoch": 0.7896751148154448, |
| "grad_norm": 0.4189545512199402, |
| "learning_rate": 2.4808191434737217e-05, |
| "loss": 0.0214, |
| "step": 18570 |
| }, |
| { |
| "epoch": 0.790100357203606, |
| "grad_norm": 0.2559889554977417, |
| "learning_rate": 2.4801934879202696e-05, |
| "loss": 0.0158, |
| "step": 18580 |
| }, |
| { |
| "epoch": 0.7905255995917673, |
| "grad_norm": 0.6500698924064636, |
| "learning_rate": 2.4795675346099172e-05, |
| "loss": 0.0202, |
| "step": 18590 |
| }, |
| { |
| "epoch": 0.7909508419799286, |
| "grad_norm": 0.29353052377700806, |
| "learning_rate": 2.4789412837328114e-05, |
| "loss": 0.019, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.7913760843680898, |
| "grad_norm": 0.39966511726379395, |
| "learning_rate": 2.4783147354791915e-05, |
| "loss": 0.019, |
| "step": 18610 |
| }, |
| { |
| "epoch": 0.791801326756251, |
| "grad_norm": 0.33445870876312256, |
| "learning_rate": 2.4776878900393858e-05, |
| "loss": 0.0168, |
| "step": 18620 |
| }, |
| { |
| "epoch": 0.7922265691444124, |
| "grad_norm": 0.33083635568618774, |
| "learning_rate": 2.477060747603813e-05, |
| "loss": 0.0178, |
| "step": 18630 |
| }, |
| { |
| "epoch": 0.7926518115325736, |
| "grad_norm": 0.2751755118370056, |
| "learning_rate": 2.4764333083629833e-05, |
| "loss": 0.0176, |
| "step": 18640 |
| }, |
| { |
| "epoch": 0.7930770539207348, |
| "grad_norm": 0.25697383284568787, |
| "learning_rate": 2.4758055725074954e-05, |
| "loss": 0.0178, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.793502296308896, |
| "grad_norm": 0.3238734006881714, |
| "learning_rate": 2.4751775402280396e-05, |
| "loss": 0.0173, |
| "step": 18660 |
| }, |
| { |
| "epoch": 0.7939275386970573, |
| "grad_norm": 0.32385683059692383, |
| "learning_rate": 2.4745492117153947e-05, |
| "loss": 0.0163, |
| "step": 18670 |
| }, |
| { |
| "epoch": 0.7943527810852186, |
| "grad_norm": 0.4163258671760559, |
| "learning_rate": 2.473920587160431e-05, |
| "loss": 0.0198, |
| "step": 18680 |
| }, |
| { |
| "epoch": 0.7947780234733798, |
| "grad_norm": 0.3122082054615021, |
| "learning_rate": 2.473291666754108e-05, |
| "loss": 0.0191, |
| "step": 18690 |
| }, |
| { |
| "epoch": 0.7952032658615411, |
| "grad_norm": 0.3161797821521759, |
| "learning_rate": 2.4726624506874748e-05, |
| "loss": 0.0181, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.7956285082497023, |
| "grad_norm": 0.3504398465156555, |
| "learning_rate": 2.4720329391516708e-05, |
| "loss": 0.0179, |
| "step": 18710 |
| }, |
| { |
| "epoch": 0.7960537506378635, |
| "grad_norm": 0.28261467814445496, |
| "learning_rate": 2.4714031323379248e-05, |
| "loss": 0.0176, |
| "step": 18720 |
| }, |
| { |
| "epoch": 0.7964789930260249, |
| "grad_norm": 0.37667712569236755, |
| "learning_rate": 2.4707730304375556e-05, |
| "loss": 0.0166, |
| "step": 18730 |
| }, |
| { |
| "epoch": 0.7969042354141861, |
| "grad_norm": 0.40282124280929565, |
| "learning_rate": 2.4701426336419713e-05, |
| "loss": 0.0204, |
| "step": 18740 |
| }, |
| { |
| "epoch": 0.7973294778023473, |
| "grad_norm": 0.3857354521751404, |
| "learning_rate": 2.4695119421426707e-05, |
| "loss": 0.0183, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.7977547201905086, |
| "grad_norm": 0.32577598094940186, |
| "learning_rate": 2.46888095613124e-05, |
| "loss": 0.0156, |
| "step": 18760 |
| }, |
| { |
| "epoch": 0.7981799625786699, |
| "grad_norm": 0.31286710500717163, |
| "learning_rate": 2.468249675799357e-05, |
| "loss": 0.0187, |
| "step": 18770 |
| }, |
| { |
| "epoch": 0.7986052049668311, |
| "grad_norm": 0.29057997465133667, |
| "learning_rate": 2.467618101338787e-05, |
| "loss": 0.0186, |
| "step": 18780 |
| }, |
| { |
| "epoch": 0.7990304473549923, |
| "grad_norm": 0.32208725810050964, |
| "learning_rate": 2.466986232941387e-05, |
| "loss": 0.0179, |
| "step": 18790 |
| }, |
| { |
| "epoch": 0.7994556897431536, |
| "grad_norm": 0.42681917548179626, |
| "learning_rate": 2.466354070799101e-05, |
| "loss": 0.0185, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.7998809321313148, |
| "grad_norm": 0.389090895652771, |
| "learning_rate": 2.4657216151039634e-05, |
| "loss": 0.0182, |
| "step": 18810 |
| }, |
| { |
| "epoch": 0.800306174519476, |
| "grad_norm": 0.338835209608078, |
| "learning_rate": 2.4650888660480976e-05, |
| "loss": 0.0192, |
| "step": 18820 |
| }, |
| { |
| "epoch": 0.8007314169076374, |
| "grad_norm": 0.39491820335388184, |
| "learning_rate": 2.464455823823716e-05, |
| "loss": 0.0188, |
| "step": 18830 |
| }, |
| { |
| "epoch": 0.8011566592957986, |
| "grad_norm": 0.3796221911907196, |
| "learning_rate": 2.4638224886231196e-05, |
| "loss": 0.0203, |
| "step": 18840 |
| }, |
| { |
| "epoch": 0.8015819016839598, |
| "grad_norm": 0.4522951543331146, |
| "learning_rate": 2.4631888606387e-05, |
| "loss": 0.0196, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.8020071440721211, |
| "grad_norm": 0.32392099499702454, |
| "learning_rate": 2.4625549400629356e-05, |
| "loss": 0.0179, |
| "step": 18860 |
| }, |
| { |
| "epoch": 0.8024323864602824, |
| "grad_norm": 0.36404240131378174, |
| "learning_rate": 2.4619207270883958e-05, |
| "loss": 0.0166, |
| "step": 18870 |
| }, |
| { |
| "epoch": 0.8028576288484436, |
| "grad_norm": 0.31685033440589905, |
| "learning_rate": 2.461286221907737e-05, |
| "loss": 0.0177, |
| "step": 18880 |
| }, |
| { |
| "epoch": 0.8032828712366049, |
| "grad_norm": 0.35425153374671936, |
| "learning_rate": 2.460651424713705e-05, |
| "loss": 0.0184, |
| "step": 18890 |
| }, |
| { |
| "epoch": 0.8037081136247661, |
| "grad_norm": 0.3561999201774597, |
| "learning_rate": 2.4600163356991347e-05, |
| "loss": 0.0174, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.8041333560129273, |
| "grad_norm": 0.3638227880001068, |
| "learning_rate": 2.4593809550569498e-05, |
| "loss": 0.0169, |
| "step": 18910 |
| }, |
| { |
| "epoch": 0.8045585984010887, |
| "grad_norm": 0.3247460722923279, |
| "learning_rate": 2.4587452829801614e-05, |
| "loss": 0.0217, |
| "step": 18920 |
| }, |
| { |
| "epoch": 0.8049838407892499, |
| "grad_norm": 0.34640103578567505, |
| "learning_rate": 2.45810931966187e-05, |
| "loss": 0.017, |
| "step": 18930 |
| }, |
| { |
| "epoch": 0.8054090831774111, |
| "grad_norm": 0.2949196696281433, |
| "learning_rate": 2.457473065295265e-05, |
| "loss": 0.0164, |
| "step": 18940 |
| }, |
| { |
| "epoch": 0.8058343255655723, |
| "grad_norm": 0.2806178033351898, |
| "learning_rate": 2.4568365200736232e-05, |
| "loss": 0.0184, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.8062595679537337, |
| "grad_norm": 0.38688793778419495, |
| "learning_rate": 2.4561996841903104e-05, |
| "loss": 0.0184, |
| "step": 18960 |
| }, |
| { |
| "epoch": 0.8066848103418949, |
| "grad_norm": 0.294419527053833, |
| "learning_rate": 2.455562557838781e-05, |
| "loss": 0.0202, |
| "step": 18970 |
| }, |
| { |
| "epoch": 0.8071100527300561, |
| "grad_norm": 0.39870259165763855, |
| "learning_rate": 2.4549251412125762e-05, |
| "loss": 0.0194, |
| "step": 18980 |
| }, |
| { |
| "epoch": 0.8075352951182174, |
| "grad_norm": 0.35803669691085815, |
| "learning_rate": 2.454287434505327e-05, |
| "loss": 0.019, |
| "step": 18990 |
| }, |
| { |
| "epoch": 0.8079605375063786, |
| "grad_norm": 0.3306076228618622, |
| "learning_rate": 2.4536494379107514e-05, |
| "loss": 0.0173, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.8083857798945399, |
| "grad_norm": 0.31047430634498596, |
| "learning_rate": 2.453011151622657e-05, |
| "loss": 0.0163, |
| "step": 19010 |
| }, |
| { |
| "epoch": 0.8088110222827012, |
| "grad_norm": 0.45618563890457153, |
| "learning_rate": 2.452372575834937e-05, |
| "loss": 0.0178, |
| "step": 19020 |
| }, |
| { |
| "epoch": 0.8092362646708624, |
| "grad_norm": 0.3509865701198578, |
| "learning_rate": 2.4517337107415742e-05, |
| "loss": 0.0185, |
| "step": 19030 |
| }, |
| { |
| "epoch": 0.8096615070590236, |
| "grad_norm": 0.40352803468704224, |
| "learning_rate": 2.4510945565366397e-05, |
| "loss": 0.0197, |
| "step": 19040 |
| }, |
| { |
| "epoch": 0.810086749447185, |
| "grad_norm": 0.2777395248413086, |
| "learning_rate": 2.4504551134142905e-05, |
| "loss": 0.0183, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.8105119918353462, |
| "grad_norm": 0.43311402201652527, |
| "learning_rate": 2.4498153815687738e-05, |
| "loss": 0.021, |
| "step": 19060 |
| }, |
| { |
| "epoch": 0.8109372342235074, |
| "grad_norm": 0.3267991840839386, |
| "learning_rate": 2.4491753611944224e-05, |
| "loss": 0.0182, |
| "step": 19070 |
| }, |
| { |
| "epoch": 0.8113624766116686, |
| "grad_norm": 0.37511321902275085, |
| "learning_rate": 2.4485350524856577e-05, |
| "loss": 0.0183, |
| "step": 19080 |
| }, |
| { |
| "epoch": 0.8117877189998299, |
| "grad_norm": 0.33489423990249634, |
| "learning_rate": 2.4478944556369886e-05, |
| "loss": 0.0184, |
| "step": 19090 |
| }, |
| { |
| "epoch": 0.8122129613879912, |
| "grad_norm": 0.30842292308807373, |
| "learning_rate": 2.4472535708430116e-05, |
| "loss": 0.0194, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.8126382037761524, |
| "grad_norm": 0.4636704921722412, |
| "learning_rate": 2.4466123982984103e-05, |
| "loss": 0.022, |
| "step": 19110 |
| }, |
| { |
| "epoch": 0.8130634461643137, |
| "grad_norm": 0.3947596848011017, |
| "learning_rate": 2.445970938197957e-05, |
| "loss": 0.0193, |
| "step": 19120 |
| }, |
| { |
| "epoch": 0.8134886885524749, |
| "grad_norm": 0.3773009479045868, |
| "learning_rate": 2.4453291907365092e-05, |
| "loss": 0.0188, |
| "step": 19130 |
| }, |
| { |
| "epoch": 0.8139139309406361, |
| "grad_norm": 0.4608099162578583, |
| "learning_rate": 2.444687156109013e-05, |
| "loss": 0.02, |
| "step": 19140 |
| }, |
| { |
| "epoch": 0.8143391733287975, |
| "grad_norm": 0.3472055196762085, |
| "learning_rate": 2.444044834510502e-05, |
| "loss": 0.0193, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.8147644157169587, |
| "grad_norm": 0.40071094036102295, |
| "learning_rate": 2.443402226136096e-05, |
| "loss": 0.0193, |
| "step": 19160 |
| }, |
| { |
| "epoch": 0.8151896581051199, |
| "grad_norm": 0.4245036542415619, |
| "learning_rate": 2.442759331181003e-05, |
| "loss": 0.0162, |
| "step": 19170 |
| }, |
| { |
| "epoch": 0.8156149004932812, |
| "grad_norm": 0.592641294002533, |
| "learning_rate": 2.4421161498405176e-05, |
| "loss": 0.0201, |
| "step": 19180 |
| }, |
| { |
| "epoch": 0.8160401428814424, |
| "grad_norm": 0.48311716318130493, |
| "learning_rate": 2.4414726823100207e-05, |
| "loss": 0.0191, |
| "step": 19190 |
| }, |
| { |
| "epoch": 0.8164653852696037, |
| "grad_norm": 0.3404284417629242, |
| "learning_rate": 2.4408289287849813e-05, |
| "loss": 0.0203, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8168906276577649, |
| "grad_norm": 0.3067640960216522, |
| "learning_rate": 2.4401848894609543e-05, |
| "loss": 0.0182, |
| "step": 19210 |
| }, |
| { |
| "epoch": 0.8173158700459262, |
| "grad_norm": 0.44467446208000183, |
| "learning_rate": 2.439540564533582e-05, |
| "loss": 0.0181, |
| "step": 19220 |
| }, |
| { |
| "epoch": 0.8177411124340874, |
| "grad_norm": 0.3175550401210785, |
| "learning_rate": 2.4388959541985935e-05, |
| "loss": 0.0177, |
| "step": 19230 |
| }, |
| { |
| "epoch": 0.8181663548222486, |
| "grad_norm": 0.44434231519699097, |
| "learning_rate": 2.438251058651804e-05, |
| "loss": 0.0187, |
| "step": 19240 |
| }, |
| { |
| "epoch": 0.81859159721041, |
| "grad_norm": 0.3861560821533203, |
| "learning_rate": 2.437605878089116e-05, |
| "loss": 0.0212, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.8190168395985712, |
| "grad_norm": 0.3165411353111267, |
| "learning_rate": 2.4369604127065183e-05, |
| "loss": 0.0192, |
| "step": 19260 |
| }, |
| { |
| "epoch": 0.8194420819867324, |
| "grad_norm": 0.3824351131916046, |
| "learning_rate": 2.4363146627000862e-05, |
| "loss": 0.0168, |
| "step": 19270 |
| }, |
| { |
| "epoch": 0.8198673243748937, |
| "grad_norm": 0.375009149312973, |
| "learning_rate": 2.435668628265982e-05, |
| "loss": 0.0212, |
| "step": 19280 |
| }, |
| { |
| "epoch": 0.820292566763055, |
| "grad_norm": 0.2801119387149811, |
| "learning_rate": 2.4350223096004533e-05, |
| "loss": 0.0171, |
| "step": 19290 |
| }, |
| { |
| "epoch": 0.8207178091512162, |
| "grad_norm": 0.3751022219657898, |
| "learning_rate": 2.4343757068998343e-05, |
| "loss": 0.0188, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.8211430515393775, |
| "grad_norm": 0.34862837195396423, |
| "learning_rate": 2.4337288203605465e-05, |
| "loss": 0.0184, |
| "step": 19310 |
| }, |
| { |
| "epoch": 0.8215682939275387, |
| "grad_norm": 0.3476138710975647, |
| "learning_rate": 2.433081650179097e-05, |
| "loss": 0.02, |
| "step": 19320 |
| }, |
| { |
| "epoch": 0.8219935363156999, |
| "grad_norm": 0.36957767605781555, |
| "learning_rate": 2.4324341965520787e-05, |
| "loss": 0.0166, |
| "step": 19330 |
| }, |
| { |
| "epoch": 0.8224187787038612, |
| "grad_norm": 0.31499162316322327, |
| "learning_rate": 2.4317864596761706e-05, |
| "loss": 0.0177, |
| "step": 19340 |
| }, |
| { |
| "epoch": 0.8228440210920225, |
| "grad_norm": 0.32327380776405334, |
| "learning_rate": 2.4311384397481387e-05, |
| "loss": 0.0172, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.8232692634801837, |
| "grad_norm": 0.27957937121391296, |
| "learning_rate": 2.4304901369648344e-05, |
| "loss": 0.0169, |
| "step": 19360 |
| }, |
| { |
| "epoch": 0.8236945058683449, |
| "grad_norm": 0.4002857804298401, |
| "learning_rate": 2.429841551523194e-05, |
| "loss": 0.0222, |
| "step": 19370 |
| }, |
| { |
| "epoch": 0.8241197482565062, |
| "grad_norm": 0.43563494086265564, |
| "learning_rate": 2.4291926836202416e-05, |
| "loss": 0.019, |
| "step": 19380 |
| }, |
| { |
| "epoch": 0.8245449906446675, |
| "grad_norm": 0.31372612714767456, |
| "learning_rate": 2.428543533453086e-05, |
| "loss": 0.0203, |
| "step": 19390 |
| }, |
| { |
| "epoch": 0.8249702330328287, |
| "grad_norm": 0.31944289803504944, |
| "learning_rate": 2.4278941012189215e-05, |
| "loss": 0.0195, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.82539547542099, |
| "grad_norm": 0.28638774156570435, |
| "learning_rate": 2.427244387115029e-05, |
| "loss": 0.0191, |
| "step": 19410 |
| }, |
| { |
| "epoch": 0.8258207178091512, |
| "grad_norm": 0.4001684784889221, |
| "learning_rate": 2.4265943913387738e-05, |
| "loss": 0.0176, |
| "step": 19420 |
| }, |
| { |
| "epoch": 0.8262459601973124, |
| "grad_norm": 0.34653154015541077, |
| "learning_rate": 2.425944114087608e-05, |
| "loss": 0.0169, |
| "step": 19430 |
| }, |
| { |
| "epoch": 0.8266712025854738, |
| "grad_norm": 0.3180333077907562, |
| "learning_rate": 2.4252935555590684e-05, |
| "loss": 0.0214, |
| "step": 19440 |
| }, |
| { |
| "epoch": 0.827096444973635, |
| "grad_norm": 0.420258492231369, |
| "learning_rate": 2.4246427159507772e-05, |
| "loss": 0.0155, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.8275216873617962, |
| "grad_norm": 0.30975809693336487, |
| "learning_rate": 2.4239915954604433e-05, |
| "loss": 0.0158, |
| "step": 19460 |
| }, |
| { |
| "epoch": 0.8279469297499574, |
| "grad_norm": 0.32593613862991333, |
| "learning_rate": 2.4233401942858595e-05, |
| "loss": 0.0161, |
| "step": 19470 |
| }, |
| { |
| "epoch": 0.8283721721381188, |
| "grad_norm": 0.3261292576789856, |
| "learning_rate": 2.4226885126249033e-05, |
| "loss": 0.0163, |
| "step": 19480 |
| }, |
| { |
| "epoch": 0.82879741452628, |
| "grad_norm": 0.329802542924881, |
| "learning_rate": 2.4220365506755394e-05, |
| "loss": 0.0156, |
| "step": 19490 |
| }, |
| { |
| "epoch": 0.8292226569144412, |
| "grad_norm": 0.36109891533851624, |
| "learning_rate": 2.4213843086358166e-05, |
| "loss": 0.0154, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.8296478993026025, |
| "grad_norm": 0.44082027673721313, |
| "learning_rate": 2.4207317867038684e-05, |
| "loss": 0.0206, |
| "step": 19510 |
| }, |
| { |
| "epoch": 0.8300731416907637, |
| "grad_norm": 0.316689133644104, |
| "learning_rate": 2.4200789850779137e-05, |
| "loss": 0.0172, |
| "step": 19520 |
| }, |
| { |
| "epoch": 0.830498384078925, |
| "grad_norm": 0.399915486574173, |
| "learning_rate": 2.419425903956257e-05, |
| "loss": 0.0188, |
| "step": 19530 |
| }, |
| { |
| "epoch": 0.8309236264670863, |
| "grad_norm": 0.5746642351150513, |
| "learning_rate": 2.4187725435372865e-05, |
| "loss": 0.0156, |
| "step": 19540 |
| }, |
| { |
| "epoch": 0.8313488688552475, |
| "grad_norm": 0.6345515847206116, |
| "learning_rate": 2.418118904019476e-05, |
| "loss": 0.0182, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.8317741112434087, |
| "grad_norm": 0.35415080189704895, |
| "learning_rate": 2.4174649856013842e-05, |
| "loss": 0.0179, |
| "step": 19560 |
| }, |
| { |
| "epoch": 0.83219935363157, |
| "grad_norm": 0.3247321844100952, |
| "learning_rate": 2.416810788481654e-05, |
| "loss": 0.0176, |
| "step": 19570 |
| }, |
| { |
| "epoch": 0.8326245960197313, |
| "grad_norm": 0.41374531388282776, |
| "learning_rate": 2.416156312859013e-05, |
| "loss": 0.017, |
| "step": 19580 |
| }, |
| { |
| "epoch": 0.8330498384078925, |
| "grad_norm": 0.3736271858215332, |
| "learning_rate": 2.4155015589322742e-05, |
| "loss": 0.0168, |
| "step": 19590 |
| }, |
| { |
| "epoch": 0.8334750807960537, |
| "grad_norm": 0.4117409288883209, |
| "learning_rate": 2.4148465269003338e-05, |
| "loss": 0.0189, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.833900323184215, |
| "grad_norm": 0.32814255356788635, |
| "learning_rate": 2.4141912169621742e-05, |
| "loss": 0.018, |
| "step": 19610 |
| }, |
| { |
| "epoch": 0.8343255655723762, |
| "grad_norm": 0.34247809648513794, |
| "learning_rate": 2.4135356293168602e-05, |
| "loss": 0.0176, |
| "step": 19620 |
| }, |
| { |
| "epoch": 0.8347508079605375, |
| "grad_norm": 0.317573606967926, |
| "learning_rate": 2.4128797641635427e-05, |
| "loss": 0.0178, |
| "step": 19630 |
| }, |
| { |
| "epoch": 0.8351760503486988, |
| "grad_norm": 0.3367726802825928, |
| "learning_rate": 2.412223621701456e-05, |
| "loss": 0.0148, |
| "step": 19640 |
| }, |
| { |
| "epoch": 0.83560129273686, |
| "grad_norm": 0.34025198221206665, |
| "learning_rate": 2.4115672021299193e-05, |
| "loss": 0.0175, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.8360265351250212, |
| "grad_norm": 0.32034459710121155, |
| "learning_rate": 2.4109105056483345e-05, |
| "loss": 0.0209, |
| "step": 19660 |
| }, |
| { |
| "epoch": 0.8364517775131826, |
| "grad_norm": 0.350245863199234, |
| "learning_rate": 2.4102535324561898e-05, |
| "loss": 0.0208, |
| "step": 19670 |
| }, |
| { |
| "epoch": 0.8368770199013438, |
| "grad_norm": 0.4573119878768921, |
| "learning_rate": 2.4095962827530556e-05, |
| "loss": 0.0172, |
| "step": 19680 |
| }, |
| { |
| "epoch": 0.837302262289505, |
| "grad_norm": 0.38064107298851013, |
| "learning_rate": 2.408938756738587e-05, |
| "loss": 0.0166, |
| "step": 19690 |
| }, |
| { |
| "epoch": 0.8377275046776663, |
| "grad_norm": 0.3633199632167816, |
| "learning_rate": 2.4082809546125235e-05, |
| "loss": 0.0189, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.8381527470658275, |
| "grad_norm": 0.5364571809768677, |
| "learning_rate": 2.4076228765746876e-05, |
| "loss": 0.0171, |
| "step": 19710 |
| }, |
| { |
| "epoch": 0.8385779894539888, |
| "grad_norm": 0.27998796105384827, |
| "learning_rate": 2.4069645228249864e-05, |
| "loss": 0.0164, |
| "step": 19720 |
| }, |
| { |
| "epoch": 0.83900323184215, |
| "grad_norm": 0.33518269658088684, |
| "learning_rate": 2.40630589356341e-05, |
| "loss": 0.0175, |
| "step": 19730 |
| }, |
| { |
| "epoch": 0.8394284742303113, |
| "grad_norm": 0.3925931453704834, |
| "learning_rate": 2.4056469889900327e-05, |
| "loss": 0.0183, |
| "step": 19740 |
| }, |
| { |
| "epoch": 0.8398537166184725, |
| "grad_norm": 0.2806520164012909, |
| "learning_rate": 2.404987809305012e-05, |
| "loss": 0.0179, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.8402789590066337, |
| "grad_norm": 0.26155996322631836, |
| "learning_rate": 2.4043283547085903e-05, |
| "loss": 0.0206, |
| "step": 19760 |
| }, |
| { |
| "epoch": 0.8407042013947951, |
| "grad_norm": 0.3690318763256073, |
| "learning_rate": 2.403668625401092e-05, |
| "loss": 0.0182, |
| "step": 19770 |
| }, |
| { |
| "epoch": 0.8411294437829563, |
| "grad_norm": 0.38202929496765137, |
| "learning_rate": 2.4030086215829247e-05, |
| "loss": 0.0154, |
| "step": 19780 |
| }, |
| { |
| "epoch": 0.8415546861711175, |
| "grad_norm": 0.3304462432861328, |
| "learning_rate": 2.4023483434545806e-05, |
| "loss": 0.019, |
| "step": 19790 |
| }, |
| { |
| "epoch": 0.8419799285592788, |
| "grad_norm": 0.32554149627685547, |
| "learning_rate": 2.4016877912166356e-05, |
| "loss": 0.018, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.84240517094744, |
| "grad_norm": 0.3655115067958832, |
| "learning_rate": 2.401026965069747e-05, |
| "loss": 0.0188, |
| "step": 19810 |
| }, |
| { |
| "epoch": 0.8428304133356013, |
| "grad_norm": 0.3540496528148651, |
| "learning_rate": 2.4003658652146564e-05, |
| "loss": 0.0172, |
| "step": 19820 |
| }, |
| { |
| "epoch": 0.8432556557237626, |
| "grad_norm": 0.2831367552280426, |
| "learning_rate": 2.3997044918521896e-05, |
| "loss": 0.0181, |
| "step": 19830 |
| }, |
| { |
| "epoch": 0.8436808981119238, |
| "grad_norm": 0.3655035197734833, |
| "learning_rate": 2.3990428451832524e-05, |
| "loss": 0.0172, |
| "step": 19840 |
| }, |
| { |
| "epoch": 0.844106140500085, |
| "grad_norm": 0.266615092754364, |
| "learning_rate": 2.398380925408837e-05, |
| "loss": 0.0158, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.8445313828882463, |
| "grad_norm": 0.37141183018684387, |
| "learning_rate": 2.3977187327300174e-05, |
| "loss": 0.0189, |
| "step": 19860 |
| }, |
| { |
| "epoch": 0.8449566252764076, |
| "grad_norm": 0.43702635169029236, |
| "learning_rate": 2.397056267347949e-05, |
| "loss": 0.0186, |
| "step": 19870 |
| }, |
| { |
| "epoch": 0.8453818676645688, |
| "grad_norm": 0.42261597514152527, |
| "learning_rate": 2.3963935294638725e-05, |
| "loss": 0.0177, |
| "step": 19880 |
| }, |
| { |
| "epoch": 0.84580711005273, |
| "grad_norm": 0.3517419993877411, |
| "learning_rate": 2.395730519279109e-05, |
| "loss": 0.017, |
| "step": 19890 |
| }, |
| { |
| "epoch": 0.8462323524408913, |
| "grad_norm": 0.303210973739624, |
| "learning_rate": 2.3950672369950646e-05, |
| "loss": 0.0177, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.8466575948290526, |
| "grad_norm": 0.3902907073497772, |
| "learning_rate": 2.3944036828132266e-05, |
| "loss": 0.0199, |
| "step": 19910 |
| }, |
| { |
| "epoch": 0.8470828372172138, |
| "grad_norm": 0.28750261664390564, |
| "learning_rate": 2.3937398569351647e-05, |
| "loss": 0.0186, |
| "step": 19920 |
| }, |
| { |
| "epoch": 0.8475080796053751, |
| "grad_norm": 0.3395385146141052, |
| "learning_rate": 2.3930757595625326e-05, |
| "loss": 0.0221, |
| "step": 19930 |
| }, |
| { |
| "epoch": 0.8479333219935363, |
| "grad_norm": 0.3717360198497772, |
| "learning_rate": 2.3924113908970645e-05, |
| "loss": 0.0183, |
| "step": 19940 |
| }, |
| { |
| "epoch": 0.8483585643816975, |
| "grad_norm": 0.39457517862319946, |
| "learning_rate": 2.391746751140579e-05, |
| "loss": 0.0175, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.8487838067698589, |
| "grad_norm": 0.5388570427894592, |
| "learning_rate": 2.3910818404949756e-05, |
| "loss": 0.0206, |
| "step": 19960 |
| }, |
| { |
| "epoch": 0.8492090491580201, |
| "grad_norm": 0.36116811633110046, |
| "learning_rate": 2.3904166591622367e-05, |
| "loss": 0.0163, |
| "step": 19970 |
| }, |
| { |
| "epoch": 0.8496342915461813, |
| "grad_norm": 0.42637062072753906, |
| "learning_rate": 2.3897512073444267e-05, |
| "loss": 0.0177, |
| "step": 19980 |
| }, |
| { |
| "epoch": 0.8500595339343425, |
| "grad_norm": 0.32587724924087524, |
| "learning_rate": 2.389085485243693e-05, |
| "loss": 0.0181, |
| "step": 19990 |
| }, |
| { |
| "epoch": 0.8504847763225039, |
| "grad_norm": 0.3714869022369385, |
| "learning_rate": 2.3884194930622632e-05, |
| "loss": 0.018, |
| "step": 20000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 60000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|