| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 7730, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00517464424320828, |
| "grad_norm": 4.884782314300537, |
| "learning_rate": 3.0172413793103453e-07, |
| "loss": 0.4996, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01034928848641656, |
| "grad_norm": 10.670414924621582, |
| "learning_rate": 6.465517241379311e-07, |
| "loss": 0.4662, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.015523932729624839, |
| "grad_norm": 2.3693668842315674, |
| "learning_rate": 9.913793103448276e-07, |
| "loss": 0.4448, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.02069857697283312, |
| "grad_norm": 4.083646297454834, |
| "learning_rate": 1.336206896551724e-06, |
| "loss": 0.4917, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0258732212160414, |
| "grad_norm": 8.517114639282227, |
| "learning_rate": 1.681034482758621e-06, |
| "loss": 0.5036, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.031047865459249677, |
| "grad_norm": 6.137472629547119, |
| "learning_rate": 2.025862068965517e-06, |
| "loss": 0.4726, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03622250970245795, |
| "grad_norm": 2.339662551879883, |
| "learning_rate": 2.370689655172414e-06, |
| "loss": 0.4691, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.04139715394566624, |
| "grad_norm": 1.5891815423965454, |
| "learning_rate": 2.7155172413793105e-06, |
| "loss": 0.4667, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04657179818887452, |
| "grad_norm": 8.638023376464844, |
| "learning_rate": 3.0603448275862068e-06, |
| "loss": 0.4776, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0517464424320828, |
| "grad_norm": 1.5453442335128784, |
| "learning_rate": 3.4051724137931034e-06, |
| "loss": 0.4724, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.056921086675291076, |
| "grad_norm": 3.350733757019043, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.4675, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.062095730918499355, |
| "grad_norm": 4.810270309448242, |
| "learning_rate": 4.094827586206897e-06, |
| "loss": 0.4592, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.06727037516170763, |
| "grad_norm": 1.514150619506836, |
| "learning_rate": 4.439655172413794e-06, |
| "loss": 0.4825, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0724450194049159, |
| "grad_norm": 8.344229698181152, |
| "learning_rate": 4.78448275862069e-06, |
| "loss": 0.4604, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07761966364812418, |
| "grad_norm": 6.710835933685303, |
| "learning_rate": 5.129310344827587e-06, |
| "loss": 0.4659, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08279430789133248, |
| "grad_norm": 3.7732086181640625, |
| "learning_rate": 5.474137931034483e-06, |
| "loss": 0.4575, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.08796895213454076, |
| "grad_norm": 1.9706354141235352, |
| "learning_rate": 5.81896551724138e-06, |
| "loss": 0.4473, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.09314359637774904, |
| "grad_norm": 1.358317494392395, |
| "learning_rate": 6.163793103448276e-06, |
| "loss": 0.4724, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.09831824062095731, |
| "grad_norm": 2.7171244621276855, |
| "learning_rate": 6.508620689655173e-06, |
| "loss": 0.4687, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.1034928848641656, |
| "grad_norm": 1.794846773147583, |
| "learning_rate": 6.853448275862069e-06, |
| "loss": 0.466, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.10866752910737387, |
| "grad_norm": 2.4740686416625977, |
| "learning_rate": 7.198275862068966e-06, |
| "loss": 0.4562, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.11384217335058215, |
| "grad_norm": 12.792396545410156, |
| "learning_rate": 7.543103448275862e-06, |
| "loss": 0.4553, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.11901681759379043, |
| "grad_norm": 4.893735408782959, |
| "learning_rate": 7.88793103448276e-06, |
| "loss": 0.4499, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.12419146183699871, |
| "grad_norm": 2.4978456497192383, |
| "learning_rate": 8.232758620689656e-06, |
| "loss": 0.4405, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.129366106080207, |
| "grad_norm": 1.71527099609375, |
| "learning_rate": 8.577586206896551e-06, |
| "loss": 0.4694, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13454075032341525, |
| "grad_norm": 1.814359188079834, |
| "learning_rate": 8.922413793103449e-06, |
| "loss": 0.4213, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.13971539456662355, |
| "grad_norm": 1.0052739381790161, |
| "learning_rate": 9.267241379310346e-06, |
| "loss": 0.4165, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.1448900388098318, |
| "grad_norm": 3.3144054412841797, |
| "learning_rate": 9.612068965517242e-06, |
| "loss": 0.4324, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.1500646830530401, |
| "grad_norm": 14.789776802062988, |
| "learning_rate": 9.95689655172414e-06, |
| "loss": 0.4362, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.15523932729624837, |
| "grad_norm": 2.4650723934173584, |
| "learning_rate": 9.999978494742326e-06, |
| "loss": 0.4394, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.16041397153945666, |
| "grad_norm": 1.6728233098983765, |
| "learning_rate": 9.999901251622079e-06, |
| "loss": 0.4686, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.16558861578266496, |
| "grad_norm": 1.9105558395385742, |
| "learning_rate": 9.999767832624e-06, |
| "loss": 0.4479, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.17076326002587322, |
| "grad_norm": 2.219156503677368, |
| "learning_rate": 9.999578239247104e-06, |
| "loss": 0.4562, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1759379042690815, |
| "grad_norm": 2.4550633430480957, |
| "learning_rate": 9.999332473621544e-06, |
| "loss": 0.4546, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.18111254851228978, |
| "grad_norm": 2.5587055683135986, |
| "learning_rate": 9.999030538508598e-06, |
| "loss": 0.4527, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.18628719275549807, |
| "grad_norm": 3.2998480796813965, |
| "learning_rate": 9.99867243730063e-06, |
| "loss": 0.4528, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.19146183699870634, |
| "grad_norm": 6.697995662689209, |
| "learning_rate": 9.998258174021043e-06, |
| "loss": 0.4488, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.19663648124191463, |
| "grad_norm": 2.801957607269287, |
| "learning_rate": 9.997787753324253e-06, |
| "loss": 0.4644, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.2018111254851229, |
| "grad_norm": 7.488362789154053, |
| "learning_rate": 9.997261180495623e-06, |
| "loss": 0.4567, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.2069857697283312, |
| "grad_norm": 11.816390991210938, |
| "learning_rate": 9.996678461451408e-06, |
| "loss": 0.4542, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.21216041397153945, |
| "grad_norm": 4.340507984161377, |
| "learning_rate": 9.996039602738688e-06, |
| "loss": 0.4299, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.21733505821474774, |
| "grad_norm": 2.5847530364990234, |
| "learning_rate": 9.995344611535295e-06, |
| "loss": 0.4199, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.222509702457956, |
| "grad_norm": 7.996049404144287, |
| "learning_rate": 9.994593495649733e-06, |
| "loss": 0.4006, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.2276843467011643, |
| "grad_norm": 8.294257164001465, |
| "learning_rate": 9.993786263521083e-06, |
| "loss": 0.3779, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.23285899094437257, |
| "grad_norm": 8.907233238220215, |
| "learning_rate": 9.992922924218924e-06, |
| "loss": 0.3424, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23803363518758086, |
| "grad_norm": 20.33425521850586, |
| "learning_rate": 9.99200348744321e-06, |
| "loss": 0.3199, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.24320827943078913, |
| "grad_norm": 13.30887508392334, |
| "learning_rate": 9.991027963524188e-06, |
| "loss": 0.2968, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.24838292367399742, |
| "grad_norm": 24.578723907470703, |
| "learning_rate": 9.989996363422246e-06, |
| "loss": 0.2971, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.2535575679172057, |
| "grad_norm": 7.940666198730469, |
| "learning_rate": 9.988908698727828e-06, |
| "loss": 0.2772, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.258732212160414, |
| "grad_norm": 48.541175842285156, |
| "learning_rate": 9.987764981661278e-06, |
| "loss": 0.2937, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.26390685640362227, |
| "grad_norm": 8.2228364944458, |
| "learning_rate": 9.986565225072713e-06, |
| "loss": 0.2775, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2690815006468305, |
| "grad_norm": 37.225929260253906, |
| "learning_rate": 9.98530944244187e-06, |
| "loss": 0.2628, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2742561448900388, |
| "grad_norm": 28.484724044799805, |
| "learning_rate": 9.983997647877973e-06, |
| "loss": 0.2671, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2794307891332471, |
| "grad_norm": 7.030237197875977, |
| "learning_rate": 9.98262985611955e-06, |
| "loss": 0.2598, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2846054333764554, |
| "grad_norm": 6.647619247436523, |
| "learning_rate": 9.981206082534287e-06, |
| "loss": 0.2519, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2897800776196636, |
| "grad_norm": 17.09227180480957, |
| "learning_rate": 9.979726343118847e-06, |
| "loss": 0.2567, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2949547218628719, |
| "grad_norm": 24.190093994140625, |
| "learning_rate": 9.978190654498687e-06, |
| "loss": 0.2476, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.3001293661060802, |
| "grad_norm": 160.548095703125, |
| "learning_rate": 9.976599033927884e-06, |
| "loss": 0.2546, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.3053040103492885, |
| "grad_norm": 5.440623760223389, |
| "learning_rate": 9.974951499288925e-06, |
| "loss": 0.2497, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.31047865459249674, |
| "grad_norm": 21.864320755004883, |
| "learning_rate": 9.973248069092516e-06, |
| "loss": 0.2766, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.31565329883570503, |
| "grad_norm": 1.826672911643982, |
| "learning_rate": 9.971488762477373e-06, |
| "loss": 0.2578, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.3208279430789133, |
| "grad_norm": 11.376463890075684, |
| "learning_rate": 9.969673599210006e-06, |
| "loss": 0.2619, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.3260025873221216, |
| "grad_norm": 4.004533290863037, |
| "learning_rate": 9.967802599684494e-06, |
| "loss": 0.2586, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.3311772315653299, |
| "grad_norm": 14.940202713012695, |
| "learning_rate": 9.965875784922261e-06, |
| "loss": 0.2624, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.33635187580853815, |
| "grad_norm": 5.1946940422058105, |
| "learning_rate": 9.963893176571836e-06, |
| "loss": 0.2569, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.34152652005174644, |
| "grad_norm": 6.96012544631958, |
| "learning_rate": 9.961854796908615e-06, |
| "loss": 0.2501, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.34670116429495473, |
| "grad_norm": 20.033016204833984, |
| "learning_rate": 9.959760668834601e-06, |
| "loss": 0.2466, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.351875808538163, |
| "grad_norm": 5.902473449707031, |
| "learning_rate": 9.957610815878156e-06, |
| "loss": 0.2424, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.35705045278137126, |
| "grad_norm": 2.925157070159912, |
| "learning_rate": 9.955405262193731e-06, |
| "loss": 0.2429, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.36222509702457956, |
| "grad_norm": 3.106954336166382, |
| "learning_rate": 9.9531440325616e-06, |
| "loss": 0.2367, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.36739974126778785, |
| "grad_norm": 8.999578475952148, |
| "learning_rate": 9.950827152387575e-06, |
| "loss": 0.2428, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.37257438551099614, |
| "grad_norm": 8.012633323669434, |
| "learning_rate": 9.948454647702727e-06, |
| "loss": 0.2447, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3777490297542044, |
| "grad_norm": 17.479957580566406, |
| "learning_rate": 9.94602654516309e-06, |
| "loss": 0.2539, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.3829236739974127, |
| "grad_norm": 111.3963851928711, |
| "learning_rate": 9.94354287204936e-06, |
| "loss": 0.2536, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.38809831824062097, |
| "grad_norm": 67.85525512695312, |
| "learning_rate": 9.941003656266589e-06, |
| "loss": 0.263, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.39327296248382926, |
| "grad_norm": 5.47231912612915, |
| "learning_rate": 9.93840892634388e-06, |
| "loss": 0.2395, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3984476067270375, |
| "grad_norm": 20.308883666992188, |
| "learning_rate": 9.935758711434052e-06, |
| "loss": 0.2433, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.4036222509702458, |
| "grad_norm": 5.908266544342041, |
| "learning_rate": 9.933053041313325e-06, |
| "loss": 0.2531, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.4087968952134541, |
| "grad_norm": 4.62359094619751, |
| "learning_rate": 9.930291946380977e-06, |
| "loss": 0.2572, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.4139715394566624, |
| "grad_norm": 33.82321548461914, |
| "learning_rate": 9.927475457659007e-06, |
| "loss": 0.252, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4191461836998706, |
| "grad_norm": 2.1320619583129883, |
| "learning_rate": 9.924603606791786e-06, |
| "loss": 0.2484, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4243208279430789, |
| "grad_norm": 5.690158367156982, |
| "learning_rate": 9.921676426045698e-06, |
| "loss": 0.242, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.4294954721862872, |
| "grad_norm": 4.768186092376709, |
| "learning_rate": 9.918693948308783e-06, |
| "loss": 0.2258, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.4346701164294955, |
| "grad_norm": 7.269800662994385, |
| "learning_rate": 9.915656207090367e-06, |
| "loss": 0.2397, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.4398447606727037, |
| "grad_norm": 30.480161666870117, |
| "learning_rate": 9.912563236520675e-06, |
| "loss": 0.2296, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.445019404915912, |
| "grad_norm": 22.762638092041016, |
| "learning_rate": 9.909415071350464e-06, |
| "loss": 0.233, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.4501940491591203, |
| "grad_norm": 13.787392616271973, |
| "learning_rate": 9.90621174695062e-06, |
| "loss": 0.2333, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4553686934023286, |
| "grad_norm": 3.74238657951355, |
| "learning_rate": 9.902953299311763e-06, |
| "loss": 0.236, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.46054333764553684, |
| "grad_norm": 71.07587432861328, |
| "learning_rate": 9.899639765043854e-06, |
| "loss": 0.2549, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.46571798188874514, |
| "grad_norm": 3.8564956188201904, |
| "learning_rate": 9.89627118137576e-06, |
| "loss": 0.2488, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.47089262613195343, |
| "grad_norm": 3.945932626724243, |
| "learning_rate": 9.892847586154863e-06, |
| "loss": 0.2435, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.4760672703751617, |
| "grad_norm": 8.651078224182129, |
| "learning_rate": 9.889369017846616e-06, |
| "loss": 0.2301, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.48124191461837, |
| "grad_norm": 23.654037475585938, |
| "learning_rate": 9.88583551553411e-06, |
| "loss": 0.2408, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.48641655886157825, |
| "grad_norm": 9.601846694946289, |
| "learning_rate": 9.882247118917656e-06, |
| "loss": 0.2578, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.49159120310478654, |
| "grad_norm": 7.171313762664795, |
| "learning_rate": 9.87860386831431e-06, |
| "loss": 0.2412, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.49676584734799484, |
| "grad_norm": 13.73882007598877, |
| "learning_rate": 9.874905804657445e-06, |
| "loss": 0.235, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.5019404915912031, |
| "grad_norm": 8.866873741149902, |
| "learning_rate": 9.871152969496274e-06, |
| "loss": 0.2259, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.5071151358344114, |
| "grad_norm": 2.9023540019989014, |
| "learning_rate": 9.867345404995393e-06, |
| "loss": 0.2382, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.5122897800776197, |
| "grad_norm": 8.159908294677734, |
| "learning_rate": 9.8634831539343e-06, |
| "loss": 0.2362, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.517464424320828, |
| "grad_norm": 5.698594093322754, |
| "learning_rate": 9.85956625970692e-06, |
| "loss": 0.2312, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5226390685640362, |
| "grad_norm": 1.7953461408615112, |
| "learning_rate": 9.855594766321122e-06, |
| "loss": 0.2341, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.5278137128072445, |
| "grad_norm": 6.394684791564941, |
| "learning_rate": 9.85156871839821e-06, |
| "loss": 0.2467, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.5329883570504528, |
| "grad_norm": 3.978597402572632, |
| "learning_rate": 9.847488161172429e-06, |
| "loss": 0.2411, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.538163001293661, |
| "grad_norm": 49.182125091552734, |
| "learning_rate": 9.843353140490466e-06, |
| "loss": 0.2394, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5433376455368694, |
| "grad_norm": 95.73147583007812, |
| "learning_rate": 9.839163702810922e-06, |
| "loss": 0.2247, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5485122897800776, |
| "grad_norm": 14.831591606140137, |
| "learning_rate": 9.834919895203789e-06, |
| "loss": 0.2471, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.553686934023286, |
| "grad_norm": 5.494821548461914, |
| "learning_rate": 9.83062176534994e-06, |
| "loss": 0.244, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.5588615782664942, |
| "grad_norm": 2.4115333557128906, |
| "learning_rate": 9.826269361540565e-06, |
| "loss": 0.2532, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5640362225097024, |
| "grad_norm": 13.977892875671387, |
| "learning_rate": 9.821862732676655e-06, |
| "loss": 0.2507, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5692108667529108, |
| "grad_norm": 9.79362964630127, |
| "learning_rate": 9.817401928268435e-06, |
| "loss": 0.2237, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.574385510996119, |
| "grad_norm": 1.0879400968551636, |
| "learning_rate": 9.812886998434817e-06, |
| "loss": 0.2281, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5795601552393272, |
| "grad_norm": 7.494878768920898, |
| "learning_rate": 9.80831799390283e-06, |
| "loss": 0.2375, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5847347994825356, |
| "grad_norm": 6.011063098907471, |
| "learning_rate": 9.803694966007059e-06, |
| "loss": 0.2363, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5899094437257438, |
| "grad_norm": 3.7052977085113525, |
| "learning_rate": 9.799017966689057e-06, |
| "loss": 0.2217, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5950840879689522, |
| "grad_norm": 1.6096714735031128, |
| "learning_rate": 9.794287048496771e-06, |
| "loss": 0.2399, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6002587322121604, |
| "grad_norm": 4.586109638214111, |
| "learning_rate": 9.789502264583949e-06, |
| "loss": 0.2511, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.6054333764553687, |
| "grad_norm": 6.05584192276001, |
| "learning_rate": 9.784663668709537e-06, |
| "loss": 0.2411, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.610608020698577, |
| "grad_norm": 10.09545612335205, |
| "learning_rate": 9.779771315237086e-06, |
| "loss": 0.2565, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.6157826649417852, |
| "grad_norm": 21.93640899658203, |
| "learning_rate": 9.77482525913413e-06, |
| "loss": 0.2294, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.6209573091849935, |
| "grad_norm": 12.018024444580078, |
| "learning_rate": 9.769825555971575e-06, |
| "loss": 0.2268, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6261319534282018, |
| "grad_norm": 17.39588165283203, |
| "learning_rate": 9.764772261923074e-06, |
| "loss": 0.2349, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.6313065976714101, |
| "grad_norm": 15.817480087280273, |
| "learning_rate": 9.759665433764393e-06, |
| "loss": 0.2238, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.6364812419146184, |
| "grad_norm": 6.889886379241943, |
| "learning_rate": 9.754505128872778e-06, |
| "loss": 0.2409, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.6416558861578266, |
| "grad_norm": 5.029130935668945, |
| "learning_rate": 9.749291405226304e-06, |
| "loss": 0.2388, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.6468305304010349, |
| "grad_norm": 7.987931728363037, |
| "learning_rate": 9.744024321403229e-06, |
| "loss": 0.2306, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6520051746442432, |
| "grad_norm": 3.22019100189209, |
| "learning_rate": 9.738703936581333e-06, |
| "loss": 0.2444, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.6571798188874515, |
| "grad_norm": 13.65085220336914, |
| "learning_rate": 9.733330310537255e-06, |
| "loss": 0.248, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.6623544631306598, |
| "grad_norm": 12.927434921264648, |
| "learning_rate": 9.727903503645818e-06, |
| "loss": 0.2226, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6675291073738681, |
| "grad_norm": 1.27034592628479, |
| "learning_rate": 9.722423576879354e-06, |
| "loss": 0.237, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.6727037516170763, |
| "grad_norm": 266.6712951660156, |
| "learning_rate": 9.71689059180702e-06, |
| "loss": 0.2384, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6778783958602846, |
| "grad_norm": 8.363792419433594, |
| "learning_rate": 9.711304610594104e-06, |
| "loss": 0.2278, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.6830530401034929, |
| "grad_norm": 2.284507989883423, |
| "learning_rate": 9.70566569600132e-06, |
| "loss": 0.2452, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.6882276843467011, |
| "grad_norm": 3.4681687355041504, |
| "learning_rate": 9.699973911384119e-06, |
| "loss": 0.2338, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.6934023285899095, |
| "grad_norm": 21.047893524169922, |
| "learning_rate": 9.694229320691961e-06, |
| "loss": 0.2189, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.6985769728331177, |
| "grad_norm": 7.996535778045654, |
| "learning_rate": 9.688431988467609e-06, |
| "loss": 0.2475, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.703751617076326, |
| "grad_norm": 3.221071481704712, |
| "learning_rate": 9.682581979846388e-06, |
| "loss": 0.2501, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.7089262613195343, |
| "grad_norm": 14.775407791137695, |
| "learning_rate": 9.676679360555479e-06, |
| "loss": 0.2245, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.7141009055627425, |
| "grad_norm": 3.179734945297241, |
| "learning_rate": 9.670724196913149e-06, |
| "loss": 0.2314, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.7192755498059509, |
| "grad_norm": 36.58845901489258, |
| "learning_rate": 9.66471655582803e-06, |
| "loss": 0.2414, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.7244501940491591, |
| "grad_norm": 2.0989410877227783, |
| "learning_rate": 9.658656504798361e-06, |
| "loss": 0.2373, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7296248382923674, |
| "grad_norm": 67.86420440673828, |
| "learning_rate": 9.652544111911218e-06, |
| "loss": 0.2414, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.7347994825355757, |
| "grad_norm": 64.04298400878906, |
| "learning_rate": 9.646379445841769e-06, |
| "loss": 0.2419, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.7399741267787839, |
| "grad_norm": 5.908459663391113, |
| "learning_rate": 9.640162575852487e-06, |
| "loss": 0.2328, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.7451487710219923, |
| "grad_norm": 5.114171981811523, |
| "learning_rate": 9.633893571792375e-06, |
| "loss": 0.2117, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.7503234152652005, |
| "grad_norm": 8.035240173339844, |
| "learning_rate": 9.627572504096188e-06, |
| "loss": 0.2344, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7554980595084088, |
| "grad_norm": 3.9648971557617188, |
| "learning_rate": 9.621199443783633e-06, |
| "loss": 0.2248, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.7606727037516171, |
| "grad_norm": 17.44873046875, |
| "learning_rate": 9.614774462458573e-06, |
| "loss": 0.2377, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.7658473479948253, |
| "grad_norm": 10.615303039550781, |
| "learning_rate": 9.608297632308233e-06, |
| "loss": 0.2278, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.7710219922380336, |
| "grad_norm": 29.23194122314453, |
| "learning_rate": 9.601769026102368e-06, |
| "loss": 0.2213, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.7761966364812419, |
| "grad_norm": 21.02916717529297, |
| "learning_rate": 9.595188717192466e-06, |
| "loss": 0.2382, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7813712807244502, |
| "grad_norm": 9.737796783447266, |
| "learning_rate": 9.58855677951091e-06, |
| "loss": 0.2347, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.7865459249676585, |
| "grad_norm": 59.887420654296875, |
| "learning_rate": 9.581873287570164e-06, |
| "loss": 0.2379, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.7917205692108668, |
| "grad_norm": 25.03373146057129, |
| "learning_rate": 9.575138316461909e-06, |
| "loss": 0.2358, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.796895213454075, |
| "grad_norm": 3.6164708137512207, |
| "learning_rate": 9.568351941856223e-06, |
| "loss": 0.2511, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.8020698576972833, |
| "grad_norm": 86.80042266845703, |
| "learning_rate": 9.561514240000724e-06, |
| "loss": 0.2262, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8072445019404916, |
| "grad_norm": 6.826155185699463, |
| "learning_rate": 9.554625287719711e-06, |
| "loss": 0.231, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.8124191461836999, |
| "grad_norm": 1.1142396926879883, |
| "learning_rate": 9.547685162413298e-06, |
| "loss": 0.2187, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.8175937904269082, |
| "grad_norm": 1.7491034269332886, |
| "learning_rate": 9.540693942056553e-06, |
| "loss": 0.237, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.8227684346701164, |
| "grad_norm": 43.96466064453125, |
| "learning_rate": 9.533651705198616e-06, |
| "loss": 0.2347, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.8279430789133247, |
| "grad_norm": 2.50284481048584, |
| "learning_rate": 9.526558530961817e-06, |
| "loss": 0.2199, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.833117723156533, |
| "grad_norm": 3.6089391708374023, |
| "learning_rate": 9.519414499040785e-06, |
| "loss": 0.2341, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.8382923673997412, |
| "grad_norm": 2.025322914123535, |
| "learning_rate": 9.51221968970156e-06, |
| "loss": 0.2317, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.8434670116429496, |
| "grad_norm": 3.1698544025421143, |
| "learning_rate": 9.504974183780686e-06, |
| "loss": 0.2133, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.8486416558861578, |
| "grad_norm": 2.9666683673858643, |
| "learning_rate": 9.497678062684301e-06, |
| "loss": 0.2224, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.8538163001293662, |
| "grad_norm": 6.248025894165039, |
| "learning_rate": 9.490331408387225e-06, |
| "loss": 0.2145, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.8589909443725744, |
| "grad_norm": 5.850505828857422, |
| "learning_rate": 9.482934303432038e-06, |
| "loss": 0.2277, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.8641655886157826, |
| "grad_norm": 8.283476829528809, |
| "learning_rate": 9.475486830928155e-06, |
| "loss": 0.2219, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.869340232858991, |
| "grad_norm": 1.7238540649414062, |
| "learning_rate": 9.467989074550891e-06, |
| "loss": 0.2384, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.8745148771021992, |
| "grad_norm": 2.008312225341797, |
| "learning_rate": 9.46044111854052e-06, |
| "loss": 0.2006, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.8796895213454075, |
| "grad_norm": 5.544405937194824, |
| "learning_rate": 9.452843047701324e-06, |
| "loss": 0.2313, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8848641655886158, |
| "grad_norm": 2.758505344390869, |
| "learning_rate": 9.44519494740065e-06, |
| "loss": 0.2333, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.890038809831824, |
| "grad_norm": 1.708716630935669, |
| "learning_rate": 9.437496903567946e-06, |
| "loss": 0.2276, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.8952134540750324, |
| "grad_norm": 14.639440536499023, |
| "learning_rate": 9.429749002693793e-06, |
| "loss": 0.233, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.9003880983182406, |
| "grad_norm": 18.10475730895996, |
| "learning_rate": 9.421951331828938e-06, |
| "loss": 0.2351, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.9055627425614489, |
| "grad_norm": 15.641255378723145, |
| "learning_rate": 9.414103978583312e-06, |
| "loss": 0.2297, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9107373868046572, |
| "grad_norm": 13.53865909576416, |
| "learning_rate": 9.406207031125048e-06, |
| "loss": 0.2171, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.9159120310478654, |
| "grad_norm": 7.33931827545166, |
| "learning_rate": 9.398260578179487e-06, |
| "loss": 0.2258, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.9210866752910737, |
| "grad_norm": 1.1460613012313843, |
| "learning_rate": 9.390264709028189e-06, |
| "loss": 0.2223, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.926261319534282, |
| "grad_norm": 3.664703607559204, |
| "learning_rate": 9.382219513507922e-06, |
| "loss": 0.2291, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.9314359637774903, |
| "grad_norm": 2.028292655944824, |
| "learning_rate": 9.374125082009654e-06, |
| "loss": 0.2197, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.9366106080206986, |
| "grad_norm": 1.853790044784546, |
| "learning_rate": 9.365981505477541e-06, |
| "loss": 0.2086, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.9417852522639069, |
| "grad_norm": 49.15569305419922, |
| "learning_rate": 9.3577888754079e-06, |
| "loss": 0.2245, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.9469598965071151, |
| "grad_norm": 3.0362915992736816, |
| "learning_rate": 9.34954728384819e-06, |
| "loss": 0.2304, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.9521345407503234, |
| "grad_norm": 4.143098831176758, |
| "learning_rate": 9.341256823395965e-06, |
| "loss": 0.237, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.9573091849935317, |
| "grad_norm": 5.784548759460449, |
| "learning_rate": 9.332917587197844e-06, |
| "loss": 0.2176, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.96248382923674, |
| "grad_norm": 13.278392791748047, |
| "learning_rate": 9.324529668948459e-06, |
| "loss": 0.2303, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.9676584734799483, |
| "grad_norm": 46.16282272338867, |
| "learning_rate": 9.316093162889407e-06, |
| "loss": 0.2226, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.9728331177231565, |
| "grad_norm": 2.1848196983337402, |
| "learning_rate": 9.307608163808189e-06, |
| "loss": 0.2481, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.9780077619663649, |
| "grad_norm": 3.2306673526763916, |
| "learning_rate": 9.299074767037137e-06, |
| "loss": 0.2144, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.9831824062095731, |
| "grad_norm": 7.1894755363464355, |
| "learning_rate": 9.290493068452357e-06, |
| "loss": 0.2319, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9883570504527813, |
| "grad_norm": 13.211376190185547, |
| "learning_rate": 9.281863164472647e-06, |
| "loss": 0.2368, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.9935316946959897, |
| "grad_norm": 3.837167739868164, |
| "learning_rate": 9.273185152058406e-06, |
| "loss": 0.237, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.9987063389391979, |
| "grad_norm": 26.63664436340332, |
| "learning_rate": 9.26445912871055e-06, |
| "loss": 0.2236, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.0038809831824063, |
| "grad_norm": 5.332308292388916, |
| "learning_rate": 9.255685192469424e-06, |
| "loss": 0.2325, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.0090556274256144, |
| "grad_norm": 1.7717430591583252, |
| "learning_rate": 9.246863441913685e-06, |
| "loss": 0.2244, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.0142302716688227, |
| "grad_norm": 36.790260314941406, |
| "learning_rate": 9.237993976159211e-06, |
| "loss": 0.2414, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.019404915912031, |
| "grad_norm": 7.347696781158447, |
| "learning_rate": 9.229076894857973e-06, |
| "loss": 0.2339, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.0245795601552394, |
| "grad_norm": 7.630255699157715, |
| "learning_rate": 9.220112298196922e-06, |
| "loss": 0.2127, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.0297542043984476, |
| "grad_norm": 4.760798454284668, |
| "learning_rate": 9.211100286896865e-06, |
| "loss": 0.231, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.034928848641656, |
| "grad_norm": 17.15314483642578, |
| "learning_rate": 9.202040962211334e-06, |
| "loss": 0.233, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.0401034928848643, |
| "grad_norm": 7.275695323944092, |
| "learning_rate": 9.19293442592544e-06, |
| "loss": 0.2205, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.0452781371280724, |
| "grad_norm": 10.593427658081055, |
| "learning_rate": 9.183780780354736e-06, |
| "loss": 0.2137, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.0504527813712807, |
| "grad_norm": 84.94709014892578, |
| "learning_rate": 9.174580128344073e-06, |
| "loss": 0.2119, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.055627425614489, |
| "grad_norm": 6.472126007080078, |
| "learning_rate": 9.16533257326643e-06, |
| "loss": 0.235, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.0608020698576972, |
| "grad_norm": 2.1055634021759033, |
| "learning_rate": 9.156038219021764e-06, |
| "loss": 0.2329, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.0659767141009056, |
| "grad_norm": 4.520327091217041, |
| "learning_rate": 9.146697170035839e-06, |
| "loss": 0.2199, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.071151358344114, |
| "grad_norm": 2.18851375579834, |
| "learning_rate": 9.137309531259054e-06, |
| "loss": 0.2305, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.076326002587322, |
| "grad_norm": 7.414587497711182, |
| "learning_rate": 9.127875408165261e-06, |
| "loss": 0.2172, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.0815006468305304, |
| "grad_norm": 40.298065185546875, |
| "learning_rate": 9.118394906750585e-06, |
| "loss": 0.2222, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.0866752910737387, |
| "grad_norm": 2.0129568576812744, |
| "learning_rate": 9.108868133532224e-06, |
| "loss": 0.2385, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.0918499353169469, |
| "grad_norm": 10.248085975646973, |
| "learning_rate": 9.099295195547264e-06, |
| "loss": 0.2252, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.0970245795601552, |
| "grad_norm": 3.0196053981781006, |
| "learning_rate": 9.089676200351467e-06, |
| "loss": 0.2266, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.1021992238033635, |
| "grad_norm": 2.057884931564331, |
| "learning_rate": 9.08001125601807e-06, |
| "loss": 0.2353, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.107373868046572, |
| "grad_norm": 3.0884134769439697, |
| "learning_rate": 9.07030047113656e-06, |
| "loss": 0.2137, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.11254851228978, |
| "grad_norm": 2.399594783782959, |
| "learning_rate": 9.060543954811464e-06, |
| "loss": 0.2122, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.1177231565329884, |
| "grad_norm": 20.144880294799805, |
| "learning_rate": 9.050741816661128e-06, |
| "loss": 0.222, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.1228978007761967, |
| "grad_norm": 3.0582709312438965, |
| "learning_rate": 9.040894166816461e-06, |
| "loss": 0.2162, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.1280724450194048, |
| "grad_norm": 3.313769578933716, |
| "learning_rate": 9.031001115919732e-06, |
| "loss": 0.23, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.1332470892626132, |
| "grad_norm": 2.804013967514038, |
| "learning_rate": 9.02106277512329e-06, |
| "loss": 0.2367, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.1384217335058215, |
| "grad_norm": 9.238598823547363, |
| "learning_rate": 9.011079256088355e-06, |
| "loss": 0.2371, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.1435963777490297, |
| "grad_norm": 10.921507835388184, |
| "learning_rate": 9.001050670983721e-06, |
| "loss": 0.2327, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.148771021992238, |
| "grad_norm": 1.8150590658187866, |
| "learning_rate": 8.990977132484535e-06, |
| "loss": 0.233, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.1539456662354464, |
| "grad_norm": 1.9657435417175293, |
| "learning_rate": 8.980858753771002e-06, |
| "loss": 0.2172, |
| "step": 1784 |
| }, |
| { |
| "epoch": 1.1591203104786545, |
| "grad_norm": 3.5203697681427, |
| "learning_rate": 8.970695648527132e-06, |
| "loss": 0.2129, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.1642949547218628, |
| "grad_norm": 8.29598617553711, |
| "learning_rate": 8.96048793093945e-06, |
| "loss": 0.2325, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.1694695989650712, |
| "grad_norm": 1.7340894937515259, |
| "learning_rate": 8.950235715695717e-06, |
| "loss": 0.2177, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.1746442432082795, |
| "grad_norm": 8.192721366882324, |
| "learning_rate": 8.93993911798365e-06, |
| "loss": 0.2385, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.1798188874514877, |
| "grad_norm": 15.727509498596191, |
| "learning_rate": 8.929598253489617e-06, |
| "loss": 0.2367, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.184993531694696, |
| "grad_norm": 15.4706449508667, |
| "learning_rate": 8.91921323839734e-06, |
| "loss": 0.2373, |
| "step": 1832 |
| }, |
| { |
| "epoch": 1.1901681759379044, |
| "grad_norm": 7.3651227951049805, |
| "learning_rate": 8.908784189386589e-06, |
| "loss": 0.2352, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.1953428201811125, |
| "grad_norm": 15.465596199035645, |
| "learning_rate": 8.898311223631876e-06, |
| "loss": 0.223, |
| "step": 1848 |
| }, |
| { |
| "epoch": 1.2005174644243208, |
| "grad_norm": 7.5635528564453125, |
| "learning_rate": 8.887794458801137e-06, |
| "loss": 0.2179, |
| "step": 1856 |
| }, |
| { |
| "epoch": 1.2056921086675292, |
| "grad_norm": 9.759207725524902, |
| "learning_rate": 8.8772340130544e-06, |
| "loss": 0.2189, |
| "step": 1864 |
| }, |
| { |
| "epoch": 1.2108667529107373, |
| "grad_norm": 2.7430686950683594, |
| "learning_rate": 8.866630005042476e-06, |
| "loss": 0.2354, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.2160413971539457, |
| "grad_norm": 6.708351135253906, |
| "learning_rate": 8.855982553905604e-06, |
| "loss": 0.2191, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.221216041397154, |
| "grad_norm": 155.43638610839844, |
| "learning_rate": 8.845291779272131e-06, |
| "loss": 0.226, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.2263906856403621, |
| "grad_norm": 2.132230281829834, |
| "learning_rate": 8.834557801257162e-06, |
| "loss": 0.2087, |
| "step": 1896 |
| }, |
| { |
| "epoch": 1.2315653298835705, |
| "grad_norm": 4.70977783203125, |
| "learning_rate": 8.823780740461204e-06, |
| "loss": 0.2122, |
| "step": 1904 |
| }, |
| { |
| "epoch": 1.2367399741267788, |
| "grad_norm": 1.3468469381332397, |
| "learning_rate": 8.81296071796882e-06, |
| "loss": 0.2225, |
| "step": 1912 |
| }, |
| { |
| "epoch": 1.2419146183699872, |
| "grad_norm": 3.733586549758911, |
| "learning_rate": 8.80209785534726e-06, |
| "loss": 0.2395, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.2470892626131953, |
| "grad_norm": 4.492749214172363, |
| "learning_rate": 8.791192274645107e-06, |
| "loss": 0.2138, |
| "step": 1928 |
| }, |
| { |
| "epoch": 1.2522639068564037, |
| "grad_norm": 19.35169219970703, |
| "learning_rate": 8.780244098390891e-06, |
| "loss": 0.2287, |
| "step": 1936 |
| }, |
| { |
| "epoch": 1.2574385510996118, |
| "grad_norm": 1.480233073234558, |
| "learning_rate": 8.769253449591728e-06, |
| "loss": 0.2347, |
| "step": 1944 |
| }, |
| { |
| "epoch": 1.2626131953428201, |
| "grad_norm": 8.458354949951172, |
| "learning_rate": 8.758220451731922e-06, |
| "loss": 0.2327, |
| "step": 1952 |
| }, |
| { |
| "epoch": 1.2677878395860285, |
| "grad_norm": 10.992264747619629, |
| "learning_rate": 8.74714522877159e-06, |
| "loss": 0.221, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.2729624838292368, |
| "grad_norm": 15.178800582885742, |
| "learning_rate": 8.736027905145265e-06, |
| "loss": 0.2282, |
| "step": 1968 |
| }, |
| { |
| "epoch": 1.278137128072445, |
| "grad_norm": 22.52798843383789, |
| "learning_rate": 8.724868605760497e-06, |
| "loss": 0.2238, |
| "step": 1976 |
| }, |
| { |
| "epoch": 1.2833117723156533, |
| "grad_norm": 2.1530232429504395, |
| "learning_rate": 8.713667455996449e-06, |
| "loss": 0.2304, |
| "step": 1984 |
| }, |
| { |
| "epoch": 1.2884864165588616, |
| "grad_norm": 3.1006431579589844, |
| "learning_rate": 8.70242458170249e-06, |
| "loss": 0.2453, |
| "step": 1992 |
| }, |
| { |
| "epoch": 1.2936610608020698, |
| "grad_norm": 35.956756591796875, |
| "learning_rate": 8.691140109196782e-06, |
| "loss": 0.2018, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.2988357050452781, |
| "grad_norm": 32.85151672363281, |
| "learning_rate": 8.67981416526486e-06, |
| "loss": 0.2131, |
| "step": 2008 |
| }, |
| { |
| "epoch": 1.3040103492884865, |
| "grad_norm": 3.432091236114502, |
| "learning_rate": 8.668446877158205e-06, |
| "loss": 0.2433, |
| "step": 2016 |
| }, |
| { |
| "epoch": 1.3091849935316948, |
| "grad_norm": 14.427757263183594, |
| "learning_rate": 8.657038372592815e-06, |
| "loss": 0.2315, |
| "step": 2024 |
| }, |
| { |
| "epoch": 1.314359637774903, |
| "grad_norm": 2.7362518310546875, |
| "learning_rate": 8.645588779747775e-06, |
| "loss": 0.2295, |
| "step": 2032 |
| }, |
| { |
| "epoch": 1.3195342820181113, |
| "grad_norm": 3.2137720584869385, |
| "learning_rate": 8.634098227263809e-06, |
| "loss": 0.221, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.3247089262613194, |
| "grad_norm": 31.10691261291504, |
| "learning_rate": 8.622566844241846e-06, |
| "loss": 0.2174, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.3298835705045278, |
| "grad_norm": 4.460792541503906, |
| "learning_rate": 8.610994760241555e-06, |
| "loss": 0.2277, |
| "step": 2056 |
| }, |
| { |
| "epoch": 1.3350582147477361, |
| "grad_norm": 1.419631838798523, |
| "learning_rate": 8.599382105279899e-06, |
| "loss": 0.2259, |
| "step": 2064 |
| }, |
| { |
| "epoch": 1.3402328589909445, |
| "grad_norm": 6.29133939743042, |
| "learning_rate": 8.58772900982967e-06, |
| "loss": 0.2272, |
| "step": 2072 |
| }, |
| { |
| "epoch": 1.3454075032341526, |
| "grad_norm": 4.922664642333984, |
| "learning_rate": 8.576035604818031e-06, |
| "loss": 0.216, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.350582147477361, |
| "grad_norm": 4.346408843994141, |
| "learning_rate": 8.564302021625033e-06, |
| "loss": 0.212, |
| "step": 2088 |
| }, |
| { |
| "epoch": 1.3557567917205693, |
| "grad_norm": 27.521772384643555, |
| "learning_rate": 8.552528392082147e-06, |
| "loss": 0.2423, |
| "step": 2096 |
| }, |
| { |
| "epoch": 1.3609314359637774, |
| "grad_norm": 4.437674522399902, |
| "learning_rate": 8.54071484847078e-06, |
| "loss": 0.2133, |
| "step": 2104 |
| }, |
| { |
| "epoch": 1.3661060802069858, |
| "grad_norm": 18.69135284423828, |
| "learning_rate": 8.528861523520792e-06, |
| "loss": 0.2248, |
| "step": 2112 |
| }, |
| { |
| "epoch": 1.371280724450194, |
| "grad_norm": 1.3831626176834106, |
| "learning_rate": 8.516968550408998e-06, |
| "loss": 0.2158, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.3764553686934025, |
| "grad_norm": 2.99196720123291, |
| "learning_rate": 8.505036062757677e-06, |
| "loss": 0.2301, |
| "step": 2128 |
| }, |
| { |
| "epoch": 1.3816300129366106, |
| "grad_norm": 25.202573776245117, |
| "learning_rate": 8.493064194633072e-06, |
| "loss": 0.213, |
| "step": 2136 |
| }, |
| { |
| "epoch": 1.386804657179819, |
| "grad_norm": 40.16120529174805, |
| "learning_rate": 8.481053080543879e-06, |
| "loss": 0.2394, |
| "step": 2144 |
| }, |
| { |
| "epoch": 1.391979301423027, |
| "grad_norm": 32.01914596557617, |
| "learning_rate": 8.469002855439741e-06, |
| "loss": 0.2155, |
| "step": 2152 |
| }, |
| { |
| "epoch": 1.3971539456662354, |
| "grad_norm": 11.138020515441895, |
| "learning_rate": 8.456913654709725e-06, |
| "loss": 0.2337, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.4023285899094438, |
| "grad_norm": 4.366079807281494, |
| "learning_rate": 8.444785614180807e-06, |
| "loss": 0.2186, |
| "step": 2168 |
| }, |
| { |
| "epoch": 1.407503234152652, |
| "grad_norm": 19.91827392578125, |
| "learning_rate": 8.432618870116339e-06, |
| "loss": 0.2493, |
| "step": 2176 |
| }, |
| { |
| "epoch": 1.4126778783958602, |
| "grad_norm": 9.727320671081543, |
| "learning_rate": 8.42041355921453e-06, |
| "loss": 0.2207, |
| "step": 2184 |
| }, |
| { |
| "epoch": 1.4178525226390686, |
| "grad_norm": 15.481773376464844, |
| "learning_rate": 8.4081698186069e-06, |
| "loss": 0.2179, |
| "step": 2192 |
| }, |
| { |
| "epoch": 1.4230271668822767, |
| "grad_norm": 4.066092491149902, |
| "learning_rate": 8.39588778585674e-06, |
| "loss": 0.2304, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.428201811125485, |
| "grad_norm": 35.265018463134766, |
| "learning_rate": 8.383567598957567e-06, |
| "loss": 0.2237, |
| "step": 2208 |
| }, |
| { |
| "epoch": 1.4333764553686934, |
| "grad_norm": 10.66234302520752, |
| "learning_rate": 8.37120939633158e-06, |
| "loss": 0.2202, |
| "step": 2216 |
| }, |
| { |
| "epoch": 1.4385510996119018, |
| "grad_norm": 1.3266409635543823, |
| "learning_rate": 8.358813316828097e-06, |
| "loss": 0.2194, |
| "step": 2224 |
| }, |
| { |
| "epoch": 1.44372574385511, |
| "grad_norm": 4.940774440765381, |
| "learning_rate": 8.346379499722e-06, |
| "loss": 0.205, |
| "step": 2232 |
| }, |
| { |
| "epoch": 1.4489003880983182, |
| "grad_norm": 4.44492769241333, |
| "learning_rate": 8.333908084712163e-06, |
| "loss": 0.2241, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.4540750323415266, |
| "grad_norm": 2.2839596271514893, |
| "learning_rate": 8.321399211919893e-06, |
| "loss": 0.2245, |
| "step": 2248 |
| }, |
| { |
| "epoch": 1.4592496765847347, |
| "grad_norm": 22.686996459960938, |
| "learning_rate": 8.308853021887346e-06, |
| "loss": 0.2472, |
| "step": 2256 |
| }, |
| { |
| "epoch": 1.464424320827943, |
| "grad_norm": 3.752237319946289, |
| "learning_rate": 8.296269655575956e-06, |
| "loss": 0.2201, |
| "step": 2264 |
| }, |
| { |
| "epoch": 1.4695989650711514, |
| "grad_norm": 16.266639709472656, |
| "learning_rate": 8.283649254364843e-06, |
| "loss": 0.2298, |
| "step": 2272 |
| }, |
| { |
| "epoch": 1.4747736093143597, |
| "grad_norm": 1.5946576595306396, |
| "learning_rate": 8.270991960049231e-06, |
| "loss": 0.2144, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.4799482535575679, |
| "grad_norm": 38.729488372802734, |
| "learning_rate": 8.25829791483885e-06, |
| "loss": 0.2181, |
| "step": 2288 |
| }, |
| { |
| "epoch": 1.4851228978007762, |
| "grad_norm": 8.792683601379395, |
| "learning_rate": 8.245567261356347e-06, |
| "loss": 0.2204, |
| "step": 2296 |
| }, |
| { |
| "epoch": 1.4902975420439843, |
| "grad_norm": 3.90529203414917, |
| "learning_rate": 8.232800142635675e-06, |
| "loss": 0.2179, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.4954721862871927, |
| "grad_norm": 4.205716609954834, |
| "learning_rate": 8.219996702120482e-06, |
| "loss": 0.2391, |
| "step": 2312 |
| }, |
| { |
| "epoch": 1.500646830530401, |
| "grad_norm": 12.205682754516602, |
| "learning_rate": 8.207157083662516e-06, |
| "loss": 0.2365, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.5058214747736094, |
| "grad_norm": 3.6196138858795166, |
| "learning_rate": 8.19428143151999e-06, |
| "loss": 0.2246, |
| "step": 2328 |
| }, |
| { |
| "epoch": 1.5109961190168177, |
| "grad_norm": 3.0989346504211426, |
| "learning_rate": 8.181369890355975e-06, |
| "loss": 0.2266, |
| "step": 2336 |
| }, |
| { |
| "epoch": 1.5161707632600259, |
| "grad_norm": 3.95747447013855, |
| "learning_rate": 8.16842260523677e-06, |
| "loss": 0.2292, |
| "step": 2344 |
| }, |
| { |
| "epoch": 1.521345407503234, |
| "grad_norm": 4.447324752807617, |
| "learning_rate": 8.155439721630265e-06, |
| "loss": 0.2128, |
| "step": 2352 |
| }, |
| { |
| "epoch": 1.5265200517464423, |
| "grad_norm": 4.018327713012695, |
| "learning_rate": 8.14242138540432e-06, |
| "loss": 0.2098, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.5316946959896507, |
| "grad_norm": 6.639673709869385, |
| "learning_rate": 8.129367742825117e-06, |
| "loss": 0.2232, |
| "step": 2368 |
| }, |
| { |
| "epoch": 1.536869340232859, |
| "grad_norm": 17.369224548339844, |
| "learning_rate": 8.116278940555517e-06, |
| "loss": 0.2291, |
| "step": 2376 |
| }, |
| { |
| "epoch": 1.5420439844760674, |
| "grad_norm": 6.68892765045166, |
| "learning_rate": 8.103155125653419e-06, |
| "loss": 0.2425, |
| "step": 2384 |
| }, |
| { |
| "epoch": 1.5472186287192755, |
| "grad_norm": 34.08677673339844, |
| "learning_rate": 8.089996445570097e-06, |
| "loss": 0.2296, |
| "step": 2392 |
| }, |
| { |
| "epoch": 1.5523932729624839, |
| "grad_norm": 25.89212417602539, |
| "learning_rate": 8.076803048148553e-06, |
| "loss": 0.2526, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.557567917205692, |
| "grad_norm": 12.489855766296387, |
| "learning_rate": 8.06357508162185e-06, |
| "loss": 0.2218, |
| "step": 2408 |
| }, |
| { |
| "epoch": 1.5627425614489003, |
| "grad_norm": 1.4443012475967407, |
| "learning_rate": 8.050312694611451e-06, |
| "loss": 0.2239, |
| "step": 2416 |
| }, |
| { |
| "epoch": 1.5679172056921087, |
| "grad_norm": 9.047863960266113, |
| "learning_rate": 8.037016036125542e-06, |
| "loss": 0.2096, |
| "step": 2424 |
| }, |
| { |
| "epoch": 1.573091849935317, |
| "grad_norm": 8.109607696533203, |
| "learning_rate": 8.023685255557368e-06, |
| "loss": 0.2118, |
| "step": 2432 |
| }, |
| { |
| "epoch": 1.5782664941785254, |
| "grad_norm": 36.13426971435547, |
| "learning_rate": 8.010320502683549e-06, |
| "loss": 0.2083, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.5834411384217335, |
| "grad_norm": 17.517616271972656, |
| "learning_rate": 7.996921927662395e-06, |
| "loss": 0.2078, |
| "step": 2448 |
| }, |
| { |
| "epoch": 1.5886157826649416, |
| "grad_norm": 4.454436779022217, |
| "learning_rate": 7.983489681032219e-06, |
| "loss": 0.2428, |
| "step": 2456 |
| }, |
| { |
| "epoch": 1.59379042690815, |
| "grad_norm": 26.744815826416016, |
| "learning_rate": 7.970023913709652e-06, |
| "loss": 0.2263, |
| "step": 2464 |
| }, |
| { |
| "epoch": 1.5989650711513583, |
| "grad_norm": 2.163850784301758, |
| "learning_rate": 7.956524776987945e-06, |
| "loss": 0.2253, |
| "step": 2472 |
| }, |
| { |
| "epoch": 1.6041397153945667, |
| "grad_norm": 2.051708936691284, |
| "learning_rate": 7.94299242253526e-06, |
| "loss": 0.2352, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.609314359637775, |
| "grad_norm": 9.460783004760742, |
| "learning_rate": 7.929427002392981e-06, |
| "loss": 0.2407, |
| "step": 2488 |
| }, |
| { |
| "epoch": 1.6144890038809832, |
| "grad_norm": 13.439695358276367, |
| "learning_rate": 7.915828668973992e-06, |
| "loss": 0.2189, |
| "step": 2496 |
| }, |
| { |
| "epoch": 1.6196636481241915, |
| "grad_norm": 14.777132987976074, |
| "learning_rate": 7.902197575060978e-06, |
| "loss": 0.2232, |
| "step": 2504 |
| }, |
| { |
| "epoch": 1.6248382923673996, |
| "grad_norm": 36.123809814453125, |
| "learning_rate": 7.888533873804693e-06, |
| "loss": 0.2258, |
| "step": 2512 |
| }, |
| { |
| "epoch": 1.630012936610608, |
| "grad_norm": 1.7682701349258423, |
| "learning_rate": 7.874837718722254e-06, |
| "loss": 0.2339, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.6351875808538163, |
| "grad_norm": 13.471175193786621, |
| "learning_rate": 7.861109263695405e-06, |
| "loss": 0.2441, |
| "step": 2528 |
| }, |
| { |
| "epoch": 1.6403622250970247, |
| "grad_norm": 2.4435620307922363, |
| "learning_rate": 7.847348662968796e-06, |
| "loss": 0.2245, |
| "step": 2536 |
| }, |
| { |
| "epoch": 1.645536869340233, |
| "grad_norm": 2.9155242443084717, |
| "learning_rate": 7.833556071148245e-06, |
| "loss": 0.2229, |
| "step": 2544 |
| }, |
| { |
| "epoch": 1.6507115135834411, |
| "grad_norm": 1.8082088232040405, |
| "learning_rate": 7.819731643199006e-06, |
| "loss": 0.2273, |
| "step": 2552 |
| }, |
| { |
| "epoch": 1.6558861578266493, |
| "grad_norm": 1.26021409034729, |
| "learning_rate": 7.805875534444016e-06, |
| "loss": 0.2234, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.6610608020698576, |
| "grad_norm": 1.4038159847259521, |
| "learning_rate": 7.79198790056217e-06, |
| "loss": 0.2318, |
| "step": 2568 |
| }, |
| { |
| "epoch": 1.666235446313066, |
| "grad_norm": 217.4139862060547, |
| "learning_rate": 7.77806889758655e-06, |
| "loss": 0.2318, |
| "step": 2576 |
| }, |
| { |
| "epoch": 1.6714100905562743, |
| "grad_norm": 3.6848537921905518, |
| "learning_rate": 7.764118681902688e-06, |
| "loss": 0.2276, |
| "step": 2584 |
| }, |
| { |
| "epoch": 1.6765847347994827, |
| "grad_norm": 5.489861965179443, |
| "learning_rate": 7.750137410246803e-06, |
| "loss": 0.2255, |
| "step": 2592 |
| }, |
| { |
| "epoch": 1.6817593790426908, |
| "grad_norm": 3.472351312637329, |
| "learning_rate": 7.73612523970404e-06, |
| "loss": 0.2106, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.6869340232858991, |
| "grad_norm": 4.789738178253174, |
| "learning_rate": 7.722082327706701e-06, |
| "loss": 0.2432, |
| "step": 2608 |
| }, |
| { |
| "epoch": 1.6921086675291073, |
| "grad_norm": 1.9528974294662476, |
| "learning_rate": 7.708008832032485e-06, |
| "loss": 0.2263, |
| "step": 2616 |
| }, |
| { |
| "epoch": 1.6972833117723156, |
| "grad_norm": 21.182388305664062, |
| "learning_rate": 7.693904910802712e-06, |
| "loss": 0.2346, |
| "step": 2624 |
| }, |
| { |
| "epoch": 1.702457956015524, |
| "grad_norm": 12.879313468933105, |
| "learning_rate": 7.679770722480539e-06, |
| "loss": 0.2041, |
| "step": 2632 |
| }, |
| { |
| "epoch": 1.7076326002587323, |
| "grad_norm": 8.2993745803833, |
| "learning_rate": 7.665606425869194e-06, |
| "loss": 0.2193, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.7128072445019404, |
| "grad_norm": 8.905396461486816, |
| "learning_rate": 7.651412180110176e-06, |
| "loss": 0.2067, |
| "step": 2648 |
| }, |
| { |
| "epoch": 1.7179818887451488, |
| "grad_norm": 2.264564037322998, |
| "learning_rate": 7.637188144681478e-06, |
| "loss": 0.2225, |
| "step": 2656 |
| }, |
| { |
| "epoch": 1.723156532988357, |
| "grad_norm": 6.169471263885498, |
| "learning_rate": 7.622934479395792e-06, |
| "loss": 0.2128, |
| "step": 2664 |
| }, |
| { |
| "epoch": 1.7283311772315653, |
| "grad_norm": 13.35564136505127, |
| "learning_rate": 7.608651344398713e-06, |
| "loss": 0.2185, |
| "step": 2672 |
| }, |
| { |
| "epoch": 1.7335058214747736, |
| "grad_norm": 3.8094420433044434, |
| "learning_rate": 7.5943389001669395e-06, |
| "loss": 0.2038, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.738680465717982, |
| "grad_norm": 7.062588214874268, |
| "learning_rate": 7.579997307506472e-06, |
| "loss": 0.2247, |
| "step": 2688 |
| }, |
| { |
| "epoch": 1.7438551099611903, |
| "grad_norm": 4.3430962562561035, |
| "learning_rate": 7.565626727550804e-06, |
| "loss": 0.213, |
| "step": 2696 |
| }, |
| { |
| "epoch": 1.7490297542043984, |
| "grad_norm": 0.8228983283042908, |
| "learning_rate": 7.551227321759111e-06, |
| "loss": 0.2116, |
| "step": 2704 |
| }, |
| { |
| "epoch": 1.7542043984476066, |
| "grad_norm": 2.99155592918396, |
| "learning_rate": 7.536799251914442e-06, |
| "loss": 0.2295, |
| "step": 2712 |
| }, |
| { |
| "epoch": 1.759379042690815, |
| "grad_norm": 1.8452483415603638, |
| "learning_rate": 7.522342680121897e-06, |
| "loss": 0.2174, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.7645536869340233, |
| "grad_norm": 9.412191390991211, |
| "learning_rate": 7.507857768806803e-06, |
| "loss": 0.2125, |
| "step": 2728 |
| }, |
| { |
| "epoch": 1.7697283311772316, |
| "grad_norm": 1.4425644874572754, |
| "learning_rate": 7.4933446807129e-06, |
| "loss": 0.2283, |
| "step": 2736 |
| }, |
| { |
| "epoch": 1.77490297542044, |
| "grad_norm": 2.3930792808532715, |
| "learning_rate": 7.4788035789005e-06, |
| "loss": 0.2288, |
| "step": 2744 |
| }, |
| { |
| "epoch": 1.780077619663648, |
| "grad_norm": 8.610286712646484, |
| "learning_rate": 7.464234626744659e-06, |
| "loss": 0.2197, |
| "step": 2752 |
| }, |
| { |
| "epoch": 1.7852522639068564, |
| "grad_norm": 2.6517043113708496, |
| "learning_rate": 7.449637987933347e-06, |
| "loss": 0.2278, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.7904269081500646, |
| "grad_norm": 1.6406168937683105, |
| "learning_rate": 7.435013826465601e-06, |
| "loss": 0.2227, |
| "step": 2768 |
| }, |
| { |
| "epoch": 1.795601552393273, |
| "grad_norm": 11.188384056091309, |
| "learning_rate": 7.420362306649691e-06, |
| "loss": 0.2139, |
| "step": 2776 |
| }, |
| { |
| "epoch": 1.8007761966364813, |
| "grad_norm": 4.76533317565918, |
| "learning_rate": 7.405683593101263e-06, |
| "loss": 0.2279, |
| "step": 2784 |
| }, |
| { |
| "epoch": 1.8059508408796896, |
| "grad_norm": 3.7414357662200928, |
| "learning_rate": 7.390977850741498e-06, |
| "loss": 0.2098, |
| "step": 2792 |
| }, |
| { |
| "epoch": 1.811125485122898, |
| "grad_norm": 37.51686096191406, |
| "learning_rate": 7.376245244795255e-06, |
| "loss": 0.2204, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.816300129366106, |
| "grad_norm": 29.107175827026367, |
| "learning_rate": 7.361485940789221e-06, |
| "loss": 0.2254, |
| "step": 2808 |
| }, |
| { |
| "epoch": 1.8214747736093142, |
| "grad_norm": 51.822723388671875, |
| "learning_rate": 7.346700104550042e-06, |
| "loss": 0.2304, |
| "step": 2816 |
| }, |
| { |
| "epoch": 1.8266494178525226, |
| "grad_norm": 20.180925369262695, |
| "learning_rate": 7.331887902202463e-06, |
| "loss": 0.2262, |
| "step": 2824 |
| }, |
| { |
| "epoch": 1.831824062095731, |
| "grad_norm": 8.97240161895752, |
| "learning_rate": 7.317049500167466e-06, |
| "loss": 0.253, |
| "step": 2832 |
| }, |
| { |
| "epoch": 1.8369987063389392, |
| "grad_norm": 24.334049224853516, |
| "learning_rate": 7.3021850651603955e-06, |
| "loss": 0.219, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.8421733505821476, |
| "grad_norm": 14.938766479492188, |
| "learning_rate": 7.2872947641890854e-06, |
| "loss": 0.232, |
| "step": 2848 |
| }, |
| { |
| "epoch": 1.8473479948253557, |
| "grad_norm": 0.8988103270530701, |
| "learning_rate": 7.272378764551988e-06, |
| "loss": 0.213, |
| "step": 2856 |
| }, |
| { |
| "epoch": 1.852522639068564, |
| "grad_norm": 25.765361785888672, |
| "learning_rate": 7.257437233836285e-06, |
| "loss": 0.2185, |
| "step": 2864 |
| }, |
| { |
| "epoch": 1.8576972833117722, |
| "grad_norm": 21.85658836364746, |
| "learning_rate": 7.242470339916014e-06, |
| "loss": 0.2175, |
| "step": 2872 |
| }, |
| { |
| "epoch": 1.8628719275549805, |
| "grad_norm": 4.687010288238525, |
| "learning_rate": 7.227478250950178e-06, |
| "loss": 0.2221, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.868046571798189, |
| "grad_norm": 1.0599603652954102, |
| "learning_rate": 7.212461135380855e-06, |
| "loss": 0.214, |
| "step": 2888 |
| }, |
| { |
| "epoch": 1.8732212160413972, |
| "grad_norm": 8.412078857421875, |
| "learning_rate": 7.197419161931305e-06, |
| "loss": 0.2103, |
| "step": 2896 |
| }, |
| { |
| "epoch": 1.8783958602846056, |
| "grad_norm": 3.0134875774383545, |
| "learning_rate": 7.182352499604081e-06, |
| "loss": 0.2114, |
| "step": 2904 |
| }, |
| { |
| "epoch": 1.8835705045278137, |
| "grad_norm": 0.7685543298721313, |
| "learning_rate": 7.167261317679121e-06, |
| "loss": 0.2036, |
| "step": 2912 |
| }, |
| { |
| "epoch": 1.8887451487710218, |
| "grad_norm": 2.3020284175872803, |
| "learning_rate": 7.1521457857118525e-06, |
| "loss": 0.2265, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.8939197930142302, |
| "grad_norm": 18.331098556518555, |
| "learning_rate": 7.137006073531285e-06, |
| "loss": 0.2318, |
| "step": 2928 |
| }, |
| { |
| "epoch": 1.8990944372574385, |
| "grad_norm": 6.871685981750488, |
| "learning_rate": 7.121842351238102e-06, |
| "loss": 0.1977, |
| "step": 2936 |
| }, |
| { |
| "epoch": 1.9042690815006469, |
| "grad_norm": 8.529058456420898, |
| "learning_rate": 7.106654789202751e-06, |
| "loss": 0.1992, |
| "step": 2944 |
| }, |
| { |
| "epoch": 1.9094437257438552, |
| "grad_norm": 11.424173355102539, |
| "learning_rate": 7.0914435580635286e-06, |
| "loss": 0.2185, |
| "step": 2952 |
| }, |
| { |
| "epoch": 1.9146183699870634, |
| "grad_norm": 5.662269592285156, |
| "learning_rate": 7.076208828724661e-06, |
| "loss": 0.217, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.9197930142302717, |
| "grad_norm": 17.576711654663086, |
| "learning_rate": 7.060950772354389e-06, |
| "loss": 0.2251, |
| "step": 2968 |
| }, |
| { |
| "epoch": 1.9249676584734798, |
| "grad_norm": 65.82316589355469, |
| "learning_rate": 7.045669560383039e-06, |
| "loss": 0.2131, |
| "step": 2976 |
| }, |
| { |
| "epoch": 1.9301423027166882, |
| "grad_norm": 5.220096588134766, |
| "learning_rate": 7.030365364501104e-06, |
| "loss": 0.2263, |
| "step": 2984 |
| }, |
| { |
| "epoch": 1.9353169469598965, |
| "grad_norm": 13.813321113586426, |
| "learning_rate": 7.015038356657303e-06, |
| "loss": 0.2229, |
| "step": 2992 |
| }, |
| { |
| "epoch": 1.9404915912031049, |
| "grad_norm": 3.5180106163024902, |
| "learning_rate": 6.9996887090566645e-06, |
| "loss": 0.2055, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.9456662354463132, |
| "grad_norm": 13.823914527893066, |
| "learning_rate": 6.98431659415858e-06, |
| "loss": 0.2298, |
| "step": 3008 |
| }, |
| { |
| "epoch": 1.9508408796895214, |
| "grad_norm": 9.340493202209473, |
| "learning_rate": 6.968922184674868e-06, |
| "loss": 0.21, |
| "step": 3016 |
| }, |
| { |
| "epoch": 1.9560155239327295, |
| "grad_norm": 8.113641738891602, |
| "learning_rate": 6.95350565356784e-06, |
| "loss": 0.2215, |
| "step": 3024 |
| }, |
| { |
| "epoch": 1.9611901681759378, |
| "grad_norm": 4.113650321960449, |
| "learning_rate": 6.93806717404835e-06, |
| "loss": 0.2176, |
| "step": 3032 |
| }, |
| { |
| "epoch": 1.9663648124191462, |
| "grad_norm": 44.47676086425781, |
| "learning_rate": 6.922606919573851e-06, |
| "loss": 0.2188, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.9715394566623545, |
| "grad_norm": 4.712839126586914, |
| "learning_rate": 6.907125063846447e-06, |
| "loss": 0.2101, |
| "step": 3048 |
| }, |
| { |
| "epoch": 1.9767141009055629, |
| "grad_norm": 13.299867630004883, |
| "learning_rate": 6.891621780810941e-06, |
| "loss": 0.2207, |
| "step": 3056 |
| }, |
| { |
| "epoch": 1.981888745148771, |
| "grad_norm": 2.58492112159729, |
| "learning_rate": 6.876097244652879e-06, |
| "loss": 0.2233, |
| "step": 3064 |
| }, |
| { |
| "epoch": 1.9870633893919794, |
| "grad_norm": 4.740981101989746, |
| "learning_rate": 6.860551629796597e-06, |
| "loss": 0.2386, |
| "step": 3072 |
| }, |
| { |
| "epoch": 1.9922380336351875, |
| "grad_norm": 3.3356964588165283, |
| "learning_rate": 6.844985110903255e-06, |
| "loss": 0.208, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.9974126778783958, |
| "grad_norm": 11.826971054077148, |
| "learning_rate": 6.829397862868878e-06, |
| "loss": 0.2142, |
| "step": 3088 |
| }, |
| { |
| "epoch": 2.002587322121604, |
| "grad_norm": 5.242908477783203, |
| "learning_rate": 6.8137900608223985e-06, |
| "loss": 0.2276, |
| "step": 3096 |
| }, |
| { |
| "epoch": 2.0077619663648125, |
| "grad_norm": 7.4030327796936035, |
| "learning_rate": 6.798161880123671e-06, |
| "loss": 0.2199, |
| "step": 3104 |
| }, |
| { |
| "epoch": 2.012936610608021, |
| "grad_norm": 1.0129386186599731, |
| "learning_rate": 6.78251349636152e-06, |
| "loss": 0.2151, |
| "step": 3112 |
| }, |
| { |
| "epoch": 2.0181112548512288, |
| "grad_norm": 12.936131477355957, |
| "learning_rate": 6.766845085351755e-06, |
| "loss": 0.2103, |
| "step": 3120 |
| }, |
| { |
| "epoch": 2.023285899094437, |
| "grad_norm": 3.0860114097595215, |
| "learning_rate": 6.751156823135203e-06, |
| "loss": 0.2312, |
| "step": 3128 |
| }, |
| { |
| "epoch": 2.0284605433376455, |
| "grad_norm": 10.499250411987305, |
| "learning_rate": 6.735448885975724e-06, |
| "loss": 0.2236, |
| "step": 3136 |
| }, |
| { |
| "epoch": 2.033635187580854, |
| "grad_norm": 1.9619132280349731, |
| "learning_rate": 6.7197214503582355e-06, |
| "loss": 0.2222, |
| "step": 3144 |
| }, |
| { |
| "epoch": 2.038809831824062, |
| "grad_norm": 5.836687088012695, |
| "learning_rate": 6.703974692986729e-06, |
| "loss": 0.2057, |
| "step": 3152 |
| }, |
| { |
| "epoch": 2.0439844760672705, |
| "grad_norm": 1.3874237537384033, |
| "learning_rate": 6.68820879078228e-06, |
| "loss": 0.2332, |
| "step": 3160 |
| }, |
| { |
| "epoch": 2.049159120310479, |
| "grad_norm": 1.146099328994751, |
| "learning_rate": 6.672423920881068e-06, |
| "loss": 0.2266, |
| "step": 3168 |
| }, |
| { |
| "epoch": 2.0543337645536868, |
| "grad_norm": 3.993424654006958, |
| "learning_rate": 6.6566202606323806e-06, |
| "loss": 0.2172, |
| "step": 3176 |
| }, |
| { |
| "epoch": 2.059508408796895, |
| "grad_norm": 57.19075393676758, |
| "learning_rate": 6.640797987596621e-06, |
| "loss": 0.2056, |
| "step": 3184 |
| }, |
| { |
| "epoch": 2.0646830530401035, |
| "grad_norm": 2.535454511642456, |
| "learning_rate": 6.6249572795433155e-06, |
| "loss": 0.2082, |
| "step": 3192 |
| }, |
| { |
| "epoch": 2.069857697283312, |
| "grad_norm": 2.062868595123291, |
| "learning_rate": 6.609098314449116e-06, |
| "loss": 0.2182, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.07503234152652, |
| "grad_norm": 0.7799398303031921, |
| "learning_rate": 6.593221270495797e-06, |
| "loss": 0.2168, |
| "step": 3208 |
| }, |
| { |
| "epoch": 2.0802069857697285, |
| "grad_norm": 57.6228141784668, |
| "learning_rate": 6.5773263260682595e-06, |
| "loss": 0.2344, |
| "step": 3216 |
| }, |
| { |
| "epoch": 2.0853816300129364, |
| "grad_norm": 1.8435810804367065, |
| "learning_rate": 6.561413659752521e-06, |
| "loss": 0.2159, |
| "step": 3224 |
| }, |
| { |
| "epoch": 2.0905562742561448, |
| "grad_norm": 4.932186126708984, |
| "learning_rate": 6.545483450333712e-06, |
| "loss": 0.214, |
| "step": 3232 |
| }, |
| { |
| "epoch": 2.095730918499353, |
| "grad_norm": 2.2944324016571045, |
| "learning_rate": 6.529535876794069e-06, |
| "loss": 0.2212, |
| "step": 3240 |
| }, |
| { |
| "epoch": 2.1009055627425615, |
| "grad_norm": 2.7697372436523438, |
| "learning_rate": 6.5135711183109156e-06, |
| "loss": 0.2193, |
| "step": 3248 |
| }, |
| { |
| "epoch": 2.10608020698577, |
| "grad_norm": 2.9319217205047607, |
| "learning_rate": 6.497589354254662e-06, |
| "loss": 0.2292, |
| "step": 3256 |
| }, |
| { |
| "epoch": 2.111254851228978, |
| "grad_norm": 4.4201154708862305, |
| "learning_rate": 6.481590764186778e-06, |
| "loss": 0.2141, |
| "step": 3264 |
| }, |
| { |
| "epoch": 2.116429495472186, |
| "grad_norm": 14.810309410095215, |
| "learning_rate": 6.465575527857781e-06, |
| "loss": 0.1982, |
| "step": 3272 |
| }, |
| { |
| "epoch": 2.1216041397153944, |
| "grad_norm": 3.787808656692505, |
| "learning_rate": 6.44954382520522e-06, |
| "loss": 0.2116, |
| "step": 3280 |
| }, |
| { |
| "epoch": 2.1267787839586028, |
| "grad_norm": 7.30560827255249, |
| "learning_rate": 6.433495836351643e-06, |
| "loss": 0.2088, |
| "step": 3288 |
| }, |
| { |
| "epoch": 2.131953428201811, |
| "grad_norm": 1.6610970497131348, |
| "learning_rate": 6.417431741602585e-06, |
| "loss": 0.2189, |
| "step": 3296 |
| }, |
| { |
| "epoch": 2.1371280724450195, |
| "grad_norm": 3.0157978534698486, |
| "learning_rate": 6.401351721444533e-06, |
| "loss": 0.2197, |
| "step": 3304 |
| }, |
| { |
| "epoch": 2.142302716688228, |
| "grad_norm": 29.640609741210938, |
| "learning_rate": 6.385255956542907e-06, |
| "loss": 0.2209, |
| "step": 3312 |
| }, |
| { |
| "epoch": 2.147477360931436, |
| "grad_norm": 1.1481863260269165, |
| "learning_rate": 6.369144627740023e-06, |
| "loss": 0.2099, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.152652005174644, |
| "grad_norm": 4.599576473236084, |
| "learning_rate": 6.353017916053063e-06, |
| "loss": 0.2159, |
| "step": 3328 |
| }, |
| { |
| "epoch": 2.1578266494178524, |
| "grad_norm": 4.746689319610596, |
| "learning_rate": 6.336876002672042e-06, |
| "loss": 0.2289, |
| "step": 3336 |
| }, |
| { |
| "epoch": 2.1630012936610608, |
| "grad_norm": 14.463395118713379, |
| "learning_rate": 6.3207190689577745e-06, |
| "loss": 0.219, |
| "step": 3344 |
| }, |
| { |
| "epoch": 2.168175937904269, |
| "grad_norm": 7.709287166595459, |
| "learning_rate": 6.304547296439831e-06, |
| "loss": 0.2419, |
| "step": 3352 |
| }, |
| { |
| "epoch": 2.1733505821474774, |
| "grad_norm": 4.06973934173584, |
| "learning_rate": 6.288360866814504e-06, |
| "loss": 0.2434, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.178525226390686, |
| "grad_norm": 2.9332635402679443, |
| "learning_rate": 6.272159961942764e-06, |
| "loss": 0.2202, |
| "step": 3368 |
| }, |
| { |
| "epoch": 2.1836998706338937, |
| "grad_norm": 7.125377655029297, |
| "learning_rate": 6.255944763848215e-06, |
| "loss": 0.2049, |
| "step": 3376 |
| }, |
| { |
| "epoch": 2.188874514877102, |
| "grad_norm": 2.0361697673797607, |
| "learning_rate": 6.239715454715054e-06, |
| "loss": 0.2237, |
| "step": 3384 |
| }, |
| { |
| "epoch": 2.1940491591203104, |
| "grad_norm": 25.10670280456543, |
| "learning_rate": 6.223472216886021e-06, |
| "loss": 0.2088, |
| "step": 3392 |
| }, |
| { |
| "epoch": 2.1992238033635187, |
| "grad_norm": 16.464994430541992, |
| "learning_rate": 6.2072152328603464e-06, |
| "loss": 0.2224, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.204398447606727, |
| "grad_norm": 3.6457226276397705, |
| "learning_rate": 6.190944685291708e-06, |
| "loss": 0.2081, |
| "step": 3408 |
| }, |
| { |
| "epoch": 2.2095730918499354, |
| "grad_norm": 1.3341186046600342, |
| "learning_rate": 6.174660756986175e-06, |
| "loss": 0.211, |
| "step": 3416 |
| }, |
| { |
| "epoch": 2.214747736093144, |
| "grad_norm": 14.349130630493164, |
| "learning_rate": 6.158363630900155e-06, |
| "loss": 0.225, |
| "step": 3424 |
| }, |
| { |
| "epoch": 2.2199223803363517, |
| "grad_norm": 13.548636436462402, |
| "learning_rate": 6.142053490138335e-06, |
| "loss": 0.2251, |
| "step": 3432 |
| }, |
| { |
| "epoch": 2.22509702457956, |
| "grad_norm": 8.892589569091797, |
| "learning_rate": 6.1257305179516315e-06, |
| "loss": 0.2518, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.2302716688227684, |
| "grad_norm": 3.8122220039367676, |
| "learning_rate": 6.109394897735121e-06, |
| "loss": 0.2342, |
| "step": 3448 |
| }, |
| { |
| "epoch": 2.2354463130659767, |
| "grad_norm": 6.099566459655762, |
| "learning_rate": 6.093046813025995e-06, |
| "loss": 0.2175, |
| "step": 3456 |
| }, |
| { |
| "epoch": 2.240620957309185, |
| "grad_norm": 1.4928990602493286, |
| "learning_rate": 6.0766864475014785e-06, |
| "loss": 0.2383, |
| "step": 3464 |
| }, |
| { |
| "epoch": 2.2457956015523934, |
| "grad_norm": 11.29719352722168, |
| "learning_rate": 6.060313984976783e-06, |
| "loss": 0.2389, |
| "step": 3472 |
| }, |
| { |
| "epoch": 2.2509702457956013, |
| "grad_norm": 2.987386465072632, |
| "learning_rate": 6.043929609403032e-06, |
| "loss": 0.2113, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.2561448900388097, |
| "grad_norm": 2.351633310317993, |
| "learning_rate": 6.027533504865196e-06, |
| "loss": 0.2235, |
| "step": 3488 |
| }, |
| { |
| "epoch": 2.261319534282018, |
| "grad_norm": 5.436527729034424, |
| "learning_rate": 6.011125855580026e-06, |
| "loss": 0.2204, |
| "step": 3496 |
| }, |
| { |
| "epoch": 2.2664941785252264, |
| "grad_norm": 1.6124496459960938, |
| "learning_rate": 5.994706845893986e-06, |
| "loss": 0.2247, |
| "step": 3504 |
| }, |
| { |
| "epoch": 2.2716688227684347, |
| "grad_norm": 8.689626693725586, |
| "learning_rate": 5.978276660281174e-06, |
| "loss": 0.2345, |
| "step": 3512 |
| }, |
| { |
| "epoch": 2.276843467011643, |
| "grad_norm": 29.871366500854492, |
| "learning_rate": 5.961835483341255e-06, |
| "loss": 0.2154, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.2820181112548514, |
| "grad_norm": 11.956281661987305, |
| "learning_rate": 5.945383499797388e-06, |
| "loss": 0.2351, |
| "step": 3528 |
| }, |
| { |
| "epoch": 2.2871927554980593, |
| "grad_norm": 1.748079538345337, |
| "learning_rate": 5.928920894494147e-06, |
| "loss": 0.2083, |
| "step": 3536 |
| }, |
| { |
| "epoch": 2.2923673997412677, |
| "grad_norm": 22.06855010986328, |
| "learning_rate": 5.912447852395444e-06, |
| "loss": 0.2149, |
| "step": 3544 |
| }, |
| { |
| "epoch": 2.297542043984476, |
| "grad_norm": 7.1412529945373535, |
| "learning_rate": 5.8959645585824575e-06, |
| "loss": 0.2176, |
| "step": 3552 |
| }, |
| { |
| "epoch": 2.3027166882276844, |
| "grad_norm": 11.964238166809082, |
| "learning_rate": 5.879471198251544e-06, |
| "loss": 0.2235, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.3078913324708927, |
| "grad_norm": 4.59617280960083, |
| "learning_rate": 5.86296795671216e-06, |
| "loss": 0.2066, |
| "step": 3568 |
| }, |
| { |
| "epoch": 2.313065976714101, |
| "grad_norm": 27.847808837890625, |
| "learning_rate": 5.846455019384787e-06, |
| "loss": 0.2031, |
| "step": 3576 |
| }, |
| { |
| "epoch": 2.318240620957309, |
| "grad_norm": 1.6919810771942139, |
| "learning_rate": 5.8299325717988355e-06, |
| "loss": 0.2163, |
| "step": 3584 |
| }, |
| { |
| "epoch": 2.3234152652005173, |
| "grad_norm": 2.3351593017578125, |
| "learning_rate": 5.813400799590573e-06, |
| "loss": 0.2211, |
| "step": 3592 |
| }, |
| { |
| "epoch": 2.3285899094437257, |
| "grad_norm": 23.962520599365234, |
| "learning_rate": 5.7968598885010315e-06, |
| "loss": 0.2116, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.333764553686934, |
| "grad_norm": 19.117528915405273, |
| "learning_rate": 5.780310024373923e-06, |
| "loss": 0.2227, |
| "step": 3608 |
| }, |
| { |
| "epoch": 2.3389391979301424, |
| "grad_norm": 48.07025146484375, |
| "learning_rate": 5.763751393153545e-06, |
| "loss": 0.2183, |
| "step": 3616 |
| }, |
| { |
| "epoch": 2.3441138421733507, |
| "grad_norm": 1.309008240699768, |
| "learning_rate": 5.747184180882704e-06, |
| "loss": 0.2098, |
| "step": 3624 |
| }, |
| { |
| "epoch": 2.349288486416559, |
| "grad_norm": 2.3568334579467773, |
| "learning_rate": 5.730608573700613e-06, |
| "loss": 0.2062, |
| "step": 3632 |
| }, |
| { |
| "epoch": 2.354463130659767, |
| "grad_norm": 1.478174090385437, |
| "learning_rate": 5.714024757840806e-06, |
| "loss": 0.2175, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.3596377749029753, |
| "grad_norm": 4.405735492706299, |
| "learning_rate": 5.697432919629048e-06, |
| "loss": 0.2204, |
| "step": 3648 |
| }, |
| { |
| "epoch": 2.3648124191461837, |
| "grad_norm": 1.8726969957351685, |
| "learning_rate": 5.680833245481234e-06, |
| "loss": 0.2205, |
| "step": 3656 |
| }, |
| { |
| "epoch": 2.369987063389392, |
| "grad_norm": 2.596064805984497, |
| "learning_rate": 5.664225921901302e-06, |
| "loss": 0.2197, |
| "step": 3664 |
| }, |
| { |
| "epoch": 2.3751617076326004, |
| "grad_norm": 3.998558282852173, |
| "learning_rate": 5.647611135479133e-06, |
| "loss": 0.2132, |
| "step": 3672 |
| }, |
| { |
| "epoch": 2.3803363518758087, |
| "grad_norm": 23.586936950683594, |
| "learning_rate": 5.6309890728884555e-06, |
| "loss": 0.2174, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.3855109961190166, |
| "grad_norm": 41.73709487915039, |
| "learning_rate": 5.614359920884751e-06, |
| "loss": 0.2214, |
| "step": 3688 |
| }, |
| { |
| "epoch": 2.390685640362225, |
| "grad_norm": 25.809192657470703, |
| "learning_rate": 5.5977238663031495e-06, |
| "loss": 0.2193, |
| "step": 3696 |
| }, |
| { |
| "epoch": 2.3958602846054333, |
| "grad_norm": 2.3301281929016113, |
| "learning_rate": 5.581081096056337e-06, |
| "loss": 0.2192, |
| "step": 3704 |
| }, |
| { |
| "epoch": 2.4010349288486417, |
| "grad_norm": 2.47526478767395, |
| "learning_rate": 5.564431797132454e-06, |
| "loss": 0.2042, |
| "step": 3712 |
| }, |
| { |
| "epoch": 2.40620957309185, |
| "grad_norm": 24.38184928894043, |
| "learning_rate": 5.547776156592989e-06, |
| "loss": 0.2235, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.4113842173350584, |
| "grad_norm": 1.7953455448150635, |
| "learning_rate": 5.531114361570684e-06, |
| "loss": 0.231, |
| "step": 3728 |
| }, |
| { |
| "epoch": 2.4165588615782667, |
| "grad_norm": 2.250443935394287, |
| "learning_rate": 5.514446599267429e-06, |
| "loss": 0.2206, |
| "step": 3736 |
| }, |
| { |
| "epoch": 2.4217335058214746, |
| "grad_norm": 3.726274251937866, |
| "learning_rate": 5.497773056952159e-06, |
| "loss": 0.2133, |
| "step": 3744 |
| }, |
| { |
| "epoch": 2.426908150064683, |
| "grad_norm": 1.7468153238296509, |
| "learning_rate": 5.481093921958749e-06, |
| "loss": 0.2299, |
| "step": 3752 |
| }, |
| { |
| "epoch": 2.4320827943078913, |
| "grad_norm": 15.300987243652344, |
| "learning_rate": 5.4644093816839086e-06, |
| "loss": 0.2238, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.4372574385510997, |
| "grad_norm": 1.6782792806625366, |
| "learning_rate": 5.44771962358508e-06, |
| "loss": 0.2315, |
| "step": 3768 |
| }, |
| { |
| "epoch": 2.442432082794308, |
| "grad_norm": 18.028644561767578, |
| "learning_rate": 5.4310248351783264e-06, |
| "loss": 0.2366, |
| "step": 3776 |
| }, |
| { |
| "epoch": 2.4476067270375164, |
| "grad_norm": 4.083730220794678, |
| "learning_rate": 5.414325204036237e-06, |
| "loss": 0.207, |
| "step": 3784 |
| }, |
| { |
| "epoch": 2.4527813712807243, |
| "grad_norm": 1.102040410041809, |
| "learning_rate": 5.397620917785799e-06, |
| "loss": 0.2198, |
| "step": 3792 |
| }, |
| { |
| "epoch": 2.4579560155239326, |
| "grad_norm": 13.378700256347656, |
| "learning_rate": 5.380912164106312e-06, |
| "loss": 0.2193, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.463130659767141, |
| "grad_norm": 1.2787953615188599, |
| "learning_rate": 5.364199130727262e-06, |
| "loss": 0.2146, |
| "step": 3808 |
| }, |
| { |
| "epoch": 2.4683053040103493, |
| "grad_norm": 5.001540184020996, |
| "learning_rate": 5.347482005426224e-06, |
| "loss": 0.2128, |
| "step": 3816 |
| }, |
| { |
| "epoch": 2.4734799482535577, |
| "grad_norm": 32.12523651123047, |
| "learning_rate": 5.330760976026744e-06, |
| "loss": 0.2146, |
| "step": 3824 |
| }, |
| { |
| "epoch": 2.478654592496766, |
| "grad_norm": 2.327051877975464, |
| "learning_rate": 5.314036230396233e-06, |
| "loss": 0.2224, |
| "step": 3832 |
| }, |
| { |
| "epoch": 2.4838292367399744, |
| "grad_norm": 26.68153953552246, |
| "learning_rate": 5.297307956443856e-06, |
| "loss": 0.2238, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.4890038809831823, |
| "grad_norm": 2.472919225692749, |
| "learning_rate": 5.28057634211842e-06, |
| "loss": 0.2116, |
| "step": 3848 |
| }, |
| { |
| "epoch": 2.4941785252263906, |
| "grad_norm": 90.6447982788086, |
| "learning_rate": 5.2638415754062625e-06, |
| "loss": 0.2207, |
| "step": 3856 |
| }, |
| { |
| "epoch": 2.499353169469599, |
| "grad_norm": 1.0954121351242065, |
| "learning_rate": 5.247103844329137e-06, |
| "loss": 0.2277, |
| "step": 3864 |
| }, |
| { |
| "epoch": 2.5045278137128073, |
| "grad_norm": 1.563537836074829, |
| "learning_rate": 5.230363336942105e-06, |
| "loss": 0.2093, |
| "step": 3872 |
| }, |
| { |
| "epoch": 2.5097024579560157, |
| "grad_norm": 42.30322265625, |
| "learning_rate": 5.213620241331424e-06, |
| "loss": 0.2162, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.5148771021992236, |
| "grad_norm": 1.3426226377487183, |
| "learning_rate": 5.196874745612425e-06, |
| "loss": 0.2232, |
| "step": 3888 |
| }, |
| { |
| "epoch": 2.520051746442432, |
| "grad_norm": 9.39691162109375, |
| "learning_rate": 5.180127037927408e-06, |
| "loss": 0.2242, |
| "step": 3896 |
| }, |
| { |
| "epoch": 2.5252263906856403, |
| "grad_norm": 4.963808536529541, |
| "learning_rate": 5.163377306443527e-06, |
| "loss": 0.2156, |
| "step": 3904 |
| }, |
| { |
| "epoch": 2.5304010349288486, |
| "grad_norm": 5.381854057312012, |
| "learning_rate": 5.146625739350671e-06, |
| "loss": 0.2267, |
| "step": 3912 |
| }, |
| { |
| "epoch": 2.535575679172057, |
| "grad_norm": 11.522015571594238, |
| "learning_rate": 5.129872524859356e-06, |
| "loss": 0.2337, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.5407503234152653, |
| "grad_norm": 1.7935131788253784, |
| "learning_rate": 5.1131178511986045e-06, |
| "loss": 0.2213, |
| "step": 3928 |
| }, |
| { |
| "epoch": 2.5459249676584736, |
| "grad_norm": 3.484163284301758, |
| "learning_rate": 5.096361906613836e-06, |
| "loss": 0.2215, |
| "step": 3936 |
| }, |
| { |
| "epoch": 2.551099611901682, |
| "grad_norm": 1.6095975637435913, |
| "learning_rate": 5.079604879364746e-06, |
| "loss": 0.2164, |
| "step": 3944 |
| }, |
| { |
| "epoch": 2.55627425614489, |
| "grad_norm": 1.0419116020202637, |
| "learning_rate": 5.062846957723194e-06, |
| "loss": 0.2071, |
| "step": 3952 |
| }, |
| { |
| "epoch": 2.5614489003880982, |
| "grad_norm": 2.196648359298706, |
| "learning_rate": 5.046088329971095e-06, |
| "loss": 0.2071, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.5666235446313066, |
| "grad_norm": 11.791790962219238, |
| "learning_rate": 5.0293291843982896e-06, |
| "loss": 0.2325, |
| "step": 3968 |
| }, |
| { |
| "epoch": 2.571798188874515, |
| "grad_norm": 3.8279497623443604, |
| "learning_rate": 5.012569709300441e-06, |
| "loss": 0.2219, |
| "step": 3976 |
| }, |
| { |
| "epoch": 2.5769728331177233, |
| "grad_norm": 6.66563081741333, |
| "learning_rate": 4.995810092976912e-06, |
| "loss": 0.2211, |
| "step": 3984 |
| }, |
| { |
| "epoch": 2.582147477360931, |
| "grad_norm": 2.60664701461792, |
| "learning_rate": 4.979050523728654e-06, |
| "loss": 0.2128, |
| "step": 3992 |
| }, |
| { |
| "epoch": 2.5873221216041395, |
| "grad_norm": 7.721459865570068, |
| "learning_rate": 4.962291189856089e-06, |
| "loss": 0.2089, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.592496765847348, |
| "grad_norm": 4.121895790100098, |
| "learning_rate": 4.945532279656993e-06, |
| "loss": 0.2183, |
| "step": 4008 |
| }, |
| { |
| "epoch": 2.5976714100905562, |
| "grad_norm": 4.680197238922119, |
| "learning_rate": 4.9287739814243835e-06, |
| "loss": 0.2173, |
| "step": 4016 |
| }, |
| { |
| "epoch": 2.6028460543337646, |
| "grad_norm": 3.2336695194244385, |
| "learning_rate": 4.912016483444403e-06, |
| "loss": 0.2069, |
| "step": 4024 |
| }, |
| { |
| "epoch": 2.608020698576973, |
| "grad_norm": 5.2549309730529785, |
| "learning_rate": 4.8952599739942015e-06, |
| "loss": 0.2342, |
| "step": 4032 |
| }, |
| { |
| "epoch": 2.6131953428201813, |
| "grad_norm": 1.6960891485214233, |
| "learning_rate": 4.878504641339822e-06, |
| "loss": 0.2158, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.6183699870633896, |
| "grad_norm": 11.856524467468262, |
| "learning_rate": 4.861750673734085e-06, |
| "loss": 0.2135, |
| "step": 4048 |
| }, |
| { |
| "epoch": 2.6235446313065975, |
| "grad_norm": 25.278499603271484, |
| "learning_rate": 4.8449982594144786e-06, |
| "loss": 0.2054, |
| "step": 4056 |
| }, |
| { |
| "epoch": 2.628719275549806, |
| "grad_norm": 9.263405799865723, |
| "learning_rate": 4.828247586601035e-06, |
| "loss": 0.2099, |
| "step": 4064 |
| }, |
| { |
| "epoch": 2.6338939197930142, |
| "grad_norm": 5.896005153656006, |
| "learning_rate": 4.811498843494222e-06, |
| "loss": 0.207, |
| "step": 4072 |
| }, |
| { |
| "epoch": 2.6390685640362226, |
| "grad_norm": 1.7929736375808716, |
| "learning_rate": 4.794752218272824e-06, |
| "loss": 0.2267, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.644243208279431, |
| "grad_norm": 1.0446430444717407, |
| "learning_rate": 4.7780078990918326e-06, |
| "loss": 0.2206, |
| "step": 4088 |
| }, |
| { |
| "epoch": 2.649417852522639, |
| "grad_norm": 5.339548110961914, |
| "learning_rate": 4.761266074080326e-06, |
| "loss": 0.2187, |
| "step": 4096 |
| }, |
| { |
| "epoch": 2.654592496765847, |
| "grad_norm": 14.899389266967773, |
| "learning_rate": 4.744526931339367e-06, |
| "loss": 0.207, |
| "step": 4104 |
| }, |
| { |
| "epoch": 2.6597671410090555, |
| "grad_norm": 1.1871739625930786, |
| "learning_rate": 4.727790658939875e-06, |
| "loss": 0.2211, |
| "step": 4112 |
| }, |
| { |
| "epoch": 2.664941785252264, |
| "grad_norm": 1.4944238662719727, |
| "learning_rate": 4.711057444920522e-06, |
| "loss": 0.2206, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.6701164294954722, |
| "grad_norm": 62.428558349609375, |
| "learning_rate": 4.694327477285619e-06, |
| "loss": 0.2163, |
| "step": 4128 |
| }, |
| { |
| "epoch": 2.6752910737386806, |
| "grad_norm": 9.660483360290527, |
| "learning_rate": 4.6776009440030035e-06, |
| "loss": 0.2123, |
| "step": 4136 |
| }, |
| { |
| "epoch": 2.680465717981889, |
| "grad_norm": 7.151432991027832, |
| "learning_rate": 4.660878033001922e-06, |
| "loss": 0.2163, |
| "step": 4144 |
| }, |
| { |
| "epoch": 2.6856403622250973, |
| "grad_norm": 8.276389122009277, |
| "learning_rate": 4.644158932170929e-06, |
| "loss": 0.2239, |
| "step": 4152 |
| }, |
| { |
| "epoch": 2.690815006468305, |
| "grad_norm": 9.777862548828125, |
| "learning_rate": 4.627443829355765e-06, |
| "loss": 0.22, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.6959896507115135, |
| "grad_norm": 3.226142168045044, |
| "learning_rate": 4.610732912357256e-06, |
| "loss": 0.2278, |
| "step": 4168 |
| }, |
| { |
| "epoch": 2.701164294954722, |
| "grad_norm": 4.0428385734558105, |
| "learning_rate": 4.5940263689291955e-06, |
| "loss": 0.2135, |
| "step": 4176 |
| }, |
| { |
| "epoch": 2.7063389391979302, |
| "grad_norm": 2.2790136337280273, |
| "learning_rate": 4.57732438677624e-06, |
| "loss": 0.2022, |
| "step": 4184 |
| }, |
| { |
| "epoch": 2.7115135834411386, |
| "grad_norm": 8.921182632446289, |
| "learning_rate": 4.560627153551795e-06, |
| "loss": 0.2195, |
| "step": 4192 |
| }, |
| { |
| "epoch": 2.7166882276843465, |
| "grad_norm": 1.772870421409607, |
| "learning_rate": 4.543934856855913e-06, |
| "loss": 0.2088, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.721862871927555, |
| "grad_norm": 3.5123379230499268, |
| "learning_rate": 4.527247684233185e-06, |
| "loss": 0.2105, |
| "step": 4208 |
| }, |
| { |
| "epoch": 2.727037516170763, |
| "grad_norm": 2.6325693130493164, |
| "learning_rate": 4.510565823170625e-06, |
| "loss": 0.2158, |
| "step": 4216 |
| }, |
| { |
| "epoch": 2.7322121604139715, |
| "grad_norm": 79.38518524169922, |
| "learning_rate": 4.493889461095574e-06, |
| "loss": 0.2012, |
| "step": 4224 |
| }, |
| { |
| "epoch": 2.73738680465718, |
| "grad_norm": 12.619338035583496, |
| "learning_rate": 4.477218785373587e-06, |
| "loss": 0.2151, |
| "step": 4232 |
| }, |
| { |
| "epoch": 2.742561448900388, |
| "grad_norm": 1.3559527397155762, |
| "learning_rate": 4.460553983306332e-06, |
| "loss": 0.2048, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.7477360931435966, |
| "grad_norm": 9.754837036132812, |
| "learning_rate": 4.443895242129484e-06, |
| "loss": 0.2134, |
| "step": 4248 |
| }, |
| { |
| "epoch": 2.752910737386805, |
| "grad_norm": 4.612194538116455, |
| "learning_rate": 4.4272427490106215e-06, |
| "loss": 0.2063, |
| "step": 4256 |
| }, |
| { |
| "epoch": 2.758085381630013, |
| "grad_norm": 5.114107608795166, |
| "learning_rate": 4.410596691047123e-06, |
| "loss": 0.2185, |
| "step": 4264 |
| }, |
| { |
| "epoch": 2.763260025873221, |
| "grad_norm": 9.316654205322266, |
| "learning_rate": 4.3939572552640645e-06, |
| "loss": 0.2153, |
| "step": 4272 |
| }, |
| { |
| "epoch": 2.7684346701164295, |
| "grad_norm": 6.500330448150635, |
| "learning_rate": 4.377324628612123e-06, |
| "loss": 0.2101, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.773609314359638, |
| "grad_norm": 1.7955437898635864, |
| "learning_rate": 4.36069899796547e-06, |
| "loss": 0.2072, |
| "step": 4288 |
| }, |
| { |
| "epoch": 2.778783958602846, |
| "grad_norm": 113.924072265625, |
| "learning_rate": 4.344080550119672e-06, |
| "loss": 0.2066, |
| "step": 4296 |
| }, |
| { |
| "epoch": 2.783958602846054, |
| "grad_norm": 2.6472039222717285, |
| "learning_rate": 4.327469471789597e-06, |
| "loss": 0.2122, |
| "step": 4304 |
| }, |
| { |
| "epoch": 2.7891332470892625, |
| "grad_norm": 7.95417594909668, |
| "learning_rate": 4.310865949607311e-06, |
| "loss": 0.1984, |
| "step": 4312 |
| }, |
| { |
| "epoch": 2.794307891332471, |
| "grad_norm": 2.1271450519561768, |
| "learning_rate": 4.294270170119987e-06, |
| "loss": 0.2263, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.799482535575679, |
| "grad_norm": 1.0342472791671753, |
| "learning_rate": 4.277682319787802e-06, |
| "loss": 0.2248, |
| "step": 4328 |
| }, |
| { |
| "epoch": 2.8046571798188875, |
| "grad_norm": 143.9209747314453, |
| "learning_rate": 4.261102584981848e-06, |
| "loss": 0.2026, |
| "step": 4336 |
| }, |
| { |
| "epoch": 2.809831824062096, |
| "grad_norm": 7.666977405548096, |
| "learning_rate": 4.244531151982034e-06, |
| "loss": 0.2195, |
| "step": 4344 |
| }, |
| { |
| "epoch": 2.815006468305304, |
| "grad_norm": 123.94723510742188, |
| "learning_rate": 4.227968206974999e-06, |
| "loss": 0.2207, |
| "step": 4352 |
| }, |
| { |
| "epoch": 2.8201811125485126, |
| "grad_norm": 52.61326599121094, |
| "learning_rate": 4.211413936052013e-06, |
| "loss": 0.2026, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.8253557567917205, |
| "grad_norm": 6.78623628616333, |
| "learning_rate": 4.194868525206887e-06, |
| "loss": 0.2131, |
| "step": 4368 |
| }, |
| { |
| "epoch": 2.830530401034929, |
| "grad_norm": 1.9670113325119019, |
| "learning_rate": 4.178332160333891e-06, |
| "loss": 0.2268, |
| "step": 4376 |
| }, |
| { |
| "epoch": 2.835705045278137, |
| "grad_norm": 1.38176691532135, |
| "learning_rate": 4.161805027225655e-06, |
| "loss": 0.2192, |
| "step": 4384 |
| }, |
| { |
| "epoch": 2.8408796895213455, |
| "grad_norm": 3.1774935722351074, |
| "learning_rate": 4.145287311571089e-06, |
| "loss": 0.2164, |
| "step": 4392 |
| }, |
| { |
| "epoch": 2.8460543337645534, |
| "grad_norm": 4.968908786773682, |
| "learning_rate": 4.1287791989532935e-06, |
| "loss": 0.2089, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.8512289780077618, |
| "grad_norm": 16.97532081604004, |
| "learning_rate": 4.1122808748474745e-06, |
| "loss": 0.2143, |
| "step": 4408 |
| }, |
| { |
| "epoch": 2.85640362225097, |
| "grad_norm": 0.9508041739463806, |
| "learning_rate": 4.095792524618861e-06, |
| "loss": 0.2205, |
| "step": 4416 |
| }, |
| { |
| "epoch": 2.8615782664941785, |
| "grad_norm": 15.078865051269531, |
| "learning_rate": 4.079314333520623e-06, |
| "loss": 0.2224, |
| "step": 4424 |
| }, |
| { |
| "epoch": 2.866752910737387, |
| "grad_norm": 5.82922887802124, |
| "learning_rate": 4.062846486691784e-06, |
| "loss": 0.1991, |
| "step": 4432 |
| }, |
| { |
| "epoch": 2.871927554980595, |
| "grad_norm": 1.030604362487793, |
| "learning_rate": 4.04638916915515e-06, |
| "loss": 0.2134, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.8771021992238035, |
| "grad_norm": 1.6634279489517212, |
| "learning_rate": 4.0299425658152255e-06, |
| "loss": 0.2113, |
| "step": 4448 |
| }, |
| { |
| "epoch": 2.882276843467012, |
| "grad_norm": 28.07861328125, |
| "learning_rate": 4.013506861456136e-06, |
| "loss": 0.2113, |
| "step": 4456 |
| }, |
| { |
| "epoch": 2.88745148771022, |
| "grad_norm": 6.467888355255127, |
| "learning_rate": 3.997082240739551e-06, |
| "loss": 0.2299, |
| "step": 4464 |
| }, |
| { |
| "epoch": 2.892626131953428, |
| "grad_norm": 2.268150568008423, |
| "learning_rate": 3.9806688882026125e-06, |
| "loss": 0.2134, |
| "step": 4472 |
| }, |
| { |
| "epoch": 2.8978007761966365, |
| "grad_norm": 19.535581588745117, |
| "learning_rate": 3.964266988255861e-06, |
| "loss": 0.2224, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.902975420439845, |
| "grad_norm": 3.764432907104492, |
| "learning_rate": 3.94787672518116e-06, |
| "loss": 0.2122, |
| "step": 4488 |
| }, |
| { |
| "epoch": 2.908150064683053, |
| "grad_norm": 27.98623275756836, |
| "learning_rate": 3.931498283129631e-06, |
| "loss": 0.2009, |
| "step": 4496 |
| }, |
| { |
| "epoch": 2.913324708926261, |
| "grad_norm": 1.0380629301071167, |
| "learning_rate": 3.915131846119581e-06, |
| "loss": 0.2076, |
| "step": 4504 |
| }, |
| { |
| "epoch": 2.9184993531694694, |
| "grad_norm": 3.117368698120117, |
| "learning_rate": 3.898777598034434e-06, |
| "loss": 0.2179, |
| "step": 4512 |
| }, |
| { |
| "epoch": 2.9236739974126777, |
| "grad_norm": 13.296104431152344, |
| "learning_rate": 3.882435722620667e-06, |
| "loss": 0.2045, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.928848641655886, |
| "grad_norm": 2.6790828704833984, |
| "learning_rate": 3.866106403485745e-06, |
| "loss": 0.2138, |
| "step": 4528 |
| }, |
| { |
| "epoch": 2.9340232858990944, |
| "grad_norm": 0.8345991373062134, |
| "learning_rate": 3.849789824096061e-06, |
| "loss": 0.1957, |
| "step": 4536 |
| }, |
| { |
| "epoch": 2.939197930142303, |
| "grad_norm": 8.058691024780273, |
| "learning_rate": 3.833486167774867e-06, |
| "loss": 0.2193, |
| "step": 4544 |
| }, |
| { |
| "epoch": 2.944372574385511, |
| "grad_norm": 2.5544962882995605, |
| "learning_rate": 3.817195617700224e-06, |
| "loss": 0.2215, |
| "step": 4552 |
| }, |
| { |
| "epoch": 2.9495472186287195, |
| "grad_norm": 2.118175983428955, |
| "learning_rate": 3.800918356902936e-06, |
| "loss": 0.2082, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.9547218628719274, |
| "grad_norm": 1.6129382848739624, |
| "learning_rate": 3.784654568264497e-06, |
| "loss": 0.2148, |
| "step": 4568 |
| }, |
| { |
| "epoch": 2.9598965071151357, |
| "grad_norm": 14.738019943237305, |
| "learning_rate": 3.768404434515038e-06, |
| "loss": 0.216, |
| "step": 4576 |
| }, |
| { |
| "epoch": 2.965071151358344, |
| "grad_norm": 10.588520050048828, |
| "learning_rate": 3.7521681382312693e-06, |
| "loss": 0.2179, |
| "step": 4584 |
| }, |
| { |
| "epoch": 2.9702457956015524, |
| "grad_norm": 26.88380241394043, |
| "learning_rate": 3.735945861834434e-06, |
| "loss": 0.2132, |
| "step": 4592 |
| }, |
| { |
| "epoch": 2.975420439844761, |
| "grad_norm": 8.701356887817383, |
| "learning_rate": 3.7197377875882547e-06, |
| "loss": 0.2174, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.9805950840879687, |
| "grad_norm": 9.462160110473633, |
| "learning_rate": 3.703544097596887e-06, |
| "loss": 0.2296, |
| "step": 4608 |
| }, |
| { |
| "epoch": 2.985769728331177, |
| "grad_norm": 7.985735893249512, |
| "learning_rate": 3.6873649738028737e-06, |
| "loss": 0.2121, |
| "step": 4616 |
| }, |
| { |
| "epoch": 2.9909443725743854, |
| "grad_norm": 24.12420654296875, |
| "learning_rate": 3.671200597985104e-06, |
| "loss": 0.206, |
| "step": 4624 |
| }, |
| { |
| "epoch": 2.9961190168175937, |
| "grad_norm": 1.4237028360366821, |
| "learning_rate": 3.655051151756762e-06, |
| "loss": 0.2072, |
| "step": 4632 |
| }, |
| { |
| "epoch": 3.001293661060802, |
| "grad_norm": 4.103756427764893, |
| "learning_rate": 3.638916816563298e-06, |
| "loss": 0.1977, |
| "step": 4640 |
| }, |
| { |
| "epoch": 3.0064683053040104, |
| "grad_norm": 2.717452049255371, |
| "learning_rate": 3.622797773680379e-06, |
| "loss": 0.2233, |
| "step": 4648 |
| }, |
| { |
| "epoch": 3.011642949547219, |
| "grad_norm": 3.143430233001709, |
| "learning_rate": 3.6066942042118568e-06, |
| "loss": 0.2246, |
| "step": 4656 |
| }, |
| { |
| "epoch": 3.0168175937904267, |
| "grad_norm": 0.8686035871505737, |
| "learning_rate": 3.5906062890877368e-06, |
| "loss": 0.2112, |
| "step": 4664 |
| }, |
| { |
| "epoch": 3.021992238033635, |
| "grad_norm": 7.7689056396484375, |
| "learning_rate": 3.5745342090621406e-06, |
| "loss": 0.2288, |
| "step": 4672 |
| }, |
| { |
| "epoch": 3.0271668822768434, |
| "grad_norm": 7.347503185272217, |
| "learning_rate": 3.5584781447112737e-06, |
| "loss": 0.1989, |
| "step": 4680 |
| }, |
| { |
| "epoch": 3.0323415265200517, |
| "grad_norm": 29.066707611083984, |
| "learning_rate": 3.542438276431401e-06, |
| "loss": 0.1981, |
| "step": 4688 |
| }, |
| { |
| "epoch": 3.03751617076326, |
| "grad_norm": 16.907032012939453, |
| "learning_rate": 3.526414784436819e-06, |
| "loss": 0.2241, |
| "step": 4696 |
| }, |
| { |
| "epoch": 3.0426908150064684, |
| "grad_norm": 50.19180679321289, |
| "learning_rate": 3.510407848757828e-06, |
| "loss": 0.2103, |
| "step": 4704 |
| }, |
| { |
| "epoch": 3.047865459249677, |
| "grad_norm": 3.969433069229126, |
| "learning_rate": 3.494417649238713e-06, |
| "loss": 0.2084, |
| "step": 4712 |
| }, |
| { |
| "epoch": 3.0530401034928847, |
| "grad_norm": 1.613051176071167, |
| "learning_rate": 3.47844436553572e-06, |
| "loss": 0.207, |
| "step": 4720 |
| }, |
| { |
| "epoch": 3.058214747736093, |
| "grad_norm": 15.627549171447754, |
| "learning_rate": 3.462488177115041e-06, |
| "loss": 0.2232, |
| "step": 4728 |
| }, |
| { |
| "epoch": 3.0633893919793014, |
| "grad_norm": 4.300905704498291, |
| "learning_rate": 3.4465492632507946e-06, |
| "loss": 0.2122, |
| "step": 4736 |
| }, |
| { |
| "epoch": 3.0685640362225097, |
| "grad_norm": 7.382449150085449, |
| "learning_rate": 3.4306278030230143e-06, |
| "loss": 0.2146, |
| "step": 4744 |
| }, |
| { |
| "epoch": 3.073738680465718, |
| "grad_norm": 2.4655721187591553, |
| "learning_rate": 3.4147239753156324e-06, |
| "loss": 0.2172, |
| "step": 4752 |
| }, |
| { |
| "epoch": 3.0789133247089264, |
| "grad_norm": 3.6668355464935303, |
| "learning_rate": 3.398837958814475e-06, |
| "loss": 0.2068, |
| "step": 4760 |
| }, |
| { |
| "epoch": 3.0840879689521343, |
| "grad_norm": 2.1171956062316895, |
| "learning_rate": 3.382969932005252e-06, |
| "loss": 0.2049, |
| "step": 4768 |
| }, |
| { |
| "epoch": 3.0892626131953427, |
| "grad_norm": 2.6610488891601562, |
| "learning_rate": 3.367120073171548e-06, |
| "loss": 0.2132, |
| "step": 4776 |
| }, |
| { |
| "epoch": 3.094437257438551, |
| "grad_norm": 3.1115005016326904, |
| "learning_rate": 3.351288560392833e-06, |
| "loss": 0.2113, |
| "step": 4784 |
| }, |
| { |
| "epoch": 3.0996119016817594, |
| "grad_norm": 39.48991775512695, |
| "learning_rate": 3.335475571542442e-06, |
| "loss": 0.1985, |
| "step": 4792 |
| }, |
| { |
| "epoch": 3.1047865459249677, |
| "grad_norm": 4.187602996826172, |
| "learning_rate": 3.3196812842855895e-06, |
| "loss": 0.2209, |
| "step": 4800 |
| }, |
| { |
| "epoch": 3.109961190168176, |
| "grad_norm": 2.5106544494628906, |
| "learning_rate": 3.303905876077372e-06, |
| "loss": 0.2136, |
| "step": 4808 |
| }, |
| { |
| "epoch": 3.1151358344113844, |
| "grad_norm": 5.031343460083008, |
| "learning_rate": 3.28814952416077e-06, |
| "loss": 0.2079, |
| "step": 4816 |
| }, |
| { |
| "epoch": 3.1203104786545923, |
| "grad_norm": 4.405430316925049, |
| "learning_rate": 3.272412405564659e-06, |
| "loss": 0.2209, |
| "step": 4824 |
| }, |
| { |
| "epoch": 3.1254851228978007, |
| "grad_norm": 4.106354713439941, |
| "learning_rate": 3.2566946971018225e-06, |
| "loss": 0.2219, |
| "step": 4832 |
| }, |
| { |
| "epoch": 3.130659767141009, |
| "grad_norm": 74.13800811767578, |
| "learning_rate": 3.240996575366961e-06, |
| "loss": 0.2264, |
| "step": 4840 |
| }, |
| { |
| "epoch": 3.1358344113842174, |
| "grad_norm": 2.211841344833374, |
| "learning_rate": 3.225318216734713e-06, |
| "loss": 0.2095, |
| "step": 4848 |
| }, |
| { |
| "epoch": 3.1410090556274257, |
| "grad_norm": 5.970486164093018, |
| "learning_rate": 3.209659797357669e-06, |
| "loss": 0.2156, |
| "step": 4856 |
| }, |
| { |
| "epoch": 3.146183699870634, |
| "grad_norm": 2.485638380050659, |
| "learning_rate": 3.1940214931643945e-06, |
| "loss": 0.2137, |
| "step": 4864 |
| }, |
| { |
| "epoch": 3.151358344113842, |
| "grad_norm": 1.4314054250717163, |
| "learning_rate": 3.1784034798574514e-06, |
| "loss": 0.2071, |
| "step": 4872 |
| }, |
| { |
| "epoch": 3.1565329883570503, |
| "grad_norm": 2.9645638465881348, |
| "learning_rate": 3.1628059329114286e-06, |
| "loss": 0.2172, |
| "step": 4880 |
| }, |
| { |
| "epoch": 3.1617076326002587, |
| "grad_norm": 2.3624343872070312, |
| "learning_rate": 3.1472290275709642e-06, |
| "loss": 0.2201, |
| "step": 4888 |
| }, |
| { |
| "epoch": 3.166882276843467, |
| "grad_norm": 8.607010841369629, |
| "learning_rate": 3.1316729388487815e-06, |
| "loss": 0.2092, |
| "step": 4896 |
| }, |
| { |
| "epoch": 3.1720569210866754, |
| "grad_norm": 1.539337396621704, |
| "learning_rate": 3.1161378415237197e-06, |
| "loss": 0.2105, |
| "step": 4904 |
| }, |
| { |
| "epoch": 3.1772315653298837, |
| "grad_norm": 2.8210718631744385, |
| "learning_rate": 3.1006239101387725e-06, |
| "loss": 0.2279, |
| "step": 4912 |
| }, |
| { |
| "epoch": 3.1824062095730916, |
| "grad_norm": 2.121821641921997, |
| "learning_rate": 3.0851313189991226e-06, |
| "loss": 0.2033, |
| "step": 4920 |
| }, |
| { |
| "epoch": 3.1875808538163, |
| "grad_norm": 1.296933650970459, |
| "learning_rate": 3.0696602421701943e-06, |
| "loss": 0.2021, |
| "step": 4928 |
| }, |
| { |
| "epoch": 3.1927554980595083, |
| "grad_norm": 6.50001335144043, |
| "learning_rate": 3.054210853475682e-06, |
| "loss": 0.209, |
| "step": 4936 |
| }, |
| { |
| "epoch": 3.1979301423027167, |
| "grad_norm": 4.615538120269775, |
| "learning_rate": 3.0387833264956078e-06, |
| "loss": 0.2133, |
| "step": 4944 |
| }, |
| { |
| "epoch": 3.203104786545925, |
| "grad_norm": 2.2612783908843994, |
| "learning_rate": 3.02337783456437e-06, |
| "loss": 0.2207, |
| "step": 4952 |
| }, |
| { |
| "epoch": 3.2082794307891334, |
| "grad_norm": 5.742753028869629, |
| "learning_rate": 3.007994550768793e-06, |
| "loss": 0.2244, |
| "step": 4960 |
| }, |
| { |
| "epoch": 3.2134540750323417, |
| "grad_norm": 2.2950289249420166, |
| "learning_rate": 2.9926336479461846e-06, |
| "loss": 0.2055, |
| "step": 4968 |
| }, |
| { |
| "epoch": 3.2186287192755496, |
| "grad_norm": 1.1664949655532837, |
| "learning_rate": 2.9772952986823943e-06, |
| "loss": 0.2003, |
| "step": 4976 |
| }, |
| { |
| "epoch": 3.223803363518758, |
| "grad_norm": 16.022438049316406, |
| "learning_rate": 2.9619796753098716e-06, |
| "loss": 0.2171, |
| "step": 4984 |
| }, |
| { |
| "epoch": 3.2289780077619663, |
| "grad_norm": 1.7189433574676514, |
| "learning_rate": 2.946686949905733e-06, |
| "loss": 0.2308, |
| "step": 4992 |
| }, |
| { |
| "epoch": 3.2341526520051747, |
| "grad_norm": 9.547940254211426, |
| "learning_rate": 2.9314172942898257e-06, |
| "loss": 0.2124, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.239327296248383, |
| "grad_norm": 2.51373028755188, |
| "learning_rate": 2.9161708800228e-06, |
| "loss": 0.196, |
| "step": 5008 |
| }, |
| { |
| "epoch": 3.2445019404915914, |
| "grad_norm": 2.1994516849517822, |
| "learning_rate": 2.900947878404181e-06, |
| "loss": 0.2283, |
| "step": 5016 |
| }, |
| { |
| "epoch": 3.2496765847347993, |
| "grad_norm": 3.118130922317505, |
| "learning_rate": 2.8857484604704415e-06, |
| "loss": 0.2067, |
| "step": 5024 |
| }, |
| { |
| "epoch": 3.2548512289780076, |
| "grad_norm": 11.828572273254395, |
| "learning_rate": 2.870572796993084e-06, |
| "loss": 0.1918, |
| "step": 5032 |
| }, |
| { |
| "epoch": 3.260025873221216, |
| "grad_norm": 9.986909866333008, |
| "learning_rate": 2.8554210584767188e-06, |
| "loss": 0.2205, |
| "step": 5040 |
| }, |
| { |
| "epoch": 3.2652005174644243, |
| "grad_norm": 10.845985412597656, |
| "learning_rate": 2.8402934151571505e-06, |
| "loss": 0.2055, |
| "step": 5048 |
| }, |
| { |
| "epoch": 3.2703751617076326, |
| "grad_norm": 6.319619655609131, |
| "learning_rate": 2.8251900369994645e-06, |
| "loss": 0.2106, |
| "step": 5056 |
| }, |
| { |
| "epoch": 3.275549805950841, |
| "grad_norm": 6.275879859924316, |
| "learning_rate": 2.8101110936961153e-06, |
| "loss": 0.2055, |
| "step": 5064 |
| }, |
| { |
| "epoch": 3.2807244501940493, |
| "grad_norm": 56.02274703979492, |
| "learning_rate": 2.795056754665028e-06, |
| "loss": 0.2066, |
| "step": 5072 |
| }, |
| { |
| "epoch": 3.2858990944372573, |
| "grad_norm": 4.814873218536377, |
| "learning_rate": 2.7800271890476836e-06, |
| "loss": 0.2145, |
| "step": 5080 |
| }, |
| { |
| "epoch": 3.2910737386804656, |
| "grad_norm": 2.2435498237609863, |
| "learning_rate": 2.765022565707226e-06, |
| "loss": 0.2214, |
| "step": 5088 |
| }, |
| { |
| "epoch": 3.296248382923674, |
| "grad_norm": 13.148270606994629, |
| "learning_rate": 2.750043053226561e-06, |
| "loss": 0.2017, |
| "step": 5096 |
| }, |
| { |
| "epoch": 3.3014230271668823, |
| "grad_norm": 8.37467098236084, |
| "learning_rate": 2.735088819906465e-06, |
| "loss": 0.202, |
| "step": 5104 |
| }, |
| { |
| "epoch": 3.3065976714100906, |
| "grad_norm": 17.98732566833496, |
| "learning_rate": 2.7201600337636946e-06, |
| "loss": 0.2121, |
| "step": 5112 |
| }, |
| { |
| "epoch": 3.311772315653299, |
| "grad_norm": 6.076496601104736, |
| "learning_rate": 2.7052568625290955e-06, |
| "loss": 0.2187, |
| "step": 5120 |
| }, |
| { |
| "epoch": 3.316946959896507, |
| "grad_norm": 2.1653237342834473, |
| "learning_rate": 2.690379473645718e-06, |
| "loss": 0.2119, |
| "step": 5128 |
| }, |
| { |
| "epoch": 3.3221216041397152, |
| "grad_norm": 10.182047843933105, |
| "learning_rate": 2.675528034266941e-06, |
| "loss": 0.2204, |
| "step": 5136 |
| }, |
| { |
| "epoch": 3.3272962483829236, |
| "grad_norm": 29.412364959716797, |
| "learning_rate": 2.6607027112545893e-06, |
| "loss": 0.2093, |
| "step": 5144 |
| }, |
| { |
| "epoch": 3.332470892626132, |
| "grad_norm": 4.263775825500488, |
| "learning_rate": 2.645903671177058e-06, |
| "loss": 0.2191, |
| "step": 5152 |
| }, |
| { |
| "epoch": 3.3376455368693403, |
| "grad_norm": 30.59326934814453, |
| "learning_rate": 2.631131080307445e-06, |
| "loss": 0.2026, |
| "step": 5160 |
| }, |
| { |
| "epoch": 3.3428201811125486, |
| "grad_norm": 5.779555320739746, |
| "learning_rate": 2.6163851046216813e-06, |
| "loss": 0.2137, |
| "step": 5168 |
| }, |
| { |
| "epoch": 3.347994825355757, |
| "grad_norm": 3.681560754776001, |
| "learning_rate": 2.6016659097966636e-06, |
| "loss": 0.2146, |
| "step": 5176 |
| }, |
| { |
| "epoch": 3.353169469598965, |
| "grad_norm": 1.4380924701690674, |
| "learning_rate": 2.5869736612083955e-06, |
| "loss": 0.2087, |
| "step": 5184 |
| }, |
| { |
| "epoch": 3.3583441138421732, |
| "grad_norm": 12.789270401000977, |
| "learning_rate": 2.572308523930131e-06, |
| "loss": 0.216, |
| "step": 5192 |
| }, |
| { |
| "epoch": 3.3635187580853816, |
| "grad_norm": 17.20673370361328, |
| "learning_rate": 2.557670662730515e-06, |
| "loss": 0.2145, |
| "step": 5200 |
| }, |
| { |
| "epoch": 3.36869340232859, |
| "grad_norm": 1.4245859384536743, |
| "learning_rate": 2.5430602420717355e-06, |
| "loss": 0.2107, |
| "step": 5208 |
| }, |
| { |
| "epoch": 3.3738680465717983, |
| "grad_norm": 13.390450477600098, |
| "learning_rate": 2.528477426107678e-06, |
| "loss": 0.204, |
| "step": 5216 |
| }, |
| { |
| "epoch": 3.3790426908150066, |
| "grad_norm": 1.8627618551254272, |
| "learning_rate": 2.513922378682075e-06, |
| "loss": 0.2112, |
| "step": 5224 |
| }, |
| { |
| "epoch": 3.3842173350582145, |
| "grad_norm": 1.2581387758255005, |
| "learning_rate": 2.499395263326669e-06, |
| "loss": 0.2056, |
| "step": 5232 |
| }, |
| { |
| "epoch": 3.389391979301423, |
| "grad_norm": 1.6016255617141724, |
| "learning_rate": 2.484896243259375e-06, |
| "loss": 0.2077, |
| "step": 5240 |
| }, |
| { |
| "epoch": 3.3945666235446312, |
| "grad_norm": 6.12626314163208, |
| "learning_rate": 2.470425481382447e-06, |
| "loss": 0.2113, |
| "step": 5248 |
| }, |
| { |
| "epoch": 3.3997412677878396, |
| "grad_norm": 2.2390005588531494, |
| "learning_rate": 2.4559831402806454e-06, |
| "loss": 0.2097, |
| "step": 5256 |
| }, |
| { |
| "epoch": 3.404915912031048, |
| "grad_norm": 5.566039085388184, |
| "learning_rate": 2.441569382219413e-06, |
| "loss": 0.2065, |
| "step": 5264 |
| }, |
| { |
| "epoch": 3.4100905562742563, |
| "grad_norm": 1.4189672470092773, |
| "learning_rate": 2.427184369143051e-06, |
| "loss": 0.2182, |
| "step": 5272 |
| }, |
| { |
| "epoch": 3.4152652005174646, |
| "grad_norm": 22.4144287109375, |
| "learning_rate": 2.4128282626728985e-06, |
| "loss": 0.2052, |
| "step": 5280 |
| }, |
| { |
| "epoch": 3.4204398447606725, |
| "grad_norm": 2.110011339187622, |
| "learning_rate": 2.398501224105517e-06, |
| "loss": 0.2091, |
| "step": 5288 |
| }, |
| { |
| "epoch": 3.425614489003881, |
| "grad_norm": 2.668170928955078, |
| "learning_rate": 2.384203414410878e-06, |
| "loss": 0.2092, |
| "step": 5296 |
| }, |
| { |
| "epoch": 3.4307891332470892, |
| "grad_norm": 3.0023293495178223, |
| "learning_rate": 2.3699349942305603e-06, |
| "loss": 0.2116, |
| "step": 5304 |
| }, |
| { |
| "epoch": 3.4359637774902976, |
| "grad_norm": 4.757721900939941, |
| "learning_rate": 2.355696123875934e-06, |
| "loss": 0.2025, |
| "step": 5312 |
| }, |
| { |
| "epoch": 3.441138421733506, |
| "grad_norm": 19.3017635345459, |
| "learning_rate": 2.341486963326366e-06, |
| "loss": 0.2227, |
| "step": 5320 |
| }, |
| { |
| "epoch": 3.4463130659767143, |
| "grad_norm": 1.613916039466858, |
| "learning_rate": 2.3273076722274233e-06, |
| "loss": 0.1964, |
| "step": 5328 |
| }, |
| { |
| "epoch": 3.451487710219922, |
| "grad_norm": 2.9506986141204834, |
| "learning_rate": 2.3131584098890775e-06, |
| "loss": 0.2258, |
| "step": 5336 |
| }, |
| { |
| "epoch": 3.4566623544631305, |
| "grad_norm": 6.207396984100342, |
| "learning_rate": 2.299039335283914e-06, |
| "loss": 0.2156, |
| "step": 5344 |
| }, |
| { |
| "epoch": 3.461836998706339, |
| "grad_norm": 1.0315911769866943, |
| "learning_rate": 2.2849506070453466e-06, |
| "loss": 0.1993, |
| "step": 5352 |
| }, |
| { |
| "epoch": 3.4670116429495472, |
| "grad_norm": 39.45634078979492, |
| "learning_rate": 2.27089238346584e-06, |
| "loss": 0.201, |
| "step": 5360 |
| }, |
| { |
| "epoch": 3.4721862871927556, |
| "grad_norm": 13.167795181274414, |
| "learning_rate": 2.2568648224951217e-06, |
| "loss": 0.2168, |
| "step": 5368 |
| }, |
| { |
| "epoch": 3.477360931435964, |
| "grad_norm": 19.175676345825195, |
| "learning_rate": 2.2428680817384153e-06, |
| "loss": 0.1958, |
| "step": 5376 |
| }, |
| { |
| "epoch": 3.4825355756791723, |
| "grad_norm": 6.49905252456665, |
| "learning_rate": 2.228902318454666e-06, |
| "loss": 0.2009, |
| "step": 5384 |
| }, |
| { |
| "epoch": 3.48771021992238, |
| "grad_norm": 2.5502731800079346, |
| "learning_rate": 2.214967689554775e-06, |
| "loss": 0.2018, |
| "step": 5392 |
| }, |
| { |
| "epoch": 3.4928848641655885, |
| "grad_norm": 17.03938102722168, |
| "learning_rate": 2.201064351599837e-06, |
| "loss": 0.2102, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.498059508408797, |
| "grad_norm": 3.042534112930298, |
| "learning_rate": 2.18719246079938e-06, |
| "loss": 0.212, |
| "step": 5408 |
| }, |
| { |
| "epoch": 3.503234152652005, |
| "grad_norm": 1.2638347148895264, |
| "learning_rate": 2.17335217300961e-06, |
| "loss": 0.2273, |
| "step": 5416 |
| }, |
| { |
| "epoch": 3.5084087968952136, |
| "grad_norm": 13.239524841308594, |
| "learning_rate": 2.1595436437316614e-06, |
| "loss": 0.2107, |
| "step": 5424 |
| }, |
| { |
| "epoch": 3.5135834411384215, |
| "grad_norm": 1.3805886507034302, |
| "learning_rate": 2.1457670281098493e-06, |
| "loss": 0.2167, |
| "step": 5432 |
| }, |
| { |
| "epoch": 3.51875808538163, |
| "grad_norm": 4.028883934020996, |
| "learning_rate": 2.132022480929926e-06, |
| "loss": 0.2158, |
| "step": 5440 |
| }, |
| { |
| "epoch": 3.523932729624838, |
| "grad_norm": 5.846325397491455, |
| "learning_rate": 2.118310156617342e-06, |
| "loss": 0.2237, |
| "step": 5448 |
| }, |
| { |
| "epoch": 3.5291073738680465, |
| "grad_norm": 1.7205686569213867, |
| "learning_rate": 2.1046302092355107e-06, |
| "loss": 0.2206, |
| "step": 5456 |
| }, |
| { |
| "epoch": 3.534282018111255, |
| "grad_norm": 16.556434631347656, |
| "learning_rate": 2.0909827924840787e-06, |
| "loss": 0.208, |
| "step": 5464 |
| }, |
| { |
| "epoch": 3.539456662354463, |
| "grad_norm": 1.7649803161621094, |
| "learning_rate": 2.0773680596971976e-06, |
| "loss": 0.2087, |
| "step": 5472 |
| }, |
| { |
| "epoch": 3.5446313065976716, |
| "grad_norm": 33.007301330566406, |
| "learning_rate": 2.0637861638418003e-06, |
| "loss": 0.2162, |
| "step": 5480 |
| }, |
| { |
| "epoch": 3.54980595084088, |
| "grad_norm": 55.374752044677734, |
| "learning_rate": 2.0502372575158865e-06, |
| "loss": 0.2078, |
| "step": 5488 |
| }, |
| { |
| "epoch": 3.554980595084088, |
| "grad_norm": 9.658388137817383, |
| "learning_rate": 2.0367214929468036e-06, |
| "loss": 0.2036, |
| "step": 5496 |
| }, |
| { |
| "epoch": 3.560155239327296, |
| "grad_norm": 15.524160385131836, |
| "learning_rate": 2.0232390219895364e-06, |
| "loss": 0.2035, |
| "step": 5504 |
| }, |
| { |
| "epoch": 3.5653298835705045, |
| "grad_norm": 3.2139875888824463, |
| "learning_rate": 2.009789996125009e-06, |
| "loss": 0.2099, |
| "step": 5512 |
| }, |
| { |
| "epoch": 3.570504527813713, |
| "grad_norm": 1.951788067817688, |
| "learning_rate": 1.99637456645837e-06, |
| "loss": 0.2027, |
| "step": 5520 |
| }, |
| { |
| "epoch": 3.575679172056921, |
| "grad_norm": 1.508257508277893, |
| "learning_rate": 1.982992883717304e-06, |
| "loss": 0.2064, |
| "step": 5528 |
| }, |
| { |
| "epoch": 3.580853816300129, |
| "grad_norm": 1.6039284467697144, |
| "learning_rate": 1.9696450982503356e-06, |
| "loss": 0.2065, |
| "step": 5536 |
| }, |
| { |
| "epoch": 3.5860284605433375, |
| "grad_norm": 7.59080696105957, |
| "learning_rate": 1.95633136002514e-06, |
| "loss": 0.2112, |
| "step": 5544 |
| }, |
| { |
| "epoch": 3.591203104786546, |
| "grad_norm": 25.365097045898438, |
| "learning_rate": 1.943051818626857e-06, |
| "loss": 0.2115, |
| "step": 5552 |
| }, |
| { |
| "epoch": 3.596377749029754, |
| "grad_norm": 1.8311065435409546, |
| "learning_rate": 1.9298066232564135e-06, |
| "loss": 0.203, |
| "step": 5560 |
| }, |
| { |
| "epoch": 3.6015523932729625, |
| "grad_norm": 12.468267440795898, |
| "learning_rate": 1.916595922728843e-06, |
| "loss": 0.2106, |
| "step": 5568 |
| }, |
| { |
| "epoch": 3.606727037516171, |
| "grad_norm": 3.0780019760131836, |
| "learning_rate": 1.9034198654716163e-06, |
| "loss": 0.2152, |
| "step": 5576 |
| }, |
| { |
| "epoch": 3.611901681759379, |
| "grad_norm": 1.8586463928222656, |
| "learning_rate": 1.890278599522975e-06, |
| "loss": 0.203, |
| "step": 5584 |
| }, |
| { |
| "epoch": 3.6170763260025875, |
| "grad_norm": 1.7706098556518555, |
| "learning_rate": 1.8771722725302644e-06, |
| "loss": 0.2188, |
| "step": 5592 |
| }, |
| { |
| "epoch": 3.6222509702457955, |
| "grad_norm": 2.8252525329589844, |
| "learning_rate": 1.864101031748277e-06, |
| "loss": 0.2101, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.627425614489004, |
| "grad_norm": 2.265062093734741, |
| "learning_rate": 1.8510650240376e-06, |
| "loss": 0.2018, |
| "step": 5608 |
| }, |
| { |
| "epoch": 3.632600258732212, |
| "grad_norm": 2.893099546432495, |
| "learning_rate": 1.8380643958629596e-06, |
| "loss": 0.2047, |
| "step": 5616 |
| }, |
| { |
| "epoch": 3.6377749029754205, |
| "grad_norm": 1.9583306312561035, |
| "learning_rate": 1.8250992932915811e-06, |
| "loss": 0.2101, |
| "step": 5624 |
| }, |
| { |
| "epoch": 3.642949547218629, |
| "grad_norm": 1.5815550088882446, |
| "learning_rate": 1.8121698619915457e-06, |
| "loss": 0.2153, |
| "step": 5632 |
| }, |
| { |
| "epoch": 3.6481241914618368, |
| "grad_norm": 20.85481834411621, |
| "learning_rate": 1.7992762472301511e-06, |
| "loss": 0.2095, |
| "step": 5640 |
| }, |
| { |
| "epoch": 3.653298835705045, |
| "grad_norm": 3.830641269683838, |
| "learning_rate": 1.7864185938722868e-06, |
| "loss": 0.2056, |
| "step": 5648 |
| }, |
| { |
| "epoch": 3.6584734799482534, |
| "grad_norm": 15.252459526062012, |
| "learning_rate": 1.7735970463787967e-06, |
| "loss": 0.2233, |
| "step": 5656 |
| }, |
| { |
| "epoch": 3.663648124191462, |
| "grad_norm": 2.826512336730957, |
| "learning_rate": 1.7608117488048636e-06, |
| "loss": 0.2275, |
| "step": 5664 |
| }, |
| { |
| "epoch": 3.66882276843467, |
| "grad_norm": 2.0030300617218018, |
| "learning_rate": 1.7480628447983878e-06, |
| "loss": 0.2101, |
| "step": 5672 |
| }, |
| { |
| "epoch": 3.6739974126778785, |
| "grad_norm": 2.019261598587036, |
| "learning_rate": 1.735350477598372e-06, |
| "loss": 0.2121, |
| "step": 5680 |
| }, |
| { |
| "epoch": 3.679172056921087, |
| "grad_norm": 2.8684234619140625, |
| "learning_rate": 1.7226747900333135e-06, |
| "loss": 0.2239, |
| "step": 5688 |
| }, |
| { |
| "epoch": 3.684346701164295, |
| "grad_norm": 18.148910522460938, |
| "learning_rate": 1.7100359245196035e-06, |
| "loss": 0.2087, |
| "step": 5696 |
| }, |
| { |
| "epoch": 3.689521345407503, |
| "grad_norm": 2.289294719696045, |
| "learning_rate": 1.6974340230599173e-06, |
| "loss": 0.1977, |
| "step": 5704 |
| }, |
| { |
| "epoch": 3.6946959896507114, |
| "grad_norm": 7.737388610839844, |
| "learning_rate": 1.6848692272416268e-06, |
| "loss": 0.2152, |
| "step": 5712 |
| }, |
| { |
| "epoch": 3.69987063389392, |
| "grad_norm": 17.832653045654297, |
| "learning_rate": 1.6723416782352076e-06, |
| "loss": 0.2132, |
| "step": 5720 |
| }, |
| { |
| "epoch": 3.705045278137128, |
| "grad_norm": 2.064863443374634, |
| "learning_rate": 1.659851516792651e-06, |
| "loss": 0.2106, |
| "step": 5728 |
| }, |
| { |
| "epoch": 3.7102199223803365, |
| "grad_norm": 4.075965404510498, |
| "learning_rate": 1.647398883245886e-06, |
| "loss": 0.2105, |
| "step": 5736 |
| }, |
| { |
| "epoch": 3.7153945666235444, |
| "grad_norm": 18.446760177612305, |
| "learning_rate": 1.6349839175051995e-06, |
| "loss": 0.213, |
| "step": 5744 |
| }, |
| { |
| "epoch": 3.7205692108667527, |
| "grad_norm": 22.746885299682617, |
| "learning_rate": 1.622606759057666e-06, |
| "loss": 0.2037, |
| "step": 5752 |
| }, |
| { |
| "epoch": 3.725743855109961, |
| "grad_norm": 1.7176026105880737, |
| "learning_rate": 1.610267546965581e-06, |
| "loss": 0.2129, |
| "step": 5760 |
| }, |
| { |
| "epoch": 3.7309184993531694, |
| "grad_norm": 1.911559820175171, |
| "learning_rate": 1.5979664198648959e-06, |
| "loss": 0.227, |
| "step": 5768 |
| }, |
| { |
| "epoch": 3.736093143596378, |
| "grad_norm": 21.561500549316406, |
| "learning_rate": 1.5857035159636625e-06, |
| "loss": 0.2178, |
| "step": 5776 |
| }, |
| { |
| "epoch": 3.741267787839586, |
| "grad_norm": 14.1412353515625, |
| "learning_rate": 1.5734789730404815e-06, |
| "loss": 0.2048, |
| "step": 5784 |
| }, |
| { |
| "epoch": 3.7464424320827945, |
| "grad_norm": 8.0577392578125, |
| "learning_rate": 1.5612929284429484e-06, |
| "loss": 0.2079, |
| "step": 5792 |
| }, |
| { |
| "epoch": 3.751617076326003, |
| "grad_norm": 10.920722007751465, |
| "learning_rate": 1.549145519086122e-06, |
| "loss": 0.1922, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.7567917205692107, |
| "grad_norm": 1.764875888824463, |
| "learning_rate": 1.5370368814509727e-06, |
| "loss": 0.1979, |
| "step": 5808 |
| }, |
| { |
| "epoch": 3.761966364812419, |
| "grad_norm": 4.495997428894043, |
| "learning_rate": 1.5249671515828569e-06, |
| "loss": 0.2098, |
| "step": 5816 |
| }, |
| { |
| "epoch": 3.7671410090556274, |
| "grad_norm": 4.6347503662109375, |
| "learning_rate": 1.5129364650899869e-06, |
| "loss": 0.2254, |
| "step": 5824 |
| }, |
| { |
| "epoch": 3.772315653298836, |
| "grad_norm": 7.4554009437561035, |
| "learning_rate": 1.5009449571419077e-06, |
| "loss": 0.2071, |
| "step": 5832 |
| }, |
| { |
| "epoch": 3.777490297542044, |
| "grad_norm": 1.338654637336731, |
| "learning_rate": 1.4889927624679762e-06, |
| "loss": 0.2029, |
| "step": 5840 |
| }, |
| { |
| "epoch": 3.782664941785252, |
| "grad_norm": 3.0357022285461426, |
| "learning_rate": 1.4770800153558513e-06, |
| "loss": 0.2136, |
| "step": 5848 |
| }, |
| { |
| "epoch": 3.7878395860284604, |
| "grad_norm": 11.845126152038574, |
| "learning_rate": 1.4652068496499804e-06, |
| "loss": 0.2241, |
| "step": 5856 |
| }, |
| { |
| "epoch": 3.7930142302716687, |
| "grad_norm": 1.81815505027771, |
| "learning_rate": 1.4533733987501004e-06, |
| "loss": 0.2151, |
| "step": 5864 |
| }, |
| { |
| "epoch": 3.798188874514877, |
| "grad_norm": 1.015817403793335, |
| "learning_rate": 1.4415797956097356e-06, |
| "loss": 0.2179, |
| "step": 5872 |
| }, |
| { |
| "epoch": 3.8033635187580854, |
| "grad_norm": 5.7766218185424805, |
| "learning_rate": 1.4298261727347034e-06, |
| "loss": 0.2151, |
| "step": 5880 |
| }, |
| { |
| "epoch": 3.8085381630012938, |
| "grad_norm": 2.376643180847168, |
| "learning_rate": 1.41811266218163e-06, |
| "loss": 0.1969, |
| "step": 5888 |
| }, |
| { |
| "epoch": 3.813712807244502, |
| "grad_norm": 3.778541088104248, |
| "learning_rate": 1.4064393955564615e-06, |
| "loss": 0.211, |
| "step": 5896 |
| }, |
| { |
| "epoch": 3.8188874514877105, |
| "grad_norm": 8.010899543762207, |
| "learning_rate": 1.3948065040129882e-06, |
| "loss": 0.2075, |
| "step": 5904 |
| }, |
| { |
| "epoch": 3.8240620957309184, |
| "grad_norm": 11.207403182983398, |
| "learning_rate": 1.3832141182513699e-06, |
| "loss": 0.2022, |
| "step": 5912 |
| }, |
| { |
| "epoch": 3.8292367399741267, |
| "grad_norm": 1.3528246879577637, |
| "learning_rate": 1.3716623685166685e-06, |
| "loss": 0.2143, |
| "step": 5920 |
| }, |
| { |
| "epoch": 3.834411384217335, |
| "grad_norm": 26.18692970275879, |
| "learning_rate": 1.3601513845973835e-06, |
| "loss": 0.2224, |
| "step": 5928 |
| }, |
| { |
| "epoch": 3.8395860284605434, |
| "grad_norm": 1.8334400653839111, |
| "learning_rate": 1.3486812958239931e-06, |
| "loss": 0.2178, |
| "step": 5936 |
| }, |
| { |
| "epoch": 3.8447606727037518, |
| "grad_norm": 5.360263824462891, |
| "learning_rate": 1.3372522310675063e-06, |
| "loss": 0.2175, |
| "step": 5944 |
| }, |
| { |
| "epoch": 3.8499353169469597, |
| "grad_norm": 1.6526539325714111, |
| "learning_rate": 1.3258643187380071e-06, |
| "loss": 0.2074, |
| "step": 5952 |
| }, |
| { |
| "epoch": 3.855109961190168, |
| "grad_norm": 10.19829273223877, |
| "learning_rate": 1.3145176867832165e-06, |
| "loss": 0.2067, |
| "step": 5960 |
| }, |
| { |
| "epoch": 3.8602846054333764, |
| "grad_norm": 3.2970707416534424, |
| "learning_rate": 1.3032124626870546e-06, |
| "loss": 0.2229, |
| "step": 5968 |
| }, |
| { |
| "epoch": 3.8654592496765847, |
| "grad_norm": 2.3017404079437256, |
| "learning_rate": 1.2919487734682073e-06, |
| "loss": 0.2071, |
| "step": 5976 |
| }, |
| { |
| "epoch": 3.870633893919793, |
| "grad_norm": 11.9258394241333, |
| "learning_rate": 1.2807267456787004e-06, |
| "loss": 0.204, |
| "step": 5984 |
| }, |
| { |
| "epoch": 3.8758085381630014, |
| "grad_norm": 1.4012444019317627, |
| "learning_rate": 1.2695465054024752e-06, |
| "loss": 0.2191, |
| "step": 5992 |
| }, |
| { |
| "epoch": 3.8809831824062098, |
| "grad_norm": 1.8280630111694336, |
| "learning_rate": 1.2584081782539764e-06, |
| "loss": 0.2163, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.886157826649418, |
| "grad_norm": 5.412600994110107, |
| "learning_rate": 1.247311889376736e-06, |
| "loss": 0.2066, |
| "step": 6008 |
| }, |
| { |
| "epoch": 3.891332470892626, |
| "grad_norm": 18.54897117614746, |
| "learning_rate": 1.2362577634419692e-06, |
| "loss": 0.2104, |
| "step": 6016 |
| }, |
| { |
| "epoch": 3.8965071151358344, |
| "grad_norm": 21.60243034362793, |
| "learning_rate": 1.2252459246471754e-06, |
| "loss": 0.2074, |
| "step": 6024 |
| }, |
| { |
| "epoch": 3.9016817593790427, |
| "grad_norm": 24.753875732421875, |
| "learning_rate": 1.2142764967147385e-06, |
| "loss": 0.2005, |
| "step": 6032 |
| }, |
| { |
| "epoch": 3.906856403622251, |
| "grad_norm": 12.543983459472656, |
| "learning_rate": 1.2033496028905445e-06, |
| "loss": 0.204, |
| "step": 6040 |
| }, |
| { |
| "epoch": 3.9120310478654594, |
| "grad_norm": 8.506756782531738, |
| "learning_rate": 1.1924653659425862e-06, |
| "loss": 0.2109, |
| "step": 6048 |
| }, |
| { |
| "epoch": 3.9172056921086673, |
| "grad_norm": 6.222147464752197, |
| "learning_rate": 1.1816239081595926e-06, |
| "loss": 0.203, |
| "step": 6056 |
| }, |
| { |
| "epoch": 3.9223803363518757, |
| "grad_norm": 1.7484989166259766, |
| "learning_rate": 1.1708253513496504e-06, |
| "loss": 0.2183, |
| "step": 6064 |
| }, |
| { |
| "epoch": 3.927554980595084, |
| "grad_norm": 1.252619981765747, |
| "learning_rate": 1.160069816838838e-06, |
| "loss": 0.2018, |
| "step": 6072 |
| }, |
| { |
| "epoch": 3.9327296248382924, |
| "grad_norm": 8.604789733886719, |
| "learning_rate": 1.1493574254698598e-06, |
| "loss": 0.1997, |
| "step": 6080 |
| }, |
| { |
| "epoch": 3.9379042690815007, |
| "grad_norm": 21.417587280273438, |
| "learning_rate": 1.1386882976006897e-06, |
| "loss": 0.1985, |
| "step": 6088 |
| }, |
| { |
| "epoch": 3.943078913324709, |
| "grad_norm": 1.4360429048538208, |
| "learning_rate": 1.128062553103223e-06, |
| "loss": 0.214, |
| "step": 6096 |
| }, |
| { |
| "epoch": 3.9482535575679174, |
| "grad_norm": 28.317441940307617, |
| "learning_rate": 1.1174803113619204e-06, |
| "loss": 0.2086, |
| "step": 6104 |
| }, |
| { |
| "epoch": 3.9534282018111258, |
| "grad_norm": 4.1924052238464355, |
| "learning_rate": 1.106941691272474e-06, |
| "loss": 0.214, |
| "step": 6112 |
| }, |
| { |
| "epoch": 3.9586028460543337, |
| "grad_norm": 11.092174530029297, |
| "learning_rate": 1.0964468112404691e-06, |
| "loss": 0.2052, |
| "step": 6120 |
| }, |
| { |
| "epoch": 3.963777490297542, |
| "grad_norm": 0.8266966938972473, |
| "learning_rate": 1.0859957891800548e-06, |
| "loss": 0.2056, |
| "step": 6128 |
| }, |
| { |
| "epoch": 3.9689521345407504, |
| "grad_norm": 1.4189459085464478, |
| "learning_rate": 1.075588742512617e-06, |
| "loss": 0.2043, |
| "step": 6136 |
| }, |
| { |
| "epoch": 3.9741267787839587, |
| "grad_norm": 10.665148735046387, |
| "learning_rate": 1.0652257881654625e-06, |
| "loss": 0.2146, |
| "step": 6144 |
| }, |
| { |
| "epoch": 3.9793014230271666, |
| "grad_norm": 11.610654830932617, |
| "learning_rate": 1.0549070425705017e-06, |
| "loss": 0.2126, |
| "step": 6152 |
| }, |
| { |
| "epoch": 3.984476067270375, |
| "grad_norm": 1.4883841276168823, |
| "learning_rate": 1.0446326216629422e-06, |
| "loss": 0.2093, |
| "step": 6160 |
| }, |
| { |
| "epoch": 3.9896507115135833, |
| "grad_norm": 3.509707450866699, |
| "learning_rate": 1.0344026408799868e-06, |
| "loss": 0.2055, |
| "step": 6168 |
| }, |
| { |
| "epoch": 3.9948253557567917, |
| "grad_norm": 7.780543804168701, |
| "learning_rate": 1.0242172151595365e-06, |
| "loss": 0.2123, |
| "step": 6176 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 12.673335075378418, |
| "learning_rate": 1.0140764589388963e-06, |
| "loss": 0.2044, |
| "step": 6184 |
| }, |
| { |
| "epoch": 4.005174644243208, |
| "grad_norm": 3.754645824432373, |
| "learning_rate": 1.003980486153494e-06, |
| "loss": 0.2155, |
| "step": 6192 |
| }, |
| { |
| "epoch": 4.010349288486417, |
| "grad_norm": 0.9349867105484009, |
| "learning_rate": 9.939294102355957e-07, |
| "loss": 0.211, |
| "step": 6200 |
| }, |
| { |
| "epoch": 4.015523932729625, |
| "grad_norm": 9.735404014587402, |
| "learning_rate": 9.839233441130353e-07, |
| "loss": 0.2043, |
| "step": 6208 |
| }, |
| { |
| "epoch": 4.020698576972833, |
| "grad_norm": 4.292990207672119, |
| "learning_rate": 9.739624002079412e-07, |
| "loss": 0.2239, |
| "step": 6216 |
| }, |
| { |
| "epoch": 4.025873221216042, |
| "grad_norm": 12.860719680786133, |
| "learning_rate": 9.640466904354778e-07, |
| "loss": 0.2163, |
| "step": 6224 |
| }, |
| { |
| "epoch": 4.03104786545925, |
| "grad_norm": 1.3650522232055664, |
| "learning_rate": 9.541763262025866e-07, |
| "loss": 0.2082, |
| "step": 6232 |
| }, |
| { |
| "epoch": 4.0362225097024576, |
| "grad_norm": 17.642620086669922, |
| "learning_rate": 9.443514184067326e-07, |
| "loss": 0.197, |
| "step": 6240 |
| }, |
| { |
| "epoch": 4.041397153945666, |
| "grad_norm": 5.8212714195251465, |
| "learning_rate": 9.345720774346589e-07, |
| "loss": 0.2059, |
| "step": 6248 |
| }, |
| { |
| "epoch": 4.046571798188874, |
| "grad_norm": 14.918822288513184, |
| "learning_rate": 9.248384131611493e-07, |
| "loss": 0.2074, |
| "step": 6256 |
| }, |
| { |
| "epoch": 4.051746442432083, |
| "grad_norm": 3.4913625717163086, |
| "learning_rate": 9.151505349477901e-07, |
| "loss": 0.2251, |
| "step": 6264 |
| }, |
| { |
| "epoch": 4.056921086675291, |
| "grad_norm": 17.606687545776367, |
| "learning_rate": 9.055085516417439e-07, |
| "loss": 0.2141, |
| "step": 6272 |
| }, |
| { |
| "epoch": 4.062095730918499, |
| "grad_norm": 0.8561938405036926, |
| "learning_rate": 8.959125715745248e-07, |
| "loss": 0.2123, |
| "step": 6280 |
| }, |
| { |
| "epoch": 4.067270375161708, |
| "grad_norm": 6.487976551055908, |
| "learning_rate": 8.863627025607835e-07, |
| "loss": 0.2218, |
| "step": 6288 |
| }, |
| { |
| "epoch": 4.072445019404916, |
| "grad_norm": 24.814111709594727, |
| "learning_rate": 8.768590518970938e-07, |
| "loss": 0.1991, |
| "step": 6296 |
| }, |
| { |
| "epoch": 4.077619663648124, |
| "grad_norm": 0.9031611084938049, |
| "learning_rate": 8.674017263607488e-07, |
| "loss": 0.2011, |
| "step": 6304 |
| }, |
| { |
| "epoch": 4.082794307891333, |
| "grad_norm": 32.17177200317383, |
| "learning_rate": 8.57990832208559e-07, |
| "loss": 0.2109, |
| "step": 6312 |
| }, |
| { |
| "epoch": 4.087968952134541, |
| "grad_norm": 4.342803478240967, |
| "learning_rate": 8.486264751756607e-07, |
| "loss": 0.1977, |
| "step": 6320 |
| }, |
| { |
| "epoch": 4.093143596377749, |
| "grad_norm": 4.845144748687744, |
| "learning_rate": 8.393087604743283e-07, |
| "loss": 0.2082, |
| "step": 6328 |
| }, |
| { |
| "epoch": 4.098318240620958, |
| "grad_norm": 7.410824298858643, |
| "learning_rate": 8.300377927927888e-07, |
| "loss": 0.2096, |
| "step": 6336 |
| }, |
| { |
| "epoch": 4.103492884864165, |
| "grad_norm": 21.402700424194336, |
| "learning_rate": 8.208136762940489e-07, |
| "loss": 0.2133, |
| "step": 6344 |
| }, |
| { |
| "epoch": 4.1086675291073735, |
| "grad_norm": 10.131089210510254, |
| "learning_rate": 8.116365146147243e-07, |
| "loss": 0.2217, |
| "step": 6352 |
| }, |
| { |
| "epoch": 4.113842173350582, |
| "grad_norm": 2.5814476013183594, |
| "learning_rate": 8.025064108638742e-07, |
| "loss": 0.1901, |
| "step": 6360 |
| }, |
| { |
| "epoch": 4.11901681759379, |
| "grad_norm": 3.8243138790130615, |
| "learning_rate": 7.934234676218411e-07, |
| "loss": 0.2239, |
| "step": 6368 |
| }, |
| { |
| "epoch": 4.124191461836999, |
| "grad_norm": 1.469176173210144, |
| "learning_rate": 7.843877869391053e-07, |
| "loss": 0.2088, |
| "step": 6376 |
| }, |
| { |
| "epoch": 4.129366106080207, |
| "grad_norm": 8.301488876342773, |
| "learning_rate": 7.753994703351298e-07, |
| "loss": 0.2082, |
| "step": 6384 |
| }, |
| { |
| "epoch": 4.134540750323415, |
| "grad_norm": 5.9099249839782715, |
| "learning_rate": 7.664586187972234e-07, |
| "loss": 0.1966, |
| "step": 6392 |
| }, |
| { |
| "epoch": 4.139715394566624, |
| "grad_norm": 10.461531639099121, |
| "learning_rate": 7.575653327794075e-07, |
| "loss": 0.2058, |
| "step": 6400 |
| }, |
| { |
| "epoch": 4.144890038809832, |
| "grad_norm": 6.918221950531006, |
| "learning_rate": 7.48719712201284e-07, |
| "loss": 0.212, |
| "step": 6408 |
| }, |
| { |
| "epoch": 4.15006468305304, |
| "grad_norm": 2.304258346557617, |
| "learning_rate": 7.399218564469174e-07, |
| "loss": 0.2005, |
| "step": 6416 |
| }, |
| { |
| "epoch": 4.155239327296249, |
| "grad_norm": 6.370954990386963, |
| "learning_rate": 7.311718643637134e-07, |
| "loss": 0.1985, |
| "step": 6424 |
| }, |
| { |
| "epoch": 4.160413971539457, |
| "grad_norm": 11.649602890014648, |
| "learning_rate": 7.224698342613096e-07, |
| "loss": 0.1978, |
| "step": 6432 |
| }, |
| { |
| "epoch": 4.165588615782665, |
| "grad_norm": 6.221530437469482, |
| "learning_rate": 7.138158639104748e-07, |
| "loss": 0.2098, |
| "step": 6440 |
| }, |
| { |
| "epoch": 4.170763260025873, |
| "grad_norm": 10.76377010345459, |
| "learning_rate": 7.052100505420051e-07, |
| "loss": 0.2189, |
| "step": 6448 |
| }, |
| { |
| "epoch": 4.175937904269081, |
| "grad_norm": 12.636096954345703, |
| "learning_rate": 6.96652490845634e-07, |
| "loss": 0.2253, |
| "step": 6456 |
| }, |
| { |
| "epoch": 4.1811125485122895, |
| "grad_norm": 1.3980625867843628, |
| "learning_rate": 6.881432809689459e-07, |
| "loss": 0.2044, |
| "step": 6464 |
| }, |
| { |
| "epoch": 4.186287192755498, |
| "grad_norm": 40.41677474975586, |
| "learning_rate": 6.796825165162951e-07, |
| "loss": 0.2063, |
| "step": 6472 |
| }, |
| { |
| "epoch": 4.191461836998706, |
| "grad_norm": 6.918355941772461, |
| "learning_rate": 6.712702925477343e-07, |
| "loss": 0.2095, |
| "step": 6480 |
| }, |
| { |
| "epoch": 4.196636481241915, |
| "grad_norm": 5.543708801269531, |
| "learning_rate": 6.62906703577943e-07, |
| "loss": 0.203, |
| "step": 6488 |
| }, |
| { |
| "epoch": 4.201811125485123, |
| "grad_norm": 0.7908322811126709, |
| "learning_rate": 6.545918435751669e-07, |
| "loss": 0.2164, |
| "step": 6496 |
| }, |
| { |
| "epoch": 4.206985769728331, |
| "grad_norm": 23.913345336914062, |
| "learning_rate": 6.463258059601635e-07, |
| "loss": 0.1994, |
| "step": 6504 |
| }, |
| { |
| "epoch": 4.21216041397154, |
| "grad_norm": 20.359169006347656, |
| "learning_rate": 6.381086836051498e-07, |
| "loss": 0.2175, |
| "step": 6512 |
| }, |
| { |
| "epoch": 4.217335058214748, |
| "grad_norm": 2.3722023963928223, |
| "learning_rate": 6.299405688327631e-07, |
| "loss": 0.2055, |
| "step": 6520 |
| }, |
| { |
| "epoch": 4.222509702457956, |
| "grad_norm": 96.0938491821289, |
| "learning_rate": 6.218215534150185e-07, |
| "loss": 0.1927, |
| "step": 6528 |
| }, |
| { |
| "epoch": 4.227684346701165, |
| "grad_norm": 7.64237642288208, |
| "learning_rate": 6.137517285722816e-07, |
| "loss": 0.2043, |
| "step": 6536 |
| }, |
| { |
| "epoch": 4.232858990944372, |
| "grad_norm": 4.935359477996826, |
| "learning_rate": 6.057311849722419e-07, |
| "loss": 0.2184, |
| "step": 6544 |
| }, |
| { |
| "epoch": 4.2380336351875805, |
| "grad_norm": 3.5845251083374023, |
| "learning_rate": 5.977600127288941e-07, |
| "loss": 0.2137, |
| "step": 6552 |
| }, |
| { |
| "epoch": 4.243208279430789, |
| "grad_norm": 16.71499252319336, |
| "learning_rate": 5.898383014015275e-07, |
| "loss": 0.2096, |
| "step": 6560 |
| }, |
| { |
| "epoch": 4.248382923673997, |
| "grad_norm": 17.370206832885742, |
| "learning_rate": 5.81966139993716e-07, |
| "loss": 0.2067, |
| "step": 6568 |
| }, |
| { |
| "epoch": 4.2535575679172055, |
| "grad_norm": 22.58631134033203, |
| "learning_rate": 5.741436169523234e-07, |
| "loss": 0.2232, |
| "step": 6576 |
| }, |
| { |
| "epoch": 4.258732212160414, |
| "grad_norm": 3.885636806488037, |
| "learning_rate": 5.663708201665041e-07, |
| "loss": 0.2096, |
| "step": 6584 |
| }, |
| { |
| "epoch": 4.263906856403622, |
| "grad_norm": 1.6453157663345337, |
| "learning_rate": 5.586478369667203e-07, |
| "loss": 0.2082, |
| "step": 6592 |
| }, |
| { |
| "epoch": 4.269081500646831, |
| "grad_norm": 6.002994060516357, |
| "learning_rate": 5.50974754123757e-07, |
| "loss": 0.201, |
| "step": 6600 |
| }, |
| { |
| "epoch": 4.274256144890039, |
| "grad_norm": 1.0541515350341797, |
| "learning_rate": 5.433516578477504e-07, |
| "loss": 0.2105, |
| "step": 6608 |
| }, |
| { |
| "epoch": 4.279430789133247, |
| "grad_norm": 1.4043939113616943, |
| "learning_rate": 5.357786337872168e-07, |
| "loss": 0.2143, |
| "step": 6616 |
| }, |
| { |
| "epoch": 4.284605433376456, |
| "grad_norm": 2.2832283973693848, |
| "learning_rate": 5.282557670280914e-07, |
| "loss": 0.2075, |
| "step": 6624 |
| }, |
| { |
| "epoch": 4.289780077619664, |
| "grad_norm": 1.8749516010284424, |
| "learning_rate": 5.207831420927722e-07, |
| "loss": 0.1923, |
| "step": 6632 |
| }, |
| { |
| "epoch": 4.294954721862872, |
| "grad_norm": 9.822781562805176, |
| "learning_rate": 5.133608429391706e-07, |
| "loss": 0.2093, |
| "step": 6640 |
| }, |
| { |
| "epoch": 4.300129366106081, |
| "grad_norm": 8.075210571289062, |
| "learning_rate": 5.059889529597678e-07, |
| "loss": 0.1995, |
| "step": 6648 |
| }, |
| { |
| "epoch": 4.305304010349288, |
| "grad_norm": 13.020132064819336, |
| "learning_rate": 4.986675549806769e-07, |
| "loss": 0.208, |
| "step": 6656 |
| }, |
| { |
| "epoch": 4.3104786545924965, |
| "grad_norm": 1.0930315256118774, |
| "learning_rate": 4.913967312607154e-07, |
| "loss": 0.1978, |
| "step": 6664 |
| }, |
| { |
| "epoch": 4.315653298835705, |
| "grad_norm": 1.784521222114563, |
| "learning_rate": 4.841765634904777e-07, |
| "loss": 0.1921, |
| "step": 6672 |
| }, |
| { |
| "epoch": 4.320827943078913, |
| "grad_norm": 10.063488960266113, |
| "learning_rate": 4.770071327914177e-07, |
| "loss": 0.2094, |
| "step": 6680 |
| }, |
| { |
| "epoch": 4.3260025873221215, |
| "grad_norm": 1.535333514213562, |
| "learning_rate": 4.6988851971493886e-07, |
| "loss": 0.2041, |
| "step": 6688 |
| }, |
| { |
| "epoch": 4.33117723156533, |
| "grad_norm": 3.97757625579834, |
| "learning_rate": 4.628208042414889e-07, |
| "loss": 0.2225, |
| "step": 6696 |
| }, |
| { |
| "epoch": 4.336351875808538, |
| "grad_norm": 8.291097640991211, |
| "learning_rate": 4.558040657796603e-07, |
| "loss": 0.2119, |
| "step": 6704 |
| }, |
| { |
| "epoch": 4.3415265200517466, |
| "grad_norm": 5.780373573303223, |
| "learning_rate": 4.4883838316529816e-07, |
| "loss": 0.2099, |
| "step": 6712 |
| }, |
| { |
| "epoch": 4.346701164294955, |
| "grad_norm": 23.213045120239258, |
| "learning_rate": 4.4192383466061583e-07, |
| "loss": 0.1992, |
| "step": 6720 |
| }, |
| { |
| "epoch": 4.351875808538163, |
| "grad_norm": 6.386680603027344, |
| "learning_rate": 4.350604979533135e-07, |
| "loss": 0.2085, |
| "step": 6728 |
| }, |
| { |
| "epoch": 4.357050452781372, |
| "grad_norm": 1.2865214347839355, |
| "learning_rate": 4.2824845015570713e-07, |
| "loss": 0.2168, |
| "step": 6736 |
| }, |
| { |
| "epoch": 4.36222509702458, |
| "grad_norm": 3.7142112255096436, |
| "learning_rate": 4.214877678038609e-07, |
| "loss": 0.2087, |
| "step": 6744 |
| }, |
| { |
| "epoch": 4.367399741267787, |
| "grad_norm": 15.213637351989746, |
| "learning_rate": 4.1477852685672895e-07, |
| "loss": 0.2107, |
| "step": 6752 |
| }, |
| { |
| "epoch": 4.372574385510996, |
| "grad_norm": 2.587907075881958, |
| "learning_rate": 4.0812080269529983e-07, |
| "loss": 0.2178, |
| "step": 6760 |
| }, |
| { |
| "epoch": 4.377749029754204, |
| "grad_norm": 3.547725200653076, |
| "learning_rate": 4.015146701217493e-07, |
| "loss": 0.2255, |
| "step": 6768 |
| }, |
| { |
| "epoch": 4.3829236739974125, |
| "grad_norm": 2.6407039165496826, |
| "learning_rate": 3.949602033586047e-07, |
| "loss": 0.2035, |
| "step": 6776 |
| }, |
| { |
| "epoch": 4.388098318240621, |
| "grad_norm": 7.901521682739258, |
| "learning_rate": 3.884574760479037e-07, |
| "loss": 0.2069, |
| "step": 6784 |
| }, |
| { |
| "epoch": 4.393272962483829, |
| "grad_norm": 2.049207925796509, |
| "learning_rate": 3.820065612503732e-07, |
| "loss": 0.2042, |
| "step": 6792 |
| }, |
| { |
| "epoch": 4.3984476067270375, |
| "grad_norm": 1.1140714883804321, |
| "learning_rate": 3.756075314446045e-07, |
| "loss": 0.2081, |
| "step": 6800 |
| }, |
| { |
| "epoch": 4.403622250970246, |
| "grad_norm": 1.4943286180496216, |
| "learning_rate": 3.6926045852624106e-07, |
| "loss": 0.2066, |
| "step": 6808 |
| }, |
| { |
| "epoch": 4.408796895213454, |
| "grad_norm": 2.857074499130249, |
| "learning_rate": 3.629654138071692e-07, |
| "loss": 0.2095, |
| "step": 6816 |
| }, |
| { |
| "epoch": 4.4139715394566625, |
| "grad_norm": 8.338090896606445, |
| "learning_rate": 3.56722468014718e-07, |
| "loss": 0.2238, |
| "step": 6824 |
| }, |
| { |
| "epoch": 4.419146183699871, |
| "grad_norm": 5.449411869049072, |
| "learning_rate": 3.505316912908668e-07, |
| "loss": 0.1984, |
| "step": 6832 |
| }, |
| { |
| "epoch": 4.424320827943079, |
| "grad_norm": 26.998971939086914, |
| "learning_rate": 3.443931531914507e-07, |
| "loss": 0.199, |
| "step": 6840 |
| }, |
| { |
| "epoch": 4.429495472186288, |
| "grad_norm": 2.477626085281372, |
| "learning_rate": 3.3830692268538637e-07, |
| "loss": 0.205, |
| "step": 6848 |
| }, |
| { |
| "epoch": 4.434670116429496, |
| "grad_norm": 5.2165632247924805, |
| "learning_rate": 3.3227306815389213e-07, |
| "loss": 0.2037, |
| "step": 6856 |
| }, |
| { |
| "epoch": 4.439844760672703, |
| "grad_norm": 1.6941031217575073, |
| "learning_rate": 3.262916573897218e-07, |
| "loss": 0.2006, |
| "step": 6864 |
| }, |
| { |
| "epoch": 4.445019404915912, |
| "grad_norm": 1.2534488439559937, |
| "learning_rate": 3.2036275759640245e-07, |
| "loss": 0.1979, |
| "step": 6872 |
| }, |
| { |
| "epoch": 4.45019404915912, |
| "grad_norm": 16.04631233215332, |
| "learning_rate": 3.1448643538748045e-07, |
| "loss": 0.2027, |
| "step": 6880 |
| }, |
| { |
| "epoch": 4.455368693402328, |
| "grad_norm": 2.139188289642334, |
| "learning_rate": 3.086627567857703e-07, |
| "loss": 0.2088, |
| "step": 6888 |
| }, |
| { |
| "epoch": 4.460543337645537, |
| "grad_norm": 1.968658685684204, |
| "learning_rate": 3.0289178722261726e-07, |
| "loss": 0.213, |
| "step": 6896 |
| }, |
| { |
| "epoch": 4.465717981888745, |
| "grad_norm": 5.241852760314941, |
| "learning_rate": 2.9717359153715707e-07, |
| "loss": 0.2227, |
| "step": 6904 |
| }, |
| { |
| "epoch": 4.4708926261319535, |
| "grad_norm": 2.9422459602355957, |
| "learning_rate": 2.9150823397559094e-07, |
| "loss": 0.2046, |
| "step": 6912 |
| }, |
| { |
| "epoch": 4.476067270375162, |
| "grad_norm": 8.094049453735352, |
| "learning_rate": 2.8589577819046364e-07, |
| "loss": 0.198, |
| "step": 6920 |
| }, |
| { |
| "epoch": 4.48124191461837, |
| "grad_norm": 1.4471999406814575, |
| "learning_rate": 2.8033628723994623e-07, |
| "loss": 0.2106, |
| "step": 6928 |
| }, |
| { |
| "epoch": 4.4864165588615785, |
| "grad_norm": 2.6254570484161377, |
| "learning_rate": 2.7482982358712885e-07, |
| "loss": 0.211, |
| "step": 6936 |
| }, |
| { |
| "epoch": 4.491591203104787, |
| "grad_norm": 11.787996292114258, |
| "learning_rate": 2.6937644909931893e-07, |
| "loss": 0.2103, |
| "step": 6944 |
| }, |
| { |
| "epoch": 4.496765847347995, |
| "grad_norm": 4.06654691696167, |
| "learning_rate": 2.639762250473482e-07, |
| "loss": 0.2116, |
| "step": 6952 |
| }, |
| { |
| "epoch": 4.501940491591203, |
| "grad_norm": 1.4215199947357178, |
| "learning_rate": 2.5862921210487833e-07, |
| "loss": 0.2039, |
| "step": 6960 |
| }, |
| { |
| "epoch": 4.507115135834411, |
| "grad_norm": 4.604936122894287, |
| "learning_rate": 2.5333547034772645e-07, |
| "loss": 0.2126, |
| "step": 6968 |
| }, |
| { |
| "epoch": 4.512289780077619, |
| "grad_norm": 29.087053298950195, |
| "learning_rate": 2.480950592531844e-07, |
| "loss": 0.195, |
| "step": 6976 |
| }, |
| { |
| "epoch": 4.517464424320828, |
| "grad_norm": 6.165823459625244, |
| "learning_rate": 2.429080376993537e-07, |
| "loss": 0.2141, |
| "step": 6984 |
| }, |
| { |
| "epoch": 4.522639068564036, |
| "grad_norm": 7.566137313842773, |
| "learning_rate": 2.37774463964483e-07, |
| "loss": 0.2013, |
| "step": 6992 |
| }, |
| { |
| "epoch": 4.527813712807244, |
| "grad_norm": 17.188518524169922, |
| "learning_rate": 2.3269439572631448e-07, |
| "loss": 0.213, |
| "step": 7000 |
| }, |
| { |
| "epoch": 4.532988357050453, |
| "grad_norm": 2.3657376766204834, |
| "learning_rate": 2.2766789006143265e-07, |
| "loss": 0.2087, |
| "step": 7008 |
| }, |
| { |
| "epoch": 4.538163001293661, |
| "grad_norm": 131.63343811035156, |
| "learning_rate": 2.226950034446279e-07, |
| "loss": 0.2219, |
| "step": 7016 |
| }, |
| { |
| "epoch": 4.5433376455368695, |
| "grad_norm": 9.159808158874512, |
| "learning_rate": 2.1777579174825703e-07, |
| "loss": 0.2194, |
| "step": 7024 |
| }, |
| { |
| "epoch": 4.548512289780078, |
| "grad_norm": 1.9207276105880737, |
| "learning_rate": 2.1291031024161856e-07, |
| "loss": 0.2093, |
| "step": 7032 |
| }, |
| { |
| "epoch": 4.553686934023286, |
| "grad_norm": 12.356361389160156, |
| "learning_rate": 2.0809861359033124e-07, |
| "loss": 0.214, |
| "step": 7040 |
| }, |
| { |
| "epoch": 4.5588615782664945, |
| "grad_norm": 2.2436163425445557, |
| "learning_rate": 2.0334075585571988e-07, |
| "loss": 0.2149, |
| "step": 7048 |
| }, |
| { |
| "epoch": 4.564036222509703, |
| "grad_norm": 3.7744526863098145, |
| "learning_rate": 1.986367904942066e-07, |
| "loss": 0.1967, |
| "step": 7056 |
| }, |
| { |
| "epoch": 4.569210866752911, |
| "grad_norm": 1.290109395980835, |
| "learning_rate": 1.9398677035671222e-07, |
| "loss": 0.2186, |
| "step": 7064 |
| }, |
| { |
| "epoch": 4.574385510996119, |
| "grad_norm": 1.6141724586486816, |
| "learning_rate": 1.8939074768806076e-07, |
| "loss": 0.2067, |
| "step": 7072 |
| }, |
| { |
| "epoch": 4.579560155239327, |
| "grad_norm": 2.7260425090789795, |
| "learning_rate": 1.8484877412639435e-07, |
| "loss": 0.1964, |
| "step": 7080 |
| }, |
| { |
| "epoch": 4.584734799482535, |
| "grad_norm": 1.980734944343567, |
| "learning_rate": 1.8036090070259026e-07, |
| "loss": 0.1991, |
| "step": 7088 |
| }, |
| { |
| "epoch": 4.589909443725744, |
| "grad_norm": 2.165452241897583, |
| "learning_rate": 1.7592717783969094e-07, |
| "loss": 0.2146, |
| "step": 7096 |
| }, |
| { |
| "epoch": 4.595084087968952, |
| "grad_norm": 1.303862452507019, |
| "learning_rate": 1.7154765535233486e-07, |
| "loss": 0.2152, |
| "step": 7104 |
| }, |
| { |
| "epoch": 4.60025873221216, |
| "grad_norm": 7.211986064910889, |
| "learning_rate": 1.6722238244619827e-07, |
| "loss": 0.2248, |
| "step": 7112 |
| }, |
| { |
| "epoch": 4.605433376455369, |
| "grad_norm": 7.8271870613098145, |
| "learning_rate": 1.6295140771744044e-07, |
| "loss": 0.209, |
| "step": 7120 |
| }, |
| { |
| "epoch": 4.610608020698577, |
| "grad_norm": 1.4870136976242065, |
| "learning_rate": 1.587347791521604e-07, |
| "loss": 0.2148, |
| "step": 7128 |
| }, |
| { |
| "epoch": 4.6157826649417855, |
| "grad_norm": 1.905441403388977, |
| "learning_rate": 1.5457254412585666e-07, |
| "loss": 0.2107, |
| "step": 7136 |
| }, |
| { |
| "epoch": 4.620957309184994, |
| "grad_norm": 0.9246317148208618, |
| "learning_rate": 1.5046474940289268e-07, |
| "loss": 0.2177, |
| "step": 7144 |
| }, |
| { |
| "epoch": 4.626131953428202, |
| "grad_norm": 12.103673934936523, |
| "learning_rate": 1.4641144113597628e-07, |
| "loss": 0.2049, |
| "step": 7152 |
| }, |
| { |
| "epoch": 4.63130659767141, |
| "grad_norm": 1.047852635383606, |
| "learning_rate": 1.4241266486563654e-07, |
| "loss": 0.2062, |
| "step": 7160 |
| }, |
| { |
| "epoch": 4.636481241914618, |
| "grad_norm": 0.9444906115531921, |
| "learning_rate": 1.3846846551971272e-07, |
| "loss": 0.2019, |
| "step": 7168 |
| }, |
| { |
| "epoch": 4.641655886157826, |
| "grad_norm": 1.3876017332077026, |
| "learning_rate": 1.3457888741285452e-07, |
| "loss": 0.1979, |
| "step": 7176 |
| }, |
| { |
| "epoch": 4.646830530401035, |
| "grad_norm": 1.2842018604278564, |
| "learning_rate": 1.307439742460165e-07, |
| "loss": 0.207, |
| "step": 7184 |
| }, |
| { |
| "epoch": 4.652005174644243, |
| "grad_norm": 15.236653327941895, |
| "learning_rate": 1.2696376910597275e-07, |
| "loss": 0.2146, |
| "step": 7192 |
| }, |
| { |
| "epoch": 4.657179818887451, |
| "grad_norm": 41.783870697021484, |
| "learning_rate": 1.2323831446483025e-07, |
| "loss": 0.207, |
| "step": 7200 |
| }, |
| { |
| "epoch": 4.66235446313066, |
| "grad_norm": 1.3897194862365723, |
| "learning_rate": 1.1956765217955302e-07, |
| "loss": 0.1963, |
| "step": 7208 |
| }, |
| { |
| "epoch": 4.667529107373868, |
| "grad_norm": 10.643123626708984, |
| "learning_rate": 1.1595182349149026e-07, |
| "loss": 0.2189, |
| "step": 7216 |
| }, |
| { |
| "epoch": 4.672703751617076, |
| "grad_norm": 1.3099812269210815, |
| "learning_rate": 1.1239086902591512e-07, |
| "loss": 0.2271, |
| "step": 7224 |
| }, |
| { |
| "epoch": 4.677878395860285, |
| "grad_norm": 9.640113830566406, |
| "learning_rate": 1.0888482879156503e-07, |
| "loss": 0.2085, |
| "step": 7232 |
| }, |
| { |
| "epoch": 4.683053040103493, |
| "grad_norm": 8.829296112060547, |
| "learning_rate": 1.0543374218019708e-07, |
| "loss": 0.2029, |
| "step": 7240 |
| }, |
| { |
| "epoch": 4.6882276843467015, |
| "grad_norm": 15.328158378601074, |
| "learning_rate": 1.0203764796614057e-07, |
| "loss": 0.2266, |
| "step": 7248 |
| }, |
| { |
| "epoch": 4.69340232858991, |
| "grad_norm": 3.7764499187469482, |
| "learning_rate": 9.869658430586349e-08, |
| "loss": 0.216, |
| "step": 7256 |
| }, |
| { |
| "epoch": 4.698576972833118, |
| "grad_norm": 8.380655288696289, |
| "learning_rate": 9.541058873754394e-08, |
| "loss": 0.213, |
| "step": 7264 |
| }, |
| { |
| "epoch": 4.7037516170763265, |
| "grad_norm": 5.44411563873291, |
| "learning_rate": 9.217969818064832e-08, |
| "loss": 0.1983, |
| "step": 7272 |
| }, |
| { |
| "epoch": 4.708926261319534, |
| "grad_norm": 20.862810134887695, |
| "learning_rate": 8.900394893551655e-08, |
| "loss": 0.2082, |
| "step": 7280 |
| }, |
| { |
| "epoch": 4.714100905562742, |
| "grad_norm": 8.134232521057129, |
| "learning_rate": 8.588337668295366e-08, |
| "loss": 0.1995, |
| "step": 7288 |
| }, |
| { |
| "epoch": 4.719275549805951, |
| "grad_norm": 5.843049049377441, |
| "learning_rate": 8.28180164838288e-08, |
| "loss": 0.1962, |
| "step": 7296 |
| }, |
| { |
| "epoch": 4.724450194049159, |
| "grad_norm": 14.079636573791504, |
| "learning_rate": 7.980790277868189e-08, |
| "loss": 0.2213, |
| "step": 7304 |
| }, |
| { |
| "epoch": 4.729624838292367, |
| "grad_norm": 1.857102870941162, |
| "learning_rate": 7.685306938733761e-08, |
| "loss": 0.2115, |
| "step": 7312 |
| }, |
| { |
| "epoch": 4.734799482535576, |
| "grad_norm": 4.96279239654541, |
| "learning_rate": 7.395354950852307e-08, |
| "loss": 0.2191, |
| "step": 7320 |
| }, |
| { |
| "epoch": 4.739974126778784, |
| "grad_norm": 3.5607805252075195, |
| "learning_rate": 7.110937571949639e-08, |
| "loss": 0.2076, |
| "step": 7328 |
| }, |
| { |
| "epoch": 4.745148771021992, |
| "grad_norm": 3.995842218399048, |
| "learning_rate": 6.832057997568087e-08, |
| "loss": 0.1983, |
| "step": 7336 |
| }, |
| { |
| "epoch": 4.750323415265201, |
| "grad_norm": 19.480527877807617, |
| "learning_rate": 6.55871936103053e-08, |
| "loss": 0.2037, |
| "step": 7344 |
| }, |
| { |
| "epoch": 4.755498059508409, |
| "grad_norm": 2.151970624923706, |
| "learning_rate": 6.290924733405201e-08, |
| "loss": 0.2137, |
| "step": 7352 |
| }, |
| { |
| "epoch": 4.760672703751617, |
| "grad_norm": 6.302921772003174, |
| "learning_rate": 6.028677123471105e-08, |
| "loss": 0.2095, |
| "step": 7360 |
| }, |
| { |
| "epoch": 4.765847347994825, |
| "grad_norm": 1.0118603706359863, |
| "learning_rate": 5.771979477684375e-08, |
| "loss": 0.221, |
| "step": 7368 |
| }, |
| { |
| "epoch": 4.771021992238033, |
| "grad_norm": 20.808563232421875, |
| "learning_rate": 5.5208346801451376e-08, |
| "loss": 0.2034, |
| "step": 7376 |
| }, |
| { |
| "epoch": 4.776196636481242, |
| "grad_norm": 4.907052040100098, |
| "learning_rate": 5.2752455525650334e-08, |
| "loss": 0.2076, |
| "step": 7384 |
| }, |
| { |
| "epoch": 4.78137128072445, |
| "grad_norm": 29.325668334960938, |
| "learning_rate": 5.035214854235526e-08, |
| "loss": 0.1882, |
| "step": 7392 |
| }, |
| { |
| "epoch": 4.786545924967658, |
| "grad_norm": 6.801876544952393, |
| "learning_rate": 4.8007452819968107e-08, |
| "loss": 0.2004, |
| "step": 7400 |
| }, |
| { |
| "epoch": 4.791720569210867, |
| "grad_norm": 42.309303283691406, |
| "learning_rate": 4.571839470207839e-08, |
| "loss": 0.2132, |
| "step": 7408 |
| }, |
| { |
| "epoch": 4.796895213454075, |
| "grad_norm": 5.460014820098877, |
| "learning_rate": 4.3484999907163484e-08, |
| "loss": 0.1956, |
| "step": 7416 |
| }, |
| { |
| "epoch": 4.802069857697283, |
| "grad_norm": 4.588939666748047, |
| "learning_rate": 4.130729352830154e-08, |
| "loss": 0.1942, |
| "step": 7424 |
| }, |
| { |
| "epoch": 4.807244501940492, |
| "grad_norm": 12.402462005615234, |
| "learning_rate": 3.9185300032889005e-08, |
| "loss": 0.2013, |
| "step": 7432 |
| }, |
| { |
| "epoch": 4.8124191461837, |
| "grad_norm": 2.2621071338653564, |
| "learning_rate": 3.711904326236693e-08, |
| "loss": 0.1916, |
| "step": 7440 |
| }, |
| { |
| "epoch": 4.817593790426908, |
| "grad_norm": 0.7548274993896484, |
| "learning_rate": 3.510854643195061e-08, |
| "loss": 0.2083, |
| "step": 7448 |
| }, |
| { |
| "epoch": 4.822768434670117, |
| "grad_norm": 6.128635883331299, |
| "learning_rate": 3.3153832130371486e-08, |
| "loss": 0.2125, |
| "step": 7456 |
| }, |
| { |
| "epoch": 4.827943078913325, |
| "grad_norm": 4.110105514526367, |
| "learning_rate": 3.1254922319621794e-08, |
| "loss": 0.2127, |
| "step": 7464 |
| }, |
| { |
| "epoch": 4.833117723156533, |
| "grad_norm": 13.073273658752441, |
| "learning_rate": 2.941183833470751e-08, |
| "loss": 0.2125, |
| "step": 7472 |
| }, |
| { |
| "epoch": 4.838292367399741, |
| "grad_norm": 4.8851542472839355, |
| "learning_rate": 2.7624600883410235e-08, |
| "loss": 0.2054, |
| "step": 7480 |
| }, |
| { |
| "epoch": 4.843467011642949, |
| "grad_norm": 12.184813499450684, |
| "learning_rate": 2.589323004605293e-08, |
| "loss": 0.2078, |
| "step": 7488 |
| }, |
| { |
| "epoch": 4.848641655886158, |
| "grad_norm": 22.534168243408203, |
| "learning_rate": 2.4217745275275094e-08, |
| "loss": 0.1981, |
| "step": 7496 |
| }, |
| { |
| "epoch": 4.853816300129366, |
| "grad_norm": 15.434218406677246, |
| "learning_rate": 2.2598165395813498e-08, |
| "loss": 0.2012, |
| "step": 7504 |
| }, |
| { |
| "epoch": 4.858990944372574, |
| "grad_norm": 1.728147029876709, |
| "learning_rate": 2.1034508604292904e-08, |
| "loss": 0.2149, |
| "step": 7512 |
| }, |
| { |
| "epoch": 4.864165588615783, |
| "grad_norm": 49.4590950012207, |
| "learning_rate": 1.9526792469017896e-08, |
| "loss": 0.2191, |
| "step": 7520 |
| }, |
| { |
| "epoch": 4.869340232858991, |
| "grad_norm": 0.7744470834732056, |
| "learning_rate": 1.807503392977916e-08, |
| "loss": 0.2149, |
| "step": 7528 |
| }, |
| { |
| "epoch": 4.874514877102199, |
| "grad_norm": 4.530129909515381, |
| "learning_rate": 1.6679249297660847e-08, |
| "loss": 0.2137, |
| "step": 7536 |
| }, |
| { |
| "epoch": 4.879689521345408, |
| "grad_norm": 6.442441463470459, |
| "learning_rate": 1.533945425485739e-08, |
| "loss": 0.208, |
| "step": 7544 |
| }, |
| { |
| "epoch": 4.884864165588616, |
| "grad_norm": 29.557950973510742, |
| "learning_rate": 1.405566385449919e-08, |
| "loss": 0.2039, |
| "step": 7552 |
| }, |
| { |
| "epoch": 4.890038809831824, |
| "grad_norm": 1.7751787900924683, |
| "learning_rate": 1.2827892520481667e-08, |
| "loss": 0.2158, |
| "step": 7560 |
| }, |
| { |
| "epoch": 4.895213454075033, |
| "grad_norm": 1.7882133722305298, |
| "learning_rate": 1.1656154047303691e-08, |
| "loss": 0.196, |
| "step": 7568 |
| }, |
| { |
| "epoch": 4.90038809831824, |
| "grad_norm": 1.204350233078003, |
| "learning_rate": 1.0540461599913287e-08, |
| "loss": 0.1944, |
| "step": 7576 |
| }, |
| { |
| "epoch": 4.9055627425614485, |
| "grad_norm": 3.3513760566711426, |
| "learning_rate": 9.480827713557183e-09, |
| "loss": 0.1995, |
| "step": 7584 |
| }, |
| { |
| "epoch": 4.910737386804657, |
| "grad_norm": 4.574145317077637, |
| "learning_rate": 8.47726429364426e-09, |
| "loss": 0.1927, |
| "step": 7592 |
| }, |
| { |
| "epoch": 4.915912031047865, |
| "grad_norm": 2.1629719734191895, |
| "learning_rate": 7.529782615608439e-09, |
| "loss": 0.1989, |
| "step": 7600 |
| }, |
| { |
| "epoch": 4.921086675291074, |
| "grad_norm": 1.5303400754928589, |
| "learning_rate": 6.638393324782111e-09, |
| "loss": 0.2084, |
| "step": 7608 |
| }, |
| { |
| "epoch": 4.926261319534282, |
| "grad_norm": 1.0018268823623657, |
| "learning_rate": 5.803106436279571e-09, |
| "loss": 0.2008, |
| "step": 7616 |
| }, |
| { |
| "epoch": 4.93143596377749, |
| "grad_norm": 1.660049319267273, |
| "learning_rate": 5.023931334879883e-09, |
| "loss": 0.201, |
| "step": 7624 |
| }, |
| { |
| "epoch": 4.936610608020699, |
| "grad_norm": 16.29326820373535, |
| "learning_rate": 4.3008767749253e-09, |
| "loss": 0.2237, |
| "step": 7632 |
| }, |
| { |
| "epoch": 4.941785252263907, |
| "grad_norm": 1.205179214477539, |
| "learning_rate": 3.6339508802213374e-09, |
| "loss": 0.2053, |
| "step": 7640 |
| }, |
| { |
| "epoch": 4.946959896507115, |
| "grad_norm": 1.7497557401657104, |
| "learning_rate": 3.0231611439457407e-09, |
| "loss": 0.2056, |
| "step": 7648 |
| }, |
| { |
| "epoch": 4.952134540750324, |
| "grad_norm": 2.757598400115967, |
| "learning_rate": 2.468514428563551e-09, |
| "loss": 0.2057, |
| "step": 7656 |
| }, |
| { |
| "epoch": 4.957309184993532, |
| "grad_norm": 0.9228636026382446, |
| "learning_rate": 1.9700169657510537e-09, |
| "loss": 0.2067, |
| "step": 7664 |
| }, |
| { |
| "epoch": 4.96248382923674, |
| "grad_norm": 1.4917523860931396, |
| "learning_rate": 1.5276743563258367e-09, |
| "loss": 0.2222, |
| "step": 7672 |
| }, |
| { |
| "epoch": 4.967658473479949, |
| "grad_norm": 4.442543029785156, |
| "learning_rate": 1.141491570182396e-09, |
| "loss": 0.2168, |
| "step": 7680 |
| }, |
| { |
| "epoch": 4.972833117723156, |
| "grad_norm": 1.5017764568328857, |
| "learning_rate": 8.114729462377346e-10, |
| "loss": 0.2137, |
| "step": 7688 |
| }, |
| { |
| "epoch": 4.9780077619663645, |
| "grad_norm": 3.795858144760132, |
| "learning_rate": 5.376221923830694e-10, |
| "loss": 0.2188, |
| "step": 7696 |
| }, |
| { |
| "epoch": 4.983182406209573, |
| "grad_norm": 2.518415927886963, |
| "learning_rate": 3.1994238543997526e-10, |
| "loss": 0.2288, |
| "step": 7704 |
| }, |
| { |
| "epoch": 4.988357050452781, |
| "grad_norm": 1.4229096174240112, |
| "learning_rate": 1.5843597112707997e-10, |
| "loss": 0.2053, |
| "step": 7712 |
| }, |
| { |
| "epoch": 4.99353169469599, |
| "grad_norm": 8.819074630737305, |
| "learning_rate": 5.3104764033973245e-11, |
| "loss": 0.2206, |
| "step": 7720 |
| }, |
| { |
| "epoch": 4.998706338939198, |
| "grad_norm": 4.728495121002197, |
| "learning_rate": 3.949947598447246e-12, |
| "loss": 0.2096, |
| "step": 7728 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 7730, |
| "total_flos": 2.7880125934075904e+16, |
| "train_loss": 0.22995005332097354, |
| "train_runtime": 14201.6757, |
| "train_samples_per_second": 69.632, |
| "train_steps_per_second": 0.544 |
| } |
| ], |
| "logging_steps": 8, |
| "max_steps": 7730, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 387, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.7880125934075904e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|