| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.49999647479359915, |
| "eval_steps": 500, |
| "global_step": 26594, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.000376022016089042, |
| "grad_norm": 69.5, |
| "learning_rate": 1.9843342036553526e-08, |
| "loss": 2.5216, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.000752044032178084, |
| "grad_norm": 75.0, |
| "learning_rate": 4.073107049608355e-08, |
| "loss": 2.4632, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.001128066048267126, |
| "grad_norm": 109.0, |
| "learning_rate": 6.161879895561358e-08, |
| "loss": 2.5604, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.001504088064356168, |
| "grad_norm": 48.5, |
| "learning_rate": 8.250652741514362e-08, |
| "loss": 2.4744, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0018801100804452101, |
| "grad_norm": 45.25, |
| "learning_rate": 1.0339425587467364e-07, |
| "loss": 2.5512, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.002256132096534252, |
| "grad_norm": 38.25, |
| "learning_rate": 1.2428198433420367e-07, |
| "loss": 2.4959, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.002632154112623294, |
| "grad_norm": 30.5, |
| "learning_rate": 1.451697127937337e-07, |
| "loss": 2.5159, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.003008176128712336, |
| "grad_norm": 29.0, |
| "learning_rate": 1.660574412532637e-07, |
| "loss": 2.5399, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.003384198144801378, |
| "grad_norm": 29.625, |
| "learning_rate": 1.8694516971279375e-07, |
| "loss": 2.4812, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0037602201608904202, |
| "grad_norm": 25.0, |
| "learning_rate": 2.0783289817232378e-07, |
| "loss": 2.4797, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.004136242176979462, |
| "grad_norm": 24.625, |
| "learning_rate": 2.2872062663185383e-07, |
| "loss": 2.4898, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.004512264193068504, |
| "grad_norm": 17.0, |
| "learning_rate": 2.4960835509138383e-07, |
| "loss": 2.4359, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.004888286209157546, |
| "grad_norm": 39.75, |
| "learning_rate": 2.7049608355091385e-07, |
| "loss": 2.4451, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.005264308225246588, |
| "grad_norm": 17.75, |
| "learning_rate": 2.913838120104439e-07, |
| "loss": 2.4747, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.00564033024133563, |
| "grad_norm": 19.75, |
| "learning_rate": 3.122715404699739e-07, |
| "loss": 2.4672, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.006016352257424672, |
| "grad_norm": 27.125, |
| "learning_rate": 3.3315926892950393e-07, |
| "loss": 2.44, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.006392374273513714, |
| "grad_norm": 19.75, |
| "learning_rate": 3.5404699738903396e-07, |
| "loss": 2.494, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.006768396289602756, |
| "grad_norm": 39.25, |
| "learning_rate": 3.7493472584856404e-07, |
| "loss": 2.4068, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.007144418305691798, |
| "grad_norm": 27.75, |
| "learning_rate": 3.95822454308094e-07, |
| "loss": 2.3509, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0075204403217808405, |
| "grad_norm": 14.9375, |
| "learning_rate": 4.1671018276762403e-07, |
| "loss": 2.3596, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.007896462337869883, |
| "grad_norm": 21.5, |
| "learning_rate": 4.375979112271541e-07, |
| "loss": 2.4322, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.008272484353958925, |
| "grad_norm": 17.25, |
| "learning_rate": 4.584856396866841e-07, |
| "loss": 2.4769, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.008648506370047966, |
| "grad_norm": 24.375, |
| "learning_rate": 4.793733681462142e-07, |
| "loss": 2.3957, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.009024528386137008, |
| "grad_norm": 16.875, |
| "learning_rate": 5.002610966057442e-07, |
| "loss": 2.4445, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.00940055040222605, |
| "grad_norm": 21.75, |
| "learning_rate": 5.211488250652742e-07, |
| "loss": 2.4009, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.009776572418315092, |
| "grad_norm": 26.375, |
| "learning_rate": 5.420365535248042e-07, |
| "loss": 2.3618, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.010152594434404135, |
| "grad_norm": 24.5, |
| "learning_rate": 5.629242819843343e-07, |
| "loss": 2.3718, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.010528616450493177, |
| "grad_norm": 19.875, |
| "learning_rate": 5.838120104438643e-07, |
| "loss": 2.3708, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.010904638466582219, |
| "grad_norm": 18.875, |
| "learning_rate": 6.046997389033943e-07, |
| "loss": 2.4253, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.01128066048267126, |
| "grad_norm": 28.0, |
| "learning_rate": 6.255874673629243e-07, |
| "loss": 2.3592, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.011656682498760302, |
| "grad_norm": 32.25, |
| "learning_rate": 6.464751958224544e-07, |
| "loss": 2.3199, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.012032704514849344, |
| "grad_norm": 33.25, |
| "learning_rate": 6.673629242819844e-07, |
| "loss": 2.3505, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.012408726530938387, |
| "grad_norm": 41.0, |
| "learning_rate": 6.882506527415145e-07, |
| "loss": 2.3872, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.012784748547027429, |
| "grad_norm": 15.625, |
| "learning_rate": 7.091383812010443e-07, |
| "loss": 2.3008, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.01316077056311647, |
| "grad_norm": 31.5, |
| "learning_rate": 7.300261096605745e-07, |
| "loss": 2.168, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.013536792579205512, |
| "grad_norm": 71.0, |
| "learning_rate": 7.509138381201045e-07, |
| "loss": 2.2318, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.013912814595294554, |
| "grad_norm": 32.25, |
| "learning_rate": 7.718015665796345e-07, |
| "loss": 2.2759, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.014288836611383596, |
| "grad_norm": 71.0, |
| "learning_rate": 7.926892950391646e-07, |
| "loss": 2.2838, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.01466485862747264, |
| "grad_norm": 37.0, |
| "learning_rate": 8.135770234986947e-07, |
| "loss": 2.2449, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.015040880643561681, |
| "grad_norm": 100.0, |
| "learning_rate": 8.344647519582245e-07, |
| "loss": 2.2566, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.015416902659650723, |
| "grad_norm": 52.5, |
| "learning_rate": 8.553524804177546e-07, |
| "loss": 2.2765, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.015792924675739766, |
| "grad_norm": 30.0, |
| "learning_rate": 8.762402088772847e-07, |
| "loss": 2.2282, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.016168946691828806, |
| "grad_norm": 22.75, |
| "learning_rate": 8.971279373368147e-07, |
| "loss": 2.2817, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.01654496870791785, |
| "grad_norm": 70.5, |
| "learning_rate": 9.180156657963447e-07, |
| "loss": 2.209, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.01692099072400689, |
| "grad_norm": 107.0, |
| "learning_rate": 9.389033942558748e-07, |
| "loss": 2.2978, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.017297012740095933, |
| "grad_norm": 52.5, |
| "learning_rate": 9.597911227154048e-07, |
| "loss": 2.2589, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.017673034756184973, |
| "grad_norm": 90.0, |
| "learning_rate": 9.806788511749348e-07, |
| "loss": 2.1987, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.018049056772274016, |
| "grad_norm": 45.75, |
| "learning_rate": 1.0015665796344648e-06, |
| "loss": 2.1721, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.01842507878836306, |
| "grad_norm": 191.0, |
| "learning_rate": 1.0224543080939948e-06, |
| "loss": 2.2062, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.0188011008044521, |
| "grad_norm": 67.5, |
| "learning_rate": 1.0433420365535249e-06, |
| "loss": 2.1984, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.019177122820541143, |
| "grad_norm": 61.25, |
| "learning_rate": 1.0642297650130549e-06, |
| "loss": 2.2189, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.019553144836630183, |
| "grad_norm": 49.5, |
| "learning_rate": 1.085117493472585e-06, |
| "loss": 2.2025, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.019929166852719227, |
| "grad_norm": 80.5, |
| "learning_rate": 1.1060052219321151e-06, |
| "loss": 2.1776, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.02030518886880827, |
| "grad_norm": 139.0, |
| "learning_rate": 1.126892950391645e-06, |
| "loss": 2.1829, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.02068121088489731, |
| "grad_norm": 127.0, |
| "learning_rate": 1.147780678851175e-06, |
| "loss": 2.1865, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.021057232900986354, |
| "grad_norm": 163.0, |
| "learning_rate": 1.168668407310705e-06, |
| "loss": 2.1854, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.021433254917075394, |
| "grad_norm": 71.5, |
| "learning_rate": 1.189556135770235e-06, |
| "loss": 2.163, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.021809276933164437, |
| "grad_norm": 98.0, |
| "learning_rate": 1.210443864229765e-06, |
| "loss": 2.071, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.02218529894925348, |
| "grad_norm": 165.0, |
| "learning_rate": 1.2313315926892953e-06, |
| "loss": 2.145, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.02256132096534252, |
| "grad_norm": 35.5, |
| "learning_rate": 1.2522193211488251e-06, |
| "loss": 2.0652, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.022937342981431564, |
| "grad_norm": 115.5, |
| "learning_rate": 1.2731070496083554e-06, |
| "loss": 2.1296, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.023313364997520604, |
| "grad_norm": 90.0, |
| "learning_rate": 1.2939947780678852e-06, |
| "loss": 2.0437, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.023689387013609647, |
| "grad_norm": 45.0, |
| "learning_rate": 1.3148825065274152e-06, |
| "loss": 2.0833, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.024065409029698687, |
| "grad_norm": 32.75, |
| "learning_rate": 1.3357702349869452e-06, |
| "loss": 2.0352, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.02444143104578773, |
| "grad_norm": 153.0, |
| "learning_rate": 1.3566579634464752e-06, |
| "loss": 2.054, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.024817453061876774, |
| "grad_norm": 149.0, |
| "learning_rate": 1.3775456919060055e-06, |
| "loss": 2.0609, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.025193475077965814, |
| "grad_norm": 50.75, |
| "learning_rate": 1.3984334203655353e-06, |
| "loss": 1.97, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.025569497094054858, |
| "grad_norm": 121.5, |
| "learning_rate": 1.4193211488250655e-06, |
| "loss": 1.9576, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.025945519110143898, |
| "grad_norm": 31.0, |
| "learning_rate": 1.4402088772845953e-06, |
| "loss": 2.0156, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.02632154112623294, |
| "grad_norm": 133.0, |
| "learning_rate": 1.4610966057441254e-06, |
| "loss": 2.0576, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.026697563142321985, |
| "grad_norm": 41.75, |
| "learning_rate": 1.4819843342036556e-06, |
| "loss": 2.0427, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.027073585158411025, |
| "grad_norm": 89.5, |
| "learning_rate": 1.5028720626631854e-06, |
| "loss": 1.9948, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.027449607174500068, |
| "grad_norm": 123.5, |
| "learning_rate": 1.5237597911227157e-06, |
| "loss": 1.987, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.027825629190589108, |
| "grad_norm": 72.0, |
| "learning_rate": 1.5446475195822455e-06, |
| "loss": 2.011, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.02820165120667815, |
| "grad_norm": 58.5, |
| "learning_rate": 1.5655352480417757e-06, |
| "loss": 1.9926, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.02857767322276719, |
| "grad_norm": 50.5, |
| "learning_rate": 1.5864229765013055e-06, |
| "loss": 1.9495, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.028953695238856235, |
| "grad_norm": 58.5, |
| "learning_rate": 1.6073107049608356e-06, |
| "loss": 1.9988, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.02932971725494528, |
| "grad_norm": 76.0, |
| "learning_rate": 1.6281984334203658e-06, |
| "loss": 1.98, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.02970573927103432, |
| "grad_norm": 99.0, |
| "learning_rate": 1.6490861618798956e-06, |
| "loss": 1.9849, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.030081761287123362, |
| "grad_norm": 116.0, |
| "learning_rate": 1.6699738903394258e-06, |
| "loss": 1.9464, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.030457783303212402, |
| "grad_norm": 119.0, |
| "learning_rate": 1.6908616187989557e-06, |
| "loss": 1.9654, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.030833805319301445, |
| "grad_norm": 130.0, |
| "learning_rate": 1.7117493472584859e-06, |
| "loss": 1.9718, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.03120982733539049, |
| "grad_norm": 200.0, |
| "learning_rate": 1.732637075718016e-06, |
| "loss": 1.9127, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.03158584935147953, |
| "grad_norm": 228.0, |
| "learning_rate": 1.7535248041775457e-06, |
| "loss": 1.9649, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.03196187136756857, |
| "grad_norm": 159.0, |
| "learning_rate": 1.774412532637076e-06, |
| "loss": 1.8952, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.03233789338365761, |
| "grad_norm": 87.0, |
| "learning_rate": 1.7953002610966058e-06, |
| "loss": 1.9328, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.032713915399746656, |
| "grad_norm": 129.0, |
| "learning_rate": 1.816187989556136e-06, |
| "loss": 1.9531, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.0330899374158357, |
| "grad_norm": 187.0, |
| "learning_rate": 1.8370757180156658e-06, |
| "loss": 1.8911, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.03346595943192474, |
| "grad_norm": 140.0, |
| "learning_rate": 1.857963446475196e-06, |
| "loss": 1.9228, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.03384198144801378, |
| "grad_norm": 152.0, |
| "learning_rate": 1.878851174934726e-06, |
| "loss": 1.9264, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.03421800346410282, |
| "grad_norm": 112.0, |
| "learning_rate": 1.899738903394256e-06, |
| "loss": 1.957, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.034594025480191866, |
| "grad_norm": 81.5, |
| "learning_rate": 1.920626631853786e-06, |
| "loss": 1.9084, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.03497004749628091, |
| "grad_norm": 52.25, |
| "learning_rate": 1.941514360313316e-06, |
| "loss": 1.895, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.035346069512369946, |
| "grad_norm": 52.75, |
| "learning_rate": 1.9624020887728464e-06, |
| "loss": 1.8667, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.03572209152845899, |
| "grad_norm": 73.0, |
| "learning_rate": 1.9832898172323762e-06, |
| "loss": 1.8782, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.03609811354454803, |
| "grad_norm": 157.0, |
| "learning_rate": 2.004177545691906e-06, |
| "loss": 1.8986, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.036474135560637076, |
| "grad_norm": 124.5, |
| "learning_rate": 2.0250652741514363e-06, |
| "loss": 1.8863, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.03685015757672612, |
| "grad_norm": 211.0, |
| "learning_rate": 2.045953002610966e-06, |
| "loss": 1.8851, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.037226179592815156, |
| "grad_norm": 157.0, |
| "learning_rate": 2.0668407310704963e-06, |
| "loss": 1.8669, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.0376022016089042, |
| "grad_norm": 163.0, |
| "learning_rate": 2.087728459530026e-06, |
| "loss": 1.873, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.03797822362499324, |
| "grad_norm": 76.5, |
| "learning_rate": 2.1086161879895564e-06, |
| "loss": 1.8493, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.03835424564108229, |
| "grad_norm": 250.0, |
| "learning_rate": 2.129503916449086e-06, |
| "loss": 1.8835, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.03873026765717133, |
| "grad_norm": 86.0, |
| "learning_rate": 2.1503916449086164e-06, |
| "loss": 1.8291, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.03910628967326037, |
| "grad_norm": 81.5, |
| "learning_rate": 2.1712793733681462e-06, |
| "loss": 1.8068, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.03948231168934941, |
| "grad_norm": 94.5, |
| "learning_rate": 2.1921671018276765e-06, |
| "loss": 1.7797, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.03985833370543845, |
| "grad_norm": 153.0, |
| "learning_rate": 2.2130548302872067e-06, |
| "loss": 1.8606, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.0402343557215275, |
| "grad_norm": 160.0, |
| "learning_rate": 2.2339425587467365e-06, |
| "loss": 1.8179, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.04061037773761654, |
| "grad_norm": 252.0, |
| "learning_rate": 2.2548302872062668e-06, |
| "loss": 1.8003, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.04098639975370558, |
| "grad_norm": 272.0, |
| "learning_rate": 2.2757180156657966e-06, |
| "loss": 1.7933, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.04136242176979462, |
| "grad_norm": 66.5, |
| "learning_rate": 2.2966057441253264e-06, |
| "loss": 1.8021, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.041738443785883664, |
| "grad_norm": 132.0, |
| "learning_rate": 2.3174934725848566e-06, |
| "loss": 1.757, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.04211446580197271, |
| "grad_norm": 101.0, |
| "learning_rate": 2.3383812010443865e-06, |
| "loss": 1.7466, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.04249048781806175, |
| "grad_norm": 111.5, |
| "learning_rate": 2.3592689295039167e-06, |
| "loss": 1.7771, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.04286650983415079, |
| "grad_norm": 64.0, |
| "learning_rate": 2.3801566579634465e-06, |
| "loss": 1.754, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.04324253185023983, |
| "grad_norm": 220.0, |
| "learning_rate": 2.4010443864229767e-06, |
| "loss": 1.7484, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.043618553866328874, |
| "grad_norm": 97.5, |
| "learning_rate": 2.4219321148825066e-06, |
| "loss": 1.7204, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.04399457588241792, |
| "grad_norm": 110.5, |
| "learning_rate": 2.442819843342037e-06, |
| "loss": 1.7732, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.04437059789850696, |
| "grad_norm": 73.5, |
| "learning_rate": 2.463707571801567e-06, |
| "loss": 1.7447, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.044746619914596, |
| "grad_norm": 78.5, |
| "learning_rate": 2.484595300261097e-06, |
| "loss": 1.7127, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.04512264193068504, |
| "grad_norm": 63.25, |
| "learning_rate": 2.5054830287206267e-06, |
| "loss": 1.6951, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.045498663946774084, |
| "grad_norm": 56.25, |
| "learning_rate": 2.5263707571801573e-06, |
| "loss": 1.6848, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.04587468596286313, |
| "grad_norm": 69.0, |
| "learning_rate": 2.547258485639687e-06, |
| "loss": 1.7051, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.046250707978952164, |
| "grad_norm": 83.0, |
| "learning_rate": 2.568146214099217e-06, |
| "loss": 1.6354, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.04662672999504121, |
| "grad_norm": 90.0, |
| "learning_rate": 2.5890339425587468e-06, |
| "loss": 1.643, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.04700275201113025, |
| "grad_norm": 62.5, |
| "learning_rate": 2.6099216710182766e-06, |
| "loss": 1.6811, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.047378774027219295, |
| "grad_norm": 199.0, |
| "learning_rate": 2.6308093994778072e-06, |
| "loss": 1.6851, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.04775479604330834, |
| "grad_norm": 57.75, |
| "learning_rate": 2.651697127937337e-06, |
| "loss": 1.6055, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.048130818059397375, |
| "grad_norm": 196.0, |
| "learning_rate": 2.672584856396867e-06, |
| "loss": 1.6079, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.04850684007548642, |
| "grad_norm": 149.0, |
| "learning_rate": 2.693472584856397e-06, |
| "loss": 1.6273, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.04888286209157546, |
| "grad_norm": 95.5, |
| "learning_rate": 2.714360313315927e-06, |
| "loss": 1.6333, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.049258884107664505, |
| "grad_norm": 72.0, |
| "learning_rate": 2.735248041775457e-06, |
| "loss": 1.6026, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.04963490612375355, |
| "grad_norm": 338.0, |
| "learning_rate": 2.7561357702349874e-06, |
| "loss": 1.5909, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.050010928139842585, |
| "grad_norm": 65.5, |
| "learning_rate": 2.777023498694517e-06, |
| "loss": 1.6058, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.05038695015593163, |
| "grad_norm": 126.0, |
| "learning_rate": 2.797911227154047e-06, |
| "loss": 1.5821, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.05076297217202067, |
| "grad_norm": 142.0, |
| "learning_rate": 2.8187989556135777e-06, |
| "loss": 1.5928, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.051138994188109715, |
| "grad_norm": 59.0, |
| "learning_rate": 2.8396866840731075e-06, |
| "loss": 1.5513, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.05151501620419876, |
| "grad_norm": 173.0, |
| "learning_rate": 2.8605744125326373e-06, |
| "loss": 1.519, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.051891038220287795, |
| "grad_norm": 118.5, |
| "learning_rate": 2.881462140992167e-06, |
| "loss": 1.5389, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.05226706023637684, |
| "grad_norm": 121.5, |
| "learning_rate": 2.9023498694516974e-06, |
| "loss": 1.5027, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.05264308225246588, |
| "grad_norm": 71.5, |
| "learning_rate": 2.9232375979112276e-06, |
| "loss": 1.5588, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.053019104268554926, |
| "grad_norm": 148.0, |
| "learning_rate": 2.9441253263707574e-06, |
| "loss": 1.544, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.05339512628464397, |
| "grad_norm": 98.5, |
| "learning_rate": 2.9650130548302876e-06, |
| "loss": 1.4796, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.053771148300733006, |
| "grad_norm": 119.5, |
| "learning_rate": 2.9859007832898175e-06, |
| "loss": 1.5498, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.05414717031682205, |
| "grad_norm": 68.0, |
| "learning_rate": 3.0067885117493473e-06, |
| "loss": 1.5174, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.05452319233291109, |
| "grad_norm": 81.0, |
| "learning_rate": 3.027676240208878e-06, |
| "loss": 1.5218, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.054899214349000136, |
| "grad_norm": 89.5, |
| "learning_rate": 3.0485639686684078e-06, |
| "loss": 1.4837, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.05527523636508917, |
| "grad_norm": 175.0, |
| "learning_rate": 3.0694516971279376e-06, |
| "loss": 1.469, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.055651258381178216, |
| "grad_norm": 188.0, |
| "learning_rate": 3.0903394255874674e-06, |
| "loss": 1.4704, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.05602728039726726, |
| "grad_norm": 53.5, |
| "learning_rate": 3.111227154046997e-06, |
| "loss": 1.4528, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.0564033024133563, |
| "grad_norm": 91.0, |
| "learning_rate": 3.132114882506528e-06, |
| "loss": 1.4783, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.056779324429445346, |
| "grad_norm": 81.5, |
| "learning_rate": 3.1530026109660577e-06, |
| "loss": 1.4367, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.05715534644553438, |
| "grad_norm": 69.0, |
| "learning_rate": 3.1738903394255875e-06, |
| "loss": 1.4717, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.057531368461623426, |
| "grad_norm": 207.0, |
| "learning_rate": 3.1947780678851177e-06, |
| "loss": 1.4713, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.05790739047771247, |
| "grad_norm": 87.5, |
| "learning_rate": 3.215665796344648e-06, |
| "loss": 1.4269, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.05828341249380151, |
| "grad_norm": 87.0, |
| "learning_rate": 3.2365535248041778e-06, |
| "loss": 1.4116, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.05865943450989056, |
| "grad_norm": 133.0, |
| "learning_rate": 3.257441253263708e-06, |
| "loss": 1.423, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.05903545652597959, |
| "grad_norm": 66.0, |
| "learning_rate": 3.278328981723238e-06, |
| "loss": 1.3921, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.05941147854206864, |
| "grad_norm": 70.0, |
| "learning_rate": 3.2992167101827676e-06, |
| "loss": 1.4027, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.05978750055815768, |
| "grad_norm": 114.5, |
| "learning_rate": 3.3201044386422983e-06, |
| "loss": 1.4118, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.060163522574246724, |
| "grad_norm": 64.5, |
| "learning_rate": 3.340992167101828e-06, |
| "loss": 1.3805, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.06053954459033577, |
| "grad_norm": 76.0, |
| "learning_rate": 3.361879895561358e-06, |
| "loss": 1.3966, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.060915566606424804, |
| "grad_norm": 52.5, |
| "learning_rate": 3.3827676240208877e-06, |
| "loss": 1.3991, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.06129158862251385, |
| "grad_norm": 139.0, |
| "learning_rate": 3.403655352480418e-06, |
| "loss": 1.3851, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.06166761063860289, |
| "grad_norm": 56.25, |
| "learning_rate": 3.4245430809399482e-06, |
| "loss": 1.3506, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.062043632654691934, |
| "grad_norm": 86.0, |
| "learning_rate": 3.445430809399478e-06, |
| "loss": 1.3288, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.06241965467078098, |
| "grad_norm": 108.5, |
| "learning_rate": 3.4663185378590083e-06, |
| "loss": 1.3767, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.06279567668687001, |
| "grad_norm": 46.5, |
| "learning_rate": 3.487206266318538e-06, |
| "loss": 1.339, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.06317169870295906, |
| "grad_norm": 127.5, |
| "learning_rate": 3.5080939947780683e-06, |
| "loss": 1.3316, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.0635477207190481, |
| "grad_norm": 53.0, |
| "learning_rate": 3.5289817232375986e-06, |
| "loss": 1.362, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.06392374273513714, |
| "grad_norm": 75.5, |
| "learning_rate": 3.5498694516971284e-06, |
| "loss": 1.3073, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.06429976475122619, |
| "grad_norm": 103.5, |
| "learning_rate": 3.570757180156658e-06, |
| "loss": 1.3008, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.06467578676731522, |
| "grad_norm": 50.25, |
| "learning_rate": 3.591644908616188e-06, |
| "loss": 1.3438, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.06505180878340427, |
| "grad_norm": 108.0, |
| "learning_rate": 3.6125326370757187e-06, |
| "loss": 1.3175, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.06542783079949331, |
| "grad_norm": 97.0, |
| "learning_rate": 3.6334203655352485e-06, |
| "loss": 1.3031, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.06580385281558235, |
| "grad_norm": 124.5, |
| "learning_rate": 3.6543080939947783e-06, |
| "loss": 1.3161, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.0661798748316714, |
| "grad_norm": 78.5, |
| "learning_rate": 3.675195822454308e-06, |
| "loss": 1.2822, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.06655589684776043, |
| "grad_norm": 81.5, |
| "learning_rate": 3.6960835509138383e-06, |
| "loss": 1.3111, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.06693191886384948, |
| "grad_norm": 94.5, |
| "learning_rate": 3.7169712793733686e-06, |
| "loss": 1.2909, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.06730794087993852, |
| "grad_norm": 86.0, |
| "learning_rate": 3.7378590078328984e-06, |
| "loss": 1.2535, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.06768396289602756, |
| "grad_norm": 142.0, |
| "learning_rate": 3.7587467362924286e-06, |
| "loss": 1.2963, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.06805998491211661, |
| "grad_norm": 58.25, |
| "learning_rate": 3.7796344647519584e-06, |
| "loss": 1.2354, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.06843600692820564, |
| "grad_norm": 67.5, |
| "learning_rate": 3.8005221932114883e-06, |
| "loss": 1.2719, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.0688120289442947, |
| "grad_norm": 103.0, |
| "learning_rate": 3.821409921671019e-06, |
| "loss": 1.246, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.06918805096038373, |
| "grad_norm": 110.0, |
| "learning_rate": 3.842297650130548e-06, |
| "loss": 1.2397, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.06956407297647277, |
| "grad_norm": 45.5, |
| "learning_rate": 3.8631853785900785e-06, |
| "loss": 1.2576, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.06994009499256182, |
| "grad_norm": 62.0, |
| "learning_rate": 3.884073107049609e-06, |
| "loss": 1.2273, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.07031611700865086, |
| "grad_norm": 77.5, |
| "learning_rate": 3.904960835509139e-06, |
| "loss": 1.2366, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.07069213902473989, |
| "grad_norm": 89.5, |
| "learning_rate": 3.925848563968669e-06, |
| "loss": 1.2027, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.07106816104082894, |
| "grad_norm": 84.0, |
| "learning_rate": 3.946736292428199e-06, |
| "loss": 1.2029, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.07144418305691798, |
| "grad_norm": 51.0, |
| "learning_rate": 3.967624020887729e-06, |
| "loss": 1.2181, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.07182020507300703, |
| "grad_norm": 71.0, |
| "learning_rate": 3.988511749347258e-06, |
| "loss": 1.2405, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.07219622708909607, |
| "grad_norm": 78.0, |
| "learning_rate": 4.009399477806789e-06, |
| "loss": 1.1956, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.0725722491051851, |
| "grad_norm": 76.0, |
| "learning_rate": 4.030287206266319e-06, |
| "loss": 1.2008, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.07294827112127415, |
| "grad_norm": 71.0, |
| "learning_rate": 4.051174934725849e-06, |
| "loss": 1.209, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.07332429313736319, |
| "grad_norm": 91.0, |
| "learning_rate": 4.072062663185378e-06, |
| "loss": 1.2152, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.07370031515345224, |
| "grad_norm": 65.5, |
| "learning_rate": 4.092950391644909e-06, |
| "loss": 1.2116, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.07407633716954128, |
| "grad_norm": 84.0, |
| "learning_rate": 4.113838120104439e-06, |
| "loss": 1.1892, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.07445235918563031, |
| "grad_norm": 80.0, |
| "learning_rate": 4.134725848563969e-06, |
| "loss": 1.1591, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.07482838120171936, |
| "grad_norm": 93.0, |
| "learning_rate": 4.155613577023499e-06, |
| "loss": 1.1767, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.0752044032178084, |
| "grad_norm": 83.0, |
| "learning_rate": 4.176501305483029e-06, |
| "loss": 1.1395, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.07558042523389745, |
| "grad_norm": 72.5, |
| "learning_rate": 4.197389033942559e-06, |
| "loss": 1.1689, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.07595644724998649, |
| "grad_norm": 58.5, |
| "learning_rate": 4.218276762402089e-06, |
| "loss": 1.1351, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.07633246926607552, |
| "grad_norm": 64.5, |
| "learning_rate": 4.2391644908616194e-06, |
| "loss": 1.1314, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.07670849128216457, |
| "grad_norm": 66.5, |
| "learning_rate": 4.260052219321149e-06, |
| "loss": 1.124, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.07708451329825361, |
| "grad_norm": 42.75, |
| "learning_rate": 4.280939947780679e-06, |
| "loss": 1.0991, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.07746053531434266, |
| "grad_norm": 63.25, |
| "learning_rate": 4.301827676240209e-06, |
| "loss": 1.1353, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.0778365573304317, |
| "grad_norm": 47.5, |
| "learning_rate": 4.3227154046997395e-06, |
| "loss": 1.1189, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.07821257934652073, |
| "grad_norm": 34.0, |
| "learning_rate": 4.343603133159269e-06, |
| "loss": 1.1196, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.07858860136260978, |
| "grad_norm": 140.0, |
| "learning_rate": 4.364490861618799e-06, |
| "loss": 1.0964, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.07896462337869882, |
| "grad_norm": 60.75, |
| "learning_rate": 4.385378590078329e-06, |
| "loss": 1.1315, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.07934064539478787, |
| "grad_norm": 57.75, |
| "learning_rate": 4.40626631853786e-06, |
| "loss": 1.0911, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.0797166674108769, |
| "grad_norm": 70.5, |
| "learning_rate": 4.42715404699739e-06, |
| "loss": 1.084, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.08009268942696594, |
| "grad_norm": 29.25, |
| "learning_rate": 4.448041775456919e-06, |
| "loss": 1.0646, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.080468711443055, |
| "grad_norm": 33.0, |
| "learning_rate": 4.4689295039164495e-06, |
| "loss": 1.0657, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.08084473345914403, |
| "grad_norm": 75.5, |
| "learning_rate": 4.489817232375979e-06, |
| "loss": 1.0553, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.08122075547523308, |
| "grad_norm": 49.75, |
| "learning_rate": 4.51070496083551e-06, |
| "loss": 1.0675, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.08159677749132212, |
| "grad_norm": 40.25, |
| "learning_rate": 4.531592689295039e-06, |
| "loss": 1.0539, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.08197279950741115, |
| "grad_norm": 40.5, |
| "learning_rate": 4.55248041775457e-06, |
| "loss": 1.0381, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.0823488215235002, |
| "grad_norm": 51.75, |
| "learning_rate": 4.573368146214099e-06, |
| "loss": 1.062, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.08272484353958924, |
| "grad_norm": 120.5, |
| "learning_rate": 4.59425587467363e-06, |
| "loss": 1.0553, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.08310086555567829, |
| "grad_norm": 51.75, |
| "learning_rate": 4.6151436031331595e-06, |
| "loss": 1.0386, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.08347688757176733, |
| "grad_norm": 29.125, |
| "learning_rate": 4.63603133159269e-06, |
| "loss": 1.0334, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.08385290958785636, |
| "grad_norm": 43.25, |
| "learning_rate": 4.65691906005222e-06, |
| "loss": 1.0358, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.08422893160394541, |
| "grad_norm": 34.5, |
| "learning_rate": 4.677806788511749e-06, |
| "loss": 1.0064, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.08460495362003445, |
| "grad_norm": 40.25, |
| "learning_rate": 4.6986945169712796e-06, |
| "loss": 1.0205, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.0849809756361235, |
| "grad_norm": 37.75, |
| "learning_rate": 4.71958224543081e-06, |
| "loss": 0.9937, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.08535699765221254, |
| "grad_norm": 41.25, |
| "learning_rate": 4.74046997389034e-06, |
| "loss": 1.0081, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.08573301966830157, |
| "grad_norm": 59.25, |
| "learning_rate": 4.7613577023498694e-06, |
| "loss": 0.9894, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.08610904168439062, |
| "grad_norm": 57.5, |
| "learning_rate": 4.7822454308094e-06, |
| "loss": 0.9949, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.08648506370047966, |
| "grad_norm": 46.5, |
| "learning_rate": 4.80313315926893e-06, |
| "loss": 1.0066, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.08686108571656871, |
| "grad_norm": 46.0, |
| "learning_rate": 4.82402088772846e-06, |
| "loss": 1.001, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.08723710773265775, |
| "grad_norm": 34.5, |
| "learning_rate": 4.8449086161879895e-06, |
| "loss": 0.9981, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.08761312974874678, |
| "grad_norm": 49.75, |
| "learning_rate": 4.86579634464752e-06, |
| "loss": 0.9975, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.08798915176483584, |
| "grad_norm": 25.125, |
| "learning_rate": 4.88668407310705e-06, |
| "loss": 0.9697, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.08836517378092487, |
| "grad_norm": 68.0, |
| "learning_rate": 4.90757180156658e-06, |
| "loss": 0.9851, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.08874119579701392, |
| "grad_norm": 122.0, |
| "learning_rate": 4.9284595300261105e-06, |
| "loss": 0.9624, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.08911721781310296, |
| "grad_norm": 68.5, |
| "learning_rate": 4.94934725848564e-06, |
| "loss": 0.9812, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.089493239829192, |
| "grad_norm": 43.5, |
| "learning_rate": 4.97023498694517e-06, |
| "loss": 0.9594, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.08986926184528105, |
| "grad_norm": 26.625, |
| "learning_rate": 4.9911227154047e-06, |
| "loss": 0.9582, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.09024528386137008, |
| "grad_norm": 30.375, |
| "learning_rate": 5.012010443864231e-06, |
| "loss": 0.9374, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.09062130587745912, |
| "grad_norm": 43.75, |
| "learning_rate": 5.03289817232376e-06, |
| "loss": 0.9564, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.09099732789354817, |
| "grad_norm": 61.5, |
| "learning_rate": 5.05378590078329e-06, |
| "loss": 0.9293, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.0913733499096372, |
| "grad_norm": 34.25, |
| "learning_rate": 5.07467362924282e-06, |
| "loss": 0.9345, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.09174937192572626, |
| "grad_norm": 27.375, |
| "learning_rate": 5.09556135770235e-06, |
| "loss": 0.9374, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.09212539394181529, |
| "grad_norm": 42.0, |
| "learning_rate": 5.11644908616188e-06, |
| "loss": 0.9305, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.09250141595790433, |
| "grad_norm": 27.875, |
| "learning_rate": 5.137336814621411e-06, |
| "loss": 0.9216, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.09287743797399338, |
| "grad_norm": 54.0, |
| "learning_rate": 5.1582245430809406e-06, |
| "loss": 0.9187, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.09325345999008242, |
| "grad_norm": 33.5, |
| "learning_rate": 5.179112271540471e-06, |
| "loss": 0.9057, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.09362948200617147, |
| "grad_norm": 34.5, |
| "learning_rate": 5.2e-06, |
| "loss": 0.8961, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.0940055040222605, |
| "grad_norm": 33.75, |
| "learning_rate": 5.2208877284595304e-06, |
| "loss": 0.9232, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.09438152603834954, |
| "grad_norm": 21.375, |
| "learning_rate": 5.241775456919061e-06, |
| "loss": 0.9002, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.09475754805443859, |
| "grad_norm": 37.0, |
| "learning_rate": 5.26266318537859e-06, |
| "loss": 0.9141, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.09513357007052763, |
| "grad_norm": 31.75, |
| "learning_rate": 5.28355091383812e-06, |
| "loss": 0.9062, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.09550959208661668, |
| "grad_norm": 25.0, |
| "learning_rate": 5.3044386422976505e-06, |
| "loss": 0.8911, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.09588561410270571, |
| "grad_norm": 39.75, |
| "learning_rate": 5.32532637075718e-06, |
| "loss": 0.8961, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.09626163611879475, |
| "grad_norm": 21.375, |
| "learning_rate": 5.346214099216711e-06, |
| "loss": 0.8682, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.0966376581348838, |
| "grad_norm": 23.625, |
| "learning_rate": 5.367101827676241e-06, |
| "loss": 0.8722, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.09701368015097284, |
| "grad_norm": 32.75, |
| "learning_rate": 5.387989556135771e-06, |
| "loss": 0.8664, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.09738970216706189, |
| "grad_norm": 22.875, |
| "learning_rate": 5.408877284595301e-06, |
| "loss": 0.8557, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.09776572418315092, |
| "grad_norm": 43.5, |
| "learning_rate": 5.429765013054831e-06, |
| "loss": 0.8546, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.09814174619923996, |
| "grad_norm": 22.375, |
| "learning_rate": 5.4506527415143605e-06, |
| "loss": 0.8568, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.09851776821532901, |
| "grad_norm": 24.75, |
| "learning_rate": 5.471540469973891e-06, |
| "loss": 0.8628, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.09889379023141805, |
| "grad_norm": 23.75, |
| "learning_rate": 5.49242819843342e-06, |
| "loss": 0.8456, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.0992698122475071, |
| "grad_norm": 23.5, |
| "learning_rate": 5.51331592689295e-06, |
| "loss": 0.8357, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.09964583426359613, |
| "grad_norm": 14.9375, |
| "learning_rate": 5.5342036553524814e-06, |
| "loss": 0.8189, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.10002185627968517, |
| "grad_norm": 40.25, |
| "learning_rate": 5.555091383812012e-06, |
| "loss": 0.8384, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.10039787829577422, |
| "grad_norm": 48.0, |
| "learning_rate": 5.575979112271541e-06, |
| "loss": 0.8441, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.10077390031186326, |
| "grad_norm": 25.0, |
| "learning_rate": 5.596866840731071e-06, |
| "loss": 0.81, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.10114992232795231, |
| "grad_norm": 53.25, |
| "learning_rate": 5.617754569190601e-06, |
| "loss": 0.846, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.10152594434404134, |
| "grad_norm": 21.25, |
| "learning_rate": 5.638642297650131e-06, |
| "loss": 0.8235, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.10190196636013038, |
| "grad_norm": 22.75, |
| "learning_rate": 5.659530026109661e-06, |
| "loss": 0.8416, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.10227798837621943, |
| "grad_norm": 10.4375, |
| "learning_rate": 5.6804177545691906e-06, |
| "loss": 0.8025, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.10265401039230847, |
| "grad_norm": 11.3125, |
| "learning_rate": 5.701305483028721e-06, |
| "loss": 0.8071, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.10303003240839752, |
| "grad_norm": 13.0, |
| "learning_rate": 5.72219321148825e-06, |
| "loss": 0.819, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.10340605442448655, |
| "grad_norm": 36.25, |
| "learning_rate": 5.743080939947781e-06, |
| "loss": 0.809, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.10378207644057559, |
| "grad_norm": 22.0, |
| "learning_rate": 5.7639686684073115e-06, |
| "loss": 0.8222, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.10415809845666464, |
| "grad_norm": 20.375, |
| "learning_rate": 5.784856396866842e-06, |
| "loss": 0.7892, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.10453412047275368, |
| "grad_norm": 23.875, |
| "learning_rate": 5.805744125326371e-06, |
| "loss": 0.8109, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.10491014248884273, |
| "grad_norm": 43.5, |
| "learning_rate": 5.826631853785901e-06, |
| "loss": 0.7978, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.10528616450493176, |
| "grad_norm": 22.25, |
| "learning_rate": 5.847519582245431e-06, |
| "loss": 0.7947, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.1056621865210208, |
| "grad_norm": 9.5, |
| "learning_rate": 5.868407310704961e-06, |
| "loss": 0.8045, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.10603820853710985, |
| "grad_norm": 12.375, |
| "learning_rate": 5.889295039164491e-06, |
| "loss": 0.8083, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.10641423055319889, |
| "grad_norm": 30.125, |
| "learning_rate": 5.910182767624021e-06, |
| "loss": 0.8052, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.10679025256928794, |
| "grad_norm": 20.125, |
| "learning_rate": 5.931070496083552e-06, |
| "loss": 0.7854, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.10716627458537697, |
| "grad_norm": 15.25, |
| "learning_rate": 5.951958224543082e-06, |
| "loss": 0.7947, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.10754229660146601, |
| "grad_norm": 8.5625, |
| "learning_rate": 5.972845953002611e-06, |
| "loss": 0.7992, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.10791831861755506, |
| "grad_norm": 20.625, |
| "learning_rate": 5.993733681462142e-06, |
| "loss": 0.7855, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.1082943406336441, |
| "grad_norm": 10.125, |
| "learning_rate": 6.014621409921672e-06, |
| "loss": 0.7857, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.10867036264973315, |
| "grad_norm": 33.5, |
| "learning_rate": 6.035509138381201e-06, |
| "loss": 0.7908, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.10904638466582219, |
| "grad_norm": 12.75, |
| "learning_rate": 6.0563968668407315e-06, |
| "loss": 0.771, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.10942240668191122, |
| "grad_norm": 23.5, |
| "learning_rate": 6.077284595300262e-06, |
| "loss": 0.7734, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.10979842869800027, |
| "grad_norm": 12.125, |
| "learning_rate": 6.098172323759791e-06, |
| "loss": 0.7723, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.11017445071408931, |
| "grad_norm": 14.0, |
| "learning_rate": 6.119060052219322e-06, |
| "loss": 0.7602, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.11055047273017835, |
| "grad_norm": 32.0, |
| "learning_rate": 6.139947780678852e-06, |
| "loss": 0.7809, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.1109264947462674, |
| "grad_norm": 15.4375, |
| "learning_rate": 6.160835509138382e-06, |
| "loss": 0.7905, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.11130251676235643, |
| "grad_norm": 24.875, |
| "learning_rate": 6.181723237597912e-06, |
| "loss": 0.7764, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.11167853877844548, |
| "grad_norm": 19.625, |
| "learning_rate": 6.202610966057441e-06, |
| "loss": 0.7877, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.11205456079453452, |
| "grad_norm": 13.5625, |
| "learning_rate": 6.223498694516972e-06, |
| "loss": 0.7779, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.11243058281062356, |
| "grad_norm": 15.9375, |
| "learning_rate": 6.244386422976502e-06, |
| "loss": 0.7711, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.1128066048267126, |
| "grad_norm": 7.90625, |
| "learning_rate": 6.265274151436031e-06, |
| "loss": 0.7661, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.11318262684280164, |
| "grad_norm": 7.75, |
| "learning_rate": 6.2861618798955615e-06, |
| "loss": 0.76, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.11355864885889069, |
| "grad_norm": 9.9375, |
| "learning_rate": 6.307049608355092e-06, |
| "loss": 0.7445, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.11393467087497973, |
| "grad_norm": 12.125, |
| "learning_rate": 6.327937336814622e-06, |
| "loss": 0.7601, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.11431069289106877, |
| "grad_norm": 10.5, |
| "learning_rate": 6.348825065274152e-06, |
| "loss": 0.7641, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.11468671490715782, |
| "grad_norm": 26.0, |
| "learning_rate": 6.3697127937336825e-06, |
| "loss": 0.7501, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.11506273692324685, |
| "grad_norm": 18.625, |
| "learning_rate": 6.390600522193212e-06, |
| "loss": 0.7625, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.1154387589393359, |
| "grad_norm": 12.375, |
| "learning_rate": 6.411488250652742e-06, |
| "loss": 0.752, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.11581478095542494, |
| "grad_norm": 7.21875, |
| "learning_rate": 6.432375979112272e-06, |
| "loss": 0.7546, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.11619080297151398, |
| "grad_norm": 13.5625, |
| "learning_rate": 6.453263707571802e-06, |
| "loss": 0.7533, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.11656682498760303, |
| "grad_norm": 7.78125, |
| "learning_rate": 6.474151436031332e-06, |
| "loss": 0.7427, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.11694284700369206, |
| "grad_norm": 7.34375, |
| "learning_rate": 6.495039164490861e-06, |
| "loss": 0.7569, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.11731886901978111, |
| "grad_norm": 11.5, |
| "learning_rate": 6.5159268929503924e-06, |
| "loss": 0.7568, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.11769489103587015, |
| "grad_norm": 9.125, |
| "learning_rate": 6.536814621409923e-06, |
| "loss": 0.7583, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.11807091305195919, |
| "grad_norm": 6.0, |
| "learning_rate": 6.557702349869453e-06, |
| "loss": 0.745, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.11844693506804824, |
| "grad_norm": 8.9375, |
| "learning_rate": 6.578590078328982e-06, |
| "loss": 0.7423, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.11882295708413727, |
| "grad_norm": 17.375, |
| "learning_rate": 6.5994778067885125e-06, |
| "loss": 0.7417, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.11919897910022632, |
| "grad_norm": 6.28125, |
| "learning_rate": 6.620365535248042e-06, |
| "loss": 0.7414, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.11957500111631536, |
| "grad_norm": 18.375, |
| "learning_rate": 6.641253263707572e-06, |
| "loss": 0.7489, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.1199510231324044, |
| "grad_norm": 18.75, |
| "learning_rate": 6.662140992167102e-06, |
| "loss": 0.748, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.12032704514849345, |
| "grad_norm": 7.46875, |
| "learning_rate": 6.683028720626632e-06, |
| "loss": 0.7259, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.12070306716458248, |
| "grad_norm": 6.78125, |
| "learning_rate": 6.703916449086162e-06, |
| "loss": 0.7454, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.12107908918067153, |
| "grad_norm": 12.0, |
| "learning_rate": 6.724804177545693e-06, |
| "loss": 0.7378, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.12145511119676057, |
| "grad_norm": 10.375, |
| "learning_rate": 6.7456919060052225e-06, |
| "loss": 0.7508, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.12183113321284961, |
| "grad_norm": 8.875, |
| "learning_rate": 6.766579634464753e-06, |
| "loss": 0.7262, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.12220715522893866, |
| "grad_norm": 10.375, |
| "learning_rate": 6.787467362924283e-06, |
| "loss": 0.7445, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.1225831772450277, |
| "grad_norm": 7.46875, |
| "learning_rate": 6.808355091383812e-06, |
| "loss": 0.7337, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.12295919926111674, |
| "grad_norm": 20.375, |
| "learning_rate": 6.829242819843343e-06, |
| "loss": 0.7305, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.12333522127720578, |
| "grad_norm": 6.875, |
| "learning_rate": 6.850130548302872e-06, |
| "loss": 0.7247, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.12371124329329482, |
| "grad_norm": 9.1875, |
| "learning_rate": 6.871018276762402e-06, |
| "loss": 0.7185, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.12408726530938387, |
| "grad_norm": 8.125, |
| "learning_rate": 6.8919060052219325e-06, |
| "loss": 0.7359, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.1244632873254729, |
| "grad_norm": 11.3125, |
| "learning_rate": 6.9127937336814636e-06, |
| "loss": 0.7158, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.12483930934156195, |
| "grad_norm": 17.75, |
| "learning_rate": 6.933681462140993e-06, |
| "loss": 0.7367, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.12521533135765098, |
| "grad_norm": 9.375, |
| "learning_rate": 6.954569190600523e-06, |
| "loss": 0.718, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.12559135337374003, |
| "grad_norm": 8.375, |
| "learning_rate": 6.975456919060053e-06, |
| "loss": 0.7174, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.12596737538982908, |
| "grad_norm": 6.90625, |
| "learning_rate": 6.996344647519583e-06, |
| "loss": 0.7147, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.12634339740591813, |
| "grad_norm": 7.40625, |
| "learning_rate": 7.017232375979113e-06, |
| "loss": 0.7146, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.12671941942200715, |
| "grad_norm": 9.0, |
| "learning_rate": 7.0381201044386425e-06, |
| "loss": 0.7205, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.1270954414380962, |
| "grad_norm": 4.9375, |
| "learning_rate": 7.059007832898173e-06, |
| "loss": 0.715, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.12747146345418525, |
| "grad_norm": 7.375, |
| "learning_rate": 7.079895561357703e-06, |
| "loss": 0.7167, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.12784748547027427, |
| "grad_norm": 6.3125, |
| "learning_rate": 7.100783289817232e-06, |
| "loss": 0.7125, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.12822350748636333, |
| "grad_norm": 6.5625, |
| "learning_rate": 7.121671018276763e-06, |
| "loss": 0.7231, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.12859952950245238, |
| "grad_norm": 7.96875, |
| "learning_rate": 7.142558746736294e-06, |
| "loss": 0.7154, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.1289755515185414, |
| "grad_norm": 6.46875, |
| "learning_rate": 7.163446475195823e-06, |
| "loss": 0.7118, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.12935157353463045, |
| "grad_norm": 12.8125, |
| "learning_rate": 7.184334203655353e-06, |
| "loss": 0.6987, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.1297275955507195, |
| "grad_norm": 6.375, |
| "learning_rate": 7.205221932114883e-06, |
| "loss": 0.7044, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.13010361756680855, |
| "grad_norm": 6.6875, |
| "learning_rate": 7.226109660574413e-06, |
| "loss": 0.6975, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.13047963958289757, |
| "grad_norm": 5.875, |
| "learning_rate": 7.246997389033943e-06, |
| "loss": 0.7044, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.13085566159898662, |
| "grad_norm": 10.8125, |
| "learning_rate": 7.2678851174934725e-06, |
| "loss": 0.6952, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.13123168361507567, |
| "grad_norm": 5.25, |
| "learning_rate": 7.288772845953003e-06, |
| "loss": 0.7118, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.1316077056311647, |
| "grad_norm": 5.28125, |
| "learning_rate": 7.309660574412534e-06, |
| "loss": 0.713, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.13198372764725375, |
| "grad_norm": 8.0625, |
| "learning_rate": 7.330548302872063e-06, |
| "loss": 0.7068, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.1323597496633428, |
| "grad_norm": 6.21875, |
| "learning_rate": 7.3514360313315935e-06, |
| "loss": 0.7073, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.13273577167943182, |
| "grad_norm": 9.9375, |
| "learning_rate": 7.372323759791124e-06, |
| "loss": 0.7097, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.13311179369552087, |
| "grad_norm": 5.09375, |
| "learning_rate": 7.393211488250653e-06, |
| "loss": 0.6954, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.13348781571160992, |
| "grad_norm": 5.0, |
| "learning_rate": 7.414099216710183e-06, |
| "loss": 0.7081, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.13386383772769897, |
| "grad_norm": 4.6875, |
| "learning_rate": 7.4349869451697136e-06, |
| "loss": 0.702, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.134239859743788, |
| "grad_norm": 5.09375, |
| "learning_rate": 7.455874673629243e-06, |
| "loss": 0.7005, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.13461588175987704, |
| "grad_norm": 7.4375, |
| "learning_rate": 7.476762402088773e-06, |
| "loss": 0.6976, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.1349919037759661, |
| "grad_norm": 4.46875, |
| "learning_rate": 7.497650130548304e-06, |
| "loss": 0.6871, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.13536792579205512, |
| "grad_norm": 4.71875, |
| "learning_rate": 7.518537859007834e-06, |
| "loss": 0.6885, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.13574394780814417, |
| "grad_norm": 4.65625, |
| "learning_rate": 7.539425587467364e-06, |
| "loss": 0.6964, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.13611996982423322, |
| "grad_norm": 8.6875, |
| "learning_rate": 7.560313315926894e-06, |
| "loss": 0.7011, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.13649599184032224, |
| "grad_norm": 8.75, |
| "learning_rate": 7.5812010443864235e-06, |
| "loss": 0.687, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.1368720138564113, |
| "grad_norm": 4.03125, |
| "learning_rate": 7.602088772845954e-06, |
| "loss": 0.6915, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.13724803587250034, |
| "grad_norm": 5.84375, |
| "learning_rate": 7.622976501305483e-06, |
| "loss": 0.6777, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.1376240578885894, |
| "grad_norm": 4.78125, |
| "learning_rate": 7.643864229765013e-06, |
| "loss": 0.6925, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.1380000799046784, |
| "grad_norm": 13.3125, |
| "learning_rate": 7.664751958224544e-06, |
| "loss": 0.6797, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.13837610192076746, |
| "grad_norm": 7.5625, |
| "learning_rate": 7.685639686684074e-06, |
| "loss": 0.6898, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.13875212393685651, |
| "grad_norm": 4.34375, |
| "learning_rate": 7.706527415143604e-06, |
| "loss": 0.689, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.13912814595294554, |
| "grad_norm": 4.0, |
| "learning_rate": 7.727415143603134e-06, |
| "loss": 0.6946, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.1395041679690346, |
| "grad_norm": 5.53125, |
| "learning_rate": 7.748302872062665e-06, |
| "loss": 0.6817, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.13988018998512364, |
| "grad_norm": 6.03125, |
| "learning_rate": 7.769190600522193e-06, |
| "loss": 0.6864, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.14025621200121266, |
| "grad_norm": 4.84375, |
| "learning_rate": 7.790078328981723e-06, |
| "loss": 0.6869, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.1406322340173017, |
| "grad_norm": 4.4375, |
| "learning_rate": 7.810966057441254e-06, |
| "loss": 0.6908, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.14100825603339076, |
| "grad_norm": 8.625, |
| "learning_rate": 7.831853785900784e-06, |
| "loss": 0.6784, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.14138427804947978, |
| "grad_norm": 3.25, |
| "learning_rate": 7.852741514360314e-06, |
| "loss": 0.6762, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.14176030006556883, |
| "grad_norm": 8.4375, |
| "learning_rate": 7.873629242819844e-06, |
| "loss": 0.6726, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.14213632208165788, |
| "grad_norm": 4.1875, |
| "learning_rate": 7.894516971279375e-06, |
| "loss": 0.6635, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.14251234409774693, |
| "grad_norm": 5.40625, |
| "learning_rate": 7.915404699738905e-06, |
| "loss": 0.6875, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.14288836611383596, |
| "grad_norm": 4.5625, |
| "learning_rate": 7.936292428198435e-06, |
| "loss": 0.6747, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.143264388129925, |
| "grad_norm": 3.53125, |
| "learning_rate": 7.957180156657964e-06, |
| "loss": 0.6749, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.14364041014601406, |
| "grad_norm": 4.125, |
| "learning_rate": 7.978067885117494e-06, |
| "loss": 0.6701, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.14401643216210308, |
| "grad_norm": 5.59375, |
| "learning_rate": 7.998955613577024e-06, |
| "loss": 0.6641, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.14439245417819213, |
| "grad_norm": 10.75, |
| "learning_rate": 8.019843342036554e-06, |
| "loss": 0.661, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.14476847619428118, |
| "grad_norm": 4.84375, |
| "learning_rate": 8.040731070496085e-06, |
| "loss": 0.6687, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.1451444982103702, |
| "grad_norm": 4.875, |
| "learning_rate": 8.061618798955613e-06, |
| "loss": 0.6559, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.14552052022645925, |
| "grad_norm": 10.6875, |
| "learning_rate": 8.082506527415143e-06, |
| "loss": 0.6601, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.1458965422425483, |
| "grad_norm": 4.46875, |
| "learning_rate": 8.103394255874675e-06, |
| "loss": 0.6761, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.14627256425863736, |
| "grad_norm": 3.609375, |
| "learning_rate": 8.124281984334205e-06, |
| "loss": 0.6663, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.14664858627472638, |
| "grad_norm": 4.0625, |
| "learning_rate": 8.145169712793734e-06, |
| "loss": 0.6693, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.14702460829081543, |
| "grad_norm": 7.25, |
| "learning_rate": 8.166057441253264e-06, |
| "loss": 0.6543, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.14740063030690448, |
| "grad_norm": 4.59375, |
| "learning_rate": 8.186945169712795e-06, |
| "loss": 0.6718, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.1477766523229935, |
| "grad_norm": 4.5, |
| "learning_rate": 8.207832898172325e-06, |
| "loss": 0.6534, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.14815267433908255, |
| "grad_norm": 5.21875, |
| "learning_rate": 8.228720626631855e-06, |
| "loss": 0.6576, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.1485286963551716, |
| "grad_norm": 3.0, |
| "learning_rate": 8.249608355091384e-06, |
| "loss": 0.646, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.14890471837126062, |
| "grad_norm": 4.21875, |
| "learning_rate": 8.270496083550914e-06, |
| "loss": 0.6576, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.14928074038734968, |
| "grad_norm": 4.875, |
| "learning_rate": 8.291383812010446e-06, |
| "loss": 0.6728, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.14965676240343873, |
| "grad_norm": 6.1875, |
| "learning_rate": 8.312271540469974e-06, |
| "loss": 0.6676, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.15003278441952778, |
| "grad_norm": 11.6875, |
| "learning_rate": 8.333159268929504e-06, |
| "loss": 0.6537, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.1504088064356168, |
| "grad_norm": 5.40625, |
| "learning_rate": 8.354046997389035e-06, |
| "loss": 0.6551, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.15078482845170585, |
| "grad_norm": 3.703125, |
| "learning_rate": 8.374934725848565e-06, |
| "loss": 0.6531, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.1511608504677949, |
| "grad_norm": 4.0, |
| "learning_rate": 8.395822454308095e-06, |
| "loss": 0.654, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.15153687248388392, |
| "grad_norm": 7.3125, |
| "learning_rate": 8.416710182767624e-06, |
| "loss": 0.658, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.15191289449997297, |
| "grad_norm": 3.71875, |
| "learning_rate": 8.437597911227154e-06, |
| "loss": 0.6516, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.15228891651606202, |
| "grad_norm": 6.34375, |
| "learning_rate": 8.458485639686684e-06, |
| "loss": 0.6584, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.15266493853215105, |
| "grad_norm": 3.171875, |
| "learning_rate": 8.479373368146214e-06, |
| "loss": 0.6544, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.1530409605482401, |
| "grad_norm": 3.765625, |
| "learning_rate": 8.500261096605745e-06, |
| "loss": 0.6579, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.15341698256432915, |
| "grad_norm": 3.03125, |
| "learning_rate": 8.521148825065275e-06, |
| "loss": 0.6413, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.1537930045804182, |
| "grad_norm": 2.734375, |
| "learning_rate": 8.542036553524805e-06, |
| "loss": 0.6537, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.15416902659650722, |
| "grad_norm": 2.53125, |
| "learning_rate": 8.562924281984335e-06, |
| "loss": 0.6568, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.15454504861259627, |
| "grad_norm": 3.203125, |
| "learning_rate": 8.583812010443866e-06, |
| "loss": 0.636, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.15492107062868532, |
| "grad_norm": 2.296875, |
| "learning_rate": 8.604699738903394e-06, |
| "loss": 0.6439, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.15529709264477434, |
| "grad_norm": 3.296875, |
| "learning_rate": 8.625587467362924e-06, |
| "loss": 0.6409, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.1556731146608634, |
| "grad_norm": 3.015625, |
| "learning_rate": 8.646475195822455e-06, |
| "loss": 0.6533, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.15604913667695244, |
| "grad_norm": 3.3125, |
| "learning_rate": 8.667362924281985e-06, |
| "loss": 0.648, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.15642515869304147, |
| "grad_norm": 2.5625, |
| "learning_rate": 8.688250652741515e-06, |
| "loss": 0.6388, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.15680118070913052, |
| "grad_norm": 5.125, |
| "learning_rate": 8.709138381201045e-06, |
| "loss": 0.6439, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.15717720272521957, |
| "grad_norm": 2.359375, |
| "learning_rate": 8.730026109660576e-06, |
| "loss": 0.6397, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.15755322474130862, |
| "grad_norm": 2.5625, |
| "learning_rate": 8.750913838120106e-06, |
| "loss": 0.6342, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.15792924675739764, |
| "grad_norm": 2.46875, |
| "learning_rate": 8.771801566579634e-06, |
| "loss": 0.6388, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.1583052687734867, |
| "grad_norm": 2.953125, |
| "learning_rate": 8.792689295039165e-06, |
| "loss": 0.6361, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.15868129078957574, |
| "grad_norm": 3.96875, |
| "learning_rate": 8.813577023498695e-06, |
| "loss": 0.6334, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.15905731280566476, |
| "grad_norm": 2.546875, |
| "learning_rate": 8.834464751958225e-06, |
| "loss": 0.6556, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.1594333348217538, |
| "grad_norm": 3.015625, |
| "learning_rate": 8.855352480417755e-06, |
| "loss": 0.6357, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.15980935683784286, |
| "grad_norm": 3.0, |
| "learning_rate": 8.876240208877286e-06, |
| "loss": 0.6383, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.1601853788539319, |
| "grad_norm": 2.625, |
| "learning_rate": 8.897127937336816e-06, |
| "loss": 0.6429, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.16056140087002094, |
| "grad_norm": 3.609375, |
| "learning_rate": 8.918015665796346e-06, |
| "loss": 0.6338, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.16093742288611, |
| "grad_norm": 2.171875, |
| "learning_rate": 8.938903394255876e-06, |
| "loss": 0.649, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.161313444902199, |
| "grad_norm": 3.984375, |
| "learning_rate": 8.959791122715405e-06, |
| "loss": 0.6272, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.16168946691828806, |
| "grad_norm": 3.015625, |
| "learning_rate": 8.980678851174935e-06, |
| "loss": 0.6372, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.1620654889343771, |
| "grad_norm": 2.3125, |
| "learning_rate": 9.001566579634465e-06, |
| "loss": 0.6178, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.16244151095046616, |
| "grad_norm": 4.875, |
| "learning_rate": 9.022454308093996e-06, |
| "loss": 0.6354, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.16281753296655518, |
| "grad_norm": 2.671875, |
| "learning_rate": 9.043342036553526e-06, |
| "loss": 0.6427, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.16319355498264423, |
| "grad_norm": 2.828125, |
| "learning_rate": 9.064229765013054e-06, |
| "loss": 0.6283, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.16356957699873328, |
| "grad_norm": 2.703125, |
| "learning_rate": 9.085117493472586e-06, |
| "loss": 0.6364, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.1639455990148223, |
| "grad_norm": 2.21875, |
| "learning_rate": 9.106005221932116e-06, |
| "loss": 0.6289, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.16432162103091136, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.126892950391647e-06, |
| "loss": 0.6357, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.1646976430470004, |
| "grad_norm": 4.0625, |
| "learning_rate": 9.147780678851175e-06, |
| "loss": 0.6392, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.16507366506308943, |
| "grad_norm": 1.6953125, |
| "learning_rate": 9.168668407310705e-06, |
| "loss": 0.6211, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.16544968707917848, |
| "grad_norm": 2.34375, |
| "learning_rate": 9.189556135770236e-06, |
| "loss": 0.6395, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.16582570909526753, |
| "grad_norm": 2.765625, |
| "learning_rate": 9.210443864229766e-06, |
| "loss": 0.625, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.16620173111135658, |
| "grad_norm": 2.296875, |
| "learning_rate": 9.231331592689296e-06, |
| "loss": 0.6307, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.1665777531274456, |
| "grad_norm": 2.0625, |
| "learning_rate": 9.252219321148825e-06, |
| "loss": 0.6201, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.16695377514353466, |
| "grad_norm": 1.96875, |
| "learning_rate": 9.273107049608357e-06, |
| "loss": 0.6211, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.1673297971596237, |
| "grad_norm": 1.9765625, |
| "learning_rate": 9.293994778067887e-06, |
| "loss": 0.6242, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.16770581917571273, |
| "grad_norm": 2.0625, |
| "learning_rate": 9.314882506527415e-06, |
| "loss": 0.6216, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.16808184119180178, |
| "grad_norm": 2.21875, |
| "learning_rate": 9.335770234986946e-06, |
| "loss": 0.6271, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.16845786320789083, |
| "grad_norm": 1.7265625, |
| "learning_rate": 9.356657963446476e-06, |
| "loss": 0.6366, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.16883388522397985, |
| "grad_norm": 2.234375, |
| "learning_rate": 9.377545691906006e-06, |
| "loss": 0.6155, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.1692099072400689, |
| "grad_norm": 2.421875, |
| "learning_rate": 9.398433420365536e-06, |
| "loss": 0.6162, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.16958592925615795, |
| "grad_norm": 3.578125, |
| "learning_rate": 9.419321148825065e-06, |
| "loss": 0.6269, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.169961951272247, |
| "grad_norm": 1.953125, |
| "learning_rate": 9.440208877284595e-06, |
| "loss": 0.6306, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.17033797328833603, |
| "grad_norm": 1.7265625, |
| "learning_rate": 9.461096605744125e-06, |
| "loss": 0.6247, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.17071399530442508, |
| "grad_norm": 1.9140625, |
| "learning_rate": 9.481984334203657e-06, |
| "loss": 0.613, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.17109001732051413, |
| "grad_norm": 3.953125, |
| "learning_rate": 9.502872062663186e-06, |
| "loss": 0.6187, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.17146603933660315, |
| "grad_norm": 2.875, |
| "learning_rate": 9.523759791122716e-06, |
| "loss": 0.6215, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.1718420613526922, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.544647519582246e-06, |
| "loss": 0.6234, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.17221808336878125, |
| "grad_norm": 2.421875, |
| "learning_rate": 9.565535248041777e-06, |
| "loss": 0.618, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.17259410538487027, |
| "grad_norm": 1.7734375, |
| "learning_rate": 9.586422976501307e-06, |
| "loss": 0.6134, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.17297012740095932, |
| "grad_norm": 2.296875, |
| "learning_rate": 9.607310704960835e-06, |
| "loss": 0.6127, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.17334614941704837, |
| "grad_norm": 2.625, |
| "learning_rate": 9.628198433420366e-06, |
| "loss": 0.6139, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.17372217143313742, |
| "grad_norm": 2.875, |
| "learning_rate": 9.649086161879896e-06, |
| "loss": 0.6121, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.17409819344922645, |
| "grad_norm": 2.09375, |
| "learning_rate": 9.669973890339426e-06, |
| "loss": 0.6126, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.1744742154653155, |
| "grad_norm": 1.2265625, |
| "learning_rate": 9.690861618798956e-06, |
| "loss": 0.6162, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.17485023748140455, |
| "grad_norm": 2.796875, |
| "learning_rate": 9.711749347258487e-06, |
| "loss": 0.6072, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.17522625949749357, |
| "grad_norm": 1.5234375, |
| "learning_rate": 9.732637075718017e-06, |
| "loss": 0.6122, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.17560228151358262, |
| "grad_norm": 1.515625, |
| "learning_rate": 9.753524804177547e-06, |
| "loss": 0.6053, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.17597830352967167, |
| "grad_norm": 1.4765625, |
| "learning_rate": 9.774412532637077e-06, |
| "loss": 0.6149, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.1763543255457607, |
| "grad_norm": 1.5, |
| "learning_rate": 9.795300261096606e-06, |
| "loss": 0.6229, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.17673034756184974, |
| "grad_norm": 1.609375, |
| "learning_rate": 9.816187989556136e-06, |
| "loss": 0.6134, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.1771063695779388, |
| "grad_norm": 2.140625, |
| "learning_rate": 9.837075718015666e-06, |
| "loss": 0.6155, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.17748239159402784, |
| "grad_norm": 1.6015625, |
| "learning_rate": 9.857963446475197e-06, |
| "loss": 0.6042, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.17785841361011687, |
| "grad_norm": 1.78125, |
| "learning_rate": 9.878851174934727e-06, |
| "loss": 0.6182, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.17823443562620592, |
| "grad_norm": 1.3515625, |
| "learning_rate": 9.899738903394257e-06, |
| "loss": 0.6036, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.17861045764229497, |
| "grad_norm": 1.984375, |
| "learning_rate": 9.920626631853787e-06, |
| "loss": 0.6027, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.178986479658384, |
| "grad_norm": 1.5, |
| "learning_rate": 9.941514360313318e-06, |
| "loss": 0.6089, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.17936250167447304, |
| "grad_norm": 1.7734375, |
| "learning_rate": 9.962402088772846e-06, |
| "loss": 0.604, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.1797385236905621, |
| "grad_norm": 1.28125, |
| "learning_rate": 9.983289817232376e-06, |
| "loss": 0.6004, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.1801145457066511, |
| "grad_norm": 1.5234375, |
| "learning_rate": 9.999999995880232e-06, |
| "loss": 0.6019, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.18049056772274016, |
| "grad_norm": 1.421875, |
| "learning_rate": 9.999999851688318e-06, |
| "loss": 0.6145, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.18086658973882921, |
| "grad_norm": 1.78125, |
| "learning_rate": 9.999999501507959e-06, |
| "loss": 0.6105, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.18124261175491824, |
| "grad_norm": 2.109375, |
| "learning_rate": 9.999998945339171e-06, |
| "loss": 0.6139, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.1816186337710073, |
| "grad_norm": 1.8671875, |
| "learning_rate": 9.999998183181976e-06, |
| "loss": 0.6122, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.18199465578709634, |
| "grad_norm": 1.6171875, |
| "learning_rate": 9.999997215036408e-06, |
| "loss": 0.6095, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.1823706778031854, |
| "grad_norm": 1.359375, |
| "learning_rate": 9.999996040902503e-06, |
| "loss": 0.5928, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.1827466998192744, |
| "grad_norm": 1.21875, |
| "learning_rate": 9.999994660780312e-06, |
| "loss": 0.6034, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.18312272183536346, |
| "grad_norm": 1.3984375, |
| "learning_rate": 9.99999307466989e-06, |
| "loss": 0.6012, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.1834987438514525, |
| "grad_norm": 2.09375, |
| "learning_rate": 9.999991282571304e-06, |
| "loss": 0.605, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.18387476586754153, |
| "grad_norm": 1.421875, |
| "learning_rate": 9.999989284484629e-06, |
| "loss": 0.6093, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.18425078788363058, |
| "grad_norm": 1.4765625, |
| "learning_rate": 9.999987080409942e-06, |
| "loss": 0.6032, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.18462680989971963, |
| "grad_norm": 1.2421875, |
| "learning_rate": 9.99998467034734e-06, |
| "loss": 0.6019, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.18500283191580866, |
| "grad_norm": 1.84375, |
| "learning_rate": 9.99998205429692e-06, |
| "loss": 0.6006, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.1853788539318977, |
| "grad_norm": 1.328125, |
| "learning_rate": 9.999979232258787e-06, |
| "loss": 0.586, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.18575487594798676, |
| "grad_norm": 1.375, |
| "learning_rate": 9.999976204233062e-06, |
| "loss": 0.614, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.1861308979640758, |
| "grad_norm": 1.2421875, |
| "learning_rate": 9.999972970219865e-06, |
| "loss": 0.6049, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.18650691998016483, |
| "grad_norm": 1.3984375, |
| "learning_rate": 9.999969530219333e-06, |
| "loss": 0.6048, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.18688294199625388, |
| "grad_norm": 1.3828125, |
| "learning_rate": 9.999965884231607e-06, |
| "loss": 0.608, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.18725896401234293, |
| "grad_norm": 1.609375, |
| "learning_rate": 9.999962032256836e-06, |
| "loss": 0.6017, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.18763498602843195, |
| "grad_norm": 1.265625, |
| "learning_rate": 9.99995797429518e-06, |
| "loss": 0.592, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.188011008044521, |
| "grad_norm": 1.421875, |
| "learning_rate": 9.999953710346804e-06, |
| "loss": 0.602, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.18838703006061006, |
| "grad_norm": 1.3125, |
| "learning_rate": 9.999949240411886e-06, |
| "loss": 0.5894, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.18876305207669908, |
| "grad_norm": 1.375, |
| "learning_rate": 9.99994456449061e-06, |
| "loss": 0.5908, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.18913907409278813, |
| "grad_norm": 1.2890625, |
| "learning_rate": 9.999939682583166e-06, |
| "loss": 0.5914, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.18951509610887718, |
| "grad_norm": 1.6328125, |
| "learning_rate": 9.999934594689759e-06, |
| "loss": 0.5951, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.18989111812496623, |
| "grad_norm": 1.1796875, |
| "learning_rate": 9.999929300810595e-06, |
| "loss": 0.5925, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.19026714014105525, |
| "grad_norm": 1.1953125, |
| "learning_rate": 9.999923800945895e-06, |
| "loss": 0.5982, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.1906431621571443, |
| "grad_norm": 1.1640625, |
| "learning_rate": 9.999918095095884e-06, |
| "loss": 0.6023, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.19101918417323335, |
| "grad_norm": 1.171875, |
| "learning_rate": 9.999912183260798e-06, |
| "loss": 0.5926, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.19139520618932238, |
| "grad_norm": 1.28125, |
| "learning_rate": 9.999906065440878e-06, |
| "loss": 0.5869, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.19177122820541143, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.999899741636381e-06, |
| "loss": 0.5965, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.19214725022150048, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.999893211847563e-06, |
| "loss": 0.601, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.1925232722375895, |
| "grad_norm": 1.0390625, |
| "learning_rate": 9.999886476074694e-06, |
| "loss": 0.5916, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.19289929425367855, |
| "grad_norm": 1.078125, |
| "learning_rate": 9.999879534318051e-06, |
| "loss": 0.5947, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.1932753162697676, |
| "grad_norm": 1.109375, |
| "learning_rate": 9.999872386577923e-06, |
| "loss": 0.5979, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.19365133828585665, |
| "grad_norm": 1.21875, |
| "learning_rate": 9.9998650328546e-06, |
| "loss": 0.5927, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.19402736030194567, |
| "grad_norm": 3.171875, |
| "learning_rate": 9.99985747314839e-06, |
| "loss": 0.5999, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.19440338231803472, |
| "grad_norm": 1.1640625, |
| "learning_rate": 9.999849707459601e-06, |
| "loss": 0.6072, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.19477940433412377, |
| "grad_norm": 1.6171875, |
| "learning_rate": 9.999841735788555e-06, |
| "loss": 0.601, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.1951554263502128, |
| "grad_norm": 1.484375, |
| "learning_rate": 9.999833558135578e-06, |
| "loss": 0.5996, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.19553144836630185, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.999825174501009e-06, |
| "loss": 0.5907, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.1959074703823909, |
| "grad_norm": 1.265625, |
| "learning_rate": 9.999816584885192e-06, |
| "loss": 0.5888, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.19628349239847992, |
| "grad_norm": 1.515625, |
| "learning_rate": 9.99980778928848e-06, |
| "loss": 0.5894, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.19665951441456897, |
| "grad_norm": 2.25, |
| "learning_rate": 9.999798787711239e-06, |
| "loss": 0.5938, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.19703553643065802, |
| "grad_norm": 1.1796875, |
| "learning_rate": 9.999789580153835e-06, |
| "loss": 0.5832, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.19741155844674707, |
| "grad_norm": 1.1015625, |
| "learning_rate": 9.999780166616652e-06, |
| "loss": 0.579, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.1977875804628361, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.999770547100073e-06, |
| "loss": 0.596, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.19816360247892514, |
| "grad_norm": 1.453125, |
| "learning_rate": 9.9997607216045e-06, |
| "loss": 0.5934, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.1985396244950142, |
| "grad_norm": 1.4140625, |
| "learning_rate": 9.999750690130335e-06, |
| "loss": 0.5884, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.19891564651110322, |
| "grad_norm": 1.390625, |
| "learning_rate": 9.99974045267799e-06, |
| "loss": 0.5949, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.19929166852719227, |
| "grad_norm": 1.2265625, |
| "learning_rate": 9.999730009247888e-06, |
| "loss": 0.5935, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.19966769054328132, |
| "grad_norm": 1.265625, |
| "learning_rate": 9.999719359840459e-06, |
| "loss": 0.5904, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.20004371255937034, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.99970850445614e-06, |
| "loss": 0.5811, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.2004197345754594, |
| "grad_norm": 0.98828125, |
| "learning_rate": 9.999697443095383e-06, |
| "loss": 0.584, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.20079575659154844, |
| "grad_norm": 1.125, |
| "learning_rate": 9.999686175758639e-06, |
| "loss": 0.586, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.20117177860763746, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.999674702446375e-06, |
| "loss": 0.5924, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.20154780062372651, |
| "grad_norm": 1.1796875, |
| "learning_rate": 9.999663023159062e-06, |
| "loss": 0.5876, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.20192382263981556, |
| "grad_norm": 0.97265625, |
| "learning_rate": 9.999651137897182e-06, |
| "loss": 0.5857, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.20229984465590461, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.999639046661226e-06, |
| "loss": 0.5847, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.20267586667199364, |
| "grad_norm": 1.1953125, |
| "learning_rate": 9.999626749451688e-06, |
| "loss": 0.5865, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.2030518886880827, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.999614246269076e-06, |
| "loss": 0.5876, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.20342791070417174, |
| "grad_norm": 1.4296875, |
| "learning_rate": 9.999601537113908e-06, |
| "loss": 0.5751, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.20380393272026076, |
| "grad_norm": 1.125, |
| "learning_rate": 9.999588621986707e-06, |
| "loss": 0.5764, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.2041799547363498, |
| "grad_norm": 1.671875, |
| "learning_rate": 9.999575500888004e-06, |
| "loss": 0.5752, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.20455597675243886, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.999562173818338e-06, |
| "loss": 0.5858, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.20493199876852788, |
| "grad_norm": 1.09375, |
| "learning_rate": 9.999548640778259e-06, |
| "loss": 0.5932, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.20530802078461693, |
| "grad_norm": 0.97265625, |
| "learning_rate": 9.999534901768326e-06, |
| "loss": 0.5797, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.20568404280070599, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.999520956789104e-06, |
| "loss": 0.5839, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.20606006481679504, |
| "grad_norm": 0.96484375, |
| "learning_rate": 9.999506805841169e-06, |
| "loss": 0.5883, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.20643608683288406, |
| "grad_norm": 1.125, |
| "learning_rate": 9.999492448925102e-06, |
| "loss": 0.5793, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.2068121088489731, |
| "grad_norm": 1.078125, |
| "learning_rate": 9.999477886041493e-06, |
| "loss": 0.5795, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.20718813086506216, |
| "grad_norm": 1.0859375, |
| "learning_rate": 9.999463117190945e-06, |
| "loss": 0.5798, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.20756415288115118, |
| "grad_norm": 1.15625, |
| "learning_rate": 9.999448142374066e-06, |
| "loss": 0.5855, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.20794017489724023, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.999432961591472e-06, |
| "loss": 0.6086, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.20831619691332928, |
| "grad_norm": 1.2265625, |
| "learning_rate": 9.999417574843788e-06, |
| "loss": 0.5777, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.2086922189294183, |
| "grad_norm": 1.0, |
| "learning_rate": 9.99940198213165e-06, |
| "loss": 0.5858, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.20906824094550736, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.9993861834557e-06, |
| "loss": 0.5761, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.2094442629615964, |
| "grad_norm": 1.4140625, |
| "learning_rate": 9.999370178816586e-06, |
| "loss": 0.5777, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.20982028497768546, |
| "grad_norm": 1.453125, |
| "learning_rate": 9.999353968214969e-06, |
| "loss": 0.5853, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.21019630699377448, |
| "grad_norm": 1.359375, |
| "learning_rate": 9.999337551651517e-06, |
| "loss": 0.5951, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.21057232900986353, |
| "grad_norm": 0.8828125, |
| "learning_rate": 9.999320929126909e-06, |
| "loss": 0.5874, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.21094835102595258, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.999304100641824e-06, |
| "loss": 0.5924, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.2113243730420416, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.99928706619696e-06, |
| "loss": 0.5927, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.21170039505813065, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.999269825793018e-06, |
| "loss": 0.5941, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.2120764170742197, |
| "grad_norm": 0.9296875, |
| "learning_rate": 9.999252379430707e-06, |
| "loss": 0.5873, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.21245243909030873, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.999234727110746e-06, |
| "loss": 0.586, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.21282846110639778, |
| "grad_norm": 0.8515625, |
| "learning_rate": 9.999216868833864e-06, |
| "loss": 0.5901, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.21320448312248683, |
| "grad_norm": 1.03125, |
| "learning_rate": 9.999198804600793e-06, |
| "loss": 0.5738, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.21358050513857588, |
| "grad_norm": 1.1328125, |
| "learning_rate": 9.999180534412281e-06, |
| "loss": 0.5837, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.2139565271546649, |
| "grad_norm": 0.98046875, |
| "learning_rate": 9.999162058269079e-06, |
| "loss": 0.58, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.21433254917075395, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.99914337617195e-06, |
| "loss": 0.5803, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.214708571186843, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.999124488121658e-06, |
| "loss": 0.5759, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.21508459320293202, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.999105394118988e-06, |
| "loss": 0.5867, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.21546061521902107, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.999086094164724e-06, |
| "loss": 0.5784, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.21583663723511012, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.99906658825966e-06, |
| "loss": 0.5796, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.21621265925119915, |
| "grad_norm": 0.890625, |
| "learning_rate": 9.999046876404602e-06, |
| "loss": 0.5758, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.2165886812672882, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.999026958600358e-06, |
| "loss": 0.5852, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.21696470328337725, |
| "grad_norm": 1.109375, |
| "learning_rate": 9.999006834847752e-06, |
| "loss": 0.576, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.2173407252994663, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.998986505147612e-06, |
| "loss": 0.5848, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.21771674731555532, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.998965969500779e-06, |
| "loss": 0.5871, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.21809276933164437, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.99894522790809e-06, |
| "loss": 0.5829, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.21846879134773342, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.99892428037041e-06, |
| "loss": 0.5742, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.21884481336382244, |
| "grad_norm": 0.89453125, |
| "learning_rate": 9.998903126888595e-06, |
| "loss": 0.5841, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.2192208353799115, |
| "grad_norm": 1.0625, |
| "learning_rate": 9.998881767463519e-06, |
| "loss": 0.5819, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.21959685739600054, |
| "grad_norm": 1.0078125, |
| "learning_rate": 9.998860202096063e-06, |
| "loss": 0.5805, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.21997287941208957, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.998838430787112e-06, |
| "loss": 0.5785, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.22034890142817862, |
| "grad_norm": 1.0078125, |
| "learning_rate": 9.998816453537568e-06, |
| "loss": 0.5804, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.22072492344426767, |
| "grad_norm": 0.91796875, |
| "learning_rate": 9.998794270348331e-06, |
| "loss": 0.5854, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.2211009454603567, |
| "grad_norm": 0.890625, |
| "learning_rate": 9.998771881220319e-06, |
| "loss": 0.5857, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.22147696747644574, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.99874928615445e-06, |
| "loss": 0.5855, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.2218529894925348, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.99872648515166e-06, |
| "loss": 0.5736, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.22222901150862384, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.998703478212885e-06, |
| "loss": 0.5792, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.22260503352471286, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.998680265339076e-06, |
| "loss": 0.5709, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.22298105554080191, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.998656846531185e-06, |
| "loss": 0.5717, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.22335707755689096, |
| "grad_norm": 1.0, |
| "learning_rate": 9.99863322179018e-06, |
| "loss": 0.5719, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.22373309957298, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.99860939111703e-06, |
| "loss": 0.5874, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.22410912158906904, |
| "grad_norm": 0.90234375, |
| "learning_rate": 9.998585354512725e-06, |
| "loss": 0.5723, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.2244851436051581, |
| "grad_norm": 0.96875, |
| "learning_rate": 9.998561111978246e-06, |
| "loss": 0.5899, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.2248611656212471, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.998536663514599e-06, |
| "loss": 0.5824, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.22523718763733616, |
| "grad_norm": 0.98046875, |
| "learning_rate": 9.998512009122787e-06, |
| "loss": 0.5668, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.2256132096534252, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.998487148803826e-06, |
| "loss": 0.5701, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.22598923166951426, |
| "grad_norm": 1.0078125, |
| "learning_rate": 9.998462082558741e-06, |
| "loss": 0.576, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.22636525368560328, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.998436810388566e-06, |
| "loss": 0.5761, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.22674127570169234, |
| "grad_norm": 1.34375, |
| "learning_rate": 9.998411332294341e-06, |
| "loss": 0.5786, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.22711729771778139, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.998385648277116e-06, |
| "loss": 0.5758, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.2274933197338704, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.998359758337947e-06, |
| "loss": 0.5769, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.22786934174995946, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.998333662477903e-06, |
| "loss": 0.5666, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.2282453637660485, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.998307360698059e-06, |
| "loss": 0.5754, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.22862138578213753, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.998280852999496e-06, |
| "loss": 0.5627, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.22899740779822658, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.99825413938331e-06, |
| "loss": 0.5797, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.22937342981431563, |
| "grad_norm": 0.8515625, |
| "learning_rate": 9.998227219850597e-06, |
| "loss": 0.5875, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.22974945183040468, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.998200094402471e-06, |
| "loss": 0.5809, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.2301254738464937, |
| "grad_norm": 1.046875, |
| "learning_rate": 9.998172763040048e-06, |
| "loss": 0.5714, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.23050149586258276, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.99814522576445e-06, |
| "loss": 0.5755, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.2308775178786718, |
| "grad_norm": 0.9453125, |
| "learning_rate": 9.998117482576816e-06, |
| "loss": 0.5764, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.23125353989476083, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.998089533478287e-06, |
| "loss": 0.5699, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.23162956191084988, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.998061378470016e-06, |
| "loss": 0.5814, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.23200558392693893, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.998033017553162e-06, |
| "loss": 0.5776, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.23238160594302795, |
| "grad_norm": 0.88671875, |
| "learning_rate": 9.99800445072889e-06, |
| "loss": 0.5776, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.232757627959117, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.997975677998385e-06, |
| "loss": 0.574, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.23313364997520605, |
| "grad_norm": 1.328125, |
| "learning_rate": 9.997946699362825e-06, |
| "loss": 0.5668, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.2335096719912951, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.997917514823406e-06, |
| "loss": 0.5711, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.23388569400738413, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.99788812438133e-06, |
| "loss": 0.5556, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.23426171602347318, |
| "grad_norm": 0.96484375, |
| "learning_rate": 9.99785852803781e-06, |
| "loss": 0.5841, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.23463773803956223, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.997828725794061e-06, |
| "loss": 0.5763, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.23501376005565125, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.997798717651316e-06, |
| "loss": 0.5698, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.2353897820717403, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.99776850361081e-06, |
| "loss": 0.5708, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.23576580408782935, |
| "grad_norm": 0.75, |
| "learning_rate": 9.997738083673785e-06, |
| "loss": 0.5727, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.23614182610391837, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.997707457841496e-06, |
| "loss": 0.5596, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.23651784812000742, |
| "grad_norm": 1.1015625, |
| "learning_rate": 9.997676626115205e-06, |
| "loss": 0.5688, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.23689387013609647, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.997645588496181e-06, |
| "loss": 0.5598, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.23726989215218552, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.997614344985705e-06, |
| "loss": 0.5573, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.23764591416827455, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.99758289558506e-06, |
| "loss": 0.5708, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.2380219361843636, |
| "grad_norm": 0.8984375, |
| "learning_rate": 9.997551240295546e-06, |
| "loss": 0.5752, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.23839795820045265, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.997519379118465e-06, |
| "loss": 0.5741, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.23877398021654167, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.99748731205513e-06, |
| "loss": 0.5625, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.23915000223263072, |
| "grad_norm": 1.09375, |
| "learning_rate": 9.997455039106861e-06, |
| "loss": 0.5751, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.23952602424871977, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.99742256027499e-06, |
| "loss": 0.5627, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.2399020462648088, |
| "grad_norm": 1.15625, |
| "learning_rate": 9.997389875560853e-06, |
| "loss": 0.5675, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.24027806828089784, |
| "grad_norm": 0.90234375, |
| "learning_rate": 9.997356984965798e-06, |
| "loss": 0.5751, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.2406540902969869, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.997323888491178e-06, |
| "loss": 0.5762, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.24103011231307592, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.997290586138357e-06, |
| "loss": 0.5744, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.24140613432916497, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.99725707790871e-06, |
| "loss": 0.5676, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.24178215634525402, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.997223363803615e-06, |
| "loss": 0.5817, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.24215817836134307, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.99718944382446e-06, |
| "loss": 0.5763, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.2425342003774321, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.997155317972643e-06, |
| "loss": 0.5745, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.24291022239352114, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.99712098624957e-06, |
| "loss": 0.5663, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.2432862444096102, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.997086448656658e-06, |
| "loss": 0.5695, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.24366226642569921, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.997051705195326e-06, |
| "loss": 0.573, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.24403828844178826, |
| "grad_norm": 0.88671875, |
| "learning_rate": 9.997016755867008e-06, |
| "loss": 0.5698, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.24441431045787732, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.996981600673144e-06, |
| "loss": 0.5666, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.24479033247396634, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.99694623961518e-06, |
| "loss": 0.5694, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.2451663544900554, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.996910672694573e-06, |
| "loss": 0.5574, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.24554237650614444, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.99687489991279e-06, |
| "loss": 0.5564, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.2459183985222335, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.996838921271304e-06, |
| "loss": 0.5666, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.2462944205383225, |
| "grad_norm": 0.890625, |
| "learning_rate": 9.996802736771597e-06, |
| "loss": 0.5758, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.24667044255441156, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.99676634641516e-06, |
| "loss": 0.5619, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.2470464645705006, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.996729750203493e-06, |
| "loss": 0.5817, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.24742248658658964, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.996692948138102e-06, |
| "loss": 0.5705, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.24779850860267869, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.996655940220504e-06, |
| "loss": 0.5713, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.24817453061876774, |
| "grad_norm": 0.89453125, |
| "learning_rate": 9.996618726452223e-06, |
| "loss": 0.5715, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.24855055263485676, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.996581306834793e-06, |
| "loss": 0.5622, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.2489265746509458, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.996543681369756e-06, |
| "loss": 0.5636, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.24930259666703486, |
| "grad_norm": 1.0703125, |
| "learning_rate": 9.996505850058663e-06, |
| "loss": 0.5753, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.2496786186831239, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.996467812903067e-06, |
| "loss": 0.5774, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.25005464069921296, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.996429569904542e-06, |
| "loss": 0.5687, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.25043066271530195, |
| "grad_norm": 1.09375, |
| "learning_rate": 9.99639112106466e-06, |
| "loss": 0.5652, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.250806684731391, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.996352466385006e-06, |
| "loss": 0.5579, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.25118270674748006, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.996313605867172e-06, |
| "loss": 0.5663, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.2515587287635691, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.996274539512759e-06, |
| "loss": 0.5653, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.25193475077965816, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.996235267323375e-06, |
| "loss": 0.5658, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.2523107727957472, |
| "grad_norm": 0.8828125, |
| "learning_rate": 9.99619578930064e-06, |
| "loss": 0.5738, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.25268679481183626, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.996156105446182e-06, |
| "loss": 0.572, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.25306281682792525, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.99611621576163e-06, |
| "loss": 0.5811, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.2534388388440143, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.996076120248634e-06, |
| "loss": 0.5669, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.25381486086010335, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.996035818908842e-06, |
| "loss": 0.5716, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.2541908828761924, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.995995311743915e-06, |
| "loss": 0.5712, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.25456690489228145, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.995954598755522e-06, |
| "loss": 0.5702, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.2549429269083705, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.99591367994534e-06, |
| "loss": 0.5655, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.25531894892445955, |
| "grad_norm": 0.75, |
| "learning_rate": 9.995872555315056e-06, |
| "loss": 0.5704, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.25569497094054855, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.995831224866363e-06, |
| "loss": 0.5744, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.2560709929566376, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.995789688600964e-06, |
| "loss": 0.5598, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.25644701497272665, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.995747946520569e-06, |
| "loss": 0.5758, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.2568230369888157, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.995705998626898e-06, |
| "loss": 0.5645, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.25719905900490475, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.995663844921684e-06, |
| "loss": 0.5619, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.2575750810209938, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.995621485406658e-06, |
| "loss": 0.5648, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.2579511030370828, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.995578920083565e-06, |
| "loss": 0.5713, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.25832712505317185, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.995536148954162e-06, |
| "loss": 0.5617, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.2587031470692609, |
| "grad_norm": 0.8984375, |
| "learning_rate": 9.995493172020208e-06, |
| "loss": 0.5586, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.25907916908534995, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.995449989283477e-06, |
| "loss": 0.5664, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.259455191101439, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.995406600745745e-06, |
| "loss": 0.5642, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.25983121311752805, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.9953630064088e-06, |
| "loss": 0.5631, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.2602072351336171, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.99531920627444e-06, |
| "loss": 0.5721, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.2605832571497061, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.995275200344467e-06, |
| "loss": 0.572, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.26095927916579514, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.995230988620694e-06, |
| "loss": 0.5649, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.2613353011818842, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.995186571104945e-06, |
| "loss": 0.571, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.26171132319797324, |
| "grad_norm": 0.89453125, |
| "learning_rate": 9.995141947799047e-06, |
| "loss": 0.5718, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.2620873452140623, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.995097118704843e-06, |
| "loss": 0.5686, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.26246336723015135, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.995052083824173e-06, |
| "loss": 0.5607, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.26283938924624034, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.995006843158896e-06, |
| "loss": 0.5693, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.2632154112623294, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.994961396710876e-06, |
| "loss": 0.5632, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.26359143327841844, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.994915744481985e-06, |
| "loss": 0.5622, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.2639674552945075, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.994869886474103e-06, |
| "loss": 0.5606, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.26434347731059654, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.994823822689121e-06, |
| "loss": 0.5725, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.2647194993266856, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.994777553128935e-06, |
| "loss": 0.568, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.26509552134277464, |
| "grad_norm": 0.8984375, |
| "learning_rate": 9.994731077795454e-06, |
| "loss": 0.5614, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.26547154335886364, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.994684396690588e-06, |
| "loss": 0.5747, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.2658475653749527, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.994637509816263e-06, |
| "loss": 0.5703, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.26622358739104174, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.994590417174411e-06, |
| "loss": 0.5579, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.2665996094071308, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.994543118766972e-06, |
| "loss": 0.575, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.26697563142321984, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.994495614595892e-06, |
| "loss": 0.5576, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.2673516534393089, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.994447904663132e-06, |
| "loss": 0.5698, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.26772767545539794, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.994399988970654e-06, |
| "loss": 0.5643, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.26810369747148693, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.994351867520436e-06, |
| "loss": 0.5491, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.268479719487576, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.994303540314457e-06, |
| "loss": 0.5591, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.26885574150366504, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.994255007354708e-06, |
| "loss": 0.558, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.2692317635197541, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.994206268643189e-06, |
| "loss": 0.5541, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.26960778553584314, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.99415732418191e-06, |
| "loss": 0.561, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.2699838075519322, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.994108173972885e-06, |
| "loss": 0.5578, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.2703598295680212, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.99405881801814e-06, |
| "loss": 0.5665, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.27073585158411023, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.99400925631971e-06, |
| "loss": 0.5671, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.2711118736001993, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.993959488879632e-06, |
| "loss": 0.5585, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.27148789561628833, |
| "grad_norm": 0.88671875, |
| "learning_rate": 9.99390951569996e-06, |
| "loss": 0.569, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.2718639176323774, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.993859336782752e-06, |
| "loss": 0.5634, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.27223993964846643, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.993808952130076e-06, |
| "loss": 0.5823, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.2726159616645555, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.993758361744007e-06, |
| "loss": 0.5762, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.2729919836806445, |
| "grad_norm": 0.90234375, |
| "learning_rate": 9.99370756562663e-06, |
| "loss": 0.5647, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.27336800569673353, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.993656563780034e-06, |
| "loss": 0.5638, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.2737440277128226, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.993605356206324e-06, |
| "loss": 0.5812, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.27412004972891163, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.99355394290761e-06, |
| "loss": 0.5694, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.2744960717450007, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.993502323886008e-06, |
| "loss": 0.5601, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.27487209376108973, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.993450499143646e-06, |
| "loss": 0.5712, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.2752481157771788, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.993398468682657e-06, |
| "loss": 0.5676, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.2756241377932678, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.993346232505186e-06, |
| "loss": 0.5554, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.2760001598093568, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.993293790613386e-06, |
| "loss": 0.576, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.2763761818254459, |
| "grad_norm": 0.75, |
| "learning_rate": 9.993241143009416e-06, |
| "loss": 0.5732, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.2767522038415349, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.993188289695446e-06, |
| "loss": 0.5655, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.277128225857624, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.993135230673651e-06, |
| "loss": 0.5572, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.27750424787371303, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.993081965946221e-06, |
| "loss": 0.5657, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.277880269889802, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.993028495515347e-06, |
| "loss": 0.5521, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.2782562919058911, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.992974819383233e-06, |
| "loss": 0.5614, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.2786323139219801, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.99292093755209e-06, |
| "loss": 0.5646, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.2790083359380692, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.992866850024138e-06, |
| "loss": 0.5673, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.2793843579541582, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.992812556801607e-06, |
| "loss": 0.5631, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.2797603799702473, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.992758057886732e-06, |
| "loss": 0.5607, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.2801364019863363, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.992703353281757e-06, |
| "loss": 0.5581, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.2805124240024253, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.992648442988937e-06, |
| "loss": 0.5556, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.28088844601851437, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.992593327010536e-06, |
| "loss": 0.5565, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.2812644680346034, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.99253800534882e-06, |
| "loss": 0.5663, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.28164049005069247, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.992482478006073e-06, |
| "loss": 0.5658, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.2820165120667815, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.992426744984582e-06, |
| "loss": 0.561, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.28239253408287057, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.99237080628664e-06, |
| "loss": 0.558, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.28276855609895957, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.992314661914553e-06, |
| "loss": 0.569, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.2831445781150486, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.992258311870636e-06, |
| "loss": 0.558, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.28352060013113767, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.992201756157207e-06, |
| "loss": 0.5582, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.2838966221472267, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.992144994776597e-06, |
| "loss": 0.5632, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.28427264416331577, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.992088027731146e-06, |
| "loss": 0.5654, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.2846486661794048, |
| "grad_norm": 0.75, |
| "learning_rate": 9.992030855023201e-06, |
| "loss": 0.5666, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.28502468819549387, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.991973476655116e-06, |
| "loss": 0.5561, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.28540071021158286, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.991915892629255e-06, |
| "loss": 0.5668, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.2857767322276719, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.991858102947991e-06, |
| "loss": 0.5642, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.28615275424376097, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.991800107613704e-06, |
| "loss": 0.5549, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.28652877625985, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.991741906628784e-06, |
| "loss": 0.5641, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.28690479827593907, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.991683499995629e-06, |
| "loss": 0.5585, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.2872808202920281, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.991624887716644e-06, |
| "loss": 0.5639, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.28765684230811717, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.991566069794244e-06, |
| "loss": 0.5636, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.28803286432420616, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.991507046230853e-06, |
| "loss": 0.5625, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.2884088863402952, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.991447817028903e-06, |
| "loss": 0.5631, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.28878490835638426, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.991388382190832e-06, |
| "loss": 0.5494, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.2891609303724733, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.991328741719092e-06, |
| "loss": 0.5587, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.28953695238856236, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.991268895616136e-06, |
| "loss": 0.5561, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.2899129744046514, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.99120884388443e-06, |
| "loss": 0.5552, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.2902889964207404, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.991148586526451e-06, |
| "loss": 0.5637, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.29066501843682946, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.99108812354468e-06, |
| "loss": 0.5659, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.2910410404529185, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.991027454941608e-06, |
| "loss": 0.5664, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.29141706246900756, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.990966580719734e-06, |
| "loss": 0.5653, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.2917930844850966, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.990905500881568e-06, |
| "loss": 0.5576, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.29216910650118566, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.990844215429621e-06, |
| "loss": 0.5644, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.2925451285172747, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.990782724366424e-06, |
| "loss": 0.5569, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.2929211505333637, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.990721027694506e-06, |
| "loss": 0.5541, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.29329717254945276, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.990659125416411e-06, |
| "loss": 0.5702, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.2936731945655418, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.990597017534689e-06, |
| "loss": 0.5716, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.29404921658163086, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.990534704051897e-06, |
| "loss": 0.5562, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.2944252385977199, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.990472184970603e-06, |
| "loss": 0.5657, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.29480126061380896, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.990409460293385e-06, |
| "loss": 0.5693, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.295177282629898, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.990346530022826e-06, |
| "loss": 0.5766, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.295553304645987, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.990283394161515e-06, |
| "loss": 0.5619, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.29592932666207605, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.990220052712056e-06, |
| "loss": 0.5574, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.2963053486781651, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.99015650567706e-06, |
| "loss": 0.558, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.29668137069425415, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.990092753059142e-06, |
| "loss": 0.5712, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.2970573927103432, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.990028794860931e-06, |
| "loss": 0.565, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.29743341472643225, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.98996463108506e-06, |
| "loss": 0.5626, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.29780943674252125, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.989900261734174e-06, |
| "loss": 0.5693, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.2981854587586103, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.989835686810922e-06, |
| "loss": 0.553, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.29856148077469935, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.989770906317967e-06, |
| "loss": 0.5728, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.2989375027907884, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.989705920257977e-06, |
| "loss": 0.5645, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.29931352480687745, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.989640728633631e-06, |
| "loss": 0.5573, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.2996895468229665, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.989575331447612e-06, |
| "loss": 0.5529, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.30006556883905555, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.989509728702615e-06, |
| "loss": 0.5619, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.30044159085514455, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.989443920401344e-06, |
| "loss": 0.5711, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.3008176128712336, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.989377906546509e-06, |
| "loss": 0.5515, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.30119363488732265, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.989311687140831e-06, |
| "loss": 0.5689, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.3015696569034117, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.989245262187033e-06, |
| "loss": 0.5511, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.30194567891950075, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.989178631687859e-06, |
| "loss": 0.574, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.3023217009355898, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.98911179564605e-06, |
| "loss": 0.5555, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.3026977229516788, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.989044754064358e-06, |
| "loss": 0.5591, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.30307374496776784, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.988977506945549e-06, |
| "loss": 0.5591, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.3034497669838569, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.98891005429239e-06, |
| "loss": 0.5722, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.30382578899994594, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.988842396107663e-06, |
| "loss": 0.5603, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.304201811016035, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.988774532394152e-06, |
| "loss": 0.5677, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.30457783303212405, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.988706463154656e-06, |
| "loss": 0.5559, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.3049538550482131, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.988638188391974e-06, |
| "loss": 0.557, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.3053298770643021, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.988569708108927e-06, |
| "loss": 0.5588, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.30570589908039114, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.988501022308331e-06, |
| "loss": 0.5643, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.3060819210964802, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.988432130993013e-06, |
| "loss": 0.5608, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.30645794311256924, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.988363034165817e-06, |
| "loss": 0.5647, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.3068339651286583, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.988293731829588e-06, |
| "loss": 0.5569, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.30720998714474734, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.98822422398718e-06, |
| "loss": 0.5603, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.3075860091608364, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.988154510641455e-06, |
| "loss": 0.5614, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.3079620311769254, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.98808459179529e-06, |
| "loss": 0.543, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.30833805319301444, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.98801446745156e-06, |
| "loss": 0.5666, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.3087140752091035, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.987944137613155e-06, |
| "loss": 0.5627, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.30909009722519254, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.987873602282976e-06, |
| "loss": 0.5492, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.3094661192412816, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.987802861463927e-06, |
| "loss": 0.5667, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.30984214125737064, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.98773191515892e-06, |
| "loss": 0.5597, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.31021816327345964, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.987660763370883e-06, |
| "loss": 0.5579, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.3105941852895487, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.98758940610274e-06, |
| "loss": 0.5672, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.31097020730563774, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.98751784335744e-06, |
| "loss": 0.5598, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.3113462293217268, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.987446075137922e-06, |
| "loss": 0.5531, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.31172225133781584, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.987374101447148e-06, |
| "loss": 0.5565, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.3120982733539049, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.987301922288082e-06, |
| "loss": 0.5607, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.31247429536999394, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.987229537663698e-06, |
| "loss": 0.5497, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.31285031738608293, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.987156947576977e-06, |
| "loss": 0.5613, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.313226339402172, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.98708415203091e-06, |
| "loss": 0.5598, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.31360236141826103, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.987011151028496e-06, |
| "loss": 0.5646, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.3139783834343501, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.986937944572746e-06, |
| "loss": 0.5668, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.31435440545043913, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.98686453266667e-06, |
| "loss": 0.5572, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.3147304274665282, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.986790915313293e-06, |
| "loss": 0.5523, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.31510644948261723, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.986717092515653e-06, |
| "loss": 0.5625, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.31548247149870623, |
| "grad_norm": 0.625, |
| "learning_rate": 9.986643064276786e-06, |
| "loss": 0.5636, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.3158584935147953, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.986568830599745e-06, |
| "loss": 0.5582, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.31623451553088433, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.986494391487588e-06, |
| "loss": 0.5688, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.3166105375469734, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.98641974694338e-06, |
| "loss": 0.5604, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.31698655956306243, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.986344896970198e-06, |
| "loss": 0.5631, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.3173625815791515, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.986269841571124e-06, |
| "loss": 0.5611, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.3177386035952405, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.986194580749251e-06, |
| "loss": 0.5505, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.3181146256113295, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.98611911450768e-06, |
| "loss": 0.5597, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.3184906476274186, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.98604344284952e-06, |
| "loss": 0.5474, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.3188666696435076, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.985967565777887e-06, |
| "loss": 0.5493, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.3192426916595967, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.985891483295908e-06, |
| "loss": 0.5638, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.31961871367568573, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.985815195406718e-06, |
| "loss": 0.552, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.3199947356917748, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.985738702113457e-06, |
| "loss": 0.5554, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.3203707577078638, |
| "grad_norm": 0.91796875, |
| "learning_rate": 9.985662003419282e-06, |
| "loss": 0.5725, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.3207467797239528, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.985585099327348e-06, |
| "loss": 0.5567, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.3211228017400419, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.985507989840824e-06, |
| "loss": 0.5699, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.3214988237561309, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.985430674962888e-06, |
| "loss": 0.5513, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.32187484577222, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.985353154696725e-06, |
| "loss": 0.5634, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.322250867788309, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.985275429045526e-06, |
| "loss": 0.5576, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.322626889804398, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.985197498012499e-06, |
| "loss": 0.5595, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.32300291182048707, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.98511936160085e-06, |
| "loss": 0.5551, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.3233789338365761, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.985041019813797e-06, |
| "loss": 0.5535, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.32375495585266517, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.98496247265457e-06, |
| "loss": 0.5611, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.3241309778687542, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.984883720126407e-06, |
| "loss": 0.5602, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.3245069998848433, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.984804762232548e-06, |
| "loss": 0.5559, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.3248830219009323, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.984725598976248e-06, |
| "loss": 0.5542, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.3252590439170213, |
| "grad_norm": 0.625, |
| "learning_rate": 9.984646230360768e-06, |
| "loss": 0.5537, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.32563506593311037, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.984566656389378e-06, |
| "loss": 0.559, |
| "step": 17320 |
| }, |
| { |
| "epoch": 0.3260110879491994, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.984486877065357e-06, |
| "loss": 0.5664, |
| "step": 17340 |
| }, |
| { |
| "epoch": 0.32638710996528847, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.98440689239199e-06, |
| "loss": 0.5545, |
| "step": 17360 |
| }, |
| { |
| "epoch": 0.3267631319813775, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.984326702372572e-06, |
| "loss": 0.5492, |
| "step": 17380 |
| }, |
| { |
| "epoch": 0.32713915399746657, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.984246307010411e-06, |
| "loss": 0.5514, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.3275151760135556, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.984165706308815e-06, |
| "loss": 0.5548, |
| "step": 17420 |
| }, |
| { |
| "epoch": 0.3278911980296446, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.984084900271104e-06, |
| "loss": 0.57, |
| "step": 17440 |
| }, |
| { |
| "epoch": 0.32826722004573367, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.984003888900608e-06, |
| "loss": 0.5594, |
| "step": 17460 |
| }, |
| { |
| "epoch": 0.3286432420618227, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.983922672200666e-06, |
| "loss": 0.553, |
| "step": 17480 |
| }, |
| { |
| "epoch": 0.32901926407791177, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.983841250174623e-06, |
| "loss": 0.5664, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.3293952860940008, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.983759622825832e-06, |
| "loss": 0.5566, |
| "step": 17520 |
| }, |
| { |
| "epoch": 0.32977130811008987, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.983677790157659e-06, |
| "loss": 0.5628, |
| "step": 17540 |
| }, |
| { |
| "epoch": 0.33014733012617886, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.983595752173472e-06, |
| "loss": 0.5613, |
| "step": 17560 |
| }, |
| { |
| "epoch": 0.3305233521422679, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.983513508876653e-06, |
| "loss": 0.5599, |
| "step": 17580 |
| }, |
| { |
| "epoch": 0.33089937415835696, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.98343106027059e-06, |
| "loss": 0.5596, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.331275396174446, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.983348406358677e-06, |
| "loss": 0.5596, |
| "step": 17620 |
| }, |
| { |
| "epoch": 0.33165141819053506, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.983265547144323e-06, |
| "loss": 0.561, |
| "step": 17640 |
| }, |
| { |
| "epoch": 0.3320274402066241, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.98318248263094e-06, |
| "loss": 0.5546, |
| "step": 17660 |
| }, |
| { |
| "epoch": 0.33240346222271316, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.98309921282195e-06, |
| "loss": 0.5602, |
| "step": 17680 |
| }, |
| { |
| "epoch": 0.33277948423880216, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.983015737720782e-06, |
| "loss": 0.5521, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.3331555062548912, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.982932057330878e-06, |
| "loss": 0.5603, |
| "step": 17720 |
| }, |
| { |
| "epoch": 0.33353152827098026, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.982848171655684e-06, |
| "loss": 0.5622, |
| "step": 17740 |
| }, |
| { |
| "epoch": 0.3339075502870693, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.982764080698655e-06, |
| "loss": 0.5577, |
| "step": 17760 |
| }, |
| { |
| "epoch": 0.33428357230315836, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.982679784463256e-06, |
| "loss": 0.5608, |
| "step": 17780 |
| }, |
| { |
| "epoch": 0.3346595943192474, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.98259528295296e-06, |
| "loss": 0.5618, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.33503561633533646, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.982510576171249e-06, |
| "loss": 0.562, |
| "step": 17820 |
| }, |
| { |
| "epoch": 0.33541163835142546, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.982425664121611e-06, |
| "loss": 0.5591, |
| "step": 17840 |
| }, |
| { |
| "epoch": 0.3357876603675145, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.982340546807546e-06, |
| "loss": 0.5604, |
| "step": 17860 |
| }, |
| { |
| "epoch": 0.33616368238360356, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.982255224232558e-06, |
| "loss": 0.5541, |
| "step": 17880 |
| }, |
| { |
| "epoch": 0.3365397043996926, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.982169696400166e-06, |
| "loss": 0.5544, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.33691572641578166, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.982083963313892e-06, |
| "loss": 0.5567, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.3372917484318707, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.981998024977263e-06, |
| "loss": 0.5536, |
| "step": 17940 |
| }, |
| { |
| "epoch": 0.3376677704479597, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.981911881393828e-06, |
| "loss": 0.5518, |
| "step": 17960 |
| }, |
| { |
| "epoch": 0.33804379246404875, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.98182553256713e-06, |
| "loss": 0.5594, |
| "step": 17980 |
| }, |
| { |
| "epoch": 0.3384198144801378, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.98173897850073e-06, |
| "loss": 0.561, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.33879583649622685, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.981652219198191e-06, |
| "loss": 0.5478, |
| "step": 18020 |
| }, |
| { |
| "epoch": 0.3391718585123159, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.981565254663089e-06, |
| "loss": 0.5547, |
| "step": 18040 |
| }, |
| { |
| "epoch": 0.33954788052840496, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.981478084899004e-06, |
| "loss": 0.5496, |
| "step": 18060 |
| }, |
| { |
| "epoch": 0.339923902544494, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.981390709909531e-06, |
| "loss": 0.5502, |
| "step": 18080 |
| }, |
| { |
| "epoch": 0.340299924560583, |
| "grad_norm": 1.84375, |
| "learning_rate": 9.981303129698268e-06, |
| "loss": 0.5611, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.34067594657667205, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.981215344268821e-06, |
| "loss": 0.5581, |
| "step": 18120 |
| }, |
| { |
| "epoch": 0.3410519685927611, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.981127353624811e-06, |
| "loss": 0.5598, |
| "step": 18140 |
| }, |
| { |
| "epoch": 0.34142799060885015, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.98103915776986e-06, |
| "loss": 0.5529, |
| "step": 18160 |
| }, |
| { |
| "epoch": 0.3418040126249392, |
| "grad_norm": 0.625, |
| "learning_rate": 9.9809507567076e-06, |
| "loss": 0.5582, |
| "step": 18180 |
| }, |
| { |
| "epoch": 0.34218003464102825, |
| "grad_norm": 0.890625, |
| "learning_rate": 9.980862150441677e-06, |
| "loss": 0.5514, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.34255605665711725, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.980773338975737e-06, |
| "loss": 0.5448, |
| "step": 18220 |
| }, |
| { |
| "epoch": 0.3429320786732063, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.980684322313443e-06, |
| "loss": 0.55, |
| "step": 18240 |
| }, |
| { |
| "epoch": 0.34330810068929535, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.980595100458462e-06, |
| "loss": 0.563, |
| "step": 18260 |
| }, |
| { |
| "epoch": 0.3436841227053844, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.980505673414465e-06, |
| "loss": 0.5649, |
| "step": 18280 |
| }, |
| { |
| "epoch": 0.34406014472147345, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.980416041185141e-06, |
| "loss": 0.556, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.3444361667375625, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.98032620377418e-06, |
| "loss": 0.5583, |
| "step": 18320 |
| }, |
| { |
| "epoch": 0.34481218875365155, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.980236161185284e-06, |
| "loss": 0.5583, |
| "step": 18340 |
| }, |
| { |
| "epoch": 0.34518821076974054, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.980145913422164e-06, |
| "loss": 0.5648, |
| "step": 18360 |
| }, |
| { |
| "epoch": 0.3455642327858296, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.980055460488537e-06, |
| "loss": 0.5585, |
| "step": 18380 |
| }, |
| { |
| "epoch": 0.34594025480191865, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.979964802388127e-06, |
| "loss": 0.5584, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.3463162768180077, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.979873939124672e-06, |
| "loss": 0.5623, |
| "step": 18420 |
| }, |
| { |
| "epoch": 0.34669229883409675, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.979782870701912e-06, |
| "loss": 0.56, |
| "step": 18440 |
| }, |
| { |
| "epoch": 0.3470683208501858, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.979691597123604e-06, |
| "loss": 0.5547, |
| "step": 18460 |
| }, |
| { |
| "epoch": 0.34744434286627485, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.979600118393503e-06, |
| "loss": 0.5532, |
| "step": 18480 |
| }, |
| { |
| "epoch": 0.34782036488236384, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.979508434515383e-06, |
| "loss": 0.5563, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.3481963868984529, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.979416545493016e-06, |
| "loss": 0.5605, |
| "step": 18520 |
| }, |
| { |
| "epoch": 0.34857240891454194, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.979324451330193e-06, |
| "loss": 0.5574, |
| "step": 18540 |
| }, |
| { |
| "epoch": 0.348948430930631, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.979232152030703e-06, |
| "loss": 0.5638, |
| "step": 18560 |
| }, |
| { |
| "epoch": 0.34932445294672004, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.97913964759835e-06, |
| "loss": 0.57, |
| "step": 18580 |
| }, |
| { |
| "epoch": 0.3497004749628091, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.979046938036945e-06, |
| "loss": 0.5544, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.3500764969788981, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.97895402335031e-06, |
| "loss": 0.5502, |
| "step": 18620 |
| }, |
| { |
| "epoch": 0.35045251899498714, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.978860903542268e-06, |
| "loss": 0.5529, |
| "step": 18640 |
| }, |
| { |
| "epoch": 0.3508285410110762, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.97876757861666e-06, |
| "loss": 0.5576, |
| "step": 18660 |
| }, |
| { |
| "epoch": 0.35120456302716524, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.978674048577326e-06, |
| "loss": 0.5509, |
| "step": 18680 |
| }, |
| { |
| "epoch": 0.3515805850432543, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.978580313428125e-06, |
| "loss": 0.5566, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.35195660705934334, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.978486373172916e-06, |
| "loss": 0.5527, |
| "step": 18720 |
| }, |
| { |
| "epoch": 0.3523326290754324, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.978392227815568e-06, |
| "loss": 0.5494, |
| "step": 18740 |
| }, |
| { |
| "epoch": 0.3527086510915214, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.97829787735996e-06, |
| "loss": 0.5529, |
| "step": 18760 |
| }, |
| { |
| "epoch": 0.35308467310761044, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.978203321809979e-06, |
| "loss": 0.5507, |
| "step": 18780 |
| }, |
| { |
| "epoch": 0.3534606951236995, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.978108561169521e-06, |
| "loss": 0.5541, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.35383671713978854, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.97801359544249e-06, |
| "loss": 0.5559, |
| "step": 18820 |
| }, |
| { |
| "epoch": 0.3542127391558776, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.977918424632798e-06, |
| "loss": 0.5471, |
| "step": 18840 |
| }, |
| { |
| "epoch": 0.35458876117196664, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.977823048744367e-06, |
| "loss": 0.5535, |
| "step": 18860 |
| }, |
| { |
| "epoch": 0.3549647831880557, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.977727467781124e-06, |
| "loss": 0.5645, |
| "step": 18880 |
| }, |
| { |
| "epoch": 0.3553408052041447, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.97763168174701e-06, |
| "loss": 0.55, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.35571682722023373, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.977535690645967e-06, |
| "loss": 0.5513, |
| "step": 18920 |
| }, |
| { |
| "epoch": 0.3560928492363228, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.977439494481951e-06, |
| "loss": 0.5507, |
| "step": 18940 |
| }, |
| { |
| "epoch": 0.35646887125241183, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.977343093258928e-06, |
| "loss": 0.5519, |
| "step": 18960 |
| }, |
| { |
| "epoch": 0.3568448932685009, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.977246486980867e-06, |
| "loss": 0.5514, |
| "step": 18980 |
| }, |
| { |
| "epoch": 0.35722091528458993, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.977149675651747e-06, |
| "loss": 0.5593, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.35759693730067893, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.977052659275559e-06, |
| "loss": 0.5538, |
| "step": 19020 |
| }, |
| { |
| "epoch": 0.357972959316768, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.976955437856299e-06, |
| "loss": 0.5653, |
| "step": 19040 |
| }, |
| { |
| "epoch": 0.35834898133285703, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.97685801139797e-06, |
| "loss": 0.5466, |
| "step": 19060 |
| }, |
| { |
| "epoch": 0.3587250033489461, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.976760379904588e-06, |
| "loss": 0.5499, |
| "step": 19080 |
| }, |
| { |
| "epoch": 0.35910102536503513, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.976662543380175e-06, |
| "loss": 0.5516, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.3594770473811242, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.976564501828763e-06, |
| "loss": 0.5512, |
| "step": 19120 |
| }, |
| { |
| "epoch": 0.35985306939721323, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.976466255254387e-06, |
| "loss": 0.5686, |
| "step": 19140 |
| }, |
| { |
| "epoch": 0.3602290914133022, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.976367803661097e-06, |
| "loss": 0.5557, |
| "step": 19160 |
| }, |
| { |
| "epoch": 0.3606051134293913, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.976269147052951e-06, |
| "loss": 0.5677, |
| "step": 19180 |
| }, |
| { |
| "epoch": 0.36098113544548033, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.976170285434012e-06, |
| "loss": 0.5557, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.3613571574615694, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.97607121880835e-06, |
| "loss": 0.547, |
| "step": 19220 |
| }, |
| { |
| "epoch": 0.36173317947765843, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.97597194718005e-06, |
| "loss": 0.5564, |
| "step": 19240 |
| }, |
| { |
| "epoch": 0.3621092014937475, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.9758724705532e-06, |
| "loss": 0.546, |
| "step": 19260 |
| }, |
| { |
| "epoch": 0.3624852235098365, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.975772788931898e-06, |
| "loss": 0.5529, |
| "step": 19280 |
| }, |
| { |
| "epoch": 0.3628612455259255, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.975672902320252e-06, |
| "loss": 0.5538, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.3632372675420146, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.975572810722376e-06, |
| "loss": 0.5516, |
| "step": 19320 |
| }, |
| { |
| "epoch": 0.3636132895581036, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.975472514142392e-06, |
| "loss": 0.5601, |
| "step": 19340 |
| }, |
| { |
| "epoch": 0.3639893115741927, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.975372012584437e-06, |
| "loss": 0.5593, |
| "step": 19360 |
| }, |
| { |
| "epoch": 0.3643653335902817, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.975271306052648e-06, |
| "loss": 0.551, |
| "step": 19380 |
| }, |
| { |
| "epoch": 0.3647413556063708, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.975170394551173e-06, |
| "loss": 0.5599, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.36511737762245977, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.97506927808417e-06, |
| "loss": 0.5618, |
| "step": 19420 |
| }, |
| { |
| "epoch": 0.3654933996385488, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.974967956655806e-06, |
| "loss": 0.549, |
| "step": 19440 |
| }, |
| { |
| "epoch": 0.36586942165463787, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.974866430270254e-06, |
| "loss": 0.5692, |
| "step": 19460 |
| }, |
| { |
| "epoch": 0.3662454436707269, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.974764698931698e-06, |
| "loss": 0.5503, |
| "step": 19480 |
| }, |
| { |
| "epoch": 0.366621465686816, |
| "grad_norm": 0.625, |
| "learning_rate": 9.974662762644328e-06, |
| "loss": 0.5578, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.366997487702905, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.974560621412342e-06, |
| "loss": 0.5566, |
| "step": 19520 |
| }, |
| { |
| "epoch": 0.3673735097189941, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.97445827523995e-06, |
| "loss": 0.5625, |
| "step": 19540 |
| }, |
| { |
| "epoch": 0.36774953173508307, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.974355724131371e-06, |
| "loss": 0.5508, |
| "step": 19560 |
| }, |
| { |
| "epoch": 0.3681255537511721, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.974252968090826e-06, |
| "loss": 0.5551, |
| "step": 19580 |
| }, |
| { |
| "epoch": 0.36850157576726117, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.974150007122548e-06, |
| "loss": 0.5508, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.3688775977833502, |
| "grad_norm": 0.625, |
| "learning_rate": 9.97404684123078e-06, |
| "loss": 0.5576, |
| "step": 19620 |
| }, |
| { |
| "epoch": 0.36925361979943927, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.973943470419773e-06, |
| "loss": 0.5678, |
| "step": 19640 |
| }, |
| { |
| "epoch": 0.3696296418155283, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.973839894693785e-06, |
| "loss": 0.5518, |
| "step": 19660 |
| }, |
| { |
| "epoch": 0.3700056638316173, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.973736114057083e-06, |
| "loss": 0.5599, |
| "step": 19680 |
| }, |
| { |
| "epoch": 0.37038168584770637, |
| "grad_norm": 0.625, |
| "learning_rate": 9.973632128513943e-06, |
| "loss": 0.5481, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.3707577078637954, |
| "grad_norm": 0.625, |
| "learning_rate": 9.973527938068648e-06, |
| "loss": 0.5442, |
| "step": 19720 |
| }, |
| { |
| "epoch": 0.37113372987988447, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.973423542725491e-06, |
| "loss": 0.556, |
| "step": 19740 |
| }, |
| { |
| "epoch": 0.3715097518959735, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.973318942488772e-06, |
| "loss": 0.5486, |
| "step": 19760 |
| }, |
| { |
| "epoch": 0.37188577391206257, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.9732141373628e-06, |
| "loss": 0.555, |
| "step": 19780 |
| }, |
| { |
| "epoch": 0.3722617959281516, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.973109127351897e-06, |
| "loss": 0.5592, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.3726378179442406, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.973003912460383e-06, |
| "loss": 0.5519, |
| "step": 19820 |
| }, |
| { |
| "epoch": 0.37301383996032966, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.972898492692598e-06, |
| "loss": 0.5588, |
| "step": 19840 |
| }, |
| { |
| "epoch": 0.3733898619764187, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.972792868052882e-06, |
| "loss": 0.5564, |
| "step": 19860 |
| }, |
| { |
| "epoch": 0.37376588399250776, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.972687038545586e-06, |
| "loss": 0.5545, |
| "step": 19880 |
| }, |
| { |
| "epoch": 0.3741419060085968, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.972581004175073e-06, |
| "loss": 0.556, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.37451792802468586, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.972474764945707e-06, |
| "loss": 0.5468, |
| "step": 19920 |
| }, |
| { |
| "epoch": 0.3748939500407749, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.972368320861868e-06, |
| "loss": 0.5608, |
| "step": 19940 |
| }, |
| { |
| "epoch": 0.3752699720568639, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.972261671927941e-06, |
| "loss": 0.5489, |
| "step": 19960 |
| }, |
| { |
| "epoch": 0.37564599407295296, |
| "grad_norm": 0.625, |
| "learning_rate": 9.972154818148319e-06, |
| "loss": 0.5548, |
| "step": 19980 |
| }, |
| { |
| "epoch": 0.376022016089042, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.972047759527404e-06, |
| "loss": 0.5457, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.37639803810513106, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.971940496069607e-06, |
| "loss": 0.5574, |
| "step": 20020 |
| }, |
| { |
| "epoch": 0.3767740601212201, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.971833027779347e-06, |
| "loss": 0.5542, |
| "step": 20040 |
| }, |
| { |
| "epoch": 0.37715008213730916, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.97172535466105e-06, |
| "loss": 0.5445, |
| "step": 20060 |
| }, |
| { |
| "epoch": 0.37752610415339816, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.971617476719155e-06, |
| "loss": 0.5484, |
| "step": 20080 |
| }, |
| { |
| "epoch": 0.3779021261694872, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.971509393958103e-06, |
| "loss": 0.544, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.37827814818557626, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.97140110638235e-06, |
| "loss": 0.5425, |
| "step": 20120 |
| }, |
| { |
| "epoch": 0.3786541702016653, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.971292613996352e-06, |
| "loss": 0.5417, |
| "step": 20140 |
| }, |
| { |
| "epoch": 0.37903019221775436, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.971183916804585e-06, |
| "loss": 0.554, |
| "step": 20160 |
| }, |
| { |
| "epoch": 0.3794062142338434, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.971075014811525e-06, |
| "loss": 0.561, |
| "step": 20180 |
| }, |
| { |
| "epoch": 0.37978223624993246, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.970965908021656e-06, |
| "loss": 0.5485, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.38015825826602145, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.970856596439474e-06, |
| "loss": 0.5519, |
| "step": 20220 |
| }, |
| { |
| "epoch": 0.3805342802821105, |
| "grad_norm": 0.625, |
| "learning_rate": 9.970747080069485e-06, |
| "loss": 0.547, |
| "step": 20240 |
| }, |
| { |
| "epoch": 0.38091030229819955, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.970637358916198e-06, |
| "loss": 0.5524, |
| "step": 20260 |
| }, |
| { |
| "epoch": 0.3812863243142886, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.970527432984133e-06, |
| "loss": 0.5468, |
| "step": 20280 |
| }, |
| { |
| "epoch": 0.38166234633037766, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.97041730227782e-06, |
| "loss": 0.5562, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.3820383683464667, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.970306966801796e-06, |
| "loss": 0.5454, |
| "step": 20320 |
| }, |
| { |
| "epoch": 0.3824143903625557, |
| "grad_norm": 0.546875, |
| "learning_rate": 9.970196426560607e-06, |
| "loss": 0.5521, |
| "step": 20340 |
| }, |
| { |
| "epoch": 0.38279041237864475, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.970085681558807e-06, |
| "loss": 0.5545, |
| "step": 20360 |
| }, |
| { |
| "epoch": 0.3831664343947338, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.969974731800957e-06, |
| "loss": 0.5489, |
| "step": 20380 |
| }, |
| { |
| "epoch": 0.38354245641082285, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.96986357729163e-06, |
| "loss": 0.5501, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.3839184784269119, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.969752218035404e-06, |
| "loss": 0.5461, |
| "step": 20420 |
| }, |
| { |
| "epoch": 0.38429450044300095, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.969640654036864e-06, |
| "loss": 0.5637, |
| "step": 20440 |
| }, |
| { |
| "epoch": 0.38467052245909, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.969528885300612e-06, |
| "loss": 0.5475, |
| "step": 20460 |
| }, |
| { |
| "epoch": 0.385046544475179, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.96941691183125e-06, |
| "loss": 0.5571, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.38542256649126805, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.96930473363339e-06, |
| "loss": 0.5496, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.3857985885073571, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.969192350711651e-06, |
| "loss": 0.5493, |
| "step": 20520 |
| }, |
| { |
| "epoch": 0.38617461052344615, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.969079763070671e-06, |
| "loss": 0.5576, |
| "step": 20540 |
| }, |
| { |
| "epoch": 0.3865506325395352, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.96896697071508e-06, |
| "loss": 0.5492, |
| "step": 20560 |
| }, |
| { |
| "epoch": 0.38692665455562425, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.96885397364953e-06, |
| "loss": 0.5553, |
| "step": 20580 |
| }, |
| { |
| "epoch": 0.3873026765717133, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.968740771878673e-06, |
| "loss": 0.5351, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.3876786985878023, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.968627365407174e-06, |
| "loss": 0.5607, |
| "step": 20620 |
| }, |
| { |
| "epoch": 0.38805472060389135, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.968513754239707e-06, |
| "loss": 0.5564, |
| "step": 20640 |
| }, |
| { |
| "epoch": 0.3884307426199804, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.968399938380951e-06, |
| "loss": 0.5522, |
| "step": 20660 |
| }, |
| { |
| "epoch": 0.38880676463606945, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.968285917835592e-06, |
| "loss": 0.5573, |
| "step": 20680 |
| }, |
| { |
| "epoch": 0.3891827866521585, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.968171692608332e-06, |
| "loss": 0.55, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.38955880866824755, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.968057262703875e-06, |
| "loss": 0.5542, |
| "step": 20720 |
| }, |
| { |
| "epoch": 0.38993483068433654, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.967942628126933e-06, |
| "loss": 0.5527, |
| "step": 20740 |
| }, |
| { |
| "epoch": 0.3903108527004256, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.967827788882231e-06, |
| "loss": 0.5462, |
| "step": 20760 |
| }, |
| { |
| "epoch": 0.39068687471651464, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.967712744974502e-06, |
| "loss": 0.5582, |
| "step": 20780 |
| }, |
| { |
| "epoch": 0.3910628967326037, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.967597496408483e-06, |
| "loss": 0.5548, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.39143891874869274, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.96748204318892e-06, |
| "loss": 0.5484, |
| "step": 20820 |
| }, |
| { |
| "epoch": 0.3918149407647818, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.967366385320576e-06, |
| "loss": 0.5537, |
| "step": 20840 |
| }, |
| { |
| "epoch": 0.39219096278087084, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.967250522808208e-06, |
| "loss": 0.5436, |
| "step": 20860 |
| }, |
| { |
| "epoch": 0.39256698479695984, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.967134455656595e-06, |
| "loss": 0.561, |
| "step": 20880 |
| }, |
| { |
| "epoch": 0.3929430068130489, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.967018183870514e-06, |
| "loss": 0.5531, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.39331902882913794, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.96690170745476e-06, |
| "loss": 0.5543, |
| "step": 20920 |
| }, |
| { |
| "epoch": 0.393695050845227, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.96678502641413e-06, |
| "loss": 0.5509, |
| "step": 20940 |
| }, |
| { |
| "epoch": 0.39407107286131604, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.966668140753428e-06, |
| "loss": 0.5571, |
| "step": 20960 |
| }, |
| { |
| "epoch": 0.3944470948774051, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.966551050477473e-06, |
| "loss": 0.558, |
| "step": 20980 |
| }, |
| { |
| "epoch": 0.39482311689349414, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.966433755591087e-06, |
| "loss": 0.5545, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.39519913890958314, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.966316256099104e-06, |
| "loss": 0.5453, |
| "step": 21020 |
| }, |
| { |
| "epoch": 0.3955751609256722, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.966198552006361e-06, |
| "loss": 0.5585, |
| "step": 21040 |
| }, |
| { |
| "epoch": 0.39595118294176124, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.966080643317713e-06, |
| "loss": 0.536, |
| "step": 21060 |
| }, |
| { |
| "epoch": 0.3963272049578503, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.96596253003801e-06, |
| "loss": 0.5536, |
| "step": 21080 |
| }, |
| { |
| "epoch": 0.39670322697393934, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.965844212172127e-06, |
| "loss": 0.5517, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.3970792489900284, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.965725689724931e-06, |
| "loss": 0.5481, |
| "step": 21120 |
| }, |
| { |
| "epoch": 0.3974552710061174, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.965606962701308e-06, |
| "loss": 0.557, |
| "step": 21140 |
| }, |
| { |
| "epoch": 0.39783129302220643, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.96548803110615e-06, |
| "loss": 0.5557, |
| "step": 21160 |
| }, |
| { |
| "epoch": 0.3982073150382955, |
| "grad_norm": 0.625, |
| "learning_rate": 9.965368894944353e-06, |
| "loss": 0.5471, |
| "step": 21180 |
| }, |
| { |
| "epoch": 0.39858333705438453, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.965249554220828e-06, |
| "loss": 0.5658, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.3989593590704736, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.965130008940493e-06, |
| "loss": 0.5486, |
| "step": 21220 |
| }, |
| { |
| "epoch": 0.39933538108656264, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.965010259108269e-06, |
| "loss": 0.5458, |
| "step": 21240 |
| }, |
| { |
| "epoch": 0.3997114031026517, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.964890304729094e-06, |
| "loss": 0.5525, |
| "step": 21260 |
| }, |
| { |
| "epoch": 0.4000874251187407, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.964770145807907e-06, |
| "loss": 0.5471, |
| "step": 21280 |
| }, |
| { |
| "epoch": 0.40046344713482973, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.964649782349658e-06, |
| "loss": 0.5457, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.4008394691509188, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.964529214359306e-06, |
| "loss": 0.5438, |
| "step": 21320 |
| }, |
| { |
| "epoch": 0.40121549116700783, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.964408441841819e-06, |
| "loss": 0.5474, |
| "step": 21340 |
| }, |
| { |
| "epoch": 0.4015915131830969, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.964287464802172e-06, |
| "loss": 0.5474, |
| "step": 21360 |
| }, |
| { |
| "epoch": 0.40196753519918593, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.964166283245348e-06, |
| "loss": 0.5563, |
| "step": 21380 |
| }, |
| { |
| "epoch": 0.4023435572152749, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.964044897176342e-06, |
| "loss": 0.5552, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.402719579231364, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.963923306600154e-06, |
| "loss": 0.5473, |
| "step": 21420 |
| }, |
| { |
| "epoch": 0.40309560124745303, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.963801511521791e-06, |
| "loss": 0.5507, |
| "step": 21440 |
| }, |
| { |
| "epoch": 0.4034716232635421, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.963679511946271e-06, |
| "loss": 0.5531, |
| "step": 21460 |
| }, |
| { |
| "epoch": 0.40384764527963113, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.963557307878624e-06, |
| "loss": 0.5497, |
| "step": 21480 |
| }, |
| { |
| "epoch": 0.4042236672957202, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.96343489932388e-06, |
| "loss": 0.5551, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.40459968931180923, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.963312286287086e-06, |
| "loss": 0.5465, |
| "step": 21520 |
| }, |
| { |
| "epoch": 0.4049757113278982, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.96318946877329e-06, |
| "loss": 0.5419, |
| "step": 21540 |
| }, |
| { |
| "epoch": 0.4053517333439873, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.96306644678755e-06, |
| "loss": 0.5615, |
| "step": 21560 |
| }, |
| { |
| "epoch": 0.4057277553600763, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.96294322033494e-06, |
| "loss": 0.5619, |
| "step": 21580 |
| }, |
| { |
| "epoch": 0.4061037773761654, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.962819789420535e-06, |
| "loss": 0.5473, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.4064797993922544, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.962696154049416e-06, |
| "loss": 0.5478, |
| "step": 21620 |
| }, |
| { |
| "epoch": 0.4068558214083435, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.962572314226682e-06, |
| "loss": 0.5455, |
| "step": 21640 |
| }, |
| { |
| "epoch": 0.4072318434244325, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.96244826995743e-06, |
| "loss": 0.5557, |
| "step": 21660 |
| }, |
| { |
| "epoch": 0.4076078654405215, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.962324021246775e-06, |
| "loss": 0.5541, |
| "step": 21680 |
| }, |
| { |
| "epoch": 0.40798388745661057, |
| "grad_norm": 0.625, |
| "learning_rate": 9.96219956809983e-06, |
| "loss": 0.5512, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.4083599094726996, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.962074910521729e-06, |
| "loss": 0.5616, |
| "step": 21720 |
| }, |
| { |
| "epoch": 0.4087359314887887, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.961950048517604e-06, |
| "loss": 0.5375, |
| "step": 21740 |
| }, |
| { |
| "epoch": 0.4091119535048777, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.961824982092597e-06, |
| "loss": 0.5522, |
| "step": 21760 |
| }, |
| { |
| "epoch": 0.4094879755209668, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.961699711251864e-06, |
| "loss": 0.5537, |
| "step": 21780 |
| }, |
| { |
| "epoch": 0.40986399753705577, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.961574236000564e-06, |
| "loss": 0.5558, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.4102400195531448, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.961448556343866e-06, |
| "loss": 0.5611, |
| "step": 21820 |
| }, |
| { |
| "epoch": 0.41061604156923387, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.961322672286951e-06, |
| "loss": 0.5521, |
| "step": 21840 |
| }, |
| { |
| "epoch": 0.4109920635853229, |
| "grad_norm": 0.75, |
| "learning_rate": 9.961196583835e-06, |
| "loss": 0.554, |
| "step": 21860 |
| }, |
| { |
| "epoch": 0.41136808560141197, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.961070290993212e-06, |
| "loss": 0.5543, |
| "step": 21880 |
| }, |
| { |
| "epoch": 0.411744107617501, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.960943793766788e-06, |
| "loss": 0.5555, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.41212012963359007, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.96081709216094e-06, |
| "loss": 0.5535, |
| "step": 21920 |
| }, |
| { |
| "epoch": 0.41249615164967907, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.960690186180886e-06, |
| "loss": 0.5591, |
| "step": 21940 |
| }, |
| { |
| "epoch": 0.4128721736657681, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.960563075831856e-06, |
| "loss": 0.5559, |
| "step": 21960 |
| }, |
| { |
| "epoch": 0.41324819568185717, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.960435761119088e-06, |
| "loss": 0.5616, |
| "step": 21980 |
| }, |
| { |
| "epoch": 0.4136242176979462, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.960308242047822e-06, |
| "loss": 0.558, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.41400023971403527, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.960180518623317e-06, |
| "loss": 0.5523, |
| "step": 22020 |
| }, |
| { |
| "epoch": 0.4143762617301243, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.960052590850833e-06, |
| "loss": 0.5468, |
| "step": 22040 |
| }, |
| { |
| "epoch": 0.41475228374621337, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.95992445873564e-06, |
| "loss": 0.5365, |
| "step": 22060 |
| }, |
| { |
| "epoch": 0.41512830576230236, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.959796122283016e-06, |
| "loss": 0.5475, |
| "step": 22080 |
| }, |
| { |
| "epoch": 0.4155043277783914, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.959667581498249e-06, |
| "loss": 0.5538, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.41588034979448046, |
| "grad_norm": 0.490234375, |
| "learning_rate": 9.959538836386635e-06, |
| "loss": 0.5504, |
| "step": 22120 |
| }, |
| { |
| "epoch": 0.4162563718105695, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.959409886953477e-06, |
| "loss": 0.5493, |
| "step": 22140 |
| }, |
| { |
| "epoch": 0.41663239382665856, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.95928073320409e-06, |
| "loss": 0.554, |
| "step": 22160 |
| }, |
| { |
| "epoch": 0.4170084158427476, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.95915137514379e-06, |
| "loss": 0.5457, |
| "step": 22180 |
| }, |
| { |
| "epoch": 0.4173844378588366, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.95902181277791e-06, |
| "loss": 0.5512, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.41776045987492566, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.958892046111786e-06, |
| "loss": 0.5477, |
| "step": 22220 |
| }, |
| { |
| "epoch": 0.4181364818910147, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.958762075150767e-06, |
| "loss": 0.5559, |
| "step": 22240 |
| }, |
| { |
| "epoch": 0.41851250390710376, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.958631899900203e-06, |
| "loss": 0.5452, |
| "step": 22260 |
| }, |
| { |
| "epoch": 0.4188885259231928, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.958501520365463e-06, |
| "loss": 0.5511, |
| "step": 22280 |
| }, |
| { |
| "epoch": 0.41926454793928186, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.958370936551911e-06, |
| "loss": 0.5511, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.4196405699553709, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.95824014846493e-06, |
| "loss": 0.5567, |
| "step": 22320 |
| }, |
| { |
| "epoch": 0.4200165919714599, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.958109156109912e-06, |
| "loss": 0.5426, |
| "step": 22340 |
| }, |
| { |
| "epoch": 0.42039261398754896, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.957977959492247e-06, |
| "loss": 0.5453, |
| "step": 22360 |
| }, |
| { |
| "epoch": 0.420768636003638, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.957846558617345e-06, |
| "loss": 0.5563, |
| "step": 22380 |
| }, |
| { |
| "epoch": 0.42114465801972706, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.957714953490616e-06, |
| "loss": 0.5543, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.4215206800358161, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.957583144117483e-06, |
| "loss": 0.5434, |
| "step": 22420 |
| }, |
| { |
| "epoch": 0.42189670205190516, |
| "grad_norm": 0.625, |
| "learning_rate": 9.95745113050338e-06, |
| "loss": 0.5512, |
| "step": 22440 |
| }, |
| { |
| "epoch": 0.42227272406799415, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.957318912653738e-06, |
| "loss": 0.5338, |
| "step": 22460 |
| }, |
| { |
| "epoch": 0.4226487460840832, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.95718649057401e-06, |
| "loss": 0.5483, |
| "step": 22480 |
| }, |
| { |
| "epoch": 0.42302476810017225, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.95705386426965e-06, |
| "loss": 0.5552, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.4234007901162613, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.956921033746123e-06, |
| "loss": 0.5355, |
| "step": 22520 |
| }, |
| { |
| "epoch": 0.42377681213235036, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.956787999008898e-06, |
| "loss": 0.5556, |
| "step": 22540 |
| }, |
| { |
| "epoch": 0.4241528341484394, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.956654760063458e-06, |
| "loss": 0.543, |
| "step": 22560 |
| }, |
| { |
| "epoch": 0.42452885616452846, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.956521316915293e-06, |
| "loss": 0.5503, |
| "step": 22580 |
| }, |
| { |
| "epoch": 0.42490487818061745, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.956387669569898e-06, |
| "loss": 0.5502, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.4252809001967065, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.956253818032782e-06, |
| "loss": 0.5624, |
| "step": 22620 |
| }, |
| { |
| "epoch": 0.42565692221279555, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.956119762309456e-06, |
| "loss": 0.5492, |
| "step": 22640 |
| }, |
| { |
| "epoch": 0.4260329442288846, |
| "grad_norm": 0.75, |
| "learning_rate": 9.955985502405446e-06, |
| "loss": 0.5575, |
| "step": 22660 |
| }, |
| { |
| "epoch": 0.42640896624497365, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.955851038326279e-06, |
| "loss": 0.5539, |
| "step": 22680 |
| }, |
| { |
| "epoch": 0.4267849882610627, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.9557163700775e-06, |
| "loss": 0.5512, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.42716101027715175, |
| "grad_norm": 0.75, |
| "learning_rate": 9.955581497664653e-06, |
| "loss": 0.5484, |
| "step": 22720 |
| }, |
| { |
| "epoch": 0.42753703229324075, |
| "grad_norm": 0.625, |
| "learning_rate": 9.955446421093297e-06, |
| "loss": 0.5482, |
| "step": 22740 |
| }, |
| { |
| "epoch": 0.4279130543093298, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.955311140368995e-06, |
| "loss": 0.5572, |
| "step": 22760 |
| }, |
| { |
| "epoch": 0.42828907632541885, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.955175655497321e-06, |
| "loss": 0.5442, |
| "step": 22780 |
| }, |
| { |
| "epoch": 0.4286650983415079, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.955039966483856e-06, |
| "loss": 0.5471, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.42904112035759695, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.954904073334191e-06, |
| "loss": 0.5565, |
| "step": 22820 |
| }, |
| { |
| "epoch": 0.429417142373686, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.954767976053925e-06, |
| "loss": 0.5479, |
| "step": 22840 |
| }, |
| { |
| "epoch": 0.429793164389775, |
| "grad_norm": 0.53125, |
| "learning_rate": 9.954631674648662e-06, |
| "loss": 0.5467, |
| "step": 22860 |
| }, |
| { |
| "epoch": 0.43016918640586405, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.954495169124022e-06, |
| "loss": 0.5525, |
| "step": 22880 |
| }, |
| { |
| "epoch": 0.4305452084219531, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.954358459485625e-06, |
| "loss": 0.5517, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.43092123043804215, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.954221545739102e-06, |
| "loss": 0.5473, |
| "step": 22920 |
| }, |
| { |
| "epoch": 0.4312972524541312, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.954084427890099e-06, |
| "loss": 0.558, |
| "step": 22940 |
| }, |
| { |
| "epoch": 0.43167327447022025, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.953947105944259e-06, |
| "loss": 0.5496, |
| "step": 22960 |
| }, |
| { |
| "epoch": 0.4320492964863093, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.953809579907244e-06, |
| "loss": 0.5563, |
| "step": 22980 |
| }, |
| { |
| "epoch": 0.4324253185023983, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.953671849784717e-06, |
| "loss": 0.5504, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.43280134051848734, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.95353391558235e-06, |
| "loss": 0.5524, |
| "step": 23020 |
| }, |
| { |
| "epoch": 0.4331773625345764, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.953395777305832e-06, |
| "loss": 0.5537, |
| "step": 23040 |
| }, |
| { |
| "epoch": 0.43355338455066544, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.953257434960848e-06, |
| "loss": 0.5453, |
| "step": 23060 |
| }, |
| { |
| "epoch": 0.4339294065667545, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.953118888553102e-06, |
| "loss": 0.5496, |
| "step": 23080 |
| }, |
| { |
| "epoch": 0.43430542858284354, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.9529801380883e-06, |
| "loss": 0.5505, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.4346814505989326, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.952841183572154e-06, |
| "loss": 0.5462, |
| "step": 23120 |
| }, |
| { |
| "epoch": 0.4350574726150216, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.952702025010397e-06, |
| "loss": 0.5504, |
| "step": 23140 |
| }, |
| { |
| "epoch": 0.43543349463111064, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.952562662408755e-06, |
| "loss": 0.5546, |
| "step": 23160 |
| }, |
| { |
| "epoch": 0.4358095166471997, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.952423095772971e-06, |
| "loss": 0.5504, |
| "step": 23180 |
| }, |
| { |
| "epoch": 0.43618553866328874, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.952283325108799e-06, |
| "loss": 0.5514, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.4365615606793778, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.95214335042199e-06, |
| "loss": 0.5484, |
| "step": 23220 |
| }, |
| { |
| "epoch": 0.43693758269546684, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.952003171718316e-06, |
| "loss": 0.5519, |
| "step": 23240 |
| }, |
| { |
| "epoch": 0.43731360471155584, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.951862789003552e-06, |
| "loss": 0.5436, |
| "step": 23260 |
| }, |
| { |
| "epoch": 0.4376896267276449, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.951722202283479e-06, |
| "loss": 0.5415, |
| "step": 23280 |
| }, |
| { |
| "epoch": 0.43806564874373394, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.95158141156389e-06, |
| "loss": 0.5525, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.438441670759823, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.951440416850582e-06, |
| "loss": 0.5553, |
| "step": 23320 |
| }, |
| { |
| "epoch": 0.43881769277591204, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.951299218149371e-06, |
| "loss": 0.5476, |
| "step": 23340 |
| }, |
| { |
| "epoch": 0.4391937147920011, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.951157815466069e-06, |
| "loss": 0.5412, |
| "step": 23360 |
| }, |
| { |
| "epoch": 0.43956973680809014, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.9510162088065e-06, |
| "loss": 0.5443, |
| "step": 23380 |
| }, |
| { |
| "epoch": 0.43994575882417913, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.950874398176503e-06, |
| "loss": 0.5535, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.4403217808402682, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.950732383581915e-06, |
| "loss": 0.5426, |
| "step": 23420 |
| }, |
| { |
| "epoch": 0.44069780285635723, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.95059016502859e-06, |
| "loss": 0.5538, |
| "step": 23440 |
| }, |
| { |
| "epoch": 0.4410738248724463, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.950447742522387e-06, |
| "loss": 0.5513, |
| "step": 23460 |
| }, |
| { |
| "epoch": 0.44144984688853534, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.950305116069171e-06, |
| "loss": 0.5618, |
| "step": 23480 |
| }, |
| { |
| "epoch": 0.4418258689046244, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.95016228567482e-06, |
| "loss": 0.5504, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.4422018909207134, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.95001925134522e-06, |
| "loss": 0.5455, |
| "step": 23520 |
| }, |
| { |
| "epoch": 0.44257791293680243, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.949876013086258e-06, |
| "loss": 0.5425, |
| "step": 23540 |
| }, |
| { |
| "epoch": 0.4429539349528915, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.94973257090384e-06, |
| "loss": 0.5531, |
| "step": 23560 |
| }, |
| { |
| "epoch": 0.44332995696898053, |
| "grad_norm": 0.625, |
| "learning_rate": 9.949588924803875e-06, |
| "loss": 0.5569, |
| "step": 23580 |
| }, |
| { |
| "epoch": 0.4437059789850696, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.949445074792279e-06, |
| "loss": 0.5535, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.44408200100115863, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.94930102087498e-06, |
| "loss": 0.5458, |
| "step": 23620 |
| }, |
| { |
| "epoch": 0.4444580230172477, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.949156763057912e-06, |
| "loss": 0.5536, |
| "step": 23640 |
| }, |
| { |
| "epoch": 0.4448340450333367, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.949012301347016e-06, |
| "loss": 0.554, |
| "step": 23660 |
| }, |
| { |
| "epoch": 0.44521006704942573, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.948867635748248e-06, |
| "loss": 0.5641, |
| "step": 23680 |
| }, |
| { |
| "epoch": 0.4455860890655148, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.948722766267565e-06, |
| "loss": 0.5517, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.44596211108160383, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.948577692910934e-06, |
| "loss": 0.5528, |
| "step": 23720 |
| }, |
| { |
| "epoch": 0.4463381330976929, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.948432415684335e-06, |
| "loss": 0.5397, |
| "step": 23740 |
| }, |
| { |
| "epoch": 0.44671415511378193, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.948286934593751e-06, |
| "loss": 0.5533, |
| "step": 23760 |
| }, |
| { |
| "epoch": 0.447090177129871, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.948141249645176e-06, |
| "loss": 0.5564, |
| "step": 23780 |
| }, |
| { |
| "epoch": 0.44746619914596, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.94799536084461e-06, |
| "loss": 0.5378, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.447842221162049, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.947849268198067e-06, |
| "loss": 0.5495, |
| "step": 23820 |
| }, |
| { |
| "epoch": 0.4482182431781381, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.947702971711564e-06, |
| "loss": 0.5491, |
| "step": 23840 |
| }, |
| { |
| "epoch": 0.4485942651942271, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.947556471391127e-06, |
| "loss": 0.5517, |
| "step": 23860 |
| }, |
| { |
| "epoch": 0.4489702872103162, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.947409767242793e-06, |
| "loss": 0.5504, |
| "step": 23880 |
| }, |
| { |
| "epoch": 0.4493463092264052, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.947262859272605e-06, |
| "loss": 0.5478, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.4497223312424942, |
| "grad_norm": 0.5234375, |
| "learning_rate": 9.947115747486616e-06, |
| "loss": 0.5491, |
| "step": 23920 |
| }, |
| { |
| "epoch": 0.4500983532585833, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.946968431890884e-06, |
| "loss": 0.5444, |
| "step": 23940 |
| }, |
| { |
| "epoch": 0.4504743752746723, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.946820912491483e-06, |
| "loss": 0.5486, |
| "step": 23960 |
| }, |
| { |
| "epoch": 0.4508503972907614, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.946673189294486e-06, |
| "loss": 0.5474, |
| "step": 23980 |
| }, |
| { |
| "epoch": 0.4512264193068504, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.94652526230598e-06, |
| "loss": 0.5411, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.4516024413229395, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.94637713153206e-06, |
| "loss": 0.5557, |
| "step": 24020 |
| }, |
| { |
| "epoch": 0.4519784633390285, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.94622879697883e-06, |
| "loss": 0.5471, |
| "step": 24040 |
| }, |
| { |
| "epoch": 0.4523544853551175, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.9460802586524e-06, |
| "loss": 0.5487, |
| "step": 24060 |
| }, |
| { |
| "epoch": 0.45273050737120657, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.945931516558886e-06, |
| "loss": 0.5558, |
| "step": 24080 |
| }, |
| { |
| "epoch": 0.4531065293872956, |
| "grad_norm": 0.625, |
| "learning_rate": 9.945782570704421e-06, |
| "loss": 0.5548, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.45348255140338467, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.945633421095137e-06, |
| "loss": 0.5387, |
| "step": 24120 |
| }, |
| { |
| "epoch": 0.4538585734194737, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.945484067737182e-06, |
| "loss": 0.5526, |
| "step": 24140 |
| }, |
| { |
| "epoch": 0.45423459543556277, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.945334510636707e-06, |
| "loss": 0.5455, |
| "step": 24160 |
| }, |
| { |
| "epoch": 0.4546106174516518, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.945184749799874e-06, |
| "loss": 0.5458, |
| "step": 24180 |
| }, |
| { |
| "epoch": 0.4549866394677408, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.945034785232853e-06, |
| "loss": 0.5505, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.45536266148382987, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.944884616941822e-06, |
| "loss": 0.5433, |
| "step": 24220 |
| }, |
| { |
| "epoch": 0.4557386834999189, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.944734244932968e-06, |
| "loss": 0.5481, |
| "step": 24240 |
| }, |
| { |
| "epoch": 0.45611470551600797, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.944583669212485e-06, |
| "loss": 0.5555, |
| "step": 24260 |
| }, |
| { |
| "epoch": 0.456490727532097, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.944432889786578e-06, |
| "loss": 0.5525, |
| "step": 24280 |
| }, |
| { |
| "epoch": 0.45686674954818607, |
| "grad_norm": 0.5546875, |
| "learning_rate": 9.944281906661455e-06, |
| "loss": 0.5402, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.45724277156427506, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.944130719843341e-06, |
| "loss": 0.5559, |
| "step": 24320 |
| }, |
| { |
| "epoch": 0.4576187935803641, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.94397932933846e-06, |
| "loss": 0.5475, |
| "step": 24340 |
| }, |
| { |
| "epoch": 0.45799481559645316, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.943827735153055e-06, |
| "loss": 0.5473, |
| "step": 24360 |
| }, |
| { |
| "epoch": 0.4583708376125422, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.943675937293365e-06, |
| "loss": 0.5471, |
| "step": 24380 |
| }, |
| { |
| "epoch": 0.45874685962863126, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.943523935765647e-06, |
| "loss": 0.5487, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.4591228816447203, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.943371730576164e-06, |
| "loss": 0.5439, |
| "step": 24420 |
| }, |
| { |
| "epoch": 0.45949890366080937, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.943219321731183e-06, |
| "loss": 0.5513, |
| "step": 24440 |
| }, |
| { |
| "epoch": 0.45987492567689836, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.943066709236985e-06, |
| "loss": 0.5473, |
| "step": 24460 |
| }, |
| { |
| "epoch": 0.4602509476929874, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.942913893099859e-06, |
| "loss": 0.553, |
| "step": 24480 |
| }, |
| { |
| "epoch": 0.46062696970907646, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.942760873326096e-06, |
| "loss": 0.5543, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.4610029917251655, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.942607649922005e-06, |
| "loss": 0.5534, |
| "step": 24520 |
| }, |
| { |
| "epoch": 0.46137901374125456, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.942454222893895e-06, |
| "loss": 0.5498, |
| "step": 24540 |
| }, |
| { |
| "epoch": 0.4617550357573436, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.94230059224809e-06, |
| "loss": 0.5488, |
| "step": 24560 |
| }, |
| { |
| "epoch": 0.4621310577734326, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.942146757990916e-06, |
| "loss": 0.5436, |
| "step": 24580 |
| }, |
| { |
| "epoch": 0.46250707978952166, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.941992720128713e-06, |
| "loss": 0.5396, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.4628831018056107, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.941838478667825e-06, |
| "loss": 0.5523, |
| "step": 24620 |
| }, |
| { |
| "epoch": 0.46325912382169976, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.941684033614607e-06, |
| "loss": 0.5429, |
| "step": 24640 |
| }, |
| { |
| "epoch": 0.4636351458377888, |
| "grad_norm": 0.51953125, |
| "learning_rate": 9.941529384975423e-06, |
| "loss": 0.549, |
| "step": 24660 |
| }, |
| { |
| "epoch": 0.46401116785387786, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.941374532756644e-06, |
| "loss": 0.5485, |
| "step": 24680 |
| }, |
| { |
| "epoch": 0.4643871898699669, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.941219476964648e-06, |
| "loss": 0.5436, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.4647632118860559, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.941064217605824e-06, |
| "loss": 0.5441, |
| "step": 24720 |
| }, |
| { |
| "epoch": 0.46513923390214496, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.94090875468657e-06, |
| "loss": 0.5515, |
| "step": 24740 |
| }, |
| { |
| "epoch": 0.465515255918234, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.940753088213287e-06, |
| "loss": 0.5519, |
| "step": 24760 |
| }, |
| { |
| "epoch": 0.46589127793432306, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.94059721819239e-06, |
| "loss": 0.5534, |
| "step": 24780 |
| }, |
| { |
| "epoch": 0.4662672999504121, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.940441144630299e-06, |
| "loss": 0.5367, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.46664332196650116, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.940284867533447e-06, |
| "loss": 0.5532, |
| "step": 24820 |
| }, |
| { |
| "epoch": 0.4670193439825902, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.940128386908272e-06, |
| "loss": 0.5458, |
| "step": 24840 |
| }, |
| { |
| "epoch": 0.4673953659986792, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.939971702761217e-06, |
| "loss": 0.5565, |
| "step": 24860 |
| }, |
| { |
| "epoch": 0.46777138801476825, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.93981481509874e-06, |
| "loss": 0.5472, |
| "step": 24880 |
| }, |
| { |
| "epoch": 0.4681474100308573, |
| "grad_norm": 0.625, |
| "learning_rate": 9.939657723927305e-06, |
| "loss": 0.5492, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.46852343204694635, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.93950042925338e-06, |
| "loss": 0.5467, |
| "step": 24920 |
| }, |
| { |
| "epoch": 0.4688994540630354, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.93934293108345e-06, |
| "loss": 0.5484, |
| "step": 24940 |
| }, |
| { |
| "epoch": 0.46927547607912445, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.939185229424e-06, |
| "loss": 0.5557, |
| "step": 24960 |
| }, |
| { |
| "epoch": 0.46965149809521345, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.939027324281529e-06, |
| "loss": 0.5579, |
| "step": 24980 |
| }, |
| { |
| "epoch": 0.4700275201113025, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.938869215662541e-06, |
| "loss": 0.5543, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.47040354212739155, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.93871090357355e-06, |
| "loss": 0.5489, |
| "step": 25020 |
| }, |
| { |
| "epoch": 0.4707795641434806, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.938552388021079e-06, |
| "loss": 0.5477, |
| "step": 25040 |
| }, |
| { |
| "epoch": 0.47115558615956965, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.938393669011657e-06, |
| "loss": 0.5491, |
| "step": 25060 |
| }, |
| { |
| "epoch": 0.4715316081756587, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.938234746551825e-06, |
| "loss": 0.5503, |
| "step": 25080 |
| }, |
| { |
| "epoch": 0.47190763019174775, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.938075620648127e-06, |
| "loss": 0.5569, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.47228365220783675, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.937916291307122e-06, |
| "loss": 0.5441, |
| "step": 25120 |
| }, |
| { |
| "epoch": 0.4726596742239258, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.937756758535371e-06, |
| "loss": 0.555, |
| "step": 25140 |
| }, |
| { |
| "epoch": 0.47303569624001485, |
| "grad_norm": 0.52734375, |
| "learning_rate": 9.937597022339448e-06, |
| "loss": 0.5498, |
| "step": 25160 |
| }, |
| { |
| "epoch": 0.4734117182561039, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.937437082725934e-06, |
| "loss": 0.5497, |
| "step": 25180 |
| }, |
| { |
| "epoch": 0.47378774027219295, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.937276939701418e-06, |
| "loss": 0.5402, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.474163762288282, |
| "grad_norm": 0.625, |
| "learning_rate": 9.937116593272499e-06, |
| "loss": 0.5427, |
| "step": 25220 |
| }, |
| { |
| "epoch": 0.47453978430437105, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.936956043445778e-06, |
| "loss": 0.5426, |
| "step": 25240 |
| }, |
| { |
| "epoch": 0.47491580632046004, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.936795290227875e-06, |
| "loss": 0.5425, |
| "step": 25260 |
| }, |
| { |
| "epoch": 0.4752918283365491, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.936634333625407e-06, |
| "loss": 0.5603, |
| "step": 25280 |
| }, |
| { |
| "epoch": 0.47566785035263814, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.936473173645012e-06, |
| "loss": 0.5467, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.4760438723687272, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.936311810293322e-06, |
| "loss": 0.5531, |
| "step": 25320 |
| }, |
| { |
| "epoch": 0.47641989438481624, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.93615024357699e-06, |
| "loss": 0.5522, |
| "step": 25340 |
| }, |
| { |
| "epoch": 0.4767959164009053, |
| "grad_norm": 0.5546875, |
| "learning_rate": 9.935988473502671e-06, |
| "loss": 0.5442, |
| "step": 25360 |
| }, |
| { |
| "epoch": 0.4771719384169943, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.935826500077029e-06, |
| "loss": 0.5484, |
| "step": 25380 |
| }, |
| { |
| "epoch": 0.47754796043308334, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.935664323306737e-06, |
| "loss": 0.5355, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.4779239824491724, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.935501943198478e-06, |
| "loss": 0.5461, |
| "step": 25420 |
| }, |
| { |
| "epoch": 0.47830000446526144, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.935339359758938e-06, |
| "loss": 0.5574, |
| "step": 25440 |
| }, |
| { |
| "epoch": 0.4786760264813505, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.935176572994816e-06, |
| "loss": 0.5446, |
| "step": 25460 |
| }, |
| { |
| "epoch": 0.47905204849743954, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.935013582912822e-06, |
| "loss": 0.5464, |
| "step": 25480 |
| }, |
| { |
| "epoch": 0.4794280705135286, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.934850389519666e-06, |
| "loss": 0.556, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.4798040925296176, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.934686992822076e-06, |
| "loss": 0.5436, |
| "step": 25520 |
| }, |
| { |
| "epoch": 0.48018011454570664, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.93452339282678e-06, |
| "loss": 0.5428, |
| "step": 25540 |
| }, |
| { |
| "epoch": 0.4805561365617957, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.934359589540519e-06, |
| "loss": 0.5511, |
| "step": 25560 |
| }, |
| { |
| "epoch": 0.48093215857788474, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.934195582970042e-06, |
| "loss": 0.5386, |
| "step": 25580 |
| }, |
| { |
| "epoch": 0.4813081805939738, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.934031373122104e-06, |
| "loss": 0.5477, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.48168420261006284, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.933866960003471e-06, |
| "loss": 0.5394, |
| "step": 25620 |
| }, |
| { |
| "epoch": 0.48206022462615183, |
| "grad_norm": 0.515625, |
| "learning_rate": 9.933702343620917e-06, |
| "loss": 0.5408, |
| "step": 25640 |
| }, |
| { |
| "epoch": 0.4824362466422409, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.933537523981226e-06, |
| "loss": 0.5506, |
| "step": 25660 |
| }, |
| { |
| "epoch": 0.48281226865832994, |
| "grad_norm": 0.5546875, |
| "learning_rate": 9.933372501091182e-06, |
| "loss": 0.5436, |
| "step": 25680 |
| }, |
| { |
| "epoch": 0.483188290674419, |
| "grad_norm": 0.5390625, |
| "learning_rate": 9.933207274957588e-06, |
| "loss": 0.5479, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.48356431269050804, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.93304184558725e-06, |
| "loss": 0.5432, |
| "step": 25720 |
| }, |
| { |
| "epoch": 0.4839403347065971, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.932876212986984e-06, |
| "loss": 0.5365, |
| "step": 25740 |
| }, |
| { |
| "epoch": 0.48431635672268614, |
| "grad_norm": 0.546875, |
| "learning_rate": 9.932710377163612e-06, |
| "loss": 0.558, |
| "step": 25760 |
| }, |
| { |
| "epoch": 0.48469237873877513, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.932544338123969e-06, |
| "loss": 0.5381, |
| "step": 25780 |
| }, |
| { |
| "epoch": 0.4850684007548642, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.932378095874893e-06, |
| "loss": 0.5481, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.48544442277095323, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.932211650423234e-06, |
| "loss": 0.5428, |
| "step": 25820 |
| }, |
| { |
| "epoch": 0.4858204447870423, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.932045001775846e-06, |
| "loss": 0.5462, |
| "step": 25840 |
| }, |
| { |
| "epoch": 0.48619646680313133, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.9318781499396e-06, |
| "loss": 0.5425, |
| "step": 25860 |
| }, |
| { |
| "epoch": 0.4865724888192204, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.931711094921363e-06, |
| "loss": 0.5506, |
| "step": 25880 |
| }, |
| { |
| "epoch": 0.48694851083530943, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.931543836728025e-06, |
| "loss": 0.5545, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.48732453285139843, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.931376375366471e-06, |
| "loss": 0.5557, |
| "step": 25920 |
| }, |
| { |
| "epoch": 0.4877005548674875, |
| "grad_norm": 0.5234375, |
| "learning_rate": 9.931208710843603e-06, |
| "loss": 0.5373, |
| "step": 25940 |
| }, |
| { |
| "epoch": 0.48807657688357653, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.931040843166326e-06, |
| "loss": 0.5354, |
| "step": 25960 |
| }, |
| { |
| "epoch": 0.4884525988996656, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.930872772341558e-06, |
| "loss": 0.553, |
| "step": 25980 |
| }, |
| { |
| "epoch": 0.48882862091575463, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.930704498376223e-06, |
| "loss": 0.5476, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.4892046429318437, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.93053602127725e-06, |
| "loss": 0.5514, |
| "step": 26020 |
| }, |
| { |
| "epoch": 0.4895806649479327, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.930367341051586e-06, |
| "loss": 0.5403, |
| "step": 26040 |
| }, |
| { |
| "epoch": 0.4899566869640217, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.930198457706176e-06, |
| "loss": 0.5484, |
| "step": 26060 |
| }, |
| { |
| "epoch": 0.4903327089801108, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.930029371247975e-06, |
| "loss": 0.5646, |
| "step": 26080 |
| }, |
| { |
| "epoch": 0.4907087309961998, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.929860081683954e-06, |
| "loss": 0.5528, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.4910847530122889, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.929690589021087e-06, |
| "loss": 0.5439, |
| "step": 26120 |
| }, |
| { |
| "epoch": 0.4914607750283779, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.929520893266355e-06, |
| "loss": 0.5472, |
| "step": 26140 |
| }, |
| { |
| "epoch": 0.491836797044467, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.929350994426751e-06, |
| "loss": 0.5466, |
| "step": 26160 |
| }, |
| { |
| "epoch": 0.492212819060556, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.929180892509272e-06, |
| "loss": 0.541, |
| "step": 26180 |
| }, |
| { |
| "epoch": 0.492588841076645, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.929010587520926e-06, |
| "loss": 0.5494, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.4929648630927341, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.92884007946873e-06, |
| "loss": 0.5538, |
| "step": 26220 |
| }, |
| { |
| "epoch": 0.4933408851088231, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.928669368359706e-06, |
| "loss": 0.5601, |
| "step": 26240 |
| }, |
| { |
| "epoch": 0.4937169071249122, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.928498454200894e-06, |
| "loss": 0.5486, |
| "step": 26260 |
| }, |
| { |
| "epoch": 0.4940929291410012, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.928327336999329e-06, |
| "loss": 0.5432, |
| "step": 26280 |
| }, |
| { |
| "epoch": 0.4944689511570903, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.928156016762061e-06, |
| "loss": 0.5413, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.49484497317317927, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.92798449349615e-06, |
| "loss": 0.551, |
| "step": 26320 |
| }, |
| { |
| "epoch": 0.4952209951892683, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.927812767208662e-06, |
| "loss": 0.5532, |
| "step": 26340 |
| }, |
| { |
| "epoch": 0.49559701720535737, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.92764083790667e-06, |
| "loss": 0.5397, |
| "step": 26360 |
| }, |
| { |
| "epoch": 0.4959730392214464, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.927468705597258e-06, |
| "loss": 0.548, |
| "step": 26380 |
| }, |
| { |
| "epoch": 0.49634906123753547, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.92729637028752e-06, |
| "loss": 0.5562, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.4967250832536245, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.927123831984553e-06, |
| "loss": 0.538, |
| "step": 26420 |
| }, |
| { |
| "epoch": 0.4971011052697135, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.926951090695466e-06, |
| "loss": 0.553, |
| "step": 26440 |
| }, |
| { |
| "epoch": 0.49747712728580257, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.926778146427374e-06, |
| "loss": 0.552, |
| "step": 26460 |
| }, |
| { |
| "epoch": 0.4978531493018916, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.926604999187405e-06, |
| "loss": 0.5408, |
| "step": 26480 |
| }, |
| { |
| "epoch": 0.49822917131798067, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.92643164898269e-06, |
| "loss": 0.549, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.4986051933340697, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.926258095820372e-06, |
| "loss": 0.54, |
| "step": 26520 |
| }, |
| { |
| "epoch": 0.49898121535015877, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.9260843397076e-06, |
| "loss": 0.545, |
| "step": 26540 |
| }, |
| { |
| "epoch": 0.4993572373662478, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.925910380651531e-06, |
| "loss": 0.5392, |
| "step": 26560 |
| }, |
| { |
| "epoch": 0.4997332593823368, |
| "grad_norm": 0.625, |
| "learning_rate": 9.925736218659333e-06, |
| "loss": 0.5557, |
| "step": 26580 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 319134, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 13297, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.9989602005747545e+20, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|