{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 51.01892063492063,
  "eval_steps": 500,
  "global_step": 15700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0015873015873015873,
      "grad_norm": 9.738715171813965,
      "learning_rate": 1.5862944162436548e-07,
      "loss": 5.3155,
      "step": 25
    },
    {
      "epoch": 0.0031746031746031746,
      "grad_norm": 32.81618881225586,
      "learning_rate": 3.1725888324873095e-07,
      "loss": 5.0547,
      "step": 50
    },
    {
      "epoch": 0.004761904761904762,
      "grad_norm": 32.67608642578125,
      "learning_rate": 4.7588832487309643e-07,
      "loss": 5.1135,
      "step": 75
    },
    {
      "epoch": 0.006349206349206349,
      "grad_norm": 10.283950805664062,
      "learning_rate": 6.345177664974619e-07,
      "loss": 4.9131,
      "step": 100
    },
    {
      "epoch": 0.007936507936507936,
      "grad_norm": 21.210186004638672,
      "learning_rate": 7.931472081218275e-07,
      "loss": 4.9171,
      "step": 125
    },
    {
      "epoch": 0.009523809523809525,
      "grad_norm": 48.682411193847656,
      "learning_rate": 9.517766497461929e-07,
      "loss": 4.993,
      "step": 150
    },
    {
      "epoch": 0.011111111111111112,
      "grad_norm": 12.522156715393066,
      "learning_rate": 1.1104060913705584e-06,
      "loss": 4.966,
      "step": 175
    },
    {
      "epoch": 0.012698412698412698,
      "grad_norm": 32.584293365478516,
      "learning_rate": 1.2690355329949238e-06,
      "loss": 4.7858,
      "step": 200
    },
    {
      "epoch": 0.014285714285714285,
      "grad_norm": 22.508533477783203,
      "learning_rate": 1.4276649746192894e-06,
      "loss": 4.713,
      "step": 225
    },
    {
      "epoch": 0.015873015873015872,
      "grad_norm": 10.486129760742188,
      "learning_rate": 1.586294416243655e-06,
      "loss": 4.7809,
      "step": 250
    },
    {
      "epoch": 0.01746031746031746,
      "grad_norm": 10.104169845581055,
      "learning_rate": 1.7449238578680206e-06,
      "loss": 4.8488,
      "step": 275
    },
    {
      "epoch": 0.01904761904761905,
      "grad_norm": 20.557931900024414,
      "learning_rate": 1.9035532994923857e-06,
      "loss": 4.5276,
      "step": 300
    },
    {
      "epoch": 1.0014603174603174,
      "grad_norm": 14.145257949829102,
      "learning_rate": 2.0621827411167515e-06,
      "loss": 4.641,
      "step": 325
    },
    {
      "epoch": 1.003047619047619,
      "grad_norm": 11.256460189819336,
      "learning_rate": 2.220812182741117e-06,
      "loss": 4.4173,
      "step": 350
    },
    {
      "epoch": 1.0046349206349205,
      "grad_norm": 13.009560585021973,
      "learning_rate": 2.3794416243654827e-06,
      "loss": 4.2029,
      "step": 375
    },
    {
      "epoch": 1.0062222222222221,
      "grad_norm": 10.433947563171387,
      "learning_rate": 2.5380710659898476e-06,
      "loss": 4.0364,
      "step": 400
    },
    {
      "epoch": 1.0078095238095237,
      "grad_norm": 7.473550319671631,
      "learning_rate": 2.6967005076142134e-06,
      "loss": 3.8754,
      "step": 425
    },
    {
      "epoch": 1.0093968253968253,
      "grad_norm": 12.957052230834961,
      "learning_rate": 2.855329949238579e-06,
      "loss": 4.0458,
      "step": 450
    },
    {
      "epoch": 1.0109841269841269,
      "grad_norm": 9.960160255432129,
      "learning_rate": 3.0139593908629446e-06,
      "loss": 3.7578,
      "step": 475
    },
    {
      "epoch": 1.0125714285714287,
      "grad_norm": 12.287276268005371,
      "learning_rate": 3.17258883248731e-06,
      "loss": 3.6334,
      "step": 500
    },
    {
      "epoch": 1.0141587301587303,
      "grad_norm": 12.79332160949707,
      "learning_rate": 3.3312182741116753e-06,
      "loss": 3.4685,
      "step": 525
    },
    {
      "epoch": 1.0157460317460318,
      "grad_norm": 8.90628719329834,
      "learning_rate": 3.489847715736041e-06,
      "loss": 3.3762,
      "step": 550
    },
    {
      "epoch": 1.0173333333333334,
      "grad_norm": 12.635107040405273,
      "learning_rate": 3.6484771573604065e-06,
      "loss": 3.305,
      "step": 575
    },
    {
      "epoch": 1.018920634920635,
      "grad_norm": 15.49641227722168,
      "learning_rate": 3.8071065989847715e-06,
      "loss": 2.9876,
      "step": 600
    },
    {
      "epoch": 2.001333333333333,
      "grad_norm": 10.022441864013672,
      "learning_rate": 3.965736040609137e-06,
      "loss": 3.0157,
      "step": 625
    },
    {
      "epoch": 2.0029206349206348,
      "grad_norm": 9.137686729431152,
      "learning_rate": 4.124365482233503e-06,
      "loss": 2.8705,
      "step": 650
    },
    {
      "epoch": 2.0045079365079363,
      "grad_norm": 10.643122673034668,
      "learning_rate": 4.282994923857868e-06,
      "loss": 2.5582,
      "step": 675
    },
    {
      "epoch": 2.006095238095238,
      "grad_norm": 8.754136085510254,
      "learning_rate": 4.441624365482234e-06,
      "loss": 2.419,
      "step": 700
    },
    {
      "epoch": 2.0076825396825395,
      "grad_norm": 9.287137031555176,
      "learning_rate": 4.6002538071066e-06,
      "loss": 2.2705,
      "step": 725
    },
    {
      "epoch": 2.009269841269841,
      "grad_norm": 10.788775444030762,
      "learning_rate": 4.758883248730965e-06,
      "loss": 2.2656,
      "step": 750
    },
    {
      "epoch": 2.0108571428571427,
      "grad_norm": 9.800201416015625,
      "learning_rate": 4.91751269035533e-06,
      "loss": 1.9858,
      "step": 775
    },
    {
      "epoch": 2.0124444444444443,
      "grad_norm": 10.444952011108398,
      "learning_rate": 4.995989840930358e-06,
      "loss": 1.8272,
      "step": 800
    },
    {
      "epoch": 2.014031746031746,
      "grad_norm": 11.801770210266113,
      "learning_rate": 4.987635342868601e-06,
      "loss": 1.7213,
      "step": 825
    },
    {
      "epoch": 2.0156190476190474,
      "grad_norm": 8.304800033569336,
      "learning_rate": 4.979280844806844e-06,
      "loss": 1.5774,
      "step": 850
    },
    {
      "epoch": 2.017206349206349,
      "grad_norm": 10.936843872070312,
      "learning_rate": 4.970926346745088e-06,
      "loss": 1.5306,
      "step": 875
    },
    {
      "epoch": 2.0187936507936506,
      "grad_norm": 12.08633804321289,
      "learning_rate": 4.962571848683331e-06,
      "loss": 1.4236,
      "step": 900
    },
    {
      "epoch": 3.001206349206349,
      "grad_norm": 12.72133731842041,
      "learning_rate": 4.954217350621575e-06,
      "loss": 1.3777,
      "step": 925
    },
    {
      "epoch": 3.002793650793651,
      "grad_norm": 7.71216344833374,
      "learning_rate": 4.945862852559818e-06,
      "loss": 1.3662,
      "step": 950
    },
    {
      "epoch": 3.0043809523809526,
      "grad_norm": 4.924420356750488,
      "learning_rate": 4.937508354498062e-06,
      "loss": 1.1704,
      "step": 975
    },
    {
      "epoch": 3.005968253968254,
      "grad_norm": 9.430765151977539,
      "learning_rate": 4.9291538564363055e-06,
      "loss": 1.1824,
      "step": 1000
    },
    {
      "epoch": 3.0075555555555558,
      "grad_norm": 8.154838562011719,
      "learning_rate": 4.9207993583745495e-06,
      "loss": 1.1026,
      "step": 1025
    },
    {
      "epoch": 3.0091428571428573,
      "grad_norm": 8.593589782714844,
      "learning_rate": 4.912444860312793e-06,
      "loss": 1.1226,
      "step": 1050
    },
    {
      "epoch": 3.010730158730159,
      "grad_norm": 7.987609386444092,
      "learning_rate": 4.904090362251037e-06,
      "loss": 1.0897,
      "step": 1075
    },
    {
      "epoch": 3.0123174603174605,
      "grad_norm": 6.220165252685547,
      "learning_rate": 4.89573586418928e-06,
      "loss": 1.0358,
      "step": 1100
    },
    {
      "epoch": 3.013904761904762,
      "grad_norm": 5.584622383117676,
      "learning_rate": 4.887381366127523e-06,
      "loss": 1.0586,
      "step": 1125
    },
    {
      "epoch": 3.0154920634920637,
      "grad_norm": 6.9964141845703125,
      "learning_rate": 4.879026868065767e-06,
      "loss": 1.0425,
      "step": 1150
    },
    {
      "epoch": 3.0170793650793652,
      "grad_norm": 6.9891839027404785,
      "learning_rate": 4.87067237000401e-06,
      "loss": 1.0801,
      "step": 1175
    },
    {
      "epoch": 3.018666666666667,
      "grad_norm": 5.334001541137695,
      "learning_rate": 4.862317871942254e-06,
      "loss": 1.071,
      "step": 1200
    },
    {
      "epoch": 4.001079365079365,
      "grad_norm": 6.936366081237793,
      "learning_rate": 4.853963373880497e-06,
      "loss": 1.0287,
      "step": 1225
    },
    {
      "epoch": 4.002666666666666,
      "grad_norm": 5.803761959075928,
      "learning_rate": 4.845608875818741e-06,
      "loss": 1.1338,
      "step": 1250
    },
    {
      "epoch": 4.004253968253968,
      "grad_norm": 6.901465892791748,
      "learning_rate": 4.837254377756984e-06,
      "loss": 0.9522,
      "step": 1275
    },
    {
      "epoch": 4.0058412698412695,
      "grad_norm": 7.466715335845947,
      "learning_rate": 4.828899879695228e-06,
      "loss": 0.9676,
      "step": 1300
    },
    {
      "epoch": 4.007428571428571,
      "grad_norm": 5.247936248779297,
      "learning_rate": 4.820545381633472e-06,
      "loss": 0.9219,
      "step": 1325
    },
    {
      "epoch": 4.009015873015873,
      "grad_norm": 9.886089324951172,
      "learning_rate": 4.812190883571715e-06,
      "loss": 0.9054,
      "step": 1350
    },
    {
      "epoch": 4.010603174603174,
      "grad_norm": 6.104865550994873,
      "learning_rate": 4.803836385509959e-06,
      "loss": 0.9142,
      "step": 1375
    },
    {
      "epoch": 4.012190476190476,
      "grad_norm": 7.953219413757324,
      "learning_rate": 4.7954818874482025e-06,
      "loss": 0.899,
      "step": 1400
    },
    {
      "epoch": 4.0137777777777774,
      "grad_norm": 6.037745475769043,
      "learning_rate": 4.7871273893864465e-06,
      "loss": 0.9195,
      "step": 1425
    },
    {
      "epoch": 4.015365079365079,
      "grad_norm": 5.599011421203613,
      "learning_rate": 4.77877289132469e-06,
      "loss": 0.9049,
      "step": 1450
    },
    {
      "epoch": 4.016952380952381,
      "grad_norm": 9.971386909484863,
      "learning_rate": 4.770418393262934e-06,
      "loss": 0.9406,
      "step": 1475
    },
    {
      "epoch": 4.018539682539682,
      "grad_norm": 5.549421310424805,
      "learning_rate": 4.762063895201177e-06,
      "loss": 0.9306,
      "step": 1500
    },
    {
      "epoch": 5.000952380952381,
      "grad_norm": 8.038061141967773,
      "learning_rate": 4.753709397139421e-06,
      "loss": 0.9159,
      "step": 1525
    },
    {
      "epoch": 5.002539682539682,
      "grad_norm": 4.798451900482178,
      "learning_rate": 4.745354899077664e-06,
      "loss": 0.9853,
      "step": 1550
    },
    {
      "epoch": 5.004126984126984,
      "grad_norm": 5.497593402862549,
      "learning_rate": 4.737000401015907e-06,
      "loss": 0.8753,
      "step": 1575
    },
    {
      "epoch": 5.005714285714285,
      "grad_norm": 8.052043914794922,
      "learning_rate": 4.728645902954151e-06,
      "loss": 0.8879,
      "step": 1600
    },
    {
      "epoch": 5.007301587301587,
      "grad_norm": 10.569701194763184,
      "learning_rate": 4.720291404892394e-06,
      "loss": 0.8254,
      "step": 1625
    },
    {
      "epoch": 5.0088888888888885,
      "grad_norm": 7.724920749664307,
      "learning_rate": 4.711936906830638e-06,
      "loss": 0.7917,
      "step": 1650
    },
    {
      "epoch": 5.01047619047619,
      "grad_norm": 6.411539554595947,
      "learning_rate": 4.703582408768881e-06,
      "loss": 0.8296,
      "step": 1675
    },
    {
      "epoch": 5.012063492063492,
      "grad_norm": 4.88054084777832,
      "learning_rate": 4.695227910707125e-06,
      "loss": 0.8315,
      "step": 1700
    },
    {
      "epoch": 5.013650793650793,
      "grad_norm": 8.332351684570312,
      "learning_rate": 4.6868734126453685e-06,
      "loss": 0.8323,
      "step": 1725
    },
    {
      "epoch": 5.015238095238095,
      "grad_norm": 5.097577095031738,
      "learning_rate": 4.6785189145836124e-06,
      "loss": 0.8122,
      "step": 1750
    },
    {
      "epoch": 5.016825396825396,
      "grad_norm": 9.592188835144043,
      "learning_rate": 4.670164416521856e-06,
      "loss": 0.8712,
      "step": 1775
    },
    {
      "epoch": 5.018412698412698,
      "grad_norm": 7.307371616363525,
      "learning_rate": 4.6618099184601e-06,
      "loss": 0.8648,
      "step": 1800
    },
    {
      "epoch": 6.000825396825396,
      "grad_norm": 7.067652702331543,
      "learning_rate": 4.653455420398343e-06,
      "loss": 0.7985,
      "step": 1825
    },
    {
      "epoch": 6.002412698412698,
      "grad_norm": 6.129504203796387,
      "learning_rate": 4.645100922336586e-06,
      "loss": 0.9276,
      "step": 1850
    },
    {
      "epoch": 6.004,
      "grad_norm": 5.0192742347717285,
      "learning_rate": 4.63674642427483e-06,
      "loss": 0.8118,
      "step": 1875
    },
    {
      "epoch": 6.005587301587302,
      "grad_norm": 9.476181030273438,
      "learning_rate": 4.628391926213073e-06,
      "loss": 0.8063,
      "step": 1900
    },
    {
      "epoch": 6.007174603174604,
      "grad_norm": 4.5081987380981445,
      "learning_rate": 4.620037428151317e-06,
      "loss": 0.7605,
      "step": 1925
    },
    {
      "epoch": 6.008761904761905,
      "grad_norm": 7.077738285064697,
      "learning_rate": 4.61168293008956e-06,
      "loss": 0.7435,
      "step": 1950
    },
    {
      "epoch": 6.010349206349207,
      "grad_norm": 5.30238151550293,
      "learning_rate": 4.603328432027804e-06,
      "loss": 0.7603,
      "step": 1975
    },
    {
      "epoch": 6.011936507936508,
      "grad_norm": 6.851855754852295,
      "learning_rate": 4.594973933966047e-06,
      "loss": 0.7606,
      "step": 2000
    },
    {
      "epoch": 6.01352380952381,
      "grad_norm": 7.457930564880371,
      "learning_rate": 4.586619435904291e-06,
      "loss": 0.7822,
      "step": 2025
    },
    {
      "epoch": 6.0151111111111115,
      "grad_norm": 7.502450466156006,
      "learning_rate": 4.578264937842534e-06,
      "loss": 0.7487,
      "step": 2050
    },
    {
      "epoch": 6.016698412698413,
      "grad_norm": 6.5313544273376465,
      "learning_rate": 4.569910439780778e-06,
      "loss": 0.7878,
      "step": 2075
    },
    {
      "epoch": 6.018285714285715,
      "grad_norm": 7.514427661895752,
      "learning_rate": 4.5615559417190215e-06,
      "loss": 0.812,
      "step": 2100
    },
    {
      "epoch": 7.000698412698413,
      "grad_norm": 5.095201015472412,
      "learning_rate": 4.5532014436572655e-06,
      "loss": 0.7322,
      "step": 2125
    },
    {
      "epoch": 7.002285714285715,
      "grad_norm": 5.384045600891113,
      "learning_rate": 4.5448469455955095e-06,
      "loss": 0.8814,
      "step": 2150
    },
    {
      "epoch": 7.003873015873016,
      "grad_norm": 4.990402698516846,
      "learning_rate": 4.536492447533753e-06,
      "loss": 0.7483,
      "step": 2175
    },
    {
      "epoch": 7.005460317460318,
      "grad_norm": 6.337180137634277,
      "learning_rate": 4.528137949471997e-06,
      "loss": 0.742,
      "step": 2200
    },
    {
      "epoch": 7.007047619047619,
      "grad_norm": 4.511834621429443,
      "learning_rate": 4.51978345141024e-06,
      "loss": 0.7482,
      "step": 2225
    },
    {
      "epoch": 7.008634920634921,
      "grad_norm": 10.161249160766602,
      "learning_rate": 4.511428953348484e-06,
      "loss": 0.6561,
      "step": 2250
    },
    {
      "epoch": 7.010222222222223,
      "grad_norm": 13.714274406433105,
      "learning_rate": 4.503074455286727e-06,
      "loss": 0.7306,
      "step": 2275
    },
    {
      "epoch": 7.011809523809524,
      "grad_norm": 7.517003536224365,
      "learning_rate": 4.494719957224971e-06,
      "loss": 0.6942,
      "step": 2300
    },
    {
      "epoch": 7.013396825396826,
      "grad_norm": 5.552587509155273,
      "learning_rate": 4.486365459163214e-06,
      "loss": 0.7466,
      "step": 2325
    },
    {
      "epoch": 7.014984126984127,
      "grad_norm": 6.234260559082031,
      "learning_rate": 4.478010961101457e-06,
      "loss": 0.6913,
      "step": 2350
    },
    {
      "epoch": 7.016571428571429,
      "grad_norm": 5.961171627044678,
      "learning_rate": 4.469656463039701e-06,
      "loss": 0.7444,
      "step": 2375
    },
    {
      "epoch": 7.0181587301587305,
      "grad_norm": 9.553787231445312,
      "learning_rate": 4.461301964977944e-06,
      "loss": 0.7764,
      "step": 2400
    },
    {
      "epoch": 8.000571428571428,
      "grad_norm": 6.736727237701416,
      "learning_rate": 4.452947466916188e-06,
      "loss": 0.6902,
      "step": 2425
    },
    {
      "epoch": 8.00215873015873,
      "grad_norm": 6.498841285705566,
      "learning_rate": 4.4445929688544314e-06,
      "loss": 0.8205,
      "step": 2450
    },
    {
      "epoch": 8.003746031746031,
      "grad_norm": 3.605954647064209,
      "learning_rate": 4.4362384707926754e-06,
      "loss": 0.7139,
      "step": 2475
    },
    {
      "epoch": 8.005333333333333,
      "grad_norm": 4.930444240570068,
      "learning_rate": 4.427883972730919e-06,
      "loss": 0.7082,
      "step": 2500
    },
    {
      "epoch": 8.006920634920634,
      "grad_norm": 4.654155731201172,
      "learning_rate": 4.4195294746691626e-06,
      "loss": 0.734,
      "step": 2525
    },
    {
      "epoch": 8.008507936507936,
      "grad_norm": 4.657145977020264,
      "learning_rate": 4.411174976607406e-06,
      "loss": 0.6076,
      "step": 2550
    },
    {
      "epoch": 8.010095238095237,
      "grad_norm": 5.5561418533325195,
      "learning_rate": 4.402820478545649e-06,
      "loss": 0.6882,
      "step": 2575
    },
    {
      "epoch": 8.011682539682539,
      "grad_norm": 4.575371742248535,
      "learning_rate": 4.394465980483893e-06,
      "loss": 0.6572,
      "step": 2600
    },
    {
      "epoch": 8.01326984126984,
      "grad_norm": 13.730400085449219,
      "learning_rate": 4.386111482422136e-06,
      "loss": 0.7209,
      "step": 2625
    },
    {
      "epoch": 8.014857142857142,
      "grad_norm": 4.949292182922363,
      "learning_rate": 4.37775698436038e-06,
      "loss": 0.6433,
      "step": 2650
    },
    {
      "epoch": 8.016444444444444,
      "grad_norm": 9.161581039428711,
      "learning_rate": 4.369402486298623e-06,
      "loss": 0.7117,
      "step": 2675
    },
    {
      "epoch": 8.018031746031745,
      "grad_norm": 6.098261833190918,
      "learning_rate": 4.361047988236867e-06,
      "loss": 0.7515,
      "step": 2700
    },
    {
      "epoch": 9.000444444444444,
      "grad_norm": 5.116198539733887,
      "learning_rate": 4.35269349017511e-06,
      "loss": 0.6484,
      "step": 2725
    },
    {
      "epoch": 9.002031746031745,
      "grad_norm": 10.22689437866211,
      "learning_rate": 4.344338992113354e-06,
      "loss": 0.7807,
      "step": 2750
    },
    {
      "epoch": 9.003619047619047,
      "grad_norm": 12.227742195129395,
      "learning_rate": 4.335984494051597e-06,
      "loss": 0.6763,
      "step": 2775
    },
    {
      "epoch": 9.005206349206349,
      "grad_norm": 9.396242141723633,
      "learning_rate": 4.327629995989841e-06,
      "loss": 0.6722,
      "step": 2800
    },
    {
      "epoch": 9.00679365079365,
      "grad_norm": 5.258615016937256,
      "learning_rate": 4.3192754979280845e-06,
      "loss": 0.7176,
      "step": 2825
    },
    {
      "epoch": 9.008380952380952,
      "grad_norm": 5.422908306121826,
      "learning_rate": 4.3109209998663285e-06,
      "loss": 0.5881,
      "step": 2850
    },
    {
      "epoch": 9.009968253968253,
      "grad_norm": 7.922283172607422,
      "learning_rate": 4.302566501804572e-06,
      "loss": 0.6492,
      "step": 2875
    },
    {
      "epoch": 9.011555555555555,
      "grad_norm": 8.866413116455078,
      "learning_rate": 4.294212003742816e-06,
      "loss": 0.6277,
      "step": 2900
    },
    {
      "epoch": 9.013142857142856,
      "grad_norm": 9.206356048583984,
      "learning_rate": 4.285857505681059e-06,
      "loss": 0.6644,
      "step": 2925
    },
    {
      "epoch": 9.014730158730158,
      "grad_norm": 5.49714994430542,
      "learning_rate": 4.277503007619303e-06,
      "loss": 0.6481,
      "step": 2950
    },
    {
      "epoch": 9.01631746031746,
      "grad_norm": 6.799171447753906,
      "learning_rate": 4.269148509557547e-06,
      "loss": 0.67,
      "step": 2975
    },
    {
      "epoch": 9.017904761904761,
      "grad_norm": 7.201016902923584,
      "learning_rate": 4.26079401149579e-06,
      "loss": 0.7103,
      "step": 3000
    },
    {
      "epoch": 10.00031746031746,
      "grad_norm": 6.763806343078613,
      "learning_rate": 4.252439513434034e-06,
      "loss": 0.6487,
      "step": 3025
    },
    {
      "epoch": 10.001904761904761,
      "grad_norm": 9.263688087463379,
      "learning_rate": 4.244085015372277e-06,
      "loss": 0.7082,
      "step": 3050
    },
    {
      "epoch": 10.003492063492063,
      "grad_norm": 11.184915542602539,
      "learning_rate": 4.23573051731052e-06,
      "loss": 0.6759,
      "step": 3075
    },
    {
      "epoch": 10.005079365079364,
      "grad_norm": 4.252372741699219,
      "learning_rate": 4.227376019248764e-06,
      "loss": 0.66,
      "step": 3100
    },
    {
      "epoch": 10.006666666666666,
      "grad_norm": 11.342703819274902,
      "learning_rate": 4.219021521187007e-06,
      "loss": 0.6747,
      "step": 3125
    },
    {
      "epoch": 10.008253968253968,
      "grad_norm": 5.704590797424316,
      "learning_rate": 4.210667023125251e-06,
      "loss": 0.5862,
      "step": 3150
    },
    {
      "epoch": 10.009841269841269,
      "grad_norm": 5.683150291442871,
      "learning_rate": 4.2023125250634944e-06,
      "loss": 0.6129,
      "step": 3175
    },
    {
      "epoch": 10.01142857142857,
      "grad_norm": 4.855335235595703,
      "learning_rate": 4.193958027001738e-06,
      "loss": 0.604,
      "step": 3200
    },
    {
      "epoch": 10.013015873015872,
      "grad_norm": 7.8647613525390625,
      "learning_rate": 4.1856035289399816e-06,
      "loss": 0.6367,
      "step": 3225
    },
    {
      "epoch": 10.014603174603174,
      "grad_norm": 6.591641902923584,
      "learning_rate": 4.1772490308782256e-06,
      "loss": 0.6371,
      "step": 3250
    },
    {
      "epoch": 10.016190476190475,
      "grad_norm": 7.047679424285889,
      "learning_rate": 4.168894532816469e-06,
      "loss": 0.6433,
      "step": 3275
    },
    {
      "epoch": 10.017777777777777,
      "grad_norm": 7.674762725830078,
      "learning_rate": 4.160540034754712e-06,
      "loss": 0.6696,
      "step": 3300
    },
    {
      "epoch": 11.000190476190475,
      "grad_norm": 7.130011081695557,
      "learning_rate": 4.152185536692956e-06,
      "loss": 0.6527,
      "step": 3325
    },
    {
      "epoch": 11.001777777777777,
      "grad_norm": 7.317767143249512,
      "learning_rate": 4.143831038631199e-06,
      "loss": 0.6586,
      "step": 3350
    },
    {
      "epoch": 11.003365079365079,
      "grad_norm": 2.615405321121216,
      "learning_rate": 4.135476540569443e-06,
      "loss": 0.6771,
      "step": 3375
    },
    {
      "epoch": 11.00495238095238,
      "grad_norm": 4.891953468322754,
      "learning_rate": 4.127122042507686e-06,
      "loss": 0.64,
      "step": 3400
    },
    {
      "epoch": 11.006539682539682,
      "grad_norm": 6.664401531219482,
      "learning_rate": 4.11876754444593e-06,
      "loss": 0.6411,
      "step": 3425
    },
    {
      "epoch": 11.008126984126983,
      "grad_norm": 6.38748836517334,
      "learning_rate": 4.110413046384173e-06,
      "loss": 0.5616,
      "step": 3450
    },
    {
      "epoch": 11.009714285714285,
      "grad_norm": 6.957112789154053,
      "learning_rate": 4.102058548322417e-06,
      "loss": 0.5956,
      "step": 3475
    },
    {
      "epoch": 11.011301587301586,
      "grad_norm": 4.554030895233154,
      "learning_rate": 4.09370405026066e-06,
      "loss": 0.5998,
      "step": 3500
    },
    {
      "epoch": 11.012888888888888,
      "grad_norm": 5.208797454833984,
      "learning_rate": 4.085349552198904e-06,
      "loss": 0.616,
      "step": 3525
    },
    {
      "epoch": 11.01447619047619,
      "grad_norm": 6.402866840362549,
      "learning_rate": 4.0769950541371475e-06,
      "loss": 0.5958,
      "step": 3550
    },
    {
      "epoch": 11.016063492063491,
      "grad_norm": 5.4900946617126465,
      "learning_rate": 4.0686405560753915e-06,
      "loss": 0.6173,
      "step": 3575
    },
    {
      "epoch": 11.017650793650793,
      "grad_norm": 9.454499244689941,
      "learning_rate": 4.060286058013635e-06,
      "loss": 0.6567,
      "step": 3600
    },
    {
      "epoch": 12.000063492063491,
      "grad_norm": 9.544127464294434,
      "learning_rate": 4.051931559951879e-06,
      "loss": 0.6164,
      "step": 3625
    },
    {
      "epoch": 12.001650793650793,
      "grad_norm": 5.444927215576172,
      "learning_rate": 4.043577061890122e-06,
      "loss": 0.6563,
      "step": 3650
    },
    {
      "epoch": 12.003238095238094,
      "grad_norm": 3.2568845748901367,
      "learning_rate": 4.035222563828366e-06,
      "loss": 0.6631,
      "step": 3675
    },
    {
      "epoch": 12.004825396825396,
      "grad_norm": 11.233345031738281,
      "learning_rate": 4.026868065766609e-06,
      "loss": 0.6186,
      "step": 3700
    },
    {
      "epoch": 12.006412698412698,
      "grad_norm": 5.101284027099609,
      "learning_rate": 4.018513567704853e-06,
      "loss": 0.6307,
      "step": 3725
    },
    {
      "epoch": 12.008,
      "grad_norm": 7.161935329437256,
      "learning_rate": 4.010159069643096e-06,
      "loss": 0.5363,
      "step": 3750
    },
    {
      "epoch": 12.0095873015873,
      "grad_norm": 8.10970401763916,
      "learning_rate": 4.00180457158134e-06,
      "loss": 0.5729,
      "step": 3775
    },
    {
      "epoch": 12.011174603174604,
      "grad_norm": 6.0759077072143555,
      "learning_rate": 3.993450073519583e-06,
      "loss": 0.5785,
      "step": 3800
    },
    {
      "epoch": 12.012761904761906,
      "grad_norm": 11.944267272949219,
      "learning_rate": 3.985095575457827e-06,
      "loss": 0.5812,
      "step": 3825
    },
    {
      "epoch": 12.014349206349207,
      "grad_norm": 6.059834957122803,
      "learning_rate": 3.97674107739607e-06,
      "loss": 0.5958,
      "step": 3850
    },
    {
      "epoch": 12.015936507936509,
      "grad_norm": 5.849289417266846,
      "learning_rate": 3.968386579334314e-06,
      "loss": 0.6121,
      "step": 3875
    },
    {
      "epoch": 12.01752380952381,
      "grad_norm": 5.543300628662109,
      "learning_rate": 3.960032081272557e-06,
      "loss": 0.6259,
      "step": 3900
    },
    {
      "epoch": 12.019111111111112,
      "grad_norm": 5.864200592041016,
      "learning_rate": 3.951677583210801e-06,
      "loss": 0.597,
      "step": 3925
    },
    {
      "epoch": 13.00152380952381,
      "grad_norm": 9.676383972167969,
      "learning_rate": 3.9433230851490445e-06,
      "loss": 0.6586,
      "step": 3950
    },
    {
      "epoch": 13.003111111111112,
      "grad_norm": 11.184745788574219,
      "learning_rate": 3.9349685870872885e-06,
      "loss": 0.644,
      "step": 3975
    },
    {
      "epoch": 13.004698412698414,
      "grad_norm": 6.4422502517700195,
      "learning_rate": 3.926614089025532e-06,
      "loss": 0.5813,
      "step": 4000
    },
    {
      "epoch": 13.006285714285715,
      "grad_norm": 5.095337390899658,
      "learning_rate": 3.918259590963775e-06,
      "loss": 0.6238,
      "step": 4025
    },
    {
      "epoch": 13.007873015873017,
      "grad_norm": 7.0134663581848145,
      "learning_rate": 3.909905092902019e-06,
      "loss": 0.5323,
      "step": 4050
    },
    {
      "epoch": 13.009460317460318,
      "grad_norm": 11.49138069152832,
      "learning_rate": 3.901550594840262e-06,
      "loss": 0.5527,
      "step": 4075
    },
    {
      "epoch": 13.01104761904762,
      "grad_norm": 5.017846584320068,
      "learning_rate": 3.893196096778506e-06,
      "loss": 0.5561,
      "step": 4100
    },
    {
      "epoch": 13.012634920634921,
      "grad_norm": 6.452044486999512,
      "learning_rate": 3.884841598716749e-06,
      "loss": 0.5796,
      "step": 4125
    },
    {
      "epoch": 13.014222222222223,
      "grad_norm": 4.59455680847168,
      "learning_rate": 3.876487100654993e-06,
      "loss": 0.5686,
      "step": 4150
    },
    {
      "epoch": 13.015809523809525,
      "grad_norm": 6.177151203155518,
      "learning_rate": 3.868132602593236e-06,
      "loss": 0.5974,
      "step": 4175
    },
    {
      "epoch": 13.017396825396826,
      "grad_norm": 8.819549560546875,
      "learning_rate": 3.85977810453148e-06,
      "loss": 0.6186,
      "step": 4200
    },
    {
      "epoch": 13.018984126984128,
      "grad_norm": 4.838363170623779,
      "learning_rate": 3.851423606469723e-06,
      "loss": 0.5783,
      "step": 4225
    },
    {
      "epoch": 14.001396825396826,
      "grad_norm": 6.992326736450195,
      "learning_rate": 3.843069108407967e-06,
      "loss": 0.6319,
      "step": 4250
    },
    {
      "epoch": 14.002984126984128,
      "grad_norm": 6.273831367492676,
      "learning_rate": 3.8347146103462105e-06,
      "loss": 0.6446,
      "step": 4275
    },
    {
      "epoch": 14.00457142857143,
      "grad_norm": 5.901345252990723,
      "learning_rate": 3.826360112284454e-06,
      "loss": 0.5465,
      "step": 4300
    },
    {
      "epoch": 14.006158730158731,
      "grad_norm": 9.980649948120117,
      "learning_rate": 3.818005614222698e-06,
      "loss": 0.6371,
      "step": 4325
    },
    {
      "epoch": 14.007746031746033,
      "grad_norm": 3.479801654815674,
      "learning_rate": 3.809651116160941e-06,
      "loss": 0.5216,
      "step": 4350
    },
    {
      "epoch": 14.009333333333334,
      "grad_norm": 7.416255950927734,
      "learning_rate": 3.801296618099185e-06,
      "loss": 0.5298,
      "step": 4375
    },
    {
      "epoch": 14.010920634920636,
      "grad_norm": 6.53993034362793,
      "learning_rate": 3.7929421200374283e-06,
      "loss": 0.5488,
      "step": 4400
    },
    {
      "epoch": 14.012507936507937,
      "grad_norm": 5.593329906463623,
      "learning_rate": 3.7845876219756723e-06,
      "loss": 0.5506,
      "step": 4425
    },
    {
      "epoch": 14.014095238095239,
      "grad_norm": 5.567030906677246,
      "learning_rate": 3.7762331239139154e-06,
      "loss": 0.5616,
      "step": 4450
    },
    {
      "epoch": 14.01568253968254,
      "grad_norm": 11.033809661865234,
      "learning_rate": 3.7678786258521594e-06,
      "loss": 0.5849,
      "step": 4475
    },
    {
      "epoch": 14.017269841269842,
      "grad_norm": 5.325310707092285,
      "learning_rate": 3.7595241277904026e-06,
      "loss": 0.5911,
      "step": 4500
    },
    {
      "epoch": 14.018857142857144,
      "grad_norm": 10.827190399169922,
      "learning_rate": 3.751169629728646e-06,
      "loss": 0.5941,
      "step": 4525
    },
    {
      "epoch": 15.001269841269842,
      "grad_norm": 6.941823482513428,
      "learning_rate": 3.7428151316668897e-06,
      "loss": 0.6097,
      "step": 4550
    },
    {
      "epoch": 15.002857142857144,
      "grad_norm": 11.185466766357422,
      "learning_rate": 3.7344606336051333e-06,
      "loss": 0.6295,
      "step": 4575
    },
    {
      "epoch": 15.004444444444445,
      "grad_norm": 7.75522518157959,
      "learning_rate": 3.726106135543377e-06,
      "loss": 0.5308,
      "step": 4600
    },
    {
      "epoch": 15.006031746031747,
      "grad_norm": 5.286986827850342,
      "learning_rate": 3.7177516374816204e-06,
      "loss": 0.6022,
      "step": 4625
    },
    {
      "epoch": 15.007619047619048,
      "grad_norm": 6.149432182312012,
      "learning_rate": 3.709397139419864e-06,
      "loss": 0.5457,
      "step": 4650
    },
    {
      "epoch": 15.00920634920635,
      "grad_norm": 7.458939552307129,
      "learning_rate": 3.7010426413581075e-06,
      "loss": 0.5073,
      "step": 4675
    },
    {
      "epoch": 15.010793650793651,
      "grad_norm": 5.971858024597168,
      "learning_rate": 3.692688143296351e-06,
      "loss": 0.5418,
      "step": 4700
    },
    {
      "epoch": 15.012380952380953,
      "grad_norm": 5.619646072387695,
      "learning_rate": 3.6843336452345947e-06,
      "loss": 0.5423,
      "step": 4725
    },
    {
      "epoch": 15.013968253968255,
      "grad_norm": 6.382497787475586,
      "learning_rate": 3.675979147172838e-06,
      "loss": 0.5446,
      "step": 4750
    },
    {
      "epoch": 15.015555555555556,
      "grad_norm": 6.406772136688232,
      "learning_rate": 3.667624649111082e-06,
      "loss": 0.5734,
      "step": 4775
    },
    {
      "epoch": 15.017142857142858,
      "grad_norm": 7.28453254699707,
      "learning_rate": 3.659270151049325e-06,
      "loss": 0.5786,
      "step": 4800
    },
    {
      "epoch": 15.01873015873016,
      "grad_norm": 9.923730850219727,
      "learning_rate": 3.650915652987569e-06,
      "loss": 0.5791,
      "step": 4825
    },
    {
      "epoch": 16.001142857142856,
      "grad_norm": 8.251771926879883,
      "learning_rate": 3.642561154925812e-06,
      "loss": 0.5688,
      "step": 4850
    },
    {
      "epoch": 16.00273015873016,
      "grad_norm": 7.045546054840088,
      "learning_rate": 3.634206656864056e-06,
      "loss": 0.6337,
      "step": 4875
    },
    {
      "epoch": 16.00431746031746,
      "grad_norm": 8.073708534240723,
      "learning_rate": 3.625852158802299e-06,
      "loss": 0.5459,
      "step": 4900
    },
    {
      "epoch": 16.005904761904763,
      "grad_norm": 4.836098670959473,
      "learning_rate": 3.617497660740543e-06,
      "loss": 0.5852,
      "step": 4925
    },
    {
      "epoch": 16.007492063492062,
      "grad_norm": 6.777514457702637,
      "learning_rate": 3.6091431626787863e-06,
      "loss": 0.5408,
      "step": 4950
    },
    {
      "epoch": 16.009079365079366,
      "grad_norm": 4.61210298538208,
      "learning_rate": 3.6007886646170303e-06,
      "loss": 0.4816,
      "step": 4975
    },
    {
      "epoch": 16.010666666666665,
      "grad_norm": 6.172779083251953,
      "learning_rate": 3.5924341665552735e-06,
      "loss": 0.528,
      "step": 5000
    },
    {
      "epoch": 16.01225396825397,
      "grad_norm": 5.35014009475708,
      "learning_rate": 3.584079668493517e-06,
      "loss": 0.5443,
      "step": 5025
    },
    {
      "epoch": 16.01384126984127,
      "grad_norm": 7.521047115325928,
      "learning_rate": 3.575725170431761e-06,
      "loss": 0.5411,
      "step": 5050
    },
    {
      "epoch": 16.015428571428572,
      "grad_norm": 3.7705650329589844,
      "learning_rate": 3.567370672370004e-06,
      "loss": 0.555,
      "step": 5075
    },
    {
      "epoch": 16.017015873015872,
      "grad_norm": 4.953466892242432,
      "learning_rate": 3.559016174308248e-06,
      "loss": 0.5691,
      "step": 5100
    },
    {
      "epoch": 16.018603174603175,
      "grad_norm": 9.339215278625488,
      "learning_rate": 3.5506616762464913e-06,
      "loss": 0.5754,
      "step": 5125
    },
    {
      "epoch": 17.001015873015874,
      "grad_norm": 6.013854503631592,
      "learning_rate": 3.5423071781847353e-06,
      "loss": 0.5407,
      "step": 5150
    },
    {
      "epoch": 17.002603174603173,
      "grad_norm": 6.144811630249023,
      "learning_rate": 3.5339526801229784e-06,
      "loss": 0.6193,
      "step": 5175
    },
    {
      "epoch": 17.004190476190477,
      "grad_norm": 4.639403820037842,
      "learning_rate": 3.5255981820612224e-06,
      "loss": 0.5446,
      "step": 5200
    },
    {
      "epoch": 17.005777777777777,
      "grad_norm": 3.8471908569335938,
      "learning_rate": 3.5172436839994656e-06,
      "loss": 0.5891,
      "step": 5225
    },
    {
      "epoch": 17.00736507936508,
      "grad_norm": 4.270881175994873,
      "learning_rate": 3.5088891859377087e-06,
      "loss": 0.5323,
      "step": 5250
    },
    {
      "epoch": 17.00895238095238,
      "grad_norm": 6.322847366333008,
      "learning_rate": 3.5005346878759527e-06,
      "loss": 0.4589,
      "step": 5275
    },
    {
      "epoch": 17.010539682539683,
      "grad_norm": 18.737030029296875,
      "learning_rate": 3.492180189814196e-06,
      "loss": 0.5348,
      "step": 5300
    },
    {
      "epoch": 17.012126984126983,
      "grad_norm": 3.4430785179138184,
      "learning_rate": 3.48382569175244e-06,
      "loss": 0.5397,
      "step": 5325
    },
    {
      "epoch": 17.013714285714286,
      "grad_norm": 6.301079273223877,
      "learning_rate": 3.4754711936906834e-06,
      "loss": 0.5254,
      "step": 5350
    },
    {
      "epoch": 17.015301587301586,
      "grad_norm": 10.843756675720215,
      "learning_rate": 3.467116695628927e-06,
      "loss": 0.5535,
      "step": 5375
    },
    {
      "epoch": 17.01688888888889,
      "grad_norm": 9.099514961242676,
      "learning_rate": 3.4587621975671705e-06,
      "loss": 0.5458,
      "step": 5400
    },
    {
      "epoch": 17.01847619047619,
      "grad_norm": 5.591258525848389,
      "learning_rate": 3.450407699505414e-06,
      "loss": 0.5715,
      "step": 5425
    },
    {
      "epoch": 18.000888888888888,
      "grad_norm": 7.137011528015137,
      "learning_rate": 3.4420532014436577e-06,
      "loss": 0.5219,
      "step": 5450
    },
    {
      "epoch": 18.00247619047619,
      "grad_norm": 9.867881774902344,
      "learning_rate": 3.4336987033819012e-06,
      "loss": 0.6136,
      "step": 5475
    },
    {
      "epoch": 18.00406349206349,
      "grad_norm": 11.723578453063965,
      "learning_rate": 3.4253442053201448e-06,
      "loss": 0.5442,
      "step": 5500
    },
    {
      "epoch": 18.005650793650794,
      "grad_norm": 5.498622417449951,
      "learning_rate": 3.416989707258388e-06,
      "loss": 0.5691,
      "step": 5525
    },
    {
      "epoch": 18.007238095238094,
      "grad_norm": 5.055809020996094,
      "learning_rate": 3.408635209196632e-06,
      "loss": 0.5105,
      "step": 5550
    },
    {
      "epoch": 18.008825396825397,
      "grad_norm": 3.1195857524871826,
      "learning_rate": 3.400280711134875e-06,
      "loss": 0.4747,
      "step": 5575
    },
    {
      "epoch": 18.010412698412697,
      "grad_norm": 11.140824317932129,
      "learning_rate": 3.391926213073119e-06,
      "loss": 0.5209,
      "step": 5600
    },
    {
      "epoch": 18.012,
      "grad_norm": 6.157780647277832,
      "learning_rate": 3.383571715011362e-06,
      "loss": 0.534,
      "step": 5625
    },
    {
      "epoch": 18.0135873015873,
      "grad_norm": 4.913928031921387,
      "learning_rate": 3.375217216949606e-06,
      "loss": 0.5126,
      "step": 5650
    },
    {
      "epoch": 18.015174603174604,
      "grad_norm": 7.3261895179748535,
      "learning_rate": 3.3668627188878493e-06,
      "loss": 0.5411,
      "step": 5675
    },
    {
      "epoch": 18.016761904761903,
      "grad_norm": 4.676502227783203,
      "learning_rate": 3.3585082208260933e-06,
      "loss": 0.5322,
      "step": 5700
    },
    {
      "epoch": 18.018349206349207,
      "grad_norm": 8.648612976074219,
      "learning_rate": 3.3501537227643365e-06,
      "loss": 0.5704,
      "step": 5725
    },
    {
      "epoch": 19.000761904761905,
      "grad_norm": 10.17095947265625,
      "learning_rate": 3.34179922470258e-06,
      "loss": 0.5118,
      "step": 5750
    },
    {
      "epoch": 19.002349206349205,
      "grad_norm": 4.843137264251709,
      "learning_rate": 3.3334447266408236e-06,
      "loss": 0.6025,
      "step": 5775
    },
    {
      "epoch": 19.00393650793651,
      "grad_norm": 8.146428108215332,
      "learning_rate": 3.325090228579067e-06,
      "loss": 0.5498,
      "step": 5800
    },
    {
      "epoch": 19.005523809523808,
      "grad_norm": 9.00712776184082,
      "learning_rate": 3.3167357305173107e-06,
      "loss": 0.5468,
      "step": 5825
    },
    {
      "epoch": 19.00711111111111,
      "grad_norm": 4.855189800262451,
      "learning_rate": 3.3083812324555543e-06,
      "loss": 0.5228,
      "step": 5850
    },
    {
      "epoch": 19.00869841269841,
      "grad_norm": 4.557061672210693,
      "learning_rate": 3.300026734393798e-06,
      "loss": 0.4595,
      "step": 5875
    },
    {
      "epoch": 19.010285714285715,
      "grad_norm": 8.567035675048828,
      "learning_rate": 3.2916722363320414e-06,
      "loss": 0.519,
      "step": 5900
    },
    {
      "epoch": 19.011873015873014,
      "grad_norm": 26.754615783691406,
      "learning_rate": 3.2833177382702854e-06,
      "loss": 0.5212,
      "step": 5925
    },
    {
      "epoch": 19.013460317460318,
      "grad_norm": 4.910426139831543,
      "learning_rate": 3.2749632402085285e-06,
      "loss": 0.5025,
      "step": 5950
    },
    {
      "epoch": 19.015047619047618,
      "grad_norm": 11.170868873596191,
      "learning_rate": 3.2666087421467717e-06,
      "loss": 0.5109,
      "step": 5975
    },
    {
      "epoch": 19.01663492063492,
      "grad_norm": 8.7157564163208,
      "learning_rate": 3.2582542440850157e-06,
      "loss": 0.5374,
      "step": 6000
    },
    {
      "epoch": 19.01822222222222,
      "grad_norm": 5.3223700523376465,
      "learning_rate": 3.249899746023259e-06,
      "loss": 0.5608,
      "step": 6025
    },
    {
      "epoch": 20.00063492063492,
      "grad_norm": 15.868850708007812,
      "learning_rate": 3.241545247961503e-06,
      "loss": 0.5028,
      "step": 6050
    },
    {
      "epoch": 20.002222222222223,
      "grad_norm": 7.932621955871582,
      "learning_rate": 3.233190749899746e-06,
      "loss": 0.6152,
      "step": 6075
    },
    {
      "epoch": 20.003809523809522,
      "grad_norm": 10.044479370117188,
      "learning_rate": 3.22483625183799e-06,
      "loss": 0.5187,
      "step": 6100
    },
    {
      "epoch": 20.005396825396826,
      "grad_norm": 7.519008159637451,
      "learning_rate": 3.216481753776233e-06,
      "loss": 0.5333,
      "step": 6125
    },
    {
      "epoch": 20.006984126984126,
      "grad_norm": 4.2018866539001465,
      "learning_rate": 3.208127255714477e-06,
      "loss": 0.546,
      "step": 6150
    },
    {
      "epoch": 20.00857142857143,
      "grad_norm": 6.87973690032959,
      "learning_rate": 3.1997727576527206e-06,
      "loss": 0.4432,
      "step": 6175
    },
    {
      "epoch": 20.01015873015873,
      "grad_norm": 6.215482711791992,
      "learning_rate": 3.191418259590964e-06,
      "loss": 0.5015,
      "step": 6200
    },
    {
      "epoch": 20.011746031746032,
      "grad_norm": 5.1478753089904785,
      "learning_rate": 3.1830637615292078e-06,
      "loss": 0.5034,
      "step": 6225
    },
    {
      "epoch": 20.013333333333332,
      "grad_norm": 3.017598867416382,
      "learning_rate": 3.174709263467451e-06,
      "loss": 0.5259,
      "step": 6250
    },
    {
      "epoch": 20.014920634920635,
      "grad_norm": 9.40729808807373,
      "learning_rate": 3.166354765405695e-06,
      "loss": 0.4903,
      "step": 6275
    },
    {
      "epoch": 20.016507936507935,
      "grad_norm": 10.465718269348145,
      "learning_rate": 3.158000267343938e-06,
      "loss": 0.5392,
      "step": 6300
    },
    {
      "epoch": 20.01809523809524,
      "grad_norm": 5.032984733581543,
      "learning_rate": 3.149645769282182e-06,
      "loss": 0.5571,
      "step": 6325
    },
    {
      "epoch": 21.000507936507937,
      "grad_norm": 9.124772071838379,
      "learning_rate": 3.141291271220425e-06,
      "loss": 0.4855,
      "step": 6350
    },
    {
      "epoch": 21.002095238095237,
      "grad_norm": 5.090267181396484,
      "learning_rate": 3.132936773158669e-06,
      "loss": 0.5912,
      "step": 6375
    },
    {
      "epoch": 21.00368253968254,
      "grad_norm": 7.209656238555908,
      "learning_rate": 3.1245822750969123e-06,
      "loss": 0.5315,
      "step": 6400
    },
    {
      "epoch": 21.00526984126984,
      "grad_norm": 10.766797065734863,
      "learning_rate": 3.1162277770351563e-06,
      "loss": 0.5364,
      "step": 6425
    },
    {
      "epoch": 21.006857142857143,
      "grad_norm": 9.655423164367676,
      "learning_rate": 3.1078732789733994e-06,
      "loss": 0.5469,
      "step": 6450
    },
    {
      "epoch": 21.008444444444443,
      "grad_norm": 9.706192970275879,
      "learning_rate": 3.099518780911643e-06,
      "loss": 0.4377,
      "step": 6475
    },
    {
      "epoch": 21.010031746031746,
      "grad_norm": 5.6594343185424805,
      "learning_rate": 3.0911642828498866e-06,
      "loss": 0.4867,
      "step": 6500
    },
    {
      "epoch": 21.011619047619046,
      "grad_norm": 9.920619010925293,
      "learning_rate": 3.08280978478813e-06,
      "loss": 0.497,
      "step": 6525
    },
    {
      "epoch": 21.01320634920635,
      "grad_norm": 4.976430892944336,
      "learning_rate": 3.0744552867263737e-06,
      "loss": 0.4949,
      "step": 6550
    },
    {
      "epoch": 21.01479365079365,
      "grad_norm": 7.563751697540283,
      "learning_rate": 3.0661007886646173e-06,
      "loss": 0.5136,
      "step": 6575
    },
    {
      "epoch": 21.016380952380953,
      "grad_norm": 5.668909072875977,
      "learning_rate": 3.057746290602861e-06,
      "loss": 0.5097,
      "step": 6600
    },
    {
      "epoch": 21.017968253968252,
      "grad_norm": 6.428191661834717,
      "learning_rate": 3.0493917925411044e-06,
      "loss": 0.5594,
      "step": 6625
    },
    {
      "epoch": 22.00038095238095,
      "grad_norm": 6.726556301116943,
      "learning_rate": 3.041037294479348e-06,
      "loss": 0.4874,
      "step": 6650
    },
    {
      "epoch": 22.001968253968254,
      "grad_norm": 14.118648529052734,
      "learning_rate": 3.0326827964175915e-06,
      "loss": 0.5642,
      "step": 6675
    },
    {
      "epoch": 22.003555555555554,
      "grad_norm": 11.719437599182129,
      "learning_rate": 3.024328298355835e-06,
      "loss": 0.5228,
      "step": 6700
    },
    {
      "epoch": 22.005142857142857,
      "grad_norm": 7.100255489349365,
      "learning_rate": 3.0159738002940787e-06,
      "loss": 0.5289,
      "step": 6725
    },
    {
      "epoch": 22.006730158730157,
      "grad_norm": 15.27164363861084,
      "learning_rate": 3.007619302232322e-06,
      "loss": 0.5544,
      "step": 6750
    },
    {
      "epoch": 22.00831746031746,
      "grad_norm": 6.844352722167969,
      "learning_rate": 2.999264804170566e-06,
      "loss": 0.4507,
      "step": 6775
    },
    {
      "epoch": 22.00990476190476,
      "grad_norm": 5.465493202209473,
      "learning_rate": 2.990910306108809e-06,
      "loss": 0.4717,
      "step": 6800
    },
    {
      "epoch": 22.011492063492064,
      "grad_norm": 6.300055503845215,
      "learning_rate": 2.982555808047053e-06,
      "loss": 0.4833,
      "step": 6825
    },
    {
      "epoch": 22.013079365079363,
      "grad_norm": 3.9687702655792236,
      "learning_rate": 2.974201309985296e-06,
      "loss": 0.4925,
      "step": 6850
    },
    {
      "epoch": 22.014666666666667,
      "grad_norm": 5.950267791748047,
      "learning_rate": 2.96584681192354e-06,
      "loss": 0.5093,
      "step": 6875
    },
    {
      "epoch": 22.016253968253967,
      "grad_norm": 7.3085618019104,
      "learning_rate": 2.957492313861783e-06,
      "loss": 0.5095,
      "step": 6900
    },
    {
      "epoch": 22.01784126984127,
      "grad_norm": 10.438004493713379,
      "learning_rate": 2.949137815800027e-06,
      "loss": 0.5509,
      "step": 6925
    },
    {
      "epoch": 23.00025396825397,
      "grad_norm": 5.851992130279541,
      "learning_rate": 2.9407833177382703e-06,
      "loss": 0.4989,
      "step": 6950
    },
    {
      "epoch": 23.001841269841268,
      "grad_norm": 5.808182716369629,
      "learning_rate": 2.932428819676514e-06,
      "loss": 0.531,
      "step": 6975
    },
    {
      "epoch": 23.00342857142857,
      "grad_norm": 4.814669132232666,
      "learning_rate": 2.924074321614758e-06,
      "loss": 0.5413,
      "step": 7000
    },
    {
      "epoch": 23.00501587301587,
      "grad_norm": 7.406203269958496,
      "learning_rate": 2.915719823553001e-06,
      "loss": 0.5223,
      "step": 7025
    },
    {
      "epoch": 23.006603174603175,
      "grad_norm": 4.7713942527771,
      "learning_rate": 2.907365325491245e-06,
      "loss": 0.5251,
      "step": 7050
    },
    {
      "epoch": 23.008190476190475,
      "grad_norm": 4.403865814208984,
      "learning_rate": 2.899010827429488e-06,
      "loss": 0.46,
      "step": 7075
    },
    {
      "epoch": 23.009777777777778,
      "grad_norm": 5.674661636352539,
      "learning_rate": 2.890656329367732e-06,
      "loss": 0.4705,
      "step": 7100
    },
    {
      "epoch": 23.011365079365078,
      "grad_norm": 7.83860445022583,
      "learning_rate": 2.8823018313059753e-06,
      "loss": 0.4906,
      "step": 7125
    },
    {
      "epoch": 23.01295238095238,
      "grad_norm": 3.9756040573120117,
      "learning_rate": 2.8739473332442193e-06,
      "loss": 0.4931,
      "step": 7150
    },
    {
      "epoch": 23.01453968253968,
      "grad_norm": 4.530709743499756,
      "learning_rate": 2.8655928351824624e-06,
      "loss": 0.4957,
      "step": 7175
    },
    {
      "epoch": 23.016126984126984,
      "grad_norm": 7.570037364959717,
      "learning_rate": 2.8572383371207056e-06,
      "loss": 0.5039,
      "step": 7200
    },
    {
      "epoch": 23.017714285714284,
      "grad_norm": 6.422541618347168,
      "learning_rate": 2.8488838390589496e-06,
      "loss": 0.5312,
      "step": 7225
    },
    {
      "epoch": 24.000126984126982,
      "grad_norm": 7.004579544067383,
      "learning_rate": 2.8405293409971927e-06,
      "loss": 0.4954,
      "step": 7250
    },
    {
      "epoch": 24.001714285714286,
      "grad_norm": 11.172274589538574,
      "learning_rate": 2.8321748429354367e-06,
      "loss": 0.5304,
      "step": 7275
    },
    {
      "epoch": 24.003301587301586,
      "grad_norm": 6.250117301940918,
      "learning_rate": 2.8238203448736803e-06,
      "loss": 0.5508,
      "step": 7300
    },
    {
      "epoch": 24.00488888888889,
      "grad_norm": 5.038013935089111,
      "learning_rate": 2.815465846811924e-06,
      "loss": 0.5128,
      "step": 7325
    },
    {
      "epoch": 24.00647619047619,
      "grad_norm": 3.5625054836273193,
      "learning_rate": 2.8071113487501674e-06,
      "loss": 0.5219,
      "step": 7350
    },
    {
      "epoch": 24.008063492063492,
      "grad_norm": 4.982530117034912,
      "learning_rate": 2.798756850688411e-06,
      "loss": 0.4495,
      "step": 7375
    },
    {
      "epoch": 24.009650793650792,
      "grad_norm": 17.86178207397461,
      "learning_rate": 2.7904023526266545e-06,
      "loss": 0.4743,
      "step": 7400
    },
    {
      "epoch": 24.011238095238095,
      "grad_norm": 6.184370517730713,
      "learning_rate": 2.782047854564898e-06,
      "loss": 0.4768,
      "step": 7425
    },
    {
      "epoch": 24.012825396825395,
      "grad_norm": 11.036730766296387,
      "learning_rate": 2.7736933565031416e-06,
      "loss": 0.493,
      "step": 7450
    },
    {
      "epoch": 24.0144126984127,
      "grad_norm": 7.9786577224731445,
      "learning_rate": 2.765338858441385e-06,
      "loss": 0.4758,
      "step": 7475
    },
    {
      "epoch": 24.016,
      "grad_norm": 5.915741443634033,
      "learning_rate": 2.7569843603796288e-06,
      "loss": 0.5088,
      "step": 7500
    },
    {
      "epoch": 24.0175873015873,
      "grad_norm": 10.715784072875977,
      "learning_rate": 2.748629862317872e-06,
      "loss": 0.5091,
      "step": 7525
    },
    {
      "epoch": 24.0191746031746,
      "grad_norm": 6.6324381828308105,
      "learning_rate": 2.740275364256116e-06,
      "loss": 0.4805,
      "step": 7550
    },
    {
      "epoch": 25.001587301587303,
      "grad_norm": 4.505024433135986,
      "learning_rate": 2.731920866194359e-06,
      "loss": 0.55,
      "step": 7575
    },
    {
      "epoch": 25.003174603174603,
      "grad_norm": 6.079422950744629,
      "learning_rate": 2.723566368132603e-06,
      "loss": 0.5442,
      "step": 7600
    },
    {
      "epoch": 25.004761904761907,
      "grad_norm": 5.729933738708496,
      "learning_rate": 2.715211870070846e-06,
      "loss": 0.4952,
      "step": 7625
    },
    {
      "epoch": 25.006349206349206,
      "grad_norm": 12.97045612335205,
      "learning_rate": 2.70685737200909e-06,
      "loss": 0.5312,
      "step": 7650
    },
    {
      "epoch": 25.00793650793651,
      "grad_norm": 6.688389301300049,
      "learning_rate": 2.6985028739473333e-06,
      "loss": 0.4426,
      "step": 7675
    },
    {
      "epoch": 25.00952380952381,
      "grad_norm": 5.51877498626709,
      "learning_rate": 2.690148375885577e-06,
      "loss": 0.4597,
      "step": 7700
    },
    {
      "epoch": 25.011111111111113,
      "grad_norm": 6.6266374588012695,
      "learning_rate": 2.6817938778238204e-06,
      "loss": 0.4767,
      "step": 7725
    },
    {
      "epoch": 25.012698412698413,
      "grad_norm": 4.988967418670654,
      "learning_rate": 2.673439379762064e-06,
      "loss": 0.4721,
      "step": 7750
    },
    {
      "epoch": 25.014285714285716,
      "grad_norm": 5.249930381774902,
      "learning_rate": 2.6650848817003076e-06,
      "loss": 0.481,
      "step": 7775
    },
    {
      "epoch": 25.015873015873016,
      "grad_norm": 8.894637107849121,
      "learning_rate": 2.656730383638551e-06,
      "loss": 0.5092,
      "step": 7800
    },
    {
      "epoch": 25.01746031746032,
      "grad_norm": 6.139794826507568,
      "learning_rate": 2.648375885576795e-06,
      "loss": 0.5146,
      "step": 7825
    },
    {
      "epoch": 25.01904761904762,
      "grad_norm": 4.836121082305908,
      "learning_rate": 2.6400213875150383e-06,
      "loss": 0.4853,
      "step": 7850
    },
    {
      "epoch": 26.001460317460317,
      "grad_norm": 4.840237140655518,
      "learning_rate": 2.6316668894532823e-06,
      "loss": 0.5325,
      "step": 7875
    },
    {
      "epoch": 26.00304761904762,
      "grad_norm": 6.270430088043213,
      "learning_rate": 2.6233123913915254e-06,
      "loss": 0.5517,
      "step": 7900
    },
    {
      "epoch": 26.00463492063492,
      "grad_norm": 6.732022285461426,
      "learning_rate": 2.6149578933297694e-06,
      "loss": 0.4771,
      "step": 7925
    },
    {
      "epoch": 26.006222222222224,
      "grad_norm": 4.249831199645996,
      "learning_rate": 2.6066033952680125e-06,
      "loss": 0.5281,
      "step": 7950
    },
    {
      "epoch": 26.007809523809524,
      "grad_norm": 9.650166511535645,
      "learning_rate": 2.5982488972062557e-06,
      "loss": 0.4461,
      "step": 7975
    },
    {
      "epoch": 26.009396825396827,
      "grad_norm": 5.7691216468811035,
      "learning_rate": 2.5898943991444997e-06,
      "loss": 0.4487,
      "step": 8000
    },
    {
      "epoch": 26.010984126984127,
      "grad_norm": 5.991948127746582,
      "learning_rate": 2.581539901082743e-06,
      "loss": 0.4715,
      "step": 8025
    },
    {
      "epoch": 26.01257142857143,
      "grad_norm": 11.065790176391602,
      "learning_rate": 2.573185403020987e-06,
      "loss": 0.4742,
      "step": 8050
    },
    {
      "epoch": 26.01415873015873,
      "grad_norm": 11.387042045593262,
      "learning_rate": 2.56483090495923e-06,
      "loss": 0.4793,
      "step": 8075
    },
    {
      "epoch": 26.015746031746033,
      "grad_norm": 7.323668479919434,
      "learning_rate": 2.556476406897474e-06,
      "loss": 0.4933,
      "step": 8100
    },
    {
      "epoch": 26.017333333333333,
      "grad_norm": 10.183083534240723,
      "learning_rate": 2.5481219088357175e-06,
      "loss": 0.5176,
      "step": 8125
    },
    {
      "epoch": 26.018920634920637,
      "grad_norm": 3.41259503364563,
      "learning_rate": 2.539767410773961e-06,
      "loss": 0.4789,
      "step": 8150
    },
    {
      "epoch": 27.001333333333335,
      "grad_norm": 8.132092475891113,
      "learning_rate": 2.5314129127122046e-06,
      "loss": 0.5315,
      "step": 8175
    },
    {
      "epoch": 27.002920634920635,
      "grad_norm": 6.488096237182617,
      "learning_rate": 2.5230584146504478e-06,
      "loss": 0.5424,
      "step": 8200
    },
    {
      "epoch": 27.004507936507938,
      "grad_norm": 7.1543803215026855,
      "learning_rate": 2.5147039165886918e-06,
      "loss": 0.4628,
      "step": 8225
    },
    {
      "epoch": 27.006095238095238,
      "grad_norm": 6.017189025878906,
      "learning_rate": 2.506349418526935e-06,
      "loss": 0.5284,
      "step": 8250
    },
    {
      "epoch": 27.00768253968254,
      "grad_norm": 5.09862756729126,
      "learning_rate": 2.497994920465179e-06,
      "loss": 0.4613,
      "step": 8275
    },
    {
      "epoch": 27.00926984126984,
      "grad_norm": 6.283570766448975,
      "learning_rate": 2.489640422403422e-06,
      "loss": 0.4374,
      "step": 8300
    },
    {
      "epoch": 27.010857142857144,
      "grad_norm": 5.45609712600708,
      "learning_rate": 2.4812859243416656e-06,
      "loss": 0.4615,
      "step": 8325
    },
    {
      "epoch": 27.012444444444444,
      "grad_norm": 9.621217727661133,
      "learning_rate": 2.472931426279909e-06,
      "loss": 0.4665,
      "step": 8350
    },
    {
      "epoch": 27.014031746031748,
      "grad_norm": 10.336989402770996,
      "learning_rate": 2.4645769282181527e-06,
      "loss": 0.4712,
      "step": 8375
    },
    {
      "epoch": 27.015619047619047,
      "grad_norm": 8.53022289276123,
      "learning_rate": 2.4562224301563963e-06,
      "loss": 0.4869,
      "step": 8400
    },
    {
      "epoch": 27.01720634920635,
      "grad_norm": 6.758102893829346,
      "learning_rate": 2.44786793209464e-06,
      "loss": 0.5007,
      "step": 8425
    },
    {
      "epoch": 27.01879365079365,
      "grad_norm": 6.737295627593994,
      "learning_rate": 2.4395134340328834e-06,
      "loss": 0.5,
      "step": 8450
    },
    {
      "epoch": 28.00120634920635,
      "grad_norm": 8.9446439743042,
      "learning_rate": 2.431158935971127e-06,
      "loss": 0.5098,
      "step": 8475
    },
    {
      "epoch": 28.002793650793652,
      "grad_norm": 4.6513190269470215,
      "learning_rate": 2.4228044379093706e-06,
      "loss": 0.5249,
      "step": 8500
    },
    {
      "epoch": 28.004380952380952,
      "grad_norm": 7.930838108062744,
      "learning_rate": 2.414449939847614e-06,
| "loss": 0.4823, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 28.005968253968256, | |
| "grad_norm": 5.986405372619629, | |
| "learning_rate": 2.4060954417858577e-06, | |
| "loss": 0.5295, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 28.007555555555555, | |
| "grad_norm": 6.348638534545898, | |
| "learning_rate": 2.3977409437241013e-06, | |
| "loss": 0.4608, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 28.00914285714286, | |
| "grad_norm": 5.640425205230713, | |
| "learning_rate": 2.389386445662345e-06, | |
| "loss": 0.4183, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 28.01073015873016, | |
| "grad_norm": 7.974732875823975, | |
| "learning_rate": 2.3810319476005884e-06, | |
| "loss": 0.4716, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 28.012317460317462, | |
| "grad_norm": 4.698752403259277, | |
| "learning_rate": 2.372677449538832e-06, | |
| "loss": 0.4637, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 28.01390476190476, | |
| "grad_norm": 4.2253828048706055, | |
| "learning_rate": 2.3643229514770755e-06, | |
| "loss": 0.4589, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 28.015492063492065, | |
| "grad_norm": 7.007496356964111, | |
| "learning_rate": 2.355968453415319e-06, | |
| "loss": 0.4978, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 28.017079365079365, | |
| "grad_norm": 4.830111026763916, | |
| "learning_rate": 2.3476139553535627e-06, | |
| "loss": 0.4873, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 28.018666666666668, | |
| "grad_norm": 3.9254467487335205, | |
| "learning_rate": 2.3392594572918062e-06, | |
| "loss": 0.4994, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 29.001079365079367, | |
| "grad_norm": 6.090777397155762, | |
| "learning_rate": 2.33090495923005e-06, | |
| "loss": 0.4761, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 29.002666666666666, | |
| "grad_norm": 5.358640670776367, | |
| "learning_rate": 2.322550461168293e-06, | |
| "loss": 0.5431, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 29.00425396825397, | |
| "grad_norm": 9.447075843811035, | |
| "learning_rate": 2.3141959631065365e-06, | |
| "loss": 0.4746, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 29.00584126984127, | |
| "grad_norm": 5.390321731567383, | |
| "learning_rate": 2.30584146504478e-06, | |
| "loss": 0.5236, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 29.007428571428573, | |
| "grad_norm": 4.194957256317139, | |
| "learning_rate": 2.2974869669830236e-06, | |
| "loss": 0.4683, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 29.009015873015873, | |
| "grad_norm": 10.377429008483887, | |
| "learning_rate": 2.289132468921267e-06, | |
| "loss": 0.4122, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 29.010603174603176, | |
| "grad_norm": 4.972590923309326, | |
| "learning_rate": 2.2807779708595108e-06, | |
| "loss": 0.4557, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 29.012190476190476, | |
| "grad_norm": 4.772759437561035, | |
| "learning_rate": 2.2724234727977548e-06, | |
| "loss": 0.4732, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 29.01377777777778, | |
| "grad_norm": 19.494970321655273, | |
| "learning_rate": 2.2640689747359983e-06, | |
| "loss": 0.4653, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 29.01536507936508, | |
| "grad_norm": 6.1877593994140625, | |
| "learning_rate": 2.255714476674242e-06, | |
| "loss": 0.4901, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 29.016952380952382, | |
| "grad_norm": 5.228841781616211, | |
| "learning_rate": 2.2473599786124854e-06, | |
| "loss": 0.4841, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 29.018539682539682, | |
| "grad_norm": 5.32314395904541, | |
| "learning_rate": 2.2390054805507286e-06, | |
| "loss": 0.4902, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 30.00095238095238, | |
| "grad_norm": 6.952610015869141, | |
| "learning_rate": 2.230650982488972e-06, | |
| "loss": 0.4803, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 30.002539682539684, | |
| "grad_norm": 4.230266571044922, | |
| "learning_rate": 2.2222964844272157e-06, | |
| "loss": 0.5235, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 30.004126984126984, | |
| "grad_norm": 7.016523361206055, | |
| "learning_rate": 2.2139419863654593e-06, | |
| "loss": 0.4873, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 30.005714285714287, | |
| "grad_norm": 10.13500690460205, | |
| "learning_rate": 2.205587488303703e-06, | |
| "loss": 0.5262, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 30.007301587301587, | |
| "grad_norm": 7.627212047576904, | |
| "learning_rate": 2.1972329902419464e-06, | |
| "loss": 0.4619, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 30.00888888888889, | |
| "grad_norm": 7.077376365661621, | |
| "learning_rate": 2.18887849218019e-06, | |
| "loss": 0.4011, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 30.01047619047619, | |
| "grad_norm": 7.501957416534424, | |
| "learning_rate": 2.1805239941184336e-06, | |
| "loss": 0.455, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 30.012063492063493, | |
| "grad_norm": 6.617973327636719, | |
| "learning_rate": 2.172169496056677e-06, | |
| "loss": 0.4814, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 30.013650793650793, | |
| "grad_norm": 3.885499954223633, | |
| "learning_rate": 2.1638149979949207e-06, | |
| "loss": 0.4523, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 30.015238095238097, | |
| "grad_norm": 5.5597615242004395, | |
| "learning_rate": 2.1554604999331642e-06, | |
| "loss": 0.4768, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 30.016825396825396, | |
| "grad_norm": 9.792261123657227, | |
| "learning_rate": 2.147106001871408e-06, | |
| "loss": 0.49, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 30.0184126984127, | |
| "grad_norm": 5.992704391479492, | |
| "learning_rate": 2.1387515038096514e-06, | |
| "loss": 0.498, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 31.000825396825398, | |
| "grad_norm": 8.464439392089844, | |
| "learning_rate": 2.130397005747895e-06, | |
| "loss": 0.4518, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 31.002412698412698, | |
| "grad_norm": 3.486860990524292, | |
| "learning_rate": 2.1220425076861385e-06, | |
| "loss": 0.5318, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 31.004, | |
| "grad_norm": 4.426388740539551, | |
| "learning_rate": 2.113688009624382e-06, | |
| "loss": 0.4917, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 31.0055873015873, | |
| "grad_norm": 8.08337116241455, | |
| "learning_rate": 2.1053335115626256e-06, | |
| "loss": 0.5093, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 31.007174603174604, | |
| "grad_norm": 3.963824987411499, | |
| "learning_rate": 2.096979013500869e-06, | |
| "loss": 0.4515, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 31.008761904761904, | |
| "grad_norm": 7.304539203643799, | |
| "learning_rate": 2.0886245154391128e-06, | |
| "loss": 0.4196, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 31.010349206349208, | |
| "grad_norm": 4.731977939605713, | |
| "learning_rate": 2.080270017377356e-06, | |
| "loss": 0.4529, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 31.011936507936507, | |
| "grad_norm": 8.285253524780273, | |
| "learning_rate": 2.0719155193155995e-06, | |
| "loss": 0.4653, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 31.01352380952381, | |
| "grad_norm": 8.305194854736328, | |
| "learning_rate": 2.063561021253843e-06, | |
| "loss": 0.4624, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 31.01511111111111, | |
| "grad_norm": 13.913382530212402, | |
| "learning_rate": 2.0552065231920866e-06, | |
| "loss": 0.4676, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 31.016698412698414, | |
| "grad_norm": 6.448155403137207, | |
| "learning_rate": 2.04685202513033e-06, | |
| "loss": 0.476, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 31.018285714285714, | |
| "grad_norm": 7.706886291503906, | |
| "learning_rate": 2.0384975270685737e-06, | |
| "loss": 0.4967, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 32.00069841269841, | |
| "grad_norm": 4.588306427001953, | |
| "learning_rate": 2.0301430290068173e-06, | |
| "loss": 0.4358, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 32.00228571428571, | |
| "grad_norm": 4.243907451629639, | |
| "learning_rate": 2.021788530945061e-06, | |
| "loss": 0.5546, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 32.00387301587302, | |
| "grad_norm": 6.786617755889893, | |
| "learning_rate": 2.0134340328833044e-06, | |
| "loss": 0.4775, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 32.00546031746032, | |
| "grad_norm": 9.617806434631348, | |
| "learning_rate": 2.005079534821548e-06, | |
| "loss": 0.4857, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 32.00704761904762, | |
| "grad_norm": 4.088709354400635, | |
| "learning_rate": 1.9967250367597916e-06, | |
| "loss": 0.4872, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 32.00863492063492, | |
| "grad_norm": 7.801070690155029, | |
| "learning_rate": 1.988370538698035e-06, | |
| "loss": 0.3978, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 32.010222222222225, | |
| "grad_norm": 12.151320457458496, | |
| "learning_rate": 1.9800160406362787e-06, | |
| "loss": 0.4567, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 32.011809523809525, | |
| "grad_norm": 6.325204372406006, | |
| "learning_rate": 1.9716615425745223e-06, | |
| "loss": 0.4568, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 32.013396825396825, | |
| "grad_norm": 4.849487781524658, | |
| "learning_rate": 1.963307044512766e-06, | |
| "loss": 0.4612, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 32.014984126984125, | |
| "grad_norm": 6.0611090660095215, | |
| "learning_rate": 1.9549525464510094e-06, | |
| "loss": 0.4492, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 32.01657142857143, | |
| "grad_norm": 8.705154418945312, | |
| "learning_rate": 1.946598048389253e-06, | |
| "loss": 0.4823, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 32.01815873015873, | |
| "grad_norm": 6.838711261749268, | |
| "learning_rate": 1.9382435503274965e-06, | |
| "loss": 0.4981, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 33.000571428571426, | |
| "grad_norm": 6.763596057891846, | |
| "learning_rate": 1.92988905226574e-06, | |
| "loss": 0.4369, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 33.00215873015873, | |
| "grad_norm": 5.268820285797119, | |
| "learning_rate": 1.9215345542039837e-06, | |
| "loss": 0.5427, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 33.00374603174603, | |
| "grad_norm": 3.5916361808776855, | |
| "learning_rate": 1.913180056142227e-06, | |
| "loss": 0.4798, | |
| "step": 10025 | |
| }, | |
| { | |
| "epoch": 33.00533333333333, | |
| "grad_norm": 4.288261890411377, | |
| "learning_rate": 1.9048255580804706e-06, | |
| "loss": 0.488, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 33.00692063492063, | |
| "grad_norm": 5.668625354766846, | |
| "learning_rate": 1.8964710600187142e-06, | |
| "loss": 0.5008, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 33.00850793650794, | |
| "grad_norm": 4.973710536956787, | |
| "learning_rate": 1.8881165619569577e-06, | |
| "loss": 0.391, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 33.01009523809524, | |
| "grad_norm": 7.073776721954346, | |
| "learning_rate": 1.8797620638952013e-06, | |
| "loss": 0.444, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 33.01168253968254, | |
| "grad_norm": 4.297868251800537, | |
| "learning_rate": 1.8714075658334449e-06, | |
| "loss": 0.4516, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 33.01326984126984, | |
| "grad_norm": 25.770071029663086, | |
| "learning_rate": 1.8630530677716884e-06, | |
| "loss": 0.4746, | |
| "step": 10175 | |
| }, | |
| { | |
| "epoch": 33.014857142857146, | |
| "grad_norm": 3.6950442790985107, | |
| "learning_rate": 1.854698569709932e-06, | |
| "loss": 0.4346, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 33.016444444444446, | |
| "grad_norm": 8.81914234161377, | |
| "learning_rate": 1.8463440716481756e-06, | |
| "loss": 0.4794, | |
| "step": 10225 | |
| }, | |
| { | |
| "epoch": 33.018031746031745, | |
| "grad_norm": 6.376156806945801, | |
| "learning_rate": 1.837989573586419e-06, | |
| "loss": 0.5085, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 34.00044444444445, | |
| "grad_norm": 4.302385330200195, | |
| "learning_rate": 1.8296350755246625e-06, | |
| "loss": 0.4276, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 34.00203174603175, | |
| "grad_norm": 8.226256370544434, | |
| "learning_rate": 1.821280577462906e-06, | |
| "loss": 0.5405, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 34.00361904761905, | |
| "grad_norm": 12.093836784362793, | |
| "learning_rate": 1.8129260794011496e-06, | |
| "loss": 0.4661, | |
| "step": 10325 | |
| }, | |
| { | |
| "epoch": 34.00520634920635, | |
| "grad_norm": 11.840346336364746, | |
| "learning_rate": 1.8045715813393932e-06, | |
| "loss": 0.4779, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 34.006793650793654, | |
| "grad_norm": 5.186977386474609, | |
| "learning_rate": 1.7962170832776367e-06, | |
| "loss": 0.5152, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 34.00838095238095, | |
| "grad_norm": 4.393645286560059, | |
| "learning_rate": 1.7878625852158805e-06, | |
| "loss": 0.3996, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 34.00996825396825, | |
| "grad_norm": 11.108858108520508, | |
| "learning_rate": 1.779508087154124e-06, | |
| "loss": 0.4351, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 34.01155555555555, | |
| "grad_norm": 6.400074005126953, | |
| "learning_rate": 1.7711535890923676e-06, | |
| "loss": 0.4461, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 34.01314285714286, | |
| "grad_norm": 11.155898094177246, | |
| "learning_rate": 1.7627990910306112e-06, | |
| "loss": 0.4503, | |
| "step": 10475 | |
| }, | |
| { | |
| "epoch": 34.01473015873016, | |
| "grad_norm": 5.40648889541626, | |
| "learning_rate": 1.7544445929688544e-06, | |
| "loss": 0.4552, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 34.01631746031746, | |
| "grad_norm": 4.550622463226318, | |
| "learning_rate": 1.746090094907098e-06, | |
| "loss": 0.4701, | |
| "step": 10525 | |
| }, | |
| { | |
| "epoch": 34.01790476190476, | |
| "grad_norm": 7.3100433349609375, | |
| "learning_rate": 1.7377355968453417e-06, | |
| "loss": 0.4936, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 35.00031746031746, | |
| "grad_norm": 6.450623035430908, | |
| "learning_rate": 1.7293810987835853e-06, | |
| "loss": 0.449, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 35.00190476190476, | |
| "grad_norm": 9.00794792175293, | |
| "learning_rate": 1.7210266007218288e-06, | |
| "loss": 0.5011, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 35.00349206349206, | |
| "grad_norm": 10.713994026184082, | |
| "learning_rate": 1.7126721026600724e-06, | |
| "loss": 0.4829, | |
| "step": 10625 | |
| }, | |
| { | |
| "epoch": 35.00507936507937, | |
| "grad_norm": 4.488622188568115, | |
| "learning_rate": 1.704317604598316e-06, | |
| "loss": 0.4795, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 35.00666666666667, | |
| "grad_norm": 16.104774475097656, | |
| "learning_rate": 1.6959631065365595e-06, | |
| "loss": 0.5035, | |
| "step": 10675 | |
| }, | |
| { | |
| "epoch": 35.00825396825397, | |
| "grad_norm": 4.884101390838623, | |
| "learning_rate": 1.687608608474803e-06, | |
| "loss": 0.4168, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 35.00984126984127, | |
| "grad_norm": 5.478789806365967, | |
| "learning_rate": 1.6792541104130467e-06, | |
| "loss": 0.4258, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 35.011428571428574, | |
| "grad_norm": 6.428930282592773, | |
| "learning_rate": 1.67089961235129e-06, | |
| "loss": 0.4392, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 35.013015873015874, | |
| "grad_norm": 2.7530977725982666, | |
| "learning_rate": 1.6625451142895336e-06, | |
| "loss": 0.451, | |
| "step": 10775 | |
| }, | |
| { | |
| "epoch": 35.014603174603174, | |
| "grad_norm": 5.9829912185668945, | |
| "learning_rate": 1.6541906162277771e-06, | |
| "loss": 0.4586, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 35.016190476190474, | |
| "grad_norm": 6.039813995361328, | |
| "learning_rate": 1.6458361181660207e-06, | |
| "loss": 0.4667, | |
| "step": 10825 | |
| }, | |
| { | |
| "epoch": 35.01777777777778, | |
| "grad_norm": 6.336811065673828, | |
| "learning_rate": 1.6374816201042643e-06, | |
| "loss": 0.482, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 36.000190476190475, | |
| "grad_norm": 6.172911643981934, | |
| "learning_rate": 1.6291271220425078e-06, | |
| "loss": 0.4651, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 36.001777777777775, | |
| "grad_norm": 4.215289115905762, | |
| "learning_rate": 1.6207726239807514e-06, | |
| "loss": 0.4776, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 36.00336507936508, | |
| "grad_norm": 2.862426519393921, | |
| "learning_rate": 1.612418125918995e-06, | |
| "loss": 0.5059, | |
| "step": 10925 | |
| }, | |
| { | |
| "epoch": 36.00495238095238, | |
| "grad_norm": 4.817645072937012, | |
| "learning_rate": 1.6040636278572385e-06, | |
| "loss": 0.4777, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 36.00653968253968, | |
| "grad_norm": 8.073090553283691, | |
| "learning_rate": 1.595709129795482e-06, | |
| "loss": 0.4837, | |
| "step": 10975 | |
| }, | |
| { | |
| "epoch": 36.00812698412698, | |
| "grad_norm": 6.108732223510742, | |
| "learning_rate": 1.5873546317337255e-06, | |
| "loss": 0.4149, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 36.00971428571429, | |
| "grad_norm": 4.37070369720459, | |
| "learning_rate": 1.579000133671969e-06, | |
| "loss": 0.4323, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 36.01130158730159, | |
| "grad_norm": 3.8069772720336914, | |
| "learning_rate": 1.5706456356102126e-06, | |
| "loss": 0.4488, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 36.01288888888889, | |
| "grad_norm": 4.619093894958496, | |
| "learning_rate": 1.5622911375484562e-06, | |
| "loss": 0.4551, | |
| "step": 11075 | |
| }, | |
| { | |
| "epoch": 36.01447619047619, | |
| "grad_norm": 5.150590419769287, | |
| "learning_rate": 1.5539366394866997e-06, | |
| "loss": 0.4401, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 36.016063492063495, | |
| "grad_norm": 6.264833450317383, | |
| "learning_rate": 1.5455821414249433e-06, | |
| "loss": 0.4642, | |
| "step": 11125 | |
| }, | |
| { | |
| "epoch": 36.017650793650795, | |
| "grad_norm": 7.851002216339111, | |
| "learning_rate": 1.5372276433631869e-06, | |
| "loss": 0.4791, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 37.00006349206349, | |
| "grad_norm": 7.941956996917725, | |
| "learning_rate": 1.5288731453014304e-06, | |
| "loss": 0.4488, | |
| "step": 11175 | |
| }, | |
| { | |
| "epoch": 37.001650793650796, | |
| "grad_norm": 6.259990692138672, | |
| "learning_rate": 1.520518647239674e-06, | |
| "loss": 0.4967, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 37.003238095238096, | |
| "grad_norm": 2.6891424655914307, | |
| "learning_rate": 1.5121641491779175e-06, | |
| "loss": 0.501, | |
| "step": 11225 | |
| }, | |
| { | |
| "epoch": 37.004825396825396, | |
| "grad_norm": 7.098905563354492, | |
| "learning_rate": 1.503809651116161e-06, | |
| "loss": 0.4755, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 37.006412698412696, | |
| "grad_norm": 5.689685344696045, | |
| "learning_rate": 1.4954551530544045e-06, | |
| "loss": 0.4868, | |
| "step": 11275 | |
| }, | |
| { | |
| "epoch": 37.008, | |
| "grad_norm": 6.782131195068359, | |
| "learning_rate": 1.487100654992648e-06, | |
| "loss": 0.4057, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 37.0095873015873, | |
| "grad_norm": 7.18269157409668, | |
| "learning_rate": 1.4787461569308916e-06, | |
| "loss": 0.4281, | |
| "step": 11325 | |
| }, | |
| { | |
| "epoch": 37.0111746031746, | |
| "grad_norm": 6.619096755981445, | |
| "learning_rate": 1.4703916588691352e-06, | |
| "loss": 0.4398, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 37.0127619047619, | |
| "grad_norm": 4.206869602203369, | |
| "learning_rate": 1.462037160807379e-06, | |
| "loss": 0.4394, | |
| "step": 11375 | |
| }, | |
| { | |
| "epoch": 37.01434920634921, | |
| "grad_norm": 7.179015636444092, | |
| "learning_rate": 1.4536826627456225e-06, | |
| "loss": 0.4515, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 37.01593650793651, | |
| "grad_norm": 5.137106895446777, | |
| "learning_rate": 1.445328164683866e-06, | |
| "loss": 0.4704, | |
| "step": 11425 | |
| }, | |
| { | |
| "epoch": 37.01752380952381, | |
| "grad_norm": 4.871583461761475, | |
| "learning_rate": 1.4369736666221096e-06, | |
| "loss": 0.4704, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 37.01911111111111, | |
| "grad_norm": 3.7863287925720215, | |
| "learning_rate": 1.4286191685603528e-06, | |
| "loss": 0.4518, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 38.00152380952381, | |
| "grad_norm": 11.65233039855957, | |
| "learning_rate": 1.4202646704985963e-06, | |
| "loss": 0.5093, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 38.00311111111111, | |
| "grad_norm": 9.356032371520996, | |
| "learning_rate": 1.4119101724368401e-06, | |
| "loss": 0.4997, | |
| "step": 11525 | |
| }, | |
| { | |
| "epoch": 38.00469841269841, | |
| "grad_norm": 5.056600093841553, | |
| "learning_rate": 1.4035556743750837e-06, | |
| "loss": 0.4455, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 38.00628571428572, | |
| "grad_norm": 4.728214740753174, | |
| "learning_rate": 1.3952011763133273e-06, | |
| "loss": 0.4935, | |
| "step": 11575 | |
| }, | |
| { | |
| "epoch": 38.00787301587302, | |
| "grad_norm": 6.161364555358887, | |
| "learning_rate": 1.3868466782515708e-06, | |
| "loss": 0.4153, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 38.00946031746032, | |
| "grad_norm": 10.345149993896484, | |
| "learning_rate": 1.3784921801898144e-06, | |
| "loss": 0.4206, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 38.011047619047616, | |
| "grad_norm": 3.4351565837860107, | |
| "learning_rate": 1.370137682128058e-06, | |
| "loss": 0.434, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 38.01263492063492, | |
| "grad_norm": 6.009977340698242, | |
| "learning_rate": 1.3617831840663015e-06, | |
| "loss": 0.448, | |
| "step": 11675 | |
| }, | |
| { | |
| "epoch": 38.01422222222222, | |
| "grad_norm": 5.062911510467529, | |
| "learning_rate": 1.353428686004545e-06, | |
| "loss": 0.4399, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 38.01580952380952, | |
| "grad_norm": 5.664973735809326, | |
| "learning_rate": 1.3450741879427884e-06, | |
| "loss": 0.4674, | |
| "step": 11725 | |
| }, | |
| { | |
| "epoch": 38.01739682539682, | |
| "grad_norm": 7.776061534881592, | |
| "learning_rate": 1.336719689881032e-06, | |
| "loss": 0.481, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 38.01898412698413, | |
| "grad_norm": 4.977148532867432, | |
| "learning_rate": 1.3283651918192756e-06, | |
| "loss": 0.446, | |
| "step": 11775 | |
| }, | |
| { | |
| "epoch": 39.001396825396824, | |
| "grad_norm": 5.45735502243042, | |
| "learning_rate": 1.3200106937575191e-06, | |
| "loss": 0.4955, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 39.002984126984124, | |
| "grad_norm": 5.3033881187438965, | |
| "learning_rate": 1.3116561956957627e-06, | |
| "loss": 0.5065, | |
| "step": 11825 | |
| }, | |
| { | |
| "epoch": 39.00457142857143, | |
| "grad_norm": 5.209838390350342, | |
| "learning_rate": 1.3033016976340063e-06, | |
| "loss": 0.4283, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 39.00615873015873, | |
| "grad_norm": 9.413320541381836, | |
| "learning_rate": 1.2949471995722498e-06, | |
| "loss": 0.5134, | |
| "step": 11875 | |
| }, | |
| { | |
| "epoch": 39.00774603174603, | |
| "grad_norm": 3.614576816558838, | |
| "learning_rate": 1.2865927015104934e-06, | |
| "loss": 0.4132, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 39.00933333333333, | |
| "grad_norm": 5.418633937835693, | |
| "learning_rate": 1.278238203448737e-06, | |
| "loss": 0.411, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 39.01092063492064, | |
| "grad_norm": 7.182598114013672, | |
| "learning_rate": 1.2698837053869805e-06, | |
| "loss": 0.4367, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 39.01250793650794, | |
| "grad_norm": 5.821928024291992, | |
| "learning_rate": 1.2615292073252239e-06, | |
| "loss": 0.4313, | |
| "step": 11975 | |
| }, | |
| { | |
| "epoch": 39.01409523809524, | |
| "grad_norm": 5.386777877807617, | |
| "learning_rate": 1.2531747092634675e-06, | |
| "loss": 0.4423, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 39.01568253968254, | |
| "grad_norm": 7.081798553466797, | |
| "learning_rate": 1.244820211201711e-06, | |
| "loss": 0.4637, | |
| "step": 12025 | |
| }, | |
| { | |
| "epoch": 39.017269841269844, | |
| "grad_norm": 6.970532417297363, | |
| "learning_rate": 1.2364657131399546e-06, | |
| "loss": 0.4641, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 39.018857142857144, | |
| "grad_norm": 10.821274757385254, | |
| "learning_rate": 1.2281112150781982e-06, | |
| "loss": 0.4712, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 40.00126984126984, | |
| "grad_norm": 7.221808910369873, | |
| "learning_rate": 1.2197567170164417e-06, | |
| "loss": 0.4856, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 40.002857142857145, | |
| "grad_norm": 15.445313453674316, | |
| "learning_rate": 1.2114022189546853e-06, | |
| "loss": 0.5051, | |
| "step": 12125 | |
| }, | |
| { | |
| "epoch": 40.004444444444445, | |
| "grad_norm": 5.132606029510498, | |
| "learning_rate": 1.2030477208929288e-06, | |
| "loss": 0.4254, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 40.006031746031745, | |
| "grad_norm": 3.0163042545318604, | |
| "learning_rate": 1.1946932228311724e-06, | |
| "loss": 0.4991, | |
| "step": 12175 | |
| }, | |
| { | |
| "epoch": 40.007619047619045, | |
| "grad_norm": 4.396322250366211, | |
| "learning_rate": 1.186338724769416e-06, | |
| "loss": 0.4394, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 40.00920634920635, | |
| "grad_norm": 9.984151840209961, | |
| "learning_rate": 1.1779842267076595e-06, | |
| "loss": 0.4004, | |
| "step": 12225 | |
| }, | |
| { | |
| "epoch": 40.01079365079365, | |
| "grad_norm": 5.9347825050354, | |
| "learning_rate": 1.1696297286459031e-06, | |
| "loss": 0.4373, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 40.01238095238095, | |
| "grad_norm": 5.0575852394104, | |
| "learning_rate": 1.1612752305841465e-06, | |
| "loss": 0.4321, | |
| "step": 12275 | |
| }, | |
| { | |
| "epoch": 40.01396825396825, | |
| "grad_norm": 6.324869155883789, | |
| "learning_rate": 1.15292073252239e-06, | |
| "loss": 0.4353, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 40.01555555555556, | |
| "grad_norm": 6.5207414627075195, | |
| "learning_rate": 1.1445662344606336e-06, | |
| "loss": 0.4675, | |
| "step": 12325 | |
| }, | |
| { | |
| "epoch": 40.01714285714286, | |
| "grad_norm": 6.220884799957275, | |
| "learning_rate": 1.1362117363988774e-06, | |
| "loss": 0.4611, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 40.01873015873016, | |
| "grad_norm": 10.964550018310547, | |
| "learning_rate": 1.127857238337121e-06, | |
| "loss": 0.4673, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 41.00114285714286, | |
| "grad_norm": 6.545460224151611, | |
| "learning_rate": 1.1195027402753643e-06, | |
| "loss": 0.4593, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 41.00273015873016, | |
| "grad_norm": 6.692239761352539, | |
| "learning_rate": 1.1111482422136079e-06, | |
| "loss": 0.512, | |
| "step": 12425 | |
| }, | |
| { | |
| "epoch": 41.00431746031746, | |
| "grad_norm": 7.213928699493408, | |
| "learning_rate": 1.1027937441518514e-06, | |
| "loss": 0.4477, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 41.00590476190476, | |
| "grad_norm": 4.5662431716918945, | |
| "learning_rate": 1.094439246090095e-06, | |
| "loss": 0.4916, | |
| "step": 12475 | |
| }, | |
| { | |
| "epoch": 41.007492063492066, | |
| "grad_norm": 4.408071041107178, | |
| "learning_rate": 1.0860847480283386e-06, | |
| "loss": 0.4456, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 41.009079365079366, | |
| "grad_norm": 5.65850830078125, | |
| "learning_rate": 1.0777302499665821e-06, | |
| "loss": 0.3866, | |
| "step": 12525 | |
| }, | |
| { | |
| "epoch": 41.010666666666665, | |
| "grad_norm": 5.419500827789307, | |
| "learning_rate": 1.0693757519048257e-06, | |
| "loss": 0.4345, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 41.012253968253965, | |
| "grad_norm": 4.399853229522705, | |
| "learning_rate": 1.0610212538430693e-06, | |
| "loss": 0.4444, | |
| "step": 12575 | |
| }, | |
| { | |
| "epoch": 41.01384126984127, | |
| "grad_norm": 6.50507116317749, | |
| "learning_rate": 1.0526667557813128e-06, | |
| "loss": 0.4385, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 41.01542857142857, | |
| "grad_norm": 2.587691068649292, | |
| "learning_rate": 1.0443122577195564e-06, | |
| "loss": 0.4558, | |
| "step": 12625 | |
| }, | |
| { | |
| "epoch": 41.01701587301587, | |
| "grad_norm": 4.828845977783203, | |
| "learning_rate": 1.0359577596577997e-06, | |
| "loss": 0.4655, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 41.01860317460317, | |
| "grad_norm": 9.805785179138184, | |
| "learning_rate": 1.0276032615960433e-06, | |
| "loss": 0.4688, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 42.001015873015874, | |
| "grad_norm": 4.370798587799072, | |
| "learning_rate": 1.0192487635342869e-06, | |
| "loss": 0.4457, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 42.00260317460317, | |
| "grad_norm": 5.954914093017578, | |
| "learning_rate": 1.0108942654725304e-06, | |
| "loss": 0.5047, | |
| "step": 12725 | |
| }, | |
| { | |
| "epoch": 42.00419047619047, | |
| "grad_norm": 4.680336952209473, | |
| "learning_rate": 1.002539767410774e-06, | |
| "loss": 0.4536, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 42.00577777777778, | |
| "grad_norm": 3.612610101699829, | |
| "learning_rate": 9.941852693490176e-07, | |
| "loss": 0.5006, | |
| "step": 12775 | |
| }, | |
| { | |
| "epoch": 42.00736507936508, | |
| "grad_norm": 4.1838178634643555, | |
| "learning_rate": 9.858307712872611e-07, | |
| "loss": 0.4459, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 42.00895238095238, | |
| "grad_norm": 5.238033771514893, | |
| "learning_rate": 9.774762732255047e-07, | |
| "loss": 0.3692, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 42.01053968253968, | |
| "grad_norm": 5.992905139923096, | |
| "learning_rate": 9.691217751637483e-07, | |
| "loss": 0.4471, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 42.012126984126986, | |
| "grad_norm": 3.9099578857421875, | |
| "learning_rate": 9.607672771019918e-07, | |
| "loss": 0.4505, | |
| "step": 12875 | |
| }, | |
| { | |
| "epoch": 42.013714285714286, | |
| "grad_norm": 5.2203497886657715, | |
| "learning_rate": 9.524127790402353e-07, | |
| "loss": 0.4285, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 42.015301587301586, | |
| "grad_norm": 10.484596252441406, | |
| "learning_rate": 9.440582809784789e-07, | |
| "loss": 0.4644, | |
| "step": 12925 | |
| }, | |
| { | |
| "epoch": 42.016888888888886, | |
| "grad_norm": 9.001386642456055, | |
| "learning_rate": 9.357037829167224e-07, | |
| "loss": 0.4485, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 42.01847619047619, | |
| "grad_norm": 5.222927570343018, | |
| "learning_rate": 9.27349284854966e-07, | |
| "loss": 0.4722, | |
| "step": 12975 | |
| }, | |
| { | |
| "epoch": 43.00088888888889, | |
| "grad_norm": 5.298580169677734, | |
| "learning_rate": 9.189947867932095e-07, | |
| "loss": 0.4334, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 43.00247619047619, | |
| "grad_norm": 7.794508457183838, | |
| "learning_rate": 9.10640288731453e-07, | |
| "loss": 0.5104, | |
| "step": 13025 | |
| }, | |
| { | |
| "epoch": 43.004063492063494, | |
| "grad_norm": 7.346789360046387, | |
| "learning_rate": 9.022857906696966e-07, | |
| "loss": 0.4567, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 43.005650793650794, | |
| "grad_norm": 7.656489372253418, | |
| "learning_rate": 8.939312926079403e-07, | |
| "loss": 0.4923, | |
| "step": 13075 | |
| }, | |
| { | |
| "epoch": 43.007238095238094, | |
| "grad_norm": 4.487580299377441, | |
| "learning_rate": 8.855767945461838e-07, | |
| "loss": 0.4286, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 43.008825396825394, | |
| "grad_norm": 3.4565789699554443, | |
| "learning_rate": 8.772222964844272e-07, | |
| "loss": 0.3914, | |
| "step": 13125 | |
| }, | |
| { | |
| "epoch": 43.0104126984127, | |
| "grad_norm": 6.925444602966309, | |
| "learning_rate": 8.688677984226708e-07, | |
| "loss": 0.4391, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 43.012, | |
| "grad_norm": 5.899662494659424, | |
| "learning_rate": 8.605133003609144e-07, | |
| "loss": 0.4529, | |
| "step": 13175 | |
| }, | |
| { | |
| "epoch": 43.0135873015873, | |
| "grad_norm": 4.932972431182861, | |
| "learning_rate": 8.52158802299158e-07, | |
| "loss": 0.4289, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 43.0151746031746, | |
| "grad_norm": 6.803351402282715, | |
| "learning_rate": 8.438043042374015e-07, | |
| "loss": 0.4562, | |
| "step": 13225 | |
| }, | |
| { | |
| "epoch": 43.01676190476191, | |
| "grad_norm": 4.4438066482543945, | |
| "learning_rate": 8.35449806175645e-07, | |
| "loss": 0.4426, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 43.01834920634921, | |
| "grad_norm": 9.328689575195312, | |
| "learning_rate": 8.270953081138886e-07, | |
| "loss": 0.4814, | |
| "step": 13275 | |
| }, | |
| { | |
| "epoch": 44.0007619047619, | |
| "grad_norm": 10.716085433959961, | |
| "learning_rate": 8.187408100521321e-07, | |
| "loss": 0.4272, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 44.00234920634921, | |
| "grad_norm": 5.453390598297119, | |
| "learning_rate": 8.103863119903757e-07, | |
| "loss": 0.5105, | |
| "step": 13325 | |
| }, | |
| { | |
| "epoch": 44.00393650793651, | |
| "grad_norm": 8.206409454345703, | |
| "learning_rate": 8.020318139286193e-07, | |
| "loss": 0.4678, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 44.00552380952381, | |
| "grad_norm": 9.60810661315918, | |
| "learning_rate": 7.936773158668627e-07, | |
| "loss": 0.4715, | |
| "step": 13375 | |
| }, | |
| { | |
| "epoch": 44.00711111111111, | |
| "grad_norm": 4.23331880569458, | |
| "learning_rate": 7.853228178051063e-07, | |
| "loss": 0.4476, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 44.008698412698415, | |
| "grad_norm": 4.390725135803223, | |
| "learning_rate": 7.769683197433499e-07, | |
| "loss": 0.3836, | |
| "step": 13425 | |
| }, | |
| { | |
| "epoch": 44.010285714285715, | |
| "grad_norm": 8.254178047180176, | |
| "learning_rate": 7.686138216815934e-07, | |
| "loss": 0.441, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 44.011873015873014, | |
| "grad_norm": 9.834754943847656, | |
| "learning_rate": 7.60259323619837e-07, | |
| "loss": 0.4474, | |
| "step": 13475 | |
| }, | |
| { | |
| "epoch": 44.013460317460314, | |
| "grad_norm": 5.189121723175049, | |
| "learning_rate": 7.519048255580805e-07, | |
| "loss": 0.4259, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 44.01504761904762, | |
| "grad_norm": 8.684473037719727, | |
| "learning_rate": 7.43550327496324e-07, | |
| "loss": 0.4342, | |
| "step": 13525 | |
| }, | |
| { | |
| "epoch": 44.01663492063492, | |
| "grad_norm": 9.289813041687012, | |
| "learning_rate": 7.351958294345676e-07, | |
| "loss": 0.4599, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 44.01822222222222, | |
| "grad_norm": 4.835540294647217, | |
| "learning_rate": 7.268413313728113e-07, | |
| "loss": 0.4765, | |
| "step": 13575 | |
| }, | |
| { | |
| "epoch": 45.00063492063492, | |
| "grad_norm": 15.820073127746582, | |
| "learning_rate": 7.184868333110548e-07, | |
| "loss": 0.429, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 45.00222222222222, | |
| "grad_norm": 7.332337856292725, | |
| "learning_rate": 7.101323352492982e-07, | |
| "loss": 0.5291, | |
| "step": 13625 | |
| }, | |
| { | |
| "epoch": 45.00380952380952, | |
| "grad_norm": 9.775607109069824, | |
| "learning_rate": 7.017778371875418e-07, | |
| "loss": 0.4441, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 45.00539682539682, | |
| "grad_norm": 6.92523717880249, | |
| "learning_rate": 6.934233391257854e-07, | |
| "loss": 0.4656, | |
| "step": 13675 | |
| }, | |
| { | |
| "epoch": 45.00698412698413, | |
| "grad_norm": 4.137689590454102, | |
| "learning_rate": 6.85068841064029e-07, | |
| "loss": 0.4757, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 45.00857142857143, | |
| "grad_norm": 6.4185638427734375, | |
| "learning_rate": 6.767143430022725e-07, | |
| "loss": 0.3757, | |
| "step": 13725 | |
| }, | |
| { | |
| "epoch": 45.01015873015873, | |
| "grad_norm": 6.079351425170898, | |
| "learning_rate": 6.68359844940516e-07, | |
| "loss": 0.431, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 45.01174603174603, | |
| "grad_norm": 4.2088165283203125, | |
| "learning_rate": 6.600053468787596e-07, | |
| "loss": 0.4354, | |
| "step": 13775 | |
| }, | |
| { | |
| "epoch": 45.013333333333335, | |
| "grad_norm": 3.3986575603485107, | |
| "learning_rate": 6.516508488170031e-07, | |
| "loss": 0.4486, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 45.014920634920635, | |
| "grad_norm": 5.290012359619141, | |
| "learning_rate": 6.432963507552467e-07, | |
| "loss": 0.4227, | |
| "step": 13825 | |
| }, | |
| { | |
| "epoch": 45.016507936507935, | |
| "grad_norm": 8.828485488891602, | |
| "learning_rate": 6.349418526934903e-07, | |
| "loss": 0.4662, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 45.018095238095235, | |
| "grad_norm": 4.196980953216553, | |
| "learning_rate": 6.265873546317337e-07, | |
| "loss": 0.4792, | |
| "step": 13875 | |
| }, | |
| { | |
| "epoch": 46.00050793650794, | |
| "grad_norm": 8.14965534210205, | |
| "learning_rate": 6.182328565699773e-07, | |
| "loss": 0.4178, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 46.00209523809524, | |
| "grad_norm": 4.221451282501221, | |
| "learning_rate": 6.098783585082209e-07, | |
| "loss": 0.5127, | |
| "step": 13925 | |
| }, | |
| { | |
| "epoch": 46.003682539682536, | |
| "grad_norm": 7.106605052947998, | |
| "learning_rate": 6.015238604464644e-07, | |
| "loss": 0.4599, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 46.00526984126984, | |
| "grad_norm": 11.949932098388672, | |
| "learning_rate": 5.93169362384708e-07, | |
| "loss": 0.4757, | |
| "step": 13975 | |
| }, | |
| { | |
| "epoch": 46.00685714285714, | |
| "grad_norm": 7.468497276306152, | |
| "learning_rate": 5.848148643229516e-07, | |
| "loss": 0.4786, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 46.00844444444444, | |
| "grad_norm": 9.329829216003418, | |
| "learning_rate": 5.76460366261195e-07, | |
| "loss": 0.3776, | |
| "step": 14025 | |
| }, | |
| { | |
| "epoch": 46.01003174603174, | |
| "grad_norm": 5.68237829208374, | |
| "learning_rate": 5.681058681994387e-07, | |
| "loss": 0.419, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 46.01161904761905, | |
| "grad_norm": 9.558659553527832, | |
| "learning_rate": 5.597513701376821e-07, | |
| "loss": 0.4381, | |
| "step": 14075 | |
| }, | |
| { | |
| "epoch": 46.01320634920635, | |
| "grad_norm": 4.898390769958496, | |
| "learning_rate": 5.513968720759257e-07, | |
| "loss": 0.4285, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 46.01479365079365, | |
| "grad_norm": 6.284289360046387, | |
| "learning_rate": 5.430423740141693e-07, | |
| "loss": 0.4426, | |
| "step": 14125 | |
| }, | |
| { | |
| "epoch": 46.01638095238095, | |
| "grad_norm": 7.268927574157715, | |
| "learning_rate": 5.346878759524128e-07, | |
| "loss": 0.4454, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 46.017968253968256, | |
| "grad_norm": 8.74441146850586, | |
| "learning_rate": 5.263333778906564e-07, | |
| "loss": 0.4862, | |
| "step": 14175 | |
| }, | |
| { | |
| "epoch": 47.00038095238095, | |
| "grad_norm": 5.302139759063721, | |
| "learning_rate": 5.179788798288999e-07, | |
| "loss": 0.4241, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 47.00196825396825, | |
| "grad_norm": 9.133415222167969, | |
| "learning_rate": 5.096243817671434e-07, | |
| "loss": 0.4938, | |
| "step": 14225 | |
| }, | |
| { | |
| "epoch": 47.00355555555556, | |
| "grad_norm": 11.927820205688477, | |
| "learning_rate": 5.01269883705387e-07, | |
| "loss": 0.4569, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 47.00514285714286, | |
| "grad_norm": 6.953431606292725, | |
| "learning_rate": 4.929153856436306e-07, | |
| "loss": 0.4669, | |
| "step": 14275 | |
| }, | |
| { | |
| "epoch": 47.00673015873016, | |
| "grad_norm": 15.06204891204834, | |
| "learning_rate": 4.845608875818741e-07, | |
| "loss": 0.4956, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 47.00831746031746, | |
| "grad_norm": 6.762052059173584, | |
| "learning_rate": 4.7620638952011765e-07, | |
| "loss": 0.3959, | |
| "step": 14325 | |
| }, | |
| { | |
| "epoch": 47.009904761904764, | |
| "grad_norm": 5.515415668487549, | |
| "learning_rate": 4.678518914583612e-07, | |
| "loss": 0.4077, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 47.011492063492064, | |
| "grad_norm": 5.323752403259277, | |
| "learning_rate": 4.594973933966047e-07, | |
| "loss": 0.4282, | |
| "step": 14375 | |
| }, | |
| { | |
| "epoch": 47.01307936507936, | |
| "grad_norm": 3.4081521034240723, | |
| "learning_rate": 4.511428953348483e-07, | |
| "loss": 0.4318, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 47.01466666666666, | |
| "grad_norm": 10.325825691223145, | |
| "learning_rate": 4.427883972730919e-07, | |
| "loss": 0.443, | |
| "step": 14425 | |
| }, | |
| { | |
| "epoch": 47.01625396825397, | |
| "grad_norm": 7.097803592681885, | |
| "learning_rate": 4.344338992113354e-07, | |
| "loss": 0.4469, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 47.01784126984127, | |
| "grad_norm": 8.34315013885498, | |
| "learning_rate": 4.26079401149579e-07, | |
| "loss": 0.48, | |
| "step": 14475 | |
| }, | |
| { | |
| "epoch": 48.000253968253965, | |
| "grad_norm": 5.317016124725342, | |
| "learning_rate": 4.177249030878225e-07, | |
| "loss": 0.4391, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 48.00184126984127, | |
| "grad_norm": 6.396937370300293, | |
| "learning_rate": 4.0937040502606607e-07, | |
| "loss": 0.4688, | |
| "step": 14525 | |
| }, | |
| { | |
| "epoch": 48.00342857142857, | |
| "grad_norm": 3.8121814727783203, | |
| "learning_rate": 4.0101590696430963e-07, | |
| "loss": 0.4771, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 48.00501587301587, | |
| "grad_norm": 7.068954944610596, | |
| "learning_rate": 3.9266140890255315e-07, | |
| "loss": 0.4657, | |
| "step": 14575 | |
| }, | |
| { | |
| "epoch": 48.00660317460317, | |
| "grad_norm": 3.8409268856048584, | |
| "learning_rate": 3.843069108407967e-07, | |
| "loss": 0.4651, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 48.00819047619048, | |
| "grad_norm": 4.6952714920043945, | |
| "learning_rate": 3.759524127790402e-07, | |
| "loss": 0.4065, | |
| "step": 14625 | |
| }, | |
| { | |
| "epoch": 48.00977777777778, | |
| "grad_norm": 5.573005199432373, | |
| "learning_rate": 3.675979147172838e-07, | |
| "loss": 0.411, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 48.01136507936508, | |
| "grad_norm": 7.740847587585449, | |
| "learning_rate": 3.592434166555274e-07, | |
| "loss": 0.4348, | |
| "step": 14675 | |
| }, | |
| { | |
| "epoch": 48.01295238095238, | |
| "grad_norm": 3.9830234050750732, | |
| "learning_rate": 3.508889185937709e-07, | |
| "loss": 0.437, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 48.014539682539684, | |
| "grad_norm": 4.826086521148682, | |
| "learning_rate": 3.425344205320145e-07, | |
| "loss": 0.4347, | |
| "step": 14725 | |
| }, | |
| { | |
| "epoch": 48.016126984126984, | |
| "grad_norm": 7.815319538116455, | |
| "learning_rate": 3.34179922470258e-07, | |
| "loss": 0.4485, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 48.017714285714284, | |
| "grad_norm": 7.226869583129883, | |
| "learning_rate": 3.2582542440850157e-07, | |
| "loss": 0.4703, | |
| "step": 14775 | |
| }, | |
| { | |
| "epoch": 49.000126984126986, | |
| "grad_norm": 6.489394187927246, | |
| "learning_rate": 3.1747092634674513e-07, | |
| "loss": 0.4377, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 49.001714285714286, | |
| "grad_norm": 7.8261399269104, | |
| "learning_rate": 3.0911642828498865e-07, | |
| "loss": 0.4772, | |
| "step": 14825 | |
| }, | |
| { | |
| "epoch": 49.003301587301586, | |
| "grad_norm": 5.6710004806518555, | |
| "learning_rate": 3.007619302232322e-07, | |
| "loss": 0.4895, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 49.004888888888885, | |
| "grad_norm": 5.4189252853393555, | |
| "learning_rate": 2.924074321614758e-07, | |
| "loss": 0.4575, | |
| "step": 14875 | |
| }, | |
| { | |
| "epoch": 49.00647619047619, | |
| "grad_norm": 3.988452434539795, | |
| "learning_rate": 2.8405293409971934e-07, | |
| "loss": 0.4701, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 49.00806349206349, | |
| "grad_norm": 6.010385990142822, | |
| "learning_rate": 2.7569843603796286e-07, | |
| "loss": 0.4005, | |
| "step": 14925 | |
| }, | |
| { | |
| "epoch": 49.00965079365079, | |
| "grad_norm": 12.048097610473633, | |
| "learning_rate": 2.673439379762064e-07, | |
| "loss": 0.4209, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 49.01123809523809, | |
| "grad_norm": 5.6577839851379395, | |
| "learning_rate": 2.5898943991444994e-07, | |
| "loss": 0.4263, | |
| "step": 14975 | |
| }, | |
| { | |
| "epoch": 49.0128253968254, | |
| "grad_norm": 9.367551803588867, | |
| "learning_rate": 2.506349418526935e-07, | |
| "loss": 0.4412, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 49.0144126984127, | |
| "grad_norm": 6.728636741638184, | |
| "learning_rate": 2.4228044379093707e-07, | |
| "loss": 0.4234, | |
| "step": 15025 | |
| }, | |
| { | |
| "epoch": 49.016, | |
| "grad_norm": 6.2957234382629395, | |
| "learning_rate": 2.339259457291806e-07, | |
| "loss": 0.4565, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 49.0175873015873, | |
| "grad_norm": 10.448657989501953, | |
| "learning_rate": 2.2557144766742415e-07, | |
| "loss": 0.4544, | |
| "step": 15075 | |
| }, | |
| { | |
| "epoch": 49.019174603174605, | |
| "grad_norm": 6.694546699523926, | |
| "learning_rate": 2.172169496056677e-07, | |
| "loss": 0.4277, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 50.0015873015873, | |
| "grad_norm": 5.7494096755981445, | |
| "learning_rate": 2.0886245154391125e-07, | |
| "loss": 0.4951, | |
| "step": 15125 | |
| }, | |
| { | |
| "epoch": 50.00317460317461, | |
| "grad_norm": 6.14343786239624, | |
| "learning_rate": 2.0050795348215482e-07, | |
| "loss": 0.4884, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 50.00476190476191, | |
| "grad_norm": 5.558969974517822, | |
| "learning_rate": 1.9215345542039836e-07, | |
| "loss": 0.4453, | |
| "step": 15175 | |
| }, | |
| { | |
| "epoch": 50.006349206349206, | |
| "grad_norm": 12.143143653869629, | |
| "learning_rate": 1.837989573586419e-07, | |
| "loss": 0.4809, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 50.007936507936506, | |
| "grad_norm": 6.324706554412842, | |
| "learning_rate": 1.7544445929688546e-07, | |
| "loss": 0.4006, | |
| "step": 15225 | |
| }, | |
| { | |
| "epoch": 50.00952380952381, | |
| "grad_norm": 5.624573230743408, | |
| "learning_rate": 1.67089961235129e-07, | |
| "loss": 0.4116, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 50.01111111111111, | |
| "grad_norm": 6.506253242492676, | |
| "learning_rate": 1.5873546317337257e-07, | |
| "loss": 0.4286, | |
| "step": 15275 | |
| }, | |
| { | |
| "epoch": 50.01269841269841, | |
| "grad_norm": 6.208716869354248, | |
| "learning_rate": 1.503809651116161e-07, | |
| "loss": 0.4255, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 50.01428571428571, | |
| "grad_norm": 3.370025634765625, | |
| "learning_rate": 1.4202646704985967e-07, | |
| "loss": 0.4314, | |
| "step": 15325 | |
| }, | |
| { | |
| "epoch": 50.01587301587302, | |
| "grad_norm": 10.119969367980957, | |
| "learning_rate": 1.336719689881032e-07, | |
| "loss": 0.4615, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 50.01746031746032, | |
| "grad_norm": 5.545236110687256, | |
| "learning_rate": 1.2531747092634675e-07, | |
| "loss": 0.4646, | |
| "step": 15375 | |
| }, | |
| { | |
| "epoch": 50.01904761904762, | |
| "grad_norm": 4.946952819824219, | |
| "learning_rate": 1.169629728645903e-07, | |
| "loss": 0.4344, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 51.00146031746032, | |
| "grad_norm": 5.004914283752441, | |
| "learning_rate": 1.0860847480283386e-07, | |
| "loss": 0.4824, | |
| "step": 15425 | |
| }, | |
| { | |
| "epoch": 51.00304761904762, | |
| "grad_norm": 6.189335823059082, | |
| "learning_rate": 1.0025397674107741e-07, | |
| "loss": 0.4991, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 51.00463492063492, | |
| "grad_norm": 7.598541259765625, | |
| "learning_rate": 9.189947867932095e-08, | |
| "loss": 0.4302, | |
| "step": 15475 | |
| }, | |
| { | |
| "epoch": 51.00622222222222, | |
| "grad_norm": 5.034823417663574, | |
| "learning_rate": 8.35449806175645e-08, | |
| "loss": 0.4842, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 51.00780952380953, | |
| "grad_norm": 10.85693359375, | |
| "learning_rate": 7.519048255580805e-08, | |
| "loss": 0.4062, | |
| "step": 15525 | |
| }, | |
| { | |
| "epoch": 51.00939682539683, | |
| "grad_norm": 5.432575702667236, | |
| "learning_rate": 6.68359844940516e-08, | |
| "loss": 0.4042, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 51.01098412698413, | |
| "grad_norm": 6.148897171020508, | |
| "learning_rate": 5.848148643229515e-08, | |
| "loss": 0.4301, | |
| "step": 15575 | |
| }, | |
| { | |
| "epoch": 51.01257142857143, | |
| "grad_norm": 11.638060569763184, | |
| "learning_rate": 5.0126988370538704e-08, | |
| "loss": 0.4318, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 51.014158730158734, | |
| "grad_norm": 9.534034729003906, | |
| "learning_rate": 4.177249030878225e-08, | |
| "loss": 0.4328, | |
| "step": 15625 | |
| }, | |
| { | |
| "epoch": 51.01574603174603, | |
| "grad_norm": 7.127634048461914, | |
| "learning_rate": 3.34179922470258e-08, | |
| "loss": 0.4515, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 51.01733333333333, | |
| "grad_norm": 11.5350923538208, | |
| "learning_rate": 2.5063494185269352e-08, | |
| "loss": 0.472, | |
| "step": 15675 | |
| }, | |
| { | |
| "epoch": 51.01892063492063, | |
| "grad_norm": 2.837473154067993, | |
| "learning_rate": 1.67089961235129e-08, | |
| "loss": 0.4333, | |
| "step": 15700 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 15750, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.19361437974528e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
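
The object above matches the layout of a Hugging Face `Trainer` state file: `log_history` holds one record per `logging_steps` (25-step) interval with the running `loss`, `learning_rate`, `grad_norm`, and fractional `epoch`, and the run is bounded by `max_steps` rather than an epoch count (hence the sentinel value in `num_train_epochs`). Below is a minimal sketch for loading and summarizing such a state; the filename `trainer_state.json` and the optional matplotlib plot are assumptions for illustration, not part of the state itself.

```python
# Minimal sketch for inspecting the state above. Assumptions (not in the
# original data): the file is saved under the conventional name
# "trainer_state.json", and matplotlib is available for the optional plot.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Every record in this log_history carries loss/learning_rate/step; filter
# defensively anyway in case eval records are mixed in.
recs = [r for r in state["log_history"] if "loss" in r]

steps = [r["step"] for r in recs]
losses = [r["loss"] for r in recs]
lrs = [r["learning_rate"] for r in recs]

print(f"records logged : {len(recs)} (one every {state['logging_steps']} steps)")
print(f"loss           : {losses[0]:.4f} -> {losses[-1]:.4f}")
print(f"learning rate  : {max(lrs):.3e} peak, {lrs[-1]:.3e} at step {steps[-1]}")
print(f"progress       : step {steps[-1]} of {state['max_steps']}")

# Optional: plot loss and learning rate on twin axes.
try:
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.plot(steps, losses, label="train loss")
    ax.set_xlabel("step")
    ax.set_ylabel("loss")
    ax2 = ax.twinx()
    ax2.plot(steps, lrs, color="tab:orange")
    ax2.set_ylabel("learning rate")
    fig.tight_layout()
    fig.savefig("training_curve.png")
except ImportError:
    pass
```

Run against the state shown here, the summary would report the loss falling from roughly 0.51 at step 7525 to about 0.43 at step 15700, with the learning rate decaying linearly toward zero as `max_steps` (15750) is approached.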