| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9992242048099301, | |
| "eval_steps": 250, | |
| "global_step": 966, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001034393586759762, | |
| "grad_norm": 8.6292288425228, | |
| "learning_rate": 3.448275862068965e-08, | |
| "loss": 1.2678, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.002068787173519524, | |
| "grad_norm": 9.965470331972073, | |
| "learning_rate": 6.89655172413793e-08, | |
| "loss": 1.2086, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0031031807602792862, | |
| "grad_norm": 9.557769078016806, | |
| "learning_rate": 1.0344827586206897e-07, | |
| "loss": 1.2208, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.004137574347039048, | |
| "grad_norm": 10.505996002746633, | |
| "learning_rate": 1.379310344827586e-07, | |
| "loss": 1.2857, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00517196793379881, | |
| "grad_norm": 11.13702223836494, | |
| "learning_rate": 1.7241379310344828e-07, | |
| "loss": 1.2363, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0062063615205585725, | |
| "grad_norm": 10.753583674040131, | |
| "learning_rate": 2.0689655172413793e-07, | |
| "loss": 1.2329, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0072407551073183345, | |
| "grad_norm": 8.507239109392922, | |
| "learning_rate": 2.413793103448276e-07, | |
| "loss": 1.2656, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.008275148694078097, | |
| "grad_norm": 9.184448420528431, | |
| "learning_rate": 2.758620689655172e-07, | |
| "loss": 1.2489, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.009309542280837859, | |
| "grad_norm": 8.089688592723983, | |
| "learning_rate": 3.103448275862069e-07, | |
| "loss": 1.2894, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.01034393586759762, | |
| "grad_norm": 9.029342018857422, | |
| "learning_rate": 3.4482758620689656e-07, | |
| "loss": 1.219, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.011378329454357383, | |
| "grad_norm": 9.754534746854281, | |
| "learning_rate": 3.793103448275862e-07, | |
| "loss": 1.2412, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.012412723041117145, | |
| "grad_norm": 8.780868813191951, | |
| "learning_rate": 4.1379310344827586e-07, | |
| "loss": 1.2385, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.013447116627876907, | |
| "grad_norm": 7.891016901033683, | |
| "learning_rate": 4.482758620689655e-07, | |
| "loss": 1.2303, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.014481510214636669, | |
| "grad_norm": 9.55833203729864, | |
| "learning_rate": 4.827586206896552e-07, | |
| "loss": 1.2686, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.015515903801396431, | |
| "grad_norm": 8.287737043116875, | |
| "learning_rate": 5.172413793103448e-07, | |
| "loss": 1.2832, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.016550297388156193, | |
| "grad_norm": 8.508603196763787, | |
| "learning_rate": 5.517241379310344e-07, | |
| "loss": 1.2149, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.017584690974915957, | |
| "grad_norm": 7.723019786546963, | |
| "learning_rate": 5.86206896551724e-07, | |
| "loss": 1.206, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.018619084561675717, | |
| "grad_norm": 6.8086707245136004, | |
| "learning_rate": 6.206896551724138e-07, | |
| "loss": 1.2084, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.01965347814843548, | |
| "grad_norm": 6.574198043934514, | |
| "learning_rate": 6.551724137931034e-07, | |
| "loss": 1.266, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.02068787173519524, | |
| "grad_norm": 6.753306446650081, | |
| "learning_rate": 6.896551724137931e-07, | |
| "loss": 1.1796, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.021722265321955005, | |
| "grad_norm": 6.5625976144310485, | |
| "learning_rate": 7.241379310344827e-07, | |
| "loss": 1.1802, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.022756658908714766, | |
| "grad_norm": 6.484152901580707, | |
| "learning_rate": 7.586206896551724e-07, | |
| "loss": 1.1399, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.02379105249547453, | |
| "grad_norm": 4.927660836739744, | |
| "learning_rate": 7.931034482758621e-07, | |
| "loss": 1.1434, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.02482544608223429, | |
| "grad_norm": 4.6650906079822, | |
| "learning_rate": 8.275862068965517e-07, | |
| "loss": 1.1432, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.025859839668994054, | |
| "grad_norm": 4.52088784654716, | |
| "learning_rate": 8.620689655172412e-07, | |
| "loss": 1.1569, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.026894233255753814, | |
| "grad_norm": 4.2461305874656725, | |
| "learning_rate": 8.96551724137931e-07, | |
| "loss": 1.104, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.027928626842513578, | |
| "grad_norm": 4.799664537780579, | |
| "learning_rate": 9.310344827586206e-07, | |
| "loss": 1.1236, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.028963020429273338, | |
| "grad_norm": 4.145931715979601, | |
| "learning_rate": 9.655172413793103e-07, | |
| "loss": 1.1032, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.029997414016033102, | |
| "grad_norm": 3.72727705103677, | |
| "learning_rate": 1e-06, | |
| "loss": 1.1618, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.031031807602792862, | |
| "grad_norm": 4.228191511347933, | |
| "learning_rate": 9.999971896515835e-07, | |
| "loss": 1.0839, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.032066201189552626, | |
| "grad_norm": 3.2430072064027056, | |
| "learning_rate": 9.999887586379264e-07, | |
| "loss": 1.1732, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.033100594776312386, | |
| "grad_norm": 3.1770055169594844, | |
| "learning_rate": 9.999747070538048e-07, | |
| "loss": 1.098, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03413498836307215, | |
| "grad_norm": 3.103050550687401, | |
| "learning_rate": 9.999550350571783e-07, | |
| "loss": 1.014, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.035169381949831914, | |
| "grad_norm": 2.8063688949479215, | |
| "learning_rate": 9.999297428691876e-07, | |
| "loss": 1.1059, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.036203775536591674, | |
| "grad_norm": 3.0173849387755913, | |
| "learning_rate": 9.99898830774152e-07, | |
| "loss": 1.0261, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.037238169123351435, | |
| "grad_norm": 3.095904156733884, | |
| "learning_rate": 9.998622991195666e-07, | |
| "loss": 1.012, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.038272562710111195, | |
| "grad_norm": 3.277781044174708, | |
| "learning_rate": 9.99820148316098e-07, | |
| "loss": 0.9564, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.03930695629687096, | |
| "grad_norm": 2.649673884083575, | |
| "learning_rate": 9.997723788375802e-07, | |
| "loss": 1.0233, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04034134988363072, | |
| "grad_norm": 2.6905624888489013, | |
| "learning_rate": 9.997189912210084e-07, | |
| "loss": 1.0046, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.04137574347039048, | |
| "grad_norm": 2.582686418113294, | |
| "learning_rate": 9.99659986066534e-07, | |
| "loss": 1.0407, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04241013705715024, | |
| "grad_norm": 2.516684573523411, | |
| "learning_rate": 9.995953640374573e-07, | |
| "loss": 1.0513, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.04344453064391001, | |
| "grad_norm": 2.4520138256366106, | |
| "learning_rate": 9.995251258602197e-07, | |
| "loss": 1.0313, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04447892423066977, | |
| "grad_norm": 2.3158001801406725, | |
| "learning_rate": 9.994492723243964e-07, | |
| "loss": 1.0006, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.04551331781742953, | |
| "grad_norm": 2.091689299453111, | |
| "learning_rate": 9.993678042826867e-07, | |
| "loss": 1.0325, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.04654771140418929, | |
| "grad_norm": 2.2551948550368515, | |
| "learning_rate": 9.99280722650905e-07, | |
| "loss": 0.8933, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.04758210499094906, | |
| "grad_norm": 2.040969732809106, | |
| "learning_rate": 9.991880284079703e-07, | |
| "loss": 1.0254, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.04861649857770882, | |
| "grad_norm": 1.9479976337586296, | |
| "learning_rate": 9.99089722595895e-07, | |
| "loss": 0.9235, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.04965089216446858, | |
| "grad_norm": 2.1200199623937017, | |
| "learning_rate": 9.989858063197734e-07, | |
| "loss": 0.936, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05068528575122834, | |
| "grad_norm": 2.071659076247091, | |
| "learning_rate": 9.988762807477694e-07, | |
| "loss": 0.9003, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.05171967933798811, | |
| "grad_norm": 2.197778609307642, | |
| "learning_rate": 9.987611471111026e-07, | |
| "loss": 0.9034, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05275407292474787, | |
| "grad_norm": 2.2436321244849773, | |
| "learning_rate": 9.98640406704036e-07, | |
| "loss": 0.9976, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.05378846651150763, | |
| "grad_norm": 2.0524774761136144, | |
| "learning_rate": 9.985140608838603e-07, | |
| "loss": 0.9427, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.05482286009826739, | |
| "grad_norm": 2.0328934281600515, | |
| "learning_rate": 9.983821110708779e-07, | |
| "loss": 1.0018, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.055857253685027156, | |
| "grad_norm": 2.0713056621109778, | |
| "learning_rate": 9.98244558748389e-07, | |
| "loss": 0.9113, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.056891647271786916, | |
| "grad_norm": 2.0587340262014964, | |
| "learning_rate": 9.981014054626735e-07, | |
| "loss": 0.9647, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.057926040858546676, | |
| "grad_norm": 2.0470562113606436, | |
| "learning_rate": 9.979526528229736e-07, | |
| "loss": 0.9512, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.05896043444530644, | |
| "grad_norm": 1.8754225323309222, | |
| "learning_rate": 9.977983025014763e-07, | |
| "loss": 0.8854, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.059994828032066204, | |
| "grad_norm": 1.7225213006729667, | |
| "learning_rate": 9.976383562332945e-07, | |
| "loss": 0.8417, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.061029221618825964, | |
| "grad_norm": 1.8264006977633622, | |
| "learning_rate": 9.97472815816447e-07, | |
| "loss": 0.9161, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.062063615205585725, | |
| "grad_norm": 1.70040844523006, | |
| "learning_rate": 9.973016831118389e-07, | |
| "loss": 0.9553, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06309800879234549, | |
| "grad_norm": 1.7493516617790992, | |
| "learning_rate": 9.9712496004324e-07, | |
| "loss": 0.9578, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.06413240237910525, | |
| "grad_norm": 1.6371103264137667, | |
| "learning_rate": 9.969426485972644e-07, | |
| "loss": 0.888, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06516679596586501, | |
| "grad_norm": 1.753188214228944, | |
| "learning_rate": 9.967547508233464e-07, | |
| "loss": 0.9204, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.06620118955262477, | |
| "grad_norm": 1.7596020708991458, | |
| "learning_rate": 9.965612688337193e-07, | |
| "loss": 0.9213, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.06723558313938453, | |
| "grad_norm": 1.573070708926807, | |
| "learning_rate": 9.963622048033898e-07, | |
| "loss": 0.8869, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0682699767261443, | |
| "grad_norm": 1.5038762023933547, | |
| "learning_rate": 9.961575609701152e-07, | |
| "loss": 0.8869, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.06930437031290405, | |
| "grad_norm": 1.555573739130485, | |
| "learning_rate": 9.959473396343777e-07, | |
| "loss": 0.8727, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.07033876389966383, | |
| "grad_norm": 1.5595913743322327, | |
| "learning_rate": 9.957315431593576e-07, | |
| "loss": 0.9223, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07137315748642359, | |
| "grad_norm": 1.5831257444023528, | |
| "learning_rate": 9.955101739709083e-07, | |
| "loss": 0.8241, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.07240755107318335, | |
| "grad_norm": 1.656941667215278, | |
| "learning_rate": 9.95283234557528e-07, | |
| "loss": 0.8372, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07344194465994311, | |
| "grad_norm": 1.493239400938567, | |
| "learning_rate": 9.95050727470332e-07, | |
| "loss": 0.9673, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.07447633824670287, | |
| "grad_norm": 1.5660186375984284, | |
| "learning_rate": 9.948126553230241e-07, | |
| "loss": 0.8078, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07551073183346263, | |
| "grad_norm": 1.532085948289895, | |
| "learning_rate": 9.945690207918666e-07, | |
| "loss": 0.8415, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.07654512542022239, | |
| "grad_norm": 1.606555072425888, | |
| "learning_rate": 9.943198266156516e-07, | |
| "loss": 0.9266, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.07757951900698215, | |
| "grad_norm": 1.4962374490115282, | |
| "learning_rate": 9.940650755956685e-07, | |
| "loss": 0.8566, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07861391259374192, | |
| "grad_norm": 4.177550066822741, | |
| "learning_rate": 9.938047705956745e-07, | |
| "loss": 0.9019, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.07964830618050169, | |
| "grad_norm": 1.5297558579983996, | |
| "learning_rate": 9.935389145418597e-07, | |
| "loss": 0.7899, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.08068269976726145, | |
| "grad_norm": 1.5932323294464619, | |
| "learning_rate": 9.932675104228176e-07, | |
| "loss": 0.951, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.0817170933540212, | |
| "grad_norm": 1.738379806045725, | |
| "learning_rate": 9.92990561289508e-07, | |
| "loss": 0.8636, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.08275148694078097, | |
| "grad_norm": 1.5016475027288272, | |
| "learning_rate": 9.927080702552253e-07, | |
| "loss": 0.8775, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08378588052754073, | |
| "grad_norm": 1.5232156197455422, | |
| "learning_rate": 9.924200404955626e-07, | |
| "loss": 0.8628, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.08482027411430049, | |
| "grad_norm": 1.561366999478242, | |
| "learning_rate": 9.921264752483761e-07, | |
| "loss": 0.949, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.08585466770106025, | |
| "grad_norm": 1.6475916793885796, | |
| "learning_rate": 9.918273778137476e-07, | |
| "loss": 0.914, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.08688906128782002, | |
| "grad_norm": 1.7258587410383575, | |
| "learning_rate": 9.915227515539494e-07, | |
| "loss": 0.8488, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.08792345487457978, | |
| "grad_norm": 1.527584345127291, | |
| "learning_rate": 9.912125998934055e-07, | |
| "loss": 0.8263, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.08895784846133954, | |
| "grad_norm": 1.5781029688469563, | |
| "learning_rate": 9.908969263186525e-07, | |
| "loss": 0.8773, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.0899922420480993, | |
| "grad_norm": 1.5065790590265955, | |
| "learning_rate": 9.905757343783012e-07, | |
| "loss": 0.9064, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.09102663563485906, | |
| "grad_norm": 1.5064854183346637, | |
| "learning_rate": 9.902490276829971e-07, | |
| "loss": 0.9239, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.09206102922161882, | |
| "grad_norm": 1.497767962948581, | |
| "learning_rate": 9.899168099053782e-07, | |
| "loss": 0.8906, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.09309542280837858, | |
| "grad_norm": 1.4788534876166923, | |
| "learning_rate": 9.89579084780036e-07, | |
| "loss": 0.8839, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09412981639513834, | |
| "grad_norm": 1.5874151602991722, | |
| "learning_rate": 9.89235856103471e-07, | |
| "loss": 0.8322, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.09516420998189812, | |
| "grad_norm": 1.5264816644556816, | |
| "learning_rate": 9.88887127734052e-07, | |
| "loss": 0.8938, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.09619860356865788, | |
| "grad_norm": 1.4598708502748197, | |
| "learning_rate": 9.885329035919722e-07, | |
| "loss": 0.8806, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.09723299715541764, | |
| "grad_norm": 1.532930840270963, | |
| "learning_rate": 9.881731876592044e-07, | |
| "loss": 0.8895, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.0982673907421774, | |
| "grad_norm": 1.576219389026944, | |
| "learning_rate": 9.87807983979457e-07, | |
| "loss": 0.8796, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.09930178432893716, | |
| "grad_norm": 1.4596941675734272, | |
| "learning_rate": 9.874372966581283e-07, | |
| "loss": 0.8876, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.10033617791569692, | |
| "grad_norm": 1.4106584773083541, | |
| "learning_rate": 9.870611298622604e-07, | |
| "loss": 0.8888, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.10137057150245668, | |
| "grad_norm": 1.5172665864057402, | |
| "learning_rate": 9.866794878204925e-07, | |
| "loss": 0.8859, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.10240496508921644, | |
| "grad_norm": 1.4604425372411876, | |
| "learning_rate": 9.862923748230128e-07, | |
| "loss": 0.8692, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.10343935867597621, | |
| "grad_norm": 1.6260768941682198, | |
| "learning_rate": 9.858997952215111e-07, | |
| "loss": 0.754, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10447375226273597, | |
| "grad_norm": 1.6604166901689625, | |
| "learning_rate": 9.855017534291292e-07, | |
| "loss": 0.8731, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.10550814584949574, | |
| "grad_norm": 1.4165233772189687, | |
| "learning_rate": 9.850982539204114e-07, | |
| "loss": 0.8702, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.1065425394362555, | |
| "grad_norm": 1.4715082552484342, | |
| "learning_rate": 9.846893012312547e-07, | |
| "loss": 0.8651, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.10757693302301526, | |
| "grad_norm": 1.3639239343579785, | |
| "learning_rate": 9.842748999588573e-07, | |
| "loss": 0.8264, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.10861132660977502, | |
| "grad_norm": 1.450213076046572, | |
| "learning_rate": 9.838550547616669e-07, | |
| "loss": 0.8914, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.10964572019653478, | |
| "grad_norm": 1.4219414373923307, | |
| "learning_rate": 9.83429770359329e-07, | |
| "loss": 0.8777, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.11068011378329454, | |
| "grad_norm": 1.4913067543289555, | |
| "learning_rate": 9.829990515326323e-07, | |
| "loss": 0.8833, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.11171450737005431, | |
| "grad_norm": 1.5163182386548368, | |
| "learning_rate": 9.825629031234573e-07, | |
| "loss": 0.8701, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.11274890095681407, | |
| "grad_norm": 1.4925830244211415, | |
| "learning_rate": 9.821213300347196e-07, | |
| "loss": 0.8438, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.11378329454357383, | |
| "grad_norm": 1.4928661561665226, | |
| "learning_rate": 9.816743372303166e-07, | |
| "loss": 0.8331, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11481768813033359, | |
| "grad_norm": 1.5585360270293855, | |
| "learning_rate": 9.812219297350696e-07, | |
| "loss": 0.8159, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.11585208171709335, | |
| "grad_norm": 1.7332087385191335, | |
| "learning_rate": 9.807641126346702e-07, | |
| "loss": 0.8733, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.11688647530385311, | |
| "grad_norm": 1.4450294760262492, | |
| "learning_rate": 9.8030089107562e-07, | |
| "loss": 0.8594, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.11792086889061287, | |
| "grad_norm": 1.4452495342690843, | |
| "learning_rate": 9.798322702651754e-07, | |
| "loss": 0.826, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.11895526247737263, | |
| "grad_norm": 1.5521749322974772, | |
| "learning_rate": 9.793582554712872e-07, | |
| "loss": 0.857, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.11998965606413241, | |
| "grad_norm": 1.4440567137100666, | |
| "learning_rate": 9.788788520225419e-07, | |
| "loss": 0.8796, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.12102404965089217, | |
| "grad_norm": 1.5515099256914817, | |
| "learning_rate": 9.783940653081031e-07, | |
| "loss": 0.8716, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.12205844323765193, | |
| "grad_norm": 1.4211278874935114, | |
| "learning_rate": 9.779039007776486e-07, | |
| "loss": 0.8354, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.12309283682441169, | |
| "grad_norm": 1.560974467098542, | |
| "learning_rate": 9.77408363941311e-07, | |
| "loss": 0.7504, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.12412723041117145, | |
| "grad_norm": 1.3929067590589914, | |
| "learning_rate": 9.769074603696152e-07, | |
| "loss": 0.7936, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1251616239979312, | |
| "grad_norm": 1.630654783938557, | |
| "learning_rate": 9.764011956934151e-07, | |
| "loss": 0.8687, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.12619601758469098, | |
| "grad_norm": 1.4321536586745944, | |
| "learning_rate": 9.758895756038312e-07, | |
| "loss": 0.8551, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.12723041117145073, | |
| "grad_norm": 1.5131276901293105, | |
| "learning_rate": 9.753726058521866e-07, | |
| "loss": 0.8551, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.1282648047582105, | |
| "grad_norm": 1.4914251931467337, | |
| "learning_rate": 9.748502922499417e-07, | |
| "loss": 0.868, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.12929919834497025, | |
| "grad_norm": 1.5289397758370802, | |
| "learning_rate": 9.74322640668629e-07, | |
| "loss": 0.8596, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.13033359193173003, | |
| "grad_norm": 1.3787346083280774, | |
| "learning_rate": 9.737896570397883e-07, | |
| "loss": 0.8415, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.13136798551848977, | |
| "grad_norm": 1.4073683893866338, | |
| "learning_rate": 9.732513473548978e-07, | |
| "loss": 0.9721, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.13240237910524955, | |
| "grad_norm": 1.4671303533720539, | |
| "learning_rate": 9.727077176653088e-07, | |
| "loss": 0.8142, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.13343677269200932, | |
| "grad_norm": 1.3755208641623216, | |
| "learning_rate": 9.721587740821765e-07, | |
| "loss": 0.7481, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.13447116627876907, | |
| "grad_norm": 1.5219723932594529, | |
| "learning_rate": 9.716045227763923e-07, | |
| "loss": 0.849, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.13550555986552884, | |
| "grad_norm": 1.6193849304174701, | |
| "learning_rate": 9.710449699785129e-07, | |
| "loss": 0.8231, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1365399534522886, | |
| "grad_norm": 1.5038191885737227, | |
| "learning_rate": 9.704801219786914e-07, | |
| "loss": 0.8724, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.13757434703904836, | |
| "grad_norm": 1.5171932371668595, | |
| "learning_rate": 9.69909985126607e-07, | |
| "loss": 0.8876, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.1386087406258081, | |
| "grad_norm": 1.639050897024366, | |
| "learning_rate": 9.693345658313922e-07, | |
| "loss": 0.8225, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.13964313421256788, | |
| "grad_norm": 1.4253407372370313, | |
| "learning_rate": 9.68753870561562e-07, | |
| "loss": 0.8217, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.14067752779932766, | |
| "grad_norm": 1.4484134283181138, | |
| "learning_rate": 9.681679058449402e-07, | |
| "loss": 0.8276, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1417119213860874, | |
| "grad_norm": 1.5307439468467967, | |
| "learning_rate": 9.675766782685873e-07, | |
| "loss": 0.7726, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.14274631497284718, | |
| "grad_norm": 1.5029635121448162, | |
| "learning_rate": 9.669801944787248e-07, | |
| "loss": 0.8393, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.14378070855960692, | |
| "grad_norm": 1.4821240652548662, | |
| "learning_rate": 9.663784611806623e-07, | |
| "loss": 0.8416, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.1448151021463667, | |
| "grad_norm": 1.398531670616934, | |
| "learning_rate": 9.657714851387202e-07, | |
| "loss": 0.8731, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.14584949573312644, | |
| "grad_norm": 1.3516541685959986, | |
| "learning_rate": 9.651592731761554e-07, | |
| "loss": 0.8472, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.14688388931988622, | |
| "grad_norm": 1.4270057997624648, | |
| "learning_rate": 9.645418321750834e-07, | |
| "loss": 0.8713, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.147918282906646, | |
| "grad_norm": 1.601745930929728, | |
| "learning_rate": 9.639191690764017e-07, | |
| "loss": 0.7939, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.14895267649340574, | |
| "grad_norm": 1.3960757900728265, | |
| "learning_rate": 9.632912908797114e-07, | |
| "loss": 0.8521, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1499870700801655, | |
| "grad_norm": 1.4935521251861215, | |
| "learning_rate": 9.626582046432384e-07, | |
| "loss": 0.7767, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.15102146366692526, | |
| "grad_norm": 1.5020245793618436, | |
| "learning_rate": 9.62019917483754e-07, | |
| "loss": 0.8651, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.15205585725368503, | |
| "grad_norm": 1.480439042014831, | |
| "learning_rate": 9.61376436576496e-07, | |
| "loss": 0.8904, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.15309025084044478, | |
| "grad_norm": 1.4559411278849566, | |
| "learning_rate": 9.607277691550862e-07, | |
| "loss": 0.8043, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.15412464442720455, | |
| "grad_norm": 1.5181539446896193, | |
| "learning_rate": 9.600739225114505e-07, | |
| "loss": 0.966, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.1551590380139643, | |
| "grad_norm": 1.4439277868169944, | |
| "learning_rate": 9.594149039957364e-07, | |
| "loss": 0.898, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.15619343160072408, | |
| "grad_norm": 1.4043143638840228, | |
| "learning_rate": 9.587507210162305e-07, | |
| "loss": 0.8523, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.15722782518748385, | |
| "grad_norm": 1.5087316460749116, | |
| "learning_rate": 9.580813810392753e-07, | |
| "loss": 0.9122, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.1582622187742436, | |
| "grad_norm": 1.395564260639285, | |
| "learning_rate": 9.57406891589185e-07, | |
| "loss": 0.8463, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.15929661236100337, | |
| "grad_norm": 1.372587812050779, | |
| "learning_rate": 9.567272602481606e-07, | |
| "loss": 0.8825, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.16033100594776312, | |
| "grad_norm": 1.3868157787434698, | |
| "learning_rate": 9.560424946562058e-07, | |
| "loss": 0.8696, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.1613653995345229, | |
| "grad_norm": 1.3780239286456595, | |
| "learning_rate": 9.553526025110404e-07, | |
| "loss": 0.847, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.16239979312128264, | |
| "grad_norm": 1.4529391659504043, | |
| "learning_rate": 9.546575915680132e-07, | |
| "loss": 0.8505, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.1634341867080424, | |
| "grad_norm": 1.4218985262426207, | |
| "learning_rate": 9.539574696400163e-07, | |
| "loss": 0.8353, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.16446858029480219, | |
| "grad_norm": 1.4970269220621746, | |
| "learning_rate": 9.532522445973954e-07, | |
| "loss": 0.8988, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.16550297388156193, | |
| "grad_norm": 1.4045011239037704, | |
| "learning_rate": 9.525419243678631e-07, | |
| "loss": 0.8269, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1665373674683217, | |
| "grad_norm": 1.83508753779999, | |
| "learning_rate": 9.518265169364088e-07, | |
| "loss": 0.8065, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.16757176105508145, | |
| "grad_norm": 1.5713407513622097, | |
| "learning_rate": 9.511060303452089e-07, | |
| "loss": 0.8709, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.16860615464184123, | |
| "grad_norm": 1.5075919814598666, | |
| "learning_rate": 9.503804726935367e-07, | |
| "loss": 0.8509, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.16964054822860097, | |
| "grad_norm": 1.4008248280416542, | |
| "learning_rate": 9.496498521376716e-07, | |
| "loss": 0.8033, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.17067494181536075, | |
| "grad_norm": 1.493900900080009, | |
| "learning_rate": 9.48914176890807e-07, | |
| "loss": 0.8529, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.1717093354021205, | |
| "grad_norm": 1.4588742471155032, | |
| "learning_rate": 9.481734552229576e-07, | |
| "loss": 0.8456, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.17274372898888027, | |
| "grad_norm": 1.4977346708119559, | |
| "learning_rate": 9.474276954608677e-07, | |
| "loss": 0.8221, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.17377812257564004, | |
| "grad_norm": 1.3378999488708223, | |
| "learning_rate": 9.466769059879159e-07, | |
| "loss": 0.8695, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1748125161623998, | |
| "grad_norm": 1.5341879482844099, | |
| "learning_rate": 9.459210952440225e-07, | |
| "loss": 0.8555, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.17584690974915956, | |
| "grad_norm": 1.9440085900025996, | |
| "learning_rate": 9.451602717255536e-07, | |
| "loss": 0.8553, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1768813033359193, | |
| "grad_norm": 1.4065858361697658, | |
| "learning_rate": 9.443944439852258e-07, | |
| "loss": 0.8326, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.17791569692267908, | |
| "grad_norm": 1.412142896020121, | |
| "learning_rate": 9.436236206320103e-07, | |
| "loss": 0.8313, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.17895009050943883, | |
| "grad_norm": 1.603723555718994, | |
| "learning_rate": 9.428478103310356e-07, | |
| "loss": 0.8453, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.1799844840961986, | |
| "grad_norm": 1.4116909406648754, | |
| "learning_rate": 9.420670218034912e-07, | |
| "loss": 0.8699, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.18101887768295838, | |
| "grad_norm": 1.429645805583994, | |
| "learning_rate": 9.412812638265278e-07, | |
| "loss": 0.8765, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.18205327126971813, | |
| "grad_norm": 1.4291844674737417, | |
| "learning_rate": 9.404905452331604e-07, | |
| "loss": 0.8081, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.1830876648564779, | |
| "grad_norm": 1.4125394882764855, | |
| "learning_rate": 9.39694874912168e-07, | |
| "loss": 0.8263, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.18412205844323765, | |
| "grad_norm": 1.6128653163626114, | |
| "learning_rate": 9.38894261807994e-07, | |
| "loss": 0.8593, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.18515645202999742, | |
| "grad_norm": 1.3644358859856955, | |
| "learning_rate": 9.380887149206451e-07, | |
| "loss": 0.8302, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.18619084561675717, | |
| "grad_norm": 1.3789524033916745, | |
| "learning_rate": 9.372782433055913e-07, | |
| "loss": 0.8397, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.18722523920351694, | |
| "grad_norm": 1.388287044768484, | |
| "learning_rate": 9.364628560736631e-07, | |
| "loss": 0.8488, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.1882596327902767, | |
| "grad_norm": 1.5125059312677152, | |
| "learning_rate": 9.356425623909492e-07, | |
| "loss": 0.81, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.18929402637703646, | |
| "grad_norm": 1.5007203238927405, | |
| "learning_rate": 9.348173714786937e-07, | |
| "loss": 0.8582, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.19032841996379624, | |
| "grad_norm": 1.4653792691805887, | |
| "learning_rate": 9.339872926131929e-07, | |
| "loss": 0.8029, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.19136281355055598, | |
| "grad_norm": 1.4243996006046065, | |
| "learning_rate": 9.331523351256895e-07, | |
| "loss": 0.8868, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.19239720713731576, | |
| "grad_norm": 1.3983462780958804, | |
| "learning_rate": 9.3231250840227e-07, | |
| "loss": 0.8827, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.1934316007240755, | |
| "grad_norm": 1.5337317004559414, | |
| "learning_rate": 9.314678218837569e-07, | |
| "loss": 0.8326, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.19446599431083528, | |
| "grad_norm": 1.5167484648519818, | |
| "learning_rate": 9.306182850656037e-07, | |
| "loss": 0.888, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.19550038789759502, | |
| "grad_norm": 1.4646304843997042, | |
| "learning_rate": 9.297639074977885e-07, | |
| "loss": 0.8818, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.1965347814843548, | |
| "grad_norm": 1.4596096989743117, | |
| "learning_rate": 9.289046987847056e-07, | |
| "loss": 0.863, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.19756917507111457, | |
| "grad_norm": 1.637336186140239, | |
| "learning_rate": 9.280406685850586e-07, | |
| "loss": 0.8421, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.19860356865787432, | |
| "grad_norm": 1.5007076440182832, | |
| "learning_rate": 9.271718266117511e-07, | |
| "loss": 0.8244, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.1996379622446341, | |
| "grad_norm": 1.4703769666137168, | |
| "learning_rate": 9.262981826317776e-07, | |
| "loss": 0.8129, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.20067235583139384, | |
| "grad_norm": 1.4100738457683402, | |
| "learning_rate": 9.254197464661142e-07, | |
| "loss": 0.837, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.2017067494181536, | |
| "grad_norm": 1.4181987567753875, | |
| "learning_rate": 9.245365279896075e-07, | |
| "loss": 0.7768, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.20274114300491336, | |
| "grad_norm": 1.371765935747676, | |
| "learning_rate": 9.236485371308641e-07, | |
| "loss": 0.8525, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.20377553659167313, | |
| "grad_norm": 1.5701329432776785, | |
| "learning_rate": 9.227557838721389e-07, | |
| "loss": 0.8314, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.20480993017843288, | |
| "grad_norm": 1.505123558108738, | |
| "learning_rate": 9.218582782492226e-07, | |
| "loss": 0.8734, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.20584432376519265, | |
| "grad_norm": 1.4697003501656294, | |
| "learning_rate": 9.209560303513295e-07, | |
| "loss": 0.7869, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.20687871735195243, | |
| "grad_norm": 1.5020578161277975, | |
| "learning_rate": 9.200490503209831e-07, | |
| "loss": 0.744, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.20791311093871218, | |
| "grad_norm": 1.4644359084199055, | |
| "learning_rate": 9.19137348353903e-07, | |
| "loss": 0.8221, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.20894750452547195, | |
| "grad_norm": 1.6041520442165111, | |
| "learning_rate": 9.1822093469889e-07, | |
| "loss": 0.7807, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.2099818981122317, | |
| "grad_norm": 1.5080190934947244, | |
| "learning_rate": 9.172998196577109e-07, | |
| "loss": 0.7861, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.21101629169899147, | |
| "grad_norm": 1.4251098397714579, | |
| "learning_rate": 9.163740135849822e-07, | |
| "loss": 0.8149, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.21205068528575122, | |
| "grad_norm": 1.4840100339855091, | |
| "learning_rate": 9.154435268880546e-07, | |
| "loss": 0.8141, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.213085078872511, | |
| "grad_norm": 1.4086193179540571, | |
| "learning_rate": 9.145083700268954e-07, | |
| "loss": 0.7382, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.21411947245927077, | |
| "grad_norm": 1.5961333066398782, | |
| "learning_rate": 9.135685535139708e-07, | |
| "loss": 0.8622, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.2151538660460305, | |
| "grad_norm": 1.5769590087575716, | |
| "learning_rate": 9.126240879141285e-07, | |
| "loss": 0.7436, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2161882596327903, | |
| "grad_norm": 1.4374590835362402, | |
| "learning_rate": 9.116749838444777e-07, | |
| "loss": 0.8197, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.21722265321955003, | |
| "grad_norm": 1.4602830746246855, | |
| "learning_rate": 9.107212519742713e-07, | |
| "loss": 0.8294, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2182570468063098, | |
| "grad_norm": 1.9265155504006533, | |
| "learning_rate": 9.097629030247846e-07, | |
| "loss": 0.851, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.21929144039306955, | |
| "grad_norm": 1.4215257148548188, | |
| "learning_rate": 9.087999477691952e-07, | |
| "loss": 0.8514, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.22032583397982933, | |
| "grad_norm": 1.550246735280128, | |
| "learning_rate": 9.078323970324624e-07, | |
| "loss": 0.7752, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.22136022756658907, | |
| "grad_norm": 1.5941164190887698, | |
| "learning_rate": 9.068602616912049e-07, | |
| "loss": 0.755, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.22239462115334885, | |
| "grad_norm": 1.458893558971683, | |
| "learning_rate": 9.058835526735786e-07, | |
| "loss": 0.9322, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.22342901474010862, | |
| "grad_norm": 1.4216021171016564, | |
| "learning_rate": 9.049022809591544e-07, | |
| "loss": 0.8629, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.22446340832686837, | |
| "grad_norm": 1.4354570668220092, | |
| "learning_rate": 9.039164575787936e-07, | |
| "loss": 0.7738, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.22549780191362814, | |
| "grad_norm": 1.5596991517575327, | |
| "learning_rate": 9.029260936145251e-07, | |
| "loss": 0.7552, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2265321955003879, | |
| "grad_norm": 1.4921095313390973, | |
| "learning_rate": 9.019312001994201e-07, | |
| "loss": 0.94, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.22756658908714766, | |
| "grad_norm": 1.4878683464339324, | |
| "learning_rate": 9.00931788517467e-07, | |
| "loss": 0.8634, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2286009826739074, | |
| "grad_norm": 1.5494236621641782, | |
| "learning_rate": 8.99927869803446e-07, | |
| "loss": 0.8201, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.22963537626066718, | |
| "grad_norm": 1.5721897259781508, | |
| "learning_rate": 8.989194553428026e-07, | |
| "loss": 0.862, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.23066976984742696, | |
| "grad_norm": 1.5743137131561835, | |
| "learning_rate": 8.979065564715208e-07, | |
| "loss": 0.8648, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.2317041634341867, | |
| "grad_norm": 1.6200870688905828, | |
| "learning_rate": 8.968891845759954e-07, | |
| "loss": 0.8137, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.23273855702094648, | |
| "grad_norm": 1.5184600989578962, | |
| "learning_rate": 8.958673510929046e-07, | |
| "loss": 0.8433, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.23377295060770623, | |
| "grad_norm": 1.4495493944560627, | |
| "learning_rate": 8.948410675090806e-07, | |
| "loss": 0.8146, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.234807344194466, | |
| "grad_norm": 1.435317674877896, | |
| "learning_rate": 8.938103453613813e-07, | |
| "loss": 0.7553, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.23584173778122575, | |
| "grad_norm": 1.5925789879452341, | |
| "learning_rate": 8.927751962365602e-07, | |
| "loss": 0.8341, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.23687613136798552, | |
| "grad_norm": 1.6220535235529296, | |
| "learning_rate": 8.917356317711358e-07, | |
| "loss": 0.7439, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.23791052495474527, | |
| "grad_norm": 1.577717682068361, | |
| "learning_rate": 8.906916636512618e-07, | |
| "loss": 0.7855, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.23894491854150504, | |
| "grad_norm": 1.4036946003756745, | |
| "learning_rate": 8.896433036125949e-07, | |
| "loss": 0.8587, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.23997931212826482, | |
| "grad_norm": 1.4943415565579108, | |
| "learning_rate": 8.885905634401627e-07, | |
| "loss": 0.8976, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.24101370571502456, | |
| "grad_norm": 1.3960704331335922, | |
| "learning_rate": 8.87533454968232e-07, | |
| "loss": 0.8369, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.24204809930178434, | |
| "grad_norm": 1.5598461149123333, | |
| "learning_rate": 8.864719900801754e-07, | |
| "loss": 0.8452, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.24308249288854408, | |
| "grad_norm": 1.4872658713261582, | |
| "learning_rate": 8.854061807083375e-07, | |
| "loss": 0.834, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.24411688647530386, | |
| "grad_norm": 1.4079953936618959, | |
| "learning_rate": 8.84336038833901e-07, | |
| "loss": 0.782, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2451512800620636, | |
| "grad_norm": 1.440440157835943, | |
| "learning_rate": 8.83261576486752e-07, | |
| "loss": 0.8122, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.24618567364882338, | |
| "grad_norm": 1.5533288289550848, | |
| "learning_rate": 8.821828057453446e-07, | |
| "loss": 0.8182, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.24722006723558315, | |
| "grad_norm": 1.501566893111836, | |
| "learning_rate": 8.810997387365655e-07, | |
| "loss": 0.8444, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.2482544608223429, | |
| "grad_norm": 1.3559085480906514, | |
| "learning_rate": 8.800123876355975e-07, | |
| "loss": 0.8141, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.24928885440910267, | |
| "grad_norm": 1.3966163316985138, | |
| "learning_rate": 8.789207646657821e-07, | |
| "loss": 0.815, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2503232479958624, | |
| "grad_norm": 1.372097559732323, | |
| "learning_rate": 8.778248820984829e-07, | |
| "loss": 0.8457, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2513576415826222, | |
| "grad_norm": 1.5224963572020422, | |
| "learning_rate": 8.767247522529472e-07, | |
| "loss": 0.7814, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.25239203516938197, | |
| "grad_norm": 1.5088272668439549, | |
| "learning_rate": 8.756203874961678e-07, | |
| "loss": 0.8677, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.2534264287561417, | |
| "grad_norm": 1.50352176370755, | |
| "learning_rate": 8.745118002427438e-07, | |
| "loss": 0.8021, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.25446082234290146, | |
| "grad_norm": 1.395057792466667, | |
| "learning_rate": 8.733990029547407e-07, | |
| "loss": 0.7897, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.25549521592966123, | |
| "grad_norm": 1.477519024927662, | |
| "learning_rate": 8.722820081415508e-07, | |
| "loss": 0.7512, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.256529609516421, | |
| "grad_norm": 1.5541238933623498, | |
| "learning_rate": 8.711608283597529e-07, | |
| "loss": 0.774, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.2575640031031808, | |
| "grad_norm": 1.5339084994995749, | |
| "learning_rate": 8.700354762129699e-07, | |
| "loss": 0.8275, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.2585983966899405, | |
| "grad_norm": 1.4047189037517995, | |
| "learning_rate": 8.689059643517285e-07, | |
| "loss": 0.8244, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2585983966899405, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_loss": 2.765176773071289, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_runtime": 5.8374, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_samples_per_second": 15.932, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_steps_per_second": 2.056, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2596327902767003, | |
| "grad_norm": 1.4394754388399884, | |
| "learning_rate": 8.677723054733162e-07, | |
| "loss": 0.8506, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.26066718386346005, | |
| "grad_norm": 1.4720775443309166, | |
| "learning_rate": 8.666345123216386e-07, | |
| "loss": 0.888, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2617015774502198, | |
| "grad_norm": 1.4084825089318342, | |
| "learning_rate": 8.654925976870765e-07, | |
| "loss": 0.8453, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.26273597103697954, | |
| "grad_norm": 1.4816360850899708, | |
| "learning_rate": 8.643465744063418e-07, | |
| "loss": 0.816, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2637703646237393, | |
| "grad_norm": 1.5149018525586269, | |
| "learning_rate": 8.631964553623336e-07, | |
| "loss": 0.8154, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2648047582104991, | |
| "grad_norm": 1.433988043483464, | |
| "learning_rate": 8.620422534839924e-07, | |
| "loss": 0.7963, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.26583915179725887, | |
| "grad_norm": 1.503208773643516, | |
| "learning_rate": 8.608839817461563e-07, | |
| "loss": 0.853, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.26687354538401864, | |
| "grad_norm": 1.3975010344759489, | |
| "learning_rate": 8.597216531694136e-07, | |
| "loss": 0.8496, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.26790793897077836, | |
| "grad_norm": 1.4379275247186591, | |
| "learning_rate": 8.585552808199575e-07, | |
| "loss": 0.9, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.26894233255753813, | |
| "grad_norm": 1.5387345622278876, | |
| "learning_rate": 8.573848778094388e-07, | |
| "loss": 0.773, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2699767261442979, | |
| "grad_norm": 1.5706815627028357, | |
| "learning_rate": 8.562104572948184e-07, | |
| "loss": 0.7898, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.2710111197310577, | |
| "grad_norm": 1.5395542364220658, | |
| "learning_rate": 8.550320324782197e-07, | |
| "loss": 0.9193, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.27204551331781746, | |
| "grad_norm": 1.5077112472542034, | |
| "learning_rate": 8.538496166067796e-07, | |
| "loss": 0.8065, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.2730799069045772, | |
| "grad_norm": 1.3877372198333293, | |
| "learning_rate": 8.526632229725011e-07, | |
| "loss": 0.8441, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.27411430049133695, | |
| "grad_norm": 1.4748361145571705, | |
| "learning_rate": 8.514728649121016e-07, | |
| "loss": 0.8541, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2751486940780967, | |
| "grad_norm": 1.4619572716696663, | |
| "learning_rate": 8.502785558068648e-07, | |
| "loss": 0.8, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2761830876648565, | |
| "grad_norm": 1.412216536303301, | |
| "learning_rate": 8.490803090824893e-07, | |
| "loss": 0.8006, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.2772174812516162, | |
| "grad_norm": 1.5063496097264375, | |
| "learning_rate": 8.478781382089386e-07, | |
| "loss": 0.7636, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.278251874838376, | |
| "grad_norm": 1.4366116008895573, | |
| "learning_rate": 8.466720567002886e-07, | |
| "loss": 0.8371, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.27928626842513576, | |
| "grad_norm": 1.5898127318778144, | |
| "learning_rate": 8.45462078114576e-07, | |
| "loss": 0.8379, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.28032066201189554, | |
| "grad_norm": 1.481858382074936, | |
| "learning_rate": 8.442482160536468e-07, | |
| "loss": 0.8576, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.2813550555986553, | |
| "grad_norm": 1.439498886771279, | |
| "learning_rate": 8.430304841630022e-07, | |
| "loss": 0.8414, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.28238944918541503, | |
| "grad_norm": 1.4679587624005772, | |
| "learning_rate": 8.418088961316459e-07, | |
| "loss": 0.7998, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.2834238427721748, | |
| "grad_norm": 1.4334125681837777, | |
| "learning_rate": 8.405834656919295e-07, | |
| "loss": 0.7987, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2844582363589346, | |
| "grad_norm": 1.5327122371371873, | |
| "learning_rate": 8.393542066193993e-07, | |
| "loss": 0.7777, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.28549262994569435, | |
| "grad_norm": 1.8087665783771092, | |
| "learning_rate": 8.381211327326402e-07, | |
| "loss": 0.8245, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.2865270235324541, | |
| "grad_norm": 1.4155031609857804, | |
| "learning_rate": 8.368842578931213e-07, | |
| "loss": 0.8477, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.28756141711921385, | |
| "grad_norm": 1.9749147944210403, | |
| "learning_rate": 8.356435960050397e-07, | |
| "loss": 0.7874, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.2885958107059736, | |
| "grad_norm": 1.3621935565383994, | |
| "learning_rate": 8.343991610151639e-07, | |
| "loss": 0.8595, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.2896302042927334, | |
| "grad_norm": 1.4396690651872384, | |
| "learning_rate": 8.331509669126777e-07, | |
| "loss": 0.7884, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.29066459787949317, | |
| "grad_norm": 1.6040018436825825, | |
| "learning_rate": 8.318990277290223e-07, | |
| "loss": 0.8502, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.2916989914662529, | |
| "grad_norm": 1.564353686269944, | |
| "learning_rate": 8.306433575377387e-07, | |
| "loss": 0.8187, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.29273338505301266, | |
| "grad_norm": 1.5277726358592079, | |
| "learning_rate": 8.293839704543103e-07, | |
| "loss": 0.7703, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.29376777863977244, | |
| "grad_norm": 1.4230610013674245, | |
| "learning_rate": 8.281208806360026e-07, | |
| "loss": 0.8109, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.2948021722265322, | |
| "grad_norm": 1.4163435333828045, | |
| "learning_rate": 8.268541022817057e-07, | |
| "loss": 0.8166, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.295836565813292, | |
| "grad_norm": 1.4730542139713696, | |
| "learning_rate": 8.255836496317739e-07, | |
| "loss": 0.777, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.2968709594000517, | |
| "grad_norm": 1.6846131863847775, | |
| "learning_rate": 8.243095369678652e-07, | |
| "loss": 0.8104, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.2979053529868115, | |
| "grad_norm": 1.5029833768706562, | |
| "learning_rate": 8.23031778612782e-07, | |
| "loss": 0.7899, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.29893974657357125, | |
| "grad_norm": 1.5022501974501297, | |
| "learning_rate": 8.217503889303088e-07, | |
| "loss": 0.8434, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.299974140160331, | |
| "grad_norm": 1.510227007685422, | |
| "learning_rate": 8.204653823250516e-07, | |
| "loss": 0.8466, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.30100853374709075, | |
| "grad_norm": 1.447551339739686, | |
| "learning_rate": 8.191767732422752e-07, | |
| "loss": 0.8174, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.3020429273338505, | |
| "grad_norm": 1.4289939538093588, | |
| "learning_rate": 8.17884576167742e-07, | |
| "loss": 0.8052, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.3030773209206103, | |
| "grad_norm": 1.4412557843896296, | |
| "learning_rate": 8.165888056275477e-07, | |
| "loss": 0.758, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.30411171450737007, | |
| "grad_norm": 1.5141404116651977, | |
| "learning_rate": 8.152894761879591e-07, | |
| "loss": 0.7469, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.30514610809412984, | |
| "grad_norm": 1.472820698385534, | |
| "learning_rate": 8.1398660245525e-07, | |
| "loss": 0.8277, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.30618050168088956, | |
| "grad_norm": 1.396181799902574, | |
| "learning_rate": 8.126801990755369e-07, | |
| "loss": 0.7955, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.30721489526764933, | |
| "grad_norm": 1.4098635869292606, | |
| "learning_rate": 8.113702807346146e-07, | |
| "loss": 0.7943, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.3082492888544091, | |
| "grad_norm": 1.3825827509526358, | |
| "learning_rate": 8.100568621577906e-07, | |
| "loss": 0.7883, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3092836824411689, | |
| "grad_norm": 1.4687720939946531, | |
| "learning_rate": 8.087399581097203e-07, | |
| "loss": 0.7784, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.3103180760279286, | |
| "grad_norm": 1.477969400441921, | |
| "learning_rate": 8.074195833942405e-07, | |
| "loss": 0.7904, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3113524696146884, | |
| "grad_norm": 1.4559540471557144, | |
| "learning_rate": 8.060957528542031e-07, | |
| "loss": 0.7885, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.31238686320144815, | |
| "grad_norm": 1.4932304982503848, | |
| "learning_rate": 8.047684813713085e-07, | |
| "loss": 0.8644, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3134212567882079, | |
| "grad_norm": 1.44704678503144, | |
| "learning_rate": 8.034377838659378e-07, | |
| "loss": 0.8521, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3144556503749677, | |
| "grad_norm": 1.4136992886355269, | |
| "learning_rate": 8.021036752969859e-07, | |
| "loss": 0.8538, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3154900439617274, | |
| "grad_norm": 1.4752301955923899, | |
| "learning_rate": 8.007661706616917e-07, | |
| "loss": 0.8157, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3165244375484872, | |
| "grad_norm": 1.5004100326276062, | |
| "learning_rate": 7.994252849954719e-07, | |
| "loss": 0.8042, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.31755883113524697, | |
| "grad_norm": 1.4320269607909135, | |
| "learning_rate": 7.980810333717498e-07, | |
| "loss": 0.8373, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.31859322472200674, | |
| "grad_norm": 1.4456028996437913, | |
| "learning_rate": 7.967334309017874e-07, | |
| "loss": 0.7891, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.31962761830876646, | |
| "grad_norm": 1.475495065622217, | |
| "learning_rate": 7.953824927345144e-07, | |
| "loss": 0.8185, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.32066201189552623, | |
| "grad_norm": 1.362519146496714, | |
| "learning_rate": 7.940282340563585e-07, | |
| "loss": 0.8348, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.321696405482286, | |
| "grad_norm": 1.5136916102860687, | |
| "learning_rate": 7.926706700910748e-07, | |
| "loss": 0.8429, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.3227307990690458, | |
| "grad_norm": 1.4565868389604968, | |
| "learning_rate": 7.913098160995741e-07, | |
| "loss": 0.829, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.32376519265580556, | |
| "grad_norm": 1.477942390545562, | |
| "learning_rate": 7.899456873797518e-07, | |
| "loss": 0.8037, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.3247995862425653, | |
| "grad_norm": 1.4103970043133645, | |
| "learning_rate": 7.885782992663162e-07, | |
| "loss": 0.8157, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.32583397982932505, | |
| "grad_norm": 1.4643979448893485, | |
| "learning_rate": 7.872076671306149e-07, | |
| "loss": 0.7963, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3268683734160848, | |
| "grad_norm": 1.4786053324532196, | |
| "learning_rate": 7.858338063804637e-07, | |
| "loss": 0.8265, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3279027670028446, | |
| "grad_norm": 1.4127809818930817, | |
| "learning_rate": 7.844567324599717e-07, | |
| "loss": 0.8114, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.32893716058960437, | |
| "grad_norm": 1.4994140475607236, | |
| "learning_rate": 7.830764608493696e-07, | |
| "loss": 0.8101, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3299715541763641, | |
| "grad_norm": 1.580241986618675, | |
| "learning_rate": 7.816930070648333e-07, | |
| "loss": 0.8445, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.33100594776312386, | |
| "grad_norm": 1.4586314519511814, | |
| "learning_rate": 7.803063866583117e-07, | |
| "loss": 0.7537, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.33204034134988364, | |
| "grad_norm": 1.6700442934634265, | |
| "learning_rate": 7.789166152173508e-07, | |
| "loss": 0.7907, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.3330747349366434, | |
| "grad_norm": 1.641259202462374, | |
| "learning_rate": 7.775237083649181e-07, | |
| "loss": 0.7186, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.33410912852340313, | |
| "grad_norm": 1.5472194968258295, | |
| "learning_rate": 7.761276817592281e-07, | |
| "loss": 0.8355, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.3351435221101629, | |
| "grad_norm": 1.546948831592496, | |
| "learning_rate": 7.747285510935654e-07, | |
| "loss": 0.8643, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3361779156969227, | |
| "grad_norm": 1.4607408747302513, | |
| "learning_rate": 7.733263320961086e-07, | |
| "loss": 0.8537, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.33721230928368245, | |
| "grad_norm": 1.420438816436174, | |
| "learning_rate": 7.719210405297536e-07, | |
| "loss": 0.822, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.33824670287044223, | |
| "grad_norm": 1.4179569357328767, | |
| "learning_rate": 7.705126921919358e-07, | |
| "loss": 0.8478, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.33928109645720195, | |
| "grad_norm": 1.6095710739664468, | |
| "learning_rate": 7.691013029144535e-07, | |
| "loss": 0.7535, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.3403154900439617, | |
| "grad_norm": 1.55309937226036, | |
| "learning_rate": 7.676868885632892e-07, | |
| "loss": 0.7607, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.3413498836307215, | |
| "grad_norm": 1.4490162238620672, | |
| "learning_rate": 7.662694650384315e-07, | |
| "loss": 0.8097, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.34238427721748127, | |
| "grad_norm": 1.5781446919179913, | |
| "learning_rate": 7.648490482736958e-07, | |
| "loss": 0.7947, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.343418670804241, | |
| "grad_norm": 1.418530792671114, | |
| "learning_rate": 7.634256542365467e-07, | |
| "loss": 0.84, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.34445306439100076, | |
| "grad_norm": 1.5659500251333682, | |
| "learning_rate": 7.619992989279166e-07, | |
| "loss": 0.8041, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.34548745797776054, | |
| "grad_norm": 1.4764935667473418, | |
| "learning_rate": 7.605699983820269e-07, | |
| "loss": 0.8767, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.3465218515645203, | |
| "grad_norm": 1.6377726223424987, | |
| "learning_rate": 7.59137768666208e-07, | |
| "loss": 0.7707, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3475562451512801, | |
| "grad_norm": 1.4472171831031893, | |
| "learning_rate": 7.57702625880718e-07, | |
| "loss": 0.8382, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.3485906387380398, | |
| "grad_norm": 1.4565224369812022, | |
| "learning_rate": 7.562645861585614e-07, | |
| "loss": 0.8159, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.3496250323247996, | |
| "grad_norm": 1.5188225989804915, | |
| "learning_rate": 7.548236656653094e-07, | |
| "loss": 0.7966, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.35065942591155935, | |
| "grad_norm": 1.5439099482255156, | |
| "learning_rate": 7.533798805989164e-07, | |
| "loss": 0.8215, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.3516938194983191, | |
| "grad_norm": 1.4029033836103997, | |
| "learning_rate": 7.519332471895383e-07, | |
| "loss": 0.8001, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.35272821308507885, | |
| "grad_norm": 1.550906939698086, | |
| "learning_rate": 7.504837816993513e-07, | |
| "loss": 0.8284, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.3537626066718386, | |
| "grad_norm": 1.3438605990264927, | |
| "learning_rate": 7.49031500422367e-07, | |
| "loss": 0.8348, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.3547970002585984, | |
| "grad_norm": 1.4828830317049018, | |
| "learning_rate": 7.475764196842516e-07, | |
| "loss": 0.7217, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.35583139384535817, | |
| "grad_norm": 1.4625882076023702, | |
| "learning_rate": 7.461185558421399e-07, | |
| "loss": 0.7991, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.35686578743211794, | |
| "grad_norm": 1.418070705677018, | |
| "learning_rate": 7.446579252844535e-07, | |
| "loss": 0.8249, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.35790018101887766, | |
| "grad_norm": 1.3655261335697446, | |
| "learning_rate": 7.431945444307156e-07, | |
| "loss": 0.8541, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.35893457460563744, | |
| "grad_norm": 1.5327996928980243, | |
| "learning_rate": 7.417284297313664e-07, | |
| "loss": 0.8556, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.3599689681923972, | |
| "grad_norm": 1.4433663896337734, | |
| "learning_rate": 7.402595976675784e-07, | |
| "loss": 0.7563, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.361003361779157, | |
| "grad_norm": 1.468806872720132, | |
| "learning_rate": 7.387880647510708e-07, | |
| "loss": 0.7754, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.36203775536591676, | |
| "grad_norm": 1.3919904041509488, | |
| "learning_rate": 7.373138475239249e-07, | |
| "loss": 0.8348, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3630721489526765, | |
| "grad_norm": 1.3999391785288244, | |
| "learning_rate": 7.358369625583965e-07, | |
| "loss": 0.8071, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.36410654253943625, | |
| "grad_norm": 1.3686796721254153, | |
| "learning_rate": 7.343574264567309e-07, | |
| "loss": 0.8719, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.365140936126196, | |
| "grad_norm": 1.5007468615120227, | |
| "learning_rate": 7.328752558509761e-07, | |
| "loss": 0.8197, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.3661753297129558, | |
| "grad_norm": 1.437254475212084, | |
| "learning_rate": 7.313904674027953e-07, | |
| "loss": 0.7954, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.3672097232997155, | |
| "grad_norm": 1.5289418485113055, | |
| "learning_rate": 7.299030778032798e-07, | |
| "loss": 0.7727, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3682441168864753, | |
| "grad_norm": 1.4337049587616504, | |
| "learning_rate": 7.284131037727618e-07, | |
| "loss": 0.7476, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.36927851047323507, | |
| "grad_norm": 1.4599582688284396, | |
| "learning_rate": 7.269205620606258e-07, | |
| "loss": 0.7853, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.37031290405999484, | |
| "grad_norm": 1.3775436069370972, | |
| "learning_rate": 7.254254694451209e-07, | |
| "loss": 0.7849, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.3713472976467546, | |
| "grad_norm": 1.5901350781009986, | |
| "learning_rate": 7.239278427331717e-07, | |
| "loss": 0.7745, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.37238169123351433, | |
| "grad_norm": 1.511778204031967, | |
| "learning_rate": 7.224276987601894e-07, | |
| "loss": 0.8303, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3734160848202741, | |
| "grad_norm": 1.3855564338163748, | |
| "learning_rate": 7.209250543898832e-07, | |
| "loss": 0.7875, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.3744504784070339, | |
| "grad_norm": 1.507764814139205, | |
| "learning_rate": 7.1941992651407e-07, | |
| "loss": 0.7763, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.37548487199379366, | |
| "grad_norm": 1.4927522102160196, | |
| "learning_rate": 7.179123320524847e-07, | |
| "loss": 0.7863, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.3765192655805534, | |
| "grad_norm": 1.4975837221442445, | |
| "learning_rate": 7.164022879525902e-07, | |
| "loss": 0.8549, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.37755365916731315, | |
| "grad_norm": 1.6581784580492507, | |
| "learning_rate": 7.148898111893866e-07, | |
| "loss": 0.7162, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.3785880527540729, | |
| "grad_norm": 1.4153599577715692, | |
| "learning_rate": 7.133749187652207e-07, | |
| "loss": 0.7572, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.3796224463408327, | |
| "grad_norm": 1.5086071405284451, | |
| "learning_rate": 7.118576277095944e-07, | |
| "loss": 0.7395, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.38065683992759247, | |
| "grad_norm": 1.4598122723474396, | |
| "learning_rate": 7.10337955078974e-07, | |
| "loss": 0.7813, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.3816912335143522, | |
| "grad_norm": 1.493505166104934, | |
| "learning_rate": 7.088159179565976e-07, | |
| "loss": 0.7872, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.38272562710111196, | |
| "grad_norm": 1.411155555762869, | |
| "learning_rate": 7.072915334522839e-07, | |
| "loss": 0.7733, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.38376002068787174, | |
| "grad_norm": 1.5970211298720591, | |
| "learning_rate": 7.05764818702239e-07, | |
| "loss": 0.7617, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.3847944142746315, | |
| "grad_norm": 1.4154540093144994, | |
| "learning_rate": 7.042357908688646e-07, | |
| "loss": 0.7782, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.38582880786139123, | |
| "grad_norm": 1.4878883836919452, | |
| "learning_rate": 7.027044671405642e-07, | |
| "loss": 0.838, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.386863201448151, | |
| "grad_norm": 1.4274531473992906, | |
| "learning_rate": 7.011708647315508e-07, | |
| "loss": 0.7734, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.3878975950349108, | |
| "grad_norm": 1.3959360239221512, | |
| "learning_rate": 6.99635000881653e-07, | |
| "loss": 0.8167, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.38893198862167055, | |
| "grad_norm": 1.4922339102729454, | |
| "learning_rate": 6.980968928561209e-07, | |
| "loss": 0.8452, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.38996638220843033, | |
| "grad_norm": 1.3455483048831731, | |
| "learning_rate": 6.965565579454321e-07, | |
| "loss": 0.8094, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.39100077579519005, | |
| "grad_norm": 1.3949456205331519, | |
| "learning_rate": 6.950140134650978e-07, | |
| "loss": 0.8441, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.3920351693819498, | |
| "grad_norm": 1.629536263962534, | |
| "learning_rate": 6.934692767554679e-07, | |
| "loss": 0.7783, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.3930695629687096, | |
| "grad_norm": 1.4500317085708045, | |
| "learning_rate": 6.919223651815356e-07, | |
| "loss": 0.8704, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.39410395655546937, | |
| "grad_norm": 1.4550461014517297, | |
| "learning_rate": 6.903732961327431e-07, | |
| "loss": 0.7601, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.39513835014222914, | |
| "grad_norm": 1.532290380261664, | |
| "learning_rate": 6.888220870227852e-07, | |
| "loss": 0.7519, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.39617274372898886, | |
| "grad_norm": 1.414888878878272, | |
| "learning_rate": 6.872687552894145e-07, | |
| "loss": 0.821, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.39720713731574864, | |
| "grad_norm": 1.5357331505048835, | |
| "learning_rate": 6.857133183942441e-07, | |
| "loss": 0.7413, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.3982415309025084, | |
| "grad_norm": 1.5060267713431845, | |
| "learning_rate": 6.841557938225526e-07, | |
| "loss": 0.7936, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3992759244892682, | |
| "grad_norm": 1.5235810859874255, | |
| "learning_rate": 6.82596199083087e-07, | |
| "loss": 0.8234, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.4003103180760279, | |
| "grad_norm": 1.6120706962469804, | |
| "learning_rate": 6.810345517078656e-07, | |
| "loss": 0.7535, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.4013447116627877, | |
| "grad_norm": 1.433375088330839, | |
| "learning_rate": 6.794708692519814e-07, | |
| "loss": 0.82, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.40237910524954745, | |
| "grad_norm": 1.48933757949774, | |
| "learning_rate": 6.779051692934043e-07, | |
| "loss": 0.8242, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.4034134988363072, | |
| "grad_norm": 1.4111248361431712, | |
| "learning_rate": 6.763374694327839e-07, | |
| "loss": 0.7586, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.404447892423067, | |
| "grad_norm": 1.459526308527859, | |
| "learning_rate": 6.747677872932518e-07, | |
| "loss": 0.7872, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.4054822860098267, | |
| "grad_norm": 1.4061072076493546, | |
| "learning_rate": 6.731961405202224e-07, | |
| "loss": 0.8656, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4065166795965865, | |
| "grad_norm": 1.3973634314832557, | |
| "learning_rate": 6.71622546781196e-07, | |
| "loss": 0.7878, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.40755107318334627, | |
| "grad_norm": 1.4800049570039662, | |
| "learning_rate": 6.700470237655595e-07, | |
| "loss": 0.7944, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.40858546677010604, | |
| "grad_norm": 1.4012361199298746, | |
| "learning_rate": 6.68469589184387e-07, | |
| "loss": 0.8711, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.40961986035686576, | |
| "grad_norm": 1.4833605466978868, | |
| "learning_rate": 6.668902607702418e-07, | |
| "loss": 0.7866, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.41065425394362554, | |
| "grad_norm": 1.500693916582192, | |
| "learning_rate": 6.653090562769763e-07, | |
| "loss": 0.808, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.4116886475303853, | |
| "grad_norm": 1.4132947906488624, | |
| "learning_rate": 6.637259934795327e-07, | |
| "loss": 0.8239, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4127230411171451, | |
| "grad_norm": 1.549971773103694, | |
| "learning_rate": 6.621410901737429e-07, | |
| "loss": 0.8112, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.41375743470390486, | |
| "grad_norm": 1.4440278429568445, | |
| "learning_rate": 6.605543641761292e-07, | |
| "loss": 0.7494, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4147918282906646, | |
| "grad_norm": 1.4006520297389717, | |
| "learning_rate": 6.589658333237031e-07, | |
| "loss": 0.7843, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.41582622187742435, | |
| "grad_norm": 1.447986349864676, | |
| "learning_rate": 6.57375515473765e-07, | |
| "loss": 0.7922, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4168606154641841, | |
| "grad_norm": 1.3248237663866569, | |
| "learning_rate": 6.557834285037041e-07, | |
| "loss": 0.8024, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4178950090509439, | |
| "grad_norm": 1.4661369856095736, | |
| "learning_rate": 6.541895903107969e-07, | |
| "loss": 0.8628, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.4189294026377037, | |
| "grad_norm": 1.4848079172502218, | |
| "learning_rate": 6.525940188120058e-07, | |
| "loss": 0.8236, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.4199637962244634, | |
| "grad_norm": 1.5078012969227559, | |
| "learning_rate": 6.509967319437782e-07, | |
| "loss": 0.7764, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.42099818981122317, | |
| "grad_norm": 1.5878487424780954, | |
| "learning_rate": 6.493977476618444e-07, | |
| "loss": 0.8629, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.42203258339798294, | |
| "grad_norm": 1.4208606549441456, | |
| "learning_rate": 6.477970839410165e-07, | |
| "loss": 0.7888, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.4230669769847427, | |
| "grad_norm": 1.4465545259205532, | |
| "learning_rate": 6.461947587749854e-07, | |
| "loss": 0.7607, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.42410137057150243, | |
| "grad_norm": 1.5424213555616368, | |
| "learning_rate": 6.445907901761189e-07, | |
| "loss": 0.8163, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4251357641582622, | |
| "grad_norm": 1.39694826052931, | |
| "learning_rate": 6.429851961752596e-07, | |
| "loss": 0.7036, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.426170157745022, | |
| "grad_norm": 1.3814364035735487, | |
| "learning_rate": 6.413779948215218e-07, | |
| "loss": 0.8447, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.42720455133178176, | |
| "grad_norm": 1.4813429082667662, | |
| "learning_rate": 6.397692041820885e-07, | |
| "loss": 0.772, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.42823894491854153, | |
| "grad_norm": 1.4627308480754002, | |
| "learning_rate": 6.381588423420085e-07, | |
| "loss": 0.8076, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.42927333850530125, | |
| "grad_norm": 1.4708752354180932, | |
| "learning_rate": 6.365469274039935e-07, | |
| "loss": 0.7023, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.430307732092061, | |
| "grad_norm": 1.4749068387304303, | |
| "learning_rate": 6.349334774882135e-07, | |
| "loss": 0.735, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.4313421256788208, | |
| "grad_norm": 1.4006313791642289, | |
| "learning_rate": 6.333185107320944e-07, | |
| "loss": 0.8539, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.4323765192655806, | |
| "grad_norm": 1.530400186446558, | |
| "learning_rate": 6.317020452901133e-07, | |
| "loss": 0.8351, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.4334109128523403, | |
| "grad_norm": 1.5214206862898372, | |
| "learning_rate": 6.300840993335944e-07, | |
| "loss": 0.7663, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.43444530643910007, | |
| "grad_norm": 1.4367117786702897, | |
| "learning_rate": 6.284646910505053e-07, | |
| "loss": 0.8235, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.43547970002585984, | |
| "grad_norm": 1.4726379186885243, | |
| "learning_rate": 6.268438386452519e-07, | |
| "loss": 0.7513, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4365140936126196, | |
| "grad_norm": 1.49188005989065, | |
| "learning_rate": 6.252215603384743e-07, | |
| "loss": 0.7293, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.4375484871993794, | |
| "grad_norm": 1.334485230190832, | |
| "learning_rate": 6.235978743668415e-07, | |
| "loss": 0.7881, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.4385828807861391, | |
| "grad_norm": 1.4201416785975403, | |
| "learning_rate": 6.219727989828465e-07, | |
| "loss": 0.7914, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.4396172743728989, | |
| "grad_norm": 1.4860203520097357, | |
| "learning_rate": 6.203463524546017e-07, | |
| "loss": 0.7447, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.44065166795965865, | |
| "grad_norm": 1.421429133721482, | |
| "learning_rate": 6.187185530656327e-07, | |
| "loss": 0.7907, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.44168606154641843, | |
| "grad_norm": 1.5020663725102064, | |
| "learning_rate": 6.170894191146733e-07, | |
| "loss": 0.847, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.44272045513317815, | |
| "grad_norm": 1.5372927659858155, | |
| "learning_rate": 6.154589689154593e-07, | |
| "loss": 0.7446, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4437548487199379, | |
| "grad_norm": 1.4218930534238439, | |
| "learning_rate": 6.138272207965237e-07, | |
| "loss": 0.7991, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.4447892423066977, | |
| "grad_norm": 1.5561293813583938, | |
| "learning_rate": 6.121941931009894e-07, | |
| "loss": 0.8258, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.44582363589345747, | |
| "grad_norm": 1.506408566968627, | |
| "learning_rate": 6.105599041863631e-07, | |
| "loss": 0.7912, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.44685802948021724, | |
| "grad_norm": 1.489475460845033, | |
| "learning_rate": 6.089243724243303e-07, | |
| "loss": 0.813, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.44789242306697696, | |
| "grad_norm": 1.4417812759659188, | |
| "learning_rate": 6.072876162005474e-07, | |
| "loss": 0.7397, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.44892681665373674, | |
| "grad_norm": 1.416241590536411, | |
| "learning_rate": 6.05649653914435e-07, | |
| "loss": 0.724, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4499612102404965, | |
| "grad_norm": 1.5851742757954237, | |
| "learning_rate": 6.040105039789726e-07, | |
| "loss": 0.7972, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4509956038272563, | |
| "grad_norm": 1.444408913562249, | |
| "learning_rate": 6.023701848204893e-07, | |
| "loss": 0.7668, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.45202999741401606, | |
| "grad_norm": 1.5372373413485305, | |
| "learning_rate": 6.00728714878459e-07, | |
| "loss": 0.8043, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.4530643910007758, | |
| "grad_norm": 1.5050852557606191, | |
| "learning_rate": 5.990861126052913e-07, | |
| "loss": 0.7516, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.45409878458753555, | |
| "grad_norm": 1.4751856261494813, | |
| "learning_rate": 5.974423964661248e-07, | |
| "loss": 0.7907, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.4551331781742953, | |
| "grad_norm": 1.5052231213656526, | |
| "learning_rate": 5.957975849386201e-07, | |
| "loss": 0.7662, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4561675717610551, | |
| "grad_norm": 1.3635521071151895, | |
| "learning_rate": 5.941516965127508e-07, | |
| "loss": 0.7511, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.4572019653478148, | |
| "grad_norm": 1.5458883544445075, | |
| "learning_rate": 5.925047496905967e-07, | |
| "loss": 0.8627, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.4582363589345746, | |
| "grad_norm": 1.4193806724971627, | |
| "learning_rate": 5.908567629861353e-07, | |
| "loss": 0.8074, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.45927075252133437, | |
| "grad_norm": 1.4286661157605234, | |
| "learning_rate": 5.892077549250341e-07, | |
| "loss": 0.8114, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.46030514610809414, | |
| "grad_norm": 1.4862337504285723, | |
| "learning_rate": 5.875577440444417e-07, | |
| "loss": 0.793, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.4613395396948539, | |
| "grad_norm": 1.387338561627281, | |
| "learning_rate": 5.859067488927799e-07, | |
| "loss": 0.7779, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.46237393328161364, | |
| "grad_norm": 1.5309317375621851, | |
| "learning_rate": 5.842547880295353e-07, | |
| "loss": 0.7719, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.4634083268683734, | |
| "grad_norm": 1.5215534485748197, | |
| "learning_rate": 5.826018800250502e-07, | |
| "loss": 0.8138, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4644427204551332, | |
| "grad_norm": 1.5477288257959607, | |
| "learning_rate": 5.809480434603143e-07, | |
| "loss": 0.7668, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.46547711404189296, | |
| "grad_norm": 1.4668767969890304, | |
| "learning_rate": 5.792932969267552e-07, | |
| "loss": 0.7834, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4665115076286527, | |
| "grad_norm": 1.5219075605411267, | |
| "learning_rate": 5.776376590260306e-07, | |
| "loss": 0.7616, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.46754590121541245, | |
| "grad_norm": 1.3672108673968608, | |
| "learning_rate": 5.759811483698172e-07, | |
| "loss": 0.8073, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.4685802948021722, | |
| "grad_norm": 1.4273939840224796, | |
| "learning_rate": 5.743237835796041e-07, | |
| "loss": 0.7473, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.469614688388932, | |
| "grad_norm": 1.4788437098817027, | |
| "learning_rate": 5.726655832864809e-07, | |
| "loss": 0.8348, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.4706490819756918, | |
| "grad_norm": 1.4285110166545327, | |
| "learning_rate": 5.7100656613093e-07, | |
| "loss": 0.88, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.4716834755624515, | |
| "grad_norm": 1.3988106702790735, | |
| "learning_rate": 5.693467507626164e-07, | |
| "loss": 0.7721, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.47271786914921127, | |
| "grad_norm": 1.4169929421999459, | |
| "learning_rate": 5.67686155840178e-07, | |
| "loss": 0.8039, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.47375226273597104, | |
| "grad_norm": 1.484391242114432, | |
| "learning_rate": 5.660248000310162e-07, | |
| "loss": 0.7668, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.4747866563227308, | |
| "grad_norm": 1.3679176686505365, | |
| "learning_rate": 5.643627020110855e-07, | |
| "loss": 0.7534, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.47582104990949053, | |
| "grad_norm": 1.4998288372071416, | |
| "learning_rate": 5.626998804646841e-07, | |
| "loss": 0.7678, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4768554434962503, | |
| "grad_norm": 1.4643760039252145, | |
| "learning_rate": 5.610363540842434e-07, | |
| "loss": 0.8276, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.4778898370830101, | |
| "grad_norm": 1.452192630146472, | |
| "learning_rate": 5.593721415701188e-07, | |
| "loss": 0.7908, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.47892423066976986, | |
| "grad_norm": 1.4539110217460307, | |
| "learning_rate": 5.577072616303779e-07, | |
| "loss": 0.8145, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.47995862425652963, | |
| "grad_norm": 1.5013069028762214, | |
| "learning_rate": 5.560417329805915e-07, | |
| "loss": 0.786, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.48099301784328935, | |
| "grad_norm": 1.4473162971215185, | |
| "learning_rate": 5.54375574343623e-07, | |
| "loss": 0.8079, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.4820274114300491, | |
| "grad_norm": 1.5192567688503629, | |
| "learning_rate": 5.527088044494176e-07, | |
| "loss": 0.8067, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.4830618050168089, | |
| "grad_norm": 1.437077841592271, | |
| "learning_rate": 5.510414420347918e-07, | |
| "loss": 0.8056, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.4840961986035687, | |
| "grad_norm": 1.4855884972514712, | |
| "learning_rate": 5.493735058432226e-07, | |
| "loss": 0.7979, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.48513059219032845, | |
| "grad_norm": 1.4334300974894107, | |
| "learning_rate": 5.477050146246378e-07, | |
| "loss": 0.7917, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.48616498577708817, | |
| "grad_norm": 1.426864897223084, | |
| "learning_rate": 5.460359871352034e-07, | |
| "loss": 0.786, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.48719937936384794, | |
| "grad_norm": 1.4941413560002006, | |
| "learning_rate": 5.443664421371152e-07, | |
| "loss": 0.7756, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.4882337729506077, | |
| "grad_norm": 1.439453063593448, | |
| "learning_rate": 5.426963983983852e-07, | |
| "loss": 0.7718, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.4892681665373675, | |
| "grad_norm": 1.4182177774813618, | |
| "learning_rate": 5.410258746926327e-07, | |
| "loss": 0.7196, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.4903025601241272, | |
| "grad_norm": 1.6561074842137438, | |
| "learning_rate": 5.393548897988724e-07, | |
| "loss": 0.9053, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.491336953710887, | |
| "grad_norm": 1.4933846836955684, | |
| "learning_rate": 5.37683462501303e-07, | |
| "loss": 0.8478, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.49237134729764676, | |
| "grad_norm": 1.5948038843876355, | |
| "learning_rate": 5.360116115890971e-07, | |
| "loss": 0.7478, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.49340574088440653, | |
| "grad_norm": 1.6327907371316308, | |
| "learning_rate": 5.343393558561887e-07, | |
| "loss": 0.7393, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.4944401344711663, | |
| "grad_norm": 1.397647673929809, | |
| "learning_rate": 5.326667141010631e-07, | |
| "loss": 0.7473, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.495474528057926, | |
| "grad_norm": 1.5085562984418808, | |
| "learning_rate": 5.309937051265442e-07, | |
| "loss": 0.8262, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.4965089216446858, | |
| "grad_norm": 1.4183037560402414, | |
| "learning_rate": 5.29320347739585e-07, | |
| "loss": 0.7652, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.49754331523144557, | |
| "grad_norm": 1.4862990654885089, | |
| "learning_rate": 5.276466607510544e-07, | |
| "loss": 0.8341, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.49857770881820535, | |
| "grad_norm": 1.4221940463426233, | |
| "learning_rate": 5.259726629755266e-07, | |
| "loss": 0.7883, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.49961210240496506, | |
| "grad_norm": 1.5515522229627332, | |
| "learning_rate": 5.2429837323107e-07, | |
| "loss": 0.8333, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5006464959917248, | |
| "grad_norm": 1.4227508067897034, | |
| "learning_rate": 5.226238103390342e-07, | |
| "loss": 0.8274, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5016808895784847, | |
| "grad_norm": 1.4232133072930497, | |
| "learning_rate": 5.209489931238405e-07, | |
| "loss": 0.7206, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5027152831652444, | |
| "grad_norm": 1.5080795964583507, | |
| "learning_rate": 5.192739404127678e-07, | |
| "loss": 0.7535, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5037496767520041, | |
| "grad_norm": 1.3442484726764143, | |
| "learning_rate": 5.175986710357438e-07, | |
| "loss": 0.7637, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5047840703387639, | |
| "grad_norm": 1.5136024334189455, | |
| "learning_rate": 5.159232038251305e-07, | |
| "loss": 0.7584, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5058184639255237, | |
| "grad_norm": 1.3194718091569664, | |
| "learning_rate": 5.142475576155145e-07, | |
| "loss": 0.8463, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5068528575122834, | |
| "grad_norm": 1.3965569350837568, | |
| "learning_rate": 5.125717512434946e-07, | |
| "loss": 0.8525, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5078872510990432, | |
| "grad_norm": 1.37978735574125, | |
| "learning_rate": 5.108958035474702e-07, | |
| "loss": 0.8334, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5089216446858029, | |
| "grad_norm": 1.4087702825462634, | |
| "learning_rate": 5.092197333674285e-07, | |
| "loss": 0.7739, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5099560382725628, | |
| "grad_norm": 1.4087572461727074, | |
| "learning_rate": 5.075435595447346e-07, | |
| "loss": 0.7607, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5109904318593225, | |
| "grad_norm": 1.4382087272587971, | |
| "learning_rate": 5.058673009219184e-07, | |
| "loss": 0.7226, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5120248254460822, | |
| "grad_norm": 1.3760893738001516, | |
| "learning_rate": 5.041909763424625e-07, | |
| "loss": 0.7617, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.513059219032842, | |
| "grad_norm": 1.4434496822446814, | |
| "learning_rate": 5.025146046505917e-07, | |
| "loss": 0.819, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5140936126196017, | |
| "grad_norm": 1.3997974862674474, | |
| "learning_rate": 5.008382046910601e-07, | |
| "loss": 0.8183, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5151280062063616, | |
| "grad_norm": 1.5482481361570593, | |
| "learning_rate": 4.991617953089398e-07, | |
| "loss": 0.7524, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5161623997931213, | |
| "grad_norm": 1.4970337148357162, | |
| "learning_rate": 4.974853953494083e-07, | |
| "loss": 0.7768, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.517196793379881, | |
| "grad_norm": 1.4821221872901753, | |
| "learning_rate": 4.958090236575376e-07, | |
| "loss": 0.7648, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.517196793379881, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_loss": 2.7658002376556396, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_runtime": 5.4437, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_samples_per_second": 17.084, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_steps_per_second": 2.204, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5182311869666408, | |
| "grad_norm": 1.5029396754327822, | |
| "learning_rate": 4.941326990780818e-07, | |
| "loss": 0.7193, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5192655805534006, | |
| "grad_norm": 1.6445373200961098, | |
| "learning_rate": 4.924564404552654e-07, | |
| "loss": 0.8152, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5202999741401604, | |
| "grad_norm": 1.4463282173653536, | |
| "learning_rate": 4.907802666325715e-07, | |
| "loss": 0.7492, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.5213343677269201, | |
| "grad_norm": 1.565557338916038, | |
| "learning_rate": 4.8910419645253e-07, | |
| "loss": 0.8369, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5223687613136798, | |
| "grad_norm": 1.345167417876588, | |
| "learning_rate": 4.874282487565052e-07, | |
| "loss": 0.7625, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5234031549004396, | |
| "grad_norm": 1.5344700797262696, | |
| "learning_rate": 4.857524423844854e-07, | |
| "loss": 0.8028, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5244375484871994, | |
| "grad_norm": 1.4843313398732627, | |
| "learning_rate": 4.840767961748697e-07, | |
| "loss": 0.7442, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5254719420739591, | |
| "grad_norm": 1.4525895807461926, | |
| "learning_rate": 4.824013289642563e-07, | |
| "loss": 0.8049, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5265063356607189, | |
| "grad_norm": 1.4590898113226547, | |
| "learning_rate": 4.807260595872322e-07, | |
| "loss": 0.769, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5275407292474786, | |
| "grad_norm": 1.4260534940890022, | |
| "learning_rate": 4.790510068761595e-07, | |
| "loss": 0.7482, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5285751228342385, | |
| "grad_norm": 1.6179704147157994, | |
| "learning_rate": 4.773761896609657e-07, | |
| "loss": 0.7918, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5296095164209982, | |
| "grad_norm": 1.469834612223002, | |
| "learning_rate": 4.7570162676893013e-07, | |
| "loss": 0.7917, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5306439100077579, | |
| "grad_norm": 1.3999919597469435, | |
| "learning_rate": 4.740273370244733e-07, | |
| "loss": 0.8298, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5316783035945177, | |
| "grad_norm": 1.35470656355707, | |
| "learning_rate": 4.7235333924894564e-07, | |
| "loss": 0.8196, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5327126971812775, | |
| "grad_norm": 1.4581392961801638, | |
| "learning_rate": 4.7067965226041513e-07, | |
| "loss": 0.764, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5337470907680373, | |
| "grad_norm": 1.5110163291621437, | |
| "learning_rate": 4.6900629487345576e-07, | |
| "loss": 0.8405, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.534781484354797, | |
| "grad_norm": 1.4841257849998581, | |
| "learning_rate": 4.6733328589893706e-07, | |
| "loss": 0.7208, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5358158779415567, | |
| "grad_norm": 1.5181369749152571, | |
| "learning_rate": 4.656606441438113e-07, | |
| "loss": 0.8244, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5368502715283165, | |
| "grad_norm": 1.4072037394769683, | |
| "learning_rate": 4.639883884109028e-07, | |
| "loss": 0.7764, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5378846651150763, | |
| "grad_norm": 1.4754551009180858, | |
| "learning_rate": 4.62316537498697e-07, | |
| "loss": 0.8276, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5389190587018361, | |
| "grad_norm": 1.403974634302295, | |
| "learning_rate": 4.6064511020112774e-07, | |
| "loss": 0.8378, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5399534522885958, | |
| "grad_norm": 1.4700502128907051, | |
| "learning_rate": 4.589741253073673e-07, | |
| "loss": 0.8122, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5409878458753555, | |
| "grad_norm": 1.4230633840695792, | |
| "learning_rate": 4.573036016016149e-07, | |
| "loss": 0.8164, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5420222394621154, | |
| "grad_norm": 1.473986096284714, | |
| "learning_rate": 4.556335578628848e-07, | |
| "loss": 0.836, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5430566330488751, | |
| "grad_norm": 1.6310980896671778, | |
| "learning_rate": 4.5396401286479646e-07, | |
| "loss": 0.7322, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5440910266356349, | |
| "grad_norm": 1.458407931778618, | |
| "learning_rate": 4.522949853753623e-07, | |
| "loss": 0.7777, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5451254202223946, | |
| "grad_norm": 1.5622304106121399, | |
| "learning_rate": 4.506264941567773e-07, | |
| "loss": 0.7964, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5461598138091543, | |
| "grad_norm": 1.4284806713984737, | |
| "learning_rate": 4.4895855796520826e-07, | |
| "loss": 0.7449, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5471942073959142, | |
| "grad_norm": 1.4088615024125337, | |
| "learning_rate": 4.4729119555058235e-07, | |
| "loss": 0.7595, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.5482286009826739, | |
| "grad_norm": 1.477158182482703, | |
| "learning_rate": 4.4562442565637684e-07, | |
| "loss": 0.7257, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5492629945694336, | |
| "grad_norm": 1.4327114915812629, | |
| "learning_rate": 4.4395826701940846e-07, | |
| "loss": 0.7291, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.5502973881561934, | |
| "grad_norm": 1.4948151795403506, | |
| "learning_rate": 4.422927383696223e-07, | |
| "loss": 0.7805, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5513317817429532, | |
| "grad_norm": 1.5403970773425224, | |
| "learning_rate": 4.406278584298812e-07, | |
| "loss": 0.7509, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.552366175329713, | |
| "grad_norm": 1.4904379600517668, | |
| "learning_rate": 4.3896364591575664e-07, | |
| "loss": 0.7126, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5534005689164727, | |
| "grad_norm": 1.4688145006442674, | |
| "learning_rate": 4.3730011953531586e-07, | |
| "loss": 0.7816, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5544349625032324, | |
| "grad_norm": 1.520887812045378, | |
| "learning_rate": 4.3563729798891456e-07, | |
| "loss": 0.789, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5554693560899923, | |
| "grad_norm": 1.3836876531721884, | |
| "learning_rate": 4.3397519996898385e-07, | |
| "loss": 0.8576, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.556503749676752, | |
| "grad_norm": 1.3766376250417338, | |
| "learning_rate": 4.3231384415982183e-07, | |
| "loss": 0.7214, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5575381432635118, | |
| "grad_norm": 1.4626465247323845, | |
| "learning_rate": 4.3065324923738357e-07, | |
| "loss": 0.7961, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.5585725368502715, | |
| "grad_norm": 1.4532565637833812, | |
| "learning_rate": 4.2899343386907e-07, | |
| "loss": 0.7172, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5596069304370312, | |
| "grad_norm": 1.4236752405658848, | |
| "learning_rate": 4.2733441671351904e-07, | |
| "loss": 0.8164, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5606413240237911, | |
| "grad_norm": 1.5222257394852572, | |
| "learning_rate": 4.256762164203959e-07, | |
| "loss": 0.7025, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5616757176105508, | |
| "grad_norm": 1.3785968391687746, | |
| "learning_rate": 4.2401885163018285e-07, | |
| "loss": 0.8072, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5627101111973106, | |
| "grad_norm": 1.4148662805860468, | |
| "learning_rate": 4.2236234097396946e-07, | |
| "loss": 0.7613, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.5637445047840703, | |
| "grad_norm": 1.6734834453691072, | |
| "learning_rate": 4.207067030732448e-07, | |
| "loss": 0.7511, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5647788983708301, | |
| "grad_norm": 1.4506246465332198, | |
| "learning_rate": 4.190519565396858e-07, | |
| "loss": 0.7767, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5658132919575899, | |
| "grad_norm": 1.4697711868455134, | |
| "learning_rate": 4.1739811997494977e-07, | |
| "loss": 0.8115, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.5668476855443496, | |
| "grad_norm": 1.423105635433696, | |
| "learning_rate": 4.1574521197046475e-07, | |
| "loss": 0.7743, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.5678820791311094, | |
| "grad_norm": 1.4980482836182418, | |
| "learning_rate": 4.140932511072201e-07, | |
| "loss": 0.7213, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.5689164727178692, | |
| "grad_norm": 1.4405915329619516, | |
| "learning_rate": 4.124422559555584e-07, | |
| "loss": 0.8043, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5699508663046289, | |
| "grad_norm": 1.496346034369283, | |
| "learning_rate": 4.1079224507496594e-07, | |
| "loss": 0.7431, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.5709852598913887, | |
| "grad_norm": 1.506884006689124, | |
| "learning_rate": 4.091432370138646e-07, | |
| "loss": 0.7374, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.5720196534781484, | |
| "grad_norm": 1.4616677828754718, | |
| "learning_rate": 4.074952503094033e-07, | |
| "loss": 0.7899, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.5730540470649081, | |
| "grad_norm": 1.5519179958261609, | |
| "learning_rate": 4.058483034872493e-07, | |
| "loss": 0.7293, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.574088440651668, | |
| "grad_norm": 1.5124699711304432, | |
| "learning_rate": 4.042024150613798e-07, | |
| "loss": 0.8232, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5751228342384277, | |
| "grad_norm": 1.474655584027013, | |
| "learning_rate": 4.0255760353387514e-07, | |
| "loss": 0.7923, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.5761572278251875, | |
| "grad_norm": 1.476706485809985, | |
| "learning_rate": 4.009138873947088e-07, | |
| "loss": 0.7553, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.5771916214119472, | |
| "grad_norm": 1.497064774740194, | |
| "learning_rate": 3.99271285121541e-07, | |
| "loss": 0.8183, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.578226014998707, | |
| "grad_norm": 1.4893883383147994, | |
| "learning_rate": 3.9762981517951065e-07, | |
| "loss": 0.7869, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.5792604085854668, | |
| "grad_norm": 1.4635590462354513, | |
| "learning_rate": 3.959894960210275e-07, | |
| "loss": 0.8262, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5802948021722265, | |
| "grad_norm": 1.5587818661064967, | |
| "learning_rate": 3.94350346085565e-07, | |
| "loss": 0.76, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.5813291957589863, | |
| "grad_norm": 1.5467025972186752, | |
| "learning_rate": 3.927123837994528e-07, | |
| "loss": 0.7672, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.5823635893457461, | |
| "grad_norm": 1.4742981776387012, | |
| "learning_rate": 3.910756275756697e-07, | |
| "loss": 0.808, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.5833979829325058, | |
| "grad_norm": 1.4796930337000134, | |
| "learning_rate": 3.894400958136369e-07, | |
| "loss": 0.7366, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.5844323765192656, | |
| "grad_norm": 1.481727684332486, | |
| "learning_rate": 3.8780580689901087e-07, | |
| "loss": 0.7291, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5854667701060253, | |
| "grad_norm": 1.5017214577012585, | |
| "learning_rate": 3.861727792034762e-07, | |
| "loss": 0.8023, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.5865011636927852, | |
| "grad_norm": 1.5872102028389574, | |
| "learning_rate": 3.8454103108454067e-07, | |
| "loss": 0.6981, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.5875355572795449, | |
| "grad_norm": 1.3376434221898008, | |
| "learning_rate": 3.829105808853269e-07, | |
| "loss": 0.758, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.5885699508663046, | |
| "grad_norm": 1.4934761546470774, | |
| "learning_rate": 3.812814469343674e-07, | |
| "loss": 0.734, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.5896043444530644, | |
| "grad_norm": 1.4419192074560656, | |
| "learning_rate": 3.796536475453984e-07, | |
| "loss": 0.7653, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5906387380398241, | |
| "grad_norm": 1.4692814651692723, | |
| "learning_rate": 3.7802720101715354e-07, | |
| "loss": 0.797, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.591673131626584, | |
| "grad_norm": 1.4103055734560583, | |
| "learning_rate": 3.7640212563315865e-07, | |
| "loss": 0.8125, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.5927075252133437, | |
| "grad_norm": 1.6276487488421492, | |
| "learning_rate": 3.7477843966152577e-07, | |
| "loss": 0.7341, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.5937419188001034, | |
| "grad_norm": 1.4535465635386358, | |
| "learning_rate": 3.73156161354748e-07, | |
| "loss": 0.7595, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.5947763123868632, | |
| "grad_norm": 1.5053447595161533, | |
| "learning_rate": 3.715353089494947e-07, | |
| "loss": 0.802, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.595810705973623, | |
| "grad_norm": 1.3781826181990362, | |
| "learning_rate": 3.699159006664055e-07, | |
| "loss": 0.7652, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.5968450995603827, | |
| "grad_norm": 1.933557338538353, | |
| "learning_rate": 3.682979547098867e-07, | |
| "loss": 0.7394, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.5978794931471425, | |
| "grad_norm": 1.5121869317282295, | |
| "learning_rate": 3.6668148926790556e-07, | |
| "loss": 0.7644, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.5989138867339022, | |
| "grad_norm": 1.3607740443600367, | |
| "learning_rate": 3.650665225117866e-07, | |
| "loss": 0.7582, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.599948280320662, | |
| "grad_norm": 1.4202384197604014, | |
| "learning_rate": 3.6345307259600654e-07, | |
| "loss": 0.813, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6009826739074218, | |
| "grad_norm": 1.459117182435846, | |
| "learning_rate": 3.618411576579916e-07, | |
| "loss": 0.7812, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.6020170674941815, | |
| "grad_norm": 1.4377149616884963, | |
| "learning_rate": 3.6023079581791165e-07, | |
| "loss": 0.7913, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6030514610809413, | |
| "grad_norm": 1.3305248287520504, | |
| "learning_rate": 3.5862200517847824e-07, | |
| "loss": 0.7607, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.604085854667701, | |
| "grad_norm": 1.52168365869008, | |
| "learning_rate": 3.570148038247404e-07, | |
| "loss": 0.7002, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6051202482544609, | |
| "grad_norm": 1.394921669740362, | |
| "learning_rate": 3.5540920982388107e-07, | |
| "loss": 0.8299, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6061546418412206, | |
| "grad_norm": 1.3810722638103607, | |
| "learning_rate": 3.5380524122501463e-07, | |
| "loss": 0.8784, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6071890354279803, | |
| "grad_norm": 1.4609854919237661, | |
| "learning_rate": 3.522029160589835e-07, | |
| "loss": 0.8313, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6082234290147401, | |
| "grad_norm": 1.5071504770855946, | |
| "learning_rate": 3.506022523381555e-07, | |
| "loss": 0.8273, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6092578226014999, | |
| "grad_norm": 1.5363035431348684, | |
| "learning_rate": 3.490032680562218e-07, | |
| "loss": 0.7937, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6102922161882597, | |
| "grad_norm": 1.4385900524680808, | |
| "learning_rate": 3.4740598118799435e-07, | |
| "loss": 0.8138, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6113266097750194, | |
| "grad_norm": 1.394401364169, | |
| "learning_rate": 3.458104096892031e-07, | |
| "loss": 0.7819, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.6123610033617791, | |
| "grad_norm": 1.5041924693631308, | |
| "learning_rate": 3.442165714962959e-07, | |
| "loss": 0.7111, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.613395396948539, | |
| "grad_norm": 1.4425782469869788, | |
| "learning_rate": 3.426244845262351e-07, | |
| "loss": 0.7482, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.6144297905352987, | |
| "grad_norm": 1.3894417788796483, | |
| "learning_rate": 3.4103416667629705e-07, | |
| "loss": 0.8493, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6154641841220584, | |
| "grad_norm": 1.4153136457416773, | |
| "learning_rate": 3.394456358238708e-07, | |
| "loss": 0.8307, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6164985777088182, | |
| "grad_norm": 1.6272884116358777, | |
| "learning_rate": 3.3785890982625697e-07, | |
| "loss": 0.726, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6175329712955779, | |
| "grad_norm": 1.5575007029028212, | |
| "learning_rate": 3.3627400652046734e-07, | |
| "loss": 0.8262, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.6185673648823378, | |
| "grad_norm": 1.5191566390764035, | |
| "learning_rate": 3.346909437230237e-07, | |
| "loss": 0.7278, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6196017584690975, | |
| "grad_norm": 1.3928635500136382, | |
| "learning_rate": 3.3310973922975817e-07, | |
| "loss": 0.7797, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6206361520558572, | |
| "grad_norm": 1.4668239913579317, | |
| "learning_rate": 3.3153041081561296e-07, | |
| "loss": 0.7436, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.621670545642617, | |
| "grad_norm": 1.4969723101015961, | |
| "learning_rate": 3.299529762344406e-07, | |
| "loss": 0.7835, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.6227049392293768, | |
| "grad_norm": 1.3916179168719565, | |
| "learning_rate": 3.283774532188038e-07, | |
| "loss": 0.8411, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.6237393328161366, | |
| "grad_norm": 1.441017167233709, | |
| "learning_rate": 3.268038594797777e-07, | |
| "loss": 0.7231, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6247737264028963, | |
| "grad_norm": 1.4681530791745208, | |
| "learning_rate": 3.252322127067484e-07, | |
| "loss": 0.7592, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.625808119989656, | |
| "grad_norm": 1.451197343230802, | |
| "learning_rate": 3.2366253056721603e-07, | |
| "loss": 0.7663, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6268425135764158, | |
| "grad_norm": 1.4103640922877645, | |
| "learning_rate": 3.2209483070659583e-07, | |
| "loss": 0.7178, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6278769071631756, | |
| "grad_norm": 1.4531828208935964, | |
| "learning_rate": 3.2052913074801876e-07, | |
| "loss": 0.8529, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.6289113007499354, | |
| "grad_norm": 1.430971481731334, | |
| "learning_rate": 3.189654482921344e-07, | |
| "loss": 0.8246, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6299456943366951, | |
| "grad_norm": 1.4881264395489575, | |
| "learning_rate": 3.17403800916913e-07, | |
| "loss": 0.7786, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.6309800879234548, | |
| "grad_norm": 1.5004805396144116, | |
| "learning_rate": 3.158442061774473e-07, | |
| "loss": 0.7623, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6320144815102147, | |
| "grad_norm": 1.4405864511611242, | |
| "learning_rate": 3.142866816057559e-07, | |
| "loss": 0.8481, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.6330488750969744, | |
| "grad_norm": 1.434404018114957, | |
| "learning_rate": 3.1273124471058566e-07, | |
| "loss": 0.8011, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6340832686837342, | |
| "grad_norm": 1.5615135639766493, | |
| "learning_rate": 3.111779129772146e-07, | |
| "loss": 0.7539, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6351176622704939, | |
| "grad_norm": 1.4803469489425465, | |
| "learning_rate": 3.09626703867257e-07, | |
| "loss": 0.7617, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6361520558572537, | |
| "grad_norm": 1.4489064518626282, | |
| "learning_rate": 3.0807763481846447e-07, | |
| "loss": 0.868, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6371864494440135, | |
| "grad_norm": 1.4701571381430023, | |
| "learning_rate": 3.065307232445322e-07, | |
| "loss": 0.8161, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6382208430307732, | |
| "grad_norm": 1.4388060907088556, | |
| "learning_rate": 3.0498598653490224e-07, | |
| "loss": 0.7694, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.6392552366175329, | |
| "grad_norm": 1.5941341598490246, | |
| "learning_rate": 3.03443442054568e-07, | |
| "loss": 0.778, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6402896302042927, | |
| "grad_norm": 1.5835074231338933, | |
| "learning_rate": 3.019031071438791e-07, | |
| "loss": 0.771, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.6413240237910525, | |
| "grad_norm": 1.4566118205366114, | |
| "learning_rate": 3.0036499911834697e-07, | |
| "loss": 0.764, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6423584173778123, | |
| "grad_norm": 1.3998869348432936, | |
| "learning_rate": 2.9882913526844906e-07, | |
| "loss": 0.7407, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.643392810964572, | |
| "grad_norm": 1.47052537888667, | |
| "learning_rate": 2.972955328594358e-07, | |
| "loss": 0.8103, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6444272045513317, | |
| "grad_norm": 1.4383312302118985, | |
| "learning_rate": 2.9576420913113565e-07, | |
| "loss": 0.8526, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.6454615981380916, | |
| "grad_norm": 1.526450377301045, | |
| "learning_rate": 2.9423518129776094e-07, | |
| "loss": 0.7379, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6464959917248513, | |
| "grad_norm": 1.491287651360605, | |
| "learning_rate": 2.927084665477162e-07, | |
| "loss": 0.7178, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6475303853116111, | |
| "grad_norm": 1.4441369366561867, | |
| "learning_rate": 2.9118408204340237e-07, | |
| "loss": 0.8176, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6485647788983708, | |
| "grad_norm": 1.4175156805600948, | |
| "learning_rate": 2.89662044921026e-07, | |
| "loss": 0.7676, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6495991724851305, | |
| "grad_norm": 1.338874564770139, | |
| "learning_rate": 2.881423722904055e-07, | |
| "loss": 0.7456, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6506335660718904, | |
| "grad_norm": 1.5616306772587363, | |
| "learning_rate": 2.8662508123477944e-07, | |
| "loss": 0.7939, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.6516679596586501, | |
| "grad_norm": 1.4910096938436541, | |
| "learning_rate": 2.851101888106134e-07, | |
| "loss": 0.7961, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6527023532454099, | |
| "grad_norm": 1.388305147808417, | |
| "learning_rate": 2.8359771204740995e-07, | |
| "loss": 0.7857, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.6537367468321696, | |
| "grad_norm": 1.483546607985, | |
| "learning_rate": 2.8208766794751516e-07, | |
| "loss": 0.7547, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6547711404189294, | |
| "grad_norm": 1.5247414791239315, | |
| "learning_rate": 2.8058007348593e-07, | |
| "loss": 0.8188, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.6558055340056892, | |
| "grad_norm": 1.5934085311411073, | |
| "learning_rate": 2.790749456101169e-07, | |
| "loss": 0.7814, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.6568399275924489, | |
| "grad_norm": 1.3653127273101069, | |
| "learning_rate": 2.775723012398107e-07, | |
| "loss": 0.7887, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6578743211792087, | |
| "grad_norm": 1.4504800404645346, | |
| "learning_rate": 2.7607215726682835e-07, | |
| "loss": 0.741, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6589087147659685, | |
| "grad_norm": 1.5866283839186583, | |
| "learning_rate": 2.7457453055487924e-07, | |
| "loss": 0.745, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.6599431083527282, | |
| "grad_norm": 1.4096430646579101, | |
| "learning_rate": 2.730794379393742e-07, | |
| "loss": 0.8231, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.660977501939488, | |
| "grad_norm": 1.4493052314436135, | |
| "learning_rate": 2.715868962272381e-07, | |
| "loss": 0.7667, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.6620118955262477, | |
| "grad_norm": 1.5669380803430561, | |
| "learning_rate": 2.700969221967202e-07, | |
| "loss": 0.6883, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6630462891130074, | |
| "grad_norm": 1.552405280988061, | |
| "learning_rate": 2.686095325972047e-07, | |
| "loss": 0.8685, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.6640806826997673, | |
| "grad_norm": 1.4953716190300075, | |
| "learning_rate": 2.67124744149024e-07, | |
| "loss": 0.8147, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.665115076286527, | |
| "grad_norm": 1.402125770056715, | |
| "learning_rate": 2.6564257354326915e-07, | |
| "loss": 0.7906, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.6661494698732868, | |
| "grad_norm": 1.600040158532781, | |
| "learning_rate": 2.6416303744160355e-07, | |
| "loss": 0.7624, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.6671838634600465, | |
| "grad_norm": 1.6252533144822985, | |
| "learning_rate": 2.626861524760753e-07, | |
| "loss": 0.7412, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6682182570468063, | |
| "grad_norm": 1.4615354314532314, | |
| "learning_rate": 2.6121193524892914e-07, | |
| "loss": 0.7715, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.6692526506335661, | |
| "grad_norm": 1.504764628072939, | |
| "learning_rate": 2.597404023324217e-07, | |
| "loss": 0.8577, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.6702870442203258, | |
| "grad_norm": 1.3825713638796224, | |
| "learning_rate": 2.5827157026863367e-07, | |
| "loss": 0.7694, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.6713214378070856, | |
| "grad_norm": 1.6040907161295443, | |
| "learning_rate": 2.5680545556928434e-07, | |
| "loss": 0.7149, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.6723558313938454, | |
| "grad_norm": 1.4882704519785939, | |
| "learning_rate": 2.553420747155464e-07, | |
| "loss": 0.77, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6733902249806051, | |
| "grad_norm": 1.4738763131560069, | |
| "learning_rate": 2.5388144415786025e-07, | |
| "loss": 0.7932, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.6744246185673649, | |
| "grad_norm": 1.4784850694033695, | |
| "learning_rate": 2.524235803157485e-07, | |
| "loss": 0.789, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.6754590121541246, | |
| "grad_norm": 1.371132582566805, | |
| "learning_rate": 2.509684995776329e-07, | |
| "loss": 0.8164, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.6764934057408845, | |
| "grad_norm": 1.3972746713747322, | |
| "learning_rate": 2.4951621830064886e-07, | |
| "loss": 0.8493, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.6775277993276442, | |
| "grad_norm": 1.4801020170703414, | |
| "learning_rate": 2.4806675281046166e-07, | |
| "loss": 0.8137, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6785621929144039, | |
| "grad_norm": 1.5097483258582645, | |
| "learning_rate": 2.466201194010838e-07, | |
| "loss": 0.7149, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.6795965865011637, | |
| "grad_norm": 1.7910575369411235, | |
| "learning_rate": 2.451763343346906e-07, | |
| "loss": 0.8543, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.6806309800879234, | |
| "grad_norm": 1.4396898090572523, | |
| "learning_rate": 2.437354138414385e-07, | |
| "loss": 0.7941, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.6816653736746832, | |
| "grad_norm": 1.4730391754800491, | |
| "learning_rate": 2.422973741192822e-07, | |
| "loss": 0.7871, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.682699767261443, | |
| "grad_norm": 1.4248345342201216, | |
| "learning_rate": 2.4086223133379196e-07, | |
| "loss": 0.7567, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6837341608482027, | |
| "grad_norm": 1.518214549930383, | |
| "learning_rate": 2.39430001617973e-07, | |
| "loss": 0.7475, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.6847685544349625, | |
| "grad_norm": 1.4301534432380316, | |
| "learning_rate": 2.3800070107208353e-07, | |
| "loss": 0.8112, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.6858029480217223, | |
| "grad_norm": 1.5148492874781332, | |
| "learning_rate": 2.3657434576345331e-07, | |
| "loss": 0.6988, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.686837341608482, | |
| "grad_norm": 1.3784661078519251, | |
| "learning_rate": 2.3515095172630406e-07, | |
| "loss": 0.7896, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.6878717351952418, | |
| "grad_norm": 1.5086064007950462, | |
| "learning_rate": 2.3373053496156863e-07, | |
| "loss": 0.7606, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.6889061287820015, | |
| "grad_norm": 1.6473490292838275, | |
| "learning_rate": 2.3231311143671074e-07, | |
| "loss": 0.8637, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.6899405223687614, | |
| "grad_norm": 1.415892288816798, | |
| "learning_rate": 2.3089869708554654e-07, | |
| "loss": 0.7407, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.6909749159555211, | |
| "grad_norm": 1.4184131309690355, | |
| "learning_rate": 2.2948730780806403e-07, | |
| "loss": 0.8063, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.6920093095422808, | |
| "grad_norm": 1.44617438631048, | |
| "learning_rate": 2.280789594702464e-07, | |
| "loss": 0.7827, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.6930437031290406, | |
| "grad_norm": 1.4366244309854974, | |
| "learning_rate": 2.2667366790389148e-07, | |
| "loss": 0.8607, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6940780967158003, | |
| "grad_norm": 1.5479892576375742, | |
| "learning_rate": 2.2527144890643462e-07, | |
| "loss": 0.8091, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.6951124903025602, | |
| "grad_norm": 1.5521102830432434, | |
| "learning_rate": 2.2387231824077184e-07, | |
| "loss": 0.8076, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.6961468838893199, | |
| "grad_norm": 1.5203451124269218, | |
| "learning_rate": 2.2247629163508202e-07, | |
| "loss": 0.7935, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.6971812774760796, | |
| "grad_norm": 1.4886201449179939, | |
| "learning_rate": 2.2108338478264932e-07, | |
| "loss": 0.7996, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.6982156710628394, | |
| "grad_norm": 1.462641976226029, | |
| "learning_rate": 2.1969361334168817e-07, | |
| "loss": 0.7939, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.6992500646495992, | |
| "grad_norm": 1.3885284902649586, | |
| "learning_rate": 2.1830699293516675e-07, | |
| "loss": 0.7456, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.700284458236359, | |
| "grad_norm": 1.3908889368609065, | |
| "learning_rate": 2.1692353915063045e-07, | |
| "loss": 0.7519, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.7013188518231187, | |
| "grad_norm": 1.4455191623840915, | |
| "learning_rate": 2.1554326754002827e-07, | |
| "loss": 0.7583, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7023532454098784, | |
| "grad_norm": 1.5229064191663302, | |
| "learning_rate": 2.1416619361953636e-07, | |
| "loss": 0.7502, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.7033876389966383, | |
| "grad_norm": 1.471967907263402, | |
| "learning_rate": 2.1279233286938504e-07, | |
| "loss": 0.7619, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.704422032583398, | |
| "grad_norm": 1.4984272720837468, | |
| "learning_rate": 2.1142170073368392e-07, | |
| "loss": 0.8259, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.7054564261701577, | |
| "grad_norm": 1.4864104046528848, | |
| "learning_rate": 2.1005431262024808e-07, | |
| "loss": 0.7507, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7064908197569175, | |
| "grad_norm": 1.4540736490751358, | |
| "learning_rate": 2.0869018390042585e-07, | |
| "loss": 0.7158, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.7075252133436772, | |
| "grad_norm": 1.4543278048580008, | |
| "learning_rate": 2.0732932990892527e-07, | |
| "loss": 0.7965, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7085596069304371, | |
| "grad_norm": 1.4402177073198272, | |
| "learning_rate": 2.0597176594364146e-07, | |
| "loss": 0.8589, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.7095940005171968, | |
| "grad_norm": 1.548049611618129, | |
| "learning_rate": 2.0461750726548553e-07, | |
| "loss": 0.7782, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7106283941039565, | |
| "grad_norm": 1.3397434371389205, | |
| "learning_rate": 2.032665690982126e-07, | |
| "loss": 0.7575, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.7116627876907163, | |
| "grad_norm": 1.4832979351948137, | |
| "learning_rate": 2.0191896662825009e-07, | |
| "loss": 0.8102, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.712697181277476, | |
| "grad_norm": 1.4256440388288476, | |
| "learning_rate": 2.0057471500452822e-07, | |
| "loss": 0.803, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7137315748642359, | |
| "grad_norm": 1.5552352317416895, | |
| "learning_rate": 1.9923382933830836e-07, | |
| "loss": 0.7378, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7147659684509956, | |
| "grad_norm": 1.5859395469964523, | |
| "learning_rate": 1.978963247030142e-07, | |
| "loss": 0.849, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7158003620377553, | |
| "grad_norm": 1.4751442047677354, | |
| "learning_rate": 1.9656221613406215e-07, | |
| "loss": 0.744, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7168347556245152, | |
| "grad_norm": 1.5384009951892297, | |
| "learning_rate": 1.9523151862869147e-07, | |
| "loss": 0.8252, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.7178691492112749, | |
| "grad_norm": 1.4581667496620268, | |
| "learning_rate": 1.9390424714579677e-07, | |
| "loss": 0.7835, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7189035427980347, | |
| "grad_norm": 1.4336408065912338, | |
| "learning_rate": 1.9258041660575958e-07, | |
| "loss": 0.7232, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7199379363847944, | |
| "grad_norm": 1.366885890580138, | |
| "learning_rate": 1.9126004189027972e-07, | |
| "loss": 0.7354, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7209723299715541, | |
| "grad_norm": 1.458357731991347, | |
| "learning_rate": 1.899431378422094e-07, | |
| "loss": 0.7372, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.722006723558314, | |
| "grad_norm": 1.377020610792232, | |
| "learning_rate": 1.886297192653855e-07, | |
| "loss": 0.8001, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.7230411171450737, | |
| "grad_norm": 1.398202217450958, | |
| "learning_rate": 1.8731980092446304e-07, | |
| "loss": 0.8419, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.7240755107318335, | |
| "grad_norm": 1.5362107919440666, | |
| "learning_rate": 1.8601339754475005e-07, | |
| "loss": 0.7185, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7251099043185932, | |
| "grad_norm": 1.4090895212949013, | |
| "learning_rate": 1.847105238120409e-07, | |
| "loss": 0.8149, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.726144297905353, | |
| "grad_norm": 1.4376787838723264, | |
| "learning_rate": 1.834111943724523e-07, | |
| "loss": 0.7722, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7271786914921128, | |
| "grad_norm": 1.39307998427213, | |
| "learning_rate": 1.8211542383225808e-07, | |
| "loss": 0.8278, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.7282130850788725, | |
| "grad_norm": 1.4834089564910766, | |
| "learning_rate": 1.8082322675772476e-07, | |
| "loss": 0.7723, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.7292474786656322, | |
| "grad_norm": 1.5032770465089857, | |
| "learning_rate": 1.7953461767494837e-07, | |
| "loss": 0.7963, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.730281872252392, | |
| "grad_norm": 1.4189974344813017, | |
| "learning_rate": 1.7824961106969121e-07, | |
| "loss": 0.8446, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.7313162658391518, | |
| "grad_norm": 1.4115923780029827, | |
| "learning_rate": 1.7696822138721794e-07, | |
| "loss": 0.7731, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.7323506594259116, | |
| "grad_norm": 1.3597747711900041, | |
| "learning_rate": 1.756904630321347e-07, | |
| "loss": 0.7767, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7333850530126713, | |
| "grad_norm": 1.4176865244103791, | |
| "learning_rate": 1.7441635036822623e-07, | |
| "loss": 0.8328, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.734419446599431, | |
| "grad_norm": 1.516213503078198, | |
| "learning_rate": 1.7314589771829424e-07, | |
| "loss": 0.8045, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7354538401861909, | |
| "grad_norm": 1.3835288520431024, | |
| "learning_rate": 1.718791193639973e-07, | |
| "loss": 0.7498, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.7364882337729506, | |
| "grad_norm": 1.48846960858767, | |
| "learning_rate": 1.7061602954568978e-07, | |
| "loss": 0.8259, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7375226273597104, | |
| "grad_norm": 1.4597435857434944, | |
| "learning_rate": 1.6935664246226116e-07, | |
| "loss": 0.7782, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.7385570209464701, | |
| "grad_norm": 1.4484159729242072, | |
| "learning_rate": 1.681009722709778e-07, | |
| "loss": 0.8126, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7395914145332299, | |
| "grad_norm": 1.4177301636764374, | |
| "learning_rate": 1.668490330873223e-07, | |
| "loss": 0.7959, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.7406258081199897, | |
| "grad_norm": 1.3254071170119985, | |
| "learning_rate": 1.6560083898483597e-07, | |
| "loss": 0.7907, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.7416602017067494, | |
| "grad_norm": 1.4559287358786062, | |
| "learning_rate": 1.6435640399496032e-07, | |
| "loss": 0.7839, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.7426945952935092, | |
| "grad_norm": 1.433278839247102, | |
| "learning_rate": 1.6311574210687862e-07, | |
| "loss": 0.7039, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.743728988880269, | |
| "grad_norm": 1.3892115917188481, | |
| "learning_rate": 1.6187886726735977e-07, | |
| "loss": 0.7734, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.7447633824670287, | |
| "grad_norm": 1.5234739714447227, | |
| "learning_rate": 1.6064579338060085e-07, | |
| "loss": 0.803, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7457977760537885, | |
| "grad_norm": 1.4953838669446045, | |
| "learning_rate": 1.594165343080705e-07, | |
| "loss": 0.7826, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.7468321696405482, | |
| "grad_norm": 1.376138301952389, | |
| "learning_rate": 1.581911038683541e-07, | |
| "loss": 0.7798, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.747866563227308, | |
| "grad_norm": 1.4446918336824965, | |
| "learning_rate": 1.5696951583699775e-07, | |
| "loss": 0.7645, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.7489009568140678, | |
| "grad_norm": 1.4472461557599074, | |
| "learning_rate": 1.5575178394635313e-07, | |
| "loss": 0.8168, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.7499353504008275, | |
| "grad_norm": 1.4511340372294639, | |
| "learning_rate": 1.545379218854241e-07, | |
| "loss": 0.8063, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7509697439875873, | |
| "grad_norm": 1.3902139908203044, | |
| "learning_rate": 1.5332794329971156e-07, | |
| "loss": 0.7831, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.752004137574347, | |
| "grad_norm": 1.5912278711067618, | |
| "learning_rate": 1.521218617910614e-07, | |
| "loss": 0.7509, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.7530385311611068, | |
| "grad_norm": 1.3922663428502382, | |
| "learning_rate": 1.509196909175107e-07, | |
| "loss": 0.8119, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.7540729247478666, | |
| "grad_norm": 1.462973383734247, | |
| "learning_rate": 1.4972144419313525e-07, | |
| "loss": 0.8055, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.7551073183346263, | |
| "grad_norm": 1.4856912009900647, | |
| "learning_rate": 1.4852713508789833e-07, | |
| "loss": 0.845, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7561417119213861, | |
| "grad_norm": 1.3864966112932746, | |
| "learning_rate": 1.4733677702749893e-07, | |
| "loss": 0.8196, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.7571761055081458, | |
| "grad_norm": 1.5787786849551309, | |
| "learning_rate": 1.4615038339322023e-07, | |
| "loss": 0.7564, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7582104990949056, | |
| "grad_norm": 1.434319809794859, | |
| "learning_rate": 1.449679675217803e-07, | |
| "loss": 0.7899, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.7592448926816654, | |
| "grad_norm": 1.527214741501682, | |
| "learning_rate": 1.437895427051817e-07, | |
| "loss": 0.7349, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.7602792862684251, | |
| "grad_norm": 1.478409724302356, | |
| "learning_rate": 1.4261512219056116e-07, | |
| "loss": 0.6813, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.7613136798551849, | |
| "grad_norm": 1.539708344276648, | |
| "learning_rate": 1.4144471918004254e-07, | |
| "loss": 0.7354, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.7623480734419447, | |
| "grad_norm": 1.5486126025685474, | |
| "learning_rate": 1.4027834683058636e-07, | |
| "loss": 0.7769, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.7633824670287044, | |
| "grad_norm": 1.395867166206849, | |
| "learning_rate": 1.3911601825384368e-07, | |
| "loss": 0.7901, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.7644168606154642, | |
| "grad_norm": 1.5115592761093977, | |
| "learning_rate": 1.3795774651600755e-07, | |
| "loss": 0.8391, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.7654512542022239, | |
| "grad_norm": 1.4368849826114931, | |
| "learning_rate": 1.368035446376664e-07, | |
| "loss": 0.7686, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7664856477889838, | |
| "grad_norm": 1.595681587519276, | |
| "learning_rate": 1.3565342559365806e-07, | |
| "loss": 0.7783, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.7675200413757435, | |
| "grad_norm": 1.5255994552148735, | |
| "learning_rate": 1.3450740231292351e-07, | |
| "loss": 0.8006, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.7685544349625032, | |
| "grad_norm": 1.4058157472236206, | |
| "learning_rate": 1.3336548767836142e-07, | |
| "loss": 0.7765, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.769588828549263, | |
| "grad_norm": 1.4948150089375694, | |
| "learning_rate": 1.322276945266838e-07, | |
| "loss": 0.8221, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.7706232221360227, | |
| "grad_norm": 1.5796549973897047, | |
| "learning_rate": 1.3109403564827155e-07, | |
| "loss": 0.8409, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7716576157227825, | |
| "grad_norm": 1.5985231335601702, | |
| "learning_rate": 1.299645237870301e-07, | |
| "loss": 0.8089, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.7726920093095423, | |
| "grad_norm": 1.4809289753568542, | |
| "learning_rate": 1.288391716402472e-07, | |
| "loss": 0.8228, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.773726402896302, | |
| "grad_norm": 1.4524951503571424, | |
| "learning_rate": 1.2771799185844912e-07, | |
| "loss": 0.7822, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.7747607964830618, | |
| "grad_norm": 1.4464331436069662, | |
| "learning_rate": 1.2660099704525927e-07, | |
| "loss": 0.7413, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.7757951900698216, | |
| "grad_norm": 1.4414343861405192, | |
| "learning_rate": 1.2548819975725622e-07, | |
| "loss": 0.7193, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7757951900698216, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_loss": 2.762585401535034, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_runtime": 5.4076, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_samples_per_second": 17.198, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_steps_per_second": 2.219, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7768295836565813, | |
| "grad_norm": 1.3911073734403505, | |
| "learning_rate": 1.2437961250383206e-07, | |
| "loss": 0.8126, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.7778639772433411, | |
| "grad_norm": 1.490245216601882, | |
| "learning_rate": 1.2327524774705267e-07, | |
| "loss": 0.7551, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.7788983708301008, | |
| "grad_norm": 1.5264250595268665, | |
| "learning_rate": 1.221751179015172e-07, | |
| "loss": 0.7507, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.7799327644168607, | |
| "grad_norm": 1.4694452350543095, | |
| "learning_rate": 1.2107923533421792e-07, | |
| "loss": 0.792, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.7809671580036204, | |
| "grad_norm": 1.3517786374158185, | |
| "learning_rate": 1.1998761236440247e-07, | |
| "loss": 0.7487, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.7820015515903801, | |
| "grad_norm": 1.4657101126825252, | |
| "learning_rate": 1.1890026126343444e-07, | |
| "loss": 0.8098, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.7830359451771399, | |
| "grad_norm": 1.4755415945958767, | |
| "learning_rate": 1.1781719425465536e-07, | |
| "loss": 0.7467, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.7840703387638996, | |
| "grad_norm": 1.4715304221906624, | |
| "learning_rate": 1.1673842351324814e-07, | |
| "loss": 0.8334, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.7851047323506595, | |
| "grad_norm": 1.4370413781084388, | |
| "learning_rate": 1.1566396116609906e-07, | |
| "loss": 0.7725, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.7861391259374192, | |
| "grad_norm": 1.49238406786731, | |
| "learning_rate": 1.1459381929166251e-07, | |
| "loss": 0.7688, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7871735195241789, | |
| "grad_norm": 1.4572074661812822, | |
| "learning_rate": 1.1352800991982464e-07, | |
| "loss": 0.7392, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.7882079131109387, | |
| "grad_norm": 1.4518792620141616, | |
| "learning_rate": 1.1246654503176795e-07, | |
| "loss": 0.7768, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.7892423066976985, | |
| "grad_norm": 1.439371588993288, | |
| "learning_rate": 1.1140943655983725e-07, | |
| "loss": 0.8205, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.7902767002844583, | |
| "grad_norm": 1.5864019297787213, | |
| "learning_rate": 1.1035669638740519e-07, | |
| "loss": 0.6854, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.791311093871218, | |
| "grad_norm": 1.463406672052628, | |
| "learning_rate": 1.0930833634873809e-07, | |
| "loss": 0.7189, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.7923454874579777, | |
| "grad_norm": 1.3197655035552545, | |
| "learning_rate": 1.0826436822886409e-07, | |
| "loss": 0.7641, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.7933798810447376, | |
| "grad_norm": 1.4253718707377334, | |
| "learning_rate": 1.0722480376343995e-07, | |
| "loss": 0.8125, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.7944142746314973, | |
| "grad_norm": 1.3959295650383816, | |
| "learning_rate": 1.0618965463861868e-07, | |
| "loss": 0.8011, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.795448668218257, | |
| "grad_norm": 1.3155195682372327, | |
| "learning_rate": 1.0515893249091933e-07, | |
| "loss": 0.8005, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.7964830618050168, | |
| "grad_norm": 1.6028462873198248, | |
| "learning_rate": 1.0413264890709545e-07, | |
| "loss": 0.8232, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7975174553917765, | |
| "grad_norm": 1.4791348322826015, | |
| "learning_rate": 1.0311081542400452e-07, | |
| "loss": 0.7397, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.7985518489785364, | |
| "grad_norm": 1.4516674093766826, | |
| "learning_rate": 1.020934435284792e-07, | |
| "loss": 0.7439, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.7995862425652961, | |
| "grad_norm": 1.378859928629084, | |
| "learning_rate": 1.0108054465719734e-07, | |
| "loss": 0.7446, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.8006206361520558, | |
| "grad_norm": 1.402209532604386, | |
| "learning_rate": 1.0007213019655392e-07, | |
| "loss": 0.82, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.8016550297388156, | |
| "grad_norm": 1.4105234634779549, | |
| "learning_rate": 9.906821148253303e-08, | |
| "loss": 0.8135, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8026894233255754, | |
| "grad_norm": 1.487782525075316, | |
| "learning_rate": 9.806879980057992e-08, | |
| "loss": 0.7198, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8037238169123352, | |
| "grad_norm": 1.3987708648566481, | |
| "learning_rate": 9.70739063854748e-08, | |
| "loss": 0.7696, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.8047582104990949, | |
| "grad_norm": 1.3232819069861823, | |
| "learning_rate": 9.608354242120637e-08, | |
| "loss": 0.8101, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8057926040858546, | |
| "grad_norm": 1.5491732054551224, | |
| "learning_rate": 9.509771904084557e-08, | |
| "loss": 0.7539, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.8068269976726145, | |
| "grad_norm": 1.4736166265733701, | |
| "learning_rate": 9.411644732642121e-08, | |
| "loss": 0.7821, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8078613912593742, | |
| "grad_norm": 1.3980858472146012, | |
| "learning_rate": 9.313973830879513e-08, | |
| "loss": 0.8041, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.808895784846134, | |
| "grad_norm": 1.4856929150133562, | |
| "learning_rate": 9.216760296753757e-08, | |
| "loss": 0.7021, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8099301784328937, | |
| "grad_norm": 1.5173548645299464, | |
| "learning_rate": 9.120005223080484e-08, | |
| "loss": 0.7502, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.8109645720196534, | |
| "grad_norm": 1.4589229254185816, | |
| "learning_rate": 9.023709697521542e-08, | |
| "loss": 0.8147, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.8119989656064133, | |
| "grad_norm": 1.5650292749921875, | |
| "learning_rate": 8.92787480257286e-08, | |
| "loss": 0.8249, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.813033359193173, | |
| "grad_norm": 1.3897295751765029, | |
| "learning_rate": 8.832501615552223e-08, | |
| "loss": 0.7218, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8140677527799328, | |
| "grad_norm": 1.5083322604955727, | |
| "learning_rate": 8.737591208587158e-08, | |
| "loss": 0.8437, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.8151021463666925, | |
| "grad_norm": 1.4982850242555628, | |
| "learning_rate": 8.643144648602912e-08, | |
| "loss": 0.7587, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8161365399534523, | |
| "grad_norm": 1.3972994822479223, | |
| "learning_rate": 8.549162997310466e-08, | |
| "loss": 0.7712, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.8171709335402121, | |
| "grad_norm": 1.3600891639674606, | |
| "learning_rate": 8.455647311194536e-08, | |
| "loss": 0.7161, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8182053271269718, | |
| "grad_norm": 1.4393048239174115, | |
| "learning_rate": 8.362598641501773e-08, | |
| "loss": 0.8355, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.8192397207137315, | |
| "grad_norm": 1.4322864880883386, | |
| "learning_rate": 8.270018034228915e-08, | |
| "loss": 0.8361, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8202741143004914, | |
| "grad_norm": 1.4854843567290648, | |
| "learning_rate": 8.177906530110995e-08, | |
| "loss": 0.7448, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.8213085078872511, | |
| "grad_norm": 1.4015501722223265, | |
| "learning_rate": 8.086265164609707e-08, | |
| "loss": 0.6913, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.8223429014740109, | |
| "grad_norm": 1.428671256921823, | |
| "learning_rate": 7.9950949679017e-08, | |
| "loss": 0.8068, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8233772950607706, | |
| "grad_norm": 1.4074224666357722, | |
| "learning_rate": 7.904396964867049e-08, | |
| "loss": 0.7505, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.8244116886475303, | |
| "grad_norm": 1.4824989278534766, | |
| "learning_rate": 7.814172175077737e-08, | |
| "loss": 0.7239, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.8254460822342902, | |
| "grad_norm": 1.4367606402549709, | |
| "learning_rate": 7.724421612786108e-08, | |
| "loss": 0.7545, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8264804758210499, | |
| "grad_norm": 1.4481468571958176, | |
| "learning_rate": 7.635146286913585e-08, | |
| "loss": 0.8053, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.8275148694078097, | |
| "grad_norm": 1.4616818095746162, | |
| "learning_rate": 7.546347201039255e-08, | |
| "loss": 0.7558, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8285492629945694, | |
| "grad_norm": 1.4643196628418114, | |
| "learning_rate": 7.458025353388591e-08, | |
| "loss": 0.7245, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.8295836565813292, | |
| "grad_norm": 1.4560433733970786, | |
| "learning_rate": 7.370181736822229e-08, | |
| "loss": 0.7805, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.830618050168089, | |
| "grad_norm": 1.343368703136031, | |
| "learning_rate": 7.282817338824892e-08, | |
| "loss": 0.7061, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.8316524437548487, | |
| "grad_norm": 1.451887545604897, | |
| "learning_rate": 7.195933141494131e-08, | |
| "loss": 0.739, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.8326868373416085, | |
| "grad_norm": 1.4039441507613721, | |
| "learning_rate": 7.109530121529438e-08, | |
| "loss": 0.7992, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.8337212309283683, | |
| "grad_norm": 1.4863940871830705, | |
| "learning_rate": 7.023609250221152e-08, | |
| "loss": 0.8089, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.834755624515128, | |
| "grad_norm": 1.3986266258156324, | |
| "learning_rate": 6.93817149343962e-08, | |
| "loss": 0.7735, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.8357900181018878, | |
| "grad_norm": 1.4940676669737767, | |
| "learning_rate": 6.853217811624313e-08, | |
| "loss": 0.7762, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.8368244116886475, | |
| "grad_norm": 1.4789925076158594, | |
| "learning_rate": 6.76874915977299e-08, | |
| "loss": 0.805, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.8378588052754073, | |
| "grad_norm": 1.4518286935475622, | |
| "learning_rate": 6.684766487431026e-08, | |
| "loss": 0.7662, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8388931988621671, | |
| "grad_norm": 1.3731364286777918, | |
| "learning_rate": 6.60127073868072e-08, | |
| "loss": 0.7474, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.8399275924489268, | |
| "grad_norm": 1.4947866051424046, | |
| "learning_rate": 6.518262852130624e-08, | |
| "loss": 0.7652, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.8409619860356866, | |
| "grad_norm": 1.5437889862392602, | |
| "learning_rate": 6.435743760905083e-08, | |
| "loss": 0.8021, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.8419963796224463, | |
| "grad_norm": 1.4010486448796977, | |
| "learning_rate": 6.353714392633696e-08, | |
| "loss": 0.7877, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.843030773209206, | |
| "grad_norm": 1.4482198624415896, | |
| "learning_rate": 6.27217566944086e-08, | |
| "loss": 0.8118, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.8440651667959659, | |
| "grad_norm": 1.3695064207859293, | |
| "learning_rate": 6.191128507935478e-08, | |
| "loss": 0.7653, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.8450995603827256, | |
| "grad_norm": 1.505571290761308, | |
| "learning_rate": 6.110573819200604e-08, | |
| "loss": 0.8002, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.8461339539694854, | |
| "grad_norm": 1.5290113686879605, | |
| "learning_rate": 6.030512508783186e-08, | |
| "loss": 0.7281, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.8471683475562451, | |
| "grad_norm": 1.421004913965161, | |
| "learning_rate": 5.950945476683955e-08, | |
| "loss": 0.8038, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.8482027411430049, | |
| "grad_norm": 1.5232656821418804, | |
| "learning_rate": 5.871873617347217e-08, | |
| "loss": 0.752, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8492371347297647, | |
| "grad_norm": 1.4007224928948465, | |
| "learning_rate": 5.793297819650883e-08, | |
| "loss": 0.8009, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.8502715283165244, | |
| "grad_norm": 1.5375024015029433, | |
| "learning_rate": 5.7152189668964346e-08, | |
| "loss": 0.7132, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.8513059219032842, | |
| "grad_norm": 1.4206171384176234, | |
| "learning_rate": 5.637637936798978e-08, | |
| "loss": 0.8328, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.852340315490044, | |
| "grad_norm": 1.4549811861946342, | |
| "learning_rate": 5.560555601477418e-08, | |
| "loss": 0.7635, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.8533747090768037, | |
| "grad_norm": 1.4725461511151157, | |
| "learning_rate": 5.4839728274446437e-08, | |
| "loss": 0.8545, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.8544091026635635, | |
| "grad_norm": 1.3527900732670748, | |
| "learning_rate": 5.4078904755977594e-08, | |
| "loss": 0.7918, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.8554434962503232, | |
| "grad_norm": 1.4171967970741899, | |
| "learning_rate": 5.332309401208407e-08, | |
| "loss": 0.8103, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.8564778898370831, | |
| "grad_norm": 1.5235440611198816, | |
| "learning_rate": 5.257230453913236e-08, | |
| "loss": 0.7408, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.8575122834238428, | |
| "grad_norm": 1.3849703275186278, | |
| "learning_rate": 5.182654477704229e-08, | |
| "loss": 0.7791, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.8585466770106025, | |
| "grad_norm": 1.4854167447528173, | |
| "learning_rate": 5.1085823109193014e-08, | |
| "loss": 0.7682, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8595810705973623, | |
| "grad_norm": 1.5132331797699872, | |
| "learning_rate": 5.035014786232827e-08, | |
| "loss": 0.7882, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.860615464184122, | |
| "grad_norm": 1.3854530175480861, | |
| "learning_rate": 4.9619527306463186e-08, | |
| "loss": 0.8011, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.8616498577708818, | |
| "grad_norm": 1.5369376856731023, | |
| "learning_rate": 4.8893969654791145e-08, | |
| "loss": 0.6802, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.8626842513576416, | |
| "grad_norm": 1.4053959560591536, | |
| "learning_rate": 4.81734830635912e-08, | |
| "loss": 0.7404, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.8637186449444013, | |
| "grad_norm": 1.4382170597749135, | |
| "learning_rate": 4.745807563213677e-08, | |
| "loss": 0.7512, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.8647530385311611, | |
| "grad_norm": 1.355141412978352, | |
| "learning_rate": 4.674775540260456e-08, | |
| "loss": 0.8219, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.8657874321179209, | |
| "grad_norm": 1.3918651399802737, | |
| "learning_rate": 4.604253035998379e-08, | |
| "loss": 0.7159, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.8668218257046806, | |
| "grad_norm": 1.423600258829345, | |
| "learning_rate": 4.534240843198661e-08, | |
| "loss": 0.7509, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.8678562192914404, | |
| "grad_norm": 1.4510983494503749, | |
| "learning_rate": 4.464739748895963e-08, | |
| "loss": 0.8447, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.8688906128782001, | |
| "grad_norm": 1.3548128172922889, | |
| "learning_rate": 4.395750534379411e-08, | |
| "loss": 0.7913, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.86992500646496, | |
| "grad_norm": 1.3574730585550736, | |
| "learning_rate": 4.327273975183948e-08, | |
| "loss": 0.7482, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.8709594000517197, | |
| "grad_norm": 1.5424782075007908, | |
| "learning_rate": 4.2593108410815146e-08, | |
| "loss": 0.7084, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.8719937936384794, | |
| "grad_norm": 1.552156047553469, | |
| "learning_rate": 4.191861896072457e-08, | |
| "loss": 0.7775, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.8730281872252392, | |
| "grad_norm": 1.396546714935789, | |
| "learning_rate": 4.12492789837694e-08, | |
| "loss": 0.7897, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.874062580811999, | |
| "grad_norm": 1.3547911853375985, | |
| "learning_rate": 4.0585096004263573e-08, | |
| "loss": 0.7818, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.8750969743987588, | |
| "grad_norm": 1.5001464034972132, | |
| "learning_rate": 3.992607748854954e-08, | |
| "loss": 0.7943, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.8761313679855185, | |
| "grad_norm": 1.489903879602473, | |
| "learning_rate": 3.927223084491388e-08, | |
| "loss": 0.7891, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.8771657615722782, | |
| "grad_norm": 1.3913397770398852, | |
| "learning_rate": 3.862356342350409e-08, | |
| "loss": 0.7906, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.878200155159038, | |
| "grad_norm": 1.452219880655871, | |
| "learning_rate": 3.798008251624585e-08, | |
| "loss": 0.7484, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.8792345487457978, | |
| "grad_norm": 1.6016935848347844, | |
| "learning_rate": 3.734179535676168e-08, | |
| "loss": 0.8353, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8802689423325576, | |
| "grad_norm": 1.409682688811978, | |
| "learning_rate": 3.670870912028856e-08, | |
| "loss": 0.7647, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.8813033359193173, | |
| "grad_norm": 1.3698726921479196, | |
| "learning_rate": 3.608083092359826e-08, | |
| "loss": 0.7803, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.882337729506077, | |
| "grad_norm": 1.393227732291428, | |
| "learning_rate": 3.545816782491656e-08, | |
| "loss": 0.743, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.8833721230928369, | |
| "grad_norm": 1.539440250774928, | |
| "learning_rate": 3.4840726823844646e-08, | |
| "loss": 0.751, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.8844065166795966, | |
| "grad_norm": 1.4912336210962442, | |
| "learning_rate": 3.422851486127987e-08, | |
| "loss": 0.7486, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.8854409102663563, | |
| "grad_norm": 1.399027954464841, | |
| "learning_rate": 3.362153881933777e-08, | |
| "loss": 0.8415, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.8864753038531161, | |
| "grad_norm": 1.3539040330922476, | |
| "learning_rate": 3.301980552127509e-08, | |
| "loss": 0.8283, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.8875096974398758, | |
| "grad_norm": 1.4662899825339653, | |
| "learning_rate": 3.242332173141277e-08, | |
| "loss": 0.8241, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.8885440910266357, | |
| "grad_norm": 1.449948654111736, | |
| "learning_rate": 3.183209415505978e-08, | |
| "loss": 0.7448, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.8895784846133954, | |
| "grad_norm": 1.5837465040255534, | |
| "learning_rate": 3.1246129438438076e-08, | |
| "loss": 0.8291, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8906128782001551, | |
| "grad_norm": 1.4420356948940825, | |
| "learning_rate": 3.066543416860784e-08, | |
| "loss": 0.7053, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.8916472717869149, | |
| "grad_norm": 1.5431010945772905, | |
| "learning_rate": 3.0090014873393074e-08, | |
| "loss": 0.803, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.8926816653736747, | |
| "grad_norm": 1.4679524213755917, | |
| "learning_rate": 2.951987802130862e-08, | |
| "loss": 0.7987, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.8937160589604345, | |
| "grad_norm": 1.4571290735517293, | |
| "learning_rate": 2.8955030021487247e-08, | |
| "loss": 0.7748, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.8947504525471942, | |
| "grad_norm": 1.4517033580689347, | |
| "learning_rate": 2.839547722360769e-08, | |
| "loss": 0.8353, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.8957848461339539, | |
| "grad_norm": 1.3832760756247497, | |
| "learning_rate": 2.7841225917823342e-08, | |
| "loss": 0.7991, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.8968192397207138, | |
| "grad_norm": 1.408125909510052, | |
| "learning_rate": 2.7292282334691164e-08, | |
| "loss": 0.7949, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.8978536333074735, | |
| "grad_norm": 1.4946296610226655, | |
| "learning_rate": 2.6748652645102177e-08, | |
| "loss": 0.7854, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.8988880268942333, | |
| "grad_norm": 1.3943411702887958, | |
| "learning_rate": 2.621034296021174e-08, | |
| "loss": 0.7867, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.899922420480993, | |
| "grad_norm": 1.4489993759143427, | |
| "learning_rate": 2.567735933137083e-08, | |
| "loss": 0.7896, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9009568140677527, | |
| "grad_norm": 1.636262766485154, | |
| "learning_rate": 2.5149707750058312e-08, | |
| "loss": 0.8047, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.9019912076545126, | |
| "grad_norm": 1.429224461061849, | |
| "learning_rate": 2.462739414781334e-08, | |
| "loss": 0.7413, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.9030256012412723, | |
| "grad_norm": 1.4574966576563904, | |
| "learning_rate": 2.4110424396168726e-08, | |
| "loss": 0.7172, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.9040599948280321, | |
| "grad_norm": 1.4211036738678824, | |
| "learning_rate": 2.359880430658484e-08, | |
| "loss": 0.8174, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9050943884147918, | |
| "grad_norm": 1.3872817370006643, | |
| "learning_rate": 2.3092539630384765e-08, | |
| "loss": 0.7734, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.9061287820015516, | |
| "grad_norm": 1.4342975784145786, | |
| "learning_rate": 2.25916360586888e-08, | |
| "loss": 0.8026, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.9071631755883114, | |
| "grad_norm": 1.4195341226450182, | |
| "learning_rate": 2.209609922235134e-08, | |
| "loss": 0.7988, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.9081975691750711, | |
| "grad_norm": 1.35515425710915, | |
| "learning_rate": 2.1605934691896865e-08, | |
| "loss": 0.8106, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9092319627618308, | |
| "grad_norm": 1.5511479710011553, | |
| "learning_rate": 2.1121147977457954e-08, | |
| "loss": 0.8435, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.9102663563485907, | |
| "grad_norm": 1.4402808653187715, | |
| "learning_rate": 2.0641744528712925e-08, | |
| "loss": 0.8404, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9113007499353504, | |
| "grad_norm": 1.5353621498594796, | |
| "learning_rate": 2.0167729734824556e-08, | |
| "loss": 0.8685, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.9123351435221102, | |
| "grad_norm": 1.462792494272258, | |
| "learning_rate": 1.9699108924379815e-08, | |
| "loss": 0.714, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.9133695371088699, | |
| "grad_norm": 1.46264616892769, | |
| "learning_rate": 1.9235887365329772e-08, | |
| "loss": 0.8528, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.9144039306956296, | |
| "grad_norm": 1.469208326086429, | |
| "learning_rate": 1.877807026493028e-08, | |
| "loss": 0.7741, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.9154383242823895, | |
| "grad_norm": 1.4944268879899918, | |
| "learning_rate": 1.8325662769683447e-08, | |
| "loss": 0.7959, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9164727178691492, | |
| "grad_norm": 1.5107495681358998, | |
| "learning_rate": 1.7878669965280313e-08, | |
| "loss": 0.7381, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.917507111455909, | |
| "grad_norm": 1.5454424637939428, | |
| "learning_rate": 1.743709687654271e-08, | |
| "loss": 0.8037, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.9185415050426687, | |
| "grad_norm": 1.4767533836012885, | |
| "learning_rate": 1.7000948467367715e-08, | |
| "loss": 0.7494, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.9195758986294285, | |
| "grad_norm": 1.518832278401718, | |
| "learning_rate": 1.657022964067112e-08, | |
| "loss": 0.8264, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.9206102922161883, | |
| "grad_norm": 1.5489663706262933, | |
| "learning_rate": 1.6144945238332985e-08, | |
| "loss": 0.7865, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.921644685802948, | |
| "grad_norm": 1.467649587416207, | |
| "learning_rate": 1.5725100041142692e-08, | |
| "loss": 0.8559, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.9226790793897078, | |
| "grad_norm": 1.5114726377694383, | |
| "learning_rate": 1.5310698768745246e-08, | |
| "loss": 0.8152, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.9237134729764676, | |
| "grad_norm": 1.3844215997161795, | |
| "learning_rate": 1.490174607958855e-08, | |
| "loss": 0.7708, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.9247478665632273, | |
| "grad_norm": 1.4089428025652646, | |
| "learning_rate": 1.4498246570870842e-08, | |
| "loss": 0.7823, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.9257822601499871, | |
| "grad_norm": 1.3892568475731042, | |
| "learning_rate": 1.4100204778488944e-08, | |
| "loss": 0.7842, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.9268166537367468, | |
| "grad_norm": 1.4799797056028439, | |
| "learning_rate": 1.3707625176987148e-08, | |
| "loss": 0.8864, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.9278510473235065, | |
| "grad_norm": 1.4507160282179603, | |
| "learning_rate": 1.3320512179507526e-08, | |
| "loss": 0.7283, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.9288854409102664, | |
| "grad_norm": 1.384274018450003, | |
| "learning_rate": 1.2938870137739589e-08, | |
| "loss": 0.7523, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.9299198344970261, | |
| "grad_norm": 1.4145650769625586, | |
| "learning_rate": 1.2562703341871705e-08, | |
| "loss": 0.7873, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.9309542280837859, | |
| "grad_norm": 1.6539888353079981, | |
| "learning_rate": 1.2192016020542984e-08, | |
| "loss": 0.7674, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9319886216705456, | |
| "grad_norm": 1.5135139926816086, | |
| "learning_rate": 1.1826812340795522e-08, | |
| "loss": 0.7529, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.9330230152573054, | |
| "grad_norm": 1.5027904734752995, | |
| "learning_rate": 1.1467096408027677e-08, | |
| "loss": 0.7796, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.9340574088440652, | |
| "grad_norm": 1.5122228712978103, | |
| "learning_rate": 1.1112872265947815e-08, | |
| "loss": 0.851, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.9350918024308249, | |
| "grad_norm": 1.481844289354856, | |
| "learning_rate": 1.0764143896528966e-08, | |
| "loss": 0.7973, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.9361261960175847, | |
| "grad_norm": 1.4757389354279868, | |
| "learning_rate": 1.0420915219964021e-08, | |
| "loss": 0.809, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.9371605896043445, | |
| "grad_norm": 1.3774834780341196, | |
| "learning_rate": 1.0083190094621719e-08, | |
| "loss": 0.7734, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.9381949831911042, | |
| "grad_norm": 1.4572058507424313, | |
| "learning_rate": 9.750972317002948e-09, | |
| "loss": 0.7147, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.939229376777864, | |
| "grad_norm": 1.4875336499724323, | |
| "learning_rate": 9.424265621698735e-09, | |
| "loss": 0.8156, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.9402637703646237, | |
| "grad_norm": 1.541989995040697, | |
| "learning_rate": 9.103073681347606e-09, | |
| "loss": 0.781, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.9412981639513835, | |
| "grad_norm": 1.5034605056798724, | |
| "learning_rate": 8.787400106594566e-09, | |
| "loss": 0.701, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9423325575381433, | |
| "grad_norm": 1.4336069186789564, | |
| "learning_rate": 8.477248446050523e-09, | |
| "loss": 0.743, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.943366951124903, | |
| "grad_norm": 1.4289799665359655, | |
| "learning_rate": 8.17262218625242e-09, | |
| "loss": 0.7489, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.9444013447116628, | |
| "grad_norm": 1.4577567199188532, | |
| "learning_rate": 7.873524751624005e-09, | |
| "loss": 0.8103, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.9454357382984225, | |
| "grad_norm": 1.41298554753071, | |
| "learning_rate": 7.579959504437183e-09, | |
| "loss": 0.7501, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.9464701318851824, | |
| "grad_norm": 1.4383710033346153, | |
| "learning_rate": 7.291929744774494e-09, | |
| "loss": 0.7386, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.9475045254719421, | |
| "grad_norm": 1.3975838103832199, | |
| "learning_rate": 7.009438710491977e-09, | |
| "loss": 0.8014, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.9485389190587018, | |
| "grad_norm": 1.3073144207528744, | |
| "learning_rate": 6.7324895771824206e-09, | |
| "loss": 0.776, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.9495733126454616, | |
| "grad_norm": 1.40976222005968, | |
| "learning_rate": 6.4610854581400584e-09, | |
| "loss": 0.7556, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.9506077062322214, | |
| "grad_norm": 1.5523829976125192, | |
| "learning_rate": 6.195229404325541e-09, | |
| "loss": 0.7828, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.9516420998189811, | |
| "grad_norm": 1.4440751586776785, | |
| "learning_rate": 5.934924404331354e-09, | |
| "loss": 0.7566, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9526764934057409, | |
| "grad_norm": 1.4564112156715576, | |
| "learning_rate": 5.6801733843484525e-09, | |
| "loss": 0.7331, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.9537108869925006, | |
| "grad_norm": 1.3943268618131308, | |
| "learning_rate": 5.430979208133401e-09, | |
| "loss": 0.7196, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.9547452805792604, | |
| "grad_norm": 1.4872175073183012, | |
| "learning_rate": 5.1873446769760135e-09, | |
| "loss": 0.7814, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.9557796741660202, | |
| "grad_norm": 1.4106609722622656, | |
| "learning_rate": 4.949272529667925e-09, | |
| "loss": 0.6822, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.9568140677527799, | |
| "grad_norm": 1.6085462035608777, | |
| "learning_rate": 4.716765442471848e-09, | |
| "loss": 0.7589, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.9578484613395397, | |
| "grad_norm": 1.4095084148502441, | |
| "learning_rate": 4.489826029091592e-09, | |
| "loss": 0.8238, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.9588828549262994, | |
| "grad_norm": 1.6037541730404918, | |
| "learning_rate": 4.268456840642365e-09, | |
| "loss": 0.8143, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.9599172485130593, | |
| "grad_norm": 1.470001123603321, | |
| "learning_rate": 4.0526603656223515e-09, | |
| "loss": 0.7847, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.960951642099819, | |
| "grad_norm": 1.451183503982382, | |
| "learning_rate": 3.842439029884681e-09, | |
| "loss": 0.8004, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.9619860356865787, | |
| "grad_norm": 1.454201251763101, | |
| "learning_rate": 3.637795196610227e-09, | |
| "loss": 0.7614, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.9630204292733385, | |
| "grad_norm": 1.4314893842137169, | |
| "learning_rate": 3.4387311662807393e-09, | |
| "loss": 0.742, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.9640548228600982, | |
| "grad_norm": 1.456535794818062, | |
| "learning_rate": 3.2452491766534198e-09, | |
| "loss": 0.7141, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.9650892164468581, | |
| "grad_norm": 1.4428720762240117, | |
| "learning_rate": 3.057351402735553e-09, | |
| "loss": 0.7359, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.9661236100336178, | |
| "grad_norm": 1.396744784829951, | |
| "learning_rate": 2.8750399567599173e-09, | |
| "loss": 0.765, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.9671580036203775, | |
| "grad_norm": 1.529756629708109, | |
| "learning_rate": 2.6983168881611893e-09, | |
| "loss": 0.6982, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.9681923972071373, | |
| "grad_norm": 1.5041696030057317, | |
| "learning_rate": 2.5271841835530215e-09, | |
| "loss": 0.8308, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.9692267907938971, | |
| "grad_norm": 1.4443492091998595, | |
| "learning_rate": 2.361643766705501e-09, | |
| "loss": 0.7749, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.9702611843806569, | |
| "grad_norm": 1.4433520549911159, | |
| "learning_rate": 2.201697498523669e-09, | |
| "loss": 0.7029, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.9712955779674166, | |
| "grad_norm": 1.5117017124161056, | |
| "learning_rate": 2.0473471770263707e-09, | |
| "loss": 0.8443, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.9723299715541763, | |
| "grad_norm": 1.4418749597922746, | |
| "learning_rate": 1.8985945373264366e-09, | |
| "loss": 0.7861, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9733643651409362, | |
| "grad_norm": 1.5156109139572462, | |
| "learning_rate": 1.7554412516108675e-09, | |
| "loss": 0.7867, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.9743987587276959, | |
| "grad_norm": 1.450691952301201, | |
| "learning_rate": 1.6178889291220132e-09, | |
| "loss": 0.7484, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.9754331523144556, | |
| "grad_norm": 1.6666779719691138, | |
| "learning_rate": 1.4859391161397006e-09, | |
| "loss": 0.6941, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.9764675459012154, | |
| "grad_norm": 1.4690189193187233, | |
| "learning_rate": 1.3595932959638012e-09, | |
| "loss": 0.7448, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.9775019394879751, | |
| "grad_norm": 1.5374370831265771, | |
| "learning_rate": 1.2388528888973016e-09, | |
| "loss": 0.7113, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.978536333074735, | |
| "grad_norm": 1.5263263761694803, | |
| "learning_rate": 1.1237192522307592e-09, | |
| "loss": 0.8132, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.9795707266614947, | |
| "grad_norm": 1.396566868232177, | |
| "learning_rate": 1.0141936802265938e-09, | |
| "loss": 0.8064, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.9806051202482544, | |
| "grad_norm": 1.3480588903701196, | |
| "learning_rate": 9.102774041049865e-10, | |
| "loss": 0.7224, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.9816395138350142, | |
| "grad_norm": 1.4055601725585016, | |
| "learning_rate": 8.119715920296144e-10, | |
| "loss": 0.7763, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.982673907421774, | |
| "grad_norm": 1.5211653376410321, | |
| "learning_rate": 7.192773490948822e-10, | |
| "loss": 0.7874, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9837083010085338, | |
| "grad_norm": 1.543903841797646, | |
| "learning_rate": 6.321957173132664e-10, | |
| "loss": 0.7624, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.9847426945952935, | |
| "grad_norm": 1.4654507323460788, | |
| "learning_rate": 5.507276756036017e-10, | |
| "loss": 0.7508, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.9857770881820532, | |
| "grad_norm": 1.4246133713584954, | |
| "learning_rate": 4.74874139780257e-10, | |
| "loss": 0.7953, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.9868114817688131, | |
| "grad_norm": 1.4388026111912096, | |
| "learning_rate": 4.046359625426987e-10, | |
| "loss": 0.7727, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.9878458753555728, | |
| "grad_norm": 1.3346121552640693, | |
| "learning_rate": 3.4001393346588804e-10, | |
| "loss": 0.7306, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.9888802689423326, | |
| "grad_norm": 1.4764613103530917, | |
| "learning_rate": 2.810087789915649e-10, | |
| "loss": 0.7899, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.9899146625290923, | |
| "grad_norm": 1.5081548051587972, | |
| "learning_rate": 2.2762116241981099e-10, | |
| "loss": 0.7037, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.990949056115852, | |
| "grad_norm": 1.4701156263691104, | |
| "learning_rate": 1.7985168390194373e-10, | |
| "loss": 0.7383, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.9919834497026119, | |
| "grad_norm": 1.4701471668099184, | |
| "learning_rate": 1.377008804333557e-10, | |
| "loss": 0.7406, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.9930178432893716, | |
| "grad_norm": 1.4243240377862414, | |
| "learning_rate": 1.0116922584790798e-10, | |
| "loss": 0.7418, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9940522368761314, | |
| "grad_norm": 1.4904421243432315, | |
| "learning_rate": 7.025713081232342e-11, | |
| "loss": 0.8052, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.9950866304628911, | |
| "grad_norm": 1.3846412877629255, | |
| "learning_rate": 4.496494282157926e-11, | |
| "loss": 0.7892, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.9961210240496509, | |
| "grad_norm": 1.4324222908899813, | |
| "learning_rate": 2.5292946195132338e-11, | |
| "loss": 0.7949, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.9971554176364107, | |
| "grad_norm": 1.364762632330597, | |
| "learning_rate": 1.12413620735885e-11, | |
| "loss": 0.7489, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.9981898112231704, | |
| "grad_norm": 1.5940625943509268, | |
| "learning_rate": 2.810348416482089e-12, | |
| "loss": 0.7235, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.9992242048099301, | |
| "grad_norm": 1.5362344826695928, | |
| "learning_rate": 0.0, | |
| "loss": 0.7675, | |
| "step": 966 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 966, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 462892148195328.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
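
The state above is pure data, so nothing in it documents how to consume it. As a minimal sketch (not part of the original log), assuming this dump is saved as a regular Hugging Face Trainer `trainer_state.json` file and that matplotlib is available, the following loads `log_history`, separates the per-step training entries from the periodic eval entries, and plots training loss against step:

```python
# Sketch under assumptions: "trainer_state.json" is a hypothetical path for the
# dump above; matplotlib is assumed to be installed.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry a "loss" key; eval entries carry "eval_*" keys instead.
train_logs = [entry for entry in state["log_history"] if "loss" in entry]
eval_logs = [entry for entry in state["log_history"]
             if any(key.startswith("eval_") for key in entry)]

steps = [entry["step"] for entry in train_logs]
losses = [entry["loss"] for entry in train_logs]

plt.plot(steps, losses, label="train loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curve.png")
```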