{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 14090,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07097232079488999,
      "grad_norm": 4.760485649108887,
      "learning_rate": 1.2411284599006388e-05,
      "loss": 5.3493,
      "step": 100
    },
    {
      "epoch": 0.14194464158977999,
      "grad_norm": 3.876619577407837,
      "learning_rate": 1.2322569198012776e-05,
      "loss": 4.314,
      "step": 200
    },
    {
      "epoch": 0.21291696238466998,
      "grad_norm": 3.3549132347106934,
      "learning_rate": 1.2233853797019163e-05,
      "loss": 3.8419,
      "step": 300
    },
    {
      "epoch": 0.28388928317955997,
      "grad_norm": 2.582584857940674,
      "learning_rate": 1.214513839602555e-05,
      "loss": 3.6877,
      "step": 400
    },
    {
      "epoch": 0.35486160397445,
      "grad_norm": 3.2531728744506836,
      "learning_rate": 1.2056422995031938e-05,
      "loss": 3.5523,
      "step": 500
    },
    {
      "epoch": 0.42583392476933996,
      "grad_norm": 26.283876419067383,
      "learning_rate": 1.1967707594038326e-05,
      "loss": 3.5161,
      "step": 600
    },
    {
      "epoch": 0.49680624556423,
      "grad_norm": 3.0806777477264404,
      "learning_rate": 1.1878992193044713e-05,
      "loss": 3.496,
      "step": 700
    },
    {
      "epoch": 0.5677785663591199,
      "grad_norm": 2.6790292263031006,
      "learning_rate": 1.17902767920511e-05,
      "loss": 3.4429,
      "step": 800
    },
    {
      "epoch": 0.63875088715401,
      "grad_norm": 2.5105574131011963,
      "learning_rate": 1.1701561391057488e-05,
      "loss": 3.4733,
      "step": 900
    },
    {
      "epoch": 0.7097232079489,
      "grad_norm": 2.3946118354797363,
      "learning_rate": 1.1612845990063876e-05,
      "loss": 3.4635,
      "step": 1000
    },
    {
      "epoch": 0.7806955287437899,
      "grad_norm": 2.8345706462860107,
      "learning_rate": 1.1524130589070263e-05,
      "loss": 3.3713,
      "step": 1100
    },
    {
      "epoch": 0.8516678495386799,
      "grad_norm": 2.603573799133301,
      "learning_rate": 1.143541518807665e-05,
      "loss": 3.345,
      "step": 1200
    },
    {
      "epoch": 0.9226401703335699,
      "grad_norm": 2.7637786865234375,
      "learning_rate": 1.1346699787083038e-05,
      "loss": 3.4174,
      "step": 1300
    },
    {
      "epoch": 0.99361249112846,
      "grad_norm": 3.634453296661377,
      "learning_rate": 1.1257984386089426e-05,
      "loss": 3.3344,
      "step": 1400
    },
    {
      "epoch": 1.0645848119233499,
      "grad_norm": 3.2783639430999756,
      "learning_rate": 1.1169268985095813e-05,
      "loss": 3.2603,
      "step": 1500
    },
    {
      "epoch": 1.1355571327182399,
      "grad_norm": 12.153242111206055,
      "learning_rate": 1.1080553584102201e-05,
      "loss": 3.2665,
      "step": 1600
    },
    {
      "epoch": 1.20652945351313,
      "grad_norm": 3.7843754291534424,
      "learning_rate": 1.0991838183108588e-05,
      "loss": 3.2353,
      "step": 1700
    },
    {
      "epoch": 1.27750177430802,
      "grad_norm": 3.065302848815918,
      "learning_rate": 1.0903122782114976e-05,
      "loss": 3.2399,
      "step": 1800
    },
    {
      "epoch": 1.34847409510291,
      "grad_norm": 5.1582183837890625,
      "learning_rate": 1.0814407381121363e-05,
      "loss": 3.2087,
      "step": 1900
    },
    {
      "epoch": 1.4194464158978,
      "grad_norm": 3.7378036975860596,
      "learning_rate": 1.072569198012775e-05,
      "loss": 3.1919,
      "step": 2000
    },
    {
      "epoch": 1.49041873669269,
      "grad_norm": 3.0015852451324463,
      "learning_rate": 1.0636976579134138e-05,
      "loss": 3.2515,
      "step": 2100
    },
    {
      "epoch": 1.56139105748758,
      "grad_norm": 2.782742977142334,
      "learning_rate": 1.0548261178140526e-05,
      "loss": 3.182,
      "step": 2200
    },
    {
      "epoch": 1.6323633782824698,
      "grad_norm": 2.8899967670440674,
      "learning_rate": 1.0459545777146914e-05,
      "loss": 3.2035,
      "step": 2300
    },
    {
      "epoch": 1.7033356990773598,
      "grad_norm": 3.529968500137329,
      "learning_rate": 1.0370830376153301e-05,
      "loss": 3.2547,
      "step": 2400
    },
    {
      "epoch": 1.7743080198722498,
      "grad_norm": 3.1597557067871094,
      "learning_rate": 1.0282114975159689e-05,
      "loss": 3.2193,
      "step": 2500
    },
    {
      "epoch": 1.8452803406671399,
      "grad_norm": 2.7804019451141357,
      "learning_rate": 1.0193399574166074e-05,
      "loss": 3.1883,
      "step": 2600
    },
    {
      "epoch": 1.9162526614620297,
      "grad_norm": 3.4074594974517822,
      "learning_rate": 1.0104684173172464e-05,
      "loss": 3.1894,
      "step": 2700
    },
    {
      "epoch": 1.9872249822569197,
      "grad_norm": 3.2770557403564453,
      "learning_rate": 1.0015968772178851e-05,
      "loss": 3.1904,
      "step": 2800
    },
    {
      "epoch": 2.0581973030518097,
      "grad_norm": 3.2407679557800293,
      "learning_rate": 9.927253371185239e-06,
      "loss": 3.0811,
      "step": 2900
    },
    {
      "epoch": 2.1291696238466997,
      "grad_norm": 2.982088565826416,
      "learning_rate": 9.838537970191626e-06,
      "loss": 3.0856,
      "step": 3000
    },
    {
      "epoch": 2.2001419446415897,
      "grad_norm": 3.4891321659088135,
      "learning_rate": 9.749822569198014e-06,
      "loss": 3.0932,
      "step": 3100
    },
    {
      "epoch": 2.2711142654364798,
      "grad_norm": 2.988189220428467,
      "learning_rate": 9.6611071682044e-06,
      "loss": 3.0661,
      "step": 3200
    },
    {
      "epoch": 2.34208658623137,
      "grad_norm": 2.7912137508392334,
      "learning_rate": 9.572391767210789e-06,
      "loss": 3.0924,
      "step": 3300
    },
    {
      "epoch": 2.41305890702626,
      "grad_norm": 3.0504982471466064,
      "learning_rate": 9.483676366217176e-06,
      "loss": 3.0704,
      "step": 3400
    },
    {
      "epoch": 2.48403122782115,
      "grad_norm": 3.195739984512329,
      "learning_rate": 9.394960965223564e-06,
      "loss": 3.06,
      "step": 3500
    },
    {
      "epoch": 2.55500354861604,
      "grad_norm": 6.643301010131836,
      "learning_rate": 9.306245564229951e-06,
      "loss": 3.0857,
      "step": 3600
    },
    {
      "epoch": 2.62597586941093,
      "grad_norm": 2.8578591346740723,
      "learning_rate": 9.217530163236339e-06,
      "loss": 3.0512,
      "step": 3700
    },
    {
      "epoch": 2.69694819020582,
      "grad_norm": 3.1686370372772217,
      "learning_rate": 9.128814762242725e-06,
      "loss": 3.1005,
      "step": 3800
    },
    {
      "epoch": 2.7679205110007095,
      "grad_norm": 2.9187655448913574,
      "learning_rate": 9.040099361249114e-06,
      "loss": 3.0878,
      "step": 3900
    },
    {
      "epoch": 2.8388928317956,
      "grad_norm": 3.5484399795532227,
      "learning_rate": 8.951383960255501e-06,
      "loss": 3.0252,
      "step": 4000
    },
    {
      "epoch": 2.9098651525904895,
      "grad_norm": 3.374964714050293,
      "learning_rate": 8.862668559261889e-06,
      "loss": 3.0888,
      "step": 4100
    },
    {
      "epoch": 2.98083747338538,
      "grad_norm": 3.1180434226989746,
      "learning_rate": 8.773953158268276e-06,
      "loss": 3.0332,
      "step": 4200
    },
    {
      "epoch": 3.0518097941802695,
      "grad_norm": 3.4749999046325684,
      "learning_rate": 8.685237757274662e-06,
      "loss": 3.031,
      "step": 4300
    },
    {
      "epoch": 3.1227821149751596,
      "grad_norm": 3.021486282348633,
      "learning_rate": 8.59652235628105e-06,
      "loss": 3.0399,
      "step": 4400
    },
    {
      "epoch": 3.1937544357700496,
      "grad_norm": 3.392218589782715,
      "learning_rate": 8.507806955287439e-06,
      "loss": 2.9733,
      "step": 4500
    },
    {
      "epoch": 3.2647267565649396,
      "grad_norm": 2.912814140319824,
      "learning_rate": 8.419091554293826e-06,
      "loss": 2.9437,
      "step": 4600
    },
    {
      "epoch": 3.3356990773598296,
      "grad_norm": 4.143916606903076,
      "learning_rate": 8.330376153300214e-06,
      "loss": 2.9484,
      "step": 4700
    },
    {
      "epoch": 3.4066713981547196,
      "grad_norm": 12.112326622009277,
      "learning_rate": 8.241660752306602e-06,
      "loss": 2.9534,
      "step": 4800
    },
    {
      "epoch": 3.4776437189496097,
      "grad_norm": 3.6819448471069336,
      "learning_rate": 8.152945351312987e-06,
      "loss": 2.9621,
      "step": 4900
    },
    {
      "epoch": 3.5486160397444997,
      "grad_norm": 3.4722225666046143,
      "learning_rate": 8.064229950319375e-06,
      "loss": 2.9712,
      "step": 5000
    },
    {
      "epoch": 3.6195883605393897,
      "grad_norm": 3.237942934036255,
      "learning_rate": 7.975514549325764e-06,
      "loss": 3.0101,
      "step": 5100
    },
    {
      "epoch": 3.6905606813342797,
      "grad_norm": 2.8785219192504883,
      "learning_rate": 7.886799148332152e-06,
      "loss": 2.9564,
      "step": 5200
    },
    {
      "epoch": 3.7615330021291697,
      "grad_norm": 3.193901777267456,
      "learning_rate": 7.798083747338539e-06,
      "loss": 2.9435,
      "step": 5300
    },
    {
      "epoch": 3.8325053229240598,
      "grad_norm": 2.641021490097046,
      "learning_rate": 7.709368346344927e-06,
      "loss": 2.9404,
      "step": 5400
    },
    {
      "epoch": 3.90347764371895,
      "grad_norm": 2.879225492477417,
      "learning_rate": 7.6206529453513125e-06,
      "loss": 2.9642,
      "step": 5500
    },
    {
      "epoch": 3.9744499645138394,
      "grad_norm": 2.779848575592041,
      "learning_rate": 7.531937544357701e-06,
      "loss": 2.991,
      "step": 5600
    },
    {
      "epoch": 4.04542228530873,
      "grad_norm": 4.226039886474609,
      "learning_rate": 7.443222143364088e-06,
      "loss": 2.8704,
      "step": 5700
    },
    {
      "epoch": 4.116394606103619,
      "grad_norm": 3.5658841133117676,
      "learning_rate": 7.354506742370476e-06,
      "loss": 2.8641,
      "step": 5800
    },
    {
      "epoch": 4.18736692689851,
      "grad_norm": 2.6197798252105713,
      "learning_rate": 7.265791341376864e-06,
      "loss": 2.8462,
      "step": 5900
    },
    {
      "epoch": 4.258339247693399,
      "grad_norm": 2.7553303241729736,
      "learning_rate": 7.17707594038325e-06,
      "loss": 2.882,
      "step": 6000
    },
    {
      "epoch": 4.32931156848829,
      "grad_norm": 2.9696245193481445,
      "learning_rate": 7.0883605393896376e-06,
      "loss": 2.9033,
      "step": 6100
    },
    {
      "epoch": 4.4002838892831795,
      "grad_norm": 3.14622163772583,
      "learning_rate": 6.999645138396026e-06,
      "loss": 2.9003,
      "step": 6200
    },
    {
      "epoch": 4.47125621007807,
      "grad_norm": 3.744727849960327,
      "learning_rate": 6.9109297374024135e-06,
      "loss": 2.8839,
      "step": 6300
    },
    {
      "epoch": 4.5422285308729595,
      "grad_norm": 3.2394585609436035,
      "learning_rate": 6.822214336408801e-06,
      "loss": 2.9144,
      "step": 6400
    },
    {
      "epoch": 4.61320085166785,
      "grad_norm": 3.425605535507202,
      "learning_rate": 6.733498935415189e-06,
      "loss": 2.9345,
      "step": 6500
    },
    {
      "epoch": 4.68417317246274,
      "grad_norm": 2.960824489593506,
      "learning_rate": 6.644783534421575e-06,
      "loss": 2.8784,
      "step": 6600
    },
    {
      "epoch": 4.755145493257629,
      "grad_norm": 2.8103156089782715,
      "learning_rate": 6.556068133427963e-06,
      "loss": 2.9073,
      "step": 6700
    },
    {
      "epoch": 4.82611781405252,
      "grad_norm": 3.3681435585021973,
      "learning_rate": 6.467352732434351e-06,
      "loss": 2.8941,
      "step": 6800
    },
    {
      "epoch": 4.897090134847409,
      "grad_norm": 2.99110746383667,
      "learning_rate": 6.3786373314407386e-06,
      "loss": 2.8776,
      "step": 6900
    },
    {
      "epoch": 4.9680624556423,
      "grad_norm": 3.0673208236694336,
      "learning_rate": 6.289921930447126e-06,
      "loss": 2.8941,
      "step": 7000
    },
    {
      "epoch": 5.039034776437189,
      "grad_norm": 3.0405173301696777,
      "learning_rate": 6.201206529453514e-06,
      "loss": 2.8133,
      "step": 7100
    },
    {
      "epoch": 5.11000709723208,
      "grad_norm": 2.673586368560791,
      "learning_rate": 6.112491128459901e-06,
      "loss": 2.8009,
      "step": 7200
    },
    {
      "epoch": 5.180979418026969,
      "grad_norm": 2.86667799949646,
      "learning_rate": 6.023775727466288e-06,
      "loss": 2.8341,
      "step": 7300
    },
    {
      "epoch": 5.25195173882186,
      "grad_norm": 2.725980758666992,
      "learning_rate": 5.935060326472676e-06,
      "loss": 2.8206,
      "step": 7400
    },
    {
      "epoch": 5.322924059616749,
      "grad_norm": 9.25928783416748,
      "learning_rate": 5.846344925479064e-06,
      "loss": 2.8782,
      "step": 7500
    },
    {
      "epoch": 5.39389638041164,
      "grad_norm": 3.619668960571289,
      "learning_rate": 5.75762952448545e-06,
      "loss": 2.8083,
      "step": 7600
    },
    {
      "epoch": 5.464868701206529,
      "grad_norm": 4.820442199707031,
      "learning_rate": 5.668914123491839e-06,
      "loss": 2.8051,
      "step": 7700
    },
    {
      "epoch": 5.53584102200142,
      "grad_norm": 3.3483476638793945,
      "learning_rate": 5.580198722498226e-06,
      "loss": 2.7862,
      "step": 7800
    },
    {
      "epoch": 5.606813342796309,
      "grad_norm": 3.04085373878479,
      "learning_rate": 5.491483321504613e-06,
      "loss": 2.786,
      "step": 7900
    },
    {
      "epoch": 5.6777856635912,
      "grad_norm": 6.183100700378418,
      "learning_rate": 5.402767920511001e-06,
      "loss": 2.8541,
      "step": 8000
    },
    {
      "epoch": 5.748757984386089,
      "grad_norm": 3.20927357673645,
      "learning_rate": 5.314052519517389e-06,
      "loss": 2.8428,
      "step": 8100
    },
    {
      "epoch": 5.819730305180979,
      "grad_norm": 2.7963485717773438,
      "learning_rate": 5.2253371185237755e-06,
      "loss": 2.8391,
      "step": 8200
    },
    {
      "epoch": 5.8907026259758695,
      "grad_norm": 2.9603395462036133,
      "learning_rate": 5.136621717530164e-06,
      "loss": 2.8483,
      "step": 8300
    },
    {
      "epoch": 5.961674946770759,
      "grad_norm": 3.2761342525482178,
      "learning_rate": 5.047906316536551e-06,
      "loss": 2.8908,
      "step": 8400
    },
    {
      "epoch": 6.0326472675656495,
      "grad_norm": 2.543006181716919,
      "learning_rate": 4.959190915542938e-06,
      "loss": 2.7455,
      "step": 8500
    },
    {
      "epoch": 6.103619588360539,
      "grad_norm": 2.7663486003875732,
      "learning_rate": 4.870475514549326e-06,
      "loss": 2.781,
      "step": 8600
    },
    {
      "epoch": 6.1745919091554295,
      "grad_norm": 3.1819772720336914,
      "learning_rate": 4.781760113555713e-06,
      "loss": 2.8151,
      "step": 8700
    },
    {
      "epoch": 6.245564229950319,
      "grad_norm": 3.4800798892974854,
      "learning_rate": 4.693044712562101e-06,
      "loss": 2.742,
      "step": 8800
    },
    {
      "epoch": 6.31653655074521,
      "grad_norm": 2.997176170349121,
      "learning_rate": 4.604329311568489e-06,
      "loss": 2.7626,
      "step": 8900
    },
    {
      "epoch": 6.387508871540099,
      "grad_norm": 2.6546881198883057,
      "learning_rate": 4.515613910574876e-06,
      "loss": 2.7803,
      "step": 9000
    },
    {
      "epoch": 6.45848119233499,
      "grad_norm": 3.736813545227051,
      "learning_rate": 4.426898509581263e-06,
      "loss": 2.7491,
      "step": 9100
    },
    {
      "epoch": 6.529453513129879,
      "grad_norm": 2.904879331588745,
      "learning_rate": 4.3381831085876515e-06,
      "loss": 2.7707,
      "step": 9200
    },
    {
      "epoch": 6.60042583392477,
      "grad_norm": 3.1349782943725586,
      "learning_rate": 4.249467707594038e-06,
      "loss": 2.7715,
      "step": 9300
    },
    {
      "epoch": 6.671398154719659,
      "grad_norm": 3.8655664920806885,
      "learning_rate": 4.160752306600426e-06,
      "loss": 2.8137,
      "step": 9400
    },
    {
      "epoch": 6.74237047551455,
      "grad_norm": 3.0926291942596436,
      "learning_rate": 4.072036905606814e-06,
      "loss": 2.7606,
      "step": 9500
    },
    {
      "epoch": 6.813342796309439,
      "grad_norm": 2.9830901622772217,
      "learning_rate": 3.983321504613201e-06,
      "loss": 2.7696,
      "step": 9600
    },
    {
      "epoch": 6.884315117104329,
      "grad_norm": 2.9719297885894775,
      "learning_rate": 3.894606103619588e-06,
      "loss": 2.7477,
      "step": 9700
    },
    {
      "epoch": 6.955287437899219,
      "grad_norm": 4.136593818664551,
      "learning_rate": 3.805890702625976e-06,
      "loss": 2.7814,
      "step": 9800
    },
    {
      "epoch": 7.026259758694109,
      "grad_norm": 3.006192922592163,
      "learning_rate": 3.7171753016323633e-06,
      "loss": 2.7316,
      "step": 9900
    },
    {
      "epoch": 7.097232079488999,
      "grad_norm": 3.204758644104004,
      "learning_rate": 3.6284599006387512e-06,
      "loss": 2.7784,
      "step": 10000
    },
    {
      "epoch": 7.168204400283889,
      "grad_norm": 4.11678409576416,
      "learning_rate": 3.5397444996451388e-06,
      "loss": 2.7377,
      "step": 10100
    },
    {
      "epoch": 7.239176721078779,
      "grad_norm": 3.0145442485809326,
      "learning_rate": 3.451029098651526e-06,
      "loss": 2.7327,
      "step": 10200
    },
    {
      "epoch": 7.310149041873669,
      "grad_norm": 2.759552240371704,
      "learning_rate": 3.362313697657914e-06,
      "loss": 2.6663,
      "step": 10300
    },
    {
      "epoch": 7.3811213626685594,
      "grad_norm": 3.0694785118103027,
      "learning_rate": 3.2735982966643013e-06,
      "loss": 2.7523,
      "step": 10400
    },
    {
      "epoch": 7.452093683463449,
      "grad_norm": 2.6538655757904053,
      "learning_rate": 3.1848828956706884e-06,
      "loss": 2.749,
      "step": 10500
    },
    {
      "epoch": 7.5230660042583395,
      "grad_norm": 3.4055073261260986,
      "learning_rate": 3.0961674946770763e-06,
      "loss": 2.7119,
      "step": 10600
    },
    {
      "epoch": 7.594038325053229,
      "grad_norm": 2.686981439590454,
      "learning_rate": 3.0074520936834634e-06,
      "loss": 2.6696,
      "step": 10700
    },
    {
      "epoch": 7.6650106458481195,
      "grad_norm": 3.1230781078338623,
      "learning_rate": 2.918736692689851e-06,
      "loss": 2.7344,
      "step": 10800
    },
    {
      "epoch": 7.735982966643009,
      "grad_norm": 2.9182968139648438,
      "learning_rate": 2.830021291696239e-06,
      "loss": 2.7724,
      "step": 10900
    },
    {
      "epoch": 7.8069552874379,
      "grad_norm": 4.071280002593994,
      "learning_rate": 2.741305890702626e-06,
      "loss": 2.7562,
      "step": 11000
    },
    {
      "epoch": 7.877927608232789,
      "grad_norm": 2.9077835083007812,
      "learning_rate": 2.6525904897090135e-06,
      "loss": 2.7375,
      "step": 11100
    },
    {
      "epoch": 7.948899929027679,
      "grad_norm": 3.786334276199341,
      "learning_rate": 2.563875088715401e-06,
      "loss": 2.7451,
      "step": 11200
    },
    {
      "epoch": 8.01987224982257,
      "grad_norm": 3.843573808670044,
      "learning_rate": 2.4751596877217886e-06,
      "loss": 2.7024,
      "step": 11300
    },
    {
      "epoch": 8.09084457061746,
      "grad_norm": 3.3538873195648193,
      "learning_rate": 2.386444286728176e-06,
      "loss": 2.7569,
      "step": 11400
    },
    {
      "epoch": 8.161816891412348,
      "grad_norm": 3.028632879257202,
      "learning_rate": 2.2977288857345636e-06,
      "loss": 2.7473,
      "step": 11500
    },
    {
      "epoch": 8.232789212207239,
      "grad_norm": 3.1207797527313232,
      "learning_rate": 2.209013484740951e-06,
      "loss": 2.7304,
      "step": 11600
    },
    {
      "epoch": 8.30376153300213,
      "grad_norm": 2.9007532596588135,
      "learning_rate": 2.1202980837473386e-06,
      "loss": 2.6945,
      "step": 11700
    },
    {
      "epoch": 8.37473385379702,
      "grad_norm": 3.2682387828826904,
      "learning_rate": 2.031582682753726e-06,
      "loss": 2.6944,
      "step": 11800
    },
    {
      "epoch": 8.445706174591908,
      "grad_norm": 2.944650173187256,
      "learning_rate": 1.9428672817601137e-06,
      "loss": 2.6788,
      "step": 11900
    },
    {
      "epoch": 8.516678495386799,
      "grad_norm": 3.6891725063323975,
      "learning_rate": 1.8541518807665012e-06,
      "loss": 2.6576,
      "step": 12000
    },
    {
      "epoch": 8.58765081618169,
      "grad_norm": 3.5069870948791504,
      "learning_rate": 1.7654364797728887e-06,
      "loss": 2.6916,
      "step": 12100
    },
    {
      "epoch": 8.65862313697658,
      "grad_norm": 3.2562413215637207,
      "learning_rate": 1.676721078779276e-06,
      "loss": 2.709,
      "step": 12200
    },
    {
      "epoch": 8.729595457771469,
      "grad_norm": 2.882202386856079,
      "learning_rate": 1.5880056777856637e-06,
      "loss": 2.6932,
      "step": 12300
    },
    {
      "epoch": 8.800567778566359,
      "grad_norm": 3.014678955078125,
      "learning_rate": 1.499290276792051e-06,
      "loss": 2.7494,
      "step": 12400
    },
    {
      "epoch": 8.87154009936125,
      "grad_norm": 3.5019314289093018,
      "learning_rate": 1.4105748757984388e-06,
      "loss": 2.716,
      "step": 12500
    },
    {
      "epoch": 8.94251242015614,
      "grad_norm": 3.2683048248291016,
      "learning_rate": 1.321859474804826e-06,
      "loss": 2.7186,
      "step": 12600
    },
    {
      "epoch": 9.013484740951029,
      "grad_norm": 2.985898017883301,
      "learning_rate": 1.2331440738112136e-06,
      "loss": 2.7296,
      "step": 12700
    },
    {
      "epoch": 9.084457061745919,
      "grad_norm": 3.0097787380218506,
      "learning_rate": 1.1444286728176011e-06,
      "loss": 2.7134,
      "step": 12800
    },
    {
      "epoch": 9.15542938254081,
      "grad_norm": 3.375105142593384,
      "learning_rate": 1.0557132718239887e-06,
      "loss": 2.6976,
      "step": 12900
    },
    {
      "epoch": 9.2264017033357,
      "grad_norm": 18.599666595458984,
      "learning_rate": 9.669978708303762e-07,
      "loss": 2.6748,
      "step": 13000
    },
    {
      "epoch": 9.297374024130589,
      "grad_norm": 3.4463281631469727,
      "learning_rate": 8.782824698367637e-07,
      "loss": 2.6669,
      "step": 13100
    },
    {
      "epoch": 9.36834634492548,
      "grad_norm": 3.3725767135620117,
      "learning_rate": 7.895670688431512e-07,
      "loss": 2.6394,
      "step": 13200
    },
    {
      "epoch": 9.43931866572037,
      "grad_norm": 3.3101234436035156,
      "learning_rate": 7.008516678495387e-07,
      "loss": 2.7324,
      "step": 13300
    },
    {
      "epoch": 9.510290986515258,
      "grad_norm": 3.0802764892578125,
      "learning_rate": 6.121362668559262e-07,
      "loss": 2.6929,
      "step": 13400
    },
    {
      "epoch": 9.581263307310149,
      "grad_norm": 2.806544303894043,
      "learning_rate": 5.234208658623137e-07,
      "loss": 2.6382,
      "step": 13500
    },
    {
      "epoch": 9.65223562810504,
      "grad_norm": 3.4582176208496094,
      "learning_rate": 4.347054648687013e-07,
      "loss": 2.694,
      "step": 13600
    },
    {
      "epoch": 9.72320794889993,
      "grad_norm": 4.037984848022461,
      "learning_rate": 3.459900638750887e-07,
      "loss": 2.6497,
      "step": 13700
    },
    {
      "epoch": 9.794180269694818,
      "grad_norm": 3.1365439891815186,
      "learning_rate": 2.572746628814762e-07,
      "loss": 2.705,
      "step": 13800
    },
    {
      "epoch": 9.865152590489709,
      "grad_norm": 3.4496893882751465,
      "learning_rate": 1.6855926188786376e-07,
      "loss": 2.7123,
      "step": 13900
    },
    {
      "epoch": 9.9361249112846,
      "grad_norm": 3.1736724376678467,
      "learning_rate": 7.984386089425125e-08,
      "loss": 2.7009,
      "step": 14000
    }
  ],
  "logging_steps": 100,
  "max_steps": 14090,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.430320762257408e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}