codegr-vault-query-generator-Ruby / trainer_state.json
auphong2707's picture
Upload folder using huggingface_hub
f628870 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 14090,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07097232079488999,
"grad_norm": 4.760485649108887,
"learning_rate": 1.2411284599006388e-05,
"loss": 5.3493,
"step": 100
},
{
"epoch": 0.14194464158977999,
"grad_norm": 3.876619577407837,
"learning_rate": 1.2322569198012776e-05,
"loss": 4.314,
"step": 200
},
{
"epoch": 0.21291696238466998,
"grad_norm": 3.3549132347106934,
"learning_rate": 1.2233853797019163e-05,
"loss": 3.8419,
"step": 300
},
{
"epoch": 0.28388928317955997,
"grad_norm": 2.582584857940674,
"learning_rate": 1.214513839602555e-05,
"loss": 3.6877,
"step": 400
},
{
"epoch": 0.35486160397445,
"grad_norm": 3.2531728744506836,
"learning_rate": 1.2056422995031938e-05,
"loss": 3.5523,
"step": 500
},
{
"epoch": 0.42583392476933996,
"grad_norm": 26.283876419067383,
"learning_rate": 1.1967707594038326e-05,
"loss": 3.5161,
"step": 600
},
{
"epoch": 0.49680624556423,
"grad_norm": 3.0806777477264404,
"learning_rate": 1.1878992193044713e-05,
"loss": 3.496,
"step": 700
},
{
"epoch": 0.5677785663591199,
"grad_norm": 2.6790292263031006,
"learning_rate": 1.17902767920511e-05,
"loss": 3.4429,
"step": 800
},
{
"epoch": 0.63875088715401,
"grad_norm": 2.5105574131011963,
"learning_rate": 1.1701561391057488e-05,
"loss": 3.4733,
"step": 900
},
{
"epoch": 0.7097232079489,
"grad_norm": 2.3946118354797363,
"learning_rate": 1.1612845990063876e-05,
"loss": 3.4635,
"step": 1000
},
{
"epoch": 0.7806955287437899,
"grad_norm": 2.8345706462860107,
"learning_rate": 1.1524130589070263e-05,
"loss": 3.3713,
"step": 1100
},
{
"epoch": 0.8516678495386799,
"grad_norm": 2.603573799133301,
"learning_rate": 1.143541518807665e-05,
"loss": 3.345,
"step": 1200
},
{
"epoch": 0.9226401703335699,
"grad_norm": 2.7637786865234375,
"learning_rate": 1.1346699787083038e-05,
"loss": 3.4174,
"step": 1300
},
{
"epoch": 0.99361249112846,
"grad_norm": 3.634453296661377,
"learning_rate": 1.1257984386089426e-05,
"loss": 3.3344,
"step": 1400
},
{
"epoch": 1.0645848119233499,
"grad_norm": 3.2783639430999756,
"learning_rate": 1.1169268985095813e-05,
"loss": 3.2603,
"step": 1500
},
{
"epoch": 1.1355571327182399,
"grad_norm": 12.153242111206055,
"learning_rate": 1.1080553584102201e-05,
"loss": 3.2665,
"step": 1600
},
{
"epoch": 1.20652945351313,
"grad_norm": 3.7843754291534424,
"learning_rate": 1.0991838183108588e-05,
"loss": 3.2353,
"step": 1700
},
{
"epoch": 1.27750177430802,
"grad_norm": 3.065302848815918,
"learning_rate": 1.0903122782114976e-05,
"loss": 3.2399,
"step": 1800
},
{
"epoch": 1.34847409510291,
"grad_norm": 5.1582183837890625,
"learning_rate": 1.0814407381121363e-05,
"loss": 3.2087,
"step": 1900
},
{
"epoch": 1.4194464158978,
"grad_norm": 3.7378036975860596,
"learning_rate": 1.072569198012775e-05,
"loss": 3.1919,
"step": 2000
},
{
"epoch": 1.49041873669269,
"grad_norm": 3.0015852451324463,
"learning_rate": 1.0636976579134138e-05,
"loss": 3.2515,
"step": 2100
},
{
"epoch": 1.56139105748758,
"grad_norm": 2.782742977142334,
"learning_rate": 1.0548261178140526e-05,
"loss": 3.182,
"step": 2200
},
{
"epoch": 1.6323633782824698,
"grad_norm": 2.8899967670440674,
"learning_rate": 1.0459545777146914e-05,
"loss": 3.2035,
"step": 2300
},
{
"epoch": 1.7033356990773598,
"grad_norm": 3.529968500137329,
"learning_rate": 1.0370830376153301e-05,
"loss": 3.2547,
"step": 2400
},
{
"epoch": 1.7743080198722498,
"grad_norm": 3.1597557067871094,
"learning_rate": 1.0282114975159689e-05,
"loss": 3.2193,
"step": 2500
},
{
"epoch": 1.8452803406671399,
"grad_norm": 2.7804019451141357,
"learning_rate": 1.0193399574166074e-05,
"loss": 3.1883,
"step": 2600
},
{
"epoch": 1.9162526614620297,
"grad_norm": 3.4074594974517822,
"learning_rate": 1.0104684173172464e-05,
"loss": 3.1894,
"step": 2700
},
{
"epoch": 1.9872249822569197,
"grad_norm": 3.2770557403564453,
"learning_rate": 1.0015968772178851e-05,
"loss": 3.1904,
"step": 2800
},
{
"epoch": 2.0581973030518097,
"grad_norm": 3.2407679557800293,
"learning_rate": 9.927253371185239e-06,
"loss": 3.0811,
"step": 2900
},
{
"epoch": 2.1291696238466997,
"grad_norm": 2.982088565826416,
"learning_rate": 9.838537970191626e-06,
"loss": 3.0856,
"step": 3000
},
{
"epoch": 2.2001419446415897,
"grad_norm": 3.4891321659088135,
"learning_rate": 9.749822569198014e-06,
"loss": 3.0932,
"step": 3100
},
{
"epoch": 2.2711142654364798,
"grad_norm": 2.988189220428467,
"learning_rate": 9.6611071682044e-06,
"loss": 3.0661,
"step": 3200
},
{
"epoch": 2.34208658623137,
"grad_norm": 2.7912137508392334,
"learning_rate": 9.572391767210789e-06,
"loss": 3.0924,
"step": 3300
},
{
"epoch": 2.41305890702626,
"grad_norm": 3.0504982471466064,
"learning_rate": 9.483676366217176e-06,
"loss": 3.0704,
"step": 3400
},
{
"epoch": 2.48403122782115,
"grad_norm": 3.195739984512329,
"learning_rate": 9.394960965223564e-06,
"loss": 3.06,
"step": 3500
},
{
"epoch": 2.55500354861604,
"grad_norm": 6.643301010131836,
"learning_rate": 9.306245564229951e-06,
"loss": 3.0857,
"step": 3600
},
{
"epoch": 2.62597586941093,
"grad_norm": 2.8578591346740723,
"learning_rate": 9.217530163236339e-06,
"loss": 3.0512,
"step": 3700
},
{
"epoch": 2.69694819020582,
"grad_norm": 3.1686370372772217,
"learning_rate": 9.128814762242725e-06,
"loss": 3.1005,
"step": 3800
},
{
"epoch": 2.7679205110007095,
"grad_norm": 2.9187655448913574,
"learning_rate": 9.040099361249114e-06,
"loss": 3.0878,
"step": 3900
},
{
"epoch": 2.8388928317956,
"grad_norm": 3.5484399795532227,
"learning_rate": 8.951383960255501e-06,
"loss": 3.0252,
"step": 4000
},
{
"epoch": 2.9098651525904895,
"grad_norm": 3.374964714050293,
"learning_rate": 8.862668559261889e-06,
"loss": 3.0888,
"step": 4100
},
{
"epoch": 2.98083747338538,
"grad_norm": 3.1180434226989746,
"learning_rate": 8.773953158268276e-06,
"loss": 3.0332,
"step": 4200
},
{
"epoch": 3.0518097941802695,
"grad_norm": 3.4749999046325684,
"learning_rate": 8.685237757274662e-06,
"loss": 3.031,
"step": 4300
},
{
"epoch": 3.1227821149751596,
"grad_norm": 3.021486282348633,
"learning_rate": 8.59652235628105e-06,
"loss": 3.0399,
"step": 4400
},
{
"epoch": 3.1937544357700496,
"grad_norm": 3.392218589782715,
"learning_rate": 8.507806955287439e-06,
"loss": 2.9733,
"step": 4500
},
{
"epoch": 3.2647267565649396,
"grad_norm": 2.912814140319824,
"learning_rate": 8.419091554293826e-06,
"loss": 2.9437,
"step": 4600
},
{
"epoch": 3.3356990773598296,
"grad_norm": 4.143916606903076,
"learning_rate": 8.330376153300214e-06,
"loss": 2.9484,
"step": 4700
},
{
"epoch": 3.4066713981547196,
"grad_norm": 12.112326622009277,
"learning_rate": 8.241660752306602e-06,
"loss": 2.9534,
"step": 4800
},
{
"epoch": 3.4776437189496097,
"grad_norm": 3.6819448471069336,
"learning_rate": 8.152945351312987e-06,
"loss": 2.9621,
"step": 4900
},
{
"epoch": 3.5486160397444997,
"grad_norm": 3.4722225666046143,
"learning_rate": 8.064229950319375e-06,
"loss": 2.9712,
"step": 5000
},
{
"epoch": 3.6195883605393897,
"grad_norm": 3.237942934036255,
"learning_rate": 7.975514549325764e-06,
"loss": 3.0101,
"step": 5100
},
{
"epoch": 3.6905606813342797,
"grad_norm": 2.8785219192504883,
"learning_rate": 7.886799148332152e-06,
"loss": 2.9564,
"step": 5200
},
{
"epoch": 3.7615330021291697,
"grad_norm": 3.193901777267456,
"learning_rate": 7.798083747338539e-06,
"loss": 2.9435,
"step": 5300
},
{
"epoch": 3.8325053229240598,
"grad_norm": 2.641021490097046,
"learning_rate": 7.709368346344927e-06,
"loss": 2.9404,
"step": 5400
},
{
"epoch": 3.90347764371895,
"grad_norm": 2.879225492477417,
"learning_rate": 7.6206529453513125e-06,
"loss": 2.9642,
"step": 5500
},
{
"epoch": 3.9744499645138394,
"grad_norm": 2.779848575592041,
"learning_rate": 7.531937544357701e-06,
"loss": 2.991,
"step": 5600
},
{
"epoch": 4.04542228530873,
"grad_norm": 4.226039886474609,
"learning_rate": 7.443222143364088e-06,
"loss": 2.8704,
"step": 5700
},
{
"epoch": 4.116394606103619,
"grad_norm": 3.5658841133117676,
"learning_rate": 7.354506742370476e-06,
"loss": 2.8641,
"step": 5800
},
{
"epoch": 4.18736692689851,
"grad_norm": 2.6197798252105713,
"learning_rate": 7.265791341376864e-06,
"loss": 2.8462,
"step": 5900
},
{
"epoch": 4.258339247693399,
"grad_norm": 2.7553303241729736,
"learning_rate": 7.17707594038325e-06,
"loss": 2.882,
"step": 6000
},
{
"epoch": 4.32931156848829,
"grad_norm": 2.9696245193481445,
"learning_rate": 7.0883605393896376e-06,
"loss": 2.9033,
"step": 6100
},
{
"epoch": 4.4002838892831795,
"grad_norm": 3.14622163772583,
"learning_rate": 6.999645138396026e-06,
"loss": 2.9003,
"step": 6200
},
{
"epoch": 4.47125621007807,
"grad_norm": 3.744727849960327,
"learning_rate": 6.9109297374024135e-06,
"loss": 2.8839,
"step": 6300
},
{
"epoch": 4.5422285308729595,
"grad_norm": 3.2394585609436035,
"learning_rate": 6.822214336408801e-06,
"loss": 2.9144,
"step": 6400
},
{
"epoch": 4.61320085166785,
"grad_norm": 3.425605535507202,
"learning_rate": 6.733498935415189e-06,
"loss": 2.9345,
"step": 6500
},
{
"epoch": 4.68417317246274,
"grad_norm": 2.960824489593506,
"learning_rate": 6.644783534421575e-06,
"loss": 2.8784,
"step": 6600
},
{
"epoch": 4.755145493257629,
"grad_norm": 2.8103156089782715,
"learning_rate": 6.556068133427963e-06,
"loss": 2.9073,
"step": 6700
},
{
"epoch": 4.82611781405252,
"grad_norm": 3.3681435585021973,
"learning_rate": 6.467352732434351e-06,
"loss": 2.8941,
"step": 6800
},
{
"epoch": 4.897090134847409,
"grad_norm": 2.99110746383667,
"learning_rate": 6.3786373314407386e-06,
"loss": 2.8776,
"step": 6900
},
{
"epoch": 4.9680624556423,
"grad_norm": 3.0673208236694336,
"learning_rate": 6.289921930447126e-06,
"loss": 2.8941,
"step": 7000
},
{
"epoch": 5.039034776437189,
"grad_norm": 3.0405173301696777,
"learning_rate": 6.201206529453514e-06,
"loss": 2.8133,
"step": 7100
},
{
"epoch": 5.11000709723208,
"grad_norm": 2.673586368560791,
"learning_rate": 6.112491128459901e-06,
"loss": 2.8009,
"step": 7200
},
{
"epoch": 5.180979418026969,
"grad_norm": 2.86667799949646,
"learning_rate": 6.023775727466288e-06,
"loss": 2.8341,
"step": 7300
},
{
"epoch": 5.25195173882186,
"grad_norm": 2.725980758666992,
"learning_rate": 5.935060326472676e-06,
"loss": 2.8206,
"step": 7400
},
{
"epoch": 5.322924059616749,
"grad_norm": 9.25928783416748,
"learning_rate": 5.846344925479064e-06,
"loss": 2.8782,
"step": 7500
},
{
"epoch": 5.39389638041164,
"grad_norm": 3.619668960571289,
"learning_rate": 5.75762952448545e-06,
"loss": 2.8083,
"step": 7600
},
{
"epoch": 5.464868701206529,
"grad_norm": 4.820442199707031,
"learning_rate": 5.668914123491839e-06,
"loss": 2.8051,
"step": 7700
},
{
"epoch": 5.53584102200142,
"grad_norm": 3.3483476638793945,
"learning_rate": 5.580198722498226e-06,
"loss": 2.7862,
"step": 7800
},
{
"epoch": 5.606813342796309,
"grad_norm": 3.04085373878479,
"learning_rate": 5.491483321504613e-06,
"loss": 2.786,
"step": 7900
},
{
"epoch": 5.6777856635912,
"grad_norm": 6.183100700378418,
"learning_rate": 5.402767920511001e-06,
"loss": 2.8541,
"step": 8000
},
{
"epoch": 5.748757984386089,
"grad_norm": 3.20927357673645,
"learning_rate": 5.314052519517389e-06,
"loss": 2.8428,
"step": 8100
},
{
"epoch": 5.819730305180979,
"grad_norm": 2.7963485717773438,
"learning_rate": 5.2253371185237755e-06,
"loss": 2.8391,
"step": 8200
},
{
"epoch": 5.8907026259758695,
"grad_norm": 2.9603395462036133,
"learning_rate": 5.136621717530164e-06,
"loss": 2.8483,
"step": 8300
},
{
"epoch": 5.961674946770759,
"grad_norm": 3.2761342525482178,
"learning_rate": 5.047906316536551e-06,
"loss": 2.8908,
"step": 8400
},
{
"epoch": 6.0326472675656495,
"grad_norm": 2.543006181716919,
"learning_rate": 4.959190915542938e-06,
"loss": 2.7455,
"step": 8500
},
{
"epoch": 6.103619588360539,
"grad_norm": 2.7663486003875732,
"learning_rate": 4.870475514549326e-06,
"loss": 2.781,
"step": 8600
},
{
"epoch": 6.1745919091554295,
"grad_norm": 3.1819772720336914,
"learning_rate": 4.781760113555713e-06,
"loss": 2.8151,
"step": 8700
},
{
"epoch": 6.245564229950319,
"grad_norm": 3.4800798892974854,
"learning_rate": 4.693044712562101e-06,
"loss": 2.742,
"step": 8800
},
{
"epoch": 6.31653655074521,
"grad_norm": 2.997176170349121,
"learning_rate": 4.604329311568489e-06,
"loss": 2.7626,
"step": 8900
},
{
"epoch": 6.387508871540099,
"grad_norm": 2.6546881198883057,
"learning_rate": 4.515613910574876e-06,
"loss": 2.7803,
"step": 9000
},
{
"epoch": 6.45848119233499,
"grad_norm": 3.736813545227051,
"learning_rate": 4.426898509581263e-06,
"loss": 2.7491,
"step": 9100
},
{
"epoch": 6.529453513129879,
"grad_norm": 2.904879331588745,
"learning_rate": 4.3381831085876515e-06,
"loss": 2.7707,
"step": 9200
},
{
"epoch": 6.60042583392477,
"grad_norm": 3.1349782943725586,
"learning_rate": 4.249467707594038e-06,
"loss": 2.7715,
"step": 9300
},
{
"epoch": 6.671398154719659,
"grad_norm": 3.8655664920806885,
"learning_rate": 4.160752306600426e-06,
"loss": 2.8137,
"step": 9400
},
{
"epoch": 6.74237047551455,
"grad_norm": 3.0926291942596436,
"learning_rate": 4.072036905606814e-06,
"loss": 2.7606,
"step": 9500
},
{
"epoch": 6.813342796309439,
"grad_norm": 2.9830901622772217,
"learning_rate": 3.983321504613201e-06,
"loss": 2.7696,
"step": 9600
},
{
"epoch": 6.884315117104329,
"grad_norm": 2.9719297885894775,
"learning_rate": 3.894606103619588e-06,
"loss": 2.7477,
"step": 9700
},
{
"epoch": 6.955287437899219,
"grad_norm": 4.136593818664551,
"learning_rate": 3.805890702625976e-06,
"loss": 2.7814,
"step": 9800
},
{
"epoch": 7.026259758694109,
"grad_norm": 3.006192922592163,
"learning_rate": 3.7171753016323633e-06,
"loss": 2.7316,
"step": 9900
},
{
"epoch": 7.097232079488999,
"grad_norm": 3.204758644104004,
"learning_rate": 3.6284599006387512e-06,
"loss": 2.7784,
"step": 10000
},
{
"epoch": 7.168204400283889,
"grad_norm": 4.11678409576416,
"learning_rate": 3.5397444996451388e-06,
"loss": 2.7377,
"step": 10100
},
{
"epoch": 7.239176721078779,
"grad_norm": 3.0145442485809326,
"learning_rate": 3.451029098651526e-06,
"loss": 2.7327,
"step": 10200
},
{
"epoch": 7.310149041873669,
"grad_norm": 2.759552240371704,
"learning_rate": 3.362313697657914e-06,
"loss": 2.6663,
"step": 10300
},
{
"epoch": 7.3811213626685594,
"grad_norm": 3.0694785118103027,
"learning_rate": 3.2735982966643013e-06,
"loss": 2.7523,
"step": 10400
},
{
"epoch": 7.452093683463449,
"grad_norm": 2.6538655757904053,
"learning_rate": 3.1848828956706884e-06,
"loss": 2.749,
"step": 10500
},
{
"epoch": 7.5230660042583395,
"grad_norm": 3.4055073261260986,
"learning_rate": 3.0961674946770763e-06,
"loss": 2.7119,
"step": 10600
},
{
"epoch": 7.594038325053229,
"grad_norm": 2.686981439590454,
"learning_rate": 3.0074520936834634e-06,
"loss": 2.6696,
"step": 10700
},
{
"epoch": 7.6650106458481195,
"grad_norm": 3.1230781078338623,
"learning_rate": 2.918736692689851e-06,
"loss": 2.7344,
"step": 10800
},
{
"epoch": 7.735982966643009,
"grad_norm": 2.9182968139648438,
"learning_rate": 2.830021291696239e-06,
"loss": 2.7724,
"step": 10900
},
{
"epoch": 7.8069552874379,
"grad_norm": 4.071280002593994,
"learning_rate": 2.741305890702626e-06,
"loss": 2.7562,
"step": 11000
},
{
"epoch": 7.877927608232789,
"grad_norm": 2.9077835083007812,
"learning_rate": 2.6525904897090135e-06,
"loss": 2.7375,
"step": 11100
},
{
"epoch": 7.948899929027679,
"grad_norm": 3.786334276199341,
"learning_rate": 2.563875088715401e-06,
"loss": 2.7451,
"step": 11200
},
{
"epoch": 8.01987224982257,
"grad_norm": 3.843573808670044,
"learning_rate": 2.4751596877217886e-06,
"loss": 2.7024,
"step": 11300
},
{
"epoch": 8.09084457061746,
"grad_norm": 3.3538873195648193,
"learning_rate": 2.386444286728176e-06,
"loss": 2.7569,
"step": 11400
},
{
"epoch": 8.161816891412348,
"grad_norm": 3.028632879257202,
"learning_rate": 2.2977288857345636e-06,
"loss": 2.7473,
"step": 11500
},
{
"epoch": 8.232789212207239,
"grad_norm": 3.1207797527313232,
"learning_rate": 2.209013484740951e-06,
"loss": 2.7304,
"step": 11600
},
{
"epoch": 8.30376153300213,
"grad_norm": 2.9007532596588135,
"learning_rate": 2.1202980837473386e-06,
"loss": 2.6945,
"step": 11700
},
{
"epoch": 8.37473385379702,
"grad_norm": 3.2682387828826904,
"learning_rate": 2.031582682753726e-06,
"loss": 2.6944,
"step": 11800
},
{
"epoch": 8.445706174591908,
"grad_norm": 2.944650173187256,
"learning_rate": 1.9428672817601137e-06,
"loss": 2.6788,
"step": 11900
},
{
"epoch": 8.516678495386799,
"grad_norm": 3.6891725063323975,
"learning_rate": 1.8541518807665012e-06,
"loss": 2.6576,
"step": 12000
},
{
"epoch": 8.58765081618169,
"grad_norm": 3.5069870948791504,
"learning_rate": 1.7654364797728887e-06,
"loss": 2.6916,
"step": 12100
},
{
"epoch": 8.65862313697658,
"grad_norm": 3.2562413215637207,
"learning_rate": 1.676721078779276e-06,
"loss": 2.709,
"step": 12200
},
{
"epoch": 8.729595457771469,
"grad_norm": 2.882202386856079,
"learning_rate": 1.5880056777856637e-06,
"loss": 2.6932,
"step": 12300
},
{
"epoch": 8.800567778566359,
"grad_norm": 3.014678955078125,
"learning_rate": 1.499290276792051e-06,
"loss": 2.7494,
"step": 12400
},
{
"epoch": 8.87154009936125,
"grad_norm": 3.5019314289093018,
"learning_rate": 1.4105748757984388e-06,
"loss": 2.716,
"step": 12500
},
{
"epoch": 8.94251242015614,
"grad_norm": 3.2683048248291016,
"learning_rate": 1.321859474804826e-06,
"loss": 2.7186,
"step": 12600
},
{
"epoch": 9.013484740951029,
"grad_norm": 2.985898017883301,
"learning_rate": 1.2331440738112136e-06,
"loss": 2.7296,
"step": 12700
},
{
"epoch": 9.084457061745919,
"grad_norm": 3.0097787380218506,
"learning_rate": 1.1444286728176011e-06,
"loss": 2.7134,
"step": 12800
},
{
"epoch": 9.15542938254081,
"grad_norm": 3.375105142593384,
"learning_rate": 1.0557132718239887e-06,
"loss": 2.6976,
"step": 12900
},
{
"epoch": 9.2264017033357,
"grad_norm": 18.599666595458984,
"learning_rate": 9.669978708303762e-07,
"loss": 2.6748,
"step": 13000
},
{
"epoch": 9.297374024130589,
"grad_norm": 3.4463281631469727,
"learning_rate": 8.782824698367637e-07,
"loss": 2.6669,
"step": 13100
},
{
"epoch": 9.36834634492548,
"grad_norm": 3.3725767135620117,
"learning_rate": 7.895670688431512e-07,
"loss": 2.6394,
"step": 13200
},
{
"epoch": 9.43931866572037,
"grad_norm": 3.3101234436035156,
"learning_rate": 7.008516678495387e-07,
"loss": 2.7324,
"step": 13300
},
{
"epoch": 9.510290986515258,
"grad_norm": 3.0802764892578125,
"learning_rate": 6.121362668559262e-07,
"loss": 2.6929,
"step": 13400
},
{
"epoch": 9.581263307310149,
"grad_norm": 2.806544303894043,
"learning_rate": 5.234208658623137e-07,
"loss": 2.6382,
"step": 13500
},
{
"epoch": 9.65223562810504,
"grad_norm": 3.4582176208496094,
"learning_rate": 4.347054648687013e-07,
"loss": 2.694,
"step": 13600
},
{
"epoch": 9.72320794889993,
"grad_norm": 4.037984848022461,
"learning_rate": 3.459900638750887e-07,
"loss": 2.6497,
"step": 13700
},
{
"epoch": 9.794180269694818,
"grad_norm": 3.1365439891815186,
"learning_rate": 2.572746628814762e-07,
"loss": 2.705,
"step": 13800
},
{
"epoch": 9.865152590489709,
"grad_norm": 3.4496893882751465,
"learning_rate": 1.6855926188786376e-07,
"loss": 2.7123,
"step": 13900
},
{
"epoch": 9.9361249112846,
"grad_norm": 3.1736724376678467,
"learning_rate": 7.984386089425125e-08,
"loss": 2.7009,
"step": 14000
}
],
"logging_steps": 100,
"max_steps": 14090,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.430320762257408e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}