{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.0,
"eval_steps": 500,
"global_step": 14728,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09505703422053231,
"grad_norm": 0.9250678420066833,
"learning_rate": 3.95882818685669e-05,
"loss": 6.6663,
"step": 100
},
{
"epoch": 0.19011406844106463,
"grad_norm": 1.117205262184143,
"learning_rate": 7.91765637371338e-05,
"loss": 4.145,
"step": 200
},
{
"epoch": 0.28517110266159695,
"grad_norm": 1.3325448036193848,
"learning_rate": 0.00011876484560570071,
"loss": 3.8608,
"step": 300
},
{
"epoch": 0.38022813688212925,
"grad_norm": 0.9613128900527954,
"learning_rate": 0.0001583531274742676,
"loss": 3.768,
"step": 400
},
{
"epoch": 0.4752851711026616,
"grad_norm": 1.1198444366455078,
"learning_rate": 0.00019794140934283454,
"loss": 3.7093,
"step": 500
},
{
"epoch": 0.5703422053231939,
"grad_norm": 1.0210366249084473,
"learning_rate": 0.00023752969121140142,
"loss": 3.6664,
"step": 600
},
{
"epoch": 0.6653992395437263,
"grad_norm": 0.9687440395355225,
"learning_rate": 0.00027711797307996834,
"loss": 3.5409,
"step": 700
},
{
"epoch": 0.7604562737642585,
"grad_norm": 1.4981633424758911,
"learning_rate": 0.0003167062549485352,
"loss": 3.3992,
"step": 800
},
{
"epoch": 0.8555133079847909,
"grad_norm": 0.8627603650093079,
"learning_rate": 0.00035629453681710216,
"loss": 3.1727,
"step": 900
},
{
"epoch": 0.9505703422053232,
"grad_norm": 0.9925593733787537,
"learning_rate": 0.0003958828186856691,
"loss": 2.9724,
"step": 1000
},
{
"epoch": 1.0,
"eval_loss": 2.6960370540618896,
"eval_runtime": 3.7345,
"eval_samples_per_second": 1897.152,
"eval_steps_per_second": 118.622,
"step": 1052
},
{
"epoch": 1.0456273764258555,
"grad_norm": 0.9267619848251343,
"learning_rate": 0.00043547110055423594,
"loss": 2.7022,
"step": 1100
},
{
"epoch": 1.1406844106463878,
"grad_norm": 0.7666485905647278,
"learning_rate": 0.00047505938242280285,
"loss": 2.5641,
"step": 1200
},
{
"epoch": 1.2357414448669202,
"grad_norm": 0.5969619154930115,
"learning_rate": 0.0004990645699549983,
"loss": 2.5058,
"step": 1300
},
{
"epoch": 1.3307984790874525,
"grad_norm": 0.7782655358314514,
"learning_rate": 0.0004965363806441826,
"loss": 2.4665,
"step": 1400
},
{
"epoch": 1.4258555133079849,
"grad_norm": 0.8928040266036987,
"learning_rate": 0.000494008191333367,
"loss": 2.4311,
"step": 1500
},
{
"epoch": 1.5209125475285172,
"grad_norm": 0.8687949180603027,
"learning_rate": 0.0004914800020225515,
"loss": 2.3964,
"step": 1600
},
{
"epoch": 1.6159695817490496,
"grad_norm": 0.6245518922805786,
"learning_rate": 0.0004889518127117359,
"loss": 2.374,
"step": 1700
},
{
"epoch": 1.7110266159695817,
"grad_norm": 0.6903976202011108,
"learning_rate": 0.0004864236234009203,
"loss": 2.3606,
"step": 1800
},
{
"epoch": 1.806083650190114,
"grad_norm": 0.8996257781982422,
"learning_rate": 0.00048389543409010466,
"loss": 2.3376,
"step": 1900
},
{
"epoch": 1.9011406844106464,
"grad_norm": 0.734466016292572,
"learning_rate": 0.0004813672447792891,
"loss": 2.3226,
"step": 2000
},
{
"epoch": 1.9961977186311786,
"grad_norm": 0.6836825013160706,
"learning_rate": 0.0004788390554684735,
"loss": 2.3108,
"step": 2100
},
{
"epoch": 2.0,
"eval_loss": 2.285733461380005,
"eval_runtime": 3.623,
"eval_samples_per_second": 1955.579,
"eval_steps_per_second": 122.275,
"step": 2104
},
{
"epoch": 2.091254752851711,
"grad_norm": 0.5974160432815552,
"learning_rate": 0.0004763108661576579,
"loss": 2.2585,
"step": 2200
},
{
"epoch": 2.1863117870722433,
"grad_norm": 0.788093626499176,
"learning_rate": 0.0004737826768468423,
"loss": 2.264,
"step": 2300
},
{
"epoch": 2.2813688212927756,
"grad_norm": 0.7451100945472717,
"learning_rate": 0.00047125448753602674,
"loss": 2.2504,
"step": 2400
},
{
"epoch": 2.376425855513308,
"grad_norm": 0.6724629998207092,
"learning_rate": 0.0004687262982252111,
"loss": 2.2358,
"step": 2500
},
{
"epoch": 2.4714828897338403,
"grad_norm": 0.6606141924858093,
"learning_rate": 0.00046619810891439554,
"loss": 2.2301,
"step": 2600
},
{
"epoch": 2.5665399239543727,
"grad_norm": 0.6599621772766113,
"learning_rate": 0.0004636699196035799,
"loss": 2.2268,
"step": 2700
},
{
"epoch": 2.661596958174905,
"grad_norm": 0.6633493304252625,
"learning_rate": 0.00046114173029276434,
"loss": 2.2247,
"step": 2800
},
{
"epoch": 2.7566539923954374,
"grad_norm": 0.6308265328407288,
"learning_rate": 0.00045861354098194877,
"loss": 2.2221,
"step": 2900
},
{
"epoch": 2.8517110266159698,
"grad_norm": 0.6383451223373413,
"learning_rate": 0.00045608535167113314,
"loss": 2.2274,
"step": 3000
},
{
"epoch": 2.9467680608365017,
"grad_norm": 0.61512291431427,
"learning_rate": 0.00045355716236031757,
"loss": 2.2067,
"step": 3100
},
{
"epoch": 3.0,
"eval_loss": 2.2008087635040283,
"eval_runtime": 3.5613,
"eval_samples_per_second": 1989.445,
"eval_steps_per_second": 124.393,
"step": 3156
},
{
"epoch": 3.041825095057034,
"grad_norm": 0.7461186647415161,
"learning_rate": 0.00045102897304950194,
"loss": 2.1882,
"step": 3200
},
{
"epoch": 3.1368821292775664,
"grad_norm": 0.6590662598609924,
"learning_rate": 0.00044850078373868637,
"loss": 2.1662,
"step": 3300
},
{
"epoch": 3.2319391634980987,
"grad_norm": 0.5832785964012146,
"learning_rate": 0.00044597259442787074,
"loss": 2.1603,
"step": 3400
},
{
"epoch": 3.326996197718631,
"grad_norm": 0.6356543898582458,
"learning_rate": 0.00044344440511705517,
"loss": 2.1601,
"step": 3500
},
{
"epoch": 3.4220532319391634,
"grad_norm": 0.7197031378746033,
"learning_rate": 0.0004409162158062396,
"loss": 2.1567,
"step": 3600
},
{
"epoch": 3.517110266159696,
"grad_norm": 0.5856086611747742,
"learning_rate": 0.00043838802649542397,
"loss": 2.1588,
"step": 3700
},
{
"epoch": 3.612167300380228,
"grad_norm": 0.6212655305862427,
"learning_rate": 0.00043585983718460834,
"loss": 2.1565,
"step": 3800
},
{
"epoch": 3.7072243346007605,
"grad_norm": 0.6765671968460083,
"learning_rate": 0.0004333316478737928,
"loss": 2.1667,
"step": 3900
},
{
"epoch": 3.802281368821293,
"grad_norm": 0.6720090508460999,
"learning_rate": 0.0004308034585629772,
"loss": 2.1675,
"step": 4000
},
{
"epoch": 3.897338403041825,
"grad_norm": 0.7150991559028625,
"learning_rate": 0.00042827526925216157,
"loss": 2.1474,
"step": 4100
},
{
"epoch": 3.9923954372623576,
"grad_norm": 0.5831249356269836,
"learning_rate": 0.00042574707994134605,
"loss": 2.1485,
"step": 4200
},
{
"epoch": 4.0,
"eval_loss": 2.15364408493042,
"eval_runtime": 3.644,
"eval_samples_per_second": 1944.292,
"eval_steps_per_second": 121.57,
"step": 4208
},
{
"epoch": 4.08745247148289,
"grad_norm": 0.6653150916099548,
"learning_rate": 0.0004232188906305304,
"loss": 2.0899,
"step": 4300
},
{
"epoch": 4.182509505703422,
"grad_norm": 0.7235066294670105,
"learning_rate": 0.0004206907013197148,
"loss": 2.0982,
"step": 4400
},
{
"epoch": 4.277566539923955,
"grad_norm": 0.7326545715332031,
"learning_rate": 0.0004181625120088992,
"loss": 2.1007,
"step": 4500
},
{
"epoch": 4.3726235741444865,
"grad_norm": 0.6236776113510132,
"learning_rate": 0.00041563432269808365,
"loss": 2.1031,
"step": 4600
},
{
"epoch": 4.467680608365019,
"grad_norm": 0.5669475197792053,
"learning_rate": 0.000413106133387268,
"loss": 2.1087,
"step": 4700
},
{
"epoch": 4.562737642585551,
"grad_norm": 0.5483006834983826,
"learning_rate": 0.00041057794407645245,
"loss": 2.1034,
"step": 4800
},
{
"epoch": 4.657794676806084,
"grad_norm": 0.5456926822662354,
"learning_rate": 0.0004080497547656369,
"loss": 2.1065,
"step": 4900
},
{
"epoch": 4.752851711026616,
"grad_norm": 0.9545803666114807,
"learning_rate": 0.00040552156545482125,
"loss": 2.1168,
"step": 5000
},
{
"epoch": 4.847908745247148,
"grad_norm": 0.5378767251968384,
"learning_rate": 0.0004029933761440057,
"loss": 2.1107,
"step": 5100
},
{
"epoch": 4.942965779467681,
"grad_norm": 0.629880964756012,
"learning_rate": 0.00040046518683319005,
"loss": 2.0983,
"step": 5200
},
{
"epoch": 5.0,
"eval_loss": 2.132718801498413,
"eval_runtime": 3.6373,
"eval_samples_per_second": 1947.857,
"eval_steps_per_second": 121.793,
"step": 5260
},
{
"epoch": 5.038022813688213,
"grad_norm": 0.5900342464447021,
"learning_rate": 0.0003979369975223745,
"loss": 2.0758,
"step": 5300
},
{
"epoch": 5.133079847908745,
"grad_norm": 0.6181082129478455,
"learning_rate": 0.0003954088082115589,
"loss": 2.041,
"step": 5400
},
{
"epoch": 5.228136882129277,
"grad_norm": 0.6756412386894226,
"learning_rate": 0.0003928806189007433,
"loss": 2.0548,
"step": 5500
},
{
"epoch": 5.32319391634981,
"grad_norm": 0.6649320125579834,
"learning_rate": 0.0003903524295899277,
"loss": 2.0438,
"step": 5600
},
{
"epoch": 5.418250950570342,
"grad_norm": 0.5628513693809509,
"learning_rate": 0.00038782424027911214,
"loss": 2.0485,
"step": 5700
},
{
"epoch": 5.513307984790875,
"grad_norm": 0.6923677921295166,
"learning_rate": 0.0003852960509682965,
"loss": 2.063,
"step": 5800
},
{
"epoch": 5.608365019011407,
"grad_norm": 0.6819363236427307,
"learning_rate": 0.0003827678616574809,
"loss": 2.0618,
"step": 5900
},
{
"epoch": 5.7034220532319395,
"grad_norm": 0.6446284055709839,
"learning_rate": 0.00038023967234666537,
"loss": 2.0674,
"step": 6000
},
{
"epoch": 5.798479087452471,
"grad_norm": 0.6319680213928223,
"learning_rate": 0.00037771148303584974,
"loss": 2.061,
"step": 6100
},
{
"epoch": 5.893536121673003,
"grad_norm": 0.6318814754486084,
"learning_rate": 0.0003751832937250341,
"loss": 2.0656,
"step": 6200
},
{
"epoch": 5.988593155893536,
"grad_norm": 0.6261875033378601,
"learning_rate": 0.0003726551044142186,
"loss": 2.0663,
"step": 6300
},
{
"epoch": 6.0,
"eval_loss": 2.1098814010620117,
"eval_runtime": 3.698,
"eval_samples_per_second": 1915.889,
"eval_steps_per_second": 119.794,
"step": 6312
},
{
"epoch": 6.083650190114068,
"grad_norm": 0.6620230674743652,
"learning_rate": 0.00037012691510340297,
"loss": 1.9996,
"step": 6400
},
{
"epoch": 6.178707224334601,
"grad_norm": 1.0794607400894165,
"learning_rate": 0.00036759872579258734,
"loss": 2.0018,
"step": 6500
},
{
"epoch": 6.273764258555133,
"grad_norm": 1.372861385345459,
"learning_rate": 0.00036507053648177177,
"loss": 2.0059,
"step": 6600
},
{
"epoch": 6.3688212927756656,
"grad_norm": 0.5926664471626282,
"learning_rate": 0.0003625423471709562,
"loss": 2.012,
"step": 6700
},
{
"epoch": 6.4638783269961975,
"grad_norm": 0.7855852246284485,
"learning_rate": 0.00036001415786014057,
"loss": 2.0128,
"step": 6800
},
{
"epoch": 6.55893536121673,
"grad_norm": 0.6684075593948364,
"learning_rate": 0.000357485968549325,
"loss": 2.0221,
"step": 6900
},
{
"epoch": 6.653992395437262,
"grad_norm": 0.628013014793396,
"learning_rate": 0.00035495777923850937,
"loss": 2.0159,
"step": 7000
},
{
"epoch": 6.749049429657795,
"grad_norm": 0.7943947911262512,
"learning_rate": 0.0003524295899276938,
"loss": 2.0223,
"step": 7100
},
{
"epoch": 6.844106463878327,
"grad_norm": 0.645799994468689,
"learning_rate": 0.0003499014006168782,
"loss": 2.0206,
"step": 7200
},
{
"epoch": 6.93916349809886,
"grad_norm": 0.6603648066520691,
"learning_rate": 0.0003473732113060626,
"loss": 2.0304,
"step": 7300
},
{
"epoch": 7.0,
"eval_loss": 2.099062919616699,
"eval_runtime": 3.631,
"eval_samples_per_second": 1951.251,
"eval_steps_per_second": 122.005,
"step": 7364
},
{
"epoch": 7.034220532319392,
"grad_norm": 0.6082973480224609,
"learning_rate": 0.000344845021995247,
"loss": 2.0039,
"step": 7400
},
{
"epoch": 7.129277566539924,
"grad_norm": 0.673995852470398,
"learning_rate": 0.0003423168326844314,
"loss": 1.9663,
"step": 7500
},
{
"epoch": 7.224334600760456,
"grad_norm": 0.675037682056427,
"learning_rate": 0.0003397886433736158,
"loss": 1.9696,
"step": 7600
},
{
"epoch": 7.319391634980988,
"grad_norm": 0.6488978266716003,
"learning_rate": 0.0003372604540628002,
"loss": 1.9701,
"step": 7700
},
{
"epoch": 7.414448669201521,
"grad_norm": 0.8255399465560913,
"learning_rate": 0.0003347322647519846,
"loss": 1.9654,
"step": 7800
},
{
"epoch": 7.509505703422053,
"grad_norm": 1.2661654949188232,
"learning_rate": 0.00033220407544116905,
"loss": 1.9736,
"step": 7900
},
{
"epoch": 7.604562737642586,
"grad_norm": 0.6545805335044861,
"learning_rate": 0.0003296758861303534,
"loss": 1.9783,
"step": 8000
},
{
"epoch": 7.699619771863118,
"grad_norm": 0.8890361189842224,
"learning_rate": 0.00032714769681953785,
"loss": 1.9807,
"step": 8100
},
{
"epoch": 7.79467680608365,
"grad_norm": 0.6547899842262268,
"learning_rate": 0.0003246195075087223,
"loss": 1.9723,
"step": 8200
},
{
"epoch": 7.889733840304182,
"grad_norm": 1.1239402294158936,
"learning_rate": 0.00032209131819790665,
"loss": 1.9734,
"step": 8300
},
{
"epoch": 7.984790874524715,
"grad_norm": 0.6624830961227417,
"learning_rate": 0.000319563128887091,
"loss": 1.9869,
"step": 8400
},
{
"epoch": 8.0,
"eval_loss": 2.1034328937530518,
"eval_runtime": 3.6013,
"eval_samples_per_second": 1967.337,
"eval_steps_per_second": 123.011,
"step": 8416
},
{
"epoch": 8.079847908745247,
"grad_norm": 0.6550971269607544,
"learning_rate": 0.0003170349395762755,
"loss": 1.9223,
"step": 8500
},
{
"epoch": 8.17490494296578,
"grad_norm": 0.660987138748169,
"learning_rate": 0.0003145067502654599,
"loss": 1.9245,
"step": 8600
},
{
"epoch": 8.269961977186313,
"grad_norm": 0.759884774684906,
"learning_rate": 0.00031197856095464425,
"loss": 1.9235,
"step": 8700
},
{
"epoch": 8.365019011406844,
"grad_norm": 0.9319919347763062,
"learning_rate": 0.00030945037164382874,
"loss": 1.9239,
"step": 8800
},
{
"epoch": 8.460076045627376,
"grad_norm": 0.6610597968101501,
"learning_rate": 0.0003069221823330131,
"loss": 1.928,
"step": 8900
},
{
"epoch": 8.55513307984791,
"grad_norm": 0.7076143622398376,
"learning_rate": 0.0003043939930221975,
"loss": 1.9289,
"step": 9000
},
{
"epoch": 8.65019011406844,
"grad_norm": 0.6368849873542786,
"learning_rate": 0.0003018658037113819,
"loss": 1.932,
"step": 9100
},
{
"epoch": 8.745247148288973,
"grad_norm": 0.7639185786247253,
"learning_rate": 0.00029933761440056634,
"loss": 1.9485,
"step": 9200
},
{
"epoch": 8.840304182509506,
"grad_norm": 1.0823330879211426,
"learning_rate": 0.0002968094250897507,
"loss": 1.9447,
"step": 9300
},
{
"epoch": 8.935361216730039,
"grad_norm": 0.8542035222053528,
"learning_rate": 0.00029428123577893514,
"loss": 1.942,
"step": 9400
},
{
"epoch": 9.0,
"eval_loss": 2.0947535037994385,
"eval_runtime": 3.6147,
"eval_samples_per_second": 1960.063,
"eval_steps_per_second": 122.556,
"step": 9468
},
{
"epoch": 9.03041825095057,
"grad_norm": 0.7601971626281738,
"learning_rate": 0.00029175304646811956,
"loss": 1.9243,
"step": 9500
},
{
"epoch": 9.125475285171103,
"grad_norm": 0.7461040019989014,
"learning_rate": 0.00028922485715730394,
"loss": 1.8704,
"step": 9600
},
{
"epoch": 9.220532319391635,
"grad_norm": 0.7719326019287109,
"learning_rate": 0.00028669666784648836,
"loss": 1.8832,
"step": 9700
},
{
"epoch": 9.315589353612168,
"grad_norm": 0.716136634349823,
"learning_rate": 0.00028416847853567274,
"loss": 1.8787,
"step": 9800
},
{
"epoch": 9.4106463878327,
"grad_norm": 0.6928532123565674,
"learning_rate": 0.00028164028922485717,
"loss": 1.8855,
"step": 9900
},
{
"epoch": 9.505703422053232,
"grad_norm": 0.7696681618690491,
"learning_rate": 0.0002791120999140416,
"loss": 1.8855,
"step": 10000
},
{
"epoch": 9.600760456273765,
"grad_norm": 0.8969391584396362,
"learning_rate": 0.00027658391060322597,
"loss": 1.9034,
"step": 10100
},
{
"epoch": 9.695817490494296,
"grad_norm": 0.8469530940055847,
"learning_rate": 0.00027405572129241034,
"loss": 1.8965,
"step": 10200
},
{
"epoch": 9.790874524714829,
"grad_norm": 0.7956866025924683,
"learning_rate": 0.0002715275319815948,
"loss": 1.9087,
"step": 10300
},
{
"epoch": 9.885931558935361,
"grad_norm": 0.8293343782424927,
"learning_rate": 0.0002689993426707792,
"loss": 1.9177,
"step": 10400
},
{
"epoch": 9.980988593155894,
"grad_norm": 0.7472631931304932,
"learning_rate": 0.00026647115335996357,
"loss": 1.9082,
"step": 10500
},
{
"epoch": 10.0,
"eval_loss": 2.097904920578003,
"eval_runtime": 3.5592,
"eval_samples_per_second": 1990.641,
"eval_steps_per_second": 124.468,
"step": 10520
},
{
"epoch": 10.076045627376425,
"grad_norm": 0.7787309288978577,
"learning_rate": 0.00026394296404914805,
"loss": 1.8393,
"step": 10600
},
{
"epoch": 10.171102661596958,
"grad_norm": 1.3328174352645874,
"learning_rate": 0.0002614147747383324,
"loss": 1.8283,
"step": 10700
},
{
"epoch": 10.26615969581749,
"grad_norm": 0.7740694284439087,
"learning_rate": 0.0002588865854275168,
"loss": 1.8422,
"step": 10800
},
{
"epoch": 10.361216730038024,
"grad_norm": 0.828940749168396,
"learning_rate": 0.0002563583961167012,
"loss": 1.8516,
"step": 10900
},
{
"epoch": 10.456273764258555,
"grad_norm": 0.751752495765686,
"learning_rate": 0.00025383020680588565,
"loss": 1.8624,
"step": 11000
},
{
"epoch": 10.551330798479087,
"grad_norm": 0.9940192103385925,
"learning_rate": 0.00025130201749507,
"loss": 1.8599,
"step": 11100
},
{
"epoch": 10.64638783269962,
"grad_norm": 0.8591569066047668,
"learning_rate": 0.00024877382818425445,
"loss": 1.8581,
"step": 11200
},
{
"epoch": 10.741444866920151,
"grad_norm": 0.7676281332969666,
"learning_rate": 0.0002462456388734388,
"loss": 1.8637,
"step": 11300
},
{
"epoch": 10.836501901140684,
"grad_norm": 0.7896871566772461,
"learning_rate": 0.00024371744956262325,
"loss": 1.8606,
"step": 11400
},
{
"epoch": 10.931558935361217,
"grad_norm": 0.8302274942398071,
"learning_rate": 0.00024118926025180765,
"loss": 1.8656,
"step": 11500
},
{
"epoch": 11.0,
"eval_loss": 2.0961618423461914,
"eval_runtime": 3.6362,
"eval_samples_per_second": 1948.473,
"eval_steps_per_second": 121.831,
"step": 11572
},
{
"epoch": 11.02661596958175,
"grad_norm": 0.8891871571540833,
"learning_rate": 0.00023866107094099208,
"loss": 1.8522,
"step": 11600
},
{
"epoch": 11.12167300380228,
"grad_norm": 0.7549653649330139,
"learning_rate": 0.00023613288163017645,
"loss": 1.7913,
"step": 11700
},
{
"epoch": 11.216730038022813,
"grad_norm": 0.8127674460411072,
"learning_rate": 0.00023360469231936088,
"loss": 1.8102,
"step": 11800
},
{
"epoch": 11.311787072243346,
"grad_norm": 0.841659426689148,
"learning_rate": 0.0002310765030085453,
"loss": 1.803,
"step": 11900
},
{
"epoch": 11.406844106463879,
"grad_norm": 0.8460645079612732,
"learning_rate": 0.00022854831369772968,
"loss": 1.8201,
"step": 12000
},
{
"epoch": 11.50190114068441,
"grad_norm": 0.7932580709457397,
"learning_rate": 0.0002260201243869141,
"loss": 1.811,
"step": 12100
},
{
"epoch": 11.596958174904943,
"grad_norm": 0.8419378399848938,
"learning_rate": 0.0002234919350760985,
"loss": 1.8145,
"step": 12200
},
{
"epoch": 11.692015209125476,
"grad_norm": 0.8346748352050781,
"learning_rate": 0.0002209637457652829,
"loss": 1.8328,
"step": 12300
},
{
"epoch": 11.787072243346007,
"grad_norm": 1.019510269165039,
"learning_rate": 0.0002184355564544673,
"loss": 1.8257,
"step": 12400
},
{
"epoch": 11.88212927756654,
"grad_norm": 0.8175719976425171,
"learning_rate": 0.00021590736714365173,
"loss": 1.8274,
"step": 12500
},
{
"epoch": 11.977186311787072,
"grad_norm": 0.7476153373718262,
"learning_rate": 0.00021337917783283614,
"loss": 1.8361,
"step": 12600
},
{
"epoch": 12.0,
"eval_loss": 2.1029505729675293,
"eval_runtime": 3.5932,
"eval_samples_per_second": 1971.782,
"eval_steps_per_second": 123.289,
"step": 12624
},
{
"epoch": 12.072243346007605,
"grad_norm": 0.8637651205062866,
"learning_rate": 0.00021085098852202054,
"loss": 1.7684,
"step": 12700
},
{
"epoch": 12.167300380228136,
"grad_norm": 0.80800461769104,
"learning_rate": 0.00020832279921120496,
"loss": 1.7703,
"step": 12800
},
{
"epoch": 12.262357414448669,
"grad_norm": 1.0111021995544434,
"learning_rate": 0.00020579460990038934,
"loss": 1.7809,
"step": 12900
},
{
"epoch": 12.357414448669202,
"grad_norm": 0.8477798700332642,
"learning_rate": 0.00020326642058957376,
"loss": 1.7795,
"step": 13000
},
{
"epoch": 12.452471482889734,
"grad_norm": 0.8284028172492981,
"learning_rate": 0.00020073823127875814,
"loss": 1.7803,
"step": 13100
},
{
"epoch": 12.547528517110266,
"grad_norm": 0.7752136588096619,
"learning_rate": 0.00019821004196794256,
"loss": 1.7836,
"step": 13200
},
{
"epoch": 12.642585551330798,
"grad_norm": 0.8929184675216675,
"learning_rate": 0.00019568185265712696,
"loss": 1.7724,
"step": 13300
},
{
"epoch": 12.737642585551331,
"grad_norm": 0.8475900888442993,
"learning_rate": 0.00019315366334631136,
"loss": 1.7891,
"step": 13400
},
{
"epoch": 12.832699619771864,
"grad_norm": 0.9029939770698547,
"learning_rate": 0.0001906254740354958,
"loss": 1.7888,
"step": 13500
},
{
"epoch": 12.927756653992395,
"grad_norm": 0.841206967830658,
"learning_rate": 0.0001880972847246802,
"loss": 1.8005,
"step": 13600
},
{
"epoch": 13.0,
"eval_loss": 2.1176211833953857,
"eval_runtime": 3.6226,
"eval_samples_per_second": 1955.796,
"eval_steps_per_second": 122.289,
"step": 13676
},
{
"epoch": 13.022813688212928,
"grad_norm": 0.786509096622467,
"learning_rate": 0.0001855690954138646,
"loss": 1.7784,
"step": 13700
},
{
"epoch": 13.11787072243346,
"grad_norm": 0.8644747734069824,
"learning_rate": 0.000183040906103049,
"loss": 1.7234,
"step": 13800
},
{
"epoch": 13.212927756653992,
"grad_norm": 0.8760172128677368,
"learning_rate": 0.00018051271679223342,
"loss": 1.7308,
"step": 13900
},
{
"epoch": 13.307984790874524,
"grad_norm": 0.7858941555023193,
"learning_rate": 0.0001779845274814178,
"loss": 1.7318,
"step": 14000
},
{
"epoch": 13.403041825095057,
"grad_norm": 0.8771238327026367,
"learning_rate": 0.00017545633817060222,
"loss": 1.7473,
"step": 14100
},
{
"epoch": 13.49809885931559,
"grad_norm": 0.8886803984642029,
"learning_rate": 0.00017292814885978665,
"loss": 1.7491,
"step": 14200
},
{
"epoch": 13.593155893536121,
"grad_norm": 0.8704127669334412,
"learning_rate": 0.00017039995954897102,
"loss": 1.7548,
"step": 14300
},
{
"epoch": 13.688212927756654,
"grad_norm": 1.2635705471038818,
"learning_rate": 0.00016787177023815545,
"loss": 1.7532,
"step": 14400
},
{
"epoch": 13.783269961977187,
"grad_norm": 0.9218750596046448,
"learning_rate": 0.00016534358092733985,
"loss": 1.7531,
"step": 14500
},
{
"epoch": 13.87832699619772,
"grad_norm": 0.9513919353485107,
"learning_rate": 0.00016281539161652425,
"loss": 1.7618,
"step": 14600
},
{
"epoch": 13.97338403041825,
"grad_norm": 1.010962963104248,
"learning_rate": 0.00016028720230570865,
"loss": 1.7646,
"step": 14700
},
{
"epoch": 14.0,
"eval_loss": 2.130631923675537,
"eval_runtime": 3.6539,
"eval_samples_per_second": 1938.998,
"eval_steps_per_second": 121.239,
"step": 14728
}
],
"logging_steps": 100,
"max_steps": 21040,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.6215157665850184e+16,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}