{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.903111111111111,
"eval_steps": 500,
"global_step": 1100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.035555555555555556,
"grad_norm": 1.6136552095413208,
"learning_rate": 1.4084507042253522e-07,
"loss": 1.4283,
"step": 10
},
{
"epoch": 0.07111111111111111,
"grad_norm": 2.3250255584716797,
"learning_rate": 2.8169014084507043e-07,
"loss": 1.4176,
"step": 20
},
{
"epoch": 0.10666666666666667,
"grad_norm": 2.205648422241211,
"learning_rate": 4.225352112676056e-07,
"loss": 1.3904,
"step": 30
},
{
"epoch": 0.14222222222222222,
"grad_norm": 1.679602861404419,
"learning_rate": 5.633802816901409e-07,
"loss": 1.3256,
"step": 40
},
{
"epoch": 0.17777777777777778,
"grad_norm": 1.6885226964950562,
"learning_rate": 7.04225352112676e-07,
"loss": 1.2877,
"step": 50
},
{
"epoch": 0.21333333333333335,
"grad_norm": 1.3719532489776611,
"learning_rate": 8.450704225352112e-07,
"loss": 1.2335,
"step": 60
},
{
"epoch": 0.24888888888888888,
"grad_norm": 1.6127221584320068,
"learning_rate": 9.859154929577465e-07,
"loss": 1.1898,
"step": 70
},
{
"epoch": 0.28444444444444444,
"grad_norm": 1.3292348384857178,
"learning_rate": 9.998876955784181e-07,
"loss": 1.1213,
"step": 80
},
{
"epoch": 0.32,
"grad_norm": 1.1058685779571533,
"learning_rate": 9.994995475316987e-07,
"loss": 1.104,
"step": 90
},
{
"epoch": 0.35555555555555557,
"grad_norm": 1.0595113039016724,
"learning_rate": 9.988343845952696e-07,
"loss": 1.059,
"step": 100
},
{
"epoch": 0.39111111111111113,
"grad_norm": 0.9761242270469666,
"learning_rate": 9.978925756584284e-07,
"loss": 0.9813,
"step": 110
},
{
"epoch": 0.4266666666666667,
"grad_norm": 0.8893954157829285,
"learning_rate": 9.966746430341582e-07,
"loss": 0.9635,
"step": 120
},
{
"epoch": 0.4622222222222222,
"grad_norm": 0.8302690982818604,
"learning_rate": 9.951812621694608e-07,
"loss": 0.9373,
"step": 130
},
{
"epoch": 0.49777777777777776,
"grad_norm": 0.74117112159729,
"learning_rate": 9.93413261270763e-07,
"loss": 0.9394,
"step": 140
},
{
"epoch": 0.5333333333333333,
"grad_norm": 0.910311758518219,
"learning_rate": 9.913716208446065e-07,
"loss": 0.9476,
"step": 150
},
{
"epoch": 0.5688888888888889,
"grad_norm": 0.9787248373031616,
"learning_rate": 9.890574731538739e-07,
"loss": 0.9403,
"step": 160
},
{
"epoch": 0.6044444444444445,
"grad_norm": 0.6852824091911316,
"learning_rate": 9.864721015898523e-07,
"loss": 0.9306,
"step": 170
},
{
"epoch": 0.64,
"grad_norm": 0.9083530306816101,
"learning_rate": 9.836169399604845e-07,
"loss": 0.9356,
"step": 180
},
{
"epoch": 0.6755555555555556,
"grad_norm": 0.6284005641937256,
"learning_rate": 9.80493571695201e-07,
"loss": 0.9154,
"step": 190
},
{
"epoch": 0.7111111111111111,
"grad_norm": 0.8122096061706543,
"learning_rate": 9.771037289667726e-07,
"loss": 0.8989,
"step": 200
},
{
"epoch": 0.7466666666666667,
"grad_norm": 0.6801354885101318,
"learning_rate": 9.734492917306754e-07,
"loss": 0.9159,
"step": 210
},
{
"epoch": 0.7822222222222223,
"grad_norm": 1.5338674783706665,
"learning_rate": 9.695322866824947e-07,
"loss": 0.8969,
"step": 220
},
{
"epoch": 0.8177777777777778,
"grad_norm": 0.9366681575775146,
"learning_rate": 9.653548861339508e-07,
"loss": 0.9099,
"step": 230
},
{
"epoch": 0.8533333333333334,
"grad_norm": 0.8953334093093872,
"learning_rate": 9.60919406808168e-07,
"loss": 0.8797,
"step": 240
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.7514542937278748,
"learning_rate": 9.562283085548543e-07,
"loss": 0.8666,
"step": 250
},
{
"epoch": 0.9244444444444444,
"grad_norm": 0.7203475832939148,
"learning_rate": 9.512841929861068e-07,
"loss": 0.893,
"step": 260
},
{
"epoch": 0.96,
"grad_norm": 0.9745852947235107,
"learning_rate": 9.460898020335964e-07,
"loss": 0.8883,
"step": 270
},
{
"epoch": 0.9955555555555555,
"grad_norm": 0.9440745711326599,
"learning_rate": 9.40648016427934e-07,
"loss": 0.869,
"step": 280
},
{
"epoch": 1.0284444444444445,
"grad_norm": 1.0532046556472778,
"learning_rate": 9.349618541010616e-07,
"loss": 0.7853,
"step": 290
},
{
"epoch": 1.064,
"grad_norm": 0.7366812825202942,
"learning_rate": 9.290344685125519e-07,
"loss": 0.8485,
"step": 300
},
{
"epoch": 1.0995555555555556,
"grad_norm": 0.6317222118377686,
"learning_rate": 9.228691469007486e-07,
"loss": 0.8323,
"step": 310
},
{
"epoch": 1.1351111111111112,
"grad_norm": 0.4928416907787323,
"learning_rate": 9.16469308459712e-07,
"loss": 0.881,
"step": 320
},
{
"epoch": 1.1706666666666667,
"grad_norm": 0.8622790575027466,
"learning_rate": 9.098385024429874e-07,
"loss": 0.8618,
"step": 330
},
{
"epoch": 1.2062222222222223,
"grad_norm": 0.9656073451042175,
"learning_rate": 9.029804061952424e-07,
"loss": 0.8504,
"step": 340
},
{
"epoch": 1.2417777777777779,
"grad_norm": 0.8012099862098694,
"learning_rate": 8.958988231128663e-07,
"loss": 0.8289,
"step": 350
},
{
"epoch": 1.2773333333333334,
"grad_norm": 0.831724226474762,
"learning_rate": 8.885976805346651e-07,
"loss": 0.8313,
"step": 360
},
{
"epoch": 1.3128888888888888,
"grad_norm": 0.9381484389305115,
"learning_rate": 8.810810275638182e-07,
"loss": 0.8222,
"step": 370
},
{
"epoch": 1.3484444444444446,
"grad_norm": 0.7074716687202454,
"learning_rate": 8.733530328223075e-07,
"loss": 0.815,
"step": 380
},
{
"epoch": 1.384,
"grad_norm": 0.6802889704704285,
"learning_rate": 8.654179821390621e-07,
"loss": 0.8485,
"step": 390
},
{
"epoch": 1.4195555555555557,
"grad_norm": 0.6159129738807678,
"learning_rate": 8.572802761731031e-07,
"loss": 0.8396,
"step": 400
},
{
"epoch": 1.455111111111111,
"grad_norm": 1.0787162780761719,
"learning_rate": 8.489444279730045e-07,
"loss": 0.8342,
"step": 410
},
{
"epoch": 1.4906666666666666,
"grad_norm": 0.850229024887085,
"learning_rate": 8.404150604740248e-07,
"loss": 0.8385,
"step": 420
},
{
"epoch": 1.5262222222222221,
"grad_norm": 0.9370916485786438,
"learning_rate": 8.316969039342963e-07,
"loss": 0.7899,
"step": 430
},
{
"epoch": 1.561777777777778,
"grad_norm": 0.7209655046463013,
"learning_rate": 8.22794793311497e-07,
"loss": 0.8046,
"step": 440
},
{
"epoch": 1.5973333333333333,
"grad_norm": 0.8257189989089966,
"learning_rate": 8.137136655814549e-07,
"loss": 0.8178,
"step": 450
},
{
"epoch": 1.6328888888888888,
"grad_norm": 0.8620548248291016,
"learning_rate": 8.044585570001769e-07,
"loss": 0.807,
"step": 460
},
{
"epoch": 1.6684444444444444,
"grad_norm": 0.8659062385559082,
"learning_rate": 7.950346003108166e-07,
"loss": 0.8087,
"step": 470
},
{
"epoch": 1.704,
"grad_norm": 0.5293139815330505,
"learning_rate": 7.854470218971332e-07,
"loss": 0.7872,
"step": 480
},
{
"epoch": 1.7395555555555555,
"grad_norm": 0.5208423733711243,
"learning_rate": 7.75701138885018e-07,
"loss": 0.8161,
"step": 490
},
{
"epoch": 1.775111111111111,
"grad_norm": 0.7580987811088562,
"learning_rate": 7.658023561936966e-07,
"loss": 0.8314,
"step": 500
},
{
"epoch": 1.8106666666666666,
"grad_norm": 0.8971360325813293,
"learning_rate": 7.557561635382432e-07,
"loss": 0.806,
"step": 510
},
{
"epoch": 1.8462222222222222,
"grad_norm": 0.6375018954277039,
"learning_rate": 7.455681323850668e-07,
"loss": 0.7969,
"step": 520
},
{
"epoch": 1.8817777777777778,
"grad_norm": 1.017171859741211,
"learning_rate": 7.352439128620609e-07,
"loss": 0.7974,
"step": 530
},
{
"epoch": 1.9173333333333333,
"grad_norm": 0.8392543196678162,
"learning_rate": 7.247892306251275e-07,
"loss": 0.807,
"step": 540
},
{
"epoch": 1.952888888888889,
"grad_norm": 1.016851782798767,
"learning_rate": 7.142098836828161e-07,
"loss": 0.8062,
"step": 550
},
{
"epoch": 1.9884444444444445,
"grad_norm": 0.8153456449508667,
"learning_rate": 7.035117391808341e-07,
"loss": 0.7673,
"step": 560
},
{
"epoch": 2.021333333333333,
"grad_norm": 0.7162724733352661,
"learning_rate": 6.927007301482186e-07,
"loss": 0.7502,
"step": 570
},
{
"epoch": 2.056888888888889,
"grad_norm": 0.9724966883659363,
"learning_rate": 6.817828522069667e-07,
"loss": 0.7868,
"step": 580
},
{
"epoch": 2.0924444444444443,
"grad_norm": 1.1692003011703491,
"learning_rate": 6.707641602469553e-07,
"loss": 0.7739,
"step": 590
},
{
"epoch": 2.128,
"grad_norm": 0.7322782874107361,
"learning_rate": 6.596507650679899e-07,
"loss": 0.7829,
"step": 600
},
{
"epoch": 2.1635555555555555,
"grad_norm": 0.9158796072006226,
"learning_rate": 6.484488299908486e-07,
"loss": 0.772,
"step": 610
},
{
"epoch": 2.1991111111111112,
"grad_norm": 0.8015128374099731,
"learning_rate": 6.371645674391966e-07,
"loss": 0.7806,
"step": 620
},
{
"epoch": 2.2346666666666666,
"grad_norm": 0.7846320271492004,
"learning_rate": 6.258042354942707e-07,
"loss": 0.775,
"step": 630
},
{
"epoch": 2.2702222222222224,
"grad_norm": 0.8747680187225342,
"learning_rate": 6.143741344242423e-07,
"loss": 0.7837,
"step": 640
},
{
"epoch": 2.3057777777777777,
"grad_norm": 0.8119185566902161,
"learning_rate": 6.028806031901829e-07,
"loss": 0.7519,
"step": 650
},
{
"epoch": 2.3413333333333335,
"grad_norm": 0.8647979497909546,
"learning_rate": 5.91330015930574e-07,
"loss": 0.7715,
"step": 660
},
{
"epoch": 2.376888888888889,
"grad_norm": 0.8015746474266052,
"learning_rate": 5.797287784263046e-07,
"loss": 0.7829,
"step": 670
},
{
"epoch": 2.4124444444444446,
"grad_norm": 0.715522289276123,
"learning_rate": 5.680833245481234e-07,
"loss": 0.7719,
"step": 680
},
{
"epoch": 2.448,
"grad_norm": 0.9125120639801025,
"learning_rate": 5.564001126885105e-07,
"loss": 0.7632,
"step": 690
},
{
"epoch": 2.4835555555555557,
"grad_norm": 0.9937298893928528,
"learning_rate": 5.446856221799514e-07,
"loss": 0.7511,
"step": 700
},
{
"epoch": 2.519111111111111,
"grad_norm": 0.5765209794044495,
"learning_rate": 5.329463497015968e-07,
"loss": 0.7581,
"step": 710
},
{
"epoch": 2.554666666666667,
"grad_norm": 0.841436505317688,
"learning_rate": 5.211888056763029e-07,
"loss": 0.7813,
"step": 720
},
{
"epoch": 2.590222222222222,
"grad_norm": 1.1379077434539795,
"learning_rate": 5.094195106600489e-07,
"loss": 0.7874,
"step": 730
},
{
"epoch": 2.6257777777777775,
"grad_norm": 0.7455689311027527,
"learning_rate": 4.976449917257365e-07,
"loss": 0.797,
"step": 740
},
{
"epoch": 2.6613333333333333,
"grad_norm": 0.6947171092033386,
"learning_rate": 4.858717788433725e-07,
"loss": 0.7531,
"step": 750
},
{
"epoch": 2.696888888888889,
"grad_norm": 0.8182320594787598,
"learning_rate": 4.741064012586478e-07,
"loss": 0.7659,
"step": 760
},
{
"epoch": 2.7324444444444445,
"grad_norm": 0.8583469390869141,
"learning_rate": 4.6235538387191507e-07,
"loss": 0.753,
"step": 770
},
{
"epoch": 2.768,
"grad_norm": 0.6977065205574036,
"learning_rate": 4.50625243619579e-07,
"loss": 0.7786,
"step": 780
},
{
"epoch": 2.8035555555555556,
"grad_norm": 0.8603796362876892,
"learning_rate": 4.3892248585990147e-07,
"loss": 0.7842,
"step": 790
},
{
"epoch": 2.8391111111111114,
"grad_norm": 0.6347509026527405,
"learning_rate": 4.27253600765228e-07,
"loss": 0.7808,
"step": 800
},
{
"epoch": 2.8746666666666667,
"grad_norm": 0.6170427203178406,
"learning_rate": 4.1562505972263726e-07,
"loss": 0.7623,
"step": 810
},
{
"epoch": 2.910222222222222,
"grad_norm": 0.6599701046943665,
"learning_rate": 4.0404331174500656e-07,
"loss": 0.7692,
"step": 820
},
{
"epoch": 2.945777777777778,
"grad_norm": 0.6815395951271057,
"learning_rate": 3.9251477989448795e-07,
"loss": 0.8188,
"step": 830
},
{
"epoch": 2.981333333333333,
"grad_norm": 0.5231301784515381,
"learning_rate": 3.810458577203749e-07,
"loss": 0.7577,
"step": 840
},
{
"epoch": 3.014222222222222,
"grad_norm": 0.6689186692237854,
"learning_rate": 3.696429057133358e-07,
"loss": 0.715,
"step": 850
},
{
"epoch": 3.049777777777778,
"grad_norm": 0.7008723020553589,
"learning_rate": 3.583122477779834e-07,
"loss": 0.782,
"step": 860
},
{
"epoch": 3.0853333333333333,
"grad_norm": 0.915671706199646,
"learning_rate": 3.470601677257323e-07,
"loss": 0.8049,
"step": 870
},
{
"epoch": 3.120888888888889,
"grad_norm": 0.6437973976135254,
"learning_rate": 3.3589290578989213e-07,
"loss": 0.7404,
"step": 880
},
{
"epoch": 3.1564444444444444,
"grad_norm": 0.6364536285400391,
"learning_rate": 3.2481665516492876e-07,
"loss": 0.7662,
"step": 890
},
{
"epoch": 3.192,
"grad_norm": 0.7271984219551086,
"learning_rate": 3.138375585718125e-07,
"loss": 0.7738,
"step": 900
},
{
"epoch": 3.2275555555555555,
"grad_norm": 0.6700648665428162,
"learning_rate": 3.0296170485135784e-07,
"loss": 0.735,
"step": 910
},
{
"epoch": 3.2631111111111113,
"grad_norm": 0.6754481196403503,
"learning_rate": 2.9219512558744486e-07,
"loss": 0.7539,
"step": 920
},
{
"epoch": 3.2986666666666666,
"grad_norm": 0.8119938969612122,
"learning_rate": 2.815437917619932e-07,
"loss": 0.7498,
"step": 930
},
{
"epoch": 3.3342222222222224,
"grad_norm": 0.5352524518966675,
"learning_rate": 2.7101361044354696e-07,
"loss": 0.7316,
"step": 940
},
{
"epoch": 3.3697777777777778,
"grad_norm": 0.7653639316558838,
"learning_rate": 2.6061042151130323e-07,
"loss": 0.73,
"step": 950
},
{
"epoch": 3.405333333333333,
"grad_norm": 0.7560474872589111,
"learning_rate": 2.5033999441640344e-07,
"loss": 0.7561,
"step": 960
},
{
"epoch": 3.440888888888889,
"grad_norm": 0.7517653703689575,
"learning_rate": 2.4020802498228334e-07,
"loss": 0.7382,
"step": 970
},
{
"epoch": 3.4764444444444447,
"grad_norm": 1.0488708019256592,
"learning_rate": 2.3022013224585519e-07,
"loss": 0.7805,
"step": 980
},
{
"epoch": 3.512,
"grad_norm": 0.8792369365692139,
"learning_rate": 2.203818553412757e-07,
"loss": 0.7754,
"step": 990
},
{
"epoch": 3.5475555555555554,
"grad_norm": 0.6874270439147949,
"learning_rate": 2.10698650428025e-07,
"loss": 0.7465,
"step": 1000
},
{
"epoch": 3.583111111111111,
"grad_norm": 0.7939172983169556,
"learning_rate": 2.011758876650037e-07,
"loss": 0.7451,
"step": 1010
},
{
"epoch": 3.618666666666667,
"grad_norm": 0.7084336876869202,
"learning_rate": 1.9181884823232413e-07,
"loss": 0.7559,
"step": 1020
},
{
"epoch": 3.6542222222222223,
"grad_norm": 0.6327200531959534,
"learning_rate": 1.82632721402448e-07,
"loss": 0.7191,
"step": 1030
},
{
"epoch": 3.6897777777777776,
"grad_norm": 0.5157420635223389,
"learning_rate": 1.7362260166229308e-07,
"loss": 0.7336,
"step": 1040
},
{
"epoch": 3.7253333333333334,
"grad_norm": 0.5553033947944641,
"learning_rate": 1.6479348588791e-07,
"loss": 0.7527,
"step": 1050
},
{
"epoch": 3.7608888888888887,
"grad_norm": 0.7045750617980957,
"learning_rate": 1.561502705732883e-07,
"loss": 0.7352,
"step": 1060
},
{
"epoch": 3.7964444444444445,
"grad_norm": 0.70656418800354,
"learning_rate": 1.4769774911483686e-07,
"loss": 0.7666,
"step": 1070
},
{
"epoch": 3.832,
"grad_norm": 0.8279157876968384,
"learning_rate": 1.394406091530367e-07,
"loss": 0.7362,
"step": 1080
},
{
"epoch": 3.8675555555555556,
"grad_norm": 0.7268490195274353,
"learning_rate": 1.313834299727488e-07,
"loss": 0.7346,
"step": 1090
},
{
"epoch": 3.903111111111111,
"grad_norm": 0.5250927209854126,
"learning_rate": 1.2353067996361033e-07,
"loss": 0.7359,
"step": 1100
}
],
"logging_steps": 10,
"max_steps": 1405,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.602096798242701e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}