{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 171,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017543859649122806,
"grad_norm": 3.77577782421705,
"learning_rate": 0.0,
"loss": 1.1733,
"num_tokens": 427344.0,
"step": 1
},
{
"epoch": 0.03508771929824561,
"grad_norm": 3.7130563556909215,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.164,
"num_tokens": 866449.0,
"step": 2
},
{
"epoch": 0.05263157894736842,
"grad_norm": 3.970572390407664,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.2254,
"num_tokens": 1268563.0,
"step": 3
},
{
"epoch": 0.07017543859649122,
"grad_norm": 3.512030300056592,
"learning_rate": 5e-06,
"loss": 1.2077,
"num_tokens": 1669265.0,
"step": 4
},
{
"epoch": 0.08771929824561403,
"grad_norm": 2.37795690695823,
"learning_rate": 6.666666666666667e-06,
"loss": 1.0413,
"num_tokens": 2093363.0,
"step": 5
},
{
"epoch": 0.10526315789473684,
"grad_norm": 1.525864968420599,
"learning_rate": 8.333333333333334e-06,
"loss": 0.874,
"num_tokens": 2505147.0,
"step": 6
},
{
"epoch": 0.12280701754385964,
"grad_norm": 1.4347947716935,
"learning_rate": 1e-05,
"loss": 0.8159,
"num_tokens": 2931742.0,
"step": 7
},
{
"epoch": 0.14035087719298245,
"grad_norm": 2.6084462153475063,
"learning_rate": 9.999184354855868e-06,
"loss": 0.6802,
"num_tokens": 3316057.0,
"step": 8
},
{
"epoch": 0.15789473684210525,
"grad_norm": 1.5740859932252258,
"learning_rate": 9.996737715102133e-06,
"loss": 0.6019,
"num_tokens": 3761399.0,
"step": 9
},
{
"epoch": 0.17543859649122806,
"grad_norm": 1.5450933348615719,
"learning_rate": 9.99266096766761e-06,
"loss": 0.5439,
"num_tokens": 4179394.0,
"step": 10
},
{
"epoch": 0.19298245614035087,
"grad_norm": 0.9018401685238229,
"learning_rate": 9.98695559040975e-06,
"loss": 0.4438,
"num_tokens": 4600345.0,
"step": 11
},
{
"epoch": 0.21052631578947367,
"grad_norm": 0.5196800459649759,
"learning_rate": 9.979623651578881e-06,
"loss": 0.395,
"num_tokens": 4980610.0,
"step": 12
},
{
"epoch": 0.22807017543859648,
"grad_norm": 0.4799071601752034,
"learning_rate": 9.970667809068476e-06,
"loss": 0.3892,
"num_tokens": 5395359.0,
"step": 13
},
{
"epoch": 0.24561403508771928,
"grad_norm": 0.31207959270885294,
"learning_rate": 9.960091309451626e-06,
"loss": 0.3808,
"num_tokens": 5805663.0,
"step": 14
},
{
"epoch": 0.2631578947368421,
"grad_norm": 0.31760448278346387,
"learning_rate": 9.947897986804131e-06,
"loss": 0.3708,
"num_tokens": 6201438.0,
"step": 15
},
{
"epoch": 0.2807017543859649,
"grad_norm": 0.3009817279323404,
"learning_rate": 9.93409226131462e-06,
"loss": 0.3557,
"num_tokens": 6614132.0,
"step": 16
},
{
"epoch": 0.2982456140350877,
"grad_norm": 0.2599822684525422,
"learning_rate": 9.91867913768218e-06,
"loss": 0.3284,
"num_tokens": 7030814.0,
"step": 17
},
{
"epoch": 0.3157894736842105,
"grad_norm": 0.24197734921107386,
"learning_rate": 9.901664203302126e-06,
"loss": 0.3272,
"num_tokens": 7453748.0,
"step": 18
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.2741912470593866,
"learning_rate": 9.883053626240503e-06,
"loss": 0.3181,
"num_tokens": 7874681.0,
"step": 19
},
{
"epoch": 0.3508771929824561,
"grad_norm": 0.21560866373997145,
"learning_rate": 9.862854152998112e-06,
"loss": 0.3102,
"num_tokens": 8349009.0,
"step": 20
},
{
"epoch": 0.3684210526315789,
"grad_norm": 0.203790119667157,
"learning_rate": 9.841073106064852e-06,
"loss": 0.3038,
"num_tokens": 8779295.0,
"step": 21
},
{
"epoch": 0.38596491228070173,
"grad_norm": 0.21543607735771148,
"learning_rate": 9.81771838126524e-06,
"loss": 0.3039,
"num_tokens": 9155380.0,
"step": 22
},
{
"epoch": 0.40350877192982454,
"grad_norm": 0.19448461940928388,
"learning_rate": 9.792798444896107e-06,
"loss": 0.2923,
"num_tokens": 9572881.0,
"step": 23
},
{
"epoch": 0.42105263157894735,
"grad_norm": 0.20104199677846465,
"learning_rate": 9.766322330657499e-06,
"loss": 0.3004,
"num_tokens": 9972480.0,
"step": 24
},
{
"epoch": 0.43859649122807015,
"grad_norm": 0.19715422132031898,
"learning_rate": 9.738299636377863e-06,
"loss": 0.291,
"num_tokens": 10406948.0,
"step": 25
},
{
"epoch": 0.45614035087719296,
"grad_norm": 0.19192801250536232,
"learning_rate": 9.70874052053476e-06,
"loss": 0.289,
"num_tokens": 10821942.0,
"step": 26
},
{
"epoch": 0.47368421052631576,
"grad_norm": 0.18616122639207905,
"learning_rate": 9.677655698572326e-06,
"loss": 0.2622,
"num_tokens": 11234170.0,
"step": 27
},
{
"epoch": 0.49122807017543857,
"grad_norm": 0.192192334546507,
"learning_rate": 9.645056439016827e-06,
"loss": 0.275,
"num_tokens": 11627308.0,
"step": 28
},
{
"epoch": 0.5087719298245614,
"grad_norm": 0.18829460979027127,
"learning_rate": 9.610954559391704e-06,
"loss": 0.2754,
"num_tokens": 12022656.0,
"step": 29
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.17300283701775698,
"learning_rate": 9.57536242193364e-06,
"loss": 0.2692,
"num_tokens": 12444405.0,
"step": 30
},
{
"epoch": 0.543859649122807,
"grad_norm": 0.17695771762278176,
"learning_rate": 9.538292929111114e-06,
"loss": 0.2734,
"num_tokens": 12837998.0,
"step": 31
},
{
"epoch": 0.5614035087719298,
"grad_norm": 0.16869993658358656,
"learning_rate": 9.499759518947156e-06,
"loss": 0.2657,
"num_tokens": 13261798.0,
"step": 32
},
{
"epoch": 0.5789473684210527,
"grad_norm": 0.15857386041778282,
"learning_rate": 9.459776160147941e-06,
"loss": 0.2559,
"num_tokens": 13717762.0,
"step": 33
},
{
"epoch": 0.5964912280701754,
"grad_norm": 0.16181853610382824,
"learning_rate": 9.418357347038999e-06,
"loss": 0.2427,
"num_tokens": 14142042.0,
"step": 34
},
{
"epoch": 0.6140350877192983,
"grad_norm": 0.16729961186553155,
"learning_rate": 9.375518094310904e-06,
"loss": 0.2546,
"num_tokens": 14543269.0,
"step": 35
},
{
"epoch": 0.631578947368421,
"grad_norm": 0.16125107833920507,
"learning_rate": 9.331273931576306e-06,
"loss": 0.2455,
"num_tokens": 14941779.0,
"step": 36
},
{
"epoch": 0.6491228070175439,
"grad_norm": 0.1651336881757046,
"learning_rate": 9.285640897740316e-06,
"loss": 0.2479,
"num_tokens": 15378768.0,
"step": 37
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.16309612539485216,
"learning_rate": 9.238635535186247e-06,
"loss": 0.2524,
"num_tokens": 15784254.0,
"step": 38
},
{
"epoch": 0.6842105263157895,
"grad_norm": 0.1613815194130245,
"learning_rate": 9.19027488377886e-06,
"loss": 0.252,
"num_tokens": 16187575.0,
"step": 39
},
{
"epoch": 0.7017543859649122,
"grad_norm": 0.15494556090577644,
"learning_rate": 9.140576474687263e-06,
"loss": 0.2397,
"num_tokens": 16592627.0,
"step": 40
},
{
"epoch": 0.7192982456140351,
"grad_norm": 0.15730027265317648,
"learning_rate": 9.0895583240297e-06,
"loss": 0.2396,
"num_tokens": 17019496.0,
"step": 41
},
{
"epoch": 0.7368421052631579,
"grad_norm": 0.15536856311696803,
"learning_rate": 9.037238926342544e-06,
"loss": 0.2388,
"num_tokens": 17448731.0,
"step": 42
},
{
"epoch": 0.7543859649122807,
"grad_norm": 0.16373703052589486,
"learning_rate": 8.983637247875872e-06,
"loss": 0.2447,
"num_tokens": 17852418.0,
"step": 43
},
{
"epoch": 0.7719298245614035,
"grad_norm": 0.1564039931823383,
"learning_rate": 8.92877271971802e-06,
"loss": 0.2317,
"num_tokens": 18284414.0,
"step": 44
},
{
"epoch": 0.7894736842105263,
"grad_norm": 0.15673969833275106,
"learning_rate": 8.872665230751644e-06,
"loss": 0.2445,
"num_tokens": 18700575.0,
"step": 45
},
{
"epoch": 0.8070175438596491,
"grad_norm": 0.16227301290569535,
"learning_rate": 8.815335120443822e-06,
"loss": 0.2369,
"num_tokens": 19112507.0,
"step": 46
},
{
"epoch": 0.8245614035087719,
"grad_norm": 0.16192875727409162,
"learning_rate": 8.756803171472817e-06,
"loss": 0.2488,
"num_tokens": 19497572.0,
"step": 47
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.14878902875215574,
"learning_rate": 8.69709060219416e-06,
"loss": 0.221,
"num_tokens": 19887057.0,
"step": 48
},
{
"epoch": 0.8596491228070176,
"grad_norm": 0.168304643511618,
"learning_rate": 8.636219058948823e-06,
"loss": 0.2338,
"num_tokens": 20294327.0,
"step": 49
},
{
"epoch": 0.8771929824561403,
"grad_norm": 0.14440251287899045,
"learning_rate": 8.574210608216206e-06,
"loss": 0.2165,
"num_tokens": 20731445.0,
"step": 50
},
{
"epoch": 0.8947368421052632,
"grad_norm": 0.14573622062248015,
"learning_rate": 8.511087728614863e-06,
"loss": 0.2291,
"num_tokens": 21165129.0,
"step": 51
},
{
"epoch": 0.9122807017543859,
"grad_norm": 0.15767251624934578,
"learning_rate": 8.446873302753783e-06,
"loss": 0.2231,
"num_tokens": 21564437.0,
"step": 52
},
{
"epoch": 0.9298245614035088,
"grad_norm": 0.14330856767528197,
"learning_rate": 8.381590608937251e-06,
"loss": 0.2274,
"num_tokens": 22012280.0,
"step": 53
},
{
"epoch": 0.9473684210526315,
"grad_norm": 0.1524166655671973,
"learning_rate": 8.315263312726248e-06,
"loss": 0.2131,
"num_tokens": 22396001.0,
"step": 54
},
{
"epoch": 0.9649122807017544,
"grad_norm": 0.15336585183985868,
"learning_rate": 8.247915458359473e-06,
"loss": 0.2195,
"num_tokens": 22793769.0,
"step": 55
},
{
"epoch": 0.9824561403508771,
"grad_norm": 0.1588593078419976,
"learning_rate": 8.179571460037096e-06,
"loss": 0.2345,
"num_tokens": 23201717.0,
"step": 56
},
{
"epoch": 1.0,
"grad_norm": 0.14666483129173052,
"learning_rate": 8.110256093070393e-06,
"loss": 0.2346,
"num_tokens": 23647950.0,
"step": 57
},
{
"epoch": 1.0175438596491229,
"grad_norm": 0.15418874889368148,
"learning_rate": 8.039994484900463e-06,
"loss": 0.2268,
"num_tokens": 24100529.0,
"step": 58
},
{
"epoch": 1.0350877192982457,
"grad_norm": 0.14747387727593939,
"learning_rate": 7.968812105989316e-06,
"loss": 0.2155,
"num_tokens": 24540892.0,
"step": 59
},
{
"epoch": 1.0526315789473684,
"grad_norm": 0.154346076339797,
"learning_rate": 7.896734760586599e-06,
"loss": 0.2057,
"num_tokens": 24956824.0,
"step": 60
},
{
"epoch": 1.0701754385964912,
"grad_norm": 0.14685651214148715,
"learning_rate": 7.82378857737533e-06,
"loss": 0.2036,
"num_tokens": 25384518.0,
"step": 61
},
{
"epoch": 1.087719298245614,
"grad_norm": 0.16326674348993506,
"learning_rate": 7.75e-06,
"loss": 0.2001,
"num_tokens": 25771807.0,
"step": 62
},
{
"epoch": 1.1052631578947367,
"grad_norm": 0.147771119836904,
"learning_rate": 7.675395777480538e-06,
"loss": 0.1996,
"num_tokens": 26177417.0,
"step": 63
},
{
"epoch": 1.1228070175438596,
"grad_norm": 0.14003657083220583,
"learning_rate": 7.600002954515532e-06,
"loss": 0.2072,
"num_tokens": 26622325.0,
"step": 64
},
{
"epoch": 1.1403508771929824,
"grad_norm": 0.15332767124685198,
"learning_rate": 7.523848861678297e-06,
"loss": 0.2065,
"num_tokens": 27045078.0,
"step": 65
},
{
"epoch": 1.1578947368421053,
"grad_norm": 0.15183433287347486,
"learning_rate": 7.446961105509289e-06,
"loss": 0.2032,
"num_tokens": 27438828.0,
"step": 66
},
{
"epoch": 1.1754385964912282,
"grad_norm": 0.14554656331938712,
"learning_rate": 7.36936755850849e-06,
"loss": 0.2054,
"num_tokens": 27854689.0,
"step": 67
},
{
"epoch": 1.1929824561403508,
"grad_norm": 0.1537836129829156,
"learning_rate": 7.2910963490313815e-06,
"loss": 0.1949,
"num_tokens": 28233580.0,
"step": 68
},
{
"epoch": 1.2105263157894737,
"grad_norm": 0.14572761360447276,
"learning_rate": 7.212175851092154e-06,
"loss": 0.1958,
"num_tokens": 28641897.0,
"step": 69
},
{
"epoch": 1.2280701754385965,
"grad_norm": 0.13708384426430809,
"learning_rate": 7.132634674077884e-06,
"loss": 0.2021,
"num_tokens": 29084929.0,
"step": 70
},
{
"epoch": 1.2456140350877192,
"grad_norm": 0.1486755044006831,
"learning_rate": 7.052501652377368e-06,
"loss": 0.2044,
"num_tokens": 29482516.0,
"step": 71
},
{
"epoch": 1.263157894736842,
"grad_norm": 0.14957101767673275,
"learning_rate": 6.971805834928399e-06,
"loss": 0.2048,
"num_tokens": 29899136.0,
"step": 72
},
{
"epoch": 1.280701754385965,
"grad_norm": 0.1486401064457622,
"learning_rate": 6.890576474687264e-06,
"loss": 0.2068,
"num_tokens": 30317666.0,
"step": 73
},
{
"epoch": 1.2982456140350878,
"grad_norm": 0.15958496167902586,
"learning_rate": 6.808843018024296e-06,
"loss": 0.1986,
"num_tokens": 30734034.0,
"step": 74
},
{
"epoch": 1.3157894736842106,
"grad_norm": 0.1383546863269268,
"learning_rate": 6.726635094049291e-06,
"loss": 0.199,
"num_tokens": 31155917.0,
"step": 75
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.14368049999314014,
"learning_rate": 6.643982503870693e-06,
"loss": 0.2032,
"num_tokens": 31573757.0,
"step": 76
},
{
"epoch": 1.3508771929824561,
"grad_norm": 0.13900291410105262,
"learning_rate": 6.560915209792424e-06,
"loss": 0.2016,
"num_tokens": 32010739.0,
"step": 77
},
{
"epoch": 1.368421052631579,
"grad_norm": 0.13844326865953724,
"learning_rate": 6.477463324452286e-06,
"loss": 0.1925,
"num_tokens": 32424467.0,
"step": 78
},
{
"epoch": 1.3859649122807016,
"grad_norm": 0.1433757292045691,
"learning_rate": 6.393657099905854e-06,
"loss": 0.2008,
"num_tokens": 32834770.0,
"step": 79
},
{
"epoch": 1.4035087719298245,
"grad_norm": 0.14196299072627255,
"learning_rate": 6.309526916659843e-06,
"loss": 0.1924,
"num_tokens": 33255872.0,
"step": 80
},
{
"epoch": 1.4210526315789473,
"grad_norm": 0.13753823275156205,
"learning_rate": 6.225103272658889e-06,
"loss": 0.2034,
"num_tokens": 33706927.0,
"step": 81
},
{
"epoch": 1.4385964912280702,
"grad_norm": 0.1384289808314504,
"learning_rate": 6.140416772229785e-06,
"loss": 0.1917,
"num_tokens": 34112601.0,
"step": 82
},
{
"epoch": 1.456140350877193,
"grad_norm": 0.14627667997521285,
"learning_rate": 6.0554981149871276e-06,
"loss": 0.2063,
"num_tokens": 34517104.0,
"step": 83
},
{
"epoch": 1.4736842105263157,
"grad_norm": 0.1558573666402862,
"learning_rate": 5.970378084704441e-06,
"loss": 0.1994,
"num_tokens": 34897309.0,
"step": 84
},
{
"epoch": 1.4912280701754386,
"grad_norm": 0.1417112898190135,
"learning_rate": 5.88508753815478e-06,
"loss": 0.1881,
"num_tokens": 35307793.0,
"step": 85
},
{
"epoch": 1.5087719298245614,
"grad_norm": 0.1385582692497573,
"learning_rate": 5.799657393924869e-06,
"loss": 0.198,
"num_tokens": 35741435.0,
"step": 86
},
{
"epoch": 1.526315789473684,
"grad_norm": 0.15662493183414183,
"learning_rate": 5.714118621206843e-06,
"loss": 0.1909,
"num_tokens": 36110154.0,
"step": 87
},
{
"epoch": 1.543859649122807,
"grad_norm": 0.14798100623872662,
"learning_rate": 5.6285022285716325e-06,
"loss": 0.2063,
"num_tokens": 36508508.0,
"step": 88
},
{
"epoch": 1.5614035087719298,
"grad_norm": 0.13949297838603725,
"learning_rate": 5.542839252728096e-06,
"loss": 0.2056,
"num_tokens": 36962199.0,
"step": 89
},
{
"epoch": 1.5789473684210527,
"grad_norm": 0.1388395627421214,
"learning_rate": 5.457160747271906e-06,
"loss": 0.1977,
"num_tokens": 37416119.0,
"step": 90
},
{
"epoch": 1.5964912280701755,
"grad_norm": 0.13753404705909872,
"learning_rate": 5.371497771428368e-06,
"loss": 0.1988,
"num_tokens": 37844052.0,
"step": 91
},
{
"epoch": 1.6140350877192984,
"grad_norm": 0.13813649436163167,
"learning_rate": 5.2858813787931605e-06,
"loss": 0.193,
"num_tokens": 38281149.0,
"step": 92
},
{
"epoch": 1.631578947368421,
"grad_norm": 0.13735934501319846,
"learning_rate": 5.2003426060751324e-06,
"loss": 0.1948,
"num_tokens": 38696776.0,
"step": 93
},
{
"epoch": 1.6491228070175439,
"grad_norm": 0.14464652766257102,
"learning_rate": 5.114912461845223e-06,
"loss": 0.1954,
"num_tokens": 39118421.0,
"step": 94
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.1441541884884212,
"learning_rate": 5.02962191529556e-06,
"loss": 0.1969,
"num_tokens": 39531921.0,
"step": 95
},
{
"epoch": 1.6842105263157894,
"grad_norm": 0.14549147633711634,
"learning_rate": 4.944501885012875e-06,
"loss": 0.1987,
"num_tokens": 39942510.0,
"step": 96
},
{
"epoch": 1.7017543859649122,
"grad_norm": 0.14239386063143547,
"learning_rate": 4.859583227770218e-06,
"loss": 0.1942,
"num_tokens": 40349157.0,
"step": 97
},
{
"epoch": 1.719298245614035,
"grad_norm": 0.14101938277964904,
"learning_rate": 4.774896727341113e-06,
"loss": 0.1896,
"num_tokens": 40755487.0,
"step": 98
},
{
"epoch": 1.736842105263158,
"grad_norm": 0.1513563377956916,
"learning_rate": 4.6904730833401575e-06,
"loss": 0.1741,
"num_tokens": 41109588.0,
"step": 99
},
{
"epoch": 1.7543859649122808,
"grad_norm": 0.14171664584567437,
"learning_rate": 4.606342900094147e-06,
"loss": 0.1978,
"num_tokens": 41549463.0,
"step": 100
},
{
"epoch": 1.7719298245614035,
"grad_norm": 0.1403769471140474,
"learning_rate": 4.5225366755477165e-06,
"loss": 0.2018,
"num_tokens": 41986009.0,
"step": 101
},
{
"epoch": 1.7894736842105263,
"grad_norm": 0.14588775725754724,
"learning_rate": 4.439084790207577e-06,
"loss": 0.1991,
"num_tokens": 42393517.0,
"step": 102
},
{
"epoch": 1.807017543859649,
"grad_norm": 0.14195463960673751,
"learning_rate": 4.35601749612931e-06,
"loss": 0.1954,
"num_tokens": 42788971.0,
"step": 103
},
{
"epoch": 1.8245614035087718,
"grad_norm": 0.14486517700153345,
"learning_rate": 4.273364905950711e-06,
"loss": 0.2001,
"num_tokens": 43200059.0,
"step": 104
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.14536497153998434,
"learning_rate": 4.191156981975704e-06,
"loss": 0.1881,
"num_tokens": 43591515.0,
"step": 105
},
{
"epoch": 1.8596491228070176,
"grad_norm": 0.1483672348465985,
"learning_rate": 4.109423525312738e-06,
"loss": 0.1936,
"num_tokens": 43989015.0,
"step": 106
},
{
"epoch": 1.8771929824561404,
"grad_norm": 0.14387471159557752,
"learning_rate": 4.028194165071603e-06,
"loss": 0.1959,
"num_tokens": 44390867.0,
"step": 107
},
{
"epoch": 1.8947368421052633,
"grad_norm": 0.14319263387686854,
"learning_rate": 3.9474983476226335e-06,
"loss": 0.2026,
"num_tokens": 44814288.0,
"step": 108
},
{
"epoch": 1.912280701754386,
"grad_norm": 0.13718763298366718,
"learning_rate": 3.867365325922116e-06,
"loss": 0.1919,
"num_tokens": 45232685.0,
"step": 109
},
{
"epoch": 1.9298245614035088,
"grad_norm": 0.13661747990592807,
"learning_rate": 3.7878241489078473e-06,
"loss": 0.192,
"num_tokens": 45633905.0,
"step": 110
},
{
"epoch": 1.9473684210526314,
"grad_norm": 0.13757723840377134,
"learning_rate": 3.7089036509686216e-06,
"loss": 0.196,
"num_tokens": 46052270.0,
"step": 111
},
{
"epoch": 1.9649122807017543,
"grad_norm": 0.14009156799615108,
"learning_rate": 3.630632441491512e-06,
"loss": 0.1945,
"num_tokens": 46479271.0,
"step": 112
},
{
"epoch": 1.9824561403508771,
"grad_norm": 0.1392559652525668,
"learning_rate": 3.5530388944907124e-06,
"loss": 0.1985,
"num_tokens": 46884227.0,
"step": 113
},
{
"epoch": 2.0,
"grad_norm": 0.13976428969132587,
"learning_rate": 3.476151138321705e-06,
"loss": 0.1995,
"num_tokens": 47297644.0,
"step": 114
},
{
"epoch": 2.017543859649123,
"grad_norm": 0.1378198428541279,
"learning_rate": 3.3999970454844688e-06,
"loss": 0.1724,
"num_tokens": 47688068.0,
"step": 115
},
{
"epoch": 2.0350877192982457,
"grad_norm": 0.134440422974191,
"learning_rate": 3.3246042225194626e-06,
"loss": 0.1796,
"num_tokens": 48092477.0,
"step": 116
},
{
"epoch": 2.0526315789473686,
"grad_norm": 0.13660484419562605,
"learning_rate": 3.2500000000000015e-06,
"loss": 0.1763,
"num_tokens": 48476841.0,
"step": 117
},
{
"epoch": 2.0701754385964914,
"grad_norm": 0.14109474340650238,
"learning_rate": 3.176211422624672e-06,
"loss": 0.1778,
"num_tokens": 48854905.0,
"step": 118
},
{
"epoch": 2.087719298245614,
"grad_norm": 0.13774654351946805,
"learning_rate": 3.103265239413401e-06,
"loss": 0.1793,
"num_tokens": 49295065.0,
"step": 119
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.14705463035874308,
"learning_rate": 3.0311878940106864e-06,
"loss": 0.1885,
"num_tokens": 49711843.0,
"step": 120
},
{
"epoch": 2.1228070175438596,
"grad_norm": 0.13965440849358451,
"learning_rate": 2.9600055150995397e-06,
"loss": 0.1804,
"num_tokens": 50121373.0,
"step": 121
},
{
"epoch": 2.1403508771929824,
"grad_norm": 0.1431354792667028,
"learning_rate": 2.889743906929609e-06,
"loss": 0.1761,
"num_tokens": 50524660.0,
"step": 122
},
{
"epoch": 2.1578947368421053,
"grad_norm": 0.13549946577694855,
"learning_rate": 2.820428539962905e-06,
"loss": 0.1756,
"num_tokens": 50952097.0,
"step": 123
},
{
"epoch": 2.175438596491228,
"grad_norm": 0.13874042982824947,
"learning_rate": 2.7520845416405285e-06,
"loss": 0.1787,
"num_tokens": 51357662.0,
"step": 124
},
{
"epoch": 2.192982456140351,
"grad_norm": 0.13352052067268536,
"learning_rate": 2.6847366872737535e-06,
"loss": 0.1786,
"num_tokens": 51772391.0,
"step": 125
},
{
"epoch": 2.2105263157894735,
"grad_norm": 0.13750830287403998,
"learning_rate": 2.618409391062751e-06,
"loss": 0.1827,
"num_tokens": 52198396.0,
"step": 126
},
{
"epoch": 2.2280701754385963,
"grad_norm": 0.14077287411728898,
"learning_rate": 2.5531266972462176e-06,
"loss": 0.1786,
"num_tokens": 52585564.0,
"step": 127
},
{
"epoch": 2.245614035087719,
"grad_norm": 0.13893984896019573,
"learning_rate": 2.4889122713851397e-06,
"loss": 0.1788,
"num_tokens": 52997398.0,
"step": 128
},
{
"epoch": 2.263157894736842,
"grad_norm": 0.13788162656378736,
"learning_rate": 2.425789391783796e-06,
"loss": 0.1878,
"num_tokens": 53407933.0,
"step": 129
},
{
"epoch": 2.280701754385965,
"grad_norm": 0.13629331805149528,
"learning_rate": 2.36378094105118e-06,
"loss": 0.1836,
"num_tokens": 53817667.0,
"step": 130
},
{
"epoch": 2.2982456140350878,
"grad_norm": 0.145846640939152,
"learning_rate": 2.302909397805841e-06,
"loss": 0.1761,
"num_tokens": 54208139.0,
"step": 131
},
{
"epoch": 2.3157894736842106,
"grad_norm": 0.1415158735561498,
"learning_rate": 2.2431968285271843e-06,
"loss": 0.1861,
"num_tokens": 54616138.0,
"step": 132
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.1399694993181749,
"learning_rate": 2.1846648795561777e-06,
"loss": 0.18,
"num_tokens": 55028264.0,
"step": 133
},
{
"epoch": 2.3508771929824563,
"grad_norm": 0.1340221566625987,
"learning_rate": 2.1273347692483574e-06,
"loss": 0.1818,
"num_tokens": 55474995.0,
"step": 134
},
{
"epoch": 2.3684210526315788,
"grad_norm": 0.13728502667314055,
"learning_rate": 2.071227280281982e-06,
"loss": 0.1697,
"num_tokens": 55872252.0,
"step": 135
},
{
"epoch": 2.3859649122807016,
"grad_norm": 0.13569940106251407,
"learning_rate": 2.016362752124129e-06,
"loss": 0.1799,
"num_tokens": 56295990.0,
"step": 136
},
{
"epoch": 2.4035087719298245,
"grad_norm": 0.1433225385861297,
"learning_rate": 1.9627610736574575e-06,
"loss": 0.1744,
"num_tokens": 56700633.0,
"step": 137
},
{
"epoch": 2.4210526315789473,
"grad_norm": 0.13712140562366157,
"learning_rate": 1.9104416759703017e-06,
"loss": 0.1772,
"num_tokens": 57123351.0,
"step": 138
},
{
"epoch": 2.43859649122807,
"grad_norm": 0.14064914274676912,
"learning_rate": 1.8594235253127373e-06,
"loss": 0.1794,
"num_tokens": 57541451.0,
"step": 139
},
{
"epoch": 2.456140350877193,
"grad_norm": 0.15170132064659694,
"learning_rate": 1.8097251162211405e-06,
"loss": 0.1831,
"num_tokens": 57962223.0,
"step": 140
},
{
"epoch": 2.473684210526316,
"grad_norm": 0.13964776563103484,
"learning_rate": 1.7613644648137543e-06,
"loss": 0.1756,
"num_tokens": 58375881.0,
"step": 141
},
{
"epoch": 2.4912280701754383,
"grad_norm": 0.13507579048092097,
"learning_rate": 1.7143591022596846e-06,
"loss": 0.1821,
"num_tokens": 58796929.0,
"step": 142
},
{
"epoch": 2.5087719298245617,
"grad_norm": 0.13875107577532086,
"learning_rate": 1.6687260684236943e-06,
"loss": 0.1773,
"num_tokens": 59207995.0,
"step": 143
},
{
"epoch": 2.526315789473684,
"grad_norm": 0.14061593378122658,
"learning_rate": 1.6244819056890975e-06,
"loss": 0.1716,
"num_tokens": 59582578.0,
"step": 144
},
{
"epoch": 2.543859649122807,
"grad_norm": 0.12901477335373565,
"learning_rate": 1.5816426529610035e-06,
"loss": 0.1764,
"num_tokens": 60014351.0,
"step": 145
},
{
"epoch": 2.56140350877193,
"grad_norm": 0.13513262564013573,
"learning_rate": 1.5402238398520614e-06,
"loss": 0.1742,
"num_tokens": 60428513.0,
"step": 146
},
{
"epoch": 2.5789473684210527,
"grad_norm": 0.12744611421871882,
"learning_rate": 1.5002404810528452e-06,
"loss": 0.1798,
"num_tokens": 60870775.0,
"step": 147
},
{
"epoch": 2.5964912280701755,
"grad_norm": 0.1281932184087842,
"learning_rate": 1.4617070708888882e-06,
"loss": 0.1788,
"num_tokens": 61333167.0,
"step": 148
},
{
"epoch": 2.6140350877192984,
"grad_norm": 0.13398144271039164,
"learning_rate": 1.4246375780663613e-06,
"loss": 0.1792,
"num_tokens": 61737623.0,
"step": 149
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.13540743049220252,
"learning_rate": 1.389045440608296e-06,
"loss": 0.1755,
"num_tokens": 62143293.0,
"step": 150
},
{
"epoch": 2.6491228070175437,
"grad_norm": 0.13564465493581726,
"learning_rate": 1.354943560983175e-06,
"loss": 0.1735,
"num_tokens": 62558499.0,
"step": 151
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.12805186009140426,
"learning_rate": 1.3223443014276738e-06,
"loss": 0.1736,
"num_tokens": 63004628.0,
"step": 152
},
{
"epoch": 2.6842105263157894,
"grad_norm": 0.1328569132316143,
"learning_rate": 1.2912594794652406e-06,
"loss": 0.1642,
"num_tokens": 63387346.0,
"step": 153
},
{
"epoch": 2.7017543859649122,
"grad_norm": 0.1325321320124978,
"learning_rate": 1.2617003636221394e-06,
"loss": 0.169,
"num_tokens": 63804970.0,
"step": 154
},
{
"epoch": 2.719298245614035,
"grad_norm": 0.13540714382771668,
"learning_rate": 1.2336776693425028e-06,
"loss": 0.1744,
"num_tokens": 64196162.0,
"step": 155
},
{
"epoch": 2.736842105263158,
"grad_norm": 0.14020781213013872,
"learning_rate": 1.2072015551038933e-06,
"loss": 0.1811,
"num_tokens": 64585657.0,
"step": 156
},
{
"epoch": 2.754385964912281,
"grad_norm": 0.14012421310202808,
"learning_rate": 1.1822816187347625e-06,
"loss": 0.1882,
"num_tokens": 64990929.0,
"step": 157
},
{
"epoch": 2.7719298245614032,
"grad_norm": 0.13359559789919473,
"learning_rate": 1.1589268939351499e-06,
"loss": 0.1644,
"num_tokens": 65419394.0,
"step": 158
},
{
"epoch": 2.7894736842105265,
"grad_norm": 0.1293973137684263,
"learning_rate": 1.1371458470018896e-06,
"loss": 0.1686,
"num_tokens": 65848256.0,
"step": 159
},
{
"epoch": 2.807017543859649,
"grad_norm": 0.12796590503255867,
"learning_rate": 1.1169463737594995e-06,
"loss": 0.173,
"num_tokens": 66276026.0,
"step": 160
},
{
"epoch": 2.824561403508772,
"grad_norm": 0.1386629969970847,
"learning_rate": 1.0983357966978747e-06,
"loss": 0.1698,
"num_tokens": 66662640.0,
"step": 161
},
{
"epoch": 2.8421052631578947,
"grad_norm": 0.1312256058443758,
"learning_rate": 1.0813208623178199e-06,
"loss": 0.1831,
"num_tokens": 67101128.0,
"step": 162
},
{
"epoch": 2.8596491228070176,
"grad_norm": 0.13566699518356568,
"learning_rate": 1.0659077386853817e-06,
"loss": 0.1918,
"num_tokens": 67527335.0,
"step": 163
},
{
"epoch": 2.8771929824561404,
"grad_norm": 0.13207372833151468,
"learning_rate": 1.0521020131958692e-06,
"loss": 0.18,
"num_tokens": 67953220.0,
"step": 164
},
{
"epoch": 2.8947368421052633,
"grad_norm": 0.1351068484295404,
"learning_rate": 1.0399086905483752e-06,
"loss": 0.1796,
"num_tokens": 68401961.0,
"step": 165
},
{
"epoch": 2.912280701754386,
"grad_norm": 0.13512574709033598,
"learning_rate": 1.0293321909315242e-06,
"loss": 0.1742,
"num_tokens": 68815465.0,
"step": 166
},
{
"epoch": 2.9298245614035086,
"grad_norm": 0.13483699443340522,
"learning_rate": 1.0203763484211196e-06,
"loss": 0.1778,
"num_tokens": 69255767.0,
"step": 167
},
{
"epoch": 2.9473684210526314,
"grad_norm": 0.13142810366700336,
"learning_rate": 1.0130444095902514e-06,
"loss": 0.1842,
"num_tokens": 69678619.0,
"step": 168
},
{
"epoch": 2.9649122807017543,
"grad_norm": 0.13693412077884642,
"learning_rate": 1.0073390323323897e-06,
"loss": 0.177,
"num_tokens": 70098414.0,
"step": 169
},
{
"epoch": 2.982456140350877,
"grad_norm": 0.1320466096940847,
"learning_rate": 1.0032622848978689e-06,
"loss": 0.168,
"num_tokens": 70513950.0,
"step": 170
},
{
"epoch": 3.0,
"grad_norm": 0.1365255306036787,
"learning_rate": 1.000815645144134e-06,
"loss": 0.1794,
"num_tokens": 70937090.0,
"step": 171
},
{
"epoch": 3.0,
"step": 171,
"total_flos": 2.276376686268252e+17,
"train_loss": 0.25581319124726526,
"train_runtime": 2707.0199,
"train_samples_per_second": 8.073,
"train_steps_per_second": 0.063
}
],
"logging_steps": 1,
"max_steps": 171,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.276376686268252e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}