{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 834,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012004801920768308,
"grad_norm": 1.126847743988037,
"learning_rate": 1.142857142857143e-06,
"loss": 1.2938,
"step": 5
},
{
"epoch": 0.024009603841536616,
"grad_norm": 0.9763688445091248,
"learning_rate": 2.5714285714285716e-06,
"loss": 1.2541,
"step": 10
},
{
"epoch": 0.03601440576230492,
"grad_norm": 0.7441720366477966,
"learning_rate": 4e-06,
"loss": 1.2122,
"step": 15
},
{
"epoch": 0.04801920768307323,
"grad_norm": 0.6003404259681702,
"learning_rate": 5.428571428571429e-06,
"loss": 1.2006,
"step": 20
},
{
"epoch": 0.060024009603841535,
"grad_norm": 0.5410875678062439,
"learning_rate": 6.857142857142857e-06,
"loss": 1.2223,
"step": 25
},
{
"epoch": 0.07202881152460984,
"grad_norm": 0.5632787346839905,
"learning_rate": 8.285714285714287e-06,
"loss": 1.155,
"step": 30
},
{
"epoch": 0.08403361344537816,
"grad_norm": 0.5073860287666321,
"learning_rate": 9.714285714285715e-06,
"loss": 1.1372,
"step": 35
},
{
"epoch": 0.09603841536614646,
"grad_norm": 0.4458916187286377,
"learning_rate": 1.1142857142857143e-05,
"loss": 1.1612,
"step": 40
},
{
"epoch": 0.10804321728691477,
"grad_norm": 0.4466385841369629,
"learning_rate": 1.2571428571428572e-05,
"loss": 1.15,
"step": 45
},
{
"epoch": 0.12004801920768307,
"grad_norm": 0.5031300187110901,
"learning_rate": 1.4e-05,
"loss": 1.1447,
"step": 50
},
{
"epoch": 0.13205282112845138,
"grad_norm": 0.44504547119140625,
"learning_rate": 1.5428571428571428e-05,
"loss": 1.1461,
"step": 55
},
{
"epoch": 0.14405762304921968,
"grad_norm": 0.5289918184280396,
"learning_rate": 1.6857142857142858e-05,
"loss": 1.1246,
"step": 60
},
{
"epoch": 0.15606242496998798,
"grad_norm": 0.4286825954914093,
"learning_rate": 1.8285714285714288e-05,
"loss": 1.08,
"step": 65
},
{
"epoch": 0.16806722689075632,
"grad_norm": 0.4959764778614044,
"learning_rate": 1.9714285714285714e-05,
"loss": 1.0991,
"step": 70
},
{
"epoch": 0.18007202881152462,
"grad_norm": 0.4261907935142517,
"learning_rate": 2.1142857142857144e-05,
"loss": 1.108,
"step": 75
},
{
"epoch": 0.19207683073229292,
"grad_norm": 0.39412474632263184,
"learning_rate": 2.257142857142857e-05,
"loss": 1.0524,
"step": 80
},
{
"epoch": 0.20408163265306123,
"grad_norm": 0.457573264837265,
"learning_rate": 2.4e-05,
"loss": 1.09,
"step": 85
},
{
"epoch": 0.21608643457382953,
"grad_norm": 0.4494602084159851,
"learning_rate": 2.5428571428571427e-05,
"loss": 1.0803,
"step": 90
},
{
"epoch": 0.22809123649459784,
"grad_norm": 0.5574950575828552,
"learning_rate": 2.685714285714286e-05,
"loss": 1.0802,
"step": 95
},
{
"epoch": 0.24009603841536614,
"grad_norm": 0.581778883934021,
"learning_rate": 2.8285714285714287e-05,
"loss": 1.0872,
"step": 100
},
{
"epoch": 0.25210084033613445,
"grad_norm": 0.5143195986747742,
"learning_rate": 2.9714285714285717e-05,
"loss": 1.0241,
"step": 105
},
{
"epoch": 0.26410564225690275,
"grad_norm": 0.6538636684417725,
"learning_rate": 2.9999697901093597e-05,
"loss": 1.0171,
"step": 110
},
{
"epoch": 0.27611044417767105,
"grad_norm": 0.46839553117752075,
"learning_rate": 2.99984706451415e-05,
"loss": 1.0474,
"step": 115
},
{
"epoch": 0.28811524609843936,
"grad_norm": 0.45771801471710205,
"learning_rate": 2.999629942814264e-05,
"loss": 1.0063,
"step": 120
},
{
"epoch": 0.30012004801920766,
"grad_norm": 0.5769961476325989,
"learning_rate": 2.9993184386747226e-05,
"loss": 1.0608,
"step": 125
},
{
"epoch": 0.31212484993997597,
"grad_norm": 0.5172315835952759,
"learning_rate": 2.9989125717007107e-05,
"loss": 0.9765,
"step": 130
},
{
"epoch": 0.3241296518607443,
"grad_norm": 0.6754844784736633,
"learning_rate": 2.9984123674363393e-05,
"loss": 1.0156,
"step": 135
},
{
"epoch": 0.33613445378151263,
"grad_norm": 0.8809567093849182,
"learning_rate": 2.9978178573630414e-05,
"loss": 0.9547,
"step": 140
},
{
"epoch": 0.34813925570228094,
"grad_norm": 0.614689826965332,
"learning_rate": 2.9971290788975884e-05,
"loss": 0.9682,
"step": 145
},
{
"epoch": 0.36014405762304924,
"grad_norm": 0.6198326349258423,
"learning_rate": 2.9963460753897364e-05,
"loss": 0.9623,
"step": 150
},
{
"epoch": 0.37214885954381755,
"grad_norm": 0.5520050525665283,
"learning_rate": 2.9954688961194974e-05,
"loss": 0.9664,
"step": 155
},
{
"epoch": 0.38415366146458585,
"grad_norm": 0.5535730719566345,
"learning_rate": 2.994497596294037e-05,
"loss": 0.9632,
"step": 160
},
{
"epoch": 0.39615846338535415,
"grad_norm": 0.7371352314949036,
"learning_rate": 2.9934322370442022e-05,
"loss": 0.9166,
"step": 165
},
{
"epoch": 0.40816326530612246,
"grad_norm": 0.5894187688827515,
"learning_rate": 2.9922728854206704e-05,
"loss": 0.9436,
"step": 170
},
{
"epoch": 0.42016806722689076,
"grad_norm": 0.6794357299804688,
"learning_rate": 2.9910196143897334e-05,
"loss": 0.9549,
"step": 175
},
{
"epoch": 0.43217286914765907,
"grad_norm": 0.7796855568885803,
"learning_rate": 2.9896725028287017e-05,
"loss": 0.9387,
"step": 180
},
{
"epoch": 0.44417767106842737,
"grad_norm": 0.6924332976341248,
"learning_rate": 2.9882316355209423e-05,
"loss": 0.9127,
"step": 185
},
{
"epoch": 0.4561824729891957,
"grad_norm": 0.78730708360672,
"learning_rate": 2.986697103150542e-05,
"loss": 0.9377,
"step": 190
},
{
"epoch": 0.468187274909964,
"grad_norm": 0.6436963677406311,
"learning_rate": 2.9850690022965995e-05,
"loss": 0.9075,
"step": 195
},
{
"epoch": 0.4801920768307323,
"grad_norm": 0.6622775793075562,
"learning_rate": 2.9833474354271487e-05,
"loss": 0.8795,
"step": 200
},
{
"epoch": 0.4921968787515006,
"grad_norm": 0.6374281644821167,
"learning_rate": 2.981532510892707e-05,
"loss": 0.8786,
"step": 205
},
{
"epoch": 0.5042016806722689,
"grad_norm": 0.6543936133384705,
"learning_rate": 2.9796243429194578e-05,
"loss": 0.8963,
"step": 210
},
{
"epoch": 0.5162064825930373,
"grad_norm": 0.8162550330162048,
"learning_rate": 2.977623051602062e-05,
"loss": 0.8398,
"step": 215
},
{
"epoch": 0.5282112845138055,
"grad_norm": 0.6448330283164978,
"learning_rate": 2.9755287628960982e-05,
"loss": 0.8528,
"step": 220
},
{
"epoch": 0.5402160864345739,
"grad_norm": 4.620177268981934,
"learning_rate": 2.9733416086101356e-05,
"loss": 0.8638,
"step": 225
},
{
"epoch": 0.5522208883553421,
"grad_norm": 0.7428641319274902,
"learning_rate": 2.9710617263974385e-05,
"loss": 0.8236,
"step": 230
},
{
"epoch": 0.5642256902761105,
"grad_norm": 0.7523469924926758,
"learning_rate": 2.968689259747304e-05,
"loss": 0.8339,
"step": 235
},
{
"epoch": 0.5762304921968787,
"grad_norm": 0.7694314122200012,
"learning_rate": 2.966224357976029e-05,
"loss": 0.8341,
"step": 240
},
{
"epoch": 0.5882352941176471,
"grad_norm": 8.718358993530273,
"learning_rate": 2.9636671762175145e-05,
"loss": 0.8247,
"step": 245
},
{
"epoch": 0.6002400960384153,
"grad_norm": 0.8020578026771545,
"learning_rate": 2.9610178754135005e-05,
"loss": 0.7834,
"step": 250
},
{
"epoch": 0.6122448979591837,
"grad_norm": 0.7112622857093811,
"learning_rate": 2.958276622303438e-05,
"loss": 0.8256,
"step": 255
},
{
"epoch": 0.6242496998799519,
"grad_norm": 0.7433765530586243,
"learning_rate": 2.9554435894139945e-05,
"loss": 0.7668,
"step": 260
},
{
"epoch": 0.6362545018007203,
"grad_norm": 0.7709916234016418,
"learning_rate": 2.9525189550481955e-05,
"loss": 0.8346,
"step": 265
},
{
"epoch": 0.6482593037214885,
"grad_norm": 1.9189757108688354,
"learning_rate": 2.9495029032742025e-05,
"loss": 0.7604,
"step": 270
},
{
"epoch": 0.6602641056422569,
"grad_norm": 0.8425916433334351,
"learning_rate": 2.9463956239137287e-05,
"loss": 0.8273,
"step": 275
},
{
"epoch": 0.6722689075630253,
"grad_norm": 0.8174909353256226,
"learning_rate": 2.943197312530091e-05,
"loss": 0.7908,
"step": 280
},
{
"epoch": 0.6842737094837935,
"grad_norm": 0.6657884120941162,
"learning_rate": 2.9399081704159034e-05,
"loss": 0.7703,
"step": 285
},
{
"epoch": 0.6962785114045619,
"grad_norm": 0.8427141904830933,
"learning_rate": 2.936528404580408e-05,
"loss": 0.8011,
"step": 290
},
{
"epoch": 0.7082833133253301,
"grad_norm": 0.8150597214698792,
"learning_rate": 2.9330582277364453e-05,
"loss": 0.7654,
"step": 295
},
{
"epoch": 0.7202881152460985,
"grad_norm": 0.7505553364753723,
"learning_rate": 2.929497858287067e-05,
"loss": 0.7376,
"step": 300
},
{
"epoch": 0.7322929171668667,
"grad_norm": 0.7946950793266296,
"learning_rate": 2.925847520311791e-05,
"loss": 0.7239,
"step": 305
},
{
"epoch": 0.7442977190876351,
"grad_norm": 0.8390210866928101,
"learning_rate": 2.9221074435524995e-05,
"loss": 0.7742,
"step": 310
},
{
"epoch": 0.7563025210084033,
"grad_norm": 0.8262509107589722,
"learning_rate": 2.9182778633989756e-05,
"loss": 0.759,
"step": 315
},
{
"epoch": 0.7683073229291717,
"grad_norm": 0.9196308255195618,
"learning_rate": 2.9143590208740925e-05,
"loss": 0.7897,
"step": 320
},
{
"epoch": 0.78031212484994,
"grad_norm": 0.8700764775276184,
"learning_rate": 2.9103511626186444e-05,
"loss": 0.7395,
"step": 325
},
{
"epoch": 0.7923169267707083,
"grad_norm": 0.8262081742286682,
"learning_rate": 2.9062545408758193e-05,
"loss": 0.7038,
"step": 330
},
{
"epoch": 0.8043217286914766,
"grad_norm": 0.8056402206420898,
"learning_rate": 2.902069413475329e-05,
"loss": 0.7026,
"step": 335
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.8297358751296997,
"learning_rate": 2.8977960438171787e-05,
"loss": 0.7308,
"step": 340
},
{
"epoch": 0.8283313325330132,
"grad_norm": 0.8549435138702393,
"learning_rate": 2.8934347008550886e-05,
"loss": 0.6703,
"step": 345
},
{
"epoch": 0.8403361344537815,
"grad_norm": 0.9239634871482849,
"learning_rate": 2.8889856590795705e-05,
"loss": 0.7002,
"step": 350
},
{
"epoch": 0.8523409363745498,
"grad_norm": 0.8247022032737732,
"learning_rate": 2.8844491985006486e-05,
"loss": 0.7076,
"step": 355
},
{
"epoch": 0.8643457382953181,
"grad_norm": 0.9243834614753723,
"learning_rate": 2.8798256046302376e-05,
"loss": 0.6776,
"step": 360
},
{
"epoch": 0.8763505402160864,
"grad_norm": 0.9479813575744629,
"learning_rate": 2.875115168464174e-05,
"loss": 0.6979,
"step": 365
},
{
"epoch": 0.8883553421368547,
"grad_norm": 0.988172173500061,
"learning_rate": 2.8703181864639013e-05,
"loss": 0.7221,
"step": 370
},
{
"epoch": 0.9003601440576231,
"grad_norm": 1.0673149824142456,
"learning_rate": 2.865434960537811e-05,
"loss": 0.6326,
"step": 375
},
{
"epoch": 0.9123649459783914,
"grad_norm": 0.9364232420921326,
"learning_rate": 2.860465798022242e-05,
"loss": 0.6799,
"step": 380
},
{
"epoch": 0.9243697478991597,
"grad_norm": 0.9759359955787659,
"learning_rate": 2.8554110116621376e-05,
"loss": 0.6705,
"step": 385
},
{
"epoch": 0.936374549819928,
"grad_norm": 0.8587977290153503,
"learning_rate": 2.8502709195913617e-05,
"loss": 0.6405,
"step": 390
},
{
"epoch": 0.9483793517406963,
"grad_norm": 0.8970200419425964,
"learning_rate": 2.8450458453126773e-05,
"loss": 0.6404,
"step": 395
},
{
"epoch": 0.9603841536614646,
"grad_norm": 0.8413552045822144,
"learning_rate": 2.839736117677386e-05,
"loss": 0.6313,
"step": 400
},
{
"epoch": 0.9723889555822329,
"grad_norm": 0.9825181365013123,
"learning_rate": 2.8343420708646292e-05,
"loss": 0.5839,
"step": 405
},
{
"epoch": 0.9843937575030012,
"grad_norm": 0.8249124884605408,
"learning_rate": 2.8288640443603587e-05,
"loss": 0.6214,
"step": 410
},
{
"epoch": 0.9963985594237695,
"grad_norm": 1.0160948038101196,
"learning_rate": 2.823302382935968e-05,
"loss": 0.6417,
"step": 415
},
{
"epoch": 1.007202881152461,
"grad_norm": 0.9784197211265564,
"learning_rate": 2.8176574366265963e-05,
"loss": 0.5966,
"step": 420
},
{
"epoch": 1.0192076830732293,
"grad_norm": 0.9859529137611389,
"learning_rate": 2.8119295607090936e-05,
"loss": 0.5378,
"step": 425
},
{
"epoch": 1.0312124849939976,
"grad_norm": 1.0028948783874512,
"learning_rate": 2.8061191156796658e-05,
"loss": 0.5948,
"step": 430
},
{
"epoch": 1.043217286914766,
"grad_norm": 0.9926419854164124,
"learning_rate": 2.8002264672311822e-05,
"loss": 0.5835,
"step": 435
},
{
"epoch": 1.0552220888355341,
"grad_norm": 1.0303875207901,
"learning_rate": 2.7942519862301618e-05,
"loss": 0.4992,
"step": 440
},
{
"epoch": 1.0672268907563025,
"grad_norm": 0.8923658132553101,
"learning_rate": 2.7881960486934314e-05,
"loss": 0.5432,
"step": 445
},
{
"epoch": 1.0792316926770709,
"grad_norm": 0.8442468643188477,
"learning_rate": 2.7820590357644604e-05,
"loss": 0.5246,
"step": 450
},
{
"epoch": 1.0912364945978392,
"grad_norm": 0.9535374641418457,
"learning_rate": 2.7758413336893725e-05,
"loss": 0.5832,
"step": 455
},
{
"epoch": 1.1032412965186074,
"grad_norm": 1.013627290725708,
"learning_rate": 2.7695433337926362e-05,
"loss": 0.5381,
"step": 460
},
{
"epoch": 1.1152460984393757,
"grad_norm": 0.9319940805435181,
"learning_rate": 2.763165432452438e-05,
"loss": 0.5282,
"step": 465
},
{
"epoch": 1.127250900360144,
"grad_norm": 0.8916425704956055,
"learning_rate": 2.756708031075731e-05,
"loss": 0.5252,
"step": 470
},
{
"epoch": 1.1392557022809124,
"grad_norm": 0.9970868825912476,
"learning_rate": 2.7501715360729766e-05,
"loss": 0.5196,
"step": 475
},
{
"epoch": 1.1512605042016806,
"grad_norm": 0.9539552330970764,
"learning_rate": 2.7435563588325627e-05,
"loss": 0.4874,
"step": 480
},
{
"epoch": 1.163265306122449,
"grad_norm": 1.0665792226791382,
"learning_rate": 2.7368629156949137e-05,
"loss": 0.5093,
"step": 485
},
{
"epoch": 1.1752701080432173,
"grad_norm": 1.048183798789978,
"learning_rate": 2.7300916279262868e-05,
"loss": 0.522,
"step": 490
},
{
"epoch": 1.1872749099639857,
"grad_norm": 1.1342055797576904,
"learning_rate": 2.7232429216922583e-05,
"loss": 0.5293,
"step": 495
},
{
"epoch": 1.199279711884754,
"grad_norm": 0.9771571159362793,
"learning_rate": 2.7163172280309028e-05,
"loss": 0.5041,
"step": 500
},
{
"epoch": 1.2112845138055222,
"grad_norm": 1.2090368270874023,
"learning_rate": 2.7093149828256643e-05,
"loss": 0.5112,
"step": 505
},
{
"epoch": 1.2232893157262905,
"grad_norm": 1.1582231521606445,
"learning_rate": 2.702236626777923e-05,
"loss": 0.4962,
"step": 510
},
{
"epoch": 1.2352941176470589,
"grad_norm": 1.037467360496521,
"learning_rate": 2.6950826053792586e-05,
"loss": 0.4958,
"step": 515
},
{
"epoch": 1.247298919567827,
"grad_norm": 0.9874077439308167,
"learning_rate": 2.6878533688834125e-05,
"loss": 0.4696,
"step": 520
},
{
"epoch": 1.2593037214885954,
"grad_norm": 1.03226900100708,
"learning_rate": 2.6805493722779513e-05,
"loss": 0.4994,
"step": 525
},
{
"epoch": 1.2713085234093637,
"grad_norm": 0.986599326133728,
"learning_rate": 2.6731710752556293e-05,
"loss": 0.4679,
"step": 530
},
{
"epoch": 1.283313325330132,
"grad_norm": 0.8920894861221313,
"learning_rate": 2.6657189421854564e-05,
"loss": 0.5125,
"step": 535
},
{
"epoch": 1.2953181272509005,
"grad_norm": 1.0151013135910034,
"learning_rate": 2.6581934420834754e-05,
"loss": 0.508,
"step": 540
},
{
"epoch": 1.3073229291716686,
"grad_norm": 1.0022249221801758,
"learning_rate": 2.65059504858324e-05,
"loss": 0.4258,
"step": 545
},
{
"epoch": 1.319327731092437,
"grad_norm": 0.912742018699646,
"learning_rate": 2.6429242399060063e-05,
"loss": 0.5101,
"step": 550
},
{
"epoch": 1.3313325330132053,
"grad_norm": 1.016350507736206,
"learning_rate": 2.635181498830637e-05,
"loss": 0.4921,
"step": 555
},
{
"epoch": 1.3433373349339737,
"grad_norm": 0.8971051573753357,
"learning_rate": 2.627367312663214e-05,
"loss": 0.4477,
"step": 560
},
{
"epoch": 1.355342136854742,
"grad_norm": 0.9581533074378967,
"learning_rate": 2.6194821732063708e-05,
"loss": 0.4356,
"step": 565
},
{
"epoch": 1.3673469387755102,
"grad_norm": 1.00229012966156,
"learning_rate": 2.6115265767283377e-05,
"loss": 0.4632,
"step": 570
},
{
"epoch": 1.3793517406962785,
"grad_norm": 0.9300605654716492,
"learning_rate": 2.6035010239317106e-05,
"loss": 0.5101,
"step": 575
},
{
"epoch": 1.3913565426170469,
"grad_norm": 0.9333869814872742,
"learning_rate": 2.5954060199219364e-05,
"loss": 0.4389,
"step": 580
},
{
"epoch": 1.403361344537815,
"grad_norm": 1.121485710144043,
"learning_rate": 2.587242074175523e-05,
"loss": 0.4602,
"step": 585
},
{
"epoch": 1.4153661464585834,
"grad_norm": 0.9909285306930542,
"learning_rate": 2.5790097005079766e-05,
"loss": 0.4386,
"step": 590
},
{
"epoch": 1.4273709483793517,
"grad_norm": 1.1501377820968628,
"learning_rate": 2.5707094170414606e-05,
"loss": 0.4577,
"step": 595
},
{
"epoch": 1.43937575030012,
"grad_norm": 1.0231988430023193,
"learning_rate": 2.5623417461721887e-05,
"loss": 0.4762,
"step": 600
},
{
"epoch": 1.4513805522208885,
"grad_norm": 0.9353159070014954,
"learning_rate": 2.5539072145375452e-05,
"loss": 0.4456,
"step": 605
},
{
"epoch": 1.4633853541416566,
"grad_norm": 1.0465285778045654,
"learning_rate": 2.5454063529829405e-05,
"loss": 0.4379,
"step": 610
},
{
"epoch": 1.475390156062425,
"grad_norm": 1.1565396785736084,
"learning_rate": 2.5368396965284017e-05,
"loss": 0.4122,
"step": 615
},
{
"epoch": 1.4873949579831933,
"grad_norm": 1.128082513809204,
"learning_rate": 2.5282077843349e-05,
"loss": 0.4581,
"step": 620
},
{
"epoch": 1.4993997599039615,
"grad_norm": 0.9868873357772827,
"learning_rate": 2.519511159670417e-05,
"loss": 0.4402,
"step": 625
},
{
"epoch": 1.51140456182473,
"grad_norm": 1.0035743713378906,
"learning_rate": 2.510750369875752e-05,
"loss": 0.4489,
"step": 630
},
{
"epoch": 1.5234093637454982,
"grad_norm": 1.1779558658599854,
"learning_rate": 2.5019259663300758e-05,
"loss": 0.4474,
"step": 635
},
{
"epoch": 1.5354141656662665,
"grad_norm": 1.0303417444229126,
"learning_rate": 2.4930385044162282e-05,
"loss": 0.4338,
"step": 640
},
{
"epoch": 1.547418967587035,
"grad_norm": 1.0198581218719482,
"learning_rate": 2.4840885434857608e-05,
"loss": 0.399,
"step": 645
},
{
"epoch": 1.559423769507803,
"grad_norm": 0.9304978847503662,
"learning_rate": 2.4750766468237388e-05,
"loss": 0.4285,
"step": 650
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.9525741338729858,
"learning_rate": 2.466003381613283e-05,
"loss": 0.4253,
"step": 655
},
{
"epoch": 1.5834333733493398,
"grad_norm": 0.9774038791656494,
"learning_rate": 2.456869318899878e-05,
"loss": 0.4608,
"step": 660
},
{
"epoch": 1.595438175270108,
"grad_norm": 0.9870194792747498,
"learning_rate": 2.447675033555429e-05,
"loss": 0.3993,
"step": 665
},
{
"epoch": 1.6074429771908765,
"grad_norm": 0.9398269653320312,
"learning_rate": 2.4384211042420826e-05,
"loss": 0.4121,
"step": 670
},
{
"epoch": 1.6194477791116446,
"grad_norm": 0.9968813061714172,
"learning_rate": 2.4291081133758063e-05,
"loss": 0.4298,
"step": 675
},
{
"epoch": 1.631452581032413,
"grad_norm": 1.1225817203521729,
"learning_rate": 2.4197366470897352e-05,
"loss": 0.3736,
"step": 680
},
{
"epoch": 1.6434573829531813,
"grad_norm": 1.1227293014526367,
"learning_rate": 2.4103072951972794e-05,
"loss": 0.4418,
"step": 685
},
{
"epoch": 1.6554621848739495,
"grad_norm": 1.0404561758041382,
"learning_rate": 2.400820651155005e-05,
"loss": 0.3952,
"step": 690
},
{
"epoch": 1.667466986794718,
"grad_norm": 0.9955992102622986,
"learning_rate": 2.3912773120252822e-05,
"loss": 0.387,
"step": 695
},
{
"epoch": 1.6794717887154862,
"grad_norm": 1.0180872678756714,
"learning_rate": 2.3816778784387097e-05,
"loss": 0.3817,
"step": 700
},
{
"epoch": 1.6914765906362546,
"grad_norm": 0.9844180345535278,
"learning_rate": 2.372022954556311e-05,
"loss": 0.4103,
"step": 705
},
{
"epoch": 1.703481392557023,
"grad_norm": 1.05225670337677,
"learning_rate": 2.3623131480315107e-05,
"loss": 0.367,
"step": 710
},
{
"epoch": 1.715486194477791,
"grad_norm": 1.0176974534988403,
"learning_rate": 2.352549069971891e-05,
"loss": 0.3605,
"step": 715
},
{
"epoch": 1.7274909963985594,
"grad_norm": 1.0177741050720215,
"learning_rate": 2.3427313349007302e-05,
"loss": 0.3782,
"step": 720
},
{
"epoch": 1.7394957983193278,
"grad_norm": 0.9692652821540833,
"learning_rate": 2.3328605607183255e-05,
"loss": 0.3554,
"step": 725
},
{
"epoch": 1.751500600240096,
"grad_norm": 0.892565131187439,
"learning_rate": 2.322937368663105e-05,
"loss": 0.3714,
"step": 730
},
{
"epoch": 1.7635054021608645,
"grad_norm": 0.9849589467048645,
"learning_rate": 2.312962383272529e-05,
"loss": 0.3648,
"step": 735
},
{
"epoch": 1.7755102040816326,
"grad_norm": 1.1365302801132202,
"learning_rate": 2.3029362323437823e-05,
"loss": 0.3451,
"step": 740
},
{
"epoch": 1.787515006002401,
"grad_norm": 0.9451067447662354,
"learning_rate": 2.292859546894263e-05,
"loss": 0.3465,
"step": 745
},
{
"epoch": 1.7995198079231693,
"grad_norm": 1.0000107288360596,
"learning_rate": 2.282732961121869e-05,
"loss": 0.3297,
"step": 750
},
{
"epoch": 1.8115246098439375,
"grad_norm": 0.9930965900421143,
"learning_rate": 2.2725571123650814e-05,
"loss": 0.3413,
"step": 755
},
{
"epoch": 1.8235294117647058,
"grad_norm": 1.1068987846374512,
"learning_rate": 2.2623326410628537e-05,
"loss": 0.3501,
"step": 760
},
{
"epoch": 1.8355342136854742,
"grad_norm": 1.0239458084106445,
"learning_rate": 2.2520601907143045e-05,
"loss": 0.3738,
"step": 765
},
{
"epoch": 1.8475390156062423,
"grad_norm": 1.0029224157333374,
"learning_rate": 2.2417404078382153e-05,
"loss": 0.3683,
"step": 770
},
{
"epoch": 1.859543817527011,
"grad_norm": 1.0009812116622925,
"learning_rate": 2.2313739419323443e-05,
"loss": 0.3782,
"step": 775
},
{
"epoch": 1.871548619447779,
"grad_norm": 1.0867785215377808,
"learning_rate": 2.220961445432546e-05,
"loss": 0.3504,
"step": 780
},
{
"epoch": 1.8835534213685474,
"grad_norm": 1.0827844142913818,
"learning_rate": 2.2105035736717086e-05,
"loss": 0.3416,
"step": 785
},
{
"epoch": 1.8955582232893158,
"grad_norm": 0.9669961929321289,
"learning_rate": 2.2000009848385107e-05,
"loss": 0.3535,
"step": 790
},
{
"epoch": 1.907563025210084,
"grad_norm": 1.0839868783950806,
"learning_rate": 2.189454339935996e-05,
"loss": 0.3245,
"step": 795
},
{
"epoch": 1.9195678271308525,
"grad_norm": 1.1661854982376099,
"learning_rate": 2.1788643027399725e-05,
"loss": 0.3481,
"step": 800
},
{
"epoch": 1.9315726290516206,
"grad_norm": 1.0360976457595825,
"learning_rate": 2.1682315397572344e-05,
"loss": 0.3725,
"step": 805
},
{
"epoch": 1.943577430972389,
"grad_norm": 0.9784966707229614,
"learning_rate": 2.157556720183616e-05,
"loss": 0.318,
"step": 810
},
{
"epoch": 1.9555822328931574,
"grad_norm": 1.1009594202041626,
"learning_rate": 2.1468405158618744e-05,
"loss": 0.3223,
"step": 815
},
{
"epoch": 1.9675870348139255,
"grad_norm": 0.9810320734977722,
"learning_rate": 2.136083601239403e-05,
"loss": 0.3622,
"step": 820
},
{
"epoch": 1.9795918367346939,
"grad_norm": 1.039110779762268,
"learning_rate": 2.125286653325787e-05,
"loss": 0.3408,
"step": 825
},
{
"epoch": 1.9915966386554622,
"grad_norm": 1.1904340982437134,
"learning_rate": 2.114450351650193e-05,
"loss": 0.3264,
"step": 830
}
],
"logging_steps": 5,
"max_steps": 2085,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.3201796128560906e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}