{
"best_metric": 0.09759029000997543,
"best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi_t2/checkpoint-1384",
"epoch": 2.9994592321095954,
"eval_steps": 173,
"global_step": 1560,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019227302769933306,
"grad_norm": 0.8343315720558167,
"learning_rate": 3.846153846153846e-06,
"loss": 0.6641,
"step": 10
},
{
"epoch": 0.03845460553986661,
"grad_norm": 0.461275190114975,
"learning_rate": 7.692307692307692e-06,
"loss": 0.6363,
"step": 20
},
{
"epoch": 0.05768190830979992,
"grad_norm": 0.5201115608215332,
"learning_rate": 1.153846153846154e-05,
"loss": 0.6127,
"step": 30
},
{
"epoch": 0.07690921107973323,
"grad_norm": 0.3447195589542389,
"learning_rate": 1.5384615384615384e-05,
"loss": 0.5328,
"step": 40
},
{
"epoch": 0.09613651384966652,
"grad_norm": 0.37843698263168335,
"learning_rate": 1.923076923076923e-05,
"loss": 0.4072,
"step": 50
},
{
"epoch": 0.11536381661959984,
"grad_norm": 0.3421487510204315,
"learning_rate": 2.307692307692308e-05,
"loss": 0.3608,
"step": 60
},
{
"epoch": 0.13459111938953314,
"grad_norm": 0.28197693824768066,
"learning_rate": 2.6923076923076923e-05,
"loss": 0.3121,
"step": 70
},
{
"epoch": 0.15381842215946645,
"grad_norm": 0.2969784438610077,
"learning_rate": 2.999986518943083e-05,
"loss": 0.2089,
"step": 80
},
{
"epoch": 0.17304572492939974,
"grad_norm": 0.35552915930747986,
"learning_rate": 2.999514707393943e-05,
"loss": 0.238,
"step": 90
},
{
"epoch": 0.19227302769933305,
"grad_norm": 0.36513009667396545,
"learning_rate": 2.9983690852978995e-05,
"loss": 0.2228,
"step": 100
},
{
"epoch": 0.21150033046926636,
"grad_norm": 0.3532279431819916,
"learning_rate": 2.996550167443001e-05,
"loss": 0.2321,
"step": 110
},
{
"epoch": 0.23072763323919968,
"grad_norm": 0.35095521807670593,
"learning_rate": 2.9940587711643693e-05,
"loss": 0.1888,
"step": 120
},
{
"epoch": 0.24995493600913296,
"grad_norm": 0.2918124198913574,
"learning_rate": 2.9908960159769243e-05,
"loss": 0.1585,
"step": 130
},
{
"epoch": 0.2691822387790663,
"grad_norm": 0.372545063495636,
"learning_rate": 2.9870633230723313e-05,
"loss": 0.1973,
"step": 140
},
{
"epoch": 0.28840954154899956,
"grad_norm": 0.36561667919158936,
"learning_rate": 2.9825624146803807e-05,
"loss": 0.1859,
"step": 150
},
{
"epoch": 0.3076368443189329,
"grad_norm": 0.38816386461257935,
"learning_rate": 2.977395313295105e-05,
"loss": 0.1965,
"step": 160
},
{
"epoch": 0.3268641470888662,
"grad_norm": 0.3820708096027374,
"learning_rate": 2.971564340765961e-05,
"loss": 0.1516,
"step": 170
},
{
"epoch": 0.3326323379198462,
"eval_loss": 0.13869456946849823,
"eval_runtime": 203.7684,
"eval_samples_per_second": 5.261,
"eval_steps_per_second": 5.261,
"step": 173
},
{
"epoch": 0.34609144985879947,
"grad_norm": 0.41539186239242554,
"learning_rate": 2.9650721172545048e-05,
"loss": 0.1895,
"step": 180
},
{
"epoch": 0.3653187526287328,
"grad_norm": 0.4073317348957062,
"learning_rate": 2.9579215600570152e-05,
"loss": 0.1545,
"step": 190
},
{
"epoch": 0.3845460553986661,
"grad_norm": 0.3595292866230011,
"learning_rate": 2.950115882293597e-05,
"loss": 0.1629,
"step": 200
},
{
"epoch": 0.40377335816859944,
"grad_norm": 0.28112220764160156,
"learning_rate": 2.9416585914643627e-05,
"loss": 0.1576,
"step": 210
},
{
"epoch": 0.4230006609385327,
"grad_norm": 0.4690268933773041,
"learning_rate": 2.932553487873324e-05,
"loss": 0.1614,
"step": 220
},
{
"epoch": 0.442227963708466,
"grad_norm": 0.5640648007392883,
"learning_rate": 2.922804662920718e-05,
"loss": 0.1689,
"step": 230
},
{
"epoch": 0.46145526647839935,
"grad_norm": 0.6498159766197205,
"learning_rate": 2.912416497264529e-05,
"loss": 0.1704,
"step": 240
},
{
"epoch": 0.48068256924833264,
"grad_norm": 0.46260717511177063,
"learning_rate": 2.9013936588520235e-05,
"loss": 0.1938,
"step": 250
},
{
"epoch": 0.4999098720182659,
"grad_norm": 0.44871020317077637,
"learning_rate": 2.8897411008222026e-05,
"loss": 0.1674,
"step": 260
},
{
"epoch": 0.5191371747881992,
"grad_norm": 0.5815374255180359,
"learning_rate": 2.8774640592800948e-05,
"loss": 0.1424,
"step": 270
},
{
"epoch": 0.5383644775581325,
"grad_norm": 0.5914287567138672,
"learning_rate": 2.864568050943899e-05,
"loss": 0.1818,
"step": 280
},
{
"epoch": 0.5575917803280659,
"grad_norm": 0.5079910755157471,
"learning_rate": 2.8510588706660338e-05,
"loss": 0.1633,
"step": 290
},
{
"epoch": 0.5768190830979991,
"grad_norm": 0.5987181067466736,
"learning_rate": 2.836942588829208e-05,
"loss": 0.1455,
"step": 300
},
{
"epoch": 0.5960463858679325,
"grad_norm": 0.6342700719833374,
"learning_rate": 2.8222255486186798e-05,
"loss": 0.1522,
"step": 310
},
{
"epoch": 0.6152736886378658,
"grad_norm": 0.5555654764175415,
"learning_rate": 2.8069143631719276e-05,
"loss": 0.1394,
"step": 320
},
{
"epoch": 0.634500991407799,
"grad_norm": 0.5811598300933838,
"learning_rate": 2.7910159126070257e-05,
"loss": 0.1623,
"step": 330
},
{
"epoch": 0.6537282941777324,
"grad_norm": 0.7482380270957947,
"learning_rate": 2.774537340931043e-05,
"loss": 0.1458,
"step": 340
},
{
"epoch": 0.6652646758396924,
"eval_loss": 0.11664145439863205,
"eval_runtime": 203.7283,
"eval_samples_per_second": 5.262,
"eval_steps_per_second": 5.262,
"step": 346
},
{
"epoch": 0.6729555969476657,
"grad_norm": 0.5891281962394714,
"learning_rate": 2.7574860528298677e-05,
"loss": 0.1406,
"step": 350
},
{
"epoch": 0.6921828997175989,
"grad_norm": 0.5311967730522156,
"learning_rate": 2.739869710340894e-05,
"loss": 0.1525,
"step": 360
},
{
"epoch": 0.7114102024875323,
"grad_norm": 0.5632440447807312,
"learning_rate": 2.7216962294100668e-05,
"loss": 0.1392,
"step": 370
},
{
"epoch": 0.7306375052574656,
"grad_norm": 0.6121944785118103,
"learning_rate": 2.7029737763348316e-05,
"loss": 0.1602,
"step": 380
},
{
"epoch": 0.7498648080273989,
"grad_norm": 0.6687933802604675,
"learning_rate": 2.6837107640945904e-05,
"loss": 0.1583,
"step": 390
},
{
"epoch": 0.7690921107973322,
"grad_norm": 0.5596562623977661,
"learning_rate": 2.6639158485703087e-05,
"loss": 0.1667,
"step": 400
},
{
"epoch": 0.7883194135672655,
"grad_norm": 0.7156023979187012,
"learning_rate": 2.6435979246549727e-05,
"loss": 0.1438,
"step": 410
},
{
"epoch": 0.8075467163371989,
"grad_norm": 0.5293470621109009,
"learning_rate": 2.6227661222566516e-05,
"loss": 0.1865,
"step": 420
},
{
"epoch": 0.8267740191071321,
"grad_norm": 0.5734898447990417,
"learning_rate": 2.6014298021959482e-05,
"loss": 0.1477,
"step": 430
},
{
"epoch": 0.8460013218770654,
"grad_norm": 0.5020838975906372,
"learning_rate": 2.5795985519996915e-05,
"loss": 0.1303,
"step": 440
},
{
"epoch": 0.8652286246469988,
"grad_norm": 0.6881216764450073,
"learning_rate": 2.5572821815927615e-05,
"loss": 0.1429,
"step": 450
},
{
"epoch": 0.884455927416932,
"grad_norm": 0.4911053776741028,
"learning_rate": 2.5344907188899715e-05,
"loss": 0.1547,
"step": 460
},
{
"epoch": 0.9036832301868654,
"grad_norm": 0.8948251605033875,
"learning_rate": 2.511234405290005e-05,
"loss": 0.136,
"step": 470
},
{
"epoch": 0.9229105329567987,
"grad_norm": 0.4923257529735565,
"learning_rate": 2.4875236910734145e-05,
"loss": 0.123,
"step": 480
},
{
"epoch": 0.9421378357267319,
"grad_norm": 0.6984175443649292,
"learning_rate": 2.4633692307067654e-05,
"loss": 0.1519,
"step": 490
},
{
"epoch": 0.9613651384966653,
"grad_norm": 0.6080285310745239,
"learning_rate": 2.4387818780550236e-05,
"loss": 0.1267,
"step": 500
},
{
"epoch": 0.9805924412665986,
"grad_norm": 0.6914392113685608,
"learning_rate": 2.4137726815043483e-05,
"loss": 0.1664,
"step": 510
},
{
"epoch": 0.9978970137595385,
"eval_loss": 0.10869105905294418,
"eval_runtime": 203.0899,
"eval_samples_per_second": 5.278,
"eval_steps_per_second": 5.278,
"step": 519
},
{
"epoch": 0.9998197440365318,
"grad_norm": 0.5257564187049866,
"learning_rate": 2.3883528789974703e-05,
"loss": 0.1474,
"step": 520
},
{
"epoch": 1.0190470468064652,
"grad_norm": 0.5023784041404724,
"learning_rate": 2.3625338929838952e-05,
"loss": 0.1057,
"step": 530
},
{
"epoch": 1.0382743495763984,
"grad_norm": 0.6757215857505798,
"learning_rate": 2.3363273252872003e-05,
"loss": 0.1477,
"step": 540
},
{
"epoch": 1.0575016523463319,
"grad_norm": 0.5462861657142639,
"learning_rate": 2.3097449518917257e-05,
"loss": 0.1205,
"step": 550
},
{
"epoch": 1.076728955116265,
"grad_norm": 0.7116460800170898,
"learning_rate": 2.2827987176510082e-05,
"loss": 0.1164,
"step": 560
},
{
"epoch": 1.0959562578861983,
"grad_norm": 0.5546866655349731,
"learning_rate": 2.255500730920332e-05,
"loss": 0.1304,
"step": 570
},
{
"epoch": 1.1151835606561318,
"grad_norm": 0.4724363088607788,
"learning_rate": 2.2278632581158095e-05,
"loss": 0.1295,
"step": 580
},
{
"epoch": 1.134410863426065,
"grad_norm": 1.0140602588653564,
"learning_rate": 2.1998987182024384e-05,
"loss": 0.1122,
"step": 590
},
{
"epoch": 1.1536381661959982,
"grad_norm": 0.790867805480957,
"learning_rate": 2.1716196771136115e-05,
"loss": 0.1169,
"step": 600
},
{
"epoch": 1.1728654689659317,
"grad_norm": 0.6885173320770264,
"learning_rate": 2.1430388421045812e-05,
"loss": 0.1352,
"step": 610
},
{
"epoch": 1.192092771735865,
"grad_norm": 0.6807064414024353,
"learning_rate": 2.1141690560424253e-05,
"loss": 0.1226,
"step": 620
},
{
"epoch": 1.2113200745057981,
"grad_norm": 0.5460578799247742,
"learning_rate": 2.0850232916350735e-05,
"loss": 0.1214,
"step": 630
},
{
"epoch": 1.2305473772757316,
"grad_norm": 0.7612866163253784,
"learning_rate": 2.05561464560199e-05,
"loss": 0.1164,
"step": 640
},
{
"epoch": 1.2497746800456648,
"grad_norm": 0.38693496584892273,
"learning_rate": 2.025956332789132e-05,
"loss": 0.1398,
"step": 650
},
{
"epoch": 1.269001982815598,
"grad_norm": 0.5924756526947021,
"learning_rate": 1.996061680230823e-05,
"loss": 0.1214,
"step": 660
},
{
"epoch": 1.2882292855855315,
"grad_norm": 0.7164785861968994,
"learning_rate": 1.9659441211612234e-05,
"loss": 0.1226,
"step": 670
},
{
"epoch": 1.3074565883554647,
"grad_norm": 0.5729460716247559,
"learning_rate": 1.93561718897807e-05,
"loss": 0.1481,
"step": 680
},
{
"epoch": 1.326683891125398,
"grad_norm": 0.6892575025558472,
"learning_rate": 1.9050945111614142e-05,
"loss": 0.1498,
"step": 690
},
{
"epoch": 1.3305293516793848,
"eval_loss": 0.10392692685127258,
"eval_runtime": 203.0603,
"eval_samples_per_second": 5.279,
"eval_steps_per_second": 5.279,
"step": 692
},
{
"epoch": 1.3459111938953314,
"grad_norm": 0.5614696145057678,
"learning_rate": 1.8743898031500772e-05,
"loss": 0.1105,
"step": 700
},
{
"epoch": 1.3651384966652647,
"grad_norm": 0.6355635523796082,
"learning_rate": 1.843516862178589e-05,
"loss": 0.1291,
"step": 710
},
{
"epoch": 1.3843657994351979,
"grad_norm": 0.7176327109336853,
"learning_rate": 1.8124895610773645e-05,
"loss": 0.1387,
"step": 720
},
{
"epoch": 1.4035931022051313,
"grad_norm": 0.9504517316818237,
"learning_rate": 1.781321842038914e-05,
"loss": 0.1346,
"step": 730
},
{
"epoch": 1.4228204049750646,
"grad_norm": 0.7893795371055603,
"learning_rate": 1.7500277103528883e-05,
"loss": 0.1224,
"step": 740
},
{
"epoch": 1.4420477077449978,
"grad_norm": 0.5944446921348572,
"learning_rate": 1.718621228112764e-05,
"loss": 0.1095,
"step": 750
},
{
"epoch": 1.4612750105149312,
"grad_norm": 0.5783366560935974,
"learning_rate": 1.6871165078970118e-05,
"loss": 0.1116,
"step": 760
},
{
"epoch": 1.4805023132848645,
"grad_norm": 0.6842564940452576,
"learning_rate": 1.6555277064275717e-05,
"loss": 0.1215,
"step": 770
},
{
"epoch": 1.4997296160547977,
"grad_norm": 0.5818539261817932,
"learning_rate": 1.623869018208499e-05,
"loss": 0.1283,
"step": 780
},
{
"epoch": 1.5189569188247312,
"grad_norm": 0.658789336681366,
"learning_rate": 1.5921546691476264e-05,
"loss": 0.1168,
"step": 790
},
{
"epoch": 1.5381842215946644,
"grad_norm": 0.7144546508789062,
"learning_rate": 1.5603989101641228e-05,
"loss": 0.1247,
"step": 800
},
{
"epoch": 1.5574115243645976,
"grad_norm": 0.5796612501144409,
"learning_rate": 1.5286160107848036e-05,
"loss": 0.1279,
"step": 810
},
{
"epoch": 1.576638827134531,
"grad_norm": 0.6537405252456665,
"learning_rate": 1.4968202527320868e-05,
"loss": 0.1396,
"step": 820
},
{
"epoch": 1.5958661299044643,
"grad_norm": 0.7590240836143494,
"learning_rate": 1.4650259235064662e-05,
"loss": 0.1183,
"step": 830
},
{
"epoch": 1.6150934326743975,
"grad_norm": 0.6850148439407349,
"learning_rate": 1.43324730996639e-05,
"loss": 0.1277,
"step": 840
},
{
"epoch": 1.634320735444331,
"grad_norm": 0.7500022053718567,
"learning_rate": 1.4014986919084228e-05,
"loss": 0.1285,
"step": 850
},
{
"epoch": 1.6535480382142642,
"grad_norm": 0.6234251856803894,
"learning_rate": 1.3697943356505897e-05,
"loss": 0.1071,
"step": 860
},
{
"epoch": 1.663161689599231,
"eval_loss": 0.10122876614332199,
"eval_runtime": 203.2008,
"eval_samples_per_second": 5.276,
"eval_steps_per_second": 5.276,
"step": 865
},
{
"epoch": 1.6727753409841974,
"grad_norm": 1.0110090970993042,
"learning_rate": 1.3381484876217669e-05,
"loss": 0.1252,
"step": 870
},
{
"epoch": 1.692002643754131,
"grad_norm": 0.8749274611473083,
"learning_rate": 1.3065753679600186e-05,
"loss": 0.1086,
"step": 880
},
{
"epoch": 1.7112299465240641,
"grad_norm": 0.563439667224884,
"learning_rate": 1.2750891641227418e-05,
"loss": 0.1273,
"step": 890
},
{
"epoch": 1.7304572492939974,
"grad_norm": 0.6679959297180176,
"learning_rate": 1.2437040245114966e-05,
"loss": 0.1124,
"step": 900
},
{
"epoch": 1.7496845520639308,
"grad_norm": 0.8824312090873718,
"learning_rate": 1.2124340521143929e-05,
"loss": 0.1275,
"step": 910
},
{
"epoch": 1.768911854833864,
"grad_norm": 0.6557831168174744,
"learning_rate": 1.1812932981688715e-05,
"loss": 0.1207,
"step": 920
},
{
"epoch": 1.7881391576037973,
"grad_norm": 0.5608255863189697,
"learning_rate": 1.1502957558477537e-05,
"loss": 0.1095,
"step": 930
},
{
"epoch": 1.8073664603737307,
"grad_norm": 0.8327426910400391,
"learning_rate": 1.119455353971371e-05,
"loss": 0.1423,
"step": 940
},
{
"epoch": 1.826593763143664,
"grad_norm": 0.7187633514404297,
"learning_rate": 1.0887859507486183e-05,
"loss": 0.1142,
"step": 950
},
{
"epoch": 1.8458210659135972,
"grad_norm": 0.7449970841407776,
"learning_rate": 1.0583013275497318e-05,
"loss": 0.1315,
"step": 960
},
{
"epoch": 1.8650483686835306,
"grad_norm": 0.5967345237731934,
"learning_rate": 1.0280151827136e-05,
"loss": 0.1147,
"step": 970
},
{
"epoch": 1.884275671453464,
"grad_norm": 0.8269909024238586,
"learning_rate": 9.979411253923813e-06,
"loss": 0.131,
"step": 980
},
{
"epoch": 1.903502974223397,
"grad_norm": 0.6085448861122131,
"learning_rate": 9.680926694361966e-06,
"loss": 0.1339,
"step": 990
},
{
"epoch": 1.9227302769933305,
"grad_norm": 0.7495784163475037,
"learning_rate": 9.384832273206514e-06,
"loss": 0.1324,
"step": 1000
},
{
"epoch": 1.941957579763264,
"grad_norm": 0.8735560178756714,
"learning_rate": 9.091261041199051e-06,
"loss": 0.1225,
"step": 1010
},
{
"epoch": 1.961184882533197,
"grad_norm": 0.7350926995277405,
"learning_rate": 8.80034491528005e-06,
"loss": 0.1108,
"step": 1020
},
{
"epoch": 1.9804121853031305,
"grad_norm": 0.7938897013664246,
"learning_rate": 8.51221461931167e-06,
"loss": 0.1416,
"step": 1030
},
{
"epoch": 1.995794027519077,
"eval_loss": 0.09892405569553375,
"eval_runtime": 202.9814,
"eval_samples_per_second": 5.281,
"eval_steps_per_second": 5.281,
"step": 1038
},
{
"epoch": 1.999639488073064,
"grad_norm": 1.0423219203948975,
"learning_rate": 8.226999625336663e-06,
"loss": 0.1597,
"step": 1040
},
{
"epoch": 2.018866790842997,
"grad_norm": 0.7138562202453613,
"learning_rate": 7.944828095399802e-06,
"loss": 0.1114,
"step": 1050
},
{
"epoch": 2.0380940936129304,
"grad_norm": 0.6143700480461121,
"learning_rate": 7.66582682395797e-06,
"loss": 0.1066,
"step": 1060
},
{
"epoch": 2.057321396382864,
"grad_norm": 0.8447745442390442,
"learning_rate": 7.390121180904763e-06,
"loss": 0.113,
"step": 1070
},
{
"epoch": 2.076548699152797,
"grad_norm": 0.41642722487449646,
"learning_rate": 7.117835055235195e-06,
"loss": 0.1095,
"step": 1080
},
{
"epoch": 2.0957760019227303,
"grad_norm": 0.7106382250785828,
"learning_rate": 6.849090799375931e-06,
"loss": 0.1214,
"step": 1090
},
{
"epoch": 2.1150033046926637,
"grad_norm": 0.6263849139213562,
"learning_rate": 6.584009174205888e-06,
"loss": 0.1293,
"step": 1100
},
{
"epoch": 2.1342306074625967,
"grad_norm": 0.7726497054100037,
"learning_rate": 6.322709294792051e-06,
"loss": 0.1394,
"step": 1110
},
{
"epoch": 2.15345791023253,
"grad_norm": 0.7134016752243042,
"learning_rate": 6.065308576864859e-06,
"loss": 0.1039,
"step": 1120
},
{
"epoch": 2.1726852130024636,
"grad_norm": 0.6412186026573181,
"learning_rate": 5.811922684057118e-06,
"loss": 0.1151,
"step": 1130
},
{
"epoch": 2.1919125157723967,
"grad_norm": 0.9640927314758301,
"learning_rate": 5.5626654759303085e-06,
"loss": 0.1247,
"step": 1140
},
{
"epoch": 2.21113981854233,
"grad_norm": 0.8550817370414734,
"learning_rate": 5.3176489568115e-06,
"loss": 0.1069,
"step": 1150
},
{
"epoch": 2.2303671213122636,
"grad_norm": 0.6239781975746155,
"learning_rate": 5.0769832254639355e-06,
"loss": 0.1013,
"step": 1160
},
{
"epoch": 2.2495944240821966,
"grad_norm": 0.7141818404197693,
"learning_rate": 4.840776425613887e-06,
"loss": 0.0976,
"step": 1170
},
{
"epoch": 2.26882172685213,
"grad_norm": 0.48725616931915283,
"learning_rate": 4.609134697356009e-06,
"loss": 0.1049,
"step": 1180
},
{
"epoch": 2.2880490296220635,
"grad_norm": 0.8563340902328491,
"learning_rate": 4.382162129459055e-06,
"loss": 0.0988,
"step": 1190
},
{
"epoch": 2.3072763323919965,
"grad_norm": 0.7721908092498779,
"learning_rate": 4.159960712593301e-06,
"loss": 0.1022,
"step": 1200
},
{
"epoch": 2.32650363516193,
"grad_norm": 0.6547017097473145,
"learning_rate": 3.942630293500821e-06,
"loss": 0.1321,
"step": 1210
},
{
"epoch": 2.3284263654389235,
"eval_loss": 0.09838072210550308,
"eval_runtime": 203.6342,
"eval_samples_per_second": 5.264,
"eval_steps_per_second": 5.264,
"step": 1211
},
{
"epoch": 2.3457309379318634,
"grad_norm": 0.6637281775474548,
"learning_rate": 3.730268530129097e-06,
"loss": 0.0987,
"step": 1220
},
{
"epoch": 2.3649582407017964,
"grad_norm": 0.9455267786979675,
"learning_rate": 3.522970847748196e-06,
"loss": 0.1286,
"step": 1230
},
{
"epoch": 2.38418554347173,
"grad_norm": 0.7048280239105225,
"learning_rate": 3.3208303960711895e-06,
"loss": 0.0998,
"step": 1240
},
{
"epoch": 2.4034128462416633,
"grad_norm": 0.8539944291114807,
"learning_rate": 3.1239380073971e-06,
"loss": 0.114,
"step": 1250
},
{
"epoch": 2.4226401490115963,
"grad_norm": 0.8466408252716064,
"learning_rate": 2.9323821557952007e-06,
"loss": 0.0986,
"step": 1260
},
{
"epoch": 2.4418674517815298,
"grad_norm": 0.6942047476768494,
"learning_rate": 2.7462489173489636e-06,
"loss": 0.1112,
"step": 1270
},
{
"epoch": 2.461094754551463,
"grad_norm": 0.7220749258995056,
"learning_rate": 2.5656219314775886e-06,
"loss": 0.1083,
"step": 1280
},
{
"epoch": 2.480322057321396,
"grad_norm": 0.8154662847518921,
"learning_rate": 2.3905823633523997e-06,
"loss": 0.0981,
"step": 1290
},
{
"epoch": 2.4995493600913297,
"grad_norm": 0.7933881282806396,
"learning_rate": 2.221208867425096e-06,
"loss": 0.0965,
"step": 1300
},
{
"epoch": 2.518776662861263,
"grad_norm": 0.770427405834198,
"learning_rate": 2.0575775520841878e-06,
"loss": 0.1399,
"step": 1310
},
{
"epoch": 2.538003965631196,
"grad_norm": 0.7757827043533325,
"learning_rate": 1.8997619454554955e-06,
"loss": 0.1022,
"step": 1320
},
{
"epoch": 2.5572312684011296,
"grad_norm": 0.8519064784049988,
"learning_rate": 1.7478329623621226e-06,
"loss": 0.1114,
"step": 1330
},
{
"epoch": 2.576458571171063,
"grad_norm": 0.4863261878490448,
"learning_rate": 1.601858872458702e-06,
"loss": 0.0964,
"step": 1340
},
{
"epoch": 2.5956858739409965,
"grad_norm": 0.7736539244651794,
"learning_rate": 1.4619052695542612e-06,
"loss": 0.1062,
"step": 1350
},
{
"epoch": 2.6149131767109295,
"grad_norm": 0.8441415429115295,
"learning_rate": 1.3280350421374888e-06,
"loss": 0.1158,
"step": 1360
},
{
"epoch": 2.634140479480863,
"grad_norm": 1.0856326818466187,
"learning_rate": 1.2003083451176366e-06,
"loss": 0.1314,
"step": 1370
},
{
"epoch": 2.653367782250796,
"grad_norm": 0.6666831374168396,
"learning_rate": 1.0787825727937783e-06,
"loss": 0.0889,
"step": 1380
},
{
"epoch": 2.6610587033587696,
"eval_loss": 0.09759029000997543,
"eval_runtime": 202.9122,
"eval_samples_per_second": 5.283,
"eval_steps_per_second": 5.283,
"step": 1384
},
{
"epoch": 2.6725950850207294,
"grad_norm": 0.7304587960243225,
"learning_rate": 9.635123330645218e-07,
"loss": 0.1098,
"step": 1390
},
{
"epoch": 2.691822387790663,
"grad_norm": 0.7729344964027405,
"learning_rate": 8.545494228898448e-07,
"loss": 0.0874,
"step": 1400
},
{
"epoch": 2.7110496905605963,
"grad_norm": 0.8187854886054993,
"learning_rate": 7.519428050159765e-07,
"loss": 0.1295,
"step": 1410
},
{
"epoch": 2.7302769933305293,
"grad_norm": 0.5940708518028259,
"learning_rate": 6.557385859738985e-07,
"loss": 0.1194,
"step": 1420
},
{
"epoch": 2.7495042961004628,
"grad_norm": 0.8048242926597595,
"learning_rate": 5.659799953612438e-07,
"loss": 0.1112,
"step": 1430
},
{
"epoch": 2.7687315988703958,
"grad_norm": 0.65644371509552,
"learning_rate": 4.827073664169812e-07,
"loss": 0.1061,
"step": 1440
},
{
"epoch": 2.7879589016403292,
"grad_norm": 0.733321487903595,
"learning_rate": 4.059581178975741e-07,
"loss": 0.1187,
"step": 1450
},
{
"epoch": 2.8071862044102627,
"grad_norm": 0.48416727781295776,
"learning_rate": 3.357667372627754e-07,
"loss": 0.1183,
"step": 1460
},
{
"epoch": 2.826413507180196,
"grad_norm": 0.5195744633674622,
"learning_rate": 2.7216476517860245e-07,
"loss": 0.0869,
"step": 1470
},
{
"epoch": 2.845640809950129,
"grad_norm": 0.6493935585021973,
"learning_rate": 2.151807813444606e-07,
"loss": 0.0867,
"step": 1480
},
{
"epoch": 2.8648681127200626,
"grad_norm": 0.7516520023345947,
"learning_rate": 1.6484039165079455e-07,
"loss": 0.1259,
"step": 1490
},
{
"epoch": 2.8840954154899956,
"grad_norm": 0.7778034210205078,
"learning_rate": 1.211662166730071e-07,
"loss": 0.1229,
"step": 1500
},
{
"epoch": 2.903322718259929,
"grad_norm": 0.6024238467216492,
"learning_rate": 8.417788150686001e-08,
"loss": 0.0972,
"step": 1510
},
{
"epoch": 2.9225500210298625,
"grad_norm": 0.6645819544792175,
"learning_rate": 5.389200694988494e-08,
"loss": 0.1297,
"step": 1520
},
{
"epoch": 2.941777323799796,
"grad_norm": 0.9006750583648682,
"learning_rate": 3.032220203278924e-08,
"loss": 0.1088,
"step": 1530
},
{
"epoch": 2.961004626569729,
"grad_norm": 0.7198818325996399,
"learning_rate": 1.3479057904204339e-08,
"loss": 0.1041,
"step": 1540
},
{
"epoch": 2.9802319293396624,
"grad_norm": 0.6966450810432434,
"learning_rate": 3.3701430715277202e-09,
"loss": 0.09,
"step": 1550
},
{
"epoch": 2.9936910412786157,
"eval_loss": 0.09760043770074844,
"eval_runtime": 202.9049,
"eval_samples_per_second": 5.283,
"eval_steps_per_second": 5.283,
"step": 1557
},
{
"epoch": 2.9994592321095954,
"grad_norm": 0.7270023822784424,
"learning_rate": 0.0,
"loss": 0.1184,
"step": 1560
},
{
"epoch": 2.9994592321095954,
"step": 1560,
"total_flos": 1.3623219564340838e+18,
"train_loss": 0.1486816066579941,
"train_runtime": 34006.2437,
"train_samples_per_second": 1.468,
"train_steps_per_second": 0.046
}
],
"logging_steps": 10,
"max_steps": 1560,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 173,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.3623219564340838e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}