{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.36997299197158606,
"eval_steps": 500,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0014798919678863443,
"grad_norm": 7.47622537612915,
"learning_rate": 7.2e-06,
"loss": 1.1232,
"step": 10
},
{
"epoch": 0.0029597839357726886,
"grad_norm": 1.880067229270935,
"learning_rate": 1.52e-05,
"loss": 0.5006,
"step": 20
},
{
"epoch": 0.004439675903659033,
"grad_norm": 1.9676536321640015,
"learning_rate": 2.32e-05,
"loss": 0.2438,
"step": 30
},
{
"epoch": 0.005919567871545377,
"grad_norm": 1.628406047821045,
"learning_rate": 3.12e-05,
"loss": 0.1641,
"step": 40
},
{
"epoch": 0.007399459839431722,
"grad_norm": 2.5903799533843994,
"learning_rate": 3.9200000000000004e-05,
"loss": 0.1585,
"step": 50
},
{
"epoch": 0.008879351807318065,
"grad_norm": 1.520799160003662,
"learning_rate": 4.72e-05,
"loss": 0.1282,
"step": 60
},
{
"epoch": 0.01035924377520441,
"grad_norm": 1.7377681732177734,
"learning_rate": 5.520000000000001e-05,
"loss": 0.1256,
"step": 70
},
{
"epoch": 0.011839135743090754,
"grad_norm": 1.098388433456421,
"learning_rate": 6.32e-05,
"loss": 0.1031,
"step": 80
},
{
"epoch": 0.013319027710977099,
"grad_norm": 1.4509732723236084,
"learning_rate": 7.12e-05,
"loss": 0.0973,
"step": 90
},
{
"epoch": 0.014798919678863444,
"grad_norm": 1.9007303714752197,
"learning_rate": 7.920000000000001e-05,
"loss": 0.1152,
"step": 100
},
{
"epoch": 0.016278811646749786,
"grad_norm": 1.5740575790405273,
"learning_rate": 8.72e-05,
"loss": 0.1151,
"step": 110
},
{
"epoch": 0.01775870361463613,
"grad_norm": 1.2793081998825073,
"learning_rate": 9.52e-05,
"loss": 0.1182,
"step": 120
},
{
"epoch": 0.019238595582522475,
"grad_norm": 1.5838185548782349,
"learning_rate": 9.999930010724872e-05,
"loss": 0.0955,
"step": 130
},
{
"epoch": 0.02071848755040882,
"grad_norm": 1.8031917810440063,
"learning_rate": 9.999142653881985e-05,
"loss": 0.1141,
"step": 140
},
{
"epoch": 0.022198379518295164,
"grad_norm": 1.5219581127166748,
"learning_rate": 9.997480591826183e-05,
"loss": 0.1069,
"step": 150
},
{
"epoch": 0.02367827148618151,
"grad_norm": 1.1764310598373413,
"learning_rate": 9.994944115370199e-05,
"loss": 0.0964,
"step": 160
},
{
"epoch": 0.025158163454067854,
"grad_norm": 1.0616095066070557,
"learning_rate": 9.991533668323974e-05,
"loss": 0.0761,
"step": 170
},
{
"epoch": 0.026638055421954198,
"grad_norm": 1.1210483312606812,
"learning_rate": 9.987249847416987e-05,
"loss": 0.087,
"step": 180
},
{
"epoch": 0.028117947389840543,
"grad_norm": 1.1077345609664917,
"learning_rate": 9.982093402193857e-05,
"loss": 0.0931,
"step": 190
},
{
"epoch": 0.029597839357726887,
"grad_norm": 1.352004051208496,
"learning_rate": 9.976065234883193e-05,
"loss": 0.083,
"step": 200
},
{
"epoch": 0.03107773132561323,
"grad_norm": 1.0884106159210205,
"learning_rate": 9.969166400239726e-05,
"loss": 0.0783,
"step": 210
},
{
"epoch": 0.03255762329349957,
"grad_norm": 1.1031562089920044,
"learning_rate": 9.961398105359764e-05,
"loss": 0.1047,
"step": 220
},
{
"epoch": 0.03403751526138592,
"grad_norm": 1.0741270780563354,
"learning_rate": 9.952761709469975e-05,
"loss": 0.1017,
"step": 230
},
{
"epoch": 0.03551740722927226,
"grad_norm": 1.2804232835769653,
"learning_rate": 9.94325872368957e-05,
"loss": 0.0968,
"step": 240
},
{
"epoch": 0.036997299197158606,
"grad_norm": 1.0856305360794067,
"learning_rate": 9.932890810765902e-05,
"loss": 0.0644,
"step": 250
},
{
"epoch": 0.03847719116504495,
"grad_norm": 1.1068660020828247,
"learning_rate": 9.921659784783526e-05,
"loss": 0.0644,
"step": 260
},
{
"epoch": 0.039957083132931295,
"grad_norm": 1.043481469154358,
"learning_rate": 9.909567610846788e-05,
"loss": 0.0667,
"step": 270
},
{
"epoch": 0.04143697510081764,
"grad_norm": 1.3862495422363281,
"learning_rate": 9.896616404736001e-05,
"loss": 0.0694,
"step": 280
},
{
"epoch": 0.042916867068703984,
"grad_norm": 1.405779480934143,
"learning_rate": 9.882808432537224e-05,
"loss": 0.0823,
"step": 290
},
{
"epoch": 0.04439675903659033,
"grad_norm": 1.4005481004714966,
"learning_rate": 9.86814611024578e-05,
"loss": 0.0812,
"step": 300
},
{
"epoch": 0.04587665100447667,
"grad_norm": 0.8486709594726562,
"learning_rate": 9.852632003343518e-05,
"loss": 0.0966,
"step": 310
},
{
"epoch": 0.04735654297236302,
"grad_norm": 0.6197394132614136,
"learning_rate": 9.836268826349933e-05,
"loss": 0.0595,
"step": 320
},
{
"epoch": 0.04883643494024936,
"grad_norm": 0.776345431804657,
"learning_rate": 9.819059442347193e-05,
"loss": 0.0599,
"step": 330
},
{
"epoch": 0.05031632690813571,
"grad_norm": 0.7929945588111877,
"learning_rate": 9.801006862479202e-05,
"loss": 0.07,
"step": 340
},
{
"epoch": 0.05179621887602205,
"grad_norm": 0.9411725997924805,
"learning_rate": 9.782114245424718e-05,
"loss": 0.0637,
"step": 350
},
{
"epoch": 0.053276110843908396,
"grad_norm": 0.825547993183136,
"learning_rate": 9.762384896844684e-05,
"loss": 0.0628,
"step": 360
},
{
"epoch": 0.05475600281179474,
"grad_norm": 1.1270116567611694,
"learning_rate": 9.741822268803833e-05,
"loss": 0.0702,
"step": 370
},
{
"epoch": 0.056235894779681085,
"grad_norm": 0.947913408279419,
"learning_rate": 9.720429959166675e-05,
"loss": 0.073,
"step": 380
},
{
"epoch": 0.05771578674756743,
"grad_norm": 1.0599220991134644,
"learning_rate": 9.69821171096798e-05,
"loss": 0.0678,
"step": 390
},
{
"epoch": 0.059195678715453774,
"grad_norm": 1.0569720268249512,
"learning_rate": 9.675171411757842e-05,
"loss": 0.0653,
"step": 400
},
{
"epoch": 0.06067557068334012,
"grad_norm": 1.1427547931671143,
"learning_rate": 9.65131309292149e-05,
"loss": 0.0739,
"step": 410
},
{
"epoch": 0.06215546265122646,
"grad_norm": 0.816104531288147,
"learning_rate": 9.626640928973892e-05,
"loss": 0.0518,
"step": 420
},
{
"epoch": 0.06363535461911281,
"grad_norm": 0.7173371911048889,
"learning_rate": 9.601159236829352e-05,
"loss": 0.0687,
"step": 430
},
{
"epoch": 0.06511524658699915,
"grad_norm": 0.8344293236732483,
"learning_rate": 9.574872475046166e-05,
"loss": 0.059,
"step": 440
},
{
"epoch": 0.0665951385548855,
"grad_norm": 0.6896253824234009,
"learning_rate": 9.547785243046505e-05,
"loss": 0.046,
"step": 450
},
{
"epoch": 0.06807503052277183,
"grad_norm": 1.0698304176330566,
"learning_rate": 9.519902280311653e-05,
"loss": 0.0511,
"step": 460
},
{
"epoch": 0.06955492249065819,
"grad_norm": 0.7512239217758179,
"learning_rate": 9.491228465552726e-05,
"loss": 0.0578,
"step": 470
},
{
"epoch": 0.07103481445854452,
"grad_norm": 1.0382441282272339,
"learning_rate": 9.461768815857053e-05,
"loss": 0.0618,
"step": 480
},
{
"epoch": 0.07251470642643087,
"grad_norm": 0.7745275497436523,
"learning_rate": 9.431528485810316e-05,
"loss": 0.069,
"step": 490
},
{
"epoch": 0.07399459839431721,
"grad_norm": 0.7567397952079773,
"learning_rate": 9.400512766594659e-05,
"loss": 0.047,
"step": 500
},
{
"epoch": 0.07547449036220356,
"grad_norm": 0.7035396695137024,
"learning_rate": 9.368727085062872e-05,
"loss": 0.0563,
"step": 510
},
{
"epoch": 0.0769543823300899,
"grad_norm": 0.8480479717254639,
"learning_rate": 9.336177002788862e-05,
"loss": 0.0498,
"step": 520
},
{
"epoch": 0.07843427429797625,
"grad_norm": 0.6571253538131714,
"learning_rate": 9.302868215094534e-05,
"loss": 0.0579,
"step": 530
},
{
"epoch": 0.07991416626586259,
"grad_norm": 0.6242689490318298,
"learning_rate": 9.268806550053264e-05,
"loss": 0.0465,
"step": 540
},
{
"epoch": 0.08139405823374894,
"grad_norm": 1.0013155937194824,
"learning_rate": 9.233997967470174e-05,
"loss": 0.0792,
"step": 550
},
{
"epoch": 0.08287395020163528,
"grad_norm": 1.0697152614593506,
"learning_rate": 9.198448557839321e-05,
"loss": 0.0707,
"step": 560
},
{
"epoch": 0.08435384216952163,
"grad_norm": 0.9719803333282471,
"learning_rate": 9.162164541278051e-05,
"loss": 0.0586,
"step": 570
},
{
"epoch": 0.08583373413740797,
"grad_norm": 1.0201226472854614,
"learning_rate": 9.125152266438649e-05,
"loss": 0.0535,
"step": 580
},
{
"epoch": 0.08731362610529432,
"grad_norm": 0.8870009779930115,
"learning_rate": 9.087418209397506e-05,
"loss": 0.058,
"step": 590
},
{
"epoch": 0.08879351807318066,
"grad_norm": 0.7464373707771301,
"learning_rate": 9.04896897252201e-05,
"loss": 0.0676,
"step": 600
},
{
"epoch": 0.090273410041067,
"grad_norm": 0.6516630053520203,
"learning_rate": 9.009811283315304e-05,
"loss": 0.0506,
"step": 610
},
{
"epoch": 0.09175330200895335,
"grad_norm": 0.6615219712257385,
"learning_rate": 8.969951993239177e-05,
"loss": 0.0479,
"step": 620
},
{
"epoch": 0.09323319397683968,
"grad_norm": 0.6776785850524902,
"learning_rate": 8.929398076515259e-05,
"loss": 0.0459,
"step": 630
},
{
"epoch": 0.09471308594472604,
"grad_norm": 0.7998208403587341,
"learning_rate": 8.888156628904724e-05,
"loss": 0.0493,
"step": 640
},
{
"epoch": 0.09619297791261237,
"grad_norm": 0.8668547868728638,
"learning_rate": 8.846234866466747e-05,
"loss": 0.0431,
"step": 650
},
{
"epoch": 0.09767286988049872,
"grad_norm": 0.7501780390739441,
"learning_rate": 8.803640124295902e-05,
"loss": 0.0664,
"step": 660
},
{
"epoch": 0.09915276184838506,
"grad_norm": 0.5983700156211853,
"learning_rate": 8.760379855238723e-05,
"loss": 0.0424,
"step": 670
},
{
"epoch": 0.10063265381627141,
"grad_norm": 0.7453112006187439,
"learning_rate": 8.716461628589683e-05,
"loss": 0.0508,
"step": 680
},
{
"epoch": 0.10211254578415775,
"grad_norm": 0.7227299809455872,
"learning_rate": 8.671893128766784e-05,
"loss": 0.045,
"step": 690
},
{
"epoch": 0.1035924377520441,
"grad_norm": 0.7978153824806213,
"learning_rate": 8.626682153967001e-05,
"loss": 0.0473,
"step": 700
},
{
"epoch": 0.10507232971993044,
"grad_norm": 0.6989890336990356,
"learning_rate": 8.580836614801827e-05,
"loss": 0.046,
"step": 710
},
{
"epoch": 0.10655222168781679,
"grad_norm": 0.8369282484054565,
"learning_rate": 8.534364532913144e-05,
"loss": 0.049,
"step": 720
},
{
"epoch": 0.10803211365570313,
"grad_norm": 0.5447561144828796,
"learning_rate": 8.487274039569675e-05,
"loss": 0.0428,
"step": 730
},
{
"epoch": 0.10951200562358948,
"grad_norm": 0.7491251826286316,
"learning_rate": 8.439573374244237e-05,
"loss": 0.0368,
"step": 740
},
{
"epoch": 0.11099189759147582,
"grad_norm": 0.48374220728874207,
"learning_rate": 8.391270883172073e-05,
"loss": 0.0403,
"step": 750
},
{
"epoch": 0.11247178955936217,
"grad_norm": 0.9518898129463196,
"learning_rate": 8.342375017890512e-05,
"loss": 0.038,
"step": 760
},
{
"epoch": 0.11395168152724851,
"grad_norm": 0.8046047687530518,
"learning_rate": 8.292894333760186e-05,
"loss": 0.0414,
"step": 770
},
{
"epoch": 0.11543157349513486,
"grad_norm": 0.6393342614173889,
"learning_rate": 8.242837488468087e-05,
"loss": 0.0615,
"step": 780
},
{
"epoch": 0.1169114654630212,
"grad_norm": 0.6403966546058655,
"learning_rate": 8.192213240512737e-05,
"loss": 0.0403,
"step": 790
},
{
"epoch": 0.11839135743090755,
"grad_norm": 0.5846046805381775,
"learning_rate": 8.141030447671686e-05,
"loss": 0.0376,
"step": 800
},
{
"epoch": 0.11987124939879389,
"grad_norm": 1.1184839010238647,
"learning_rate": 8.089298065451672e-05,
"loss": 0.042,
"step": 810
},
{
"epoch": 0.12135114136668024,
"grad_norm": 0.804345965385437,
"learning_rate": 8.037025145521657e-05,
"loss": 0.0448,
"step": 820
},
{
"epoch": 0.12283103333456658,
"grad_norm": 0.40974166989326477,
"learning_rate": 7.984220834129052e-05,
"loss": 0.0691,
"step": 830
},
{
"epoch": 0.12431092530245293,
"grad_norm": 0.5297871232032776,
"learning_rate": 7.93089437049939e-05,
"loss": 0.0477,
"step": 840
},
{
"epoch": 0.12579081727033928,
"grad_norm": 0.6452186107635498,
"learning_rate": 7.877055085219721e-05,
"loss": 0.0505,
"step": 850
},
{
"epoch": 0.12727070923822562,
"grad_norm": 0.6497986316680908,
"learning_rate": 7.82271239860604e-05,
"loss": 0.0471,
"step": 860
},
{
"epoch": 0.12875060120611195,
"grad_norm": 0.6113291382789612,
"learning_rate": 7.767875819054997e-05,
"loss": 0.0485,
"step": 870
},
{
"epoch": 0.1302304931739983,
"grad_norm": 0.8844181895256042,
"learning_rate": 7.712554941380206e-05,
"loss": 0.0429,
"step": 880
},
{
"epoch": 0.13171038514188466,
"grad_norm": 0.7872292995452881,
"learning_rate": 7.656759445133428e-05,
"loss": 0.0471,
"step": 890
},
{
"epoch": 0.133190277109771,
"grad_norm": 0.8881542682647705,
"learning_rate": 7.600499092910934e-05,
"loss": 0.0498,
"step": 900
},
{
"epoch": 0.13467016907765733,
"grad_norm": 0.6990298628807068,
"learning_rate": 7.543783728645328e-05,
"loss": 0.0385,
"step": 910
},
{
"epoch": 0.13615006104554367,
"grad_norm": 0.8354172110557556,
"learning_rate": 7.486623275883151e-05,
"loss": 0.0347,
"step": 920
},
{
"epoch": 0.13762995301343,
"grad_norm": 0.8185162544250488,
"learning_rate": 7.429027736048535e-05,
"loss": 0.0409,
"step": 930
},
{
"epoch": 0.13910984498131637,
"grad_norm": 0.9761926531791687,
"learning_rate": 7.37100718669326e-05,
"loss": 0.0422,
"step": 940
},
{
"epoch": 0.1405897369492027,
"grad_norm": 0.5239071249961853,
"learning_rate": 7.312571779733463e-05,
"loss": 0.0269,
"step": 950
},
{
"epoch": 0.14206962891708905,
"grad_norm": 0.7226734757423401,
"learning_rate": 7.253731739673349e-05,
"loss": 0.0366,
"step": 960
},
{
"epoch": 0.14354952088497538,
"grad_norm": 0.6310585141181946,
"learning_rate": 7.194497361816196e-05,
"loss": 0.0527,
"step": 970
},
{
"epoch": 0.14502941285286175,
"grad_norm": 0.5752474665641785,
"learning_rate": 7.134879010462988e-05,
"loss": 0.0312,
"step": 980
},
{
"epoch": 0.1465093048207481,
"grad_norm": 0.5619791150093079,
"learning_rate": 7.07488711709894e-05,
"loss": 0.0314,
"step": 990
},
{
"epoch": 0.14798919678863443,
"grad_norm": 0.6874368190765381,
"learning_rate": 7.014532178568314e-05,
"loss": 0.0294,
"step": 1000
},
{
"epoch": 0.14946908875652076,
"grad_norm": 0.6269967555999756,
"learning_rate": 6.953824755237756e-05,
"loss": 0.0357,
"step": 1010
},
{
"epoch": 0.15094898072440713,
"grad_norm": 0.7482361793518066,
"learning_rate": 6.892775469148553e-05,
"loss": 0.043,
"step": 1020
},
{
"epoch": 0.15242887269229347,
"grad_norm": 0.7447315454483032,
"learning_rate": 6.831395002158067e-05,
"loss": 0.0319,
"step": 1030
},
{
"epoch": 0.1539087646601798,
"grad_norm": 0.5281906127929688,
"learning_rate": 6.76969409407074e-05,
"loss": 0.0311,
"step": 1040
},
{
"epoch": 0.15538865662806614,
"grad_norm": 0.5784289240837097,
"learning_rate": 6.707683540758915e-05,
"loss": 0.0362,
"step": 1050
},
{
"epoch": 0.1568685485959525,
"grad_norm": 0.5917581915855408,
"learning_rate": 6.645374192273894e-05,
"loss": 0.0406,
"step": 1060
},
{
"epoch": 0.15834844056383884,
"grad_norm": 0.8599121570587158,
"learning_rate": 6.582776950947474e-05,
"loss": 0.0468,
"step": 1070
},
{
"epoch": 0.15982833253172518,
"grad_norm": 0.5944121479988098,
"learning_rate": 6.519902769484368e-05,
"loss": 0.0464,
"step": 1080
},
{
"epoch": 0.16130822449961152,
"grad_norm": 0.6295680999755859,
"learning_rate": 6.456762649045796e-05,
"loss": 0.0371,
"step": 1090
},
{
"epoch": 0.16278811646749788,
"grad_norm": 1.0905753374099731,
"learning_rate": 6.393367637324593e-05,
"loss": 0.0465,
"step": 1100
},
{
"epoch": 0.16426800843538422,
"grad_norm": 0.7028170228004456,
"learning_rate": 6.329728826612192e-05,
"loss": 0.0493,
"step": 1110
},
{
"epoch": 0.16574790040327056,
"grad_norm": 0.6126824617385864,
"learning_rate": 6.265857351857788e-05,
"loss": 0.0369,
"step": 1120
},
{
"epoch": 0.1672277923711569,
"grad_norm": 0.7399412393569946,
"learning_rate": 6.201764388720049e-05,
"loss": 0.0412,
"step": 1130
},
{
"epoch": 0.16870768433904326,
"grad_norm": 0.5751072764396667,
"learning_rate": 6.137461151611692e-05,
"loss": 0.0446,
"step": 1140
},
{
"epoch": 0.1701875763069296,
"grad_norm": 0.4830611050128937,
"learning_rate": 6.072958891737296e-05,
"loss": 0.0396,
"step": 1150
},
{
"epoch": 0.17166746827481594,
"grad_norm": 0.5445456504821777,
"learning_rate": 6.00826889512466e-05,
"loss": 0.0367,
"step": 1160
},
{
"epoch": 0.17314736024270228,
"grad_norm": 0.7560216188430786,
"learning_rate": 5.943402480650071e-05,
"loss": 0.0328,
"step": 1170
},
{
"epoch": 0.17462725221058864,
"grad_norm": 0.6100801229476929,
"learning_rate": 5.8783709980578414e-05,
"loss": 0.031,
"step": 1180
},
{
"epoch": 0.17610714417847498,
"grad_norm": 0.5339049696922302,
"learning_rate": 5.813185825974419e-05,
"loss": 0.0365,
"step": 1190
},
{
"epoch": 0.17758703614636132,
"grad_norm": 0.7650025486946106,
"learning_rate": 5.747858369917465e-05,
"loss": 0.034,
"step": 1200
},
{
"epoch": 0.17906692811424765,
"grad_norm": 0.6139253377914429,
"learning_rate": 5.682400060300213e-05,
"loss": 0.0399,
"step": 1210
},
{
"epoch": 0.180546820082134,
"grad_norm": 0.514707088470459,
"learning_rate": 5.6168223504314863e-05,
"loss": 0.0371,
"step": 1220
},
{
"epoch": 0.18202671205002036,
"grad_norm": 0.5504834651947021,
"learning_rate": 5.551136714511691e-05,
"loss": 0.0338,
"step": 1230
},
{
"epoch": 0.1835066040179067,
"grad_norm": 0.6514374017715454,
"learning_rate": 5.485354645625167e-05,
"loss": 0.0392,
"step": 1240
},
{
"epoch": 0.18498649598579303,
"grad_norm": 0.5081560015678406,
"learning_rate": 5.419487653729234e-05,
"loss": 0.0332,
"step": 1250
},
{
"epoch": 0.18646638795367937,
"grad_norm": 0.904439389705658,
"learning_rate": 5.353547263640273e-05,
"loss": 0.0382,
"step": 1260
},
{
"epoch": 0.18794627992156573,
"grad_norm": 0.5332797765731812,
"learning_rate": 5.2875450130172324e-05,
"loss": 0.0221,
"step": 1270
},
{
"epoch": 0.18942617188945207,
"grad_norm": 0.6516315937042236,
"learning_rate": 5.221492450342856e-05,
"loss": 0.0371,
"step": 1280
},
{
"epoch": 0.1909060638573384,
"grad_norm": 0.7484680414199829,
"learning_rate": 5.155401132903045e-05,
"loss": 0.0377,
"step": 1290
},
{
"epoch": 0.19238595582522475,
"grad_norm": 0.9511647820472717,
"learning_rate": 5.089282624764654e-05,
"loss": 0.0386,
"step": 1300
},
{
"epoch": 0.1938658477931111,
"grad_norm": 0.3802145719528198,
"learning_rate": 5.0231484947521336e-05,
"loss": 0.0324,
"step": 1310
},
{
"epoch": 0.19534573976099745,
"grad_norm": 0.5430207252502441,
"learning_rate": 4.9570103144233024e-05,
"loss": 0.0296,
"step": 1320
},
{
"epoch": 0.1968256317288838,
"grad_norm": 0.631252646446228,
"learning_rate": 4.890879656044669e-05,
"loss": 0.0241,
"step": 1330
},
{
"epoch": 0.19830552369677013,
"grad_norm": 0.4435235261917114,
"learning_rate": 4.824768090566618e-05,
"loss": 0.0285,
"step": 1340
},
{
"epoch": 0.1997854156646565,
"grad_norm": 0.7757057547569275,
"learning_rate": 4.7586871855988326e-05,
"loss": 0.0419,
"step": 1350
},
{
"epoch": 0.20126530763254283,
"grad_norm": 0.6761030554771423,
"learning_rate": 4.692648503386289e-05,
"loss": 0.0338,
"step": 1360
},
{
"epoch": 0.20274519960042917,
"grad_norm": 0.3639812767505646,
"learning_rate": 4.6266635987862086e-05,
"loss": 0.0244,
"step": 1370
},
{
"epoch": 0.2042250915683155,
"grad_norm": 0.3727477490901947,
"learning_rate": 4.560744017246284e-05,
"loss": 0.025,
"step": 1380
},
{
"epoch": 0.20570498353620187,
"grad_norm": 0.5406703948974609,
"learning_rate": 4.4949012927845676e-05,
"loss": 0.031,
"step": 1390
},
{
"epoch": 0.2071848755040882,
"grad_norm": 0.45045414566993713,
"learning_rate": 4.429146945971346e-05,
"loss": 0.0236,
"step": 1400
},
{
"epoch": 0.20866476747197454,
"grad_norm": 0.549584686756134,
"learning_rate": 4.3634924819133746e-05,
"loss": 0.0286,
"step": 1410
},
{
"epoch": 0.21014465943986088,
"grad_norm": 0.7233926653862,
"learning_rate": 4.297949388240823e-05,
"loss": 0.0297,
"step": 1420
},
{
"epoch": 0.21162455140774725,
"grad_norm": 0.5810157656669617,
"learning_rate": 4.2325291330972664e-05,
"loss": 0.0189,
"step": 1430
},
{
"epoch": 0.21310444337563358,
"grad_norm": 0.5259445905685425,
"learning_rate": 4.167243163133094e-05,
"loss": 0.0271,
"step": 1440
},
{
"epoch": 0.21458433534351992,
"grad_norm": 0.5740777254104614,
"learning_rate": 4.1021029015026736e-05,
"loss": 0.0367,
"step": 1450
},
{
"epoch": 0.21606422731140626,
"grad_norm": 0.6542416214942932,
"learning_rate": 4.037119745865641e-05,
"loss": 0.0323,
"step": 1460
},
{
"epoch": 0.21754411927929262,
"grad_norm": 0.5732387900352478,
"learning_rate": 3.972305066392626e-05,
"loss": 0.0295,
"step": 1470
},
{
"epoch": 0.21902401124717896,
"grad_norm": 0.8444030284881592,
"learning_rate": 3.9076702037758076e-05,
"loss": 0.027,
"step": 1480
},
{
"epoch": 0.2205039032150653,
"grad_norm": 0.36182349920272827,
"learning_rate": 3.8432264672446293e-05,
"loss": 0.0306,
"step": 1490
},
{
"epoch": 0.22198379518295164,
"grad_norm": 0.367953360080719,
"learning_rate": 3.778985132586995e-05,
"loss": 0.0258,
"step": 1500
},
{
"epoch": 0.22346368715083798,
"grad_norm": 0.43248283863067627,
"learning_rate": 3.714957440176345e-05,
"loss": 0.0237,
"step": 1510
},
{
"epoch": 0.22494357911872434,
"grad_norm": 0.5691545605659485,
"learning_rate": 3.651154593004911e-05,
"loss": 0.0257,
"step": 1520
},
{
"epoch": 0.22642347108661068,
"grad_norm": 0.4153839945793152,
"learning_rate": 3.587587754723523e-05,
"loss": 0.0256,
"step": 1530
},
{
"epoch": 0.22790336305449702,
"grad_norm": 0.4405042827129364,
"learning_rate": 3.5242680476882815e-05,
"loss": 0.0243,
"step": 1540
},
{
"epoch": 0.22938325502238335,
"grad_norm": 0.5467635989189148,
"learning_rate": 3.461206551014481e-05,
"loss": 0.0242,
"step": 1550
},
{
"epoch": 0.23086314699026972,
"grad_norm": 0.4557804763317108,
"learning_rate": 3.3984142986380764e-05,
"loss": 0.0338,
"step": 1560
},
{
"epoch": 0.23234303895815606,
"grad_norm": 0.39460909366607666,
"learning_rate": 3.335902277385067e-05,
"loss": 0.0205,
"step": 1570
},
{
"epoch": 0.2338229309260424,
"grad_norm": 0.5078433752059937,
"learning_rate": 3.2736814250491196e-05,
"loss": 0.0248,
"step": 1580
},
{
"epoch": 0.23530282289392873,
"grad_norm": 0.6240681409835815,
"learning_rate": 3.211762628477771e-05,
"loss": 0.0312,
"step": 1590
},
{
"epoch": 0.2367827148618151,
"grad_norm": 0.37351563572883606,
"learning_rate": 3.150156721667547e-05,
"loss": 0.0245,
"step": 1600
},
{
"epoch": 0.23826260682970143,
"grad_norm": 0.3536587953567505,
"learning_rate": 3.088874483868325e-05,
"loss": 0.021,
"step": 1610
},
{
"epoch": 0.23974249879758777,
"grad_norm": 0.30240142345428467,
"learning_rate": 3.0279266376972715e-05,
"loss": 0.025,
"step": 1620
},
{
"epoch": 0.2412223907654741,
"grad_norm": 0.4821987450122833,
"learning_rate": 2.96732384726271e-05,
"loss": 0.0427,
"step": 1630
},
{
"epoch": 0.24270228273336047,
"grad_norm": 0.6150904297828674,
"learning_rate": 2.907076716298196e-05,
"loss": 0.0297,
"step": 1640
},
{
"epoch": 0.2441821747012468,
"grad_norm": 0.49226638674736023,
"learning_rate": 2.847195786307174e-05,
"loss": 0.0267,
"step": 1650
},
{
"epoch": 0.24566206666913315,
"grad_norm": 0.49682047963142395,
"learning_rate": 2.7876915347185227e-05,
"loss": 0.0326,
"step": 1660
},
{
"epoch": 0.2471419586370195,
"grad_norm": 0.42303502559661865,
"learning_rate": 2.7285743730533143e-05,
"loss": 0.0212,
"step": 1670
},
{
"epoch": 0.24862185060490585,
"grad_norm": 0.5483497977256775,
"learning_rate": 2.6698546451030826e-05,
"loss": 0.0202,
"step": 1680
},
{
"epoch": 0.25010174257279216,
"grad_norm": 0.5056328177452087,
"learning_rate": 2.611542625119975e-05,
"loss": 0.0228,
"step": 1690
},
{
"epoch": 0.25158163454067856,
"grad_norm": 0.32099300622940063,
"learning_rate": 2.5536485160190482e-05,
"loss": 0.0224,
"step": 1700
},
{
"epoch": 0.2530615265085649,
"grad_norm": 0.5951581597328186,
"learning_rate": 2.496182447593055e-05,
"loss": 0.0272,
"step": 1710
},
{
"epoch": 0.25454141847645123,
"grad_norm": 0.45417091250419617,
"learning_rate": 2.4391544747400252e-05,
"loss": 0.0227,
"step": 1720
},
{
"epoch": 0.25602131044433757,
"grad_norm": 0.5122156739234924,
"learning_rate": 2.3825745757039452e-05,
"loss": 0.024,
"step": 1730
},
{
"epoch": 0.2575012024122239,
"grad_norm": 0.4841585159301758,
"learning_rate": 2.3264526503288642e-05,
"loss": 0.0225,
"step": 1740
},
{
"epoch": 0.25898109438011024,
"grad_norm": 0.43821659684181213,
"learning_rate": 2.2707985183266978e-05,
"loss": 0.0215,
"step": 1750
},
{
"epoch": 0.2604609863479966,
"grad_norm": 0.42495104670524597,
"learning_rate": 2.215621917559062e-05,
"loss": 0.0213,
"step": 1760
},
{
"epoch": 0.2619408783158829,
"grad_norm": 0.4166795015335083,
"learning_rate": 2.1609325023334377e-05,
"loss": 0.018,
"step": 1770
},
{
"epoch": 0.2634207702837693,
"grad_norm": 0.29304540157318115,
"learning_rate": 2.1067398417139466e-05,
"loss": 0.029,
"step": 1780
},
{
"epoch": 0.26490066225165565,
"grad_norm": 0.6335829496383667,
"learning_rate": 2.0530534178470322e-05,
"loss": 0.021,
"step": 1790
},
{
"epoch": 0.266380554219542,
"grad_norm": 0.35307204723358154,
"learning_rate": 1.9998826243023666e-05,
"loss": 0.021,
"step": 1800
},
{
"epoch": 0.2678604461874283,
"grad_norm": 0.4115915894508362,
"learning_rate": 1.9472367644292457e-05,
"loss": 0.0212,
"step": 1810
},
{
"epoch": 0.26934033815531466,
"grad_norm": 0.5536908507347107,
"learning_rate": 1.8951250497287716e-05,
"loss": 0.0291,
"step": 1820
},
{
"epoch": 0.270820230123201,
"grad_norm": 0.6118968725204468,
"learning_rate": 1.843556598242109e-05,
"loss": 0.0351,
"step": 1830
},
{
"epoch": 0.27230012209108734,
"grad_norm": 0.5278725624084473,
"learning_rate": 1.792540432955087e-05,
"loss": 0.0243,
"step": 1840
},
{
"epoch": 0.2737800140589737,
"grad_norm": 0.365327388048172,
"learning_rate": 1.742085480219449e-05,
"loss": 0.0303,
"step": 1850
},
{
"epoch": 0.27525990602686,
"grad_norm": 0.5491426587104797,
"learning_rate": 1.6922005681909843e-05,
"loss": 0.0321,
"step": 1860
},
{
"epoch": 0.2767397979947464,
"grad_norm": 0.5193561315536499,
"learning_rate": 1.642894425284867e-05,
"loss": 0.0207,
"step": 1870
},
{
"epoch": 0.27821968996263274,
"grad_norm": 0.39915916323661804,
"learning_rate": 1.5941756786484335e-05,
"loss": 0.0197,
"step": 1880
},
{
"epoch": 0.2796995819305191,
"grad_norm": 0.4479086101055145,
"learning_rate": 1.5460528526516804e-05,
"loss": 0.0233,
"step": 1890
},
{
"epoch": 0.2811794738984054,
"grad_norm": 0.3311406373977661,
"learning_rate": 1.498534367395748e-05,
"loss": 0.0202,
"step": 1900
},
{
"epoch": 0.28265936586629176,
"grad_norm": 0.2992391586303711,
"learning_rate": 1.4516285372396437e-05,
"loss": 0.0264,
"step": 1910
},
{
"epoch": 0.2841392578341781,
"grad_norm": 0.480135440826416,
"learning_rate": 1.4053435693454775e-05,
"loss": 0.0221,
"step": 1920
},
{
"epoch": 0.28561914980206443,
"grad_norm": 0.27029678225517273,
"learning_rate": 1.359687562242437e-05,
"loss": 0.0153,
"step": 1930
},
{
"epoch": 0.28709904176995077,
"grad_norm": 0.5432844758033752,
"learning_rate": 1.314668504409779e-05,
"loss": 0.0207,
"step": 1940
},
{
"epoch": 0.28857893373783716,
"grad_norm": 0.4192567765712738,
"learning_rate": 1.2702942728790895e-05,
"loss": 0.0168,
"step": 1950
},
{
"epoch": 0.2900588257057235,
"grad_norm": 0.5646419525146484,
"learning_rate": 1.2265726318560172e-05,
"loss": 0.0269,
"step": 1960
},
{
"epoch": 0.29153871767360984,
"grad_norm": 0.47364342212677,
"learning_rate": 1.1835112313617697e-05,
"loss": 0.0158,
"step": 1970
},
{
"epoch": 0.2930186096414962,
"grad_norm": 0.3461420238018036,
"learning_rate": 1.1411176058945771e-05,
"loss": 0.0212,
"step": 1980
},
{
"epoch": 0.2944985016093825,
"grad_norm": 0.34054312109947205,
"learning_rate": 1.0993991731113817e-05,
"loss": 0.0143,
"step": 1990
},
{
"epoch": 0.29597839357726885,
"grad_norm": 0.48517104983329773,
"learning_rate": 1.058363232529948e-05,
"loss": 0.0256,
"step": 2000
},
{
"epoch": 0.2974582855451552,
"grad_norm": 0.4162018299102783,
"learning_rate": 1.0180169642516718e-05,
"loss": 0.033,
"step": 2010
},
{
"epoch": 0.2989381775130415,
"grad_norm": 0.4353783130645752,
"learning_rate": 9.783674277052667e-06,
"loss": 0.018,
"step": 2020
},
{
"epoch": 0.3004180694809279,
"grad_norm": 0.4821432828903198,
"learning_rate": 9.394215604115641e-06,
"loss": 0.0136,
"step": 2030
},
{
"epoch": 0.30189796144881426,
"grad_norm": 0.3058512806892395,
"learning_rate": 9.011861767696522e-06,
"loss": 0.0217,
"step": 2040
},
{
"epoch": 0.3033778534167006,
"grad_norm": 0.4392576813697815,
"learning_rate": 8.636679668645536e-06,
"loss": 0.0182,
"step": 2050
},
{
"epoch": 0.30485774538458693,
"grad_norm": 0.3896658718585968,
"learning_rate": 8.268734952966505e-06,
"loss": 0.0189,
"step": 2060
},
{
"epoch": 0.30633763735247327,
"grad_norm": 0.4406259059906006,
"learning_rate": 7.908092000330747e-06,
"loss": 0.0145,
"step": 2070
},
{
"epoch": 0.3078175293203596,
"grad_norm": 0.357083261013031,
"learning_rate": 7.5548139128124364e-06,
"loss": 0.0177,
"step": 2080
},
{
"epoch": 0.30929742128824594,
"grad_norm": 0.2061689794063568,
"learning_rate": 7.2089625038476606e-06,
"loss": 0.0173,
"step": 2090
},
{
"epoch": 0.3107773132561323,
"grad_norm": 0.4433155953884125,
"learning_rate": 6.87059828741875e-06,
"loss": 0.0188,
"step": 2100
},
{
"epoch": 0.3122572052240186,
"grad_norm": 0.31645211577415466,
"learning_rate": 6.539780467466172e-06,
"loss": 0.0322,
"step": 2110
},
{
"epoch": 0.313737097191905,
"grad_norm": 0.3037683367729187,
"learning_rate": 6.216566927529455e-06,
"loss": 0.0177,
"step": 2120
},
{
"epoch": 0.31521698915979135,
"grad_norm": 0.2664813995361328,
"learning_rate": 5.9010142206194e-06,
"loss": 0.018,
"step": 2130
},
{
"epoch": 0.3166968811276777,
"grad_norm": 0.22976835072040558,
"learning_rate": 5.593177559322777e-06,
"loss": 0.0145,
"step": 2140
},
{
"epoch": 0.318176773095564,
"grad_norm": 0.3369593024253845,
"learning_rate": 5.293110806141832e-06,
"loss": 0.0218,
"step": 2150
},
{
"epoch": 0.31965666506345036,
"grad_norm": 0.3195848762989044,
"learning_rate": 5.000866464069842e-06,
"loss": 0.0194,
"step": 2160
},
{
"epoch": 0.3211365570313367,
"grad_norm": 0.19008630514144897,
"learning_rate": 4.716495667404691e-06,
"loss": 0.0174,
"step": 2170
},
{
"epoch": 0.32261644899922304,
"grad_norm": 0.42658621072769165,
"learning_rate": 4.440048172801725e-06,
"loss": 0.0247,
"step": 2180
},
{
"epoch": 0.3240963409671094,
"grad_norm": 0.6167186498641968,
"learning_rate": 4.171572350567898e-06,
"loss": 0.0193,
"step": 2190
},
{
"epoch": 0.32557623293499577,
"grad_norm": 0.30169373750686646,
"learning_rate": 3.9111151761983265e-06,
"loss": 0.017,
"step": 2200
},
{
"epoch": 0.3270561249028821,
"grad_norm": 0.3697860836982727,
"learning_rate": 3.6587222221569075e-06,
"loss": 0.0149,
"step": 2210
},
{
"epoch": 0.32853601687076844,
"grad_norm": 0.43350812792778015,
"learning_rate": 3.414437649902491e-06,
"loss": 0.0142,
"step": 2220
},
{
"epoch": 0.3300159088386548,
"grad_norm": 0.3299703896045685,
"learning_rate": 3.1783042021619026e-06,
"loss": 0.0161,
"step": 2230
},
{
"epoch": 0.3314958008065411,
"grad_norm": 0.5686686038970947,
"learning_rate": 2.9503631954511833e-06,
"loss": 0.0204,
"step": 2240
},
{
"epoch": 0.33297569277442746,
"grad_norm": 0.28825682401657104,
"learning_rate": 2.7306545128464202e-06,
"loss": 0.0225,
"step": 2250
},
{
"epoch": 0.3344555847423138,
"grad_norm": 0.248334601521492,
"learning_rate": 2.5192165970053307e-06,
"loss": 0.0122,
"step": 2260
},
{
"epoch": 0.33593547671020013,
"grad_norm": 0.4341728985309601,
"learning_rate": 2.316086443440962e-06,
"loss": 0.0164,
"step": 2270
},
{
"epoch": 0.3374153686780865,
"grad_norm": 0.23554910719394684,
"learning_rate": 2.1212995940485036e-06,
"loss": 0.0171,
"step": 2280
},
{
"epoch": 0.33889526064597286,
"grad_norm": 0.4276198744773865,
"learning_rate": 1.9348901308864796e-06,
"loss": 0.0246,
"step": 2290
},
{
"epoch": 0.3403751526138592,
"grad_norm": 0.35541170835494995,
"learning_rate": 1.7568906702134124e-06,
"loss": 0.0112,
"step": 2300
},
{
"epoch": 0.34185504458174554,
"grad_norm": 0.4070402681827545,
"learning_rate": 1.5873323567808963e-06,
"loss": 0.0202,
"step": 2310
},
{
"epoch": 0.3433349365496319,
"grad_norm": 0.16610288619995117,
"learning_rate": 1.4262448583841793e-06,
"loss": 0.0129,
"step": 2320
},
{
"epoch": 0.3448148285175182,
"grad_norm": 0.32411548495292664,
"learning_rate": 1.2736563606711382e-06,
"loss": 0.0173,
"step": 2330
},
{
"epoch": 0.34629472048540455,
"grad_norm": 0.3122292160987854,
"learning_rate": 1.1295935622106513e-06,
"loss": 0.0183,
"step": 2340
},
{
"epoch": 0.3477746124532909,
"grad_norm": 0.3919500708580017,
"learning_rate": 9.94081669821062e-07,
"loss": 0.0223,
"step": 2350
},
{
"epoch": 0.3492545044211773,
"grad_norm": 0.3418029546737671,
"learning_rate": 8.671443941597523e-07,
"loss": 0.0172,
"step": 2360
},
{
"epoch": 0.3507343963890636,
"grad_norm": 0.28939440846443176,
"learning_rate": 7.488039455744611e-07,
"loss": 0.0146,
"step": 2370
},
{
"epoch": 0.35221428835694996,
"grad_norm": 0.407928466796875,
"learning_rate": 6.390810302171146e-07,
"loss": 0.0244,
"step": 2380
},
{
"epoch": 0.3536941803248363,
"grad_norm": 0.32271912693977356,
"learning_rate": 5.379948464208418e-07,
"loss": 0.0214,
"step": 2390
},
{
"epoch": 0.35517407229272263,
"grad_norm": 0.4125051498413086,
"learning_rate": 4.455630813408329e-07,
"loss": 0.0166,
"step": 2400
},
{
"epoch": 0.35665396426060897,
"grad_norm": 0.24020278453826904,
"learning_rate": 3.61801907859588e-07,
"loss": 0.0174,
"step": 2410
},
{
"epoch": 0.3581338562284953,
"grad_norm": 0.2490530014038086,
"learning_rate": 2.867259817571355e-07,
"loss": 0.0224,
"step": 2420
},
{
"epoch": 0.35961374819638164,
"grad_norm": 0.1842329204082489,
"learning_rate": 2.2034843914670588e-07,
"loss": 0.0163,
"step": 2430
},
{
"epoch": 0.361093640164268,
"grad_norm": 0.4745646119117737,
"learning_rate": 1.626808941762703e-07,
"loss": 0.0207,
"step": 2440
},
{
"epoch": 0.3625735321321544,
"grad_norm": 0.4176884889602661,
"learning_rate": 1.1373343699642158e-07,
"loss": 0.0159,
"step": 2450
},
{
"epoch": 0.3640534241000407,
"grad_norm": 0.280038982629776,
"learning_rate": 7.351463199488651e-08,
"loss": 0.017,
"step": 2460
},
{
"epoch": 0.36553331606792705,
"grad_norm": 0.4648924767971039,
"learning_rate": 4.203151629798563e-08,
"loss": 0.0196,
"step": 2470
},
{
"epoch": 0.3670132080358134,
"grad_norm": 0.42389336228370667,
"learning_rate": 1.928959853936263e-08,
"loss": 0.0275,
"step": 2480
},
{
"epoch": 0.3684931000036997,
"grad_norm": 0.5853281021118164,
"learning_rate": 5.292857896133097e-09,
"loss": 0.0182,
"step": 2490
},
{
"epoch": 0.36997299197158606,
"grad_norm": 0.2962166368961334,
"learning_rate": 4.374339263035765e-11,
"loss": 0.014,
"step": 2500
}
],
"logging_steps": 10,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}