{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9996979764421625, "eval_steps": 500, "global_step": 1655, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006040471156750227, "grad_norm": 6.352119345594041, "learning_rate": 4.9999999999999996e-05, "loss": 5.7625, "step": 10 }, { "epoch": 0.012080942313500454, "grad_norm": 3.5107589040708578, "learning_rate": 6.505149978319905e-05, "loss": 3.1477, "step": 20 }, { "epoch": 0.018121413470250678, "grad_norm": 1.7597502982401718, "learning_rate": 7.385606273598311e-05, "loss": 1.8234, "step": 30 }, { "epoch": 0.024161884627000908, "grad_norm": 2.0430582664699704, "learning_rate": 8.01029995663981e-05, "loss": 1.482, "step": 40 }, { "epoch": 0.030202355783751134, "grad_norm": 2.30138022647804, "learning_rate": 8.494850021680092e-05, "loss": 1.427, "step": 50 }, { "epoch": 0.036242826940501356, "grad_norm": 1.961300808555845, "learning_rate": 8.890756251918216e-05, "loss": 1.3652, "step": 60 }, { "epoch": 0.042283298097251586, "grad_norm": 1.5670057056368645, "learning_rate": 9.225490200071284e-05, "loss": 1.2859, "step": 70 }, { "epoch": 0.048323769254001815, "grad_norm": 1.5648604780816326, "learning_rate": 9.515449934959716e-05, "loss": 1.2166, "step": 80 }, { "epoch": 0.05436424041075204, "grad_norm": 1.5594489363452173, "learning_rate": 9.771212547196623e-05, "loss": 1.2299, "step": 90 }, { "epoch": 0.06040471156750227, "grad_norm": 1.478498798125124, "learning_rate": 9.999999999999999e-05, "loss": 1.1406, "step": 100 }, { "epoch": 0.0664451827242525, "grad_norm": 1.3456415286113537, "learning_rate": 9.942122186495178e-05, "loss": 1.1512, "step": 110 }, { "epoch": 0.07248565388100271, "grad_norm": 1.4169868695799486, "learning_rate": 9.877813504823152e-05, "loss": 1.2473, "step": 120 }, { "epoch": 0.07852612503775294, "grad_norm": 1.5442406663559958, "learning_rate": 9.813504823151127e-05, "loss": 1.1609, "step": 130 }, { "epoch": 0.08456659619450317, "grad_norm": 1.3840740480619829, "learning_rate": 9.7491961414791e-05, "loss": 1.1617, "step": 140 }, { "epoch": 0.0906070673512534, "grad_norm": 1.1099163069476086, "learning_rate": 9.684887459807074e-05, "loss": 1.1746, "step": 150 }, { "epoch": 0.09664753850800363, "grad_norm": 1.2420434112131873, "learning_rate": 9.620578778135049e-05, "loss": 1.1504, "step": 160 }, { "epoch": 0.10268800966475385, "grad_norm": 1.081321720135299, "learning_rate": 9.556270096463023e-05, "loss": 1.1605, "step": 170 }, { "epoch": 0.10872848082150408, "grad_norm": 1.0982034537770153, "learning_rate": 9.491961414790998e-05, "loss": 1.0799, "step": 180 }, { "epoch": 0.1147689519782543, "grad_norm": 1.1736811633063773, "learning_rate": 9.427652733118972e-05, "loss": 1.1242, "step": 190 }, { "epoch": 0.12080942313500453, "grad_norm": 1.3595797342301437, "learning_rate": 9.363344051446946e-05, "loss": 1.1016, "step": 200 }, { "epoch": 0.12684989429175475, "grad_norm": 1.2621841618934533, "learning_rate": 9.29903536977492e-05, "loss": 1.0695, "step": 210 }, { "epoch": 0.132890365448505, "grad_norm": 1.2070699106064489, "learning_rate": 9.234726688102894e-05, "loss": 1.1049, "step": 220 }, { "epoch": 0.1389308366052552, "grad_norm": 1.0416383062176056, "learning_rate": 9.17041800643087e-05, "loss": 1.1467, "step": 230 }, { "epoch": 0.14497130776200542, "grad_norm": 1.1856097509180394, "learning_rate": 9.106109324758843e-05, "loss": 1.0746, "step": 240 }, { "epoch": 0.15101177891875567, "grad_norm": 1.144246874115688, "learning_rate": 9.041800643086817e-05, "loss": 1.2018, "step": 250 }, { "epoch": 0.15705225007550588, "grad_norm": 0.9353587652819314, "learning_rate": 8.977491961414792e-05, "loss": 1.0789, "step": 260 }, { "epoch": 0.16309272123225613, "grad_norm": 1.0285527563460544, "learning_rate": 8.913183279742766e-05, "loss": 1.0555, "step": 270 }, { "epoch": 0.16913319238900634, "grad_norm": 0.9852353032151006, "learning_rate": 8.848874598070739e-05, "loss": 1.1451, "step": 280 }, { "epoch": 0.17517366354575656, "grad_norm": 1.175932073617665, "learning_rate": 8.784565916398714e-05, "loss": 1.0789, "step": 290 }, { "epoch": 0.1812141347025068, "grad_norm": 1.213820312892746, "learning_rate": 8.720257234726688e-05, "loss": 1.0475, "step": 300 }, { "epoch": 0.18725460585925702, "grad_norm": 1.1846528600649535, "learning_rate": 8.655948553054663e-05, "loss": 1.0521, "step": 310 }, { "epoch": 0.19329507701600726, "grad_norm": 1.1497197430459096, "learning_rate": 8.591639871382637e-05, "loss": 1.082, "step": 320 }, { "epoch": 0.19933554817275748, "grad_norm": 1.1612797319381625, "learning_rate": 8.52733118971061e-05, "loss": 1.0793, "step": 330 }, { "epoch": 0.2053760193295077, "grad_norm": 1.082739707844854, "learning_rate": 8.463022508038586e-05, "loss": 1.1061, "step": 340 }, { "epoch": 0.21141649048625794, "grad_norm": 1.2757899011878036, "learning_rate": 8.398713826366559e-05, "loss": 1.073, "step": 350 }, { "epoch": 0.21745696164300815, "grad_norm": 0.9540956853203417, "learning_rate": 8.334405144694534e-05, "loss": 1.0135, "step": 360 }, { "epoch": 0.2234974327997584, "grad_norm": 1.0200709241255603, "learning_rate": 8.270096463022508e-05, "loss": 1.1389, "step": 370 }, { "epoch": 0.2295379039565086, "grad_norm": 0.8455317026975807, "learning_rate": 8.205787781350482e-05, "loss": 1.0598, "step": 380 }, { "epoch": 0.23557837511325883, "grad_norm": 1.1381349305391164, "learning_rate": 8.141479099678457e-05, "loss": 1.0777, "step": 390 }, { "epoch": 0.24161884627000907, "grad_norm": 1.103747383663609, "learning_rate": 8.07717041800643e-05, "loss": 1.0617, "step": 400 }, { "epoch": 0.24765931742675928, "grad_norm": 1.138904626814912, "learning_rate": 8.012861736334406e-05, "loss": 1.0324, "step": 410 }, { "epoch": 0.2536997885835095, "grad_norm": 1.0781428262426909, "learning_rate": 7.94855305466238e-05, "loss": 1.0859, "step": 420 }, { "epoch": 0.2597402597402597, "grad_norm": 1.0846995831727089, "learning_rate": 7.884244372990353e-05, "loss": 1.0553, "step": 430 }, { "epoch": 0.26578073089701, "grad_norm": 0.8641072003497396, "learning_rate": 7.819935691318328e-05, "loss": 1.0068, "step": 440 }, { "epoch": 0.2718212020537602, "grad_norm": 1.110226877216393, "learning_rate": 7.755627009646302e-05, "loss": 1.0115, "step": 450 }, { "epoch": 0.2778616732105104, "grad_norm": 1.1418636977442045, "learning_rate": 7.691318327974277e-05, "loss": 1.0373, "step": 460 }, { "epoch": 0.28390214436726063, "grad_norm": 0.9476740742119087, "learning_rate": 7.62700964630225e-05, "loss": 1.0023, "step": 470 }, { "epoch": 0.28994261552401085, "grad_norm": 1.1736350313366837, "learning_rate": 7.562700964630224e-05, "loss": 1.0338, "step": 480 }, { "epoch": 0.2959830866807611, "grad_norm": 1.113923619981689, "learning_rate": 7.4983922829582e-05, "loss": 0.9855, "step": 490 }, { "epoch": 0.30202355783751134, "grad_norm": 1.0389339180007815, "learning_rate": 7.434083601286173e-05, "loss": 1.0525, "step": 500 }, { "epoch": 0.30806402899426155, "grad_norm": 1.161270624803982, "learning_rate": 7.369774919614148e-05, "loss": 1.0697, "step": 510 }, { "epoch": 0.31410450015101177, "grad_norm": 0.8892068789215148, "learning_rate": 7.305466237942122e-05, "loss": 0.9889, "step": 520 }, { "epoch": 0.320144971307762, "grad_norm": 1.0455859065719944, "learning_rate": 7.241157556270097e-05, "loss": 0.9898, "step": 530 }, { "epoch": 0.32618544246451225, "grad_norm": 1.0186894188406512, "learning_rate": 7.17684887459807e-05, "loss": 0.9904, "step": 540 }, { "epoch": 0.33222591362126247, "grad_norm": 1.0072092839183986, "learning_rate": 7.112540192926044e-05, "loss": 0.9873, "step": 550 }, { "epoch": 0.3382663847780127, "grad_norm": 0.9663462582485958, "learning_rate": 7.04823151125402e-05, "loss": 1.0375, "step": 560 }, { "epoch": 0.3443068559347629, "grad_norm": 0.9103712505082897, "learning_rate": 6.983922829581993e-05, "loss": 1.0383, "step": 570 }, { "epoch": 0.3503473270915131, "grad_norm": 1.0485963233672557, "learning_rate": 6.919614147909968e-05, "loss": 0.9758, "step": 580 }, { "epoch": 0.3563877982482634, "grad_norm": 0.9912979171159205, "learning_rate": 6.855305466237942e-05, "loss": 0.993, "step": 590 }, { "epoch": 0.3624282694050136, "grad_norm": 1.1550179596081722, "learning_rate": 6.790996784565917e-05, "loss": 1.032, "step": 600 }, { "epoch": 0.3684687405617638, "grad_norm": 1.0176105219446732, "learning_rate": 6.726688102893891e-05, "loss": 1.0094, "step": 610 }, { "epoch": 0.37450921171851403, "grad_norm": 1.1210756115920897, "learning_rate": 6.662379421221864e-05, "loss": 0.968, "step": 620 }, { "epoch": 0.38054968287526425, "grad_norm": 0.9310657150450071, "learning_rate": 6.59807073954984e-05, "loss": 1.0102, "step": 630 }, { "epoch": 0.3865901540320145, "grad_norm": 1.1633016127800648, "learning_rate": 6.533762057877813e-05, "loss": 1.0256, "step": 640 }, { "epoch": 0.39263062518876474, "grad_norm": 0.8195008101746646, "learning_rate": 6.469453376205788e-05, "loss": 0.9906, "step": 650 }, { "epoch": 0.39867109634551495, "grad_norm": 0.9310761172857367, "learning_rate": 6.405144694533762e-05, "loss": 0.9754, "step": 660 }, { "epoch": 0.40471156750226517, "grad_norm": 1.0516630367824455, "learning_rate": 6.340836012861736e-05, "loss": 1.0275, "step": 670 }, { "epoch": 0.4107520386590154, "grad_norm": 1.0629036969985795, "learning_rate": 6.276527331189711e-05, "loss": 1.0088, "step": 680 }, { "epoch": 0.41679250981576565, "grad_norm": 1.0110994904370134, "learning_rate": 6.212218649517684e-05, "loss": 1.0416, "step": 690 }, { "epoch": 0.42283298097251587, "grad_norm": 0.8895584335650736, "learning_rate": 6.14790996784566e-05, "loss": 1.0762, "step": 700 }, { "epoch": 0.4288734521292661, "grad_norm": 1.0448634604827216, "learning_rate": 6.083601286173633e-05, "loss": 0.9771, "step": 710 }, { "epoch": 0.4349139232860163, "grad_norm": 0.7905625978703504, "learning_rate": 6.019292604501608e-05, "loss": 1.0104, "step": 720 }, { "epoch": 0.4409543944427665, "grad_norm": 1.0522004404580236, "learning_rate": 5.954983922829582e-05, "loss": 0.9977, "step": 730 }, { "epoch": 0.4469948655995168, "grad_norm": 0.989475716298655, "learning_rate": 5.8906752411575564e-05, "loss": 0.9752, "step": 740 }, { "epoch": 0.453035336756267, "grad_norm": 1.1463565668787514, "learning_rate": 5.826366559485531e-05, "loss": 0.9922, "step": 750 }, { "epoch": 0.4590758079130172, "grad_norm": 1.189865689753586, "learning_rate": 5.7620578778135045e-05, "loss": 1.0211, "step": 760 }, { "epoch": 0.46511627906976744, "grad_norm": 1.0920686333427474, "learning_rate": 5.6977491961414796e-05, "loss": 0.9664, "step": 770 }, { "epoch": 0.47115675022651765, "grad_norm": 1.0174913677757231, "learning_rate": 5.633440514469453e-05, "loss": 0.9992, "step": 780 }, { "epoch": 0.4771972213832679, "grad_norm": 1.1913080680366728, "learning_rate": 5.5691318327974284e-05, "loss": 1.033, "step": 790 }, { "epoch": 0.48323769254001814, "grad_norm": 1.0204323369307062, "learning_rate": 5.504823151125402e-05, "loss": 0.9807, "step": 800 }, { "epoch": 0.48927816369676835, "grad_norm": 1.118864062431584, "learning_rate": 5.440514469453376e-05, "loss": 0.974, "step": 810 }, { "epoch": 0.49531863485351857, "grad_norm": 1.0350012597705058, "learning_rate": 5.376205787781351e-05, "loss": 0.9705, "step": 820 }, { "epoch": 0.5013591060102688, "grad_norm": 1.0845595520731828, "learning_rate": 5.3118971061093246e-05, "loss": 0.9582, "step": 830 }, { "epoch": 0.507399577167019, "grad_norm": 0.9453749574005488, "learning_rate": 5.2475884244372996e-05, "loss": 1.0293, "step": 840 }, { "epoch": 0.5134400483237692, "grad_norm": 1.042699724920899, "learning_rate": 5.183279742765273e-05, "loss": 0.952, "step": 850 }, { "epoch": 0.5194805194805194, "grad_norm": 1.024020216858902, "learning_rate": 5.118971061093247e-05, "loss": 1.018, "step": 860 }, { "epoch": 0.5255209906372698, "grad_norm": 1.0360224854752502, "learning_rate": 5.054662379421222e-05, "loss": 0.9295, "step": 870 }, { "epoch": 0.53156146179402, "grad_norm": 1.1683508369643563, "learning_rate": 4.9903536977491965e-05, "loss": 1.0133, "step": 880 }, { "epoch": 0.5376019329507702, "grad_norm": 0.8614966755997071, "learning_rate": 4.92604501607717e-05, "loss": 0.993, "step": 890 }, { "epoch": 0.5436424041075204, "grad_norm": 0.9857361206695465, "learning_rate": 4.8617363344051446e-05, "loss": 0.951, "step": 900 }, { "epoch": 0.5496828752642706, "grad_norm": 0.8956078547131043, "learning_rate": 4.797427652733119e-05, "loss": 0.9268, "step": 910 }, { "epoch": 0.5557233464210208, "grad_norm": 0.8885268581836658, "learning_rate": 4.7331189710610934e-05, "loss": 0.9688, "step": 920 }, { "epoch": 0.561763817577771, "grad_norm": 1.136712299735102, "learning_rate": 4.668810289389068e-05, "loss": 0.9545, "step": 930 }, { "epoch": 0.5678042887345213, "grad_norm": 1.114040839440962, "learning_rate": 4.604501607717042e-05, "loss": 1.0, "step": 940 }, { "epoch": 0.5738447598912715, "grad_norm": 1.2120423884348053, "learning_rate": 4.5401929260450165e-05, "loss": 0.9525, "step": 950 }, { "epoch": 0.5798852310480217, "grad_norm": 0.9841344976466476, "learning_rate": 4.47588424437299e-05, "loss": 1.0389, "step": 960 }, { "epoch": 0.585925702204772, "grad_norm": 1.0459005071658793, "learning_rate": 4.4115755627009646e-05, "loss": 1.0041, "step": 970 }, { "epoch": 0.5919661733615222, "grad_norm": 1.2096290639870764, "learning_rate": 4.347266881028939e-05, "loss": 0.9939, "step": 980 }, { "epoch": 0.5980066445182725, "grad_norm": 1.0171142436328486, "learning_rate": 4.2829581993569134e-05, "loss": 0.9432, "step": 990 }, { "epoch": 0.6040471156750227, "grad_norm": 1.0557726010616861, "learning_rate": 4.218649517684888e-05, "loss": 1.0041, "step": 1000 }, { "epoch": 0.6100875868317729, "grad_norm": 0.9854936325370102, "learning_rate": 4.154340836012862e-05, "loss": 0.9598, "step": 1010 }, { "epoch": 0.6161280579885231, "grad_norm": 1.0002881708013533, "learning_rate": 4.090032154340836e-05, "loss": 0.9537, "step": 1020 }, { "epoch": 0.6221685291452733, "grad_norm": 1.0163254545569222, "learning_rate": 4.02572347266881e-05, "loss": 0.9781, "step": 1030 }, { "epoch": 0.6282090003020235, "grad_norm": 1.0700856083800017, "learning_rate": 3.961414790996785e-05, "loss": 0.9418, "step": 1040 }, { "epoch": 0.6342494714587738, "grad_norm": 0.925802326406659, "learning_rate": 3.897106109324759e-05, "loss": 0.9355, "step": 1050 }, { "epoch": 0.640289942615524, "grad_norm": 1.0173964783122673, "learning_rate": 3.8327974276527335e-05, "loss": 0.9311, "step": 1060 }, { "epoch": 0.6463304137722743, "grad_norm": 1.0886760638416628, "learning_rate": 3.768488745980708e-05, "loss": 1.0104, "step": 1070 }, { "epoch": 0.6523708849290245, "grad_norm": 0.8784480294988184, "learning_rate": 3.7041800643086816e-05, "loss": 0.9631, "step": 1080 }, { "epoch": 0.6584113560857747, "grad_norm": 1.0061477546516637, "learning_rate": 3.639871382636656e-05, "loss": 0.9209, "step": 1090 }, { "epoch": 0.6644518272425249, "grad_norm": 1.0443361002098694, "learning_rate": 3.5755627009646303e-05, "loss": 0.9494, "step": 1100 }, { "epoch": 0.6704922983992752, "grad_norm": 1.064188874483296, "learning_rate": 3.511254019292605e-05, "loss": 0.9871, "step": 1110 }, { "epoch": 0.6765327695560254, "grad_norm": 0.9804777210337282, "learning_rate": 3.446945337620579e-05, "loss": 0.9357, "step": 1120 }, { "epoch": 0.6825732407127756, "grad_norm": 1.0053083581127769, "learning_rate": 3.3826366559485535e-05, "loss": 0.9637, "step": 1130 }, { "epoch": 0.6886137118695258, "grad_norm": 1.0062328413826747, "learning_rate": 3.318327974276528e-05, "loss": 0.9914, "step": 1140 }, { "epoch": 0.694654183026276, "grad_norm": 1.0659942748249296, "learning_rate": 3.2540192926045016e-05, "loss": 0.9332, "step": 1150 }, { "epoch": 0.7006946541830262, "grad_norm": 1.2425227959394274, "learning_rate": 3.189710610932476e-05, "loss": 0.9951, "step": 1160 }, { "epoch": 0.7067351253397764, "grad_norm": 1.0269910163244402, "learning_rate": 3.1254019292604504e-05, "loss": 0.9805, "step": 1170 }, { "epoch": 0.7127755964965268, "grad_norm": 1.067485855484705, "learning_rate": 3.061093247588425e-05, "loss": 0.975, "step": 1180 }, { "epoch": 0.718816067653277, "grad_norm": 0.9819234749980894, "learning_rate": 2.996784565916399e-05, "loss": 0.9805, "step": 1190 }, { "epoch": 0.7248565388100272, "grad_norm": 0.9915476687600292, "learning_rate": 2.9324758842443732e-05, "loss": 0.9365, "step": 1200 }, { "epoch": 0.7308970099667774, "grad_norm": 1.048821106029052, "learning_rate": 2.8681672025723473e-05, "loss": 0.9861, "step": 1210 }, { "epoch": 0.7369374811235276, "grad_norm": 0.9121022219179783, "learning_rate": 2.8038585209003217e-05, "loss": 0.9854, "step": 1220 }, { "epoch": 0.7429779522802779, "grad_norm": 0.9525288844616361, "learning_rate": 2.739549839228296e-05, "loss": 0.948, "step": 1230 }, { "epoch": 0.7490184234370281, "grad_norm": 1.1391100357014714, "learning_rate": 2.67524115755627e-05, "loss": 0.8887, "step": 1240 }, { "epoch": 0.7550588945937783, "grad_norm": 1.1078585866746686, "learning_rate": 2.6109324758842445e-05, "loss": 0.9715, "step": 1250 }, { "epoch": 0.7610993657505285, "grad_norm": 0.9323091046055703, "learning_rate": 2.546623794212219e-05, "loss": 0.9359, "step": 1260 }, { "epoch": 0.7671398369072787, "grad_norm": 1.146046378807531, "learning_rate": 2.482315112540193e-05, "loss": 0.8967, "step": 1270 }, { "epoch": 0.773180308064029, "grad_norm": 1.14163561365635, "learning_rate": 2.4180064308681673e-05, "loss": 0.9707, "step": 1280 }, { "epoch": 0.7792207792207793, "grad_norm": 1.0034307361335086, "learning_rate": 2.3536977491961414e-05, "loss": 0.9668, "step": 1290 }, { "epoch": 0.7852612503775295, "grad_norm": 1.1199578096409222, "learning_rate": 2.2893890675241158e-05, "loss": 0.968, "step": 1300 }, { "epoch": 0.7913017215342797, "grad_norm": 1.0809213063777794, "learning_rate": 2.22508038585209e-05, "loss": 0.9264, "step": 1310 }, { "epoch": 0.7973421926910299, "grad_norm": 1.1266044230874084, "learning_rate": 2.1607717041800642e-05, "loss": 1.0203, "step": 1320 }, { "epoch": 0.8033826638477801, "grad_norm": 0.8540728431635001, "learning_rate": 2.0964630225080386e-05, "loss": 0.924, "step": 1330 }, { "epoch": 0.8094231350045303, "grad_norm": 1.0063045365537524, "learning_rate": 2.032154340836013e-05, "loss": 0.9656, "step": 1340 }, { "epoch": 0.8154636061612806, "grad_norm": 0.9432512605760064, "learning_rate": 1.967845659163987e-05, "loss": 1.0254, "step": 1350 }, { "epoch": 0.8215040773180308, "grad_norm": 1.162199676010242, "learning_rate": 1.9035369774919614e-05, "loss": 0.9549, "step": 1360 }, { "epoch": 0.827544548474781, "grad_norm": 1.0184734737311432, "learning_rate": 1.8392282958199358e-05, "loss": 0.8937, "step": 1370 }, { "epoch": 0.8335850196315313, "grad_norm": 1.0321283898574098, "learning_rate": 1.77491961414791e-05, "loss": 0.9641, "step": 1380 }, { "epoch": 0.8396254907882815, "grad_norm": 0.948342731761788, "learning_rate": 1.7106109324758842e-05, "loss": 0.907, "step": 1390 }, { "epoch": 0.8456659619450317, "grad_norm": 1.016072519476065, "learning_rate": 1.6463022508038586e-05, "loss": 0.999, "step": 1400 }, { "epoch": 0.851706433101782, "grad_norm": 0.8891894724654406, "learning_rate": 1.581993569131833e-05, "loss": 0.9238, "step": 1410 }, { "epoch": 0.8577469042585322, "grad_norm": 1.0116016221718374, "learning_rate": 1.517684887459807e-05, "loss": 0.9488, "step": 1420 }, { "epoch": 0.8637873754152824, "grad_norm": 1.092739541532347, "learning_rate": 1.4533762057877815e-05, "loss": 0.9971, "step": 1430 }, { "epoch": 0.8698278465720326, "grad_norm": 0.8563957174375877, "learning_rate": 1.3890675241157558e-05, "loss": 1.0133, "step": 1440 }, { "epoch": 0.8758683177287828, "grad_norm": 1.089288756096709, "learning_rate": 1.3247588424437299e-05, "loss": 0.9127, "step": 1450 }, { "epoch": 0.881908788885533, "grad_norm": 0.9458299067214011, "learning_rate": 1.2604501607717043e-05, "loss": 0.9668, "step": 1460 }, { "epoch": 0.8879492600422833, "grad_norm": 1.2229020868039389, "learning_rate": 1.1961414790996785e-05, "loss": 0.9506, "step": 1470 }, { "epoch": 0.8939897311990336, "grad_norm": 1.0615217830735095, "learning_rate": 1.1318327974276529e-05, "loss": 0.9219, "step": 1480 }, { "epoch": 0.9000302023557838, "grad_norm": 1.1622944573297567, "learning_rate": 1.0675241157556271e-05, "loss": 0.9391, "step": 1490 }, { "epoch": 0.906070673512534, "grad_norm": 0.9443566657008828, "learning_rate": 1.0032154340836013e-05, "loss": 0.8955, "step": 1500 }, { "epoch": 0.9121111446692842, "grad_norm": 1.1367332066610845, "learning_rate": 9.389067524115757e-06, "loss": 0.9385, "step": 1510 }, { "epoch": 0.9181516158260344, "grad_norm": 0.9414922696266849, "learning_rate": 8.7459807073955e-06, "loss": 0.9318, "step": 1520 }, { "epoch": 0.9241920869827847, "grad_norm": 1.0844935096035118, "learning_rate": 8.102893890675242e-06, "loss": 0.9311, "step": 1530 }, { "epoch": 0.9302325581395349, "grad_norm": 1.120509038145209, "learning_rate": 7.459807073954985e-06, "loss": 0.9422, "step": 1540 }, { "epoch": 0.9362730292962851, "grad_norm": 0.9486397409946379, "learning_rate": 6.816720257234727e-06, "loss": 0.9162, "step": 1550 }, { "epoch": 0.9423135004530353, "grad_norm": 1.1412479158621043, "learning_rate": 6.17363344051447e-06, "loss": 0.9676, "step": 1560 }, { "epoch": 0.9483539716097855, "grad_norm": 1.050318112867144, "learning_rate": 5.530546623794212e-06, "loss": 0.9754, "step": 1570 }, { "epoch": 0.9543944427665358, "grad_norm": 1.0466438135565226, "learning_rate": 4.887459807073955e-06, "loss": 0.926, "step": 1580 }, { "epoch": 0.9604349139232861, "grad_norm": 0.9720115456232871, "learning_rate": 4.244372990353698e-06, "loss": 0.9447, "step": 1590 }, { "epoch": 0.9664753850800363, "grad_norm": 0.8786923129454015, "learning_rate": 3.6012861736334403e-06, "loss": 0.9443, "step": 1600 }, { "epoch": 0.9725158562367865, "grad_norm": 1.0733513985052048, "learning_rate": 2.9581993569131834e-06, "loss": 0.9053, "step": 1610 }, { "epoch": 0.9785563273935367, "grad_norm": 0.8731368575153022, "learning_rate": 2.315112540192926e-06, "loss": 0.9803, "step": 1620 }, { "epoch": 0.9845967985502869, "grad_norm": 1.0060959243542735, "learning_rate": 1.6720257234726688e-06, "loss": 0.9563, "step": 1630 }, { "epoch": 0.9906372697070371, "grad_norm": 0.9113226806468382, "learning_rate": 1.0289389067524116e-06, "loss": 0.9191, "step": 1640 }, { "epoch": 0.9966777408637874, "grad_norm": 0.9160377841463557, "learning_rate": 3.8585209003215437e-07, "loss": 0.9117, "step": 1650 } ], "logging_steps": 10, "max_steps": 1655, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }