{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.36997299197158606,
  "eval_steps": 500,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0014798919678863443,
      "grad_norm": 7.47622537612915,
      "learning_rate": 7.2e-06,
      "loss": 1.1232,
      "step": 10
    },
    {
      "epoch": 0.0029597839357726886,
      "grad_norm": 1.880067229270935,
      "learning_rate": 1.52e-05,
      "loss": 0.5006,
      "step": 20
    },
    {
      "epoch": 0.004439675903659033,
      "grad_norm": 1.9676536321640015,
      "learning_rate": 2.32e-05,
      "loss": 0.2438,
      "step": 30
    },
    {
      "epoch": 0.005919567871545377,
      "grad_norm": 1.628406047821045,
      "learning_rate": 3.12e-05,
      "loss": 0.1641,
      "step": 40
    },
    {
      "epoch": 0.007399459839431722,
      "grad_norm": 2.5903799533843994,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 0.1585,
      "step": 50
    },
    {
      "epoch": 0.008879351807318065,
      "grad_norm": 1.520799160003662,
      "learning_rate": 4.72e-05,
      "loss": 0.1282,
      "step": 60
    },
    {
      "epoch": 0.01035924377520441,
      "grad_norm": 1.7377681732177734,
      "learning_rate": 5.520000000000001e-05,
      "loss": 0.1256,
      "step": 70
    },
    {
      "epoch": 0.011839135743090754,
      "grad_norm": 1.098388433456421,
      "learning_rate": 6.32e-05,
      "loss": 0.1031,
      "step": 80
    },
    {
      "epoch": 0.013319027710977099,
      "grad_norm": 1.4509732723236084,
      "learning_rate": 7.12e-05,
      "loss": 0.0973,
      "step": 90
    },
    {
      "epoch": 0.014798919678863444,
      "grad_norm": 1.9007303714752197,
      "learning_rate": 7.920000000000001e-05,
      "loss": 0.1152,
      "step": 100
    },
    {
      "epoch": 0.016278811646749786,
      "grad_norm": 1.5740575790405273,
      "learning_rate": 8.72e-05,
      "loss": 0.1151,
      "step": 110
    },
    {
      "epoch": 0.01775870361463613,
      "grad_norm": 1.2793081998825073,
      "learning_rate": 9.52e-05,
      "loss": 0.1182,
      "step": 120
    },
    {
      "epoch": 0.019238595582522475,
      "grad_norm": 1.5838185548782349,
      "learning_rate": 9.999930010724872e-05,
      "loss": 0.0955,
      "step": 130
    },
    {
      "epoch": 0.02071848755040882,
      "grad_norm": 1.8031917810440063,
      "learning_rate": 9.999142653881985e-05,
      "loss": 0.1141,
      "step": 140
    },
    {
      "epoch": 0.022198379518295164,
      "grad_norm": 1.5219581127166748,
      "learning_rate": 9.997480591826183e-05,
      "loss": 0.1069,
      "step": 150
    },
    {
      "epoch": 0.02367827148618151,
      "grad_norm": 1.1764310598373413,
      "learning_rate": 9.994944115370199e-05,
      "loss": 0.0964,
      "step": 160
    },
    {
      "epoch": 0.025158163454067854,
      "grad_norm": 1.0616095066070557,
      "learning_rate": 9.991533668323974e-05,
      "loss": 0.0761,
      "step": 170
    },
    {
      "epoch": 0.026638055421954198,
      "grad_norm": 1.1210483312606812,
      "learning_rate": 9.987249847416987e-05,
      "loss": 0.087,
      "step": 180
    },
    {
      "epoch": 0.028117947389840543,
      "grad_norm": 1.1077345609664917,
      "learning_rate": 9.982093402193857e-05,
      "loss": 0.0931,
      "step": 190
    },
    {
      "epoch": 0.029597839357726887,
      "grad_norm": 1.352004051208496,
      "learning_rate": 9.976065234883193e-05,
      "loss": 0.083,
      "step": 200
    },
    {
      "epoch": 0.03107773132561323,
      "grad_norm": 1.0884106159210205,
      "learning_rate": 9.969166400239726e-05,
      "loss": 0.0783,
      "step": 210
    },
    {
      "epoch": 0.03255762329349957,
      "grad_norm": 1.1031562089920044,
      "learning_rate": 9.961398105359764e-05,
      "loss": 0.1047,
      "step": 220
    },
    {
      "epoch": 0.03403751526138592,
      "grad_norm": 1.0741270780563354,
      "learning_rate": 9.952761709469975e-05,
      "loss": 0.1017,
      "step": 230
    },
    {
      "epoch": 0.03551740722927226,
      "grad_norm": 1.2804232835769653,
      "learning_rate": 9.94325872368957e-05,
      "loss": 0.0968,
      "step": 240
    },
    {
      "epoch": 0.036997299197158606,
      "grad_norm": 1.0856305360794067,
      "learning_rate": 9.932890810765902e-05,
      "loss": 0.0644,
      "step": 250
    },
    {
      "epoch": 0.03847719116504495,
      "grad_norm": 1.1068660020828247,
      "learning_rate": 9.921659784783526e-05,
      "loss": 0.0644,
      "step": 260
    },
    {
      "epoch": 0.039957083132931295,
      "grad_norm": 1.043481469154358,
      "learning_rate": 9.909567610846788e-05,
      "loss": 0.0667,
      "step": 270
    },
    {
      "epoch": 0.04143697510081764,
      "grad_norm": 1.3862495422363281,
      "learning_rate": 9.896616404736001e-05,
      "loss": 0.0694,
      "step": 280
    },
    {
      "epoch": 0.042916867068703984,
      "grad_norm": 1.405779480934143,
      "learning_rate": 9.882808432537224e-05,
      "loss": 0.0823,
      "step": 290
    },
    {
      "epoch": 0.04439675903659033,
      "grad_norm": 1.4005481004714966,
      "learning_rate": 9.86814611024578e-05,
      "loss": 0.0812,
      "step": 300
    },
    {
      "epoch": 0.04587665100447667,
      "grad_norm": 0.8486709594726562,
      "learning_rate": 9.852632003343518e-05,
      "loss": 0.0966,
      "step": 310
    },
    {
      "epoch": 0.04735654297236302,
      "grad_norm": 0.6197394132614136,
      "learning_rate": 9.836268826349933e-05,
      "loss": 0.0595,
      "step": 320
    },
    {
      "epoch": 0.04883643494024936,
      "grad_norm": 0.776345431804657,
      "learning_rate": 9.819059442347193e-05,
      "loss": 0.0599,
      "step": 330
    },
    {
      "epoch": 0.05031632690813571,
      "grad_norm": 0.7929945588111877,
      "learning_rate": 9.801006862479202e-05,
      "loss": 0.07,
      "step": 340
    },
    {
      "epoch": 0.05179621887602205,
      "grad_norm": 0.9411725997924805,
      "learning_rate": 9.782114245424718e-05,
      "loss": 0.0637,
      "step": 350
    },
    {
      "epoch": 0.053276110843908396,
      "grad_norm": 0.825547993183136,
      "learning_rate": 9.762384896844684e-05,
      "loss": 0.0628,
      "step": 360
    },
    {
      "epoch": 0.05475600281179474,
      "grad_norm": 1.1270116567611694,
      "learning_rate": 9.741822268803833e-05,
      "loss": 0.0702,
      "step": 370
    },
    {
      "epoch": 0.056235894779681085,
      "grad_norm": 0.947913408279419,
      "learning_rate": 9.720429959166675e-05,
      "loss": 0.073,
      "step": 380
    },
    {
      "epoch": 0.05771578674756743,
      "grad_norm": 1.0599220991134644,
      "learning_rate": 9.69821171096798e-05,
      "loss": 0.0678,
      "step": 390
    },
    {
      "epoch": 0.059195678715453774,
      "grad_norm": 1.0569720268249512,
      "learning_rate": 9.675171411757842e-05,
      "loss": 0.0653,
      "step": 400
    },
    {
      "epoch": 0.06067557068334012,
      "grad_norm": 1.1427547931671143,
      "learning_rate": 9.65131309292149e-05,
      "loss": 0.0739,
      "step": 410
    },
    {
      "epoch": 0.06215546265122646,
      "grad_norm": 0.816104531288147,
      "learning_rate": 9.626640928973892e-05,
      "loss": 0.0518,
      "step": 420
    },
    {
      "epoch": 0.06363535461911281,
      "grad_norm": 0.7173371911048889,
      "learning_rate": 9.601159236829352e-05,
      "loss": 0.0687,
      "step": 430
    },
    {
      "epoch": 0.06511524658699915,
      "grad_norm": 0.8344293236732483,
      "learning_rate": 9.574872475046166e-05,
      "loss": 0.059,
      "step": 440
    },
    {
      "epoch": 0.0665951385548855,
      "grad_norm": 0.6896253824234009,
      "learning_rate": 9.547785243046505e-05,
      "loss": 0.046,
      "step": 450
    },
    {
      "epoch": 0.06807503052277183,
      "grad_norm": 1.0698304176330566,
      "learning_rate": 9.519902280311653e-05,
      "loss": 0.0511,
      "step": 460
    },
    {
      "epoch": 0.06955492249065819,
      "grad_norm": 0.7512239217758179,
      "learning_rate": 9.491228465552726e-05,
      "loss": 0.0578,
      "step": 470
    },
    {
      "epoch": 0.07103481445854452,
      "grad_norm": 1.0382441282272339,
      "learning_rate": 9.461768815857053e-05,
      "loss": 0.0618,
      "step": 480
    },
    {
      "epoch": 0.07251470642643087,
      "grad_norm": 0.7745275497436523,
      "learning_rate": 9.431528485810316e-05,
      "loss": 0.069,
      "step": 490
    },
    {
      "epoch": 0.07399459839431721,
      "grad_norm": 0.7567397952079773,
      "learning_rate": 9.400512766594659e-05,
      "loss": 0.047,
      "step": 500
    },
    {
      "epoch": 0.07547449036220356,
      "grad_norm": 0.7035396695137024,
      "learning_rate": 9.368727085062872e-05,
      "loss": 0.0563,
      "step": 510
    },
    {
      "epoch": 0.0769543823300899,
      "grad_norm": 0.8480479717254639,
      "learning_rate": 9.336177002788862e-05,
      "loss": 0.0498,
      "step": 520
    },
    {
      "epoch": 0.07843427429797625,
      "grad_norm": 0.6571253538131714,
      "learning_rate": 9.302868215094534e-05,
      "loss": 0.0579,
      "step": 530
    },
    {
      "epoch": 0.07991416626586259,
      "grad_norm": 0.6242689490318298,
      "learning_rate": 9.268806550053264e-05,
      "loss": 0.0465,
      "step": 540
    },
    {
      "epoch": 0.08139405823374894,
      "grad_norm": 1.0013155937194824,
      "learning_rate": 9.233997967470174e-05,
      "loss": 0.0792,
      "step": 550
    },
    {
      "epoch": 0.08287395020163528,
      "grad_norm": 1.0697152614593506,
      "learning_rate": 9.198448557839321e-05,
      "loss": 0.0707,
      "step": 560
    },
    {
      "epoch": 0.08435384216952163,
      "grad_norm": 0.9719803333282471,
      "learning_rate": 9.162164541278051e-05,
      "loss": 0.0586,
      "step": 570
    },
    {
      "epoch": 0.08583373413740797,
      "grad_norm": 1.0201226472854614,
      "learning_rate": 9.125152266438649e-05,
      "loss": 0.0535,
      "step": 580
    },
    {
      "epoch": 0.08731362610529432,
      "grad_norm": 0.8870009779930115,
      "learning_rate": 9.087418209397506e-05,
      "loss": 0.058,
      "step": 590
    },
    {
      "epoch": 0.08879351807318066,
      "grad_norm": 0.7464373707771301,
      "learning_rate": 9.04896897252201e-05,
      "loss": 0.0676,
      "step": 600
    },
    {
      "epoch": 0.090273410041067,
      "grad_norm": 0.6516630053520203,
      "learning_rate": 9.009811283315304e-05,
      "loss": 0.0506,
      "step": 610
    },
    {
      "epoch": 0.09175330200895335,
      "grad_norm": 0.6615219712257385,
      "learning_rate": 8.969951993239177e-05,
      "loss": 0.0479,
      "step": 620
    },
    {
      "epoch": 0.09323319397683968,
      "grad_norm": 0.6776785850524902,
      "learning_rate": 8.929398076515259e-05,
      "loss": 0.0459,
      "step": 630
    },
    {
      "epoch": 0.09471308594472604,
      "grad_norm": 0.7998208403587341,
      "learning_rate": 8.888156628904724e-05,
      "loss": 0.0493,
      "step": 640
    },
    {
      "epoch": 0.09619297791261237,
      "grad_norm": 0.8668547868728638,
      "learning_rate": 8.846234866466747e-05,
      "loss": 0.0431,
      "step": 650
    },
    {
      "epoch": 0.09767286988049872,
      "grad_norm": 0.7501780390739441,
      "learning_rate": 8.803640124295902e-05,
      "loss": 0.0664,
      "step": 660
    },
    {
      "epoch": 0.09915276184838506,
      "grad_norm": 0.5983700156211853,
      "learning_rate": 8.760379855238723e-05,
      "loss": 0.0424,
      "step": 670
    },
    {
      "epoch": 0.10063265381627141,
      "grad_norm": 0.7453112006187439,
      "learning_rate": 8.716461628589683e-05,
      "loss": 0.0508,
      "step": 680
    },
    {
      "epoch": 0.10211254578415775,
      "grad_norm": 0.7227299809455872,
      "learning_rate": 8.671893128766784e-05,
      "loss": 0.045,
      "step": 690
    },
    {
      "epoch": 0.1035924377520441,
      "grad_norm": 0.7978153824806213,
      "learning_rate": 8.626682153967001e-05,
      "loss": 0.0473,
      "step": 700
    },
    {
      "epoch": 0.10507232971993044,
      "grad_norm": 0.6989890336990356,
      "learning_rate": 8.580836614801827e-05,
      "loss": 0.046,
      "step": 710
    },
    {
      "epoch": 0.10655222168781679,
      "grad_norm": 0.8369282484054565,
      "learning_rate": 8.534364532913144e-05,
      "loss": 0.049,
      "step": 720
    },
    {
      "epoch": 0.10803211365570313,
      "grad_norm": 0.5447561144828796,
      "learning_rate": 8.487274039569675e-05,
      "loss": 0.0428,
      "step": 730
    },
    {
      "epoch": 0.10951200562358948,
      "grad_norm": 0.7491251826286316,
      "learning_rate": 8.439573374244237e-05,
      "loss": 0.0368,
      "step": 740
    },
    {
      "epoch": 0.11099189759147582,
      "grad_norm": 0.48374220728874207,
      "learning_rate": 8.391270883172073e-05,
      "loss": 0.0403,
      "step": 750
    },
    {
      "epoch": 0.11247178955936217,
      "grad_norm": 0.9518898129463196,
      "learning_rate": 8.342375017890512e-05,
      "loss": 0.038,
      "step": 760
    },
    {
      "epoch": 0.11395168152724851,
      "grad_norm": 0.8046047687530518,
      "learning_rate": 8.292894333760186e-05,
      "loss": 0.0414,
      "step": 770
    },
    {
      "epoch": 0.11543157349513486,
      "grad_norm": 0.6393342614173889,
      "learning_rate": 8.242837488468087e-05,
      "loss": 0.0615,
      "step": 780
    },
    {
      "epoch": 0.1169114654630212,
      "grad_norm": 0.6403966546058655,
      "learning_rate": 8.192213240512737e-05,
      "loss": 0.0403,
      "step": 790
    },
    {
      "epoch": 0.11839135743090755,
      "grad_norm": 0.5846046805381775,
      "learning_rate": 8.141030447671686e-05,
      "loss": 0.0376,
      "step": 800
    },
    {
      "epoch": 0.11987124939879389,
      "grad_norm": 1.1184839010238647,
      "learning_rate": 8.089298065451672e-05,
      "loss": 0.042,
      "step": 810
    },
    {
      "epoch": 0.12135114136668024,
      "grad_norm": 0.804345965385437,
      "learning_rate": 8.037025145521657e-05,
      "loss": 0.0448,
      "step": 820
    },
    {
      "epoch": 0.12283103333456658,
      "grad_norm": 0.40974166989326477,
      "learning_rate": 7.984220834129052e-05,
      "loss": 0.0691,
      "step": 830
    },
    {
      "epoch": 0.12431092530245293,
      "grad_norm": 0.5297871232032776,
      "learning_rate": 7.93089437049939e-05,
      "loss": 0.0477,
      "step": 840
    },
    {
      "epoch": 0.12579081727033928,
      "grad_norm": 0.6452186107635498,
      "learning_rate": 7.877055085219721e-05,
      "loss": 0.0505,
      "step": 850
    },
    {
      "epoch": 0.12727070923822562,
      "grad_norm": 0.6497986316680908,
      "learning_rate": 7.82271239860604e-05,
      "loss": 0.0471,
      "step": 860
    },
    {
      "epoch": 0.12875060120611195,
      "grad_norm": 0.6113291382789612,
      "learning_rate": 7.767875819054997e-05,
      "loss": 0.0485,
      "step": 870
    },
    {
      "epoch": 0.1302304931739983,
      "grad_norm": 0.8844181895256042,
      "learning_rate": 7.712554941380206e-05,
      "loss": 0.0429,
      "step": 880
    },
    {
      "epoch": 0.13171038514188466,
      "grad_norm": 0.7872292995452881,
      "learning_rate": 7.656759445133428e-05,
      "loss": 0.0471,
      "step": 890
    },
    {
      "epoch": 0.133190277109771,
      "grad_norm": 0.8881542682647705,
      "learning_rate": 7.600499092910934e-05,
      "loss": 0.0498,
      "step": 900
    },
    {
      "epoch": 0.13467016907765733,
      "grad_norm": 0.6990298628807068,
      "learning_rate": 7.543783728645328e-05,
      "loss": 0.0385,
      "step": 910
    },
    {
      "epoch": 0.13615006104554367,
      "grad_norm": 0.8354172110557556,
      "learning_rate": 7.486623275883151e-05,
      "loss": 0.0347,
      "step": 920
    },
    {
      "epoch": 0.13762995301343,
      "grad_norm": 0.8185162544250488,
      "learning_rate": 7.429027736048535e-05,
      "loss": 0.0409,
      "step": 930
    },
    {
      "epoch": 0.13910984498131637,
      "grad_norm": 0.9761926531791687,
      "learning_rate": 7.37100718669326e-05,
      "loss": 0.0422,
      "step": 940
    },
    {
      "epoch": 0.1405897369492027,
      "grad_norm": 0.5239071249961853,
      "learning_rate": 7.312571779733463e-05,
      "loss": 0.0269,
      "step": 950
    },
    {
      "epoch": 0.14206962891708905,
      "grad_norm": 0.7226734757423401,
      "learning_rate": 7.253731739673349e-05,
      "loss": 0.0366,
      "step": 960
    },
    {
      "epoch": 0.14354952088497538,
      "grad_norm": 0.6310585141181946,
      "learning_rate": 7.194497361816196e-05,
      "loss": 0.0527,
      "step": 970
    },
    {
      "epoch": 0.14502941285286175,
      "grad_norm": 0.5752474665641785,
      "learning_rate": 7.134879010462988e-05,
      "loss": 0.0312,
      "step": 980
    },
    {
      "epoch": 0.1465093048207481,
      "grad_norm": 0.5619791150093079,
      "learning_rate": 7.07488711709894e-05,
      "loss": 0.0314,
      "step": 990
    },
    {
      "epoch": 0.14798919678863443,
      "grad_norm": 0.6874368190765381,
      "learning_rate": 7.014532178568314e-05,
      "loss": 0.0294,
      "step": 1000
    },
    {
      "epoch": 0.14946908875652076,
      "grad_norm": 0.6269967555999756,
      "learning_rate": 6.953824755237756e-05,
      "loss": 0.0357,
      "step": 1010
    },
    {
      "epoch": 0.15094898072440713,
      "grad_norm": 0.7482361793518066,
      "learning_rate": 6.892775469148553e-05,
      "loss": 0.043,
      "step": 1020
    },
    {
      "epoch": 0.15242887269229347,
      "grad_norm": 0.7447315454483032,
      "learning_rate": 6.831395002158067e-05,
      "loss": 0.0319,
      "step": 1030
    },
    {
      "epoch": 0.1539087646601798,
      "grad_norm": 0.5281906127929688,
      "learning_rate": 6.76969409407074e-05,
      "loss": 0.0311,
      "step": 1040
    },
    {
      "epoch": 0.15538865662806614,
      "grad_norm": 0.5784289240837097,
      "learning_rate": 6.707683540758915e-05,
      "loss": 0.0362,
      "step": 1050
    },
    {
      "epoch": 0.1568685485959525,
      "grad_norm": 0.5917581915855408,
      "learning_rate": 6.645374192273894e-05,
      "loss": 0.0406,
      "step": 1060
    },
    {
      "epoch": 0.15834844056383884,
      "grad_norm": 0.8599121570587158,
      "learning_rate": 6.582776950947474e-05,
      "loss": 0.0468,
      "step": 1070
    },
    {
      "epoch": 0.15982833253172518,
      "grad_norm": 0.5944121479988098,
      "learning_rate": 6.519902769484368e-05,
      "loss": 0.0464,
      "step": 1080
    },
    {
      "epoch": 0.16130822449961152,
      "grad_norm": 0.6295680999755859,
      "learning_rate": 6.456762649045796e-05,
      "loss": 0.0371,
      "step": 1090
    },
    {
      "epoch": 0.16278811646749788,
      "grad_norm": 1.0905753374099731,
      "learning_rate": 6.393367637324593e-05,
      "loss": 0.0465,
      "step": 1100
    },
    {
      "epoch": 0.16426800843538422,
      "grad_norm": 0.7028170228004456,
      "learning_rate": 6.329728826612192e-05,
      "loss": 0.0493,
      "step": 1110
    },
    {
      "epoch": 0.16574790040327056,
      "grad_norm": 0.6126824617385864,
      "learning_rate": 6.265857351857788e-05,
      "loss": 0.0369,
      "step": 1120
    },
    {
      "epoch": 0.1672277923711569,
      "grad_norm": 0.7399412393569946,
      "learning_rate": 6.201764388720049e-05,
      "loss": 0.0412,
      "step": 1130
    },
    {
      "epoch": 0.16870768433904326,
      "grad_norm": 0.5751072764396667,
      "learning_rate": 6.137461151611692e-05,
      "loss": 0.0446,
      "step": 1140
    },
    {
      "epoch": 0.1701875763069296,
      "grad_norm": 0.4830611050128937,
      "learning_rate": 6.072958891737296e-05,
      "loss": 0.0396,
      "step": 1150
    },
    {
      "epoch": 0.17166746827481594,
      "grad_norm": 0.5445456504821777,
      "learning_rate": 6.00826889512466e-05,
      "loss": 0.0367,
      "step": 1160
    },
    {
      "epoch": 0.17314736024270228,
      "grad_norm": 0.7560216188430786,
      "learning_rate": 5.943402480650071e-05,
      "loss": 0.0328,
      "step": 1170
    },
    {
      "epoch": 0.17462725221058864,
      "grad_norm": 0.6100801229476929,
      "learning_rate": 5.8783709980578414e-05,
      "loss": 0.031,
      "step": 1180
    },
    {
      "epoch": 0.17610714417847498,
      "grad_norm": 0.5339049696922302,
      "learning_rate": 5.813185825974419e-05,
      "loss": 0.0365,
      "step": 1190
    },
    {
      "epoch": 0.17758703614636132,
      "grad_norm": 0.7650025486946106,
      "learning_rate": 5.747858369917465e-05,
      "loss": 0.034,
      "step": 1200
    },
    {
      "epoch": 0.17906692811424765,
      "grad_norm": 0.6139253377914429,
      "learning_rate": 5.682400060300213e-05,
      "loss": 0.0399,
      "step": 1210
    },
    {
      "epoch": 0.180546820082134,
      "grad_norm": 0.514707088470459,
      "learning_rate": 5.6168223504314863e-05,
      "loss": 0.0371,
      "step": 1220
    },
    {
      "epoch": 0.18202671205002036,
      "grad_norm": 0.5504834651947021,
      "learning_rate": 5.551136714511691e-05,
      "loss": 0.0338,
      "step": 1230
    },
    {
      "epoch": 0.1835066040179067,
      "grad_norm": 0.6514374017715454,
      "learning_rate": 5.485354645625167e-05,
      "loss": 0.0392,
      "step": 1240
    },
    {
      "epoch": 0.18498649598579303,
      "grad_norm": 0.5081560015678406,
      "learning_rate": 5.419487653729234e-05,
      "loss": 0.0332,
      "step": 1250
    },
    {
      "epoch": 0.18646638795367937,
      "grad_norm": 0.904439389705658,
      "learning_rate": 5.353547263640273e-05,
      "loss": 0.0382,
      "step": 1260
    },
    {
      "epoch": 0.18794627992156573,
      "grad_norm": 0.5332797765731812,
      "learning_rate": 5.2875450130172324e-05,
      "loss": 0.0221,
      "step": 1270
    },
    {
      "epoch": 0.18942617188945207,
      "grad_norm": 0.6516315937042236,
      "learning_rate": 5.221492450342856e-05,
      "loss": 0.0371,
      "step": 1280
    },
    {
      "epoch": 0.1909060638573384,
      "grad_norm": 0.7484680414199829,
      "learning_rate": 5.155401132903045e-05,
      "loss": 0.0377,
      "step": 1290
    },
    {
      "epoch": 0.19238595582522475,
      "grad_norm": 0.9511647820472717,
      "learning_rate": 5.089282624764654e-05,
      "loss": 0.0386,
      "step": 1300
    },
    {
      "epoch": 0.1938658477931111,
      "grad_norm": 0.3802145719528198,
      "learning_rate": 5.0231484947521336e-05,
      "loss": 0.0324,
      "step": 1310
    },
    {
      "epoch": 0.19534573976099745,
      "grad_norm": 0.5430207252502441,
      "learning_rate": 4.9570103144233024e-05,
      "loss": 0.0296,
      "step": 1320
    },
    {
      "epoch": 0.1968256317288838,
      "grad_norm": 0.631252646446228,
      "learning_rate": 4.890879656044669e-05,
      "loss": 0.0241,
      "step": 1330
    },
    {
      "epoch": 0.19830552369677013,
      "grad_norm": 0.4435235261917114,
      "learning_rate": 4.824768090566618e-05,
      "loss": 0.0285,
      "step": 1340
    },
    {
      "epoch": 0.1997854156646565,
      "grad_norm": 0.7757057547569275,
      "learning_rate": 4.7586871855988326e-05,
      "loss": 0.0419,
      "step": 1350
    },
    {
      "epoch": 0.20126530763254283,
      "grad_norm": 0.6761030554771423,
      "learning_rate": 4.692648503386289e-05,
      "loss": 0.0338,
      "step": 1360
    },
    {
      "epoch": 0.20274519960042917,
      "grad_norm": 0.3639812767505646,
      "learning_rate": 4.6266635987862086e-05,
      "loss": 0.0244,
      "step": 1370
    },
    {
      "epoch": 0.2042250915683155,
      "grad_norm": 0.3727477490901947,
      "learning_rate": 4.560744017246284e-05,
      "loss": 0.025,
      "step": 1380
    },
    {
      "epoch": 0.20570498353620187,
      "grad_norm": 0.5406703948974609,
      "learning_rate": 4.4949012927845676e-05,
      "loss": 0.031,
      "step": 1390
    },
    {
      "epoch": 0.2071848755040882,
      "grad_norm": 0.45045414566993713,
      "learning_rate": 4.429146945971346e-05,
      "loss": 0.0236,
      "step": 1400
    },
    {
      "epoch": 0.20866476747197454,
      "grad_norm": 0.549584686756134,
      "learning_rate": 4.3634924819133746e-05,
      "loss": 0.0286,
      "step": 1410
    },
    {
      "epoch": 0.21014465943986088,
      "grad_norm": 0.7233926653862,
      "learning_rate": 4.297949388240823e-05,
      "loss": 0.0297,
      "step": 1420
    },
    {
      "epoch": 0.21162455140774725,
      "grad_norm": 0.5810157656669617,
      "learning_rate": 4.2325291330972664e-05,
      "loss": 0.0189,
      "step": 1430
    },
    {
      "epoch": 0.21310444337563358,
      "grad_norm": 0.5259445905685425,
      "learning_rate": 4.167243163133094e-05,
      "loss": 0.0271,
      "step": 1440
    },
    {
      "epoch": 0.21458433534351992,
      "grad_norm": 0.5740777254104614,
      "learning_rate": 4.1021029015026736e-05,
      "loss": 0.0367,
      "step": 1450
    },
    {
      "epoch": 0.21606422731140626,
      "grad_norm": 0.6542416214942932,
      "learning_rate": 4.037119745865641e-05,
      "loss": 0.0323,
      "step": 1460
    },
    {
      "epoch": 0.21754411927929262,
      "grad_norm": 0.5732387900352478,
      "learning_rate": 3.972305066392626e-05,
      "loss": 0.0295,
      "step": 1470
    },
    {
      "epoch": 0.21902401124717896,
      "grad_norm": 0.8444030284881592,
      "learning_rate": 3.9076702037758076e-05,
      "loss": 0.027,
      "step": 1480
    },
    {
      "epoch": 0.2205039032150653,
      "grad_norm": 0.36182349920272827,
      "learning_rate": 3.8432264672446293e-05,
      "loss": 0.0306,
      "step": 1490
    },
    {
      "epoch": 0.22198379518295164,
      "grad_norm": 0.367953360080719,
      "learning_rate": 3.778985132586995e-05,
      "loss": 0.0258,
      "step": 1500
    },
    {
      "epoch": 0.22346368715083798,
      "grad_norm": 0.43248283863067627,
      "learning_rate": 3.714957440176345e-05,
      "loss": 0.0237,
      "step": 1510
    },
    {
      "epoch": 0.22494357911872434,
      "grad_norm": 0.5691545605659485,
      "learning_rate": 3.651154593004911e-05,
      "loss": 0.0257,
      "step": 1520
    },
    {
      "epoch": 0.22642347108661068,
      "grad_norm": 0.4153839945793152,
      "learning_rate": 3.587587754723523e-05,
      "loss": 0.0256,
      "step": 1530
    },
    {
      "epoch": 0.22790336305449702,
      "grad_norm": 0.4405042827129364,
      "learning_rate": 3.5242680476882815e-05,
      "loss": 0.0243,
      "step": 1540
    },
    {
      "epoch": 0.22938325502238335,
      "grad_norm": 0.5467635989189148,
      "learning_rate": 3.461206551014481e-05,
      "loss": 0.0242,
      "step": 1550
    },
    {
      "epoch": 0.23086314699026972,
      "grad_norm": 0.4557804763317108,
      "learning_rate": 3.3984142986380764e-05,
      "loss": 0.0338,
      "step": 1560
    },
    {
      "epoch": 0.23234303895815606,
      "grad_norm": 0.39460909366607666,
      "learning_rate": 3.335902277385067e-05,
      "loss": 0.0205,
      "step": 1570
    },
    {
      "epoch": 0.2338229309260424,
      "grad_norm": 0.5078433752059937,
      "learning_rate": 3.2736814250491196e-05,
      "loss": 0.0248,
      "step": 1580
    },
    {
      "epoch": 0.23530282289392873,
      "grad_norm": 0.6240681409835815,
      "learning_rate": 3.211762628477771e-05,
      "loss": 0.0312,
      "step": 1590
    },
    {
      "epoch": 0.2367827148618151,
      "grad_norm": 0.37351563572883606,
      "learning_rate": 3.150156721667547e-05,
      "loss": 0.0245,
      "step": 1600
    },
    {
      "epoch": 0.23826260682970143,
      "grad_norm": 0.3536587953567505,
      "learning_rate": 3.088874483868325e-05,
      "loss": 0.021,
      "step": 1610
    },
    {
      "epoch": 0.23974249879758777,
      "grad_norm": 0.30240142345428467,
      "learning_rate": 3.0279266376972715e-05,
      "loss": 0.025,
      "step": 1620
    },
    {
      "epoch": 0.2412223907654741,
      "grad_norm": 0.4821987450122833,
      "learning_rate": 2.96732384726271e-05,
      "loss": 0.0427,
      "step": 1630
    },
    {
      "epoch": 0.24270228273336047,
      "grad_norm": 0.6150904297828674,
      "learning_rate": 2.907076716298196e-05,
      "loss": 0.0297,
      "step": 1640
    },
    {
      "epoch": 0.2441821747012468,
      "grad_norm": 0.49226638674736023,
      "learning_rate": 2.847195786307174e-05,
      "loss": 0.0267,
      "step": 1650
    },
    {
      "epoch": 0.24566206666913315,
      "grad_norm": 0.49682047963142395,
      "learning_rate": 2.7876915347185227e-05,
      "loss": 0.0326,
      "step": 1660
    },
    {
      "epoch": 0.2471419586370195,
      "grad_norm": 0.42303502559661865,
      "learning_rate": 2.7285743730533143e-05,
      "loss": 0.0212,
      "step": 1670
    },
    {
      "epoch": 0.24862185060490585,
      "grad_norm": 0.5483497977256775,
      "learning_rate": 2.6698546451030826e-05,
      "loss": 0.0202,
      "step": 1680
    },
    {
      "epoch": 0.25010174257279216,
      "grad_norm": 0.5056328177452087,
      "learning_rate": 2.611542625119975e-05,
      "loss": 0.0228,
      "step": 1690
    },
    {
      "epoch": 0.25158163454067856,
      "grad_norm": 0.32099300622940063,
      "learning_rate": 2.5536485160190482e-05,
      "loss": 0.0224,
      "step": 1700
    },
    {
      "epoch": 0.2530615265085649,
      "grad_norm": 0.5951581597328186,
      "learning_rate": 2.496182447593055e-05,
      "loss": 0.0272,
      "step": 1710
    },
    {
      "epoch": 0.25454141847645123,
      "grad_norm": 0.45417091250419617,
      "learning_rate": 2.4391544747400252e-05,
      "loss": 0.0227,
      "step": 1720
    },
    {
      "epoch": 0.25602131044433757,
      "grad_norm": 0.5122156739234924,
      "learning_rate": 2.3825745757039452e-05,
      "loss": 0.024,
      "step": 1730
    },
    {
      "epoch": 0.2575012024122239,
      "grad_norm": 0.4841585159301758,
      "learning_rate": 2.3264526503288642e-05,
      "loss": 0.0225,
      "step": 1740
    },
    {
      "epoch": 0.25898109438011024,
      "grad_norm": 0.43821659684181213,
      "learning_rate": 2.2707985183266978e-05,
      "loss": 0.0215,
      "step": 1750
    },
    {
      "epoch": 0.2604609863479966,
      "grad_norm": 0.42495104670524597,
      "learning_rate": 2.215621917559062e-05,
      "loss": 0.0213,
      "step": 1760
    },
    {
      "epoch": 0.2619408783158829,
      "grad_norm": 0.4166795015335083,
      "learning_rate": 2.1609325023334377e-05,
      "loss": 0.018,
      "step": 1770
    },
    {
      "epoch": 0.2634207702837693,
      "grad_norm": 0.29304540157318115,
      "learning_rate": 2.1067398417139466e-05,
      "loss": 0.029,
      "step": 1780
    },
    {
      "epoch": 0.26490066225165565,
      "grad_norm": 0.6335829496383667,
      "learning_rate": 2.0530534178470322e-05,
      "loss": 0.021,
      "step": 1790
    },
    {
      "epoch": 0.266380554219542,
      "grad_norm": 0.35307204723358154,
      "learning_rate": 1.9998826243023666e-05,
      "loss": 0.021,
      "step": 1800
    },
    {
      "epoch": 0.2678604461874283,
      "grad_norm": 0.4115915894508362,
      "learning_rate": 1.9472367644292457e-05,
      "loss": 0.0212,
      "step": 1810
    },
    {
      "epoch": 0.26934033815531466,
      "grad_norm": 0.5536908507347107,
      "learning_rate": 1.8951250497287716e-05,
      "loss": 0.0291,
      "step": 1820
    },
    {
      "epoch": 0.270820230123201,
      "grad_norm": 0.6118968725204468,
      "learning_rate": 1.843556598242109e-05,
      "loss": 0.0351,
      "step": 1830
    },
    {
      "epoch": 0.27230012209108734,
      "grad_norm": 0.5278725624084473,
      "learning_rate": 1.792540432955087e-05,
      "loss": 0.0243,
      "step": 1840
    },
    {
      "epoch": 0.2737800140589737,
      "grad_norm": 0.365327388048172,
      "learning_rate": 1.742085480219449e-05,
      "loss": 0.0303,
      "step": 1850
    },
    {
      "epoch": 0.27525990602686,
      "grad_norm": 0.5491426587104797,
      "learning_rate": 1.6922005681909843e-05,
      "loss": 0.0321,
      "step": 1860
    },
    {
      "epoch": 0.2767397979947464,
      "grad_norm": 0.5193561315536499,
      "learning_rate": 1.642894425284867e-05,
      "loss": 0.0207,
      "step": 1870
    },
    {
      "epoch": 0.27821968996263274,
      "grad_norm": 0.39915916323661804,
      "learning_rate": 1.5941756786484335e-05,
      "loss": 0.0197,
      "step": 1880
    },
    {
      "epoch": 0.2796995819305191,
      "grad_norm": 0.4479086101055145,
      "learning_rate": 1.5460528526516804e-05,
      "loss": 0.0233,
      "step": 1890
    },
    {
      "epoch": 0.2811794738984054,
      "grad_norm": 0.3311406373977661,
      "learning_rate": 1.498534367395748e-05,
      "loss": 0.0202,
      "step": 1900
    },
    {
      "epoch": 0.28265936586629176,
      "grad_norm": 0.2992391586303711,
      "learning_rate": 1.4516285372396437e-05,
      "loss": 0.0264,
      "step": 1910
    },
    {
      "epoch": 0.2841392578341781,
      "grad_norm": 0.480135440826416,
      "learning_rate": 1.4053435693454775e-05,
      "loss": 0.0221,
      "step": 1920
    },
    {
      "epoch": 0.28561914980206443,
      "grad_norm": 0.27029678225517273,
      "learning_rate": 1.359687562242437e-05,
      "loss": 0.0153,
      "step": 1930
    },
    {
      "epoch": 0.28709904176995077,
      "grad_norm": 0.5432844758033752,
      "learning_rate": 1.314668504409779e-05,
      "loss": 0.0207,
      "step": 1940
    },
    {
      "epoch": 0.28857893373783716,
      "grad_norm": 0.4192567765712738,
      "learning_rate": 1.2702942728790895e-05,
      "loss": 0.0168,
      "step": 1950
    },
    {
      "epoch": 0.2900588257057235,
      "grad_norm": 0.5646419525146484,
      "learning_rate": 1.2265726318560172e-05,
      "loss": 0.0269,
      "step": 1960
    },
    {
      "epoch": 0.29153871767360984,
      "grad_norm": 0.47364342212677,
      "learning_rate": 1.1835112313617697e-05,
      "loss": 0.0158,
      "step": 1970
    },
    {
      "epoch": 0.2930186096414962,
      "grad_norm": 0.3461420238018036,
      "learning_rate": 1.1411176058945771e-05,
      "loss": 0.0212,
      "step": 1980
    },
    {
      "epoch": 0.2944985016093825,
      "grad_norm": 0.34054312109947205,
      "learning_rate": 1.0993991731113817e-05,
      "loss": 0.0143,
      "step": 1990
    },
    {
      "epoch": 0.29597839357726885,
      "grad_norm": 0.48517104983329773,
      "learning_rate": 1.058363232529948e-05,
      "loss": 0.0256,
      "step": 2000
    },
    {
      "epoch": 0.2974582855451552,
      "grad_norm": 0.4162018299102783,
      "learning_rate": 1.0180169642516718e-05,
      "loss": 0.033,
      "step": 2010
    },
    {
      "epoch": 0.2989381775130415,
      "grad_norm": 0.4353783130645752,
      "learning_rate": 9.783674277052667e-06,
      "loss": 0.018,
      "step": 2020
    },
    {
      "epoch": 0.3004180694809279,
      "grad_norm": 0.4821432828903198,
      "learning_rate": 9.394215604115641e-06,
      "loss": 0.0136,
      "step": 2030
    },
    {
      "epoch": 0.30189796144881426,
      "grad_norm": 0.3058512806892395,
      "learning_rate": 9.011861767696522e-06,
      "loss": 0.0217,
      "step": 2040
    },
    {
      "epoch": 0.3033778534167006,
      "grad_norm": 0.4392576813697815,
      "learning_rate": 8.636679668645536e-06,
      "loss": 0.0182,
      "step": 2050
    },
    {
      "epoch": 0.30485774538458693,
      "grad_norm": 0.3896658718585968,
      "learning_rate": 8.268734952966505e-06,
      "loss": 0.0189,
      "step": 2060
    },
    {
      "epoch": 0.30633763735247327,
      "grad_norm": 0.4406259059906006,
      "learning_rate": 7.908092000330747e-06,
      "loss": 0.0145,
      "step": 2070
    },
    {
      "epoch": 0.3078175293203596,
      "grad_norm": 0.357083261013031,
      "learning_rate": 7.5548139128124364e-06,
      "loss": 0.0177,
      "step": 2080
    },
    {
      "epoch": 0.30929742128824594,
      "grad_norm": 0.2061689794063568,
      "learning_rate": 7.2089625038476606e-06,
      "loss": 0.0173,
      "step": 2090
    },
    {
      "epoch": 0.3107773132561323,
      "grad_norm": 0.4433155953884125,
      "learning_rate": 6.87059828741875e-06,
      "loss": 0.0188,
      "step": 2100
    },
    {
      "epoch": 0.3122572052240186,
      "grad_norm": 0.31645211577415466,
      "learning_rate": 6.539780467466172e-06,
      "loss": 0.0322,
      "step": 2110
    },
    {
      "epoch": 0.313737097191905,
      "grad_norm": 0.3037683367729187,
      "learning_rate": 6.216566927529455e-06,
      "loss": 0.0177,
      "step": 2120
    },
    {
      "epoch": 0.31521698915979135,
      "grad_norm": 0.2664813995361328,
      "learning_rate": 5.9010142206194e-06,
      "loss": 0.018,
      "step": 2130
    },
    {
      "epoch": 0.3166968811276777,
      "grad_norm": 0.22976835072040558,
      "learning_rate": 5.593177559322777e-06,
      "loss": 0.0145,
      "step": 2140
    },
    {
      "epoch": 0.318176773095564,
      "grad_norm": 0.3369593024253845,
      "learning_rate": 5.293110806141832e-06,
      "loss": 0.0218,
      "step": 2150
    },
    {
      "epoch": 0.31965666506345036,
      "grad_norm": 0.3195848762989044,
      "learning_rate": 5.000866464069842e-06,
      "loss": 0.0194,
      "step": 2160
    },
    {
      "epoch": 0.3211365570313367,
      "grad_norm": 0.19008630514144897,
      "learning_rate": 4.716495667404691e-06,
      "loss": 0.0174,
      "step": 2170
    },
    {
      "epoch": 0.32261644899922304,
      "grad_norm": 0.42658621072769165,
      "learning_rate": 4.440048172801725e-06,
      "loss": 0.0247,
      "step": 2180
    },
    {
      "epoch": 0.3240963409671094,
      "grad_norm": 0.6167186498641968,
      "learning_rate": 4.171572350567898e-06,
      "loss": 0.0193,
      "step": 2190
    },
    {
      "epoch": 0.32557623293499577,
      "grad_norm": 0.30169373750686646,
      "learning_rate": 3.9111151761983265e-06,
      "loss": 0.017,
      "step": 2200
    },
    {
      "epoch": 0.3270561249028821,
      "grad_norm": 0.3697860836982727,
      "learning_rate": 3.6587222221569075e-06,
      "loss": 0.0149,
      "step": 2210
    },
    {
      "epoch": 0.32853601687076844,
      "grad_norm": 0.43350812792778015,
      "learning_rate": 3.414437649902491e-06,
      "loss": 0.0142,
      "step": 2220
    },
    {
      "epoch": 0.3300159088386548,
      "grad_norm": 0.3299703896045685,
      "learning_rate": 3.1783042021619026e-06,
      "loss": 0.0161,
      "step": 2230
    },
    {
      "epoch": 0.3314958008065411,
      "grad_norm": 0.5686686038970947,
      "learning_rate": 2.9503631954511833e-06,
      "loss": 0.0204,
      "step": 2240
    },
    {
      "epoch": 0.33297569277442746,
      "grad_norm": 0.28825682401657104,
      "learning_rate": 2.7306545128464202e-06,
      "loss": 0.0225,
      "step": 2250
    },
    {
      "epoch": 0.3344555847423138,
      "grad_norm": 0.248334601521492,
      "learning_rate": 2.5192165970053307e-06,
      "loss": 0.0122,
      "step": 2260
    },
    {
      "epoch": 0.33593547671020013,
      "grad_norm": 0.4341728985309601,
      "learning_rate": 2.316086443440962e-06,
      "loss": 0.0164,
      "step": 2270
    },
    {
      "epoch": 0.3374153686780865,
      "grad_norm": 0.23554910719394684,
      "learning_rate": 2.1212995940485036e-06,
      "loss": 0.0171,
      "step": 2280
    },
    {
      "epoch": 0.33889526064597286,
      "grad_norm": 0.4276198744773865,
      "learning_rate": 1.9348901308864796e-06,
      "loss": 0.0246,
      "step": 2290
    },
    {
      "epoch": 0.3403751526138592,
      "grad_norm": 0.35541170835494995,
      "learning_rate": 1.7568906702134124e-06,
      "loss": 0.0112,
      "step": 2300
    },
    {
      "epoch": 0.34185504458174554,
      "grad_norm": 0.4070402681827545,
      "learning_rate": 1.5873323567808963e-06,
      "loss": 0.0202,
      "step": 2310
    },
    {
      "epoch": 0.3433349365496319,
      "grad_norm": 0.16610288619995117,
      "learning_rate": 1.4262448583841793e-06,
      "loss": 0.0129,
      "step": 2320
    },
    {
      "epoch": 0.3448148285175182,
      "grad_norm": 0.32411548495292664,
      "learning_rate": 1.2736563606711382e-06,
      "loss": 0.0173,
      "step": 2330
    },
    {
      "epoch": 0.34629472048540455,
      "grad_norm": 0.3122292160987854,
      "learning_rate": 1.1295935622106513e-06,
      "loss": 0.0183,
      "step": 2340
    },
    {
      "epoch": 0.3477746124532909,
      "grad_norm": 0.3919500708580017,
      "learning_rate": 9.94081669821062e-07,
      "loss": 0.0223,
      "step": 2350
    },
    {
      "epoch": 0.3492545044211773,
      "grad_norm": 0.3418029546737671,
      "learning_rate": 8.671443941597523e-07,
      "loss": 0.0172,
      "step": 2360
    },
    {
      "epoch": 0.3507343963890636,
      "grad_norm": 0.28939440846443176,
      "learning_rate": 7.488039455744611e-07,
      "loss": 0.0146,
      "step": 2370
    },
    {
      "epoch": 0.35221428835694996,
      "grad_norm": 0.407928466796875,
      "learning_rate": 6.390810302171146e-07,
      "loss": 0.0244,
      "step": 2380
    },
    {
      "epoch": 0.3536941803248363,
      "grad_norm": 0.32271912693977356,
      "learning_rate": 5.379948464208418e-07,
      "loss": 0.0214,
      "step": 2390
    },
    {
      "epoch": 0.35517407229272263,
      "grad_norm": 0.4125051498413086,
      "learning_rate": 4.455630813408329e-07,
      "loss": 0.0166,
      "step": 2400
    },
    {
      "epoch": 0.35665396426060897,
      "grad_norm": 0.24020278453826904,
      "learning_rate": 3.61801907859588e-07,
      "loss": 0.0174,
      "step": 2410
    },
    {
      "epoch": 0.3581338562284953,
      "grad_norm": 0.2490530014038086,
      "learning_rate": 2.867259817571355e-07,
      "loss": 0.0224,
      "step": 2420
    },
    {
      "epoch": 0.35961374819638164,
      "grad_norm": 0.1842329204082489,
      "learning_rate": 2.2034843914670588e-07,
      "loss": 0.0163,
      "step": 2430
    },
    {
      "epoch": 0.361093640164268,
      "grad_norm": 0.4745646119117737,
      "learning_rate": 1.626808941762703e-07,
      "loss": 0.0207,
      "step": 2440
    },
    {
      "epoch": 0.3625735321321544,
      "grad_norm": 0.4176884889602661,
      "learning_rate": 1.1373343699642158e-07,
      "loss": 0.0159,
      "step": 2450
    },
    {
      "epoch": 0.3640534241000407,
      "grad_norm": 0.280038982629776,
      "learning_rate": 7.351463199488651e-08,
      "loss": 0.017,
      "step": 2460
    },
    {
      "epoch": 0.36553331606792705,
      "grad_norm": 0.4648924767971039,
      "learning_rate": 4.203151629798563e-08,
      "loss": 0.0196,
      "step": 2470
    },
    {
      "epoch": 0.3670132080358134,
      "grad_norm": 0.42389336228370667,
      "learning_rate": 1.928959853936263e-08,
      "loss": 0.0275,
      "step": 2480
    },
    {
      "epoch": 0.3684931000036997,
      "grad_norm": 0.5853281021118164,
      "learning_rate": 5.292857896133097e-09,
      "loss": 0.0182,
      "step": 2490
    },
    {
      "epoch": 0.36997299197158606,
      "grad_norm": 0.2962166368961334,
      "learning_rate": 4.374339263035765e-11,
      "loss": 0.014,
      "step": 2500
    }
  ],
  "logging_steps": 10,
  "max_steps": 2500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}