| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500.0, | |
| "global_step": 1876, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010666666666666667, | |
| "grad_norm": 9.75, | |
| "learning_rate": 3.5087719298245616e-07, | |
| "loss": 0.06797242164611816, | |
| "step": 1, | |
| "token_acc": 0.970873786407767 | |
| }, | |
| { | |
| "epoch": 0.010666666666666666, | |
| "grad_norm": 3.375, | |
| "learning_rate": 3.5087719298245615e-06, | |
| "loss": 0.04619172546598646, | |
| "step": 10, | |
| "token_acc": 0.9840170484816196 | |
| }, | |
| { | |
| "epoch": 0.021333333333333333, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 7.017543859649123e-06, | |
| "loss": 0.03136685192584991, | |
| "step": 20, | |
| "token_acc": 0.9862650602409638 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 2.234375, | |
| "learning_rate": 1.0526315789473684e-05, | |
| "loss": 0.035260409116744995, | |
| "step": 30, | |
| "token_acc": 0.9865771812080537 | |
| }, | |
| { | |
| "epoch": 0.042666666666666665, | |
| "grad_norm": 2.90625, | |
| "learning_rate": 1.4035087719298246e-05, | |
| "loss": 0.03275148570537567, | |
| "step": 40, | |
| "token_acc": 0.985594237695078 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "grad_norm": 2.296875, | |
| "learning_rate": 1.754385964912281e-05, | |
| "loss": 0.02667125463485718, | |
| "step": 50, | |
| "token_acc": 0.9887181949111858 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 1.8203125, | |
| "learning_rate": 1.9999865771143805e-05, | |
| "loss": 0.042026185989379884, | |
| "step": 60, | |
| "token_acc": 0.9822286263208453 | |
| }, | |
| { | |
| "epoch": 0.07466666666666667, | |
| "grad_norm": 3.046875, | |
| "learning_rate": 1.9997479580610527e-05, | |
| "loss": 0.041328877210617065, | |
| "step": 70, | |
| "token_acc": 0.9809270883631096 | |
| }, | |
| { | |
| "epoch": 0.08533333333333333, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 1.9992111345867986e-05, | |
| "loss": 0.03837056159973144, | |
| "step": 80, | |
| "token_acc": 0.9831162566328991 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 1.59375, | |
| "learning_rate": 1.9983762668151084e-05, | |
| "loss": 0.032483524084091185, | |
| "step": 90, | |
| "token_acc": 0.9863896848137536 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 1.8671875, | |
| "learning_rate": 1.9972436037700082e-05, | |
| "loss": 0.03307614922523498, | |
| "step": 100, | |
| "token_acc": 0.9855560905151661 | |
| }, | |
| { | |
| "epoch": 0.11733333333333333, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 1.9958134833017768e-05, | |
| "loss": 0.0380557119846344, | |
| "step": 110, | |
| "token_acc": 0.98277117204562 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 3.53125, | |
| "learning_rate": 1.9940863319861758e-05, | |
| "loss": 0.047079157829284665, | |
| "step": 120, | |
| "token_acc": 0.9812545061283345 | |
| }, | |
| { | |
| "epoch": 0.13866666666666666, | |
| "grad_norm": 2.625, | |
| "learning_rate": 1.992062664997209e-05, | |
| "loss": 0.039805743098258975, | |
| "step": 130, | |
| "token_acc": 0.9844460397224216 | |
| }, | |
| { | |
| "epoch": 0.14933333333333335, | |
| "grad_norm": 3.03125, | |
| "learning_rate": 1.9897430859534553e-05, | |
| "loss": 0.029691782593727113, | |
| "step": 140, | |
| "token_acc": 0.9876781831360232 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 2.203125, | |
| "learning_rate": 1.9871282867380245e-05, | |
| "loss": 0.03704167008399963, | |
| "step": 150, | |
| "token_acc": 0.9851104707012488 | |
| }, | |
| { | |
| "epoch": 0.17066666666666666, | |
| "grad_norm": 2.828125, | |
| "learning_rate": 1.9842190472921802e-05, | |
| "loss": 0.03181208968162537, | |
| "step": 160, | |
| "token_acc": 0.9877049180327869 | |
| }, | |
| { | |
| "epoch": 0.18133333333333335, | |
| "grad_norm": 2.0, | |
| "learning_rate": 1.9810162353827e-05, | |
| "loss": 0.03544786870479584, | |
| "step": 170, | |
| "token_acc": 0.9834333733493398 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 2.453125, | |
| "learning_rate": 1.977520806343039e-05, | |
| "loss": 0.02816782593727112, | |
| "step": 180, | |
| "token_acc": 0.9887935145445875 | |
| }, | |
| { | |
| "epoch": 0.20266666666666666, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 1.9737338027883727e-05, | |
| "loss": 0.033563745021820066, | |
| "step": 190, | |
| "token_acc": 0.9869910864851843 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 1.969656354304606e-05, | |
| "loss": 0.037265950441360475, | |
| "step": 200, | |
| "token_acc": 0.983140655105973 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 2.1875, | |
| "learning_rate": 1.9652896771114416e-05, | |
| "loss": 0.03406996726989746, | |
| "step": 210, | |
| "token_acc": 0.9867565615217915 | |
| }, | |
| { | |
| "epoch": 0.23466666666666666, | |
| "grad_norm": 1.875, | |
| "learning_rate": 1.960635073699605e-05, | |
| "loss": 0.036304399371147156, | |
| "step": 220, | |
| "token_acc": 0.9850169163847269 | |
| }, | |
| { | |
| "epoch": 0.24533333333333332, | |
| "grad_norm": 2.0625, | |
| "learning_rate": 1.95569393244234e-05, | |
| "loss": 0.03020932674407959, | |
| "step": 230, | |
| "token_acc": 0.9867437936852254 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 3.046875, | |
| "learning_rate": 1.950467727181284e-05, | |
| "loss": 0.03292643427848816, | |
| "step": 240, | |
| "token_acc": 0.986077772443591 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 1.9449580167868506e-05, | |
| "loss": 0.03676349222660065, | |
| "step": 250, | |
| "token_acc": 0.983389504092441 | |
| }, | |
| { | |
| "epoch": 0.2773333333333333, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 1.939166444693252e-05, | |
| "loss": 0.03957515358924866, | |
| "step": 260, | |
| "token_acc": 0.982388419782871 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 1.875, | |
| "learning_rate": 1.9330947384082957e-05, | |
| "loss": 0.03727341294288635, | |
| "step": 270, | |
| "token_acc": 0.9828998073217726 | |
| }, | |
| { | |
| "epoch": 0.2986666666666667, | |
| "grad_norm": 1.0, | |
| "learning_rate": 1.9267447089981018e-05, | |
| "loss": 0.04420234560966492, | |
| "step": 280, | |
| "token_acc": 0.9821299203091041 | |
| }, | |
| { | |
| "epoch": 0.30933333333333335, | |
| "grad_norm": 2.046875, | |
| "learning_rate": 1.9201182505469035e-05, | |
| "loss": 0.03257325291633606, | |
| "step": 290, | |
| "token_acc": 0.986077772443591 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 2.265625, | |
| "learning_rate": 1.9132173395920756e-05, | |
| "loss": 0.04046020805835724, | |
| "step": 300, | |
| "token_acc": 0.9863243761996161 | |
| }, | |
| { | |
| "epoch": 0.33066666666666666, | |
| "grad_norm": 1.6640625, | |
| "learning_rate": 1.9060440345345762e-05, | |
| "loss": 0.0318397045135498, | |
| "step": 310, | |
| "token_acc": 0.9875 | |
| }, | |
| { | |
| "epoch": 0.3413333333333333, | |
| "grad_norm": 2.4375, | |
| "learning_rate": 1.8986004750249687e-05, | |
| "loss": 0.030359810590744017, | |
| "step": 320, | |
| "token_acc": 0.9879576107899807 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 2.59375, | |
| "learning_rate": 1.890888881325205e-05, | |
| "loss": 0.03387542963027954, | |
| "step": 330, | |
| "token_acc": 0.9860710854947167 | |
| }, | |
| { | |
| "epoch": 0.3626666666666667, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 1.8829115536463682e-05, | |
| "loss": 0.02995218336582184, | |
| "step": 340, | |
| "token_acc": 0.9878048780487805 | |
| }, | |
| { | |
| "epoch": 0.37333333333333335, | |
| "grad_norm": 1.5546875, | |
| "learning_rate": 1.8746708714625668e-05, | |
| "loss": 0.03572494685649872, | |
| "step": 350, | |
| "token_acc": 0.9858173076923077 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 1.796875, | |
| "learning_rate": 1.8661692928011858e-05, | |
| "loss": 0.027254277467727663, | |
| "step": 360, | |
| "token_acc": 0.987460815047022 | |
| }, | |
| { | |
| "epoch": 0.39466666666666667, | |
| "grad_norm": 2.640625, | |
| "learning_rate": 1.8574093535097062e-05, | |
| "loss": 0.03353258669376373, | |
| "step": 370, | |
| "token_acc": 0.984844840028867 | |
| }, | |
| { | |
| "epoch": 0.4053333333333333, | |
| "grad_norm": 1.6484375, | |
| "learning_rate": 1.8483936664993152e-05, | |
| "loss": 0.03917534947395325, | |
| "step": 380, | |
| "token_acc": 0.9830917874396136 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 1.839124920965527e-05, | |
| "loss": 0.03518342077732086, | |
| "step": 390, | |
| "token_acc": 0.9841346153846153 | |
| }, | |
| { | |
| "epoch": 0.4266666666666667, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 1.829605881586051e-05, | |
| "loss": 0.03069465160369873, | |
| "step": 400, | |
| "token_acc": 0.9872504209766658 | |
| }, | |
| { | |
| "epoch": 0.43733333333333335, | |
| "grad_norm": 1.5546875, | |
| "learning_rate": 1.8198393876961446e-05, | |
| "loss": 0.02751390039920807, | |
| "step": 410, | |
| "token_acc": 0.9887613582018173 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 1.6640625, | |
| "learning_rate": 1.8098283524416982e-05, | |
| "loss": 0.026150995492935182, | |
| "step": 420, | |
| "token_acc": 0.9882606612362242 | |
| }, | |
| { | |
| "epoch": 0.45866666666666667, | |
| "grad_norm": 1.828125, | |
| "learning_rate": 1.7995757619103012e-05, | |
| "loss": 0.03289719820022583, | |
| "step": 430, | |
| "token_acc": 0.9862617498192335 | |
| }, | |
| { | |
| "epoch": 0.4693333333333333, | |
| "grad_norm": 1.921875, | |
| "learning_rate": 1.7890846742405558e-05, | |
| "loss": 0.035283339023590085, | |
| "step": 440, | |
| "token_acc": 0.98579340235974 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 2.203125, | |
| "learning_rate": 1.7783582187098944e-05, | |
| "loss": 0.03278760313987732, | |
| "step": 450, | |
| "token_acc": 0.9858716475095786 | |
| }, | |
| { | |
| "epoch": 0.49066666666666664, | |
| "grad_norm": 1.6796875, | |
| "learning_rate": 1.7673995948011807e-05, | |
| "loss": 0.030364534258842467, | |
| "step": 460, | |
| "token_acc": 0.9877873563218391 | |
| }, | |
| { | |
| "epoch": 0.5013333333333333, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 1.75621207124837e-05, | |
| "loss": 0.02944081127643585, | |
| "step": 470, | |
| "token_acc": 0.9857522337599613 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 2.359375, | |
| "learning_rate": 1.7447989850615114e-05, | |
| "loss": 0.03358876705169678, | |
| "step": 480, | |
| "token_acc": 0.9858343337334934 | |
| }, | |
| { | |
| "epoch": 0.5226666666666666, | |
| "grad_norm": 1.9609375, | |
| "learning_rate": 1.733163740531386e-05, | |
| "loss": 0.03116101920604706, | |
| "step": 490, | |
| "token_acc": 0.9863603732950467 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 1.7213098082140774e-05, | |
| "loss": 0.030902951955795288, | |
| "step": 500, | |
| "token_acc": 0.9863276565123531 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 1.7092407238957737e-05, | |
| "loss": 0.03449364304542542, | |
| "step": 510, | |
| "token_acc": 0.988502994011976 | |
| }, | |
| { | |
| "epoch": 0.5546666666666666, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 1.6969600875381172e-05, | |
| "loss": 0.0299816370010376, | |
| "step": 520, | |
| "token_acc": 0.9869109947643979 | |
| }, | |
| { | |
| "epoch": 0.5653333333333334, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 1.684471562204407e-05, | |
| "loss": 0.033460429310798644, | |
| "step": 530, | |
| "token_acc": 0.9860744297719087 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 1.6875, | |
| "learning_rate": 1.6717788729669822e-05, | |
| "loss": 0.029380664229393005, | |
| "step": 540, | |
| "token_acc": 0.9881756756756757 | |
| }, | |
| { | |
| "epoch": 0.5866666666666667, | |
| "grad_norm": 2.21875, | |
| "learning_rate": 1.658885805796111e-05, | |
| "loss": 0.0348785400390625, | |
| "step": 550, | |
| "token_acc": 0.9853047458443749 | |
| }, | |
| { | |
| "epoch": 0.5973333333333334, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 1.6457962064307104e-05, | |
| "loss": 0.025776177644729614, | |
| "step": 560, | |
| "token_acc": 0.9886883273164861 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 2.453125, | |
| "learning_rate": 1.6325139792312414e-05, | |
| "loss": 0.03230432271957397, | |
| "step": 570, | |
| "token_acc": 0.9868641031764986 | |
| }, | |
| { | |
| "epoch": 0.6186666666666667, | |
| "grad_norm": 2.265625, | |
| "learning_rate": 1.6190430860151196e-05, | |
| "loss": 0.031005895137786864, | |
| "step": 580, | |
| "token_acc": 0.9841612670986322 | |
| }, | |
| { | |
| "epoch": 0.6293333333333333, | |
| "grad_norm": 2.546875, | |
| "learning_rate": 1.605387544874983e-05, | |
| "loss": 0.03286699056625366, | |
| "step": 590, | |
| "token_acc": 0.9843033083796184 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.515625, | |
| "learning_rate": 1.591551428980179e-05, | |
| "loss": 0.032039952278137204, | |
| "step": 600, | |
| "token_acc": 0.9858648778150455 | |
| }, | |
| { | |
| "epoch": 0.6506666666666666, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 1.5775388653618217e-05, | |
| "loss": 0.03594903945922852, | |
| "step": 610, | |
| "token_acc": 0.9849318344893566 | |
| }, | |
| { | |
| "epoch": 0.6613333333333333, | |
| "grad_norm": 2.625, | |
| "learning_rate": 1.5633540336817804e-05, | |
| "loss": 0.027490669488906862, | |
| "step": 620, | |
| "token_acc": 0.985925572519084 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 1.75, | |
| "learning_rate": 1.549001164985973e-05, | |
| "loss": 0.03038315773010254, | |
| "step": 630, | |
| "token_acc": 0.9857865574560347 | |
| }, | |
| { | |
| "epoch": 0.6826666666666666, | |
| "grad_norm": 2.109375, | |
| "learning_rate": 1.5344845404423313e-05, | |
| "loss": 0.0375783383846283, | |
| "step": 640, | |
| "token_acc": 0.981414433985035 | |
| }, | |
| { | |
| "epoch": 0.6933333333333334, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 1.5198084900638161e-05, | |
| "loss": 0.03005017340183258, | |
| "step": 650, | |
| "token_acc": 0.9848082951531227 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 1.5049773914168629e-05, | |
| "loss": 0.031009536981582642, | |
| "step": 660, | |
| "token_acc": 0.9857899807321773 | |
| }, | |
| { | |
| "epoch": 0.7146666666666667, | |
| "grad_norm": 2.34375, | |
| "learning_rate": 1.4899956683156413e-05, | |
| "loss": 0.033309900760650636, | |
| "step": 670, | |
| "token_acc": 0.98503861003861 | |
| }, | |
| { | |
| "epoch": 0.7253333333333334, | |
| "grad_norm": 1.9921875, | |
| "learning_rate": 1.4748677895025243e-05, | |
| "loss": 0.03143962621688843, | |
| "step": 680, | |
| "token_acc": 0.9863013698630136 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 1.4595982673151511e-05, | |
| "loss": 0.028191816806793214, | |
| "step": 690, | |
| "token_acc": 0.9877785765636233 | |
| }, | |
| { | |
| "epoch": 0.7466666666666667, | |
| "grad_norm": 1.9921875, | |
| "learning_rate": 1.444191656340491e-05, | |
| "loss": 0.02878325283527374, | |
| "step": 700, | |
| "token_acc": 0.9867437936852254 | |
| }, | |
| { | |
| "epoch": 0.7573333333333333, | |
| "grad_norm": 0.875, | |
| "learning_rate": 1.4286525520563003e-05, | |
| "loss": 0.028445839881896973, | |
| "step": 710, | |
| "token_acc": 0.9870192307692308 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 1.921875, | |
| "learning_rate": 1.4129855894603885e-05, | |
| "loss": 0.02481076419353485, | |
| "step": 720, | |
| "token_acc": 0.9894636015325671 | |
| }, | |
| { | |
| "epoch": 0.7786666666666666, | |
| "grad_norm": 1.703125, | |
| "learning_rate": 1.3971954416880897e-05, | |
| "loss": 0.032581061124801636, | |
| "step": 730, | |
| "token_acc": 0.9865222623345367 | |
| }, | |
| { | |
| "epoch": 0.7893333333333333, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 1.3812868186183641e-05, | |
| "loss": 0.03237102627754211, | |
| "step": 740, | |
| "token_acc": 0.9857865574560347 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.3652644654689355e-05, | |
| "loss": 0.027326399087905885, | |
| "step": 750, | |
| "token_acc": 0.9896634615384615 | |
| }, | |
| { | |
| "epoch": 0.8106666666666666, | |
| "grad_norm": 2.59375, | |
| "learning_rate": 1.3491331613808909e-05, | |
| "loss": 0.032716572284698486, | |
| "step": 760, | |
| "token_acc": 0.9857831325301205 | |
| }, | |
| { | |
| "epoch": 0.8213333333333334, | |
| "grad_norm": 2.171875, | |
| "learning_rate": 1.3328977179931595e-05, | |
| "loss": 0.03095829486846924, | |
| "step": 770, | |
| "token_acc": 0.9869848156182213 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.3165629780072991e-05, | |
| "loss": 0.029185777902603148, | |
| "step": 780, | |
| "token_acc": 0.9876960193003619 | |
| }, | |
| { | |
| "epoch": 0.8426666666666667, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 1.3001338137430163e-05, | |
| "loss": 0.024865221977233887, | |
| "step": 790, | |
| "token_acc": 0.9870720612880057 | |
| }, | |
| { | |
| "epoch": 0.8533333333333334, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 1.2836151256848562e-05, | |
| "loss": 0.029257166385650634, | |
| "step": 800, | |
| "token_acc": 0.9872779644743159 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 3.296875, | |
| "learning_rate": 1.2670118410204819e-05, | |
| "loss": 0.031032463908195494, | |
| "step": 810, | |
| "token_acc": 0.9853153586904189 | |
| }, | |
| { | |
| "epoch": 0.8746666666666667, | |
| "grad_norm": 1.5234375, | |
| "learning_rate": 1.250328912171001e-05, | |
| "loss": 0.027763417363166808, | |
| "step": 820, | |
| "token_acc": 0.9882719004308281 | |
| }, | |
| { | |
| "epoch": 0.8853333333333333, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 1.2335713153137551e-05, | |
| "loss": 0.03198724389076233, | |
| "step": 830, | |
| "token_acc": 0.9862947823996153 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 1.2167440488980309e-05, | |
| "loss": 0.02678609490394592, | |
| "step": 840, | |
| "token_acc": 0.9867053420352913 | |
| }, | |
| { | |
| "epoch": 0.9066666666666666, | |
| "grad_norm": 2.390625, | |
| "learning_rate": 1.1998521321541214e-05, | |
| "loss": 0.028643500804901124, | |
| "step": 850, | |
| "token_acc": 0.9868483978957436 | |
| }, | |
| { | |
| "epoch": 0.9173333333333333, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.1829006035961947e-05, | |
| "loss": 0.033641707897186277, | |
| "step": 860, | |
| "token_acc": 0.983855421686747 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.165894519519407e-05, | |
| "loss": 0.02538594901561737, | |
| "step": 870, | |
| "token_acc": 0.9887290167865708 | |
| }, | |
| { | |
| "epoch": 0.9386666666666666, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 1.148838952491712e-05, | |
| "loss": 0.029299235343933104, | |
| "step": 880, | |
| "token_acc": 0.9872350674373795 | |
| }, | |
| { | |
| "epoch": 0.9493333333333334, | |
| "grad_norm": 1.4296875, | |
| "learning_rate": 1.1317389898408188e-05, | |
| "loss": 0.027896964550018312, | |
| "step": 890, | |
| "token_acc": 0.9877668505636843 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 1.1145997321367454e-05, | |
| "loss": 0.02889745831489563, | |
| "step": 900, | |
| "token_acc": 0.9889316650625601 | |
| }, | |
| { | |
| "epoch": 0.9706666666666667, | |
| "grad_norm": 2.203125, | |
| "learning_rate": 1.0974262916704223e-05, | |
| "loss": 0.030401945114135742, | |
| "step": 910, | |
| "token_acc": 0.986557849255881 | |
| }, | |
| { | |
| "epoch": 0.9813333333333333, | |
| "grad_norm": 1.9375, | |
| "learning_rate": 1.0802237909288005e-05, | |
| "loss": 0.029643896222114562, | |
| "step": 920, | |
| "token_acc": 0.9869942196531792 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 2.90625, | |
| "learning_rate": 1.062997361066918e-05, | |
| "loss": 0.02902018129825592, | |
| "step": 930, | |
| "token_acc": 0.9874396135265701 | |
| }, | |
| { | |
| "epoch": 1.0021333333333333, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 1.0457521403773823e-05, | |
| "loss": 0.02213967442512512, | |
| "step": 940, | |
| "token_acc": 0.9900153609831029 | |
| }, | |
| { | |
| "epoch": 1.0128, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 1.0284932727577203e-05, | |
| "loss": 0.010565431416034698, | |
| "step": 950, | |
| "token_acc": 0.9968839884947267 | |
| }, | |
| { | |
| "epoch": 1.0234666666666667, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 1.0112259061760605e-05, | |
| "loss": 0.01075158715248108, | |
| "step": 960, | |
| "token_acc": 0.9956741167988464 | |
| }, | |
| { | |
| "epoch": 1.0341333333333333, | |
| "grad_norm": 2.484375, | |
| "learning_rate": 9.939551911355979e-06, | |
| "loss": 0.016012924909591674, | |
| "step": 970, | |
| "token_acc": 0.9942335415665545 | |
| }, | |
| { | |
| "epoch": 1.0448, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 9.766862791383054e-06, | |
| "loss": 0.012686330080032348, | |
| "step": 980, | |
| "token_acc": 0.995164410058027 | |
| }, | |
| { | |
| "epoch": 1.0554666666666668, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 9.59424321148343e-06, | |
| "loss": 0.006662490963935852, | |
| "step": 990, | |
| "token_acc": 0.9975984630163305 | |
| }, | |
| { | |
| "epoch": 1.0661333333333334, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 9.42174466055634e-06, | |
| "loss": 0.0062740519642829895, | |
| "step": 1000, | |
| "token_acc": 0.9980694980694981 | |
| }, | |
| { | |
| "epoch": 1.0768, | |
| "grad_norm": 2.515625, | |
| "learning_rate": 9.249418591400539e-06, | |
| "loss": 0.011450471729040146, | |
| "step": 1010, | |
| "token_acc": 0.9964046021093 | |
| }, | |
| { | |
| "epoch": 1.0874666666666666, | |
| "grad_norm": 2.75, | |
| "learning_rate": 9.07731640536698e-06, | |
| "loss": 0.014336982369422912, | |
| "step": 1020, | |
| "token_acc": 0.9954172696575012 | |
| }, | |
| { | |
| "epoch": 1.0981333333333334, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 8.90548943702686e-06, | |
| "loss": 0.009051916003227235, | |
| "step": 1030, | |
| "token_acc": 0.9963820549927641 | |
| }, | |
| { | |
| "epoch": 1.1088, | |
| "grad_norm": 3.40625, | |
| "learning_rate": 8.733988938859545e-06, | |
| "loss": 0.014770987629890441, | |
| "step": 1040, | |
| "token_acc": 0.9946962391513983 | |
| }, | |
| { | |
| "epoch": 1.1194666666666666, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8.562866065965013e-06, | |
| "loss": 0.0071306928992271425, | |
| "step": 1050, | |
| "token_acc": 0.9971112181030333 | |
| }, | |
| { | |
| "epoch": 1.1301333333333332, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 8.39217186080532e-06, | |
| "loss": 0.010715489089488984, | |
| "step": 1060, | |
| "token_acc": 0.9963916285783017 | |
| }, | |
| { | |
| "epoch": 1.1408, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 8.221957237979686e-06, | |
| "loss": 0.011085398495197296, | |
| "step": 1070, | |
| "token_acc": 0.9956374212312167 | |
| }, | |
| { | |
| "epoch": 1.1514666666666666, | |
| "grad_norm": 1.859375, | |
| "learning_rate": 8.052272969037702e-06, | |
| "loss": 0.011836430430412293, | |
| "step": 1080, | |
| "token_acc": 0.9942154736080984 | |
| }, | |
| { | |
| "epoch": 1.1621333333333332, | |
| "grad_norm": 2.5625, | |
| "learning_rate": 7.88316966733522e-06, | |
| "loss": 0.00989045649766922, | |
| "step": 1090, | |
| "token_acc": 0.9959281437125749 | |
| }, | |
| { | |
| "epoch": 1.1728, | |
| "grad_norm": 2.25, | |
| "learning_rate": 7.71469777293741e-06, | |
| "loss": 0.012199420481920242, | |
| "step": 1100, | |
| "token_acc": 0.995438175270108 | |
| }, | |
| { | |
| "epoch": 1.1834666666666667, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 7.546907537573529e-06, | |
| "loss": 0.01276719719171524, | |
| "step": 1110, | |
| "token_acc": 0.9954216867469879 | |
| }, | |
| { | |
| "epoch": 1.1941333333333333, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 7.379849009647848e-06, | |
| "loss": 0.008200515061616898, | |
| "step": 1120, | |
| "token_acc": 0.9975851243660951 | |
| }, | |
| { | |
| "epoch": 1.2048, | |
| "grad_norm": 1.7734375, | |
| "learning_rate": 7.213572019311235e-06, | |
| "loss": 0.010789988934993744, | |
| "step": 1130, | |
| "token_acc": 0.9956751561749159 | |
| }, | |
| { | |
| "epoch": 1.2154666666666667, | |
| "grad_norm": 2.390625, | |
| "learning_rate": 7.04812616359784e-06, | |
| "loss": 0.00852215737104416, | |
| "step": 1140, | |
| "token_acc": 0.9968958930276982 | |
| }, | |
| { | |
| "epoch": 1.2261333333333333, | |
| "grad_norm": 2.375, | |
| "learning_rate": 6.883560791631307e-06, | |
| "loss": 0.010482875257730484, | |
| "step": 1150, | |
| "token_acc": 0.9961371318203767 | |
| }, | |
| { | |
| "epoch": 1.2368000000000001, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 6.7199249899049435e-06, | |
| "loss": 0.012379509210586549, | |
| "step": 1160, | |
| "token_acc": 0.9954249939802552 | |
| }, | |
| { | |
| "epoch": 1.2474666666666667, | |
| "grad_norm": 2.953125, | |
| "learning_rate": 6.557267567640217e-06, | |
| "loss": 0.010692089051008224, | |
| "step": 1170, | |
| "token_acc": 0.9959271681839962 | |
| }, | |
| { | |
| "epoch": 1.2581333333333333, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 6.3956370422279625e-06, | |
| "loss": 0.0074609115719795225, | |
| "step": 1180, | |
| "token_acc": 0.997589780670041 | |
| }, | |
| { | |
| "epoch": 1.2688, | |
| "grad_norm": 2.9375, | |
| "learning_rate": 6.235081624756629e-06, | |
| "loss": 0.009892877936363221, | |
| "step": 1190, | |
| "token_acc": 0.9968636911942099 | |
| }, | |
| { | |
| "epoch": 1.2794666666666665, | |
| "grad_norm": 1.765625, | |
| "learning_rate": 6.075649205631904e-06, | |
| "loss": 0.011288212984800339, | |
| "step": 1200, | |
| "token_acc": 0.9954414587332053 | |
| }, | |
| { | |
| "epoch": 1.2901333333333334, | |
| "grad_norm": 2.421875, | |
| "learning_rate": 5.917387340291969e-06, | |
| "loss": 0.009337464720010758, | |
| "step": 1210, | |
| "token_acc": 0.9973595775324052 | |
| }, | |
| { | |
| "epoch": 1.3008, | |
| "grad_norm": 3.578125, | |
| "learning_rate": 5.760343235022703e-06, | |
| "loss": 0.017143648862838746, | |
| "step": 1220, | |
| "token_acc": 0.9942321557317952 | |
| }, | |
| { | |
| "epoch": 1.3114666666666666, | |
| "grad_norm": 2.15625, | |
| "learning_rate": 5.604563732876989e-06, | |
| "loss": 0.013416722416877747, | |
| "step": 1230, | |
| "token_acc": 0.9958887545344619 | |
| }, | |
| { | |
| "epoch": 1.3221333333333334, | |
| "grad_norm": 3.375, | |
| "learning_rate": 5.450095299702392e-06, | |
| "loss": 0.017079284787178038, | |
| "step": 1240, | |
| "token_acc": 0.9930238152513832 | |
| }, | |
| { | |
| "epoch": 1.3328, | |
| "grad_norm": 1.8515625, | |
| "learning_rate": 5.296984010281367e-06, | |
| "loss": 0.009312470257282258, | |
| "step": 1250, | |
| "token_acc": 0.9961722488038277 | |
| }, | |
| { | |
| "epoch": 1.3434666666666666, | |
| "grad_norm": 1.8828125, | |
| "learning_rate": 5.145275534588053e-06, | |
| "loss": 0.012822112441062928, | |
| "step": 1260, | |
| "token_acc": 0.995438175270108 | |
| }, | |
| { | |
| "epoch": 1.3541333333333334, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 4.995015124165883e-06, | |
| "loss": 0.0064118683338165285, | |
| "step": 1270, | |
| "token_acc": 0.9976047904191617 | |
| }, | |
| { | |
| "epoch": 1.3648, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.8462475986299625e-06, | |
| "loss": 0.015998595952987672, | |
| "step": 1280, | |
| "token_acc": 0.9946898382814385 | |
| }, | |
| { | |
| "epoch": 1.3754666666666666, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 4.69901733229828e-06, | |
| "loss": 0.011354514211416245, | |
| "step": 1290, | |
| "token_acc": 0.9952049868137137 | |
| }, | |
| { | |
| "epoch": 1.3861333333333334, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 4.5533682409557745e-06, | |
| "loss": 0.01483256220817566, | |
| "step": 1300, | |
| "token_acc": 0.995706106870229 | |
| }, | |
| { | |
| "epoch": 1.3968, | |
| "grad_norm": 3.078125, | |
| "learning_rate": 4.409343768755135e-06, | |
| "loss": 0.009935888648033141, | |
| "step": 1310, | |
| "token_acc": 0.99592032637389 | |
| }, | |
| { | |
| "epoch": 1.4074666666666666, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 4.266986875258299e-06, | |
| "loss": 0.013563062250614166, | |
| "step": 1320, | |
| "token_acc": 0.9956730769230769 | |
| }, | |
| { | |
| "epoch": 1.4181333333333335, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.126340022622506e-06, | |
| "loss": 0.011859767884016038, | |
| "step": 1330, | |
| "token_acc": 0.9961713328547499 | |
| }, | |
| { | |
| "epoch": 1.4288, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 3.987445162934689e-06, | |
| "loss": 0.007380545139312744, | |
| "step": 1340, | |
| "token_acc": 0.9973627427475426 | |
| }, | |
| { | |
| "epoch": 1.4394666666666667, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 3.850343725698018e-06, | |
| "loss": 0.011476223170757294, | |
| "step": 1350, | |
| "token_acc": 0.9949640287769784 | |
| }, | |
| { | |
| "epoch": 1.4501333333333333, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 3.7150766054743393e-06, | |
| "loss": 0.008989098668098449, | |
| "step": 1360, | |
| "token_acc": 0.9966370405957242 | |
| }, | |
| { | |
| "epoch": 1.4607999999999999, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 3.5816841496861563e-06, | |
| "loss": 0.008315403014421463, | |
| "step": 1370, | |
| "token_acc": 0.9968772519817439 | |
| }, | |
| { | |
| "epoch": 1.4714666666666667, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 3.4502061465818024e-06, | |
| "loss": 0.012508057057857513, | |
| "step": 1380, | |
| "token_acc": 0.9963776865491427 | |
| }, | |
| { | |
| "epoch": 1.4821333333333333, | |
| "grad_norm": 1.9453125, | |
| "learning_rate": 3.3206818133674478e-06, | |
| "loss": 0.013706037402153015, | |
| "step": 1390, | |
| "token_acc": 0.9961436490720655 | |
| }, | |
| { | |
| "epoch": 1.4928, | |
| "grad_norm": 2.171875, | |
| "learning_rate": 3.1931497845093753e-06, | |
| "loss": 0.010326726734638214, | |
| "step": 1400, | |
| "token_acc": 0.9968936678614098 | |
| }, | |
| { | |
| "epoch": 1.5034666666666667, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 3.0676481002101223e-06, | |
| "loss": 0.00966293215751648, | |
| "step": 1410, | |
| "token_acc": 0.9968697327233326 | |
| }, | |
| { | |
| "epoch": 1.5141333333333333, | |
| "grad_norm": 1.71875, | |
| "learning_rate": 2.9442141950618762e-06, | |
| "loss": 0.010416048765182494, | |
| "step": 1420, | |
| "token_acc": 0.9968862275449102 | |
| }, | |
| { | |
| "epoch": 1.5248, | |
| "grad_norm": 1.7578125, | |
| "learning_rate": 2.8228848868804714e-06, | |
| "loss": 0.010529063642024994, | |
| "step": 1430, | |
| "token_acc": 0.9954337899543378 | |
| }, | |
| { | |
| "epoch": 1.5354666666666668, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 2.7036963657234105e-06, | |
| "loss": 0.009810931980609894, | |
| "step": 1440, | |
| "token_acc": 0.9966208061790973 | |
| }, | |
| { | |
| "epoch": 1.5461333333333334, | |
| "grad_norm": 1.9609375, | |
| "learning_rate": 2.5866841830950994e-06, | |
| "loss": 0.010760371387004853, | |
| "step": 1450, | |
| "token_acc": 0.9954501915708812 | |
| }, | |
| { | |
| "epoch": 1.5568, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 2.47188324134255e-06, | |
| "loss": 0.011224661022424698, | |
| "step": 1460, | |
| "token_acc": 0.9966434907695996 | |
| }, | |
| { | |
| "epoch": 1.5674666666666668, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 2.3593277832447405e-06, | |
| "loss": 0.01038598120212555, | |
| "step": 1470, | |
| "token_acc": 0.9959552700452058 | |
| }, | |
| { | |
| "epoch": 1.5781333333333334, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 2.2490513817986757e-06, | |
| "loss": 0.010531876236200333, | |
| "step": 1480, | |
| "token_acc": 0.9968734968734969 | |
| }, | |
| { | |
| "epoch": 1.5888, | |
| "grad_norm": 1.8515625, | |
| "learning_rate": 2.141086930205255e-06, | |
| "loss": 0.010695824027061462, | |
| "step": 1490, | |
| "token_acc": 0.9956782713085234 | |
| }, | |
| { | |
| "epoch": 1.5994666666666668, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 2.035466632057905e-06, | |
| "loss": 0.007252989709377289, | |
| "step": 1500, | |
| "token_acc": 0.9966483121857793 | |
| }, | |
| { | |
| "epoch": 1.6101333333333332, | |
| "grad_norm": 0.18359375, | |
| "learning_rate": 1.9322219917369e-06, | |
| "loss": 0.007471411675214768, | |
| "step": 1510, | |
| "token_acc": 0.9971133028626413 | |
| }, | |
| { | |
| "epoch": 1.6208, | |
| "grad_norm": 2.15625, | |
| "learning_rate": 1.8313838050122423e-06, | |
| "loss": 0.015065881609916686, | |
| "step": 1520, | |
| "token_acc": 0.9946885562530179 | |
| }, | |
| { | |
| "epoch": 1.6314666666666666, | |
| "grad_norm": 2.46875, | |
| "learning_rate": 1.7329821498579257e-06, | |
| "loss": 0.013307811319828033, | |
| "step": 1530, | |
| "token_acc": 0.9947000722717417 | |
| }, | |
| { | |
| "epoch": 1.6421333333333332, | |
| "grad_norm": 1.375, | |
| "learning_rate": 1.6370463774802759e-06, | |
| "loss": 0.008927606046199799, | |
| "step": 1540, | |
| "token_acc": 0.9973513123043583 | |
| }, | |
| { | |
| "epoch": 1.6528, | |
| "grad_norm": 2.296875, | |
| "learning_rate": 1.5436051035630962e-06, | |
| "loss": 0.010501272231340408, | |
| "step": 1550, | |
| "token_acc": 0.9963820549927641 | |
| }, | |
| { | |
| "epoch": 1.6634666666666666, | |
| "grad_norm": 1.8046875, | |
| "learning_rate": 1.4526861997321794e-06, | |
| "loss": 0.013346484303474427, | |
| "step": 1560, | |
| "token_acc": 0.9951969260326609 | |
| }, | |
| { | |
| "epoch": 1.6741333333333333, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 1.3643167852417894e-06, | |
| "loss": 0.011042323708534241, | |
| "step": 1570, | |
| "token_acc": 0.9959359311498924 | |
| }, | |
| { | |
| "epoch": 1.6848, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 1.278523218885518e-06, | |
| "loss": 0.008789537847042084, | |
| "step": 1580, | |
| "token_acc": 0.9968877184582237 | |
| }, | |
| { | |
| "epoch": 1.6954666666666667, | |
| "grad_norm": 1.5546875, | |
| "learning_rate": 1.1953310911340165e-06, | |
| "loss": 0.009821423888206482, | |
| "step": 1590, | |
| "token_acc": 0.9949507093051214 | |
| }, | |
| { | |
| "epoch": 1.7061333333333333, | |
| "grad_norm": 2.09375, | |
| "learning_rate": 1.1147652165018597e-06, | |
| "loss": 0.009412066638469696, | |
| "step": 1600, | |
| "token_acc": 0.9966378482228626 | |
| }, | |
| { | |
| "epoch": 1.7168, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 1.0368496261459004e-06, | |
| "loss": 0.011984483152627946, | |
| "step": 1610, | |
| "token_acc": 0.995906573561281 | |
| }, | |
| { | |
| "epoch": 1.7274666666666667, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 9.616075606972597e-07, | |
| "loss": 0.007756148278713226, | |
| "step": 1620, | |
| "token_acc": 0.9971181556195965 | |
| }, | |
| { | |
| "epoch": 1.7381333333333333, | |
| "grad_norm": 2.046875, | |
| "learning_rate": 8.890614633291095e-07, | |
| "loss": 0.00872180163860321, | |
| "step": 1630, | |
| "token_acc": 0.9966313763233878 | |
| }, | |
| { | |
| "epoch": 1.7488000000000001, | |
| "grad_norm": 2.546875, | |
| "learning_rate": 8.192329730623471e-07, | |
| "loss": 0.011189891397953034, | |
| "step": 1640, | |
| "token_acc": 0.9973765800143095 | |
| }, | |
| { | |
| "epoch": 1.7594666666666665, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 7.521429183111139e-07, | |
| "loss": 0.011723892390727996, | |
| "step": 1650, | |
| "token_acc": 0.9959036144578314 | |
| }, | |
| { | |
| "epoch": 1.7701333333333333, | |
| "grad_norm": 2.3125, | |
| "learning_rate": 6.878113106700946e-07, | |
| "loss": 0.007294324040412903, | |
| "step": 1660, | |
| "token_acc": 0.9973493975903615 | |
| }, | |
| { | |
| "epoch": 1.7808000000000002, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 6.262573389454773e-07, | |
| "loss": 0.008933399617671967, | |
| "step": 1670, | |
| "token_acc": 0.9963741841914431 | |
| }, | |
| { | |
| "epoch": 1.7914666666666665, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 5.674993634313242e-07, | |
| "loss": 0.012564325332641601, | |
| "step": 1680, | |
| "token_acc": 0.9956720365472469 | |
| }, | |
| { | |
| "epoch": 1.8021333333333334, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 5.115549104330575e-07, | |
| "loss": 0.00787913054227829, | |
| "step": 1690, | |
| "token_acc": 0.9976082276967233 | |
| }, | |
| { | |
| "epoch": 1.8128, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.5844066703973676e-07, | |
| "loss": 0.007952670753002166, | |
| "step": 1700, | |
| "token_acc": 0.9973583093179635 | |
| }, | |
| { | |
| "epoch": 1.8234666666666666, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.081724761466288e-07, | |
| "loss": 0.013583377003669739, | |
| "step": 1710, | |
| "token_acc": 0.9946898382814385 | |
| }, | |
| { | |
| "epoch": 1.8341333333333334, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.6076533172959895e-07, | |
| "loss": 0.008094549179077148, | |
| "step": 1720, | |
| "token_acc": 0.9973519499277804 | |
| }, | |
| { | |
| "epoch": 1.8448, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 3.1623337437272084e-07, | |
| "loss": 0.010456332564353943, | |
| "step": 1730, | |
| "token_acc": 0.9963864129125511 | |
| }, | |
| { | |
| "epoch": 1.8554666666666666, | |
| "grad_norm": 1.375, | |
| "learning_rate": 2.745898870504116e-07, | |
| "loss": 0.009122979640960694, | |
| "step": 1740, | |
| "token_acc": 0.9980750721847931 | |
| }, | |
| { | |
| "epoch": 1.8661333333333334, | |
| "grad_norm": 2.0625, | |
| "learning_rate": 2.3584729116541305e-07, | |
| "loss": 0.010391271114349366, | |
| "step": 1750, | |
| "token_acc": 0.9958986731001206 | |
| }, | |
| { | |
| "epoch": 1.8768, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 2.0001714284372253e-07, | |
| "loss": 0.009891322255134583, | |
| "step": 1760, | |
| "token_acc": 0.9959193470955353 | |
| }, | |
| { | |
| "epoch": 1.8874666666666666, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 1.6711012948764426e-07, | |
| "loss": 0.010584476590156554, | |
| "step": 1770, | |
| "token_acc": 0.9956948098541019 | |
| }, | |
| { | |
| "epoch": 1.8981333333333335, | |
| "grad_norm": 2.65625, | |
| "learning_rate": 1.371360665879584e-07, | |
| "loss": 0.006497502326965332, | |
| "step": 1780, | |
| "token_acc": 0.9978401727861771 | |
| }, | |
| { | |
| "epoch": 1.9088, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 1.1010389479615302e-07, | |
| "loss": 0.011741268634796142, | |
| "step": 1790, | |
| "token_acc": 0.9944698244770378 | |
| }, | |
| { | |
| "epoch": 1.9194666666666667, | |
| "grad_norm": 2.234375, | |
| "learning_rate": 8.60216772576139e-08, | |
| "loss": 0.010211998224258423, | |
| "step": 1800, | |
| "token_acc": 0.9961566178236848 | |
| }, | |
| { | |
| "epoch": 1.9301333333333335, | |
| "grad_norm": 1.5546875, | |
| "learning_rate": 6.489659720655295e-08, | |
| "loss": 0.008484911918640137, | |
| "step": 1810, | |
| "token_acc": 0.9968712394705175 | |
| }, | |
| { | |
| "epoch": 1.9407999999999999, | |
| "grad_norm": 2.0, | |
| "learning_rate": 4.673495582339649e-08, | |
| "loss": 0.011189290881156921, | |
| "step": 1820, | |
| "token_acc": 0.9954050785973397 | |
| }, | |
| { | |
| "epoch": 1.9514666666666667, | |
| "grad_norm": 1.5, | |
| "learning_rate": 3.154217035526874e-08, | |
| "loss": 0.012943205237388612, | |
| "step": 1830, | |
| "token_acc": 0.9949604031677466 | |
| }, | |
| { | |
| "epoch": 1.9621333333333333, | |
| "grad_norm": 2.078125, | |
| "learning_rate": 1.932277250014214e-08, | |
| "loss": 0.014319291710853577, | |
| "step": 1840, | |
| "token_acc": 0.9944883776659478 | |
| }, | |
| { | |
| "epoch": 1.9727999999999999, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 1.008040705511748e-08, | |
| "loss": 0.0120982825756073, | |
| "step": 1850, | |
| "token_acc": 0.9949016751638747 | |
| }, | |
| { | |
| "epoch": 1.9834666666666667, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 3.8178308292624585e-09, | |
| "loss": 0.00735490694642067, | |
| "step": 1860, | |
| "token_acc": 0.997832891885384 | |
| }, | |
| { | |
| "epoch": 1.9941333333333333, | |
| "grad_norm": 1.859375, | |
| "learning_rate": 5.369118213094382e-10, | |
| "loss": 0.009274058043956757, | |
| "step": 1870, | |
| "token_acc": 0.9959075589792971 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1876, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.528224876952289e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |