| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500.0, | |
| "global_step": 1876, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010666666666666667, | |
| "grad_norm": 27.125, | |
| "learning_rate": 3.5087719298245616e-07, | |
| "loss": 1.6233890056610107, | |
| "step": 1, | |
| "token_acc": 0.702493551160791 | |
| }, | |
| { | |
| "epoch": 0.010666666666666666, | |
| "grad_norm": 19.25, | |
| "learning_rate": 3.5087719298245615e-06, | |
| "loss": 1.4475451575385199, | |
| "step": 10, | |
| "token_acc": 0.7265573770491803 | |
| }, | |
| { | |
| "epoch": 0.021333333333333333, | |
| "grad_norm": 12.1875, | |
| "learning_rate": 7.017543859649123e-06, | |
| "loss": 1.2004226684570312, | |
| "step": 20, | |
| "token_acc": 0.7449092613103859 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 7.5625, | |
| "learning_rate": 1.0526315789473684e-05, | |
| "loss": 0.9809289932250976, | |
| "step": 30, | |
| "token_acc": 0.7659592602062558 | |
| }, | |
| { | |
| "epoch": 0.042666666666666665, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 1.4035087719298246e-05, | |
| "loss": 0.8323905944824219, | |
| "step": 40, | |
| "token_acc": 0.7854956640027206 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "grad_norm": 6.59375, | |
| "learning_rate": 1.754385964912281e-05, | |
| "loss": 0.7661696434020996, | |
| "step": 50, | |
| "token_acc": 0.7932169071121236 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 1.9999865771143805e-05, | |
| "loss": 0.7235654354095459, | |
| "step": 60, | |
| "token_acc": 0.8000837871805614 | |
| }, | |
| { | |
| "epoch": 0.07466666666666667, | |
| "grad_norm": 6.5, | |
| "learning_rate": 1.9997479580610527e-05, | |
| "loss": 0.7175994396209717, | |
| "step": 70, | |
| "token_acc": 0.8002734808990685 | |
| }, | |
| { | |
| "epoch": 0.08533333333333333, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 1.9992111345867986e-05, | |
| "loss": 0.6924509048461914, | |
| "step": 80, | |
| "token_acc": 0.8082805238698775 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 1.9983762668151084e-05, | |
| "loss": 0.6639269828796387, | |
| "step": 90, | |
| "token_acc": 0.814126705163612 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 1.9972436037700082e-05, | |
| "loss": 0.6697088718414307, | |
| "step": 100, | |
| "token_acc": 0.8140579955909785 | |
| }, | |
| { | |
| "epoch": 0.11733333333333333, | |
| "grad_norm": 5.5, | |
| "learning_rate": 1.9958134833017768e-05, | |
| "loss": 0.6650930881500244, | |
| "step": 110, | |
| "token_acc": 0.8157225237449118 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 1.9940863319861758e-05, | |
| "loss": 0.6610164642333984, | |
| "step": 120, | |
| "token_acc": 0.8128639945186708 | |
| }, | |
| { | |
| "epoch": 0.13866666666666666, | |
| "grad_norm": 6.125, | |
| "learning_rate": 1.992062664997209e-05, | |
| "loss": 0.627209997177124, | |
| "step": 130, | |
| "token_acc": 0.8250212765957446 | |
| }, | |
| { | |
| "epoch": 0.14933333333333335, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 1.9897430859534553e-05, | |
| "loss": 0.6545108795166016, | |
| "step": 140, | |
| "token_acc": 0.8140264586160109 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 1.9871282867380245e-05, | |
| "loss": 0.6527502536773682, | |
| "step": 150, | |
| "token_acc": 0.8172938078654548 | |
| }, | |
| { | |
| "epoch": 0.17066666666666666, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 1.9842190472921802e-05, | |
| "loss": 0.6138824462890625, | |
| "step": 160, | |
| "token_acc": 0.8251860622462788 | |
| }, | |
| { | |
| "epoch": 0.18133333333333335, | |
| "grad_norm": 5.5, | |
| "learning_rate": 1.9810162353827e-05, | |
| "loss": 0.6213672637939454, | |
| "step": 170, | |
| "token_acc": 0.8211650979394556 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 6.0, | |
| "learning_rate": 1.977520806343039e-05, | |
| "loss": 0.5975364685058594, | |
| "step": 180, | |
| "token_acc": 0.8272704146527601 | |
| }, | |
| { | |
| "epoch": 0.20266666666666666, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 1.9737338027883727e-05, | |
| "loss": 0.6315351963043213, | |
| "step": 190, | |
| "token_acc": 0.8185825703628349 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 1.969656354304606e-05, | |
| "loss": 0.6197149276733398, | |
| "step": 200, | |
| "token_acc": 0.8190742151138191 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 1.9652896771114416e-05, | |
| "loss": 0.6223199367523193, | |
| "step": 210, | |
| "token_acc": 0.8190856560163154 | |
| }, | |
| { | |
| "epoch": 0.23466666666666666, | |
| "grad_norm": 5.03125, | |
| "learning_rate": 1.960635073699605e-05, | |
| "loss": 0.637688159942627, | |
| "step": 220, | |
| "token_acc": 0.8177713409439428 | |
| }, | |
| { | |
| "epoch": 0.24533333333333332, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 1.95569393244234e-05, | |
| "loss": 0.6076653003692627, | |
| "step": 230, | |
| "token_acc": 0.8262690567934328 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 5.5, | |
| "learning_rate": 1.950467727181284e-05, | |
| "loss": 0.6383634567260742, | |
| "step": 240, | |
| "token_acc": 0.8165317426501917 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 1.9449580167868506e-05, | |
| "loss": 0.6294333934783936, | |
| "step": 250, | |
| "token_acc": 0.819404255319149 | |
| }, | |
| { | |
| "epoch": 0.2773333333333333, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 1.939166444693252e-05, | |
| "loss": 0.6120997428894043, | |
| "step": 260, | |
| "token_acc": 0.8252212389380531 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 1.9330947384082957e-05, | |
| "loss": 0.5947658538818359, | |
| "step": 270, | |
| "token_acc": 0.8248434591301405 | |
| }, | |
| { | |
| "epoch": 0.2986666666666667, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 1.9267447089981018e-05, | |
| "loss": 0.5949877738952637, | |
| "step": 280, | |
| "token_acc": 0.8277702473737716 | |
| }, | |
| { | |
| "epoch": 0.30933333333333335, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 1.9201182505469035e-05, | |
| "loss": 0.5923934936523437, | |
| "step": 290, | |
| "token_acc": 0.8287868519146052 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 1.9132173395920756e-05, | |
| "loss": 0.6005728721618653, | |
| "step": 300, | |
| "token_acc": 0.8250169721656483 | |
| }, | |
| { | |
| "epoch": 0.33066666666666666, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 1.9060440345345762e-05, | |
| "loss": 0.614538049697876, | |
| "step": 310, | |
| "token_acc": 0.8181046916094001 | |
| }, | |
| { | |
| "epoch": 0.3413333333333333, | |
| "grad_norm": 4.71875, | |
| "learning_rate": 1.8986004750249687e-05, | |
| "loss": 0.6015767574310302, | |
| "step": 320, | |
| "token_acc": 0.8247483718176436 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 1.890888881325205e-05, | |
| "loss": 0.6035000801086425, | |
| "step": 330, | |
| "token_acc": 0.8262050237610319 | |
| }, | |
| { | |
| "epoch": 0.3626666666666667, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 1.8829115536463682e-05, | |
| "loss": 0.5983945846557617, | |
| "step": 340, | |
| "token_acc": 0.8245257452574526 | |
| }, | |
| { | |
| "epoch": 0.37333333333333335, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 1.8746708714625668e-05, | |
| "loss": 0.6272837162017822, | |
| "step": 350, | |
| "token_acc": 0.8214831155608349 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 5.75, | |
| "learning_rate": 1.8661692928011858e-05, | |
| "loss": 0.5912134647369385, | |
| "step": 360, | |
| "token_acc": 0.828818151032848 | |
| }, | |
| { | |
| "epoch": 0.39466666666666667, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 1.8574093535097062e-05, | |
| "loss": 0.573200798034668, | |
| "step": 370, | |
| "token_acc": 0.8316343866802582 | |
| }, | |
| { | |
| "epoch": 0.4053333333333333, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 1.8483936664993152e-05, | |
| "loss": 0.5843992233276367, | |
| "step": 380, | |
| "token_acc": 0.8279888315424316 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 5.5, | |
| "learning_rate": 1.839124920965527e-05, | |
| "loss": 0.5969943046569824, | |
| "step": 390, | |
| "token_acc": 0.8256973795435334 | |
| }, | |
| { | |
| "epoch": 0.4266666666666667, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 1.829605881586051e-05, | |
| "loss": 0.5709249019622803, | |
| "step": 400, | |
| "token_acc": 0.8314263920671243 | |
| }, | |
| { | |
| "epoch": 0.43733333333333335, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 1.8198393876961446e-05, | |
| "loss": 0.5546571731567382, | |
| "step": 410, | |
| "token_acc": 0.834034515325835 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 7.5625, | |
| "learning_rate": 1.8098283524416982e-05, | |
| "loss": 0.5464757919311524, | |
| "step": 420, | |
| "token_acc": 0.8344332135995322 | |
| }, | |
| { | |
| "epoch": 0.45866666666666667, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 1.7995757619103012e-05, | |
| "loss": 0.5686863899230957, | |
| "step": 430, | |
| "token_acc": 0.8333759046402724 | |
| }, | |
| { | |
| "epoch": 0.4693333333333333, | |
| "grad_norm": 5.0, | |
| "learning_rate": 1.7890846742405558e-05, | |
| "loss": 0.575782299041748, | |
| "step": 440, | |
| "token_acc": 0.8317190263623347 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 1.7783582187098944e-05, | |
| "loss": 0.5736880779266358, | |
| "step": 450, | |
| "token_acc": 0.8315915710125065 | |
| }, | |
| { | |
| "epoch": 0.49066666666666664, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 1.7673995948011807e-05, | |
| "loss": 0.5740520000457764, | |
| "step": 460, | |
| "token_acc": 0.8287811539774668 | |
| }, | |
| { | |
| "epoch": 0.5013333333333333, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 1.75621207124837e-05, | |
| "loss": 0.5786960124969482, | |
| "step": 470, | |
| "token_acc": 0.8299598599368008 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 5.375, | |
| "learning_rate": 1.7447989850615114e-05, | |
| "loss": 0.5925831317901611, | |
| "step": 480, | |
| "token_acc": 0.8259212198221093 | |
| }, | |
| { | |
| "epoch": 0.5226666666666666, | |
| "grad_norm": 5.125, | |
| "learning_rate": 1.733163740531386e-05, | |
| "loss": 0.5791446685791015, | |
| "step": 490, | |
| "token_acc": 0.8287425149700599 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 1.7213098082140774e-05, | |
| "loss": 0.5816606521606446, | |
| "step": 500, | |
| "token_acc": 0.8285325856062538 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 1.7092407238957737e-05, | |
| "loss": 0.5790934562683105, | |
| "step": 510, | |
| "token_acc": 0.8313161235717309 | |
| }, | |
| { | |
| "epoch": 0.5546666666666666, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 1.6969600875381172e-05, | |
| "loss": 0.5422775268554687, | |
| "step": 520, | |
| "token_acc": 0.8432008189728715 | |
| }, | |
| { | |
| "epoch": 0.5653333333333334, | |
| "grad_norm": 5.125, | |
| "learning_rate": 1.684471562204407e-05, | |
| "loss": 0.5492764472961426, | |
| "step": 530, | |
| "token_acc": 0.8374480359718334 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 1.6717788729669822e-05, | |
| "loss": 0.5672526359558105, | |
| "step": 540, | |
| "token_acc": 0.8343422286639952 | |
| }, | |
| { | |
| "epoch": 0.5866666666666667, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 1.658885805796111e-05, | |
| "loss": 0.5880655288696289, | |
| "step": 550, | |
| "token_acc": 0.828699779623665 | |
| }, | |
| { | |
| "epoch": 0.5973333333333334, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 1.6457962064307104e-05, | |
| "loss": 0.5665912628173828, | |
| "step": 560, | |
| "token_acc": 0.8322487957407251 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 5.75, | |
| "learning_rate": 1.6325139792312414e-05, | |
| "loss": 0.5551149368286132, | |
| "step": 570, | |
| "token_acc": 0.8370577281191807 | |
| }, | |
| { | |
| "epoch": 0.6186666666666667, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 1.6190430860151196e-05, | |
| "loss": 0.5558858394622803, | |
| "step": 580, | |
| "token_acc": 0.8377869133264817 | |
| }, | |
| { | |
| "epoch": 0.6293333333333333, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 1.605387544874983e-05, | |
| "loss": 0.5704761505126953, | |
| "step": 590, | |
| "token_acc": 0.83192066451941 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 4.9375, | |
| "learning_rate": 1.591551428980179e-05, | |
| "loss": 0.5583576202392578, | |
| "step": 600, | |
| "token_acc": 0.834020358374695 | |
| }, | |
| { | |
| "epoch": 0.6506666666666666, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 1.5775388653618217e-05, | |
| "loss": 0.5596559524536133, | |
| "step": 610, | |
| "token_acc": 0.8347915594441585 | |
| }, | |
| { | |
| "epoch": 0.6613333333333333, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 1.5633540336817804e-05, | |
| "loss": 0.5696552276611329, | |
| "step": 620, | |
| "token_acc": 0.8363047001620746 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 5.5, | |
| "learning_rate": 1.549001164985973e-05, | |
| "loss": 0.5665851593017578, | |
| "step": 630, | |
| "token_acc": 0.8350402031316123 | |
| }, | |
| { | |
| "epoch": 0.6826666666666666, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 1.5344845404423313e-05, | |
| "loss": 0.6030998229980469, | |
| "step": 640, | |
| "token_acc": 0.8214255373375244 | |
| }, | |
| { | |
| "epoch": 0.6933333333333334, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 1.5198084900638161e-05, | |
| "loss": 0.557160472869873, | |
| "step": 650, | |
| "token_acc": 0.8347296268088348 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 1.5049773914168629e-05, | |
| "loss": 0.5760011672973633, | |
| "step": 660, | |
| "token_acc": 0.8304249812139934 | |
| }, | |
| { | |
| "epoch": 0.7146666666666667, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 1.4899956683156413e-05, | |
| "loss": 0.5680459976196289, | |
| "step": 670, | |
| "token_acc": 0.8291992103682088 | |
| }, | |
| { | |
| "epoch": 0.7253333333333334, | |
| "grad_norm": 6.625, | |
| "learning_rate": 1.4748677895025243e-05, | |
| "loss": 0.5647514343261719, | |
| "step": 680, | |
| "token_acc": 0.8340217482930119 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 1.4595982673151511e-05, | |
| "loss": 0.5595580101013183, | |
| "step": 690, | |
| "token_acc": 0.8372705642420122 | |
| }, | |
| { | |
| "epoch": 0.7466666666666667, | |
| "grad_norm": 5.625, | |
| "learning_rate": 1.444191656340491e-05, | |
| "loss": 0.551822280883789, | |
| "step": 700, | |
| "token_acc": 0.8342021187153187 | |
| }, | |
| { | |
| "epoch": 0.7573333333333333, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 1.4286525520563003e-05, | |
| "loss": 0.5694937705993652, | |
| "step": 710, | |
| "token_acc": 0.8321737654844731 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 1.4129855894603885e-05, | |
| "loss": 0.5161600112915039, | |
| "step": 720, | |
| "token_acc": 0.8459770114942529 | |
| }, | |
| { | |
| "epoch": 0.7786666666666666, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 1.3971954416880897e-05, | |
| "loss": 0.5634379863739014, | |
| "step": 730, | |
| "token_acc": 0.833432531247343 | |
| }, | |
| { | |
| "epoch": 0.7893333333333333, | |
| "grad_norm": 4.6875, | |
| "learning_rate": 1.3812868186183641e-05, | |
| "loss": 0.5540994644165039, | |
| "step": 740, | |
| "token_acc": 0.8377988807868407 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 4.9375, | |
| "learning_rate": 1.3652644654689355e-05, | |
| "loss": 0.5608715057373047, | |
| "step": 750, | |
| "token_acc": 0.8339112050739957 | |
| }, | |
| { | |
| "epoch": 0.8106666666666666, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 1.3491331613808909e-05, | |
| "loss": 0.5746842384338379, | |
| "step": 760, | |
| "token_acc": 0.8306396733582851 | |
| }, | |
| { | |
| "epoch": 0.8213333333333334, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 1.3328977179931595e-05, | |
| "loss": 0.5586155891418457, | |
| "step": 770, | |
| "token_acc": 0.8344169551633877 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 5.09375, | |
| "learning_rate": 1.3165629780072991e-05, | |
| "loss": 0.5604053020477295, | |
| "step": 780, | |
| "token_acc": 0.8320225679603351 | |
| }, | |
| { | |
| "epoch": 0.8426666666666667, | |
| "grad_norm": 5.03125, | |
| "learning_rate": 1.3001338137430163e-05, | |
| "loss": 0.549941110610962, | |
| "step": 790, | |
| "token_acc": 0.8354163148116872 | |
| }, | |
| { | |
| "epoch": 0.8533333333333334, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 1.2836151256848562e-05, | |
| "loss": 0.5661550521850586, | |
| "step": 800, | |
| "token_acc": 0.8333191958605479 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 1.2670118410204819e-05, | |
| "loss": 0.5822794437408447, | |
| "step": 810, | |
| "token_acc": 0.8300913396481732 | |
| }, | |
| { | |
| "epoch": 0.8746666666666667, | |
| "grad_norm": 5.03125, | |
| "learning_rate": 1.250328912171001e-05, | |
| "loss": 0.5514832019805909, | |
| "step": 820, | |
| "token_acc": 0.8363110430491747 | |
| }, | |
| { | |
| "epoch": 0.8853333333333333, | |
| "grad_norm": 5.5, | |
| "learning_rate": 1.2335713153137551e-05, | |
| "loss": 0.5658942222595215, | |
| "step": 830, | |
| "token_acc": 0.8320276303597001 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 1.2167440488980309e-05, | |
| "loss": 0.5764975547790527, | |
| "step": 840, | |
| "token_acc": 0.8320033599328014 | |
| }, | |
| { | |
| "epoch": 0.9066666666666666, | |
| "grad_norm": 4.71875, | |
| "learning_rate": 1.1998521321541214e-05, | |
| "loss": 0.5525532722473144, | |
| "step": 850, | |
| "token_acc": 0.836985477356707 | |
| }, | |
| { | |
| "epoch": 0.9173333333333333, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 1.1829006035961947e-05, | |
| "loss": 0.5769608974456787, | |
| "step": 860, | |
| "token_acc": 0.832285471537808 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 1.165894519519407e-05, | |
| "loss": 0.5438988685607911, | |
| "step": 870, | |
| "token_acc": 0.8355040595399188 | |
| }, | |
| { | |
| "epoch": 0.9386666666666666, | |
| "grad_norm": 5.375, | |
| "learning_rate": 1.148838952491712e-05, | |
| "loss": 0.5477108001708985, | |
| "step": 880, | |
| "token_acc": 0.8355240696483441 | |
| }, | |
| { | |
| "epoch": 0.9493333333333334, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 1.1317389898408188e-05, | |
| "loss": 0.5485364913940429, | |
| "step": 890, | |
| "token_acc": 0.832925491519703 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 1.1145997321367454e-05, | |
| "loss": 0.5587331771850585, | |
| "step": 900, | |
| "token_acc": 0.8355285376561973 | |
| }, | |
| { | |
| "epoch": 0.9706666666666667, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 1.0974262916704223e-05, | |
| "loss": 0.5252433300018311, | |
| "step": 910, | |
| "token_acc": 0.8404669260700389 | |
| }, | |
| { | |
| "epoch": 0.9813333333333333, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 1.0802237909288005e-05, | |
| "loss": 0.5581695079803467, | |
| "step": 920, | |
| "token_acc": 0.838423477222317 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 1.062997361066918e-05, | |
| "loss": 0.5427685737609863, | |
| "step": 930, | |
| "token_acc": 0.8367765067440669 | |
| }, | |
| { | |
| "epoch": 1.0021333333333333, | |
| "grad_norm": 4.65625, | |
| "learning_rate": 1.0457521403773823e-05, | |
| "loss": 0.5227243900299072, | |
| "step": 940, | |
| "token_acc": 0.8432694901537632 | |
| }, | |
| { | |
| "epoch": 1.0128, | |
| "grad_norm": 4.84375, | |
| "learning_rate": 1.0284932727577203e-05, | |
| "loss": 0.4229020595550537, | |
| "step": 950, | |
| "token_acc": 0.8669341072038238 | |
| }, | |
| { | |
| "epoch": 1.0234666666666667, | |
| "grad_norm": 5.03125, | |
| "learning_rate": 1.0112259061760605e-05, | |
| "loss": 0.43592014312744143, | |
| "step": 960, | |
| "token_acc": 0.8627684964200477 | |
| }, | |
| { | |
| "epoch": 1.0341333333333333, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 9.939551911355979e-06, | |
| "loss": 0.43202896118164064, | |
| "step": 970, | |
| "token_acc": 0.8636055574381566 | |
| }, | |
| { | |
| "epoch": 1.0448, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 9.766862791383054e-06, | |
| "loss": 0.4390109062194824, | |
| "step": 980, | |
| "token_acc": 0.8663960764417385 | |
| }, | |
| { | |
| "epoch": 1.0554666666666668, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 9.59424321148343e-06, | |
| "loss": 0.416988468170166, | |
| "step": 990, | |
| "token_acc": 0.8692489851150202 | |
| }, | |
| { | |
| "epoch": 1.0661333333333334, | |
| "grad_norm": 4.96875, | |
| "learning_rate": 9.42174466055634e-06, | |
| "loss": 0.4186138153076172, | |
| "step": 1000, | |
| "token_acc": 0.8688482570061518 | |
| }, | |
| { | |
| "epoch": 1.0768, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 9.249418591400539e-06, | |
| "loss": 0.4354249954223633, | |
| "step": 1010, | |
| "token_acc": 0.8667799490229396 | |
| }, | |
| { | |
| "epoch": 1.0874666666666666, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 9.07731640536698e-06, | |
| "loss": 0.451226806640625, | |
| "step": 1020, | |
| "token_acc": 0.8601433994095319 | |
| }, | |
| { | |
| "epoch": 1.0981333333333334, | |
| "grad_norm": 5.0, | |
| "learning_rate": 8.90548943702686e-06, | |
| "loss": 0.4429470539093018, | |
| "step": 1030, | |
| "token_acc": 0.8604789908804228 | |
| }, | |
| { | |
| "epoch": 1.1088, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 8.733988938859545e-06, | |
| "loss": 0.435594367980957, | |
| "step": 1040, | |
| "token_acc": 0.8667980126777454 | |
| }, | |
| { | |
| "epoch": 1.1194666666666666, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 8.562866065965013e-06, | |
| "loss": 0.4241774082183838, | |
| "step": 1050, | |
| "token_acc": 0.8683653601963938 | |
| }, | |
| { | |
| "epoch": 1.1301333333333332, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 8.39217186080532e-06, | |
| "loss": 0.42261734008789065, | |
| "step": 1060, | |
| "token_acc": 0.8686390532544379 | |
| }, | |
| { | |
| "epoch": 1.1408, | |
| "grad_norm": 5.03125, | |
| "learning_rate": 8.221957237979686e-06, | |
| "loss": 0.41465024948120116, | |
| "step": 1070, | |
| "token_acc": 0.8682988602785986 | |
| }, | |
| { | |
| "epoch": 1.1514666666666666, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 8.052272969037702e-06, | |
| "loss": 0.4440279006958008, | |
| "step": 1080, | |
| "token_acc": 0.8630967631736272 | |
| }, | |
| { | |
| "epoch": 1.1621333333333332, | |
| "grad_norm": 5.09375, | |
| "learning_rate": 7.88316966733522e-06, | |
| "loss": 0.4302692413330078, | |
| "step": 1090, | |
| "token_acc": 0.8661821291150064 | |
| }, | |
| { | |
| "epoch": 1.1728, | |
| "grad_norm": 5.0, | |
| "learning_rate": 7.71469777293741e-06, | |
| "loss": 0.4230166435241699, | |
| "step": 1100, | |
| "token_acc": 0.8674463937621832 | |
| }, | |
| { | |
| "epoch": 1.1834666666666667, | |
| "grad_norm": 5.03125, | |
| "learning_rate": 7.546907537573529e-06, | |
| "loss": 0.4408688545227051, | |
| "step": 1110, | |
| "token_acc": 0.8641611190696891 | |
| }, | |
| { | |
| "epoch": 1.1941333333333333, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 7.379849009647848e-06, | |
| "loss": 0.43018126487731934, | |
| "step": 1120, | |
| "token_acc": 0.8686655405405406 | |
| }, | |
| { | |
| "epoch": 1.2048, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 7.213572019311235e-06, | |
| "loss": 0.4404290199279785, | |
| "step": 1130, | |
| "token_acc": 0.8649767638360795 | |
| }, | |
| { | |
| "epoch": 1.2154666666666667, | |
| "grad_norm": 6.625, | |
| "learning_rate": 7.04812616359784e-06, | |
| "loss": 0.40485620498657227, | |
| "step": 1140, | |
| "token_acc": 0.8732010559482245 | |
| }, | |
| { | |
| "epoch": 1.2261333333333333, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 6.883560791631307e-06, | |
| "loss": 0.45158863067626953, | |
| "step": 1150, | |
| "token_acc": 0.8618751596695904 | |
| }, | |
| { | |
| "epoch": 1.2368000000000001, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 6.7199249899049435e-06, | |
| "loss": 0.4398628234863281, | |
| "step": 1160, | |
| "token_acc": 0.8650072211366918 | |
| }, | |
| { | |
| "epoch": 1.2474666666666667, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 6.557267567640217e-06, | |
| "loss": 0.4337770462036133, | |
| "step": 1170, | |
| "token_acc": 0.8668418375995931 | |
| }, | |
| { | |
| "epoch": 1.2581333333333333, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 6.3956370422279625e-06, | |
| "loss": 0.4266934394836426, | |
| "step": 1180, | |
| "token_acc": 0.8647713851864349 | |
| }, | |
| { | |
| "epoch": 1.2688, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 6.235081624756629e-06, | |
| "loss": 0.40284242630004885, | |
| "step": 1190, | |
| "token_acc": 0.8689100954294324 | |
| }, | |
| { | |
| "epoch": 1.2794666666666665, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 6.075649205631904e-06, | |
| "loss": 0.43938732147216797, | |
| "step": 1200, | |
| "token_acc": 0.8652512265268144 | |
| }, | |
| { | |
| "epoch": 1.2901333333333334, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 5.917387340291969e-06, | |
| "loss": 0.43057842254638673, | |
| "step": 1210, | |
| "token_acc": 0.8647868184134534 | |
| }, | |
| { | |
| "epoch": 1.3008, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 5.760343235022703e-06, | |
| "loss": 0.43517169952392576, | |
| "step": 1220, | |
| "token_acc": 0.8625830635542682 | |
| }, | |
| { | |
| "epoch": 1.3114666666666666, | |
| "grad_norm": 5.75, | |
| "learning_rate": 5.604563732876989e-06, | |
| "loss": 0.4466823101043701, | |
| "step": 1230, | |
| "token_acc": 0.8609684210526316 | |
| }, | |
| { | |
| "epoch": 1.3221333333333334, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 5.450095299702392e-06, | |
| "loss": 0.44326114654541016, | |
| "step": 1240, | |
| "token_acc": 0.8649106462268146 | |
| }, | |
| { | |
| "epoch": 1.3328, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 5.296984010281367e-06, | |
| "loss": 0.40932507514953614, | |
| "step": 1250, | |
| "token_acc": 0.869938754678462 | |
| }, | |
| { | |
| "epoch": 1.3434666666666666, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 5.145275534588053e-06, | |
| "loss": 0.42442498207092283, | |
| "step": 1260, | |
| "token_acc": 0.8686963177140913 | |
| }, | |
| { | |
| "epoch": 1.3541333333333334, | |
| "grad_norm": 5.125, | |
| "learning_rate": 4.995015124165883e-06, | |
| "loss": 0.42131719589233396, | |
| "step": 1270, | |
| "token_acc": 0.8664903600068248 | |
| }, | |
| { | |
| "epoch": 1.3648, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 4.8462475986299625e-06, | |
| "loss": 0.42685232162475584, | |
| "step": 1280, | |
| "token_acc": 0.8674472267327579 | |
| }, | |
| { | |
| "epoch": 1.3754666666666666, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 4.69901733229828e-06, | |
| "loss": 0.4310049057006836, | |
| "step": 1290, | |
| "token_acc": 0.8657729509587646 | |
| }, | |
| { | |
| "epoch": 1.3861333333333334, | |
| "grad_norm": 5.5, | |
| "learning_rate": 4.5533682409557745e-06, | |
| "loss": 0.42982044219970705, | |
| "step": 1300, | |
| "token_acc": 0.8655927725219467 | |
| }, | |
| { | |
| "epoch": 1.3968, | |
| "grad_norm": 5.25, | |
| "learning_rate": 4.409343768755135e-06, | |
| "loss": 0.41706085205078125, | |
| "step": 1310, | |
| "token_acc": 0.8671621390739792 | |
| }, | |
| { | |
| "epoch": 1.4074666666666666, | |
| "grad_norm": 5.375, | |
| "learning_rate": 4.266986875258299e-06, | |
| "loss": 0.42982168197631837, | |
| "step": 1320, | |
| "token_acc": 0.8655782082734117 | |
| }, | |
| { | |
| "epoch": 1.4181333333333335, | |
| "grad_norm": 5.0, | |
| "learning_rate": 4.126340022622506e-06, | |
| "loss": 0.40945024490356446, | |
| "step": 1330, | |
| "token_acc": 0.8718840088180431 | |
| }, | |
| { | |
| "epoch": 1.4288, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 3.987445162934689e-06, | |
| "loss": 0.4395137786865234, | |
| "step": 1340, | |
| "token_acc": 0.8654642733862616 | |
| }, | |
| { | |
| "epoch": 1.4394666666666667, | |
| "grad_norm": 5.09375, | |
| "learning_rate": 3.850343725698018e-06, | |
| "loss": 0.4194896697998047, | |
| "step": 1350, | |
| "token_acc": 0.8659327530295272 | |
| }, | |
| { | |
| "epoch": 1.4501333333333333, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 3.7150766054743393e-06, | |
| "loss": 0.4344645500183105, | |
| "step": 1360, | |
| "token_acc": 0.8671583587656833 | |
| }, | |
| { | |
| "epoch": 1.4607999999999999, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 3.5816841496861563e-06, | |
| "loss": 0.4192943572998047, | |
| "step": 1370, | |
| "token_acc": 0.8677099367196853 | |
| }, | |
| { | |
| "epoch": 1.4714666666666667, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 3.4502061465818024e-06, | |
| "loss": 0.4389029979705811, | |
| "step": 1380, | |
| "token_acc": 0.8614711328059249 | |
| }, | |
| { | |
| "epoch": 1.4821333333333333, | |
| "grad_norm": 5.25, | |
| "learning_rate": 3.3206818133674478e-06, | |
| "loss": 0.425324821472168, | |
| "step": 1390, | |
| "token_acc": 0.8668295148247979 | |
| }, | |
| { | |
| "epoch": 1.4928, | |
| "grad_norm": 5.25, | |
| "learning_rate": 3.1931497845093753e-06, | |
| "loss": 0.4116145133972168, | |
| "step": 1400, | |
| "token_acc": 0.8701353998126543 | |
| }, | |
| { | |
| "epoch": 1.5034666666666667, | |
| "grad_norm": 8.0625, | |
| "learning_rate": 3.0676481002101223e-06, | |
| "loss": 0.4524849414825439, | |
| "step": 1410, | |
| "token_acc": 0.8596744107173139 | |
| }, | |
| { | |
| "epoch": 1.5141333333333333, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 2.9442141950618762e-06, | |
| "loss": 0.40929403305053713, | |
| "step": 1420, | |
| "token_acc": 0.8705962637550115 | |
| }, | |
| { | |
| "epoch": 1.5248, | |
| "grad_norm": 5.25, | |
| "learning_rate": 2.8228848868804714e-06, | |
| "loss": 0.4273798942565918, | |
| "step": 1430, | |
| "token_acc": 0.8684869739478958 | |
| }, | |
| { | |
| "epoch": 1.5354666666666668, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 2.7036963657234105e-06, | |
| "loss": 0.421265983581543, | |
| "step": 1440, | |
| "token_acc": 0.8716198925189798 | |
| }, | |
| { | |
| "epoch": 1.5461333333333334, | |
| "grad_norm": 5.75, | |
| "learning_rate": 2.5866841830950994e-06, | |
| "loss": 0.4097298622131348, | |
| "step": 1450, | |
| "token_acc": 0.8729807855806836 | |
| }, | |
| { | |
| "epoch": 1.5568, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 2.47188324134255e-06, | |
| "loss": 0.4039015293121338, | |
| "step": 1460, | |
| "token_acc": 0.8734842703298566 | |
| }, | |
| { | |
| "epoch": 1.5674666666666668, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 2.3593277832447405e-06, | |
| "loss": 0.3996889591217041, | |
| "step": 1470, | |
| "token_acc": 0.8748932536293766 | |
| }, | |
| { | |
| "epoch": 1.5781333333333334, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 2.2490513817986757e-06, | |
| "loss": 0.40860567092895506, | |
| "step": 1480, | |
| "token_acc": 0.8692588504966465 | |
| }, | |
| { | |
| "epoch": 1.5888, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 2.141086930205255e-06, | |
| "loss": 0.4375762939453125, | |
| "step": 1490, | |
| "token_acc": 0.8629623316856997 | |
| }, | |
| { | |
| "epoch": 1.5994666666666668, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 2.035466632057905e-06, | |
| "loss": 0.424249267578125, | |
| "step": 1500, | |
| "token_acc": 0.8679070550597744 | |
| }, | |
| { | |
| "epoch": 1.6101333333333332, | |
| "grad_norm": 5.0, | |
| "learning_rate": 1.9322219917369e-06, | |
| "loss": 0.40370688438415525, | |
| "step": 1510, | |
| "token_acc": 0.871133586177834 | |
| }, | |
| { | |
| "epoch": 1.6208, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 1.8313838050122423e-06, | |
| "loss": 0.4515178680419922, | |
| "step": 1520, | |
| "token_acc": 0.8593498005262711 | |
| }, | |
| { | |
| "epoch": 1.6314666666666666, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 1.7329821498579257e-06, | |
| "loss": 0.43173723220825194, | |
| "step": 1530, | |
| "token_acc": 0.8647822765469825 | |
| }, | |
| { | |
| "epoch": 1.6421333333333332, | |
| "grad_norm": 5.125, | |
| "learning_rate": 1.6370463774802759e-06, | |
| "loss": 0.42908735275268556, | |
| "step": 1540, | |
| "token_acc": 0.8682573498082659 | |
| }, | |
| { | |
| "epoch": 1.6528, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 1.5436051035630962e-06, | |
| "loss": 0.4312245845794678, | |
| "step": 1550, | |
| "token_acc": 0.8645736105218498 | |
| }, | |
| { | |
| "epoch": 1.6634666666666666, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 1.4526861997321794e-06, | |
| "loss": 0.4247132301330566, | |
| "step": 1560, | |
| "token_acc": 0.8694854474188976 | |
| }, | |
| { | |
| "epoch": 1.6741333333333333, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 1.3643167852417894e-06, | |
| "loss": 0.4231447219848633, | |
| "step": 1570, | |
| "token_acc": 0.8674972640794679 | |
| }, | |
| { | |
| "epoch": 1.6848, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 1.278523218885518e-06, | |
| "loss": 0.40172972679138186, | |
| "step": 1580, | |
| "token_acc": 0.8752116491703352 | |
| }, | |
| { | |
| "epoch": 1.6954666666666667, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 1.1953310911340165e-06, | |
| "loss": 0.4114649295806885, | |
| "step": 1590, | |
| "token_acc": 0.8714602524735585 | |
| }, | |
| { | |
| "epoch": 1.7061333333333333, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 1.1147652165018597e-06, | |
| "loss": 0.40552978515625, | |
| "step": 1600, | |
| "token_acc": 0.8678014905149052 | |
| }, | |
| { | |
| "epoch": 1.7168, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 1.0368496261459004e-06, | |
| "loss": 0.4324502944946289, | |
| "step": 1610, | |
| "token_acc": 0.862758329818642 | |
| }, | |
| { | |
| "epoch": 1.7274666666666667, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 9.616075606972597e-07, | |
| "loss": 0.4126314163208008, | |
| "step": 1620, | |
| "token_acc": 0.8694481830417228 | |
| }, | |
| { | |
| "epoch": 1.7381333333333333, | |
| "grad_norm": 5.75, | |
| "learning_rate": 8.890614633291095e-07, | |
| "loss": 0.43532743453979494, | |
| "step": 1630, | |
| "token_acc": 0.8601766904519198 | |
| }, | |
| { | |
| "epoch": 1.7488000000000001, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 8.192329730623471e-07, | |
| "loss": 0.4172046661376953, | |
| "step": 1640, | |
| "token_acc": 0.8675934559633806 | |
| }, | |
| { | |
| "epoch": 1.7594666666666665, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 7.521429183111139e-07, | |
| "loss": 0.4211602687835693, | |
| "step": 1650, | |
| "token_acc": 0.869084099421178 | |
| }, | |
| { | |
| "epoch": 1.7701333333333333, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 6.878113106700946e-07, | |
| "loss": 0.4197425842285156, | |
| "step": 1660, | |
| "token_acc": 0.8680926916221033 | |
| }, | |
| { | |
| "epoch": 1.7808000000000002, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 6.262573389454773e-07, | |
| "loss": 0.43222551345825194, | |
| "step": 1670, | |
| "token_acc": 0.8637358935489304 | |
| }, | |
| { | |
| "epoch": 1.7914666666666665, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 5.674993634313242e-07, | |
| "loss": 0.4256401062011719, | |
| "step": 1680, | |
| "token_acc": 0.8653747652381766 | |
| }, | |
| { | |
| "epoch": 1.8021333333333334, | |
| "grad_norm": 5.03125, | |
| "learning_rate": 5.115549104330575e-07, | |
| "loss": 0.39786996841430666, | |
| "step": 1690, | |
| "token_acc": 0.8736002714625043 | |
| }, | |
| { | |
| "epoch": 1.8128, | |
| "grad_norm": 7.09375, | |
| "learning_rate": 4.5844066703973676e-07, | |
| "loss": 0.42459850311279296, | |
| "step": 1700, | |
| "token_acc": 0.8695799745439118 | |
| }, | |
| { | |
| "epoch": 1.8234666666666666, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 4.081724761466288e-07, | |
| "loss": 0.44160709381103513, | |
| "step": 1710, | |
| "token_acc": 0.8608688288593446 | |
| }, | |
| { | |
| "epoch": 1.8341333333333334, | |
| "grad_norm": 5.5, | |
| "learning_rate": 3.6076533172959895e-07, | |
| "loss": 0.4103675842285156, | |
| "step": 1720, | |
| "token_acc": 0.8672603545324709 | |
| }, | |
| { | |
| "epoch": 1.8448, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 3.1623337437272084e-07, | |
| "loss": 0.43485474586486816, | |
| "step": 1730, | |
| "token_acc": 0.8642153532608695 | |
| }, | |
| { | |
| "epoch": 1.8554666666666666, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 2.745898870504116e-07, | |
| "loss": 0.4110551357269287, | |
| "step": 1740, | |
| "token_acc": 0.8730118009235506 | |
| }, | |
| { | |
| "epoch": 1.8661333333333334, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 2.3584729116541305e-07, | |
| "loss": 0.41416378021240235, | |
| "step": 1750, | |
| "token_acc": 0.86998136540742 | |
| }, | |
| { | |
| "epoch": 1.8768, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 2.0001714284372253e-07, | |
| "loss": 0.43006277084350586, | |
| "step": 1760, | |
| "token_acc": 0.8696650229552797 | |
| }, | |
| { | |
| "epoch": 1.8874666666666666, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 1.6711012948764426e-07, | |
| "loss": 0.40975046157836914, | |
| "step": 1770, | |
| "token_acc": 0.8725116476069462 | |
| }, | |
| { | |
| "epoch": 1.8981333333333335, | |
| "grad_norm": 5.625, | |
| "learning_rate": 1.371360665879584e-07, | |
| "loss": 0.406182861328125, | |
| "step": 1780, | |
| "token_acc": 0.8718765721952038 | |
| }, | |
| { | |
| "epoch": 1.9088, | |
| "grad_norm": 5.25, | |
| "learning_rate": 1.1010389479615302e-07, | |
| "loss": 0.4136258602142334, | |
| "step": 1790, | |
| "token_acc": 0.870780770542107 | |
| }, | |
| { | |
| "epoch": 1.9194666666666667, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 8.60216772576139e-08, | |
| "loss": 0.4269017219543457, | |
| "step": 1800, | |
| "token_acc": 0.8661390519955748 | |
| }, | |
| { | |
| "epoch": 1.9301333333333335, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 6.489659720655295e-08, | |
| "loss": 0.42894792556762695, | |
| "step": 1810, | |
| "token_acc": 0.8644270122783083 | |
| }, | |
| { | |
| "epoch": 1.9407999999999999, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 4.673495582339649e-08, | |
| "loss": 0.42904911041259763, | |
| "step": 1820, | |
| "token_acc": 0.8654405474764756 | |
| }, | |
| { | |
| "epoch": 1.9514666666666667, | |
| "grad_norm": 5.375, | |
| "learning_rate": 3.154217035526874e-08, | |
| "loss": 0.42716250419616697, | |
| "step": 1830, | |
| "token_acc": 0.869757174392936 | |
| }, | |
| { | |
| "epoch": 1.9621333333333333, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 1.932277250014214e-08, | |
| "loss": 0.42876396179199217, | |
| "step": 1840, | |
| "token_acc": 0.8661888701517707 | |
| }, | |
| { | |
| "epoch": 1.9727999999999999, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 1.008040705511748e-08, | |
| "loss": 0.44731903076171875, | |
| "step": 1850, | |
| "token_acc": 0.8617075232459848 | |
| }, | |
| { | |
| "epoch": 1.9834666666666667, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 3.8178308292624585e-09, | |
| "loss": 0.4366297721862793, | |
| "step": 1860, | |
| "token_acc": 0.8642533936651584 | |
| }, | |
| { | |
| "epoch": 1.9941333333333333, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 5.369118213094382e-10, | |
| "loss": 0.4206860542297363, | |
| "step": 1870, | |
| "token_acc": 0.8676668914362778 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1876, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.012174212254925e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |