| { | |
| "best_metric": 0.3242824375629425, | |
| "best_model_checkpoint": "/workspace/plateer_classifier_v0.1_result/checkpoint-110000", | |
| "epoch": 0.6441270979878347, | |
| "eval_steps": 55000, | |
| "global_step": 110000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0014640195241643742, | |
| "grad_norm": 50.05304718017578, | |
| "learning_rate": 4.880000000000001e-06, | |
| "loss": 4.3958, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0029280390483287485, | |
| "grad_norm": 48.363304138183594, | |
| "learning_rate": 9.88e-06, | |
| "loss": 1.6496, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.004392058572493123, | |
| "grad_norm": 54.546974182128906, | |
| "learning_rate": 1.488e-05, | |
| "loss": 0.8787, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.005856078096657497, | |
| "grad_norm": 50.317874908447266, | |
| "learning_rate": 1.9880000000000003e-05, | |
| "loss": 0.7721, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.007320097620821872, | |
| "grad_norm": 62.48823928833008, | |
| "learning_rate": 2.488e-05, | |
| "loss": 0.7047, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.008784117144986246, | |
| "grad_norm": 44.35001754760742, | |
| "learning_rate": 2.9880000000000002e-05, | |
| "loss": 0.6749, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01024813666915062, | |
| "grad_norm": 36.486793518066406, | |
| "learning_rate": 3.4880000000000005e-05, | |
| "loss": 0.6409, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.011712156193314994, | |
| "grad_norm": 47.03588104248047, | |
| "learning_rate": 3.988e-05, | |
| "loss": 0.6406, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.013176175717479368, | |
| "grad_norm": 31.227832794189453, | |
| "learning_rate": 4.488e-05, | |
| "loss": 0.6149, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.014640195241643743, | |
| "grad_norm": 39.8408317565918, | |
| "learning_rate": 4.9880000000000004e-05, | |
| "loss": 0.5956, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.016104214765808117, | |
| "grad_norm": 41.118736267089844, | |
| "learning_rate": 5.4879999999999996e-05, | |
| "loss": 0.5905, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.017568234289972492, | |
| "grad_norm": 29.624338150024414, | |
| "learning_rate": 5.988e-05, | |
| "loss": 0.5608, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.019032253814136865, | |
| "grad_norm": 22.993818283081055, | |
| "learning_rate": 6.488e-05, | |
| "loss": 0.5614, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.02049627333830124, | |
| "grad_norm": 19.964269638061523, | |
| "learning_rate": 6.988e-05, | |
| "loss": 0.5569, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.021960292862465612, | |
| "grad_norm": 36.538047790527344, | |
| "learning_rate": 7.488e-05, | |
| "loss": 0.5316, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.023424312386629988, | |
| "grad_norm": 37.63505935668945, | |
| "learning_rate": 7.988e-05, | |
| "loss": 0.5364, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.024888331910794363, | |
| "grad_norm": 25.934967041015625, | |
| "learning_rate": 8.486000000000001e-05, | |
| "loss": 0.5234, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.026352351434958735, | |
| "grad_norm": 24.810028076171875, | |
| "learning_rate": 8.986e-05, | |
| "loss": 0.5155, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.02781637095912311, | |
| "grad_norm": 32.76811981201172, | |
| "learning_rate": 9.484e-05, | |
| "loss": 0.5022, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.029280390483287486, | |
| "grad_norm": 27.094772338867188, | |
| "learning_rate": 9.984e-05, | |
| "loss": 0.5023, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.029280390483287486, | |
| "eval_accuracy": 0.8572352668691132, | |
| "eval_loss": 0.5044249296188354, | |
| "eval_runtime": 11541.1431, | |
| "eval_samples_per_second": 210.432, | |
| "eval_steps_per_second": 6.576, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.03074441000745186, | |
| "grad_norm": 24.74563217163086, | |
| "learning_rate": 0.00010484, | |
| "loss": 0.5073, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.032208429531616234, | |
| "grad_norm": 17.229019165039062, | |
| "learning_rate": 0.00010984, | |
| "loss": 0.4932, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.03367244905578061, | |
| "grad_norm": 23.318979263305664, | |
| "learning_rate": 0.00011484000000000002, | |
| "loss": 0.504, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.035136468579944985, | |
| "grad_norm": 22.271846771240234, | |
| "learning_rate": 0.00011983999999999999, | |
| "loss": 0.4817, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.036600488104109354, | |
| "grad_norm": 24.304887771606445, | |
| "learning_rate": 0.00012484, | |
| "loss": 0.4966, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.03806450762827373, | |
| "grad_norm": 23.76158905029297, | |
| "learning_rate": 0.00012984000000000002, | |
| "loss": 0.4899, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.039528527152438105, | |
| "grad_norm": 20.765274047851562, | |
| "learning_rate": 0.00013484, | |
| "loss": 0.4773, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.04099254667660248, | |
| "grad_norm": 12.793950080871582, | |
| "learning_rate": 0.00013982000000000003, | |
| "loss": 0.4781, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.042456566200766856, | |
| "grad_norm": 14.128210067749023, | |
| "learning_rate": 0.00014482, | |
| "loss": 0.4687, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.043920585724931224, | |
| "grad_norm": 22.348928451538086, | |
| "learning_rate": 0.00014982, | |
| "loss": 0.4722, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.0453846052490956, | |
| "grad_norm": 17.29800796508789, | |
| "learning_rate": 0.00015480000000000002, | |
| "loss": 0.4692, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.046848624773259975, | |
| "grad_norm": 11.0147066116333, | |
| "learning_rate": 0.0001598, | |
| "loss": 0.4689, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.04831264429742435, | |
| "grad_norm": 11.713265419006348, | |
| "learning_rate": 0.0001648, | |
| "loss": 0.4788, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.049776663821588726, | |
| "grad_norm": 12.367693901062012, | |
| "learning_rate": 0.0001698, | |
| "loss": 0.4697, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.0512406833457531, | |
| "grad_norm": 8.11889934539795, | |
| "learning_rate": 0.00017480000000000002, | |
| "loss": 0.4696, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.05270470286991747, | |
| "grad_norm": 12.321019172668457, | |
| "learning_rate": 0.0001798, | |
| "loss": 0.461, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.054168722394081846, | |
| "grad_norm": 15.612183570861816, | |
| "learning_rate": 0.00018480000000000002, | |
| "loss": 0.4646, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.05563274191824622, | |
| "grad_norm": 10.72978687286377, | |
| "learning_rate": 0.0001898, | |
| "loss": 0.4673, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.0570967614424106, | |
| "grad_norm": 8.815441131591797, | |
| "learning_rate": 0.0001948, | |
| "loss": 0.4472, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.05856078096657497, | |
| "grad_norm": 8.681705474853516, | |
| "learning_rate": 0.0001998, | |
| "loss": 0.4629, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.05856078096657497, | |
| "eval_accuracy": 0.8688706572649133, | |
| "eval_loss": 0.457188218832016, | |
| "eval_runtime": 11537.8227, | |
| "eval_samples_per_second": 210.492, | |
| "eval_steps_per_second": 6.578, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.06002480049073934, | |
| "grad_norm": 13.643828392028809, | |
| "learning_rate": 0.0001997014219778306, | |
| "loss": 0.456, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.06148882001490372, | |
| "grad_norm": 13.211404800415039, | |
| "learning_rate": 0.00019939040320473745, | |
| "loss": 0.4666, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.06295283953906809, | |
| "grad_norm": 11.1001615524292, | |
| "learning_rate": 0.00019907938443164432, | |
| "loss": 0.4495, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.06441685906323247, | |
| "grad_norm": 8.222249984741211, | |
| "learning_rate": 0.00019876836565855117, | |
| "loss": 0.4483, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.06588087858739684, | |
| "grad_norm": 13.589752197265625, | |
| "learning_rate": 0.0001984585909605504, | |
| "loss": 0.4438, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.06734489811156122, | |
| "grad_norm": 9.988068580627441, | |
| "learning_rate": 0.00019814757218745724, | |
| "loss": 0.447, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.0688089176357256, | |
| "grad_norm": 8.311960220336914, | |
| "learning_rate": 0.0001978365534143641, | |
| "loss": 0.4476, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.07027293715988997, | |
| "grad_norm": 8.099685668945312, | |
| "learning_rate": 0.00019752553464127094, | |
| "loss": 0.4477, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.07173695668405435, | |
| "grad_norm": 8.23130989074707, | |
| "learning_rate": 0.00019721451586817782, | |
| "loss": 0.4385, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.07320097620821871, | |
| "grad_norm": 10.875362396240234, | |
| "learning_rate": 0.00019690349709508467, | |
| "loss": 0.4345, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.07466499573238308, | |
| "grad_norm": 9.479572296142578, | |
| "learning_rate": 0.00019659247832199152, | |
| "loss": 0.4345, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.07612901525654746, | |
| "grad_norm": 11.883151054382324, | |
| "learning_rate": 0.0001962814595488984, | |
| "loss": 0.4241, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.07759303478071183, | |
| "grad_norm": 8.15208911895752, | |
| "learning_rate": 0.00019597044077580524, | |
| "loss": 0.4335, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.07905705430487621, | |
| "grad_norm": 9.323240280151367, | |
| "learning_rate": 0.0001956594220027121, | |
| "loss": 0.4396, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.08052107382904058, | |
| "grad_norm": 7.250824928283691, | |
| "learning_rate": 0.00019534840322961897, | |
| "loss": 0.4376, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.08198509335320496, | |
| "grad_norm": 12.220071792602539, | |
| "learning_rate": 0.0001950373844565258, | |
| "loss": 0.4323, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.08344911287736934, | |
| "grad_norm": 8.460916519165039, | |
| "learning_rate": 0.00019472636568343266, | |
| "loss": 0.4271, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.08491313240153371, | |
| "grad_norm": 6.110500812530518, | |
| "learning_rate": 0.0001944153469103395, | |
| "loss": 0.4253, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.08637715192569809, | |
| "grad_norm": 10.618386268615723, | |
| "learning_rate": 0.00019410432813724636, | |
| "loss": 0.427, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.08784117144986245, | |
| "grad_norm": 9.827556610107422, | |
| "learning_rate": 0.00019379330936415324, | |
| "loss": 0.4254, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.08784117144986245, | |
| "eval_accuracy": 0.877075711565186, | |
| "eval_loss": 0.4201970100402832, | |
| "eval_runtime": 11537.2443, | |
| "eval_samples_per_second": 210.503, | |
| "eval_steps_per_second": 6.578, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.0892994385846771, | |
| "grad_norm": 10.84184455871582, | |
| "learning_rate": 0.00019349020046898423, | |
| "loss": 0.4211, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.09076336380737672, | |
| "grad_norm": 7.9568657875061035, | |
| "learning_rate": 0.00019317920297562402, | |
| "loss": 0.4203, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.09222728903007635, | |
| "grad_norm": 12.237702369689941, | |
| "learning_rate": 0.00019286820548226384, | |
| "loss": 0.4181, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.09369121425277596, | |
| "grad_norm": 25.739120483398438, | |
| "learning_rate": 0.00019255720798890363, | |
| "loss": 0.4143, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.09515513947547559, | |
| "grad_norm": 8.341870307922363, | |
| "learning_rate": 0.00019224621049554342, | |
| "loss": 0.4171, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.09661906469817522, | |
| "grad_norm": 10.707802772521973, | |
| "learning_rate": 0.0001919352130021832, | |
| "loss": 0.4058, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.09808298992087484, | |
| "grad_norm": 7.021149158477783, | |
| "learning_rate": 0.00019162421550882302, | |
| "loss": 0.4211, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.09954691514357447, | |
| "grad_norm": 11.840470314025879, | |
| "learning_rate": 0.0001913132180154628, | |
| "loss": 0.4093, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.10101084036627409, | |
| "grad_norm": 7.401727676391602, | |
| "learning_rate": 0.0001910022205221026, | |
| "loss": 0.4281, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.10247476558897371, | |
| "grad_norm": 7.601231575012207, | |
| "learning_rate": 0.00019069246701871584, | |
| "loss": 0.4044, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.10393869081167334, | |
| "grad_norm": 6.85632848739624, | |
| "learning_rate": 0.00019038146952535563, | |
| "loss": 0.4244, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.10540261603437297, | |
| "grad_norm": 10.810693740844727, | |
| "learning_rate": 0.00019007171602196887, | |
| "loss": 0.4216, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.1068665412570726, | |
| "grad_norm": 9.758743286132812, | |
| "learning_rate": 0.00018976071852860865, | |
| "loss": 0.417, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.10833046647977221, | |
| "grad_norm": 10.75692367553711, | |
| "learning_rate": 0.00018944972103524847, | |
| "loss": 0.4143, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.10979439170247184, | |
| "grad_norm": 10.375711441040039, | |
| "learning_rate": 0.00018913872354188826, | |
| "loss": 0.4075, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.11125831692517146, | |
| "grad_norm": 8.414403915405273, | |
| "learning_rate": 0.00018882772604852805, | |
| "loss": 0.4148, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.11272224214787109, | |
| "grad_norm": 9.86490249633789, | |
| "learning_rate": 0.00018851672855516786, | |
| "loss": 0.4074, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.11418616737057072, | |
| "grad_norm": 7.522060394287109, | |
| "learning_rate": 0.00018820573106180765, | |
| "loss": 0.4106, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.11565009259327033, | |
| "grad_norm": 7.423270225524902, | |
| "learning_rate": 0.00018789473356844744, | |
| "loss": 0.4034, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 0.11711401781596996, | |
| "grad_norm": 8.761688232421875, | |
| "learning_rate": 0.00018758373607508723, | |
| "loss": 0.4025, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.11711401781596996, | |
| "eval_accuracy": 0.8823756104911845, | |
| "eval_loss": 0.4016551673412323, | |
| "eval_runtime": 11547.1595, | |
| "eval_samples_per_second": 210.336, | |
| "eval_steps_per_second": 6.573, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.11857794303866959, | |
| "grad_norm": 9.6015043258667, | |
| "learning_rate": 0.0001872802025215677, | |
| "loss": 0.4087, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 0.12004186826136921, | |
| "grad_norm": 6.658656120300293, | |
| "learning_rate": 0.00018696920502820748, | |
| "loss": 0.408, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.12150579348406883, | |
| "grad_norm": 6.935655117034912, | |
| "learning_rate": 0.00018665820753484727, | |
| "loss": 0.3983, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 0.12296971870676845, | |
| "grad_norm": 7.918155193328857, | |
| "learning_rate": 0.00018634721004148706, | |
| "loss": 0.3994, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.12443364392946808, | |
| "grad_norm": 7.246758937835693, | |
| "learning_rate": 0.00018603621254812688, | |
| "loss": 0.4111, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 0.1258975691521677, | |
| "grad_norm": 8.375380516052246, | |
| "learning_rate": 0.00018572521505476667, | |
| "loss": 0.4006, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.12736149437486732, | |
| "grad_norm": 6.993825435638428, | |
| "learning_rate": 0.0001854154615513799, | |
| "loss": 0.4113, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 0.12882541959756696, | |
| "grad_norm": 8.703255653381348, | |
| "learning_rate": 0.00018510446405801972, | |
| "loss": 0.3977, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.13028934482026658, | |
| "grad_norm": 6.940033912658691, | |
| "learning_rate": 0.0001847934665646595, | |
| "loss": 0.4005, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 0.1317532700429662, | |
| "grad_norm": 6.712055683135986, | |
| "learning_rate": 0.0001844824690712993, | |
| "loss": 0.41, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.13321719526566583, | |
| "grad_norm": 6.171209812164307, | |
| "learning_rate": 0.0001841714715779391, | |
| "loss": 0.3971, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 0.13468112048836545, | |
| "grad_norm": 10.764921188354492, | |
| "learning_rate": 0.0001838604740845789, | |
| "loss": 0.4105, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.1361450457110651, | |
| "grad_norm": 8.0676908493042, | |
| "learning_rate": 0.0001835494765912187, | |
| "loss": 0.3958, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 0.1376089709337647, | |
| "grad_norm": 5.20599365234375, | |
| "learning_rate": 0.00018323847909785848, | |
| "loss": 0.3946, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.13907289615646432, | |
| "grad_norm": 5.9439239501953125, | |
| "learning_rate": 0.0001829274816044983, | |
| "loss": 0.3951, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 0.14053682137916396, | |
| "grad_norm": 9.821541786193848, | |
| "learning_rate": 0.0001826164841111381, | |
| "loss": 0.3906, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.14200074660186357, | |
| "grad_norm": 6.659691333770752, | |
| "learning_rate": 0.00018230673060775133, | |
| "loss": 0.4009, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 0.1434646718245632, | |
| "grad_norm": 6.624240398406982, | |
| "learning_rate": 0.00018199573311439112, | |
| "loss": 0.3975, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.14492859704726282, | |
| "grad_norm": 7.993641376495361, | |
| "learning_rate": 0.0001816847356210309, | |
| "loss": 0.3925, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 0.14639252226996244, | |
| "grad_norm": 6.6386613845825195, | |
| "learning_rate": 0.0001813737381276707, | |
| "loss": 0.3975, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.14785644749266208, | |
| "grad_norm": 9.204560279846191, | |
| "learning_rate": 0.0001810627406343105, | |
| "loss": 0.3997, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 0.1493203727153617, | |
| "grad_norm": 8.072566986083984, | |
| "learning_rate": 0.0001807517431409503, | |
| "loss": 0.4022, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.15078429793806133, | |
| "grad_norm": 10.15225601196289, | |
| "learning_rate": 0.0001804407456475901, | |
| "loss": 0.392, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 0.15224822316076095, | |
| "grad_norm": 7.751401901245117, | |
| "learning_rate": 0.0001801297481542299, | |
| "loss": 0.3946, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.15371214838346056, | |
| "grad_norm": 8.481501579284668, | |
| "learning_rate": 0.0001798187506608697, | |
| "loss": 0.3883, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 0.1551760736061602, | |
| "grad_norm": 9.861278533935547, | |
| "learning_rate": 0.00017950775316750948, | |
| "loss": 0.3824, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.15663999882885982, | |
| "grad_norm": 6.405235290527344, | |
| "learning_rate": 0.0001791967556741493, | |
| "loss": 0.4006, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 0.15810392405155946, | |
| "grad_norm": 9.90355110168457, | |
| "learning_rate": 0.00017888575818078909, | |
| "loss": 0.3881, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.15956784927425907, | |
| "grad_norm": 9.354215621948242, | |
| "learning_rate": 0.00017857476068742887, | |
| "loss": 0.3965, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 0.16103177449695869, | |
| "grad_norm": 9.162219047546387, | |
| "learning_rate": 0.00017826376319406866, | |
| "loss": 0.3933, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.16249569971965833, | |
| "grad_norm": 6.755202770233154, | |
| "learning_rate": 0.00017795276570070848, | |
| "loss": 0.3874, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 0.16395962494235794, | |
| "grad_norm": 8.385200500488281, | |
| "learning_rate": 0.00017764176820734827, | |
| "loss": 0.3873, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.16542355016505758, | |
| "grad_norm": 6.508645057678223, | |
| "learning_rate": 0.00017733077071398806, | |
| "loss": 0.3895, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 0.1668874753877572, | |
| "grad_norm": 8.241129875183105, | |
| "learning_rate": 0.00017702226120057472, | |
| "loss": 0.3912, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.1683514006104568, | |
| "grad_norm": 7.879597187042236, | |
| "learning_rate": 0.00017671126370721454, | |
| "loss": 0.3929, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 0.16981532583315645, | |
| "grad_norm": 12.0702486038208, | |
| "learning_rate": 0.00017640026621385432, | |
| "loss": 0.404, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.17127925105585606, | |
| "grad_norm": 8.789772033691406, | |
| "learning_rate": 0.0001760892687204941, | |
| "loss": 0.3823, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 0.1727431762785557, | |
| "grad_norm": 11.022305488586426, | |
| "learning_rate": 0.00017577827122713393, | |
| "loss": 0.3887, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.17420710150125532, | |
| "grad_norm": 7.665167331695557, | |
| "learning_rate": 0.00017546727373377372, | |
| "loss": 0.394, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 0.17567102672395493, | |
| "grad_norm": 11.05783748626709, | |
| "learning_rate": 0.0001751562762404135, | |
| "loss": 0.3938, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.17713495194665457, | |
| "grad_norm": 8.389631271362305, | |
| "learning_rate": 0.0001748452787470533, | |
| "loss": 0.39, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 0.1785988771693542, | |
| "grad_norm": 8.158947944641113, | |
| "learning_rate": 0.0001745342812536931, | |
| "loss": 0.3818, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.1800628023920538, | |
| "grad_norm": 7.684356689453125, | |
| "learning_rate": 0.0001742232837603329, | |
| "loss": 0.3905, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 0.18152672761475344, | |
| "grad_norm": 10.129668235778809, | |
| "learning_rate": 0.00017391353025694614, | |
| "loss": 0.3886, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.18299065283745305, | |
| "grad_norm": 6.924737453460693, | |
| "learning_rate": 0.00017360253276358593, | |
| "loss": 0.3892, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 0.1844545780601527, | |
| "grad_norm": 5.863354206085205, | |
| "learning_rate": 0.00017329153527022572, | |
| "loss": 0.3822, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.1859185032828523, | |
| "grad_norm": 9.10240650177002, | |
| "learning_rate": 0.00017298053777686553, | |
| "loss": 0.3895, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 0.18738242850555192, | |
| "grad_norm": 9.565494537353516, | |
| "learning_rate": 0.00017266954028350532, | |
| "loss": 0.383, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.18884635372825156, | |
| "grad_norm": 8.238012313842773, | |
| "learning_rate": 0.0001723585427901451, | |
| "loss": 0.3854, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 0.19031027895095118, | |
| "grad_norm": 9.350130081176758, | |
| "learning_rate": 0.0001720475452967849, | |
| "loss": 0.3922, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.19177420417365082, | |
| "grad_norm": 6.337550163269043, | |
| "learning_rate": 0.00017173654780342472, | |
| "loss": 0.3778, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 0.19323812939635043, | |
| "grad_norm": 8.421921730041504, | |
| "learning_rate": 0.00017142679430003793, | |
| "loss": 0.3929, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.19470205461905005, | |
| "grad_norm": 8.888238906860352, | |
| "learning_rate": 0.00017111579680667774, | |
| "loss": 0.3844, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 0.1961659798417497, | |
| "grad_norm": 10.774327278137207, | |
| "learning_rate": 0.00017080479931331753, | |
| "loss": 0.3804, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.1976299050644493, | |
| "grad_norm": 7.07879114151001, | |
| "learning_rate": 0.00017049380181995732, | |
| "loss": 0.3954, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 0.19909383028714894, | |
| "grad_norm": 7.102870941162109, | |
| "learning_rate": 0.00017018280432659714, | |
| "loss": 0.3815, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.20055775550984856, | |
| "grad_norm": 5.815110206604004, | |
| "learning_rate": 0.00016987180683323693, | |
| "loss": 0.3907, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 0.20202168073254817, | |
| "grad_norm": 7.749156475067139, | |
| "learning_rate": 0.00016956080933987672, | |
| "loss": 0.3798, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.2034856059552478, | |
| "grad_norm": 7.0530476570129395, | |
| "learning_rate": 0.0001692498118465165, | |
| "loss": 0.3947, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 0.20494953117794742, | |
| "grad_norm": 6.623088836669922, | |
| "learning_rate": 0.00016893881435315632, | |
| "loss": 0.3816, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.20641345640064707, | |
| "grad_norm": 8.431561470031738, | |
| "learning_rate": 0.0001686278168597961, | |
| "loss": 0.3815, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 0.20787738162334668, | |
| "grad_norm": 11.600255012512207, | |
| "learning_rate": 0.00016831806335640935, | |
| "loss": 0.3782, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.2093413068460463, | |
| "grad_norm": 5.186095237731934, | |
| "learning_rate": 0.00016800706586304914, | |
| "loss": 0.3828, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 0.21080523206874593, | |
| "grad_norm": 12.819711685180664, | |
| "learning_rate": 0.00016769606836968895, | |
| "loss": 0.3902, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.21226915729144555, | |
| "grad_norm": 7.843264579772949, | |
| "learning_rate": 0.00016738507087632874, | |
| "loss": 0.3716, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 0.2137330825141452, | |
| "grad_norm": 8.602349281311035, | |
| "learning_rate": 0.00016707407338296853, | |
| "loss": 0.3791, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.2151970077368448, | |
| "grad_norm": 7.939485549926758, | |
| "learning_rate": 0.00016676307588960832, | |
| "loss": 0.3752, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 0.21666093295954442, | |
| "grad_norm": 6.328729629516602, | |
| "learning_rate": 0.00016645207839624814, | |
| "loss": 0.3761, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.21812485818224406, | |
| "grad_norm": 6.196065902709961, | |
| "learning_rate": 0.00016614108090288793, | |
| "loss": 0.3817, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 0.21958878340494367, | |
| "grad_norm": 10.096115112304688, | |
| "learning_rate": 0.00016583008340952771, | |
| "loss": 0.3828, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.2210527086276433, | |
| "grad_norm": 6.120075702667236, | |
| "learning_rate": 0.0001655190859161675, | |
| "loss": 0.3774, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 0.22251663385034293, | |
| "grad_norm": 6.575611114501953, | |
| "learning_rate": 0.00016520808842280732, | |
| "loss": 0.3823, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.22398055907304254, | |
| "grad_norm": 7.636918067932129, | |
| "learning_rate": 0.0001648970909294471, | |
| "loss": 0.3846, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 0.22544448429574218, | |
| "grad_norm": 15.759072303771973, | |
| "learning_rate": 0.00016458733742606037, | |
| "loss": 0.3842, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.2269084095184418, | |
| "grad_norm": 10.398168563842773, | |
| "learning_rate": 0.0001642775839226736, | |
| "loss": 0.3794, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 0.22837233474114144, | |
| "grad_norm": 6.939914703369141, | |
| "learning_rate": 0.0001639665864293134, | |
| "loss": 0.3763, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.22983625996384105, | |
| "grad_norm": 11.021454811096191, | |
| "learning_rate": 0.0001636555889359532, | |
| "loss": 0.368, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 0.23130018518654066, | |
| "grad_norm": 7.381429195404053, | |
| "learning_rate": 0.00016334459144259298, | |
| "loss": 0.3783, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.2327641104092403, | |
| "grad_norm": 9.803789138793945, | |
| "learning_rate": 0.0001630335939492328, | |
| "loss": 0.3828, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 0.23422803563193992, | |
| "grad_norm": 7.722465991973877, | |
| "learning_rate": 0.00016272259645587259, | |
| "loss": 0.3764, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.23569196085463953, | |
| "grad_norm": 8.471487998962402, | |
| "learning_rate": 0.00016241159896251237, | |
| "loss": 0.3879, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 0.23715588607733917, | |
| "grad_norm": 9.46483039855957, | |
| "learning_rate": 0.00016210060146915216, | |
| "loss": 0.3772, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.2386198113000388, | |
| "grad_norm": 11.850425720214844, | |
| "learning_rate": 0.00016178960397579198, | |
| "loss": 0.3688, | |
| "step": 40750 | |
| }, | |
| { | |
| "epoch": 0.24008373652273843, | |
| "grad_norm": 7.718139171600342, | |
| "learning_rate": 0.00016147860648243177, | |
| "loss": 0.3728, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.24154766174543804, | |
| "grad_norm": 7.039102077484131, | |
| "learning_rate": 0.00016116760898907156, | |
| "loss": 0.3718, | |
| "step": 41250 | |
| }, | |
| { | |
| "epoch": 0.24301158696813766, | |
| "grad_norm": 6.891547679901123, | |
| "learning_rate": 0.00016085661149571137, | |
| "loss": 0.3713, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.2444755121908373, | |
| "grad_norm": 8.54554271697998, | |
| "learning_rate": 0.00016054561400235116, | |
| "loss": 0.3818, | |
| "step": 41750 | |
| }, | |
| { | |
| "epoch": 0.2459394374135369, | |
| "grad_norm": 6.554268836975098, | |
| "learning_rate": 0.00016023461650899095, | |
| "loss": 0.3706, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.24740336263623655, | |
| "grad_norm": 6.389885902404785, | |
| "learning_rate": 0.00015992361901563074, | |
| "loss": 0.3577, | |
| "step": 42250 | |
| }, | |
| { | |
| "epoch": 0.24886728785893616, | |
| "grad_norm": 6.833805561065674, | |
| "learning_rate": 0.00015961262152227056, | |
| "loss": 0.3722, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.2503312130816358, | |
| "grad_norm": 9.135841369628906, | |
| "learning_rate": 0.00015930162402891034, | |
| "loss": 0.3747, | |
| "step": 42750 | |
| }, | |
| { | |
| "epoch": 0.2517951383043354, | |
| "grad_norm": 7.466910362243652, | |
| "learning_rate": 0.00015899187052552358, | |
| "loss": 0.378, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.25325906352703503, | |
| "grad_norm": 14.597432136535645, | |
| "learning_rate": 0.00015868087303216337, | |
| "loss": 0.3743, | |
| "step": 43250 | |
| }, | |
| { | |
| "epoch": 0.25472298874973465, | |
| "grad_norm": 6.523279190063477, | |
| "learning_rate": 0.00015836987553880316, | |
| "loss": 0.3728, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.25618691397243426, | |
| "grad_norm": 5.352029800415039, | |
| "learning_rate": 0.00015805887804544298, | |
| "loss": 0.367, | |
| "step": 43750 | |
| }, | |
| { | |
| "epoch": 0.25765083919513393, | |
| "grad_norm": 8.408788681030273, | |
| "learning_rate": 0.00015774788055208277, | |
| "loss": 0.3694, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.25911476441783354, | |
| "grad_norm": 7.64408016204834, | |
| "learning_rate": 0.00015743688305872256, | |
| "loss": 0.3664, | |
| "step": 44250 | |
| }, | |
| { | |
| "epoch": 0.26057868964053316, | |
| "grad_norm": 4.888110637664795, | |
| "learning_rate": 0.00015712588556536234, | |
| "loss": 0.3637, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.26204261486323277, | |
| "grad_norm": 5.068843841552734, | |
| "learning_rate": 0.00015681488807200216, | |
| "loss": 0.369, | |
| "step": 44750 | |
| }, | |
| { | |
| "epoch": 0.2635065400859324, | |
| "grad_norm": 6.427637577056885, | |
| "learning_rate": 0.00015650389057864195, | |
| "loss": 0.3788, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.26497046530863205, | |
| "grad_norm": 8.00766658782959, | |
| "learning_rate": 0.00015619289308528174, | |
| "loss": 0.3638, | |
| "step": 45250 | |
| }, | |
| { | |
| "epoch": 0.26643439053133167, | |
| "grad_norm": 8.729680061340332, | |
| "learning_rate": 0.00015588189559192155, | |
| "loss": 0.3736, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.2678983157540313, | |
| "grad_norm": 10.317773818969727, | |
| "learning_rate": 0.00015557089809856134, | |
| "loss": 0.3618, | |
| "step": 45750 | |
| }, | |
| { | |
| "epoch": 0.2693622409767309, | |
| "grad_norm": 7.715869903564453, | |
| "learning_rate": 0.00015525990060520113, | |
| "loss": 0.3741, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.2708261661994305, | |
| "grad_norm": 5.711330890655518, | |
| "learning_rate": 0.00015494890311184092, | |
| "loss": 0.3745, | |
| "step": 46250 | |
| }, | |
| { | |
| "epoch": 0.2722900914221302, | |
| "grad_norm": 9.835432052612305, | |
| "learning_rate": 0.00015463790561848074, | |
| "loss": 0.3693, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.2737540166448298, | |
| "grad_norm": 6.019217014312744, | |
| "learning_rate": 0.00015432815211509395, | |
| "loss": 0.3674, | |
| "step": 46750 | |
| }, | |
| { | |
| "epoch": 0.2752179418675294, | |
| "grad_norm": 7.813283443450928, | |
| "learning_rate": 0.00015401715462173376, | |
| "loss": 0.3674, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.276681867090229, | |
| "grad_norm": 7.319979190826416, | |
| "learning_rate": 0.00015370615712837355, | |
| "loss": 0.3675, | |
| "step": 47250 | |
| }, | |
| { | |
| "epoch": 0.27814579231292863, | |
| "grad_norm": 8.74886703491211, | |
| "learning_rate": 0.00015339515963501334, | |
| "loss": 0.3633, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.2796097175356283, | |
| "grad_norm": 9.456360816955566, | |
| "learning_rate": 0.00015308416214165316, | |
| "loss": 0.379, | |
| "step": 47750 | |
| }, | |
| { | |
| "epoch": 0.2810736427583279, | |
| "grad_norm": 10.024221420288086, | |
| "learning_rate": 0.00015277316464829295, | |
| "loss": 0.375, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.2825375679810275, | |
| "grad_norm": 6.477073669433594, | |
| "learning_rate": 0.00015246216715493274, | |
| "loss": 0.3634, | |
| "step": 48250 | |
| }, | |
| { | |
| "epoch": 0.28400149320372714, | |
| "grad_norm": 8.587589263916016, | |
| "learning_rate": 0.00015215116966157255, | |
| "loss": 0.3693, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.28546541842642675, | |
| "grad_norm": 10.675822257995605, | |
| "learning_rate": 0.00015184017216821234, | |
| "loss": 0.3668, | |
| "step": 48750 | |
| }, | |
| { | |
| "epoch": 0.2869293436491264, | |
| "grad_norm": 10.77786636352539, | |
| "learning_rate": 0.00015153041866482558, | |
| "loss": 0.3711, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.28839326887182604, | |
| "grad_norm": 7.768797874450684, | |
| "learning_rate": 0.00015121942117146537, | |
| "loss": 0.3692, | |
| "step": 49250 | |
| }, | |
| { | |
| "epoch": 0.28985719409452565, | |
| "grad_norm": 6.11573600769043, | |
| "learning_rate": 0.00015090842367810516, | |
| "loss": 0.3618, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.29132111931722526, | |
| "grad_norm": 7.369346618652344, | |
| "learning_rate": 0.00015059742618474495, | |
| "loss": 0.365, | |
| "step": 49750 | |
| }, | |
| { | |
| "epoch": 0.2927850445399249, | |
| "grad_norm": 10.559876441955566, | |
| "learning_rate": 0.00015028642869138476, | |
| "loss": 0.369, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.29424896976262455, | |
| "grad_norm": 6.763681888580322, | |
| "learning_rate": 0.00014997543119802455, | |
| "loss": 0.3723, | |
| "step": 50250 | |
| }, | |
| { | |
| "epoch": 0.29571289498532416, | |
| "grad_norm": 14.075911521911621, | |
| "learning_rate": 0.00014966443370466434, | |
| "loss": 0.3656, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.2971768202080238, | |
| "grad_norm": 7.817617893218994, | |
| "learning_rate": 0.00014935343621130416, | |
| "loss": 0.3745, | |
| "step": 50750 | |
| }, | |
| { | |
| "epoch": 0.2986407454307234, | |
| "grad_norm": 5.018287181854248, | |
| "learning_rate": 0.00014904243871794395, | |
| "loss": 0.3664, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.300104670653423, | |
| "grad_norm": 9.846301078796387, | |
| "learning_rate": 0.00014873144122458373, | |
| "loss": 0.3644, | |
| "step": 51250 | |
| }, | |
| { | |
| "epoch": 0.30156859587612267, | |
| "grad_norm": 8.65786361694336, | |
| "learning_rate": 0.00014842044373122352, | |
| "loss": 0.3698, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.3030325210988223, | |
| "grad_norm": 6.303979873657227, | |
| "learning_rate": 0.00014810944623786334, | |
| "loss": 0.3707, | |
| "step": 51750 | |
| }, | |
| { | |
| "epoch": 0.3044964463215219, | |
| "grad_norm": 39.32520294189453, | |
| "learning_rate": 0.00014779844874450313, | |
| "loss": 0.3617, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.3059603715442215, | |
| "grad_norm": 6.535865306854248, | |
| "learning_rate": 0.00014748869524111637, | |
| "loss": 0.3642, | |
| "step": 52250 | |
| }, | |
| { | |
| "epoch": 0.3074242967669211, | |
| "grad_norm": 6.031300067901611, | |
| "learning_rate": 0.00014717769774775616, | |
| "loss": 0.363, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.3088882219896208, | |
| "grad_norm": 7.255093097686768, | |
| "learning_rate": 0.00014686670025439595, | |
| "loss": 0.3594, | |
| "step": 52750 | |
| }, | |
| { | |
| "epoch": 0.3103521472123204, | |
| "grad_norm": 7.491271018981934, | |
| "learning_rate": 0.00014655570276103576, | |
| "loss": 0.3697, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.31181607243502, | |
| "grad_norm": 8.154767036437988, | |
| "learning_rate": 0.00014624470526767555, | |
| "loss": 0.3667, | |
| "step": 53250 | |
| }, | |
| { | |
| "epoch": 0.31327999765771963, | |
| "grad_norm": 7.7836384773254395, | |
| "learning_rate": 0.00014593370777431534, | |
| "loss": 0.3756, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.31474392288041925, | |
| "grad_norm": 7.439420223236084, | |
| "learning_rate": 0.00014562271028095513, | |
| "loss": 0.3734, | |
| "step": 53750 | |
| }, | |
| { | |
| "epoch": 0.3162078481031189, | |
| "grad_norm": 7.654810428619385, | |
| "learning_rate": 0.00014531171278759494, | |
| "loss": 0.3689, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.31767177332581853, | |
| "grad_norm": 4.918389320373535, | |
| "learning_rate": 0.00014500195928420816, | |
| "loss": 0.3688, | |
| "step": 54250 | |
| }, | |
| { | |
| "epoch": 0.31913569854851814, | |
| "grad_norm": 6.2310895919799805, | |
| "learning_rate": 0.00014469096179084797, | |
| "loss": 0.3711, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.32059962377121776, | |
| "grad_norm": 7.458713054656982, | |
| "learning_rate": 0.00014437996429748776, | |
| "loss": 0.3614, | |
| "step": 54750 | |
| }, | |
| { | |
| "epoch": 0.32206354899391737, | |
| "grad_norm": 6.790125370025635, | |
| "learning_rate": 0.00014406896680412755, | |
| "loss": 0.3635, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.32206354899391737, | |
| "eval_accuracy": 0.8905084935576763, | |
| "eval_loss": 0.362331748008728, | |
| "eval_runtime": 11551.2138, | |
| "eval_samples_per_second": 210.262, | |
| "eval_steps_per_second": 6.571, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.32352747421661704, | |
| "grad_norm": 7.128218650817871, | |
| "learning_rate": 0.00014375796931076737, | |
| "loss": 0.357, | |
| "step": 55250 | |
| }, | |
| { | |
| "epoch": 0.32499139943931665, | |
| "grad_norm": 4.943136692047119, | |
| "learning_rate": 0.00014344697181740715, | |
| "loss": 0.3576, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.32645532466201627, | |
| "grad_norm": 7.633016109466553, | |
| "learning_rate": 0.00014313597432404694, | |
| "loss": 0.3655, | |
| "step": 55750 | |
| }, | |
| { | |
| "epoch": 0.3279192498847159, | |
| "grad_norm": 9.49149227142334, | |
| "learning_rate": 0.00014282497683068673, | |
| "loss": 0.3687, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.3293831751074155, | |
| "grad_norm": 7.4215521812438965, | |
| "learning_rate": 0.00014251397933732655, | |
| "loss": 0.3705, | |
| "step": 56250 | |
| }, | |
| { | |
| "epoch": 0.33084710033011516, | |
| "grad_norm": 5.638499736785889, | |
| "learning_rate": 0.00014220298184396634, | |
| "loss": 0.3709, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.3323110255528148, | |
| "grad_norm": 9.440450668334961, | |
| "learning_rate": 0.00014189198435060613, | |
| "loss": 0.35, | |
| "step": 56750 | |
| }, | |
| { | |
| "epoch": 0.3337749507755144, | |
| "grad_norm": 7.706991195678711, | |
| "learning_rate": 0.00014158098685724594, | |
| "loss": 0.3601, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.335238875998214, | |
| "grad_norm": 8.154605865478516, | |
| "learning_rate": 0.00014126998936388573, | |
| "loss": 0.3625, | |
| "step": 57250 | |
| }, | |
| { | |
| "epoch": 0.3367028012209136, | |
| "grad_norm": 7.608438491821289, | |
| "learning_rate": 0.00014095899187052552, | |
| "loss": 0.3588, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.3381667264436133, | |
| "grad_norm": 5.466573715209961, | |
| "learning_rate": 0.00014064799437716534, | |
| "loss": 0.3528, | |
| "step": 57750 | |
| }, | |
| { | |
| "epoch": 0.3396306516663129, | |
| "grad_norm": 7.514803409576416, | |
| "learning_rate": 0.00014033699688380512, | |
| "loss": 0.3624, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.3410945768890125, | |
| "grad_norm": 4.846391677856445, | |
| "learning_rate": 0.00014002599939044491, | |
| "loss": 0.3525, | |
| "step": 58250 | |
| }, | |
| { | |
| "epoch": 0.3425585021117121, | |
| "grad_norm": 6.116271018981934, | |
| "learning_rate": 0.0001397150018970847, | |
| "loss": 0.3556, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.34402242733441174, | |
| "grad_norm": 7.234938621520996, | |
| "learning_rate": 0.00013940400440372452, | |
| "loss": 0.3723, | |
| "step": 58750 | |
| }, | |
| { | |
| "epoch": 0.3454863525571114, | |
| "grad_norm": 8.690266609191895, | |
| "learning_rate": 0.0001390930069103643, | |
| "loss": 0.3671, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.346950277779811, | |
| "grad_norm": 5.558066368103027, | |
| "learning_rate": 0.0001387820094170041, | |
| "loss": 0.3563, | |
| "step": 59250 | |
| }, | |
| { | |
| "epoch": 0.34841420300251064, | |
| "grad_norm": 5.277857303619385, | |
| "learning_rate": 0.0001384710119236439, | |
| "loss": 0.3633, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.34987812822521025, | |
| "grad_norm": 4.810859680175781, | |
| "learning_rate": 0.00013816125842025712, | |
| "loss": 0.3615, | |
| "step": 59750 | |
| }, | |
| { | |
| "epoch": 0.35134205344790986, | |
| "grad_norm": 6.860721111297607, | |
| "learning_rate": 0.00013785026092689694, | |
| "loss": 0.3561, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.35280597867060953, | |
| "grad_norm": 6.673612117767334, | |
| "learning_rate": 0.00013753926343353673, | |
| "loss": 0.3513, | |
| "step": 60250 | |
| }, | |
| { | |
| "epoch": 0.35426990389330915, | |
| "grad_norm": 6.9296956062316895, | |
| "learning_rate": 0.00013722826594017652, | |
| "loss": 0.3563, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.35573382911600876, | |
| "grad_norm": 6.235531806945801, | |
| "learning_rate": 0.0001369172684468163, | |
| "loss": 0.3586, | |
| "step": 60750 | |
| }, | |
| { | |
| "epoch": 0.3571977543387084, | |
| "grad_norm": 6.549998760223389, | |
| "learning_rate": 0.00013660627095345612, | |
| "loss": 0.3572, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.358661679561408, | |
| "grad_norm": 6.800797939300537, | |
| "learning_rate": 0.0001362952734600959, | |
| "loss": 0.3687, | |
| "step": 61250 | |
| }, | |
| { | |
| "epoch": 0.3601256047841076, | |
| "grad_norm": 5.545276641845703, | |
| "learning_rate": 0.0001359842759667357, | |
| "loss": 0.3539, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.36158953000680727, | |
| "grad_norm": 8.63070011138916, | |
| "learning_rate": 0.00013567327847337552, | |
| "loss": 0.3605, | |
| "step": 61750 | |
| }, | |
| { | |
| "epoch": 0.3630534552295069, | |
| "grad_norm": 5.199543476104736, | |
| "learning_rate": 0.0001353622809800153, | |
| "loss": 0.3559, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.3645173804522065, | |
| "grad_norm": 27.297420501708984, | |
| "learning_rate": 0.0001350512834866551, | |
| "loss": 0.3676, | |
| "step": 62250 | |
| }, | |
| { | |
| "epoch": 0.3659813056749061, | |
| "grad_norm": 8.235854148864746, | |
| "learning_rate": 0.00013474152998326833, | |
| "loss": 0.3583, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.3674452308976057, | |
| "grad_norm": 6.224372386932373, | |
| "learning_rate": 0.00013443053248990812, | |
| "loss": 0.3623, | |
| "step": 62750 | |
| }, | |
| { | |
| "epoch": 0.3689091561203054, | |
| "grad_norm": 8.013957977294922, | |
| "learning_rate": 0.0001341195349965479, | |
| "loss": 0.3619, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.370373081343005, | |
| "grad_norm": 6.442314147949219, | |
| "learning_rate": 0.00013380853750318773, | |
| "loss": 0.3586, | |
| "step": 63250 | |
| }, | |
| { | |
| "epoch": 0.3718370065657046, | |
| "grad_norm": 6.883063793182373, | |
| "learning_rate": 0.00013349754000982752, | |
| "loss": 0.3635, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.37330093178840423, | |
| "grad_norm": 5.502562999725342, | |
| "learning_rate": 0.0001331865425164673, | |
| "loss": 0.3525, | |
| "step": 63750 | |
| }, | |
| { | |
| "epoch": 0.37476485701110385, | |
| "grad_norm": 6.841543197631836, | |
| "learning_rate": 0.00013287554502310712, | |
| "loss": 0.3564, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.3762287822338035, | |
| "grad_norm": 6.850903034210205, | |
| "learning_rate": 0.0001325645475297469, | |
| "loss": 0.3549, | |
| "step": 64250 | |
| }, | |
| { | |
| "epoch": 0.37769270745650313, | |
| "grad_norm": 5.823826313018799, | |
| "learning_rate": 0.00013225479402636015, | |
| "loss": 0.3488, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.37915663267920274, | |
| "grad_norm": 9.849250793457031, | |
| "learning_rate": 0.00013194379653299997, | |
| "loss": 0.3526, | |
| "step": 64750 | |
| }, | |
| { | |
| "epoch": 0.38062055790190236, | |
| "grad_norm": 7.8498992919921875, | |
| "learning_rate": 0.00013163279903963975, | |
| "loss": 0.3596, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.38208448312460197, | |
| "grad_norm": 7.845436096191406, | |
| "learning_rate": 0.00013132180154627954, | |
| "loss": 0.3497, | |
| "step": 65250 | |
| }, | |
| { | |
| "epoch": 0.38354840834730164, | |
| "grad_norm": 10.533845901489258, | |
| "learning_rate": 0.00013101080405291933, | |
| "loss": 0.3523, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.38501233357000125, | |
| "grad_norm": 9.09399127960205, | |
| "learning_rate": 0.00013069980655955912, | |
| "loss": 0.347, | |
| "step": 65750 | |
| }, | |
| { | |
| "epoch": 0.38647625879270087, | |
| "grad_norm": 7.205333232879639, | |
| "learning_rate": 0.00013038880906619894, | |
| "loss": 0.355, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.3879401840154005, | |
| "grad_norm": 6.770249843597412, | |
| "learning_rate": 0.00013007781157283873, | |
| "loss": 0.3549, | |
| "step": 66250 | |
| }, | |
| { | |
| "epoch": 0.3894041092381001, | |
| "grad_norm": 8.14482593536377, | |
| "learning_rate": 0.00012976681407947851, | |
| "loss": 0.3537, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.39086803446079976, | |
| "grad_norm": 5.998184680938721, | |
| "learning_rate": 0.0001294558165861183, | |
| "loss": 0.3562, | |
| "step": 66750 | |
| }, | |
| { | |
| "epoch": 0.3923319596834994, | |
| "grad_norm": 5.583696365356445, | |
| "learning_rate": 0.00012914481909275812, | |
| "loss": 0.3499, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.393795884906199, | |
| "grad_norm": 6.899207592010498, | |
| "learning_rate": 0.0001288338215993979, | |
| "loss": 0.3506, | |
| "step": 67250 | |
| }, | |
| { | |
| "epoch": 0.3952598101288986, | |
| "grad_norm": 6.205395221710205, | |
| "learning_rate": 0.0001285228241060377, | |
| "loss": 0.3512, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.3967237353515982, | |
| "grad_norm": 9.125551223754883, | |
| "learning_rate": 0.0001282118266126775, | |
| "loss": 0.3585, | |
| "step": 67750 | |
| }, | |
| { | |
| "epoch": 0.3981876605742979, | |
| "grad_norm": 6.943772792816162, | |
| "learning_rate": 0.0001279008291193173, | |
| "loss": 0.362, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.3996515857969975, | |
| "grad_norm": 6.106304168701172, | |
| "learning_rate": 0.0001275898316259571, | |
| "loss": 0.3545, | |
| "step": 68250 | |
| }, | |
| { | |
| "epoch": 0.4011155110196971, | |
| "grad_norm": 6.197811126708984, | |
| "learning_rate": 0.00012728007812257036, | |
| "loss": 0.3524, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.4025794362423967, | |
| "grad_norm": 8.07652759552002, | |
| "learning_rate": 0.00012696908062921015, | |
| "loss": 0.3467, | |
| "step": 68750 | |
| }, | |
| { | |
| "epoch": 0.40404336146509634, | |
| "grad_norm": 7.444363117218018, | |
| "learning_rate": 0.00012665808313584994, | |
| "loss": 0.3541, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.405507286687796, | |
| "grad_norm": 6.2395782470703125, | |
| "learning_rate": 0.00012634708564248972, | |
| "loss": 0.3488, | |
| "step": 69250 | |
| }, | |
| { | |
| "epoch": 0.4069712119104956, | |
| "grad_norm": 7.489956378936768, | |
| "learning_rate": 0.00012603608814912954, | |
| "loss": 0.3595, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.40843513713319524, | |
| "grad_norm": 6.762283802032471, | |
| "learning_rate": 0.00012572509065576933, | |
| "loss": 0.3555, | |
| "step": 69750 | |
| }, | |
| { | |
| "epoch": 0.40989906235589485, | |
| "grad_norm": 10.423229217529297, | |
| "learning_rate": 0.00012541409316240912, | |
| "loss": 0.3474, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.41136298757859446, | |
| "grad_norm": 7.812709331512451, | |
| "learning_rate": 0.0001251030956690489, | |
| "loss": 0.3588, | |
| "step": 70250 | |
| }, | |
| { | |
| "epoch": 0.41282691280129413, | |
| "grad_norm": 8.506246566772461, | |
| "learning_rate": 0.00012479334216566215, | |
| "loss": 0.3473, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.41429083802399375, | |
| "grad_norm": 6.0005784034729, | |
| "learning_rate": 0.00012448234467230196, | |
| "loss": 0.3423, | |
| "step": 70750 | |
| }, | |
| { | |
| "epoch": 0.41575476324669336, | |
| "grad_norm": 7.6112494468688965, | |
| "learning_rate": 0.00012417134717894175, | |
| "loss": 0.3469, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.417218688469393, | |
| "grad_norm": 6.460068225860596, | |
| "learning_rate": 0.00012386034968558154, | |
| "loss": 0.3514, | |
| "step": 71250 | |
| }, | |
| { | |
| "epoch": 0.4186826136920926, | |
| "grad_norm": 25.509037017822266, | |
| "learning_rate": 0.00012354935219222136, | |
| "loss": 0.3538, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.42014653891479226, | |
| "grad_norm": 5.778562068939209, | |
| "learning_rate": 0.00012323835469886114, | |
| "loss": 0.3409, | |
| "step": 71750 | |
| }, | |
| { | |
| "epoch": 0.42161046413749187, | |
| "grad_norm": 10.19543170928955, | |
| "learning_rate": 0.00012292735720550093, | |
| "loss": 0.3487, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.4230743893601915, | |
| "grad_norm": 7.6341633796691895, | |
| "learning_rate": 0.00012261635971214072, | |
| "loss": 0.3477, | |
| "step": 72250 | |
| }, | |
| { | |
| "epoch": 0.4245383145828911, | |
| "grad_norm": 5.656210422515869, | |
| "learning_rate": 0.00012230536221878054, | |
| "loss": 0.353, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.4260022398055907, | |
| "grad_norm": 7.81094217300415, | |
| "learning_rate": 0.00012199436472542031, | |
| "loss": 0.3589, | |
| "step": 72750 | |
| }, | |
| { | |
| "epoch": 0.4274661650282904, | |
| "grad_norm": 5.924116611480713, | |
| "learning_rate": 0.0001216833672320601, | |
| "loss": 0.346, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.42893009025099, | |
| "grad_norm": 6.293444633483887, | |
| "learning_rate": 0.00012137236973869992, | |
| "loss": 0.3496, | |
| "step": 73250 | |
| }, | |
| { | |
| "epoch": 0.4303940154736896, | |
| "grad_norm": 9.766921997070312, | |
| "learning_rate": 0.00012106137224533971, | |
| "loss": 0.347, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.4318579406963892, | |
| "grad_norm": 5.998900890350342, | |
| "learning_rate": 0.0001207503747519795, | |
| "loss": 0.3465, | |
| "step": 73750 | |
| }, | |
| { | |
| "epoch": 0.43332186591908883, | |
| "grad_norm": 8.364704132080078, | |
| "learning_rate": 0.00012043937725861929, | |
| "loss": 0.3429, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.4347857911417885, | |
| "grad_norm": 5.508989334106445, | |
| "learning_rate": 0.0001201283797652591, | |
| "loss": 0.355, | |
| "step": 74250 | |
| }, | |
| { | |
| "epoch": 0.4362497163644881, | |
| "grad_norm": 6.357595443725586, | |
| "learning_rate": 0.00011981738227189889, | |
| "loss": 0.3504, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.43771364158718773, | |
| "grad_norm": 8.691376686096191, | |
| "learning_rate": 0.00011950762876851213, | |
| "loss": 0.3471, | |
| "step": 74750 | |
| }, | |
| { | |
| "epoch": 0.43917756680988734, | |
| "grad_norm": 11.246256828308105, | |
| "learning_rate": 0.00011919663127515193, | |
| "loss": 0.3487, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.44064149203258696, | |
| "grad_norm": 6.3526811599731445, | |
| "learning_rate": 0.00011888563378179172, | |
| "loss": 0.3414, | |
| "step": 75250 | |
| }, | |
| { | |
| "epoch": 0.4421054172552866, | |
| "grad_norm": 9.6268310546875, | |
| "learning_rate": 0.00011857463628843152, | |
| "loss": 0.3457, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.44356934247798624, | |
| "grad_norm": 8.093045234680176, | |
| "learning_rate": 0.00011826363879507131, | |
| "loss": 0.3515, | |
| "step": 75750 | |
| }, | |
| { | |
| "epoch": 0.44503326770068585, | |
| "grad_norm": 7.497385025024414, | |
| "learning_rate": 0.00011795264130171111, | |
| "loss": 0.3361, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.44649719292338547, | |
| "grad_norm": 8.374622344970703, | |
| "learning_rate": 0.00011764164380835092, | |
| "loss": 0.3552, | |
| "step": 76250 | |
| }, | |
| { | |
| "epoch": 0.4479611181460851, | |
| "grad_norm": 8.583603858947754, | |
| "learning_rate": 0.0001173306463149907, | |
| "loss": 0.3395, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.44942504336878475, | |
| "grad_norm": 5.933279991149902, | |
| "learning_rate": 0.0001170196488216305, | |
| "loss": 0.3539, | |
| "step": 76750 | |
| }, | |
| { | |
| "epoch": 0.45088896859148436, | |
| "grad_norm": 7.1400556564331055, | |
| "learning_rate": 0.00011670989531824375, | |
| "loss": 0.3556, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.452352893814184, | |
| "grad_norm": 6.4177374839782715, | |
| "learning_rate": 0.00011639889782488354, | |
| "loss": 0.34, | |
| "step": 77250 | |
| }, | |
| { | |
| "epoch": 0.4538168190368836, | |
| "grad_norm": 8.248872756958008, | |
| "learning_rate": 0.00011608790033152333, | |
| "loss": 0.3454, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.4552807442595832, | |
| "grad_norm": 6.789691925048828, | |
| "learning_rate": 0.00011577690283816314, | |
| "loss": 0.3506, | |
| "step": 77750 | |
| }, | |
| { | |
| "epoch": 0.4567446694822829, | |
| "grad_norm": 7.519604206085205, | |
| "learning_rate": 0.00011546590534480293, | |
| "loss": 0.3438, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.4582085947049825, | |
| "grad_norm": 11.287620544433594, | |
| "learning_rate": 0.00011515490785144272, | |
| "loss": 0.3536, | |
| "step": 78250 | |
| }, | |
| { | |
| "epoch": 0.4596725199276821, | |
| "grad_norm": 5.6864914894104, | |
| "learning_rate": 0.00011484391035808254, | |
| "loss": 0.348, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.4611364451503817, | |
| "grad_norm": 7.405890941619873, | |
| "learning_rate": 0.00011453291286472232, | |
| "loss": 0.3395, | |
| "step": 78750 | |
| }, | |
| { | |
| "epoch": 0.4626003703730813, | |
| "grad_norm": 5.379487991333008, | |
| "learning_rate": 0.00011422315936133556, | |
| "loss": 0.3463, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.46406429559578094, | |
| "grad_norm": 7.769617080688477, | |
| "learning_rate": 0.00011391216186797535, | |
| "loss": 0.3458, | |
| "step": 79250 | |
| }, | |
| { | |
| "epoch": 0.4655282208184806, | |
| "grad_norm": 9.26171875, | |
| "learning_rate": 0.00011360116437461514, | |
| "loss": 0.3394, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.4669921460411802, | |
| "grad_norm": 9.037941932678223, | |
| "learning_rate": 0.00011329016688125493, | |
| "loss": 0.349, | |
| "step": 79750 | |
| }, | |
| { | |
| "epoch": 0.46845607126387984, | |
| "grad_norm": 8.776792526245117, | |
| "learning_rate": 0.00011297916938789475, | |
| "loss": 0.3384, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.46991999648657945, | |
| "grad_norm": 6.737313270568848, | |
| "learning_rate": 0.00011266817189453454, | |
| "loss": 0.3472, | |
| "step": 80250 | |
| }, | |
| { | |
| "epoch": 0.47138392170927906, | |
| "grad_norm": 7.2374114990234375, | |
| "learning_rate": 0.00011235717440117432, | |
| "loss": 0.3434, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.47284784693197873, | |
| "grad_norm": 6.939677715301514, | |
| "learning_rate": 0.00011204617690781414, | |
| "loss": 0.3451, | |
| "step": 80750 | |
| }, | |
| { | |
| "epoch": 0.47431177215467835, | |
| "grad_norm": 4.702803611755371, | |
| "learning_rate": 0.00011173517941445393, | |
| "loss": 0.3508, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.47577569737737796, | |
| "grad_norm": 7.359582901000977, | |
| "learning_rate": 0.00011142418192109372, | |
| "loss": 0.3415, | |
| "step": 81250 | |
| }, | |
| { | |
| "epoch": 0.4772396226000776, | |
| "grad_norm": 8.404651641845703, | |
| "learning_rate": 0.00011111442841770696, | |
| "loss": 0.3438, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.4787035478227772, | |
| "grad_norm": 6.176925182342529, | |
| "learning_rate": 0.00011080343092434675, | |
| "loss": 0.3484, | |
| "step": 81750 | |
| }, | |
| { | |
| "epoch": 0.48016747304547686, | |
| "grad_norm": 8.614276885986328, | |
| "learning_rate": 0.00011049243343098655, | |
| "loss": 0.3525, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.48163139826817647, | |
| "grad_norm": 5.756929874420166, | |
| "learning_rate": 0.00011018143593762635, | |
| "loss": 0.3432, | |
| "step": 82250 | |
| }, | |
| { | |
| "epoch": 0.4830953234908761, | |
| "grad_norm": 7.686267852783203, | |
| "learning_rate": 0.00010987043844426614, | |
| "loss": 0.3508, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.4845592487135757, | |
| "grad_norm": 6.590146541595459, | |
| "learning_rate": 0.00010955944095090593, | |
| "loss": 0.3357, | |
| "step": 82750 | |
| }, | |
| { | |
| "epoch": 0.4860231739362753, | |
| "grad_norm": 7.363981246948242, | |
| "learning_rate": 0.00010924968744751918, | |
| "loss": 0.3469, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.487487099158975, | |
| "grad_norm": 5.942411422729492, | |
| "learning_rate": 0.00010893868995415897, | |
| "loss": 0.3464, | |
| "step": 83250 | |
| }, | |
| { | |
| "epoch": 0.4889510243816746, | |
| "grad_norm": 8.531744003295898, | |
| "learning_rate": 0.00010862769246079879, | |
| "loss": 0.3349, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.4904149496043742, | |
| "grad_norm": 20.821125030517578, | |
| "learning_rate": 0.00010831669496743858, | |
| "loss": 0.3434, | |
| "step": 83750 | |
| }, | |
| { | |
| "epoch": 0.4918788748270738, | |
| "grad_norm": 9.569067001342773, | |
| "learning_rate": 0.00010800569747407836, | |
| "loss": 0.3421, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.49334280004977343, | |
| "grad_norm": 7.6851725578308105, | |
| "learning_rate": 0.00010769469998071815, | |
| "loss": 0.3407, | |
| "step": 84250 | |
| }, | |
| { | |
| "epoch": 0.4948067252724731, | |
| "grad_norm": 9.591890335083008, | |
| "learning_rate": 0.00010738370248735797, | |
| "loss": 0.347, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.4962706504951727, | |
| "grad_norm": 5.16259765625, | |
| "learning_rate": 0.00010707270499399776, | |
| "loss": 0.3383, | |
| "step": 84750 | |
| }, | |
| { | |
| "epoch": 0.49773457571787233, | |
| "grad_norm": 4.6993794441223145, | |
| "learning_rate": 0.00010676170750063755, | |
| "loss": 0.3392, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.49919850094057194, | |
| "grad_norm": 6.331507682800293, | |
| "learning_rate": 0.00010645071000727735, | |
| "loss": 0.351, | |
| "step": 85250 | |
| }, | |
| { | |
| "epoch": 0.5006624261632716, | |
| "grad_norm": 7.329137325286865, | |
| "learning_rate": 0.00010613971251391714, | |
| "loss": 0.3486, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.5021263513859712, | |
| "grad_norm": 6.907947540283203, | |
| "learning_rate": 0.00010582871502055694, | |
| "loss": 0.3443, | |
| "step": 85750 | |
| }, | |
| { | |
| "epoch": 0.5035902766086708, | |
| "grad_norm": 4.780885696411133, | |
| "learning_rate": 0.00010551771752719674, | |
| "loss": 0.3401, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.5050542018313705, | |
| "grad_norm": 9.042526245117188, | |
| "learning_rate": 0.00010520672003383653, | |
| "loss": 0.3402, | |
| "step": 86250 | |
| }, | |
| { | |
| "epoch": 0.5065181270540701, | |
| "grad_norm": 5.397533416748047, | |
| "learning_rate": 0.00010489572254047632, | |
| "loss": 0.3392, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.5079820522767697, | |
| "grad_norm": 7.72251033782959, | |
| "learning_rate": 0.00010458472504711612, | |
| "loss": 0.3337, | |
| "step": 86750 | |
| }, | |
| { | |
| "epoch": 0.5094459774994693, | |
| "grad_norm": 7.379674434661865, | |
| "learning_rate": 0.00010427497154372936, | |
| "loss": 0.3457, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.510909902722169, | |
| "grad_norm": 7.123027801513672, | |
| "learning_rate": 0.00010396397405036915, | |
| "loss": 0.3311, | |
| "step": 87250 | |
| }, | |
| { | |
| "epoch": 0.5123738279448685, | |
| "grad_norm": 6.388451099395752, | |
| "learning_rate": 0.00010365297655700897, | |
| "loss": 0.3386, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.5138377531675682, | |
| "grad_norm": 8.933717727661133, | |
| "learning_rate": 0.00010334197906364876, | |
| "loss": 0.3377, | |
| "step": 87750 | |
| }, | |
| { | |
| "epoch": 0.5153016783902679, | |
| "grad_norm": 5.813757419586182, | |
| "learning_rate": 0.000103032225560262, | |
| "loss": 0.3368, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.5167656036129674, | |
| "grad_norm": 10.707741737365723, | |
| "learning_rate": 0.00010272122806690178, | |
| "loss": 0.3429, | |
| "step": 88250 | |
| }, | |
| { | |
| "epoch": 0.5182295288356671, | |
| "grad_norm": 7.433245658874512, | |
| "learning_rate": 0.00010241023057354157, | |
| "loss": 0.3457, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.5196934540583666, | |
| "grad_norm": 6.408331394195557, | |
| "learning_rate": 0.00010209923308018139, | |
| "loss": 0.3409, | |
| "step": 88750 | |
| }, | |
| { | |
| "epoch": 0.5211573792810663, | |
| "grad_norm": 7.5843987464904785, | |
| "learning_rate": 0.00010178823558682118, | |
| "loss": 0.3347, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.522621304503766, | |
| "grad_norm": 9.049858093261719, | |
| "learning_rate": 0.00010147723809346097, | |
| "loss": 0.3392, | |
| "step": 89250 | |
| }, | |
| { | |
| "epoch": 0.5240852297264655, | |
| "grad_norm": 8.207107543945312, | |
| "learning_rate": 0.00010116624060010076, | |
| "loss": 0.334, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.5255491549491652, | |
| "grad_norm": 6.511790752410889, | |
| "learning_rate": 0.00010085648709671401, | |
| "loss": 0.3462, | |
| "step": 89750 | |
| }, | |
| { | |
| "epoch": 0.5270130801718648, | |
| "grad_norm": 5.541443824768066, | |
| "learning_rate": 0.0001005454896033538, | |
| "loss": 0.3318, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.5284770053945644, | |
| "grad_norm": 6.216821670532227, | |
| "learning_rate": 0.0001002344921099936, | |
| "loss": 0.338, | |
| "step": 90250 | |
| }, | |
| { | |
| "epoch": 0.5299409306172641, | |
| "grad_norm": 5.138360977172852, | |
| "learning_rate": 9.992349461663339e-05, | |
| "loss": 0.3457, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.5314048558399637, | |
| "grad_norm": 8.401073455810547, | |
| "learning_rate": 9.961249712327319e-05, | |
| "loss": 0.3523, | |
| "step": 90750 | |
| }, | |
| { | |
| "epoch": 0.5328687810626633, | |
| "grad_norm": 8.749157905578613, | |
| "learning_rate": 9.930149962991298e-05, | |
| "loss": 0.3391, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.5343327062853629, | |
| "grad_norm": 7.809004783630371, | |
| "learning_rate": 9.899050213655278e-05, | |
| "loss": 0.3422, | |
| "step": 91250 | |
| }, | |
| { | |
| "epoch": 0.5357966315080626, | |
| "grad_norm": 7.649618148803711, | |
| "learning_rate": 9.867950464319257e-05, | |
| "loss": 0.3512, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.5372605567307622, | |
| "grad_norm": 8.770468711853027, | |
| "learning_rate": 9.836850714983237e-05, | |
| "loss": 0.3367, | |
| "step": 91750 | |
| }, | |
| { | |
| "epoch": 0.5387244819534618, | |
| "grad_norm": 8.32112979888916, | |
| "learning_rate": 9.805750965647216e-05, | |
| "loss": 0.3384, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.5401884071761615, | |
| "grad_norm": 9.602888107299805, | |
| "learning_rate": 9.774651216311197e-05, | |
| "loss": 0.3344, | |
| "step": 92250 | |
| }, | |
| { | |
| "epoch": 0.541652332398861, | |
| "grad_norm": 3.2295093536376953, | |
| "learning_rate": 9.743551466975177e-05, | |
| "loss": 0.3314, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.5431162576215607, | |
| "grad_norm": 5.456012725830078, | |
| "learning_rate": 9.712451717639156e-05, | |
| "loss": 0.3313, | |
| "step": 92750 | |
| }, | |
| { | |
| "epoch": 0.5445801828442604, | |
| "grad_norm": 7.777164936065674, | |
| "learning_rate": 9.681351968303136e-05, | |
| "loss": 0.3417, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.5460441080669599, | |
| "grad_norm": 10.10175895690918, | |
| "learning_rate": 9.650252218967115e-05, | |
| "loss": 0.3357, | |
| "step": 93250 | |
| }, | |
| { | |
| "epoch": 0.5475080332896596, | |
| "grad_norm": 8.296233177185059, | |
| "learning_rate": 9.619152469631095e-05, | |
| "loss": 0.3368, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.5489719585123591, | |
| "grad_norm": 5.55683708190918, | |
| "learning_rate": 9.588052720295075e-05, | |
| "loss": 0.3338, | |
| "step": 93750 | |
| }, | |
| { | |
| "epoch": 0.5504358837350588, | |
| "grad_norm": 5.92700719833374, | |
| "learning_rate": 9.556952970959054e-05, | |
| "loss": 0.3431, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.5518998089577585, | |
| "grad_norm": 5.411899089813232, | |
| "learning_rate": 9.525853221623034e-05, | |
| "loss": 0.3393, | |
| "step": 94250 | |
| }, | |
| { | |
| "epoch": 0.553363734180458, | |
| "grad_norm": 6.517271995544434, | |
| "learning_rate": 9.494753472287013e-05, | |
| "loss": 0.3332, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.5548276594031577, | |
| "grad_norm": 9.099715232849121, | |
| "learning_rate": 9.463653722950994e-05, | |
| "loss": 0.3343, | |
| "step": 94750 | |
| }, | |
| { | |
| "epoch": 0.5562915846258573, | |
| "grad_norm": 4.845067501068115, | |
| "learning_rate": 9.432553973614972e-05, | |
| "loss": 0.3344, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.5577555098485569, | |
| "grad_norm": 8.56153392791748, | |
| "learning_rate": 9.401454224278953e-05, | |
| "loss": 0.33, | |
| "step": 95250 | |
| }, | |
| { | |
| "epoch": 0.5592194350712566, | |
| "grad_norm": 7.1542439460754395, | |
| "learning_rate": 9.370354474942933e-05, | |
| "loss": 0.3186, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.5606833602939562, | |
| "grad_norm": 7.00217342376709, | |
| "learning_rate": 9.339254725606912e-05, | |
| "loss": 0.335, | |
| "step": 95750 | |
| }, | |
| { | |
| "epoch": 0.5621472855166558, | |
| "grad_norm": 7.365664482116699, | |
| "learning_rate": 9.308279375268236e-05, | |
| "loss": 0.3303, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.5636112107393554, | |
| "grad_norm": 8.063042640686035, | |
| "learning_rate": 9.277179625932215e-05, | |
| "loss": 0.3441, | |
| "step": 96250 | |
| }, | |
| { | |
| "epoch": 0.565075135962055, | |
| "grad_norm": 5.403791904449463, | |
| "learning_rate": 9.246079876596195e-05, | |
| "loss": 0.3318, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.5665390611847547, | |
| "grad_norm": 5.911950588226318, | |
| "learning_rate": 9.215104526257519e-05, | |
| "loss": 0.3327, | |
| "step": 96750 | |
| }, | |
| { | |
| "epoch": 0.5680029864074543, | |
| "grad_norm": 5.484018802642822, | |
| "learning_rate": 9.184004776921499e-05, | |
| "loss": 0.3384, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.569466911630154, | |
| "grad_norm": 4.785627365112305, | |
| "learning_rate": 9.152905027585478e-05, | |
| "loss": 0.3437, | |
| "step": 97250 | |
| }, | |
| { | |
| "epoch": 0.5709308368528535, | |
| "grad_norm": 7.17230749130249, | |
| "learning_rate": 9.121805278249458e-05, | |
| "loss": 0.3331, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.5723947620755532, | |
| "grad_norm": 7.777104377746582, | |
| "learning_rate": 9.090705528913437e-05, | |
| "loss": 0.3371, | |
| "step": 97750 | |
| }, | |
| { | |
| "epoch": 0.5738586872982528, | |
| "grad_norm": 6.8572001457214355, | |
| "learning_rate": 9.059605779577417e-05, | |
| "loss": 0.3397, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.5753226125209524, | |
| "grad_norm": 9.132293701171875, | |
| "learning_rate": 9.028506030241398e-05, | |
| "loss": 0.3421, | |
| "step": 98250 | |
| }, | |
| { | |
| "epoch": 0.5767865377436521, | |
| "grad_norm": 7.351444244384766, | |
| "learning_rate": 8.997406280905376e-05, | |
| "loss": 0.3315, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.5782504629663516, | |
| "grad_norm": 5.444695949554443, | |
| "learning_rate": 8.966306531569357e-05, | |
| "loss": 0.3313, | |
| "step": 98750 | |
| }, | |
| { | |
| "epoch": 0.5797143881890513, | |
| "grad_norm": 6.229501724243164, | |
| "learning_rate": 8.935206782233336e-05, | |
| "loss": 0.3321, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.581178313411751, | |
| "grad_norm": 4.431236743927002, | |
| "learning_rate": 8.904107032897316e-05, | |
| "loss": 0.3326, | |
| "step": 99250 | |
| }, | |
| { | |
| "epoch": 0.5826422386344505, | |
| "grad_norm": 4.78348445892334, | |
| "learning_rate": 8.873007283561296e-05, | |
| "loss": 0.3362, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.5841061638571502, | |
| "grad_norm": 5.964051723480225, | |
| "learning_rate": 8.841907534225275e-05, | |
| "loss": 0.3408, | |
| "step": 99750 | |
| }, | |
| { | |
| "epoch": 0.5855700890798498, | |
| "grad_norm": 5.310559272766113, | |
| "learning_rate": 8.810807784889255e-05, | |
| "loss": 0.3328, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.5870340143025494, | |
| "grad_norm": 4.985818862915039, | |
| "learning_rate": 8.779708035553234e-05, | |
| "loss": 0.337, | |
| "step": 100250 | |
| }, | |
| { | |
| "epoch": 0.5884979395252491, | |
| "grad_norm": 4.851356506347656, | |
| "learning_rate": 8.748608286217213e-05, | |
| "loss": 0.3314, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 0.5899618647479486, | |
| "grad_norm": 6.863201141357422, | |
| "learning_rate": 8.717508536881193e-05, | |
| "loss": 0.3231, | |
| "step": 100750 | |
| }, | |
| { | |
| "epoch": 0.5914257899706483, | |
| "grad_norm": 6.387337684631348, | |
| "learning_rate": 8.686533186542517e-05, | |
| "loss": 0.322, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.5928897151933479, | |
| "grad_norm": 7.897363662719727, | |
| "learning_rate": 8.655433437206496e-05, | |
| "loss": 0.3361, | |
| "step": 101250 | |
| }, | |
| { | |
| "epoch": 0.5943536404160475, | |
| "grad_norm": 5.876019477844238, | |
| "learning_rate": 8.624333687870476e-05, | |
| "loss": 0.3211, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 0.5958175656387472, | |
| "grad_norm": 4.175768852233887, | |
| "learning_rate": 8.593233938534457e-05, | |
| "loss": 0.3317, | |
| "step": 101750 | |
| }, | |
| { | |
| "epoch": 0.5972814908614468, | |
| "grad_norm": 6.496226787567139, | |
| "learning_rate": 8.562134189198435e-05, | |
| "loss": 0.3289, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.5987454160841464, | |
| "grad_norm": 7.092103004455566, | |
| "learning_rate": 8.531034439862416e-05, | |
| "loss": 0.3329, | |
| "step": 102250 | |
| }, | |
| { | |
| "epoch": 0.600209341306846, | |
| "grad_norm": 7.335963726043701, | |
| "learning_rate": 8.499934690526395e-05, | |
| "loss": 0.3305, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 0.6016732665295457, | |
| "grad_norm": 6.620415687561035, | |
| "learning_rate": 8.468834941190375e-05, | |
| "loss": 0.3324, | |
| "step": 102750 | |
| }, | |
| { | |
| "epoch": 0.6031371917522453, | |
| "grad_norm": 6.866759777069092, | |
| "learning_rate": 8.437735191854355e-05, | |
| "loss": 0.3395, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.6046011169749449, | |
| "grad_norm": 7.7242045402526855, | |
| "learning_rate": 8.406759841515678e-05, | |
| "loss": 0.3368, | |
| "step": 103250 | |
| }, | |
| { | |
| "epoch": 0.6060650421976446, | |
| "grad_norm": 6.402958869934082, | |
| "learning_rate": 8.375660092179658e-05, | |
| "loss": 0.3366, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 0.6075289674203441, | |
| "grad_norm": 6.456150531768799, | |
| "learning_rate": 8.344560342843637e-05, | |
| "loss": 0.3372, | |
| "step": 103750 | |
| }, | |
| { | |
| "epoch": 0.6089928926430438, | |
| "grad_norm": 7.6825971603393555, | |
| "learning_rate": 8.313460593507617e-05, | |
| "loss": 0.3331, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.6104568178657435, | |
| "grad_norm": 11.974824905395508, | |
| "learning_rate": 8.282360844171596e-05, | |
| "loss": 0.3317, | |
| "step": 104250 | |
| }, | |
| { | |
| "epoch": 0.611920743088443, | |
| "grad_norm": 5.445409774780273, | |
| "learning_rate": 8.251261094835576e-05, | |
| "loss": 0.3303, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 0.6133846683111427, | |
| "grad_norm": 8.099034309387207, | |
| "learning_rate": 8.220161345499555e-05, | |
| "loss": 0.3317, | |
| "step": 104750 | |
| }, | |
| { | |
| "epoch": 0.6148485935338422, | |
| "grad_norm": 21.789043426513672, | |
| "learning_rate": 8.189061596163535e-05, | |
| "loss": 0.3146, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.6163125187565419, | |
| "grad_norm": 6.879361152648926, | |
| "learning_rate": 8.158086245824859e-05, | |
| "loss": 0.3346, | |
| "step": 105250 | |
| }, | |
| { | |
| "epoch": 0.6177764439792416, | |
| "grad_norm": 5.477085113525391, | |
| "learning_rate": 8.126986496488838e-05, | |
| "loss": 0.3274, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 0.6192403692019411, | |
| "grad_norm": 6.2816667556762695, | |
| "learning_rate": 8.095886747152818e-05, | |
| "loss": 0.3271, | |
| "step": 105750 | |
| }, | |
| { | |
| "epoch": 0.6207042944246408, | |
| "grad_norm": 9.089285850524902, | |
| "learning_rate": 8.064786997816797e-05, | |
| "loss": 0.3351, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.6221682196473404, | |
| "grad_norm": 6.114886283874512, | |
| "learning_rate": 8.033687248480777e-05, | |
| "loss": 0.3296, | |
| "step": 106250 | |
| }, | |
| { | |
| "epoch": 0.62363214487004, | |
| "grad_norm": 7.2542548179626465, | |
| "learning_rate": 8.002587499144756e-05, | |
| "loss": 0.3246, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 0.6250960700927397, | |
| "grad_norm": 5.58528995513916, | |
| "learning_rate": 7.971487749808737e-05, | |
| "loss": 0.3327, | |
| "step": 106750 | |
| }, | |
| { | |
| "epoch": 0.6265599953154393, | |
| "grad_norm": 3.898178815841675, | |
| "learning_rate": 7.940388000472715e-05, | |
| "loss": 0.3291, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.6280239205381389, | |
| "grad_norm": 5.644820690155029, | |
| "learning_rate": 7.909288251136696e-05, | |
| "loss": 0.3281, | |
| "step": 107250 | |
| }, | |
| { | |
| "epoch": 0.6294878457608385, | |
| "grad_norm": 6.363776206970215, | |
| "learning_rate": 7.878188501800676e-05, | |
| "loss": 0.3304, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 0.6309517709835382, | |
| "grad_norm": 5.209687232971191, | |
| "learning_rate": 7.847213151462e-05, | |
| "loss": 0.3224, | |
| "step": 107750 | |
| }, | |
| { | |
| "epoch": 0.6324156962062378, | |
| "grad_norm": 6.911553382873535, | |
| "learning_rate": 7.81611340212598e-05, | |
| "loss": 0.3246, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.6338796214289374, | |
| "grad_norm": 7.6557111740112305, | |
| "learning_rate": 7.785013652789959e-05, | |
| "loss": 0.322, | |
| "step": 108250 | |
| }, | |
| { | |
| "epoch": 0.6353435466516371, | |
| "grad_norm": 7.857481002807617, | |
| "learning_rate": 7.753913903453939e-05, | |
| "loss": 0.3318, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 0.6368074718743366, | |
| "grad_norm": 5.911120891571045, | |
| "learning_rate": 7.722814154117918e-05, | |
| "loss": 0.325, | |
| "step": 108750 | |
| }, | |
| { | |
| "epoch": 0.6382713970970363, | |
| "grad_norm": 8.592209815979004, | |
| "learning_rate": 7.691714404781898e-05, | |
| "loss": 0.3209, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.639735322319736, | |
| "grad_norm": 6.824602127075195, | |
| "learning_rate": 7.660614655445879e-05, | |
| "loss": 0.3331, | |
| "step": 109250 | |
| }, | |
| { | |
| "epoch": 0.6411992475424355, | |
| "grad_norm": 6.813981056213379, | |
| "learning_rate": 7.629514906109858e-05, | |
| "loss": 0.3313, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 0.6426631727651352, | |
| "grad_norm": 5.7169671058654785, | |
| "learning_rate": 7.598539555771181e-05, | |
| "loss": 0.3206, | |
| "step": 109750 | |
| }, | |
| { | |
| "epoch": 0.6441270979878347, | |
| "grad_norm": 5.429720401763916, | |
| "learning_rate": 7.56743980643516e-05, | |
| "loss": 0.3192, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.6441270979878347, | |
| "eval_accuracy": 0.8997983351325891, | |
| "eval_loss": 0.3242824375629425, | |
| "eval_runtime": 11546.6804, | |
| "eval_samples_per_second": 210.345, | |
| "eval_steps_per_second": 6.573, | |
| "step": 110000 | |
| } | |
| ], | |
| "logging_steps": 250, | |
| "max_steps": 170773, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 55000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.8505890873482936e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |