| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 13290, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007524454477050414, |
| "grad_norm": 205.97328186035156, |
| "learning_rate": 3.762227238525207e-08, |
| "loss": 6.3478, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.015048908954100828, |
| "grad_norm": 240.38754272460938, |
| "learning_rate": 7.524454477050414e-08, |
| "loss": 7.7512, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.022573363431151242, |
| "grad_norm": 176.9767608642578, |
| "learning_rate": 1.1286681715575622e-07, |
| "loss": 7.3317, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.030097817908201655, |
| "grad_norm": 303.99163818359375, |
| "learning_rate": 1.5048908954100828e-07, |
| "loss": 6.3909, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03762227238525207, |
| "grad_norm": 243.18605041503906, |
| "learning_rate": 1.8811136192626038e-07, |
| "loss": 5.6329, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.045146726862302484, |
| "grad_norm": 218.3995819091797, |
| "learning_rate": 2.2573363431151243e-07, |
| "loss": 5.9957, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0526711813393529, |
| "grad_norm": 175.67726135253906, |
| "learning_rate": 2.633559066967645e-07, |
| "loss": 5.7306, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06019563581640331, |
| "grad_norm": 139.78561401367188, |
| "learning_rate": 3.0097817908201656e-07, |
| "loss": 4.8739, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06772009029345373, |
| "grad_norm": 208.4263916015625, |
| "learning_rate": 3.3860045146726866e-07, |
| "loss": 4.8178, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07524454477050414, |
| "grad_norm": 122.90266418457031, |
| "learning_rate": 3.7622272385252076e-07, |
| "loss": 4.3441, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08276899924755456, |
| "grad_norm": 115.08711242675781, |
| "learning_rate": 4.138449962377728e-07, |
| "loss": 3.882, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09029345372460497, |
| "grad_norm": 85.97943878173828, |
| "learning_rate": 4.5146726862302486e-07, |
| "loss": 3.7734, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09781790820165538, |
| "grad_norm": 136.2003173828125, |
| "learning_rate": 4.89089541008277e-07, |
| "loss": 3.7928, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1053423626787058, |
| "grad_norm": 109.00381469726562, |
| "learning_rate": 5.26711813393529e-07, |
| "loss": 3.1142, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.11286681715575621, |
| "grad_norm": 109.21756744384766, |
| "learning_rate": 5.643340857787811e-07, |
| "loss": 2.9789, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12039127163280662, |
| "grad_norm": 89.74577331542969, |
| "learning_rate": 6.019563581640331e-07, |
| "loss": 2.8887, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.12791572610985705, |
| "grad_norm": 81.73179626464844, |
| "learning_rate": 6.395786305492853e-07, |
| "loss": 2.7142, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.13544018058690746, |
| "grad_norm": 77.00216674804688, |
| "learning_rate": 6.772009029345373e-07, |
| "loss": 2.5444, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.14296463506395787, |
| "grad_norm": 106.53274536132812, |
| "learning_rate": 7.148231753197895e-07, |
| "loss": 2.7361, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1504890895410083, |
| "grad_norm": 71.52346801757812, |
| "learning_rate": 7.524454477050415e-07, |
| "loss": 1.9428, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1580135440180587, |
| "grad_norm": 68.6385498046875, |
| "learning_rate": 7.900677200902936e-07, |
| "loss": 2.1022, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1655379984951091, |
| "grad_norm": 67.29202270507812, |
| "learning_rate": 8.276899924755456e-07, |
| "loss": 1.8348, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.17306245297215953, |
| "grad_norm": 78.09269714355469, |
| "learning_rate": 8.653122648607977e-07, |
| "loss": 2.2978, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.18058690744920994, |
| "grad_norm": 52.45564651489258, |
| "learning_rate": 9.029345372460497e-07, |
| "loss": 1.7102, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.18811136192626035, |
| "grad_norm": 46.46923828125, |
| "learning_rate": 9.405568096313019e-07, |
| "loss": 1.6509, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.19563581640331076, |
| "grad_norm": 47.17067337036133, |
| "learning_rate": 9.78179082016554e-07, |
| "loss": 1.5885, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.20316027088036118, |
| "grad_norm": 53.28254318237305, |
| "learning_rate": 1.0158013544018059e-06, |
| "loss": 1.7244, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2106847253574116, |
| "grad_norm": 40.88100814819336, |
| "learning_rate": 1.053423626787058e-06, |
| "loss": 1.4149, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.218209179834462, |
| "grad_norm": 51.29297637939453, |
| "learning_rate": 1.0910458991723102e-06, |
| "loss": 1.3588, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.22573363431151242, |
| "grad_norm": 46.602970123291016, |
| "learning_rate": 1.1286681715575621e-06, |
| "loss": 1.2849, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.23325808878856283, |
| "grad_norm": 48.245201110839844, |
| "learning_rate": 1.1662904439428143e-06, |
| "loss": 1.1217, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.24078254326561324, |
| "grad_norm": 55.32264709472656, |
| "learning_rate": 1.2039127163280662e-06, |
| "loss": 1.4073, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.24830699774266365, |
| "grad_norm": 55.40262985229492, |
| "learning_rate": 1.2415349887133184e-06, |
| "loss": 1.3804, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2558314522197141, |
| "grad_norm": 55.93576431274414, |
| "learning_rate": 1.2791572610985705e-06, |
| "loss": 1.2969, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2633559066967645, |
| "grad_norm": 40.739097595214844, |
| "learning_rate": 1.3167795334838227e-06, |
| "loss": 0.8772, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2708803611738149, |
| "grad_norm": 56.57487487792969, |
| "learning_rate": 1.3544018058690746e-06, |
| "loss": 1.0485, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.27840481565086533, |
| "grad_norm": 37.80082702636719, |
| "learning_rate": 1.3920240782543268e-06, |
| "loss": 0.8924, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.28592927012791575, |
| "grad_norm": 40.33064651489258, |
| "learning_rate": 1.429646350639579e-06, |
| "loss": 1.0153, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.29345372460496616, |
| "grad_norm": 38.92075729370117, |
| "learning_rate": 1.4672686230248309e-06, |
| "loss": 0.919, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3009781790820166, |
| "grad_norm": 36.200130462646484, |
| "learning_rate": 1.504890895410083e-06, |
| "loss": 0.8283, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.308502633559067, |
| "grad_norm": 34.788963317871094, |
| "learning_rate": 1.542513167795335e-06, |
| "loss": 1.1902, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3160270880361174, |
| "grad_norm": 36.59656524658203, |
| "learning_rate": 1.5801354401805871e-06, |
| "loss": 0.9343, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3235515425131678, |
| "grad_norm": 39.81496047973633, |
| "learning_rate": 1.617757712565839e-06, |
| "loss": 0.9014, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3310759969902182, |
| "grad_norm": 24.904926300048828, |
| "learning_rate": 1.6553799849510912e-06, |
| "loss": 0.7461, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.33860045146726864, |
| "grad_norm": 38.34230422973633, |
| "learning_rate": 1.6930022573363434e-06, |
| "loss": 0.8552, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.34612490594431905, |
| "grad_norm": 26.474260330200195, |
| "learning_rate": 1.7306245297215953e-06, |
| "loss": 0.8746, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.35364936042136946, |
| "grad_norm": 36.48841094970703, |
| "learning_rate": 1.7682468021068475e-06, |
| "loss": 0.816, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3611738148984199, |
| "grad_norm": 35.95697021484375, |
| "learning_rate": 1.8058690744920994e-06, |
| "loss": 0.7342, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3686982693754703, |
| "grad_norm": 37.21470260620117, |
| "learning_rate": 1.8434913468773516e-06, |
| "loss": 0.7724, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3762227238525207, |
| "grad_norm": 38.8176155090332, |
| "learning_rate": 1.8811136192626038e-06, |
| "loss": 0.9453, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3837471783295711, |
| "grad_norm": 37.029815673828125, |
| "learning_rate": 1.918735891647856e-06, |
| "loss": 0.7449, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3912716328066215, |
| "grad_norm": 36.2590217590332, |
| "learning_rate": 1.956358164033108e-06, |
| "loss": 0.8882, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.39879608728367194, |
| "grad_norm": 31.115354537963867, |
| "learning_rate": 1.99398043641836e-06, |
| "loss": 0.8969, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.40632054176072235, |
| "grad_norm": 27.557512283325195, |
| "learning_rate": 2.0316027088036117e-06, |
| "loss": 0.8884, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.41384499623777277, |
| "grad_norm": 32.3388671875, |
| "learning_rate": 2.069224981188864e-06, |
| "loss": 0.7083, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4213694507148232, |
| "grad_norm": 37.25837707519531, |
| "learning_rate": 2.106847253574116e-06, |
| "loss": 0.7259, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4288939051918736, |
| "grad_norm": 20.491844177246094, |
| "learning_rate": 2.144469525959368e-06, |
| "loss": 0.7148, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.436418359668924, |
| "grad_norm": 26.329750061035156, |
| "learning_rate": 2.1820917983446204e-06, |
| "loss": 0.8233, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4439428141459744, |
| "grad_norm": 36.909969329833984, |
| "learning_rate": 2.2197140707298723e-06, |
| "loss": 0.8745, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.45146726862302483, |
| "grad_norm": 23.27708625793457, |
| "learning_rate": 2.2573363431151243e-06, |
| "loss": 0.7076, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.45899172310007524, |
| "grad_norm": 39.00196838378906, |
| "learning_rate": 2.294958615500376e-06, |
| "loss": 0.7312, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.46651617757712566, |
| "grad_norm": 25.750431060791016, |
| "learning_rate": 2.3325808878856286e-06, |
| "loss": 0.6018, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.47404063205417607, |
| "grad_norm": 26.11092185974121, |
| "learning_rate": 2.3702031602708805e-06, |
| "loss": 0.6264, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4815650865312265, |
| "grad_norm": 25.451011657714844, |
| "learning_rate": 2.4078254326561325e-06, |
| "loss": 0.5659, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4890895410082769, |
| "grad_norm": 20.302892684936523, |
| "learning_rate": 2.445447705041385e-06, |
| "loss": 0.6611, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4966139954853273, |
| "grad_norm": 30.64501953125, |
| "learning_rate": 2.4830699774266368e-06, |
| "loss": 0.7641, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5041384499623778, |
| "grad_norm": 24.991497039794922, |
| "learning_rate": 2.520692249811889e-06, |
| "loss": 0.7671, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5116629044394282, |
| "grad_norm": 25.86818504333496, |
| "learning_rate": 2.558314522197141e-06, |
| "loss": 0.764, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5191873589164786, |
| "grad_norm": 17.105379104614258, |
| "learning_rate": 2.595936794582393e-06, |
| "loss": 0.6704, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.526711813393529, |
| "grad_norm": 32.04035949707031, |
| "learning_rate": 2.6335590669676454e-06, |
| "loss": 0.6067, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5342362678705794, |
| "grad_norm": 28.617063522338867, |
| "learning_rate": 2.6711813393528973e-06, |
| "loss": 0.5894, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5417607223476298, |
| "grad_norm": 21.270259857177734, |
| "learning_rate": 2.7088036117381493e-06, |
| "loss": 0.636, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5492851768246803, |
| "grad_norm": 19.81507110595703, |
| "learning_rate": 2.7464258841234016e-06, |
| "loss": 0.631, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5568096313017307, |
| "grad_norm": 18.52845001220703, |
| "learning_rate": 2.7840481565086536e-06, |
| "loss": 0.6765, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5643340857787811, |
| "grad_norm": 26.92551612854004, |
| "learning_rate": 2.8216704288939055e-06, |
| "loss": 0.6776, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5718585402558315, |
| "grad_norm": 18.482553482055664, |
| "learning_rate": 2.859292701279158e-06, |
| "loss": 0.5727, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5793829947328819, |
| "grad_norm": 22.597740173339844, |
| "learning_rate": 2.89691497366441e-06, |
| "loss": 0.5505, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5869074492099323, |
| "grad_norm": 24.563085556030273, |
| "learning_rate": 2.9345372460496618e-06, |
| "loss": 0.6187, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5944319036869827, |
| "grad_norm": 15.353199005126953, |
| "learning_rate": 2.9721595184349137e-06, |
| "loss": 0.6372, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6019563581640331, |
| "grad_norm": 21.005054473876953, |
| "learning_rate": 3.009781790820166e-06, |
| "loss": 0.5677, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6094808126410836, |
| "grad_norm": 12.04046630859375, |
| "learning_rate": 3.047404063205418e-06, |
| "loss": 0.5383, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.617005267118134, |
| "grad_norm": 19.093727111816406, |
| "learning_rate": 3.08502633559067e-06, |
| "loss": 0.5017, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6245297215951844, |
| "grad_norm": 20.096912384033203, |
| "learning_rate": 3.1226486079759224e-06, |
| "loss": 0.5334, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6320541760722348, |
| "grad_norm": 15.413702011108398, |
| "learning_rate": 3.1602708803611743e-06, |
| "loss": 0.5755, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6395786305492852, |
| "grad_norm": 19.66105842590332, |
| "learning_rate": 3.1978931527464262e-06, |
| "loss": 0.4816, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6471030850263356, |
| "grad_norm": 14.463790893554688, |
| "learning_rate": 3.235515425131678e-06, |
| "loss": 0.5436, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.654627539503386, |
| "grad_norm": 19.423931121826172, |
| "learning_rate": 3.2731376975169306e-06, |
| "loss": 0.4423, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6621519939804364, |
| "grad_norm": 17.905784606933594, |
| "learning_rate": 3.3107599699021825e-06, |
| "loss": 0.5378, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6696764484574869, |
| "grad_norm": 19.07693862915039, |
| "learning_rate": 3.3483822422874344e-06, |
| "loss": 0.5569, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6772009029345373, |
| "grad_norm": 19.285032272338867, |
| "learning_rate": 3.386004514672687e-06, |
| "loss": 0.4729, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6847253574115877, |
| "grad_norm": 18.800764083862305, |
| "learning_rate": 3.4236267870579388e-06, |
| "loss": 0.5417, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6922498118886381, |
| "grad_norm": 20.45533561706543, |
| "learning_rate": 3.4612490594431907e-06, |
| "loss": 0.5456, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6997742663656885, |
| "grad_norm": 13.277688980102539, |
| "learning_rate": 3.498871331828443e-06, |
| "loss": 0.4199, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7072987208427389, |
| "grad_norm": 12.00981330871582, |
| "learning_rate": 3.536493604213695e-06, |
| "loss": 0.4254, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7148231753197893, |
| "grad_norm": 22.597990036010742, |
| "learning_rate": 3.574115876598947e-06, |
| "loss": 0.5816, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7223476297968398, |
| "grad_norm": 18.304636001586914, |
| "learning_rate": 3.611738148984199e-06, |
| "loss": 0.5138, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7298720842738902, |
| "grad_norm": 19.998075485229492, |
| "learning_rate": 3.6493604213694513e-06, |
| "loss": 0.4879, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7373965387509406, |
| "grad_norm": 13.545928955078125, |
| "learning_rate": 3.686982693754703e-06, |
| "loss": 0.4214, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.744920993227991, |
| "grad_norm": 14.774223327636719, |
| "learning_rate": 3.724604966139955e-06, |
| "loss": 0.4998, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7524454477050414, |
| "grad_norm": 12.196511268615723, |
| "learning_rate": 3.7622272385252075e-06, |
| "loss": 0.4384, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7599699021820918, |
| "grad_norm": 15.515963554382324, |
| "learning_rate": 3.7998495109104595e-06, |
| "loss": 0.4783, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7674943566591422, |
| "grad_norm": 19.6131534576416, |
| "learning_rate": 3.837471783295712e-06, |
| "loss": 0.4698, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7750188111361926, |
| "grad_norm": 15.661735534667969, |
| "learning_rate": 3.875094055680963e-06, |
| "loss": 0.4194, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.782543265613243, |
| "grad_norm": 15.907915115356445, |
| "learning_rate": 3.912716328066216e-06, |
| "loss": 0.5192, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7900677200902935, |
| "grad_norm": 8.650323867797852, |
| "learning_rate": 3.950338600451468e-06, |
| "loss": 0.3647, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7975921745673439, |
| "grad_norm": 14.708108901977539, |
| "learning_rate": 3.98796087283672e-06, |
| "loss": 0.5469, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8051166290443943, |
| "grad_norm": 16.337339401245117, |
| "learning_rate": 4.025583145221972e-06, |
| "loss": 0.4287, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.8126410835214447, |
| "grad_norm": 11.901853561401367, |
| "learning_rate": 4.0632054176072235e-06, |
| "loss": 0.4745, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.8201655379984951, |
| "grad_norm": 13.786298751831055, |
| "learning_rate": 4.100827689992476e-06, |
| "loss": 0.3915, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.8276899924755455, |
| "grad_norm": 18.17781639099121, |
| "learning_rate": 4.138449962377728e-06, |
| "loss": 0.4134, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.835214446952596, |
| "grad_norm": 15.534759521484375, |
| "learning_rate": 4.17607223476298e-06, |
| "loss": 0.3891, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.8427389014296464, |
| "grad_norm": 19.764795303344727, |
| "learning_rate": 4.213694507148232e-06, |
| "loss": 0.338, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8502633559066968, |
| "grad_norm": 19.5684814453125, |
| "learning_rate": 4.2513167795334845e-06, |
| "loss": 0.4936, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8577878103837472, |
| "grad_norm": 16.872209548950195, |
| "learning_rate": 4.288939051918736e-06, |
| "loss": 0.354, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8653122648607976, |
| "grad_norm": 16.32406997680664, |
| "learning_rate": 4.326561324303988e-06, |
| "loss": 0.4274, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.872836719337848, |
| "grad_norm": 16.827388763427734, |
| "learning_rate": 4.364183596689241e-06, |
| "loss": 0.4203, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8803611738148984, |
| "grad_norm": 11.862984657287598, |
| "learning_rate": 4.401805869074492e-06, |
| "loss": 0.3622, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8878856282919488, |
| "grad_norm": 11.026522636413574, |
| "learning_rate": 4.439428141459745e-06, |
| "loss": 0.3571, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8954100827689992, |
| "grad_norm": 12.197684288024902, |
| "learning_rate": 4.477050413844997e-06, |
| "loss": 0.3711, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.9029345372460497, |
| "grad_norm": 14.74181079864502, |
| "learning_rate": 4.5146726862302485e-06, |
| "loss": 0.443, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9104589917231001, |
| "grad_norm": 15.604998588562012, |
| "learning_rate": 4.552294958615501e-06, |
| "loss": 0.3191, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.9179834462001505, |
| "grad_norm": 16.301240921020508, |
| "learning_rate": 4.589917231000752e-06, |
| "loss": 0.4541, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.9255079006772009, |
| "grad_norm": 14.248018264770508, |
| "learning_rate": 4.627539503386005e-06, |
| "loss": 0.3961, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.9330323551542513, |
| "grad_norm": 12.575178146362305, |
| "learning_rate": 4.665161775771257e-06, |
| "loss": 0.4851, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.9405568096313017, |
| "grad_norm": 11.841970443725586, |
| "learning_rate": 4.702784048156509e-06, |
| "loss": 0.3934, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.9480812641083521, |
| "grad_norm": 10.266742706298828, |
| "learning_rate": 4.740406320541761e-06, |
| "loss": 0.3943, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.9556057185854026, |
| "grad_norm": 11.296609878540039, |
| "learning_rate": 4.778028592927013e-06, |
| "loss": 0.5257, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.963130173062453, |
| "grad_norm": 9.851863861083984, |
| "learning_rate": 4.815650865312265e-06, |
| "loss": 0.3885, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.9706546275395034, |
| "grad_norm": 11.191083908081055, |
| "learning_rate": 4.853273137697517e-06, |
| "loss": 0.3472, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9781790820165538, |
| "grad_norm": 13.400888442993164, |
| "learning_rate": 4.89089541008277e-06, |
| "loss": 0.4212, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9857035364936042, |
| "grad_norm": 13.300646781921387, |
| "learning_rate": 4.928517682468021e-06, |
| "loss": 0.4037, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.9932279909706546, |
| "grad_norm": 12.612085342407227, |
| "learning_rate": 4.9661399548532735e-06, |
| "loss": 0.4435, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.000752445447705, |
| "grad_norm": 12.677772521972656, |
| "learning_rate": 5.003762227238526e-06, |
| "loss": 0.405, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.0082768999247556, |
| "grad_norm": 10.081019401550293, |
| "learning_rate": 5.041384499623778e-06, |
| "loss": 0.3084, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.0158013544018059, |
| "grad_norm": 6.720037460327148, |
| "learning_rate": 5.07900677200903e-06, |
| "loss": 0.3676, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.0233258088788564, |
| "grad_norm": 12.8779296875, |
| "learning_rate": 5.116629044394282e-06, |
| "loss": 0.4041, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.0308502633559067, |
| "grad_norm": 12.37596321105957, |
| "learning_rate": 5.154251316779534e-06, |
| "loss": 0.4613, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.0383747178329572, |
| "grad_norm": 15.438199043273926, |
| "learning_rate": 5.191873589164786e-06, |
| "loss": 0.4007, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.0458991723100075, |
| "grad_norm": 11.500144004821777, |
| "learning_rate": 5.2294958615500376e-06, |
| "loss": 0.3785, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.053423626787058, |
| "grad_norm": 6.981774806976318, |
| "learning_rate": 5.267118133935291e-06, |
| "loss": 0.3816, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.0609480812641083, |
| "grad_norm": 11.432241439819336, |
| "learning_rate": 5.304740406320542e-06, |
| "loss": 0.4292, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.0684725357411589, |
| "grad_norm": 16.537931442260742, |
| "learning_rate": 5.342362678705795e-06, |
| "loss": 0.3867, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.0759969902182092, |
| "grad_norm": 14.49853801727295, |
| "learning_rate": 5.379984951091046e-06, |
| "loss": 0.3697, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.0835214446952597, |
| "grad_norm": 11.265471458435059, |
| "learning_rate": 5.4176072234762986e-06, |
| "loss": 0.3432, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.09104589917231, |
| "grad_norm": 10.750925064086914, |
| "learning_rate": 5.45522949586155e-06, |
| "loss": 0.363, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.0985703536493605, |
| "grad_norm": 9.414807319641113, |
| "learning_rate": 5.492851768246803e-06, |
| "loss": 0.366, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.1060948081264108, |
| "grad_norm": 12.20535945892334, |
| "learning_rate": 5.530474040632055e-06, |
| "loss": 0.3451, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.1136192626034613, |
| "grad_norm": 10.815361976623535, |
| "learning_rate": 5.568096313017307e-06, |
| "loss": 0.333, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.1211437170805116, |
| "grad_norm": 12.699690818786621, |
| "learning_rate": 5.605718585402559e-06, |
| "loss": 0.4013, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.1286681715575622, |
| "grad_norm": 13.174708366394043, |
| "learning_rate": 5.643340857787811e-06, |
| "loss": 0.3381, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.1361926260346125, |
| "grad_norm": 10.062464714050293, |
| "learning_rate": 5.680963130173063e-06, |
| "loss": 0.4081, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.143717080511663, |
| "grad_norm": 15.652090072631836, |
| "learning_rate": 5.718585402558316e-06, |
| "loss": 0.5176, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.1512415349887133, |
| "grad_norm": 12.49589729309082, |
| "learning_rate": 5.756207674943567e-06, |
| "loss": 0.3599, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.1587659894657638, |
| "grad_norm": 16.024024963378906, |
| "learning_rate": 5.79382994732882e-06, |
| "loss": 0.3548, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.1662904439428141, |
| "grad_norm": 11.08897876739502, |
| "learning_rate": 5.831452219714071e-06, |
| "loss": 0.3916, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.1738148984198646, |
| "grad_norm": 11.4227294921875, |
| "learning_rate": 5.8690744920993236e-06, |
| "loss": 0.3289, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.181339352896915, |
| "grad_norm": 8.900651931762695, |
| "learning_rate": 5.906696764484575e-06, |
| "loss": 0.3406, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.1888638073739655, |
| "grad_norm": 14.201620101928711, |
| "learning_rate": 5.9443190368698275e-06, |
| "loss": 0.341, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.1963882618510158, |
| "grad_norm": 12.406390190124512, |
| "learning_rate": 5.981941309255079e-06, |
| "loss": 0.3861, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.2039127163280663, |
| "grad_norm": 16.3964786529541, |
| "learning_rate": 6.019563581640332e-06, |
| "loss": 0.3044, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.2114371708051166, |
| "grad_norm": 7.37337064743042, |
| "learning_rate": 6.057185854025584e-06, |
| "loss": 0.3174, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.2189616252821671, |
| "grad_norm": 16.885831832885742, |
| "learning_rate": 6.094808126410836e-06, |
| "loss": 0.4005, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.2264860797592174, |
| "grad_norm": 10.093635559082031, |
| "learning_rate": 6.132430398796088e-06, |
| "loss": 0.3842, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.234010534236268, |
| "grad_norm": 10.68936824798584, |
| "learning_rate": 6.17005267118134e-06, |
| "loss": 0.2717, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.2415349887133182, |
| "grad_norm": 9.531023025512695, |
| "learning_rate": 6.2076749435665915e-06, |
| "loss": 0.32, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.2490594431903688, |
| "grad_norm": 11.77883529663086, |
| "learning_rate": 6.245297215951845e-06, |
| "loss": 0.2916, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.256583897667419, |
| "grad_norm": 13.740798950195312, |
| "learning_rate": 6.282919488337096e-06, |
| "loss": 0.3566, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.2641083521444696, |
| "grad_norm": 9.122705459594727, |
| "learning_rate": 6.320541760722349e-06, |
| "loss": 0.399, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.27163280662152, |
| "grad_norm": 8.534326553344727, |
| "learning_rate": 6.3581640331076e-06, |
| "loss": 0.2988, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.2791572610985704, |
| "grad_norm": 9.343318939208984, |
| "learning_rate": 6.3957863054928525e-06, |
| "loss": 0.2774, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.2866817155756207, |
| "grad_norm": 13.007920265197754, |
| "learning_rate": 6.433408577878104e-06, |
| "loss": 0.3807, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.2942061700526712, |
| "grad_norm": 12.973888397216797, |
| "learning_rate": 6.471030850263356e-06, |
| "loss": 0.3965, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.3017306245297215, |
| "grad_norm": 5.929915428161621, |
| "learning_rate": 6.508653122648608e-06, |
| "loss": 0.3176, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.309255079006772, |
| "grad_norm": 12.077717781066895, |
| "learning_rate": 6.546275395033861e-06, |
| "loss": 0.3405, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.3167795334838224, |
| "grad_norm": 13.16915225982666, |
| "learning_rate": 6.583897667419113e-06, |
| "loss": 0.3968, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.324303987960873, |
| "grad_norm": 7.098601341247559, |
| "learning_rate": 6.621519939804365e-06, |
| "loss": 0.3402, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.3318284424379232, |
| "grad_norm": 7.718910217285156, |
| "learning_rate": 6.6591422121896165e-06, |
| "loss": 0.3193, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.3393528969149737, |
| "grad_norm": 12.224291801452637, |
| "learning_rate": 6.696764484574869e-06, |
| "loss": 0.4085, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.346877351392024, |
| "grad_norm": 10.205738067626953, |
| "learning_rate": 6.73438675696012e-06, |
| "loss": 0.3806, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.3544018058690745, |
| "grad_norm": 13.36560344696045, |
| "learning_rate": 6.772009029345374e-06, |
| "loss": 0.4265, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.3619262603461249, |
| "grad_norm": 10.773680686950684, |
| "learning_rate": 6.809631301730625e-06, |
| "loss": 0.3876, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.3694507148231754, |
| "grad_norm": 12.094820976257324, |
| "learning_rate": 6.8472535741158775e-06, |
| "loss": 0.3982, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.3769751693002257, |
| "grad_norm": 10.780938148498535, |
| "learning_rate": 6.884875846501129e-06, |
| "loss": 0.3569, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.3844996237772762, |
| "grad_norm": 14.833603858947754, |
| "learning_rate": 6.922498118886381e-06, |
| "loss": 0.4042, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.3920240782543265, |
| "grad_norm": 10.232943534851074, |
| "learning_rate": 6.960120391271633e-06, |
| "loss": 0.3469, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.399548532731377, |
| "grad_norm": 8.74258041381836, |
| "learning_rate": 6.997742663656886e-06, |
| "loss": 0.2916, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.4070729872084273, |
| "grad_norm": 11.700883865356445, |
| "learning_rate": 7.035364936042137e-06, |
| "loss": 0.3765, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.4145974416854779, |
| "grad_norm": 7.85889196395874, |
| "learning_rate": 7.07298720842739e-06, |
| "loss": 0.3984, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.4221218961625282, |
| "grad_norm": 10.270554542541504, |
| "learning_rate": 7.1106094808126415e-06, |
| "loss": 0.2839, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.4296463506395787, |
| "grad_norm": 10.699418067932129, |
| "learning_rate": 7.148231753197894e-06, |
| "loss": 0.3338, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.437170805116629, |
| "grad_norm": 9.80615234375, |
| "learning_rate": 7.185854025583145e-06, |
| "loss": 0.299, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.4446952595936795, |
| "grad_norm": 11.072823524475098, |
| "learning_rate": 7.223476297968398e-06, |
| "loss": 0.3196, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.4522197140707298, |
| "grad_norm": 10.377110481262207, |
| "learning_rate": 7.261098570353649e-06, |
| "loss": 0.3172, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.4597441685477803, |
| "grad_norm": 13.72491455078125, |
| "learning_rate": 7.2987208427389025e-06, |
| "loss": 0.3497, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.4672686230248306, |
| "grad_norm": 7.965284824371338, |
| "learning_rate": 7.336343115124154e-06, |
| "loss": 0.3693, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.4747930775018812, |
| "grad_norm": 11.198545455932617, |
| "learning_rate": 7.373965387509406e-06, |
| "loss": 0.3379, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.4823175319789315, |
| "grad_norm": 8.948925971984863, |
| "learning_rate": 7.411587659894658e-06, |
| "loss": 0.3532, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.489841986455982, |
| "grad_norm": 12.37880802154541, |
| "learning_rate": 7.44920993227991e-06, |
| "loss": 0.3462, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.4973664409330323, |
| "grad_norm": 12.647643089294434, |
| "learning_rate": 7.486832204665162e-06, |
| "loss": 0.3667, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.5048908954100828, |
| "grad_norm": 10.249937057495117, |
| "learning_rate": 7.524454477050415e-06, |
| "loss": 0.3679, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.5124153498871333, |
| "grad_norm": 6.153459548950195, |
| "learning_rate": 7.5620767494356666e-06, |
| "loss": 0.3438, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.5199398043641836, |
| "grad_norm": 10.269697189331055, |
| "learning_rate": 7.599699021820919e-06, |
| "loss": 0.317, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.527464258841234, |
| "grad_norm": 6.430722236633301, |
| "learning_rate": 7.63732129420617e-06, |
| "loss": 0.3408, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.5349887133182845, |
| "grad_norm": 11.511789321899414, |
| "learning_rate": 7.674943566591424e-06, |
| "loss": 0.3218, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.542513167795335, |
| "grad_norm": 7.904773712158203, |
| "learning_rate": 7.712565838976675e-06, |
| "loss": 0.3227, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.5500376222723853, |
| "grad_norm": 8.252975463867188, |
| "learning_rate": 7.750188111361927e-06, |
| "loss": 0.3499, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.5575620767494356, |
| "grad_norm": 13.122828483581543, |
| "learning_rate": 7.787810383747178e-06, |
| "loss": 0.3733, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.565086531226486, |
| "grad_norm": 8.126852035522461, |
| "learning_rate": 7.825432656132431e-06, |
| "loss": 0.3419, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.5726109857035366, |
| "grad_norm": 12.114632606506348, |
| "learning_rate": 7.863054928517683e-06, |
| "loss": 0.2854, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.580135440180587, |
| "grad_norm": 11.291882514953613, |
| "learning_rate": 7.900677200902936e-06, |
| "loss": 0.2942, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.5876598946576372, |
| "grad_norm": 12.11705493927002, |
| "learning_rate": 7.938299473288188e-06, |
| "loss": 0.2712, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.5951843491346878, |
| "grad_norm": 9.345537185668945, |
| "learning_rate": 7.97592174567344e-06, |
| "loss": 0.2612, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.6027088036117383, |
| "grad_norm": 9.715005874633789, |
| "learning_rate": 8.01354401805869e-06, |
| "loss": 0.3601, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.6102332580887886, |
| "grad_norm": 13.192370414733887, |
| "learning_rate": 8.051166290443944e-06, |
| "loss": 0.3084, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.617757712565839, |
| "grad_norm": 7.89211368560791, |
| "learning_rate": 8.088788562829195e-06, |
| "loss": 0.3257, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.6252821670428894, |
| "grad_norm": 12.189757347106934, |
| "learning_rate": 8.126410835214447e-06, |
| "loss": 0.3334, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.63280662151994, |
| "grad_norm": 8.994878768920898, |
| "learning_rate": 8.164033107599699e-06, |
| "loss": 0.3087, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.6403310759969902, |
| "grad_norm": 7.929622173309326, |
| "learning_rate": 8.201655379984952e-06, |
| "loss": 0.3207, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.6478555304740405, |
| "grad_norm": 8.899545669555664, |
| "learning_rate": 8.239277652370203e-06, |
| "loss": 0.3533, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.655379984951091, |
| "grad_norm": 10.590201377868652, |
| "learning_rate": 8.276899924755456e-06, |
| "loss": 0.2689, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.6629044394281416, |
| "grad_norm": 8.942581176757812, |
| "learning_rate": 8.314522197140708e-06, |
| "loss": 0.375, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.670428893905192, |
| "grad_norm": 9.189504623413086, |
| "learning_rate": 8.35214446952596e-06, |
| "loss": 0.2767, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.6779533483822422, |
| "grad_norm": 9.808874130249023, |
| "learning_rate": 8.389766741911211e-06, |
| "loss": 0.3574, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.6854778028592927, |
| "grad_norm": 11.146899223327637, |
| "learning_rate": 8.427389014296464e-06, |
| "loss": 0.3606, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.6930022573363432, |
| "grad_norm": 7.3132147789001465, |
| "learning_rate": 8.465011286681716e-06, |
| "loss": 0.3625, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.7005267118133935, |
| "grad_norm": 8.39673137664795, |
| "learning_rate": 8.502633559066969e-06, |
| "loss": 0.303, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.7080511662904438, |
| "grad_norm": 7.691243648529053, |
| "learning_rate": 8.54025583145222e-06, |
| "loss": 0.2824, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.7155756207674944, |
| "grad_norm": 9.144414901733398, |
| "learning_rate": 8.577878103837472e-06, |
| "loss": 0.3247, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.723100075244545, |
| "grad_norm": 6.159042835235596, |
| "learning_rate": 8.615500376222724e-06, |
| "loss": 0.3105, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.7306245297215952, |
| "grad_norm": 11.039318084716797, |
| "learning_rate": 8.653122648607977e-06, |
| "loss": 0.3481, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.7381489841986455, |
| "grad_norm": 8.701876640319824, |
| "learning_rate": 8.690744920993228e-06, |
| "loss": 0.3395, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.745673438675696, |
| "grad_norm": 9.022965431213379, |
| "learning_rate": 8.728367193378481e-06, |
| "loss": 0.2569, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.7531978931527465, |
| "grad_norm": 13.029145240783691, |
| "learning_rate": 8.765989465763733e-06, |
| "loss": 0.3193, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.7607223476297968, |
| "grad_norm": 10.46054744720459, |
| "learning_rate": 8.803611738148985e-06, |
| "loss": 0.3152, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.7682468021068471, |
| "grad_norm": 8.319682121276855, |
| "learning_rate": 8.841234010534236e-06, |
| "loss": 0.3078, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.7757712565838977, |
| "grad_norm": 10.117603302001953, |
| "learning_rate": 8.87885628291949e-06, |
| "loss": 0.2769, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.7832957110609482, |
| "grad_norm": 10.86999797821045, |
| "learning_rate": 8.91647855530474e-06, |
| "loss": 0.3529, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.7908201655379985, |
| "grad_norm": 9.869336128234863, |
| "learning_rate": 8.954100827689994e-06, |
| "loss": 0.2522, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.7983446200150488, |
| "grad_norm": 11.369247436523438, |
| "learning_rate": 8.991723100075246e-06, |
| "loss": 0.2605, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.8058690744920993, |
| "grad_norm": 7.237309455871582, |
| "learning_rate": 9.029345372460497e-06, |
| "loss": 0.2847, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.8133935289691498, |
| "grad_norm": 8.000275611877441, |
| "learning_rate": 9.066967644845749e-06, |
| "loss": 0.3287, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.8209179834462002, |
| "grad_norm": 9.019206047058105, |
| "learning_rate": 9.104589917231002e-06, |
| "loss": 0.3403, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.8284424379232505, |
| "grad_norm": 8.670087814331055, |
| "learning_rate": 9.142212189616253e-06, |
| "loss": 0.3597, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.835966892400301, |
| "grad_norm": 8.644824028015137, |
| "learning_rate": 9.179834462001505e-06, |
| "loss": 0.3055, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.8434913468773515, |
| "grad_norm": 6.5850324630737305, |
| "learning_rate": 9.217456734386758e-06, |
| "loss": 0.3047, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.8510158013544018, |
| "grad_norm": 7.342716693878174, |
| "learning_rate": 9.25507900677201e-06, |
| "loss": 0.3001, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.858540255831452, |
| "grad_norm": 10.784932136535645, |
| "learning_rate": 9.292701279157263e-06, |
| "loss": 0.327, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.8660647103085026, |
| "grad_norm": 11.683695793151855, |
| "learning_rate": 9.330323551542514e-06, |
| "loss": 0.3108, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.8735891647855532, |
| "grad_norm": 7.701004505157471, |
| "learning_rate": 9.367945823927766e-06, |
| "loss": 0.3059, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.8811136192626035, |
| "grad_norm": 13.68051528930664, |
| "learning_rate": 9.405568096313017e-06, |
| "loss": 0.3088, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.8886380737396538, |
| "grad_norm": 8.20069408416748, |
| "learning_rate": 9.44319036869827e-06, |
| "loss": 0.292, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.8961625282167043, |
| "grad_norm": 9.032533645629883, |
| "learning_rate": 9.480812641083522e-06, |
| "loss": 0.3392, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.9036869826937548, |
| "grad_norm": 8.278692245483398, |
| "learning_rate": 9.518434913468775e-06, |
| "loss": 0.2788, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.911211437170805, |
| "grad_norm": 8.68079948425293, |
| "learning_rate": 9.556057185854027e-06, |
| "loss": 0.3204, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.9187358916478554, |
| "grad_norm": 6.456122875213623, |
| "learning_rate": 9.593679458239278e-06, |
| "loss": 0.2492, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.926260346124906, |
| "grad_norm": 9.514248847961426, |
| "learning_rate": 9.63130173062453e-06, |
| "loss": 0.3392, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.9337848006019565, |
| "grad_norm": 11.629880905151367, |
| "learning_rate": 9.668924003009783e-06, |
| "loss": 0.3183, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.9413092550790068, |
| "grad_norm": 9.674823760986328, |
| "learning_rate": 9.706546275395035e-06, |
| "loss": 0.3305, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.948833709556057, |
| "grad_norm": 12.23519515991211, |
| "learning_rate": 9.744168547780288e-06, |
| "loss": 0.3487, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.9563581640331076, |
| "grad_norm": 10.810482025146484, |
| "learning_rate": 9.78179082016554e-06, |
| "loss": 0.3323, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.963882618510158, |
| "grad_norm": 11.973986625671387, |
| "learning_rate": 9.81941309255079e-06, |
| "loss": 0.2864, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.9714070729872084, |
| "grad_norm": 13.691604614257812, |
| "learning_rate": 9.857035364936042e-06, |
| "loss": 0.3814, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.9789315274642587, |
| "grad_norm": 8.417920112609863, |
| "learning_rate": 9.894657637321296e-06, |
| "loss": 0.249, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.9864559819413092, |
| "grad_norm": 7.412376880645752, |
| "learning_rate": 9.932279909706547e-06, |
| "loss": 0.3331, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.9939804364183598, |
| "grad_norm": 10.459517478942871, |
| "learning_rate": 9.9699021820918e-06, |
| "loss": 0.3042, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.00150489089541, |
| "grad_norm": 13.24636459350586, |
| "learning_rate": 9.999163949502551e-06, |
| "loss": 0.3484, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.0090293453724604, |
| "grad_norm": 9.26960277557373, |
| "learning_rate": 9.9949836970153e-06, |
| "loss": 0.3132, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.016553799849511, |
| "grad_norm": 7.454202175140381, |
| "learning_rate": 9.99080344452805e-06, |
| "loss": 0.3403, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.0240782543265614, |
| "grad_norm": 6.4221391677856445, |
| "learning_rate": 9.9866231920408e-06, |
| "loss": 0.2614, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.0316027088036117, |
| "grad_norm": 9.066787719726562, |
| "learning_rate": 9.982442939553549e-06, |
| "loss": 0.3235, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.039127163280662, |
| "grad_norm": 12.979838371276855, |
| "learning_rate": 9.9782626870663e-06, |
| "loss": 0.2818, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.0466516177577128, |
| "grad_norm": 8.948103904724121, |
| "learning_rate": 9.97408243457905e-06, |
| "loss": 0.2811, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.054176072234763, |
| "grad_norm": 4.697547912597656, |
| "learning_rate": 9.9699021820918e-06, |
| "loss": 0.2272, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.0617005267118134, |
| "grad_norm": 10.13204288482666, |
| "learning_rate": 9.965721929604549e-06, |
| "loss": 0.3073, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.0692249811888637, |
| "grad_norm": 10.628745079040527, |
| "learning_rate": 9.9615416771173e-06, |
| "loss": 0.2673, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.0767494356659144, |
| "grad_norm": 9.946393966674805, |
| "learning_rate": 9.957361424630048e-06, |
| "loss": 0.3093, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.0842738901429647, |
| "grad_norm": 9.905533790588379, |
| "learning_rate": 9.953181172142798e-06, |
| "loss": 0.2494, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.091798344620015, |
| "grad_norm": 12.323580741882324, |
| "learning_rate": 9.949000919655549e-06, |
| "loss": 0.2977, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.0993227990970653, |
| "grad_norm": 12.842970848083496, |
| "learning_rate": 9.944820667168298e-06, |
| "loss": 0.2829, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.106847253574116, |
| "grad_norm": 8.416594505310059, |
| "learning_rate": 9.940640414681048e-06, |
| "loss": 0.2974, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.1143717080511664, |
| "grad_norm": 8.839917182922363, |
| "learning_rate": 9.936460162193797e-06, |
| "loss": 0.2905, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.1218961625282167, |
| "grad_norm": 9.319620132446289, |
| "learning_rate": 9.932279909706547e-06, |
| "loss": 0.2812, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.129420617005267, |
| "grad_norm": 6.369785785675049, |
| "learning_rate": 9.928099657219297e-06, |
| "loss": 0.2398, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.1369450714823177, |
| "grad_norm": 9.347760200500488, |
| "learning_rate": 9.923919404732046e-06, |
| "loss": 0.2982, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.144469525959368, |
| "grad_norm": 10.478535652160645, |
| "learning_rate": 9.919739152244797e-06, |
| "loss": 0.3347, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.1519939804364183, |
| "grad_norm": 9.874757766723633, |
| "learning_rate": 9.915558899757545e-06, |
| "loss": 0.2499, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.1595184349134686, |
| "grad_norm": 10.045086860656738, |
| "learning_rate": 9.911378647270296e-06, |
| "loss": 0.2496, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.1670428893905194, |
| "grad_norm": 10.65018367767334, |
| "learning_rate": 9.907198394783046e-06, |
| "loss": 0.2742, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.1745673438675697, |
| "grad_norm": 7.430573463439941, |
| "learning_rate": 9.903018142295795e-06, |
| "loss": 0.2774, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.18209179834462, |
| "grad_norm": 6.933887958526611, |
| "learning_rate": 9.898837889808545e-06, |
| "loss": 0.2552, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.1896162528216703, |
| "grad_norm": 6.639419078826904, |
| "learning_rate": 9.894657637321296e-06, |
| "loss": 0.3397, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.197140707298721, |
| "grad_norm": 10.355103492736816, |
| "learning_rate": 9.890477384834046e-06, |
| "loss": 0.3017, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.2046651617757713, |
| "grad_norm": 10.879512786865234, |
| "learning_rate": 9.886297132346795e-06, |
| "loss": 0.3511, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.2121896162528216, |
| "grad_norm": 10.200553894042969, |
| "learning_rate": 9.882116879859545e-06, |
| "loss": 0.2442, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.219714070729872, |
| "grad_norm": 9.567547798156738, |
| "learning_rate": 9.877936627372294e-06, |
| "loss": 0.3113, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.2272385252069227, |
| "grad_norm": 8.075740814208984, |
| "learning_rate": 9.873756374885044e-06, |
| "loss": 0.2494, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.234762979683973, |
| "grad_norm": 10.302295684814453, |
| "learning_rate": 9.869576122397795e-06, |
| "loss": 0.2663, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.2422874341610233, |
| "grad_norm": 8.013357162475586, |
| "learning_rate": 9.865395869910543e-06, |
| "loss": 0.3004, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.2498118886380736, |
| "grad_norm": 6.954658031463623, |
| "learning_rate": 9.861215617423294e-06, |
| "loss": 0.3297, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.2573363431151243, |
| "grad_norm": 9.596476554870605, |
| "learning_rate": 9.857035364936042e-06, |
| "loss": 0.3145, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.2648607975921746, |
| "grad_norm": 7.96026086807251, |
| "learning_rate": 9.852855112448793e-06, |
| "loss": 0.282, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.272385252069225, |
| "grad_norm": 12.317435264587402, |
| "learning_rate": 9.848674859961541e-06, |
| "loss": 0.3573, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.2799097065462752, |
| "grad_norm": 6.821436405181885, |
| "learning_rate": 9.844494607474292e-06, |
| "loss": 0.2626, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.287434161023326, |
| "grad_norm": 12.15832233428955, |
| "learning_rate": 9.840314354987042e-06, |
| "loss": 0.2442, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.2949586155003763, |
| "grad_norm": 9.358490943908691, |
| "learning_rate": 9.836134102499791e-06, |
| "loss": 0.2295, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.3024830699774266, |
| "grad_norm": 6.147063255310059, |
| "learning_rate": 9.831953850012541e-06, |
| "loss": 0.2546, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.310007524454477, |
| "grad_norm": 9.920206069946289, |
| "learning_rate": 9.82777359752529e-06, |
| "loss": 0.3145, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.3175319789315276, |
| "grad_norm": 7.524102210998535, |
| "learning_rate": 9.823593345038042e-06, |
| "loss": 0.2276, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.325056433408578, |
| "grad_norm": 10.635570526123047, |
| "learning_rate": 9.81941309255079e-06, |
| "loss": 0.2949, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.3325808878856282, |
| "grad_norm": 11.139577865600586, |
| "learning_rate": 9.815232840063541e-06, |
| "loss": 0.2903, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.3401053423626785, |
| "grad_norm": 9.326565742492676, |
| "learning_rate": 9.81105258757629e-06, |
| "loss": 0.2521, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.3476297968397293, |
| "grad_norm": 11.433185577392578, |
| "learning_rate": 9.80687233508904e-06, |
| "loss": 0.3183, |
| "step": 3120 |
| }, |
| { |
| "epoch": 2.3551542513167796, |
| "grad_norm": 11.322211265563965, |
| "learning_rate": 9.80269208260179e-06, |
| "loss": 0.343, |
| "step": 3130 |
| }, |
| { |
| "epoch": 2.36267870579383, |
| "grad_norm": 11.234041213989258, |
| "learning_rate": 9.79851183011454e-06, |
| "loss": 0.3125, |
| "step": 3140 |
| }, |
| { |
| "epoch": 2.37020316027088, |
| "grad_norm": 10.041305541992188, |
| "learning_rate": 9.79433157762729e-06, |
| "loss": 0.3574, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.377727614747931, |
| "grad_norm": 9.322949409484863, |
| "learning_rate": 9.790151325140038e-06, |
| "loss": 0.2861, |
| "step": 3160 |
| }, |
| { |
| "epoch": 2.3852520692249812, |
| "grad_norm": 6.304590225219727, |
| "learning_rate": 9.785971072652789e-06, |
| "loss": 0.3074, |
| "step": 3170 |
| }, |
| { |
| "epoch": 2.3927765237020315, |
| "grad_norm": 9.552847862243652, |
| "learning_rate": 9.78179082016554e-06, |
| "loss": 0.308, |
| "step": 3180 |
| }, |
| { |
| "epoch": 2.400300978179082, |
| "grad_norm": 10.4456148147583, |
| "learning_rate": 9.777610567678288e-06, |
| "loss": 0.3444, |
| "step": 3190 |
| }, |
| { |
| "epoch": 2.4078254326561326, |
| "grad_norm": 9.106879234313965, |
| "learning_rate": 9.773430315191038e-06, |
| "loss": 0.2704, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.415349887133183, |
| "grad_norm": 9.76545524597168, |
| "learning_rate": 9.769250062703787e-06, |
| "loss": 0.2425, |
| "step": 3210 |
| }, |
| { |
| "epoch": 2.422874341610233, |
| "grad_norm": 9.640020370483398, |
| "learning_rate": 9.765069810216537e-06, |
| "loss": 0.3644, |
| "step": 3220 |
| }, |
| { |
| "epoch": 2.4303987960872835, |
| "grad_norm": 9.917800903320312, |
| "learning_rate": 9.760889557729288e-06, |
| "loss": 0.2608, |
| "step": 3230 |
| }, |
| { |
| "epoch": 2.4379232505643342, |
| "grad_norm": 8.935245513916016, |
| "learning_rate": 9.756709305242037e-06, |
| "loss": 0.3022, |
| "step": 3240 |
| }, |
| { |
| "epoch": 2.4454477050413845, |
| "grad_norm": 13.130419731140137, |
| "learning_rate": 9.752529052754787e-06, |
| "loss": 0.3011, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.452972159518435, |
| "grad_norm": 7.8220906257629395, |
| "learning_rate": 9.748348800267537e-06, |
| "loss": 0.3396, |
| "step": 3260 |
| }, |
| { |
| "epoch": 2.460496613995485, |
| "grad_norm": 6.676682472229004, |
| "learning_rate": 9.744168547780288e-06, |
| "loss": 0.2493, |
| "step": 3270 |
| }, |
| { |
| "epoch": 2.468021068472536, |
| "grad_norm": 10.557263374328613, |
| "learning_rate": 9.739988295293036e-06, |
| "loss": 0.2786, |
| "step": 3280 |
| }, |
| { |
| "epoch": 2.475545522949586, |
| "grad_norm": 11.085116386413574, |
| "learning_rate": 9.735808042805787e-06, |
| "loss": 0.2823, |
| "step": 3290 |
| }, |
| { |
| "epoch": 2.4830699774266365, |
| "grad_norm": 12.245048522949219, |
| "learning_rate": 9.731627790318536e-06, |
| "loss": 0.2572, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.490594431903687, |
| "grad_norm": 10.905339241027832, |
| "learning_rate": 9.727447537831286e-06, |
| "loss": 0.2343, |
| "step": 3310 |
| }, |
| { |
| "epoch": 2.4981188863807375, |
| "grad_norm": 10.4273681640625, |
| "learning_rate": 9.723267285344036e-06, |
| "loss": 0.2734, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.505643340857788, |
| "grad_norm": 9.312917709350586, |
| "learning_rate": 9.719087032856785e-06, |
| "loss": 0.2631, |
| "step": 3330 |
| }, |
| { |
| "epoch": 2.513167795334838, |
| "grad_norm": 4.157855033874512, |
| "learning_rate": 9.714906780369535e-06, |
| "loss": 0.2968, |
| "step": 3340 |
| }, |
| { |
| "epoch": 2.520692249811889, |
| "grad_norm": 10.894584655761719, |
| "learning_rate": 9.710726527882284e-06, |
| "loss": 0.3412, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.528216704288939, |
| "grad_norm": 10.197948455810547, |
| "learning_rate": 9.706546275395035e-06, |
| "loss": 0.301, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.5357411587659895, |
| "grad_norm": 10.176607131958008, |
| "learning_rate": 9.702366022907785e-06, |
| "loss": 0.289, |
| "step": 3370 |
| }, |
| { |
| "epoch": 2.54326561324304, |
| "grad_norm": 10.578222274780273, |
| "learning_rate": 9.698185770420534e-06, |
| "loss": 0.245, |
| "step": 3380 |
| }, |
| { |
| "epoch": 2.55079006772009, |
| "grad_norm": 7.295693874359131, |
| "learning_rate": 9.694005517933284e-06, |
| "loss": 0.2981, |
| "step": 3390 |
| }, |
| { |
| "epoch": 2.558314522197141, |
| "grad_norm": 8.082398414611816, |
| "learning_rate": 9.689825265446033e-06, |
| "loss": 0.2726, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.565838976674191, |
| "grad_norm": 7.935963153839111, |
| "learning_rate": 9.685645012958783e-06, |
| "loss": 0.2706, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.5733634311512414, |
| "grad_norm": 10.23719596862793, |
| "learning_rate": 9.681464760471534e-06, |
| "loss": 0.2492, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.580887885628292, |
| "grad_norm": 8.610269546508789, |
| "learning_rate": 9.677284507984282e-06, |
| "loss": 0.2644, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.5884123401053425, |
| "grad_norm": 8.948534965515137, |
| "learning_rate": 9.673104255497033e-06, |
| "loss": 0.2806, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.595936794582393, |
| "grad_norm": 10.290976524353027, |
| "learning_rate": 9.668924003009783e-06, |
| "loss": 0.2455, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.603461249059443, |
| "grad_norm": 6.816092014312744, |
| "learning_rate": 9.664743750522533e-06, |
| "loss": 0.2674, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.6109857035364934, |
| "grad_norm": 6.8829264640808105, |
| "learning_rate": 9.660563498035282e-06, |
| "loss": 0.2256, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.618510158013544, |
| "grad_norm": 10.476147651672363, |
| "learning_rate": 9.656383245548033e-06, |
| "loss": 0.239, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.6260346124905944, |
| "grad_norm": 10.579778671264648, |
| "learning_rate": 9.652202993060781e-06, |
| "loss": 0.3197, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.6335590669676447, |
| "grad_norm": 7.390209197998047, |
| "learning_rate": 9.648022740573532e-06, |
| "loss": 0.2487, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.6410835214446955, |
| "grad_norm": 7.1727447509765625, |
| "learning_rate": 9.643842488086282e-06, |
| "loss": 0.276, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.648607975921746, |
| "grad_norm": 10.80611515045166, |
| "learning_rate": 9.63966223559903e-06, |
| "loss": 0.2532, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.656132430398796, |
| "grad_norm": 5.822349548339844, |
| "learning_rate": 9.635481983111781e-06, |
| "loss": 0.2249, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.6636568848758464, |
| "grad_norm": 8.362287521362305, |
| "learning_rate": 9.63130173062453e-06, |
| "loss": 0.2445, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.6711813393528967, |
| "grad_norm": 10.724336624145508, |
| "learning_rate": 9.62712147813728e-06, |
| "loss": 0.3065, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.6787057938299474, |
| "grad_norm": 7.8322601318359375, |
| "learning_rate": 9.62294122565003e-06, |
| "loss": 0.2532, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.6862302483069977, |
| "grad_norm": 8.452668190002441, |
| "learning_rate": 9.61876097316278e-06, |
| "loss": 0.2857, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.693754702784048, |
| "grad_norm": 7.749220848083496, |
| "learning_rate": 9.61458072067553e-06, |
| "loss": 0.2676, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.701279157261099, |
| "grad_norm": 5.934022426605225, |
| "learning_rate": 9.610400468188278e-06, |
| "loss": 0.2524, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.708803611738149, |
| "grad_norm": 8.625337600708008, |
| "learning_rate": 9.606220215701029e-06, |
| "loss": 0.3011, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.7163280662151994, |
| "grad_norm": 10.652859687805176, |
| "learning_rate": 9.60203996321378e-06, |
| "loss": 0.2581, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.7238525206922497, |
| "grad_norm": 6.213090896606445, |
| "learning_rate": 9.59785971072653e-06, |
| "loss": 0.2808, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.7313769751693, |
| "grad_norm": 10.768575668334961, |
| "learning_rate": 9.593679458239278e-06, |
| "loss": 0.3132, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.7389014296463507, |
| "grad_norm": 9.935160636901855, |
| "learning_rate": 9.589499205752029e-06, |
| "loss": 0.2761, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.746425884123401, |
| "grad_norm": 10.432900428771973, |
| "learning_rate": 9.585318953264779e-06, |
| "loss": 0.2838, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.7539503386004514, |
| "grad_norm": 7.264293670654297, |
| "learning_rate": 9.581138700777528e-06, |
| "loss": 0.267, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.761474793077502, |
| "grad_norm": 10.48466968536377, |
| "learning_rate": 9.576958448290278e-06, |
| "loss": 0.2085, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.7689992475545524, |
| "grad_norm": 10.649408340454102, |
| "learning_rate": 9.572778195803027e-06, |
| "loss": 0.3237, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.7765237020316027, |
| "grad_norm": 7.883304119110107, |
| "learning_rate": 9.568597943315777e-06, |
| "loss": 0.2566, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.784048156508653, |
| "grad_norm": 8.996721267700195, |
| "learning_rate": 9.564417690828528e-06, |
| "loss": 0.243, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.7915726109857033, |
| "grad_norm": 10.833441734313965, |
| "learning_rate": 9.560237438341276e-06, |
| "loss": 0.2329, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.799097065462754, |
| "grad_norm": 8.895788192749023, |
| "learning_rate": 9.556057185854027e-06, |
| "loss": 0.2802, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.8066215199398044, |
| "grad_norm": 10.032631874084473, |
| "learning_rate": 9.551876933366775e-06, |
| "loss": 0.2527, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.8141459744168547, |
| "grad_norm": 11.826578140258789, |
| "learning_rate": 9.547696680879526e-06, |
| "loss": 0.2601, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.8216704288939054, |
| "grad_norm": 9.224230766296387, |
| "learning_rate": 9.543516428392276e-06, |
| "loss": 0.2083, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.8291948833709557, |
| "grad_norm": 8.846405029296875, |
| "learning_rate": 9.539336175905025e-06, |
| "loss": 0.2695, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.836719337848006, |
| "grad_norm": 8.925997734069824, |
| "learning_rate": 9.535155923417775e-06, |
| "loss": 0.2629, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.8442437923250563, |
| "grad_norm": 7.406662940979004, |
| "learning_rate": 9.530975670930524e-06, |
| "loss": 0.2627, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.8517682468021066, |
| "grad_norm": 8.157476425170898, |
| "learning_rate": 9.526795418443274e-06, |
| "loss": 0.2843, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.8592927012791574, |
| "grad_norm": 8.885982513427734, |
| "learning_rate": 9.522615165956023e-06, |
| "loss": 0.2227, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.8668171557562077, |
| "grad_norm": 10.690934181213379, |
| "learning_rate": 9.518434913468775e-06, |
| "loss": 0.2469, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.874341610233258, |
| "grad_norm": 7.025122165679932, |
| "learning_rate": 9.514254660981524e-06, |
| "loss": 0.2279, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.8818660647103087, |
| "grad_norm": 10.257140159606934, |
| "learning_rate": 9.510074408494274e-06, |
| "loss": 0.2779, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.889390519187359, |
| "grad_norm": 8.374255180358887, |
| "learning_rate": 9.505894156007025e-06, |
| "loss": 0.2543, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.8969149736644093, |
| "grad_norm": 9.439533233642578, |
| "learning_rate": 9.501713903519773e-06, |
| "loss": 0.2717, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.9044394281414596, |
| "grad_norm": 6.661174774169922, |
| "learning_rate": 9.497533651032524e-06, |
| "loss": 0.2418, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.91196388261851, |
| "grad_norm": 9.886220932006836, |
| "learning_rate": 9.493353398545273e-06, |
| "loss": 0.2159, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.9194883370955607, |
| "grad_norm": 6.797257423400879, |
| "learning_rate": 9.489173146058023e-06, |
| "loss": 0.1922, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.927012791572611, |
| "grad_norm": 7.842899322509766, |
| "learning_rate": 9.484992893570772e-06, |
| "loss": 0.2535, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.9345372460496613, |
| "grad_norm": 9.077611923217773, |
| "learning_rate": 9.480812641083522e-06, |
| "loss": 0.2766, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.942061700526712, |
| "grad_norm": 8.896077156066895, |
| "learning_rate": 9.476632388596272e-06, |
| "loss": 0.2413, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.9495861550037623, |
| "grad_norm": 3.8827147483825684, |
| "learning_rate": 9.472452136109021e-06, |
| "loss": 0.2291, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.9571106094808126, |
| "grad_norm": 8.659483909606934, |
| "learning_rate": 9.468271883621772e-06, |
| "loss": 0.2654, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.964635063957863, |
| "grad_norm": 8.626335144042969, |
| "learning_rate": 9.46409163113452e-06, |
| "loss": 0.244, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.972159518434913, |
| "grad_norm": 11.298845291137695, |
| "learning_rate": 9.45991137864727e-06, |
| "loss": 0.298, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.979683972911964, |
| "grad_norm": 6.526998519897461, |
| "learning_rate": 9.455731126160021e-06, |
| "loss": 0.1483, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.9872084273890143, |
| "grad_norm": 5.75825309753418, |
| "learning_rate": 9.45155087367277e-06, |
| "loss": 0.215, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.9947328818660646, |
| "grad_norm": 6.820727348327637, |
| "learning_rate": 9.44737062118552e-06, |
| "loss": 0.2889, |
| "step": 3980 |
| }, |
| { |
| "epoch": 3.0022573363431153, |
| "grad_norm": 6.299858093261719, |
| "learning_rate": 9.44319036869827e-06, |
| "loss": 0.1838, |
| "step": 3990 |
| }, |
| { |
| "epoch": 3.0097817908201656, |
| "grad_norm": 8.910431861877441, |
| "learning_rate": 9.439010116211021e-06, |
| "loss": 0.265, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.017306245297216, |
| "grad_norm": 6.495157718658447, |
| "learning_rate": 9.43482986372377e-06, |
| "loss": 0.2014, |
| "step": 4010 |
| }, |
| { |
| "epoch": 3.024830699774266, |
| "grad_norm": 8.76952075958252, |
| "learning_rate": 9.43064961123652e-06, |
| "loss": 0.2443, |
| "step": 4020 |
| }, |
| { |
| "epoch": 3.032355154251317, |
| "grad_norm": 11.289894104003906, |
| "learning_rate": 9.426469358749269e-06, |
| "loss": 0.2421, |
| "step": 4030 |
| }, |
| { |
| "epoch": 3.0398796087283673, |
| "grad_norm": 9.681814193725586, |
| "learning_rate": 9.422289106262019e-06, |
| "loss": 0.2, |
| "step": 4040 |
| }, |
| { |
| "epoch": 3.0474040632054176, |
| "grad_norm": 9.460895538330078, |
| "learning_rate": 9.41810885377477e-06, |
| "loss": 0.22, |
| "step": 4050 |
| }, |
| { |
| "epoch": 3.054928517682468, |
| "grad_norm": 8.664941787719727, |
| "learning_rate": 9.413928601287518e-06, |
| "loss": 0.241, |
| "step": 4060 |
| }, |
| { |
| "epoch": 3.0624529721595186, |
| "grad_norm": 8.543761253356934, |
| "learning_rate": 9.409748348800269e-06, |
| "loss": 0.2509, |
| "step": 4070 |
| }, |
| { |
| "epoch": 3.069977426636569, |
| "grad_norm": 8.258243560791016, |
| "learning_rate": 9.405568096313017e-06, |
| "loss": 0.2447, |
| "step": 4080 |
| }, |
| { |
| "epoch": 3.077501881113619, |
| "grad_norm": 11.222923278808594, |
| "learning_rate": 9.401387843825768e-06, |
| "loss": 0.2414, |
| "step": 4090 |
| }, |
| { |
| "epoch": 3.0850263355906695, |
| "grad_norm": 10.979453086853027, |
| "learning_rate": 9.397207591338518e-06, |
| "loss": 0.2449, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.0925507900677203, |
| "grad_norm": 7.181826591491699, |
| "learning_rate": 9.393027338851267e-06, |
| "loss": 0.2719, |
| "step": 4110 |
| }, |
| { |
| "epoch": 3.1000752445447706, |
| "grad_norm": 7.384873867034912, |
| "learning_rate": 9.388847086364017e-06, |
| "loss": 0.2834, |
| "step": 4120 |
| }, |
| { |
| "epoch": 3.107599699021821, |
| "grad_norm": 6.647815704345703, |
| "learning_rate": 9.384666833876766e-06, |
| "loss": 0.2227, |
| "step": 4130 |
| }, |
| { |
| "epoch": 3.115124153498871, |
| "grad_norm": 6.282780170440674, |
| "learning_rate": 9.380486581389516e-06, |
| "loss": 0.2574, |
| "step": 4140 |
| }, |
| { |
| "epoch": 3.122648607975922, |
| "grad_norm": 10.085737228393555, |
| "learning_rate": 9.376306328902267e-06, |
| "loss": 0.292, |
| "step": 4150 |
| }, |
| { |
| "epoch": 3.130173062452972, |
| "grad_norm": 10.34396743774414, |
| "learning_rate": 9.372126076415015e-06, |
| "loss": 0.2346, |
| "step": 4160 |
| }, |
| { |
| "epoch": 3.1376975169300225, |
| "grad_norm": 11.896784782409668, |
| "learning_rate": 9.367945823927766e-06, |
| "loss": 0.2423, |
| "step": 4170 |
| }, |
| { |
| "epoch": 3.145221971407073, |
| "grad_norm": 10.978114128112793, |
| "learning_rate": 9.363765571440516e-06, |
| "loss": 0.2671, |
| "step": 4180 |
| }, |
| { |
| "epoch": 3.1527464258841236, |
| "grad_norm": 6.393170356750488, |
| "learning_rate": 9.359585318953267e-06, |
| "loss": 0.1836, |
| "step": 4190 |
| }, |
| { |
| "epoch": 3.160270880361174, |
| "grad_norm": 6.737048149108887, |
| "learning_rate": 9.355405066466015e-06, |
| "loss": 0.1594, |
| "step": 4200 |
| }, |
| { |
| "epoch": 3.167795334838224, |
| "grad_norm": 6.920555591583252, |
| "learning_rate": 9.351224813978766e-06, |
| "loss": 0.2712, |
| "step": 4210 |
| }, |
| { |
| "epoch": 3.1753197893152745, |
| "grad_norm": 6.888144493103027, |
| "learning_rate": 9.347044561491514e-06, |
| "loss": 0.2113, |
| "step": 4220 |
| }, |
| { |
| "epoch": 3.1828442437923252, |
| "grad_norm": 7.4439167976379395, |
| "learning_rate": 9.342864309004265e-06, |
| "loss": 0.2273, |
| "step": 4230 |
| }, |
| { |
| "epoch": 3.1903686982693755, |
| "grad_norm": 5.45890998840332, |
| "learning_rate": 9.338684056517015e-06, |
| "loss": 0.2555, |
| "step": 4240 |
| }, |
| { |
| "epoch": 3.197893152746426, |
| "grad_norm": 7.8858208656311035, |
| "learning_rate": 9.334503804029764e-06, |
| "loss": 0.2308, |
| "step": 4250 |
| }, |
| { |
| "epoch": 3.205417607223476, |
| "grad_norm": 8.43179702758789, |
| "learning_rate": 9.330323551542514e-06, |
| "loss": 0.2704, |
| "step": 4260 |
| }, |
| { |
| "epoch": 3.212942061700527, |
| "grad_norm": 9.604415893554688, |
| "learning_rate": 9.326143299055263e-06, |
| "loss": 0.2345, |
| "step": 4270 |
| }, |
| { |
| "epoch": 3.220466516177577, |
| "grad_norm": 5.360594749450684, |
| "learning_rate": 9.321963046568013e-06, |
| "loss": 0.2418, |
| "step": 4280 |
| }, |
| { |
| "epoch": 3.2279909706546275, |
| "grad_norm": 10.218423843383789, |
| "learning_rate": 9.317782794080764e-06, |
| "loss": 0.2553, |
| "step": 4290 |
| }, |
| { |
| "epoch": 3.235515425131678, |
| "grad_norm": 6.115988254547119, |
| "learning_rate": 9.313602541593512e-06, |
| "loss": 0.2389, |
| "step": 4300 |
| }, |
| { |
| "epoch": 3.2430398796087285, |
| "grad_norm": 8.351349830627441, |
| "learning_rate": 9.309422289106263e-06, |
| "loss": 0.2156, |
| "step": 4310 |
| }, |
| { |
| "epoch": 3.250564334085779, |
| "grad_norm": 8.038823127746582, |
| "learning_rate": 9.305242036619012e-06, |
| "loss": 0.1934, |
| "step": 4320 |
| }, |
| { |
| "epoch": 3.258088788562829, |
| "grad_norm": 7.686810493469238, |
| "learning_rate": 9.301061784131762e-06, |
| "loss": 0.2239, |
| "step": 4330 |
| }, |
| { |
| "epoch": 3.2656132430398794, |
| "grad_norm": 9.159664154052734, |
| "learning_rate": 9.296881531644512e-06, |
| "loss": 0.2275, |
| "step": 4340 |
| }, |
| { |
| "epoch": 3.27313769751693, |
| "grad_norm": 8.89784049987793, |
| "learning_rate": 9.292701279157263e-06, |
| "loss": 0.2308, |
| "step": 4350 |
| }, |
| { |
| "epoch": 3.2806621519939805, |
| "grad_norm": 6.140933990478516, |
| "learning_rate": 9.288521026670011e-06, |
| "loss": 0.2337, |
| "step": 4360 |
| }, |
| { |
| "epoch": 3.288186606471031, |
| "grad_norm": 9.233074188232422, |
| "learning_rate": 9.284340774182762e-06, |
| "loss": 0.263, |
| "step": 4370 |
| }, |
| { |
| "epoch": 3.295711060948081, |
| "grad_norm": 9.521575927734375, |
| "learning_rate": 9.280160521695512e-06, |
| "loss": 0.2321, |
| "step": 4380 |
| }, |
| { |
| "epoch": 3.303235515425132, |
| "grad_norm": 6.050286769866943, |
| "learning_rate": 9.275980269208261e-06, |
| "loss": 0.2889, |
| "step": 4390 |
| }, |
| { |
| "epoch": 3.310759969902182, |
| "grad_norm": 5.523685455322266, |
| "learning_rate": 9.271800016721011e-06, |
| "loss": 0.2682, |
| "step": 4400 |
| }, |
| { |
| "epoch": 3.3182844243792324, |
| "grad_norm": 10.648170471191406, |
| "learning_rate": 9.26761976423376e-06, |
| "loss": 0.238, |
| "step": 4410 |
| }, |
| { |
| "epoch": 3.3258088788562827, |
| "grad_norm": 8.683711051940918, |
| "learning_rate": 9.26343951174651e-06, |
| "loss": 0.1859, |
| "step": 4420 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 9.345295906066895, |
| "learning_rate": 9.25925925925926e-06, |
| "loss": 0.2727, |
| "step": 4430 |
| }, |
| { |
| "epoch": 3.340857787810384, |
| "grad_norm": 7.622416973114014, |
| "learning_rate": 9.25507900677201e-06, |
| "loss": 0.1864, |
| "step": 4440 |
| }, |
| { |
| "epoch": 3.348382242287434, |
| "grad_norm": 7.486326694488525, |
| "learning_rate": 9.25089875428476e-06, |
| "loss": 0.3072, |
| "step": 4450 |
| }, |
| { |
| "epoch": 3.3559066967644844, |
| "grad_norm": 8.330912590026855, |
| "learning_rate": 9.246718501797509e-06, |
| "loss": 0.2326, |
| "step": 4460 |
| }, |
| { |
| "epoch": 3.363431151241535, |
| "grad_norm": 6.218571662902832, |
| "learning_rate": 9.242538249310259e-06, |
| "loss": 0.2677, |
| "step": 4470 |
| }, |
| { |
| "epoch": 3.3709556057185854, |
| "grad_norm": 9.183655738830566, |
| "learning_rate": 9.23835799682301e-06, |
| "loss": 0.1941, |
| "step": 4480 |
| }, |
| { |
| "epoch": 3.3784800601956357, |
| "grad_norm": 5.508805274963379, |
| "learning_rate": 9.234177744335758e-06, |
| "loss": 0.1838, |
| "step": 4490 |
| }, |
| { |
| "epoch": 3.386004514672686, |
| "grad_norm": 7.009641647338867, |
| "learning_rate": 9.229997491848509e-06, |
| "loss": 0.2221, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.393528969149737, |
| "grad_norm": 8.951751708984375, |
| "learning_rate": 9.225817239361257e-06, |
| "loss": 0.2573, |
| "step": 4510 |
| }, |
| { |
| "epoch": 3.401053423626787, |
| "grad_norm": 11.373517990112305, |
| "learning_rate": 9.221636986874008e-06, |
| "loss": 0.1813, |
| "step": 4520 |
| }, |
| { |
| "epoch": 3.4085778781038374, |
| "grad_norm": 11.15352725982666, |
| "learning_rate": 9.217456734386758e-06, |
| "loss": 0.2251, |
| "step": 4530 |
| }, |
| { |
| "epoch": 3.4161023325808877, |
| "grad_norm": 9.159383773803711, |
| "learning_rate": 9.213276481899508e-06, |
| "loss": 0.2496, |
| "step": 4540 |
| }, |
| { |
| "epoch": 3.4236267870579384, |
| "grad_norm": 11.02437973022461, |
| "learning_rate": 9.209096229412257e-06, |
| "loss": 0.2551, |
| "step": 4550 |
| }, |
| { |
| "epoch": 3.4311512415349887, |
| "grad_norm": 9.18223762512207, |
| "learning_rate": 9.204915976925008e-06, |
| "loss": 0.2264, |
| "step": 4560 |
| }, |
| { |
| "epoch": 3.438675696012039, |
| "grad_norm": 7.3303961753845215, |
| "learning_rate": 9.200735724437758e-06, |
| "loss": 0.2193, |
| "step": 4570 |
| }, |
| { |
| "epoch": 3.44620015048909, |
| "grad_norm": 8.85081672668457, |
| "learning_rate": 9.196555471950507e-06, |
| "loss": 0.2481, |
| "step": 4580 |
| }, |
| { |
| "epoch": 3.45372460496614, |
| "grad_norm": 6.6570539474487305, |
| "learning_rate": 9.192375219463257e-06, |
| "loss": 0.2238, |
| "step": 4590 |
| }, |
| { |
| "epoch": 3.4612490594431904, |
| "grad_norm": 7.035717964172363, |
| "learning_rate": 9.188194966976006e-06, |
| "loss": 0.2144, |
| "step": 4600 |
| }, |
| { |
| "epoch": 3.4687735139202407, |
| "grad_norm": 6.024372100830078, |
| "learning_rate": 9.184014714488756e-06, |
| "loss": 0.209, |
| "step": 4610 |
| }, |
| { |
| "epoch": 3.476297968397291, |
| "grad_norm": 7.535146713256836, |
| "learning_rate": 9.179834462001505e-06, |
| "loss": 0.2353, |
| "step": 4620 |
| }, |
| { |
| "epoch": 3.4838224228743417, |
| "grad_norm": 6.437473773956299, |
| "learning_rate": 9.175654209514255e-06, |
| "loss": 0.2614, |
| "step": 4630 |
| }, |
| { |
| "epoch": 3.491346877351392, |
| "grad_norm": 8.510677337646484, |
| "learning_rate": 9.171473957027006e-06, |
| "loss": 0.259, |
| "step": 4640 |
| }, |
| { |
| "epoch": 3.4988713318284423, |
| "grad_norm": 6.0311360359191895, |
| "learning_rate": 9.167293704539754e-06, |
| "loss": 0.2372, |
| "step": 4650 |
| }, |
| { |
| "epoch": 3.506395786305493, |
| "grad_norm": 10.722698211669922, |
| "learning_rate": 9.163113452052505e-06, |
| "loss": 0.2969, |
| "step": 4660 |
| }, |
| { |
| "epoch": 3.5139202407825434, |
| "grad_norm": 9.502874374389648, |
| "learning_rate": 9.158933199565253e-06, |
| "loss": 0.2657, |
| "step": 4670 |
| }, |
| { |
| "epoch": 3.5214446952595937, |
| "grad_norm": 9.616406440734863, |
| "learning_rate": 9.154752947078004e-06, |
| "loss": 0.2262, |
| "step": 4680 |
| }, |
| { |
| "epoch": 3.528969149736644, |
| "grad_norm": 8.309393882751465, |
| "learning_rate": 9.150572694590754e-06, |
| "loss": 0.1627, |
| "step": 4690 |
| }, |
| { |
| "epoch": 3.5364936042136943, |
| "grad_norm": 9.303479194641113, |
| "learning_rate": 9.146392442103503e-06, |
| "loss": 0.1774, |
| "step": 4700 |
| }, |
| { |
| "epoch": 3.544018058690745, |
| "grad_norm": 8.03176212310791, |
| "learning_rate": 9.142212189616253e-06, |
| "loss": 0.248, |
| "step": 4710 |
| }, |
| { |
| "epoch": 3.5515425131677953, |
| "grad_norm": 7.2166242599487305, |
| "learning_rate": 9.138031937129004e-06, |
| "loss": 0.2682, |
| "step": 4720 |
| }, |
| { |
| "epoch": 3.5590669676448456, |
| "grad_norm": 7.98032283782959, |
| "learning_rate": 9.133851684641754e-06, |
| "loss": 0.2244, |
| "step": 4730 |
| }, |
| { |
| "epoch": 3.5665914221218964, |
| "grad_norm": 9.044601440429688, |
| "learning_rate": 9.129671432154503e-06, |
| "loss": 0.2135, |
| "step": 4740 |
| }, |
| { |
| "epoch": 3.5741158765989467, |
| "grad_norm": 10.123546600341797, |
| "learning_rate": 9.125491179667253e-06, |
| "loss": 0.2442, |
| "step": 4750 |
| }, |
| { |
| "epoch": 3.581640331075997, |
| "grad_norm": 6.806577682495117, |
| "learning_rate": 9.121310927180002e-06, |
| "loss": 0.2396, |
| "step": 4760 |
| }, |
| { |
| "epoch": 3.5891647855530473, |
| "grad_norm": 7.733734130859375, |
| "learning_rate": 9.117130674692752e-06, |
| "loss": 0.231, |
| "step": 4770 |
| }, |
| { |
| "epoch": 3.5966892400300976, |
| "grad_norm": 9.914606094360352, |
| "learning_rate": 9.112950422205503e-06, |
| "loss": 0.2523, |
| "step": 4780 |
| }, |
| { |
| "epoch": 3.6042136945071483, |
| "grad_norm": 11.457139015197754, |
| "learning_rate": 9.108770169718251e-06, |
| "loss": 0.2646, |
| "step": 4790 |
| }, |
| { |
| "epoch": 3.6117381489841986, |
| "grad_norm": 8.741832733154297, |
| "learning_rate": 9.104589917231002e-06, |
| "loss": 0.2069, |
| "step": 4800 |
| }, |
| { |
| "epoch": 3.619262603461249, |
| "grad_norm": 6.032299995422363, |
| "learning_rate": 9.10040966474375e-06, |
| "loss": 0.2159, |
| "step": 4810 |
| }, |
| { |
| "epoch": 3.6267870579382997, |
| "grad_norm": 12.495452880859375, |
| "learning_rate": 9.096229412256501e-06, |
| "loss": 0.2467, |
| "step": 4820 |
| }, |
| { |
| "epoch": 3.63431151241535, |
| "grad_norm": 8.642016410827637, |
| "learning_rate": 9.092049159769251e-06, |
| "loss": 0.2526, |
| "step": 4830 |
| }, |
| { |
| "epoch": 3.6418359668924003, |
| "grad_norm": 9.123907089233398, |
| "learning_rate": 9.087868907282e-06, |
| "loss": 0.2534, |
| "step": 4840 |
| }, |
| { |
| "epoch": 3.6493604213694506, |
| "grad_norm": 9.16486644744873, |
| "learning_rate": 9.08368865479475e-06, |
| "loss": 0.2276, |
| "step": 4850 |
| }, |
| { |
| "epoch": 3.656884875846501, |
| "grad_norm": 8.667745590209961, |
| "learning_rate": 9.079508402307499e-06, |
| "loss": 0.2348, |
| "step": 4860 |
| }, |
| { |
| "epoch": 3.6644093303235517, |
| "grad_norm": 6.899482727050781, |
| "learning_rate": 9.07532814982025e-06, |
| "loss": 0.2081, |
| "step": 4870 |
| }, |
| { |
| "epoch": 3.671933784800602, |
| "grad_norm": 9.625839233398438, |
| "learning_rate": 9.071147897333e-06, |
| "loss": 0.2378, |
| "step": 4880 |
| }, |
| { |
| "epoch": 3.6794582392776523, |
| "grad_norm": 6.170398712158203, |
| "learning_rate": 9.066967644845749e-06, |
| "loss": 0.2897, |
| "step": 4890 |
| }, |
| { |
| "epoch": 3.686982693754703, |
| "grad_norm": 9.555203437805176, |
| "learning_rate": 9.062787392358499e-06, |
| "loss": 0.2446, |
| "step": 4900 |
| }, |
| { |
| "epoch": 3.6945071482317533, |
| "grad_norm": 6.0709052085876465, |
| "learning_rate": 9.05860713987125e-06, |
| "loss": 0.2073, |
| "step": 4910 |
| }, |
| { |
| "epoch": 3.7020316027088036, |
| "grad_norm": 8.169535636901855, |
| "learning_rate": 9.054426887384e-06, |
| "loss": 0.3046, |
| "step": 4920 |
| }, |
| { |
| "epoch": 3.709556057185854, |
| "grad_norm": 9.92249870300293, |
| "learning_rate": 9.050246634896748e-06, |
| "loss": 0.2819, |
| "step": 4930 |
| }, |
| { |
| "epoch": 3.717080511662904, |
| "grad_norm": 8.319649696350098, |
| "learning_rate": 9.046066382409499e-06, |
| "loss": 0.184, |
| "step": 4940 |
| }, |
| { |
| "epoch": 3.724604966139955, |
| "grad_norm": 9.96754264831543, |
| "learning_rate": 9.041886129922248e-06, |
| "loss": 0.2558, |
| "step": 4950 |
| }, |
| { |
| "epoch": 3.7321294206170053, |
| "grad_norm": 9.089340209960938, |
| "learning_rate": 9.037705877434998e-06, |
| "loss": 0.1661, |
| "step": 4960 |
| }, |
| { |
| "epoch": 3.7396538750940556, |
| "grad_norm": 8.855995178222656, |
| "learning_rate": 9.033525624947748e-06, |
| "loss": 0.1761, |
| "step": 4970 |
| }, |
| { |
| "epoch": 3.7471783295711063, |
| "grad_norm": 8.411288261413574, |
| "learning_rate": 9.029345372460497e-06, |
| "loss": 0.2158, |
| "step": 4980 |
| }, |
| { |
| "epoch": 3.7547027840481566, |
| "grad_norm": 6.084978103637695, |
| "learning_rate": 9.025165119973247e-06, |
| "loss": 0.1652, |
| "step": 4990 |
| }, |
| { |
| "epoch": 3.762227238525207, |
| "grad_norm": 5.3416428565979, |
| "learning_rate": 9.020984867485996e-06, |
| "loss": 0.1894, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.769751693002257, |
| "grad_norm": 6.2633137702941895, |
| "learning_rate": 9.016804614998747e-06, |
| "loss": 0.2604, |
| "step": 5010 |
| }, |
| { |
| "epoch": 3.7772761474793075, |
| "grad_norm": 11.048465728759766, |
| "learning_rate": 9.012624362511497e-06, |
| "loss": 0.2726, |
| "step": 5020 |
| }, |
| { |
| "epoch": 3.7848006019563583, |
| "grad_norm": 8.949687004089355, |
| "learning_rate": 9.008444110024246e-06, |
| "loss": 0.2216, |
| "step": 5030 |
| }, |
| { |
| "epoch": 3.7923250564334086, |
| "grad_norm": 8.664268493652344, |
| "learning_rate": 9.004263857536996e-06, |
| "loss": 0.2437, |
| "step": 5040 |
| }, |
| { |
| "epoch": 3.799849510910459, |
| "grad_norm": 10.612902641296387, |
| "learning_rate": 9.000083605049745e-06, |
| "loss": 0.2243, |
| "step": 5050 |
| }, |
| { |
| "epoch": 3.8073739653875096, |
| "grad_norm": 6.690532684326172, |
| "learning_rate": 8.995903352562495e-06, |
| "loss": 0.1877, |
| "step": 5060 |
| }, |
| { |
| "epoch": 3.81489841986456, |
| "grad_norm": 6.29967737197876, |
| "learning_rate": 8.991723100075246e-06, |
| "loss": 0.2449, |
| "step": 5070 |
| }, |
| { |
| "epoch": 3.82242287434161, |
| "grad_norm": 8.491719245910645, |
| "learning_rate": 8.987542847587996e-06, |
| "loss": 0.2135, |
| "step": 5080 |
| }, |
| { |
| "epoch": 3.8299473288186605, |
| "grad_norm": 8.454863548278809, |
| "learning_rate": 8.983362595100745e-06, |
| "loss": 0.2471, |
| "step": 5090 |
| }, |
| { |
| "epoch": 3.837471783295711, |
| "grad_norm": 6.257556438446045, |
| "learning_rate": 8.979182342613495e-06, |
| "loss": 0.2089, |
| "step": 5100 |
| }, |
| { |
| "epoch": 3.8449962377727616, |
| "grad_norm": 8.640929222106934, |
| "learning_rate": 8.975002090126245e-06, |
| "loss": 0.2339, |
| "step": 5110 |
| }, |
| { |
| "epoch": 3.852520692249812, |
| "grad_norm": 7.349085807800293, |
| "learning_rate": 8.970821837638994e-06, |
| "loss": 0.2119, |
| "step": 5120 |
| }, |
| { |
| "epoch": 3.860045146726862, |
| "grad_norm": 10.50108814239502, |
| "learning_rate": 8.966641585151744e-06, |
| "loss": 0.263, |
| "step": 5130 |
| }, |
| { |
| "epoch": 3.867569601203913, |
| "grad_norm": 7.560617923736572, |
| "learning_rate": 8.962461332664493e-06, |
| "loss": 0.2156, |
| "step": 5140 |
| }, |
| { |
| "epoch": 3.875094055680963, |
| "grad_norm": 9.218497276306152, |
| "learning_rate": 8.958281080177244e-06, |
| "loss": 0.1824, |
| "step": 5150 |
| }, |
| { |
| "epoch": 3.8826185101580135, |
| "grad_norm": 7.747318744659424, |
| "learning_rate": 8.954100827689994e-06, |
| "loss": 0.1957, |
| "step": 5160 |
| }, |
| { |
| "epoch": 3.890142964635064, |
| "grad_norm": 5.597620487213135, |
| "learning_rate": 8.949920575202743e-06, |
| "loss": 0.2798, |
| "step": 5170 |
| }, |
| { |
| "epoch": 3.897667419112114, |
| "grad_norm": 6.475823402404785, |
| "learning_rate": 8.945740322715493e-06, |
| "loss": 0.2045, |
| "step": 5180 |
| }, |
| { |
| "epoch": 3.905191873589165, |
| "grad_norm": 8.230575561523438, |
| "learning_rate": 8.941560070228242e-06, |
| "loss": 0.2547, |
| "step": 5190 |
| }, |
| { |
| "epoch": 3.912716328066215, |
| "grad_norm": 7.861040115356445, |
| "learning_rate": 8.937379817740992e-06, |
| "loss": 0.2188, |
| "step": 5200 |
| }, |
| { |
| "epoch": 3.9202407825432655, |
| "grad_norm": 7.680462837219238, |
| "learning_rate": 8.933199565253743e-06, |
| "loss": 0.1942, |
| "step": 5210 |
| }, |
| { |
| "epoch": 3.927765237020316, |
| "grad_norm": 7.623648166656494, |
| "learning_rate": 8.929019312766491e-06, |
| "loss": 0.2264, |
| "step": 5220 |
| }, |
| { |
| "epoch": 3.9352896914973665, |
| "grad_norm": 7.062371253967285, |
| "learning_rate": 8.924839060279242e-06, |
| "loss": 0.2293, |
| "step": 5230 |
| }, |
| { |
| "epoch": 3.942814145974417, |
| "grad_norm": 10.482141494750977, |
| "learning_rate": 8.92065880779199e-06, |
| "loss": 0.2453, |
| "step": 5240 |
| }, |
| { |
| "epoch": 3.950338600451467, |
| "grad_norm": 5.7604522705078125, |
| "learning_rate": 8.91647855530474e-06, |
| "loss": 0.2279, |
| "step": 5250 |
| }, |
| { |
| "epoch": 3.9578630549285174, |
| "grad_norm": 12.028779983520508, |
| "learning_rate": 8.912298302817491e-06, |
| "loss": 0.2511, |
| "step": 5260 |
| }, |
| { |
| "epoch": 3.965387509405568, |
| "grad_norm": 7.708649158477783, |
| "learning_rate": 8.908118050330242e-06, |
| "loss": 0.199, |
| "step": 5270 |
| }, |
| { |
| "epoch": 3.9729119638826185, |
| "grad_norm": 7.3175578117370605, |
| "learning_rate": 8.90393779784299e-06, |
| "loss": 0.2619, |
| "step": 5280 |
| }, |
| { |
| "epoch": 3.9804364183596688, |
| "grad_norm": 8.127973556518555, |
| "learning_rate": 8.89975754535574e-06, |
| "loss": 0.2279, |
| "step": 5290 |
| }, |
| { |
| "epoch": 3.9879608728367195, |
| "grad_norm": 8.595681190490723, |
| "learning_rate": 8.895577292868491e-06, |
| "loss": 0.2238, |
| "step": 5300 |
| }, |
| { |
| "epoch": 3.99548532731377, |
| "grad_norm": 4.470604419708252, |
| "learning_rate": 8.89139704038124e-06, |
| "loss": 0.1792, |
| "step": 5310 |
| }, |
| { |
| "epoch": 4.00300978179082, |
| "grad_norm": 5.3823676109313965, |
| "learning_rate": 8.88721678789399e-06, |
| "loss": 0.178, |
| "step": 5320 |
| }, |
| { |
| "epoch": 4.010534236267871, |
| "grad_norm": 7.360695838928223, |
| "learning_rate": 8.883036535406739e-06, |
| "loss": 0.2077, |
| "step": 5330 |
| }, |
| { |
| "epoch": 4.018058690744921, |
| "grad_norm": 7.8422770500183105, |
| "learning_rate": 8.87885628291949e-06, |
| "loss": 0.158, |
| "step": 5340 |
| }, |
| { |
| "epoch": 4.0255831452219715, |
| "grad_norm": 6.228549480438232, |
| "learning_rate": 8.87467603043224e-06, |
| "loss": 0.2119, |
| "step": 5350 |
| }, |
| { |
| "epoch": 4.033107599699022, |
| "grad_norm": 10.569890022277832, |
| "learning_rate": 8.870495777944988e-06, |
| "loss": 0.1691, |
| "step": 5360 |
| }, |
| { |
| "epoch": 4.040632054176072, |
| "grad_norm": 8.88987922668457, |
| "learning_rate": 8.866315525457739e-06, |
| "loss": 0.1894, |
| "step": 5370 |
| }, |
| { |
| "epoch": 4.048156508653123, |
| "grad_norm": 3.7573678493499756, |
| "learning_rate": 8.862135272970487e-06, |
| "loss": 0.1762, |
| "step": 5380 |
| }, |
| { |
| "epoch": 4.055680963130173, |
| "grad_norm": 10.3701810836792, |
| "learning_rate": 8.857955020483238e-06, |
| "loss": 0.1756, |
| "step": 5390 |
| }, |
| { |
| "epoch": 4.063205417607223, |
| "grad_norm": 12.642711639404297, |
| "learning_rate": 8.853774767995987e-06, |
| "loss": 0.2486, |
| "step": 5400 |
| }, |
| { |
| "epoch": 4.070729872084274, |
| "grad_norm": 7.448667526245117, |
| "learning_rate": 8.849594515508737e-06, |
| "loss": 0.2261, |
| "step": 5410 |
| }, |
| { |
| "epoch": 4.078254326561324, |
| "grad_norm": 4.713575839996338, |
| "learning_rate": 8.845414263021487e-06, |
| "loss": 0.1614, |
| "step": 5420 |
| }, |
| { |
| "epoch": 4.085778781038375, |
| "grad_norm": 8.637237548828125, |
| "learning_rate": 8.841234010534236e-06, |
| "loss": 0.1796, |
| "step": 5430 |
| }, |
| { |
| "epoch": 4.0933032355154255, |
| "grad_norm": 8.004151344299316, |
| "learning_rate": 8.837053758046988e-06, |
| "loss": 0.1982, |
| "step": 5440 |
| }, |
| { |
| "epoch": 4.100827689992475, |
| "grad_norm": 7.788305759429932, |
| "learning_rate": 8.832873505559737e-06, |
| "loss": 0.2108, |
| "step": 5450 |
| }, |
| { |
| "epoch": 4.108352144469526, |
| "grad_norm": 9.111312866210938, |
| "learning_rate": 8.828693253072487e-06, |
| "loss": 0.2049, |
| "step": 5460 |
| }, |
| { |
| "epoch": 4.115876598946576, |
| "grad_norm": 8.527594566345215, |
| "learning_rate": 8.824513000585236e-06, |
| "loss": 0.1699, |
| "step": 5470 |
| }, |
| { |
| "epoch": 4.123401053423627, |
| "grad_norm": 11.286643981933594, |
| "learning_rate": 8.820332748097986e-06, |
| "loss": 0.2459, |
| "step": 5480 |
| }, |
| { |
| "epoch": 4.1309255079006775, |
| "grad_norm": 6.5479888916015625, |
| "learning_rate": 8.816152495610735e-06, |
| "loss": 0.2793, |
| "step": 5490 |
| }, |
| { |
| "epoch": 4.138449962377727, |
| "grad_norm": 9.018651008605957, |
| "learning_rate": 8.811972243123485e-06, |
| "loss": 0.2286, |
| "step": 5500 |
| }, |
| { |
| "epoch": 4.145974416854778, |
| "grad_norm": 7.51389741897583, |
| "learning_rate": 8.807791990636236e-06, |
| "loss": 0.2164, |
| "step": 5510 |
| }, |
| { |
| "epoch": 4.153498871331829, |
| "grad_norm": 9.141986846923828, |
| "learning_rate": 8.803611738148985e-06, |
| "loss": 0.1989, |
| "step": 5520 |
| }, |
| { |
| "epoch": 4.161023325808879, |
| "grad_norm": 11.049951553344727, |
| "learning_rate": 8.799431485661735e-06, |
| "loss": 0.2415, |
| "step": 5530 |
| }, |
| { |
| "epoch": 4.168547780285929, |
| "grad_norm": 8.980281829833984, |
| "learning_rate": 8.795251233174484e-06, |
| "loss": 0.1959, |
| "step": 5540 |
| }, |
| { |
| "epoch": 4.176072234762979, |
| "grad_norm": 9.708146095275879, |
| "learning_rate": 8.791070980687234e-06, |
| "loss": 0.2255, |
| "step": 5550 |
| }, |
| { |
| "epoch": 4.18359668924003, |
| "grad_norm": 7.6197943687438965, |
| "learning_rate": 8.786890728199984e-06, |
| "loss": 0.1875, |
| "step": 5560 |
| }, |
| { |
| "epoch": 4.191121143717081, |
| "grad_norm": 7.607595920562744, |
| "learning_rate": 8.782710475712733e-06, |
| "loss": 0.209, |
| "step": 5570 |
| }, |
| { |
| "epoch": 4.198645598194131, |
| "grad_norm": 5.671550750732422, |
| "learning_rate": 8.778530223225483e-06, |
| "loss": 0.1943, |
| "step": 5580 |
| }, |
| { |
| "epoch": 4.206170052671181, |
| "grad_norm": 7.70829963684082, |
| "learning_rate": 8.774349970738232e-06, |
| "loss": 0.1776, |
| "step": 5590 |
| }, |
| { |
| "epoch": 4.213694507148232, |
| "grad_norm": 8.614123344421387, |
| "learning_rate": 8.770169718250983e-06, |
| "loss": 0.1654, |
| "step": 5600 |
| }, |
| { |
| "epoch": 4.221218961625282, |
| "grad_norm": 8.027443885803223, |
| "learning_rate": 8.765989465763733e-06, |
| "loss": 0.1863, |
| "step": 5610 |
| }, |
| { |
| "epoch": 4.228743416102333, |
| "grad_norm": 7.041518211364746, |
| "learning_rate": 8.761809213276483e-06, |
| "loss": 0.218, |
| "step": 5620 |
| }, |
| { |
| "epoch": 4.236267870579383, |
| "grad_norm": 7.971219062805176, |
| "learning_rate": 8.757628960789232e-06, |
| "loss": 0.1982, |
| "step": 5630 |
| }, |
| { |
| "epoch": 4.243792325056433, |
| "grad_norm": 7.808959007263184, |
| "learning_rate": 8.753448708301982e-06, |
| "loss": 0.2318, |
| "step": 5640 |
| }, |
| { |
| "epoch": 4.251316779533484, |
| "grad_norm": 8.63860034942627, |
| "learning_rate": 8.749268455814733e-06, |
| "loss": 0.2269, |
| "step": 5650 |
| }, |
| { |
| "epoch": 4.258841234010534, |
| "grad_norm": 5.921948432922363, |
| "learning_rate": 8.745088203327482e-06, |
| "loss": 0.1728, |
| "step": 5660 |
| }, |
| { |
| "epoch": 4.266365688487585, |
| "grad_norm": 9.674760818481445, |
| "learning_rate": 8.740907950840232e-06, |
| "loss": 0.2094, |
| "step": 5670 |
| }, |
| { |
| "epoch": 4.273890142964635, |
| "grad_norm": 7.121826648712158, |
| "learning_rate": 8.73672769835298e-06, |
| "loss": 0.2164, |
| "step": 5680 |
| }, |
| { |
| "epoch": 4.281414597441685, |
| "grad_norm": 8.577960968017578, |
| "learning_rate": 8.732547445865731e-06, |
| "loss": 0.2483, |
| "step": 5690 |
| }, |
| { |
| "epoch": 4.288939051918736, |
| "grad_norm": 5.841510772705078, |
| "learning_rate": 8.728367193378481e-06, |
| "loss": 0.2338, |
| "step": 5700 |
| }, |
| { |
| "epoch": 4.296463506395786, |
| "grad_norm": 7.245088577270508, |
| "learning_rate": 8.72418694089123e-06, |
| "loss": 0.1879, |
| "step": 5710 |
| }, |
| { |
| "epoch": 4.303987960872837, |
| "grad_norm": 9.421006202697754, |
| "learning_rate": 8.72000668840398e-06, |
| "loss": 0.2822, |
| "step": 5720 |
| }, |
| { |
| "epoch": 4.311512415349887, |
| "grad_norm": 6.611949920654297, |
| "learning_rate": 8.71582643591673e-06, |
| "loss": 0.1874, |
| "step": 5730 |
| }, |
| { |
| "epoch": 4.319036869826937, |
| "grad_norm": 9.723703384399414, |
| "learning_rate": 8.71164618342948e-06, |
| "loss": 0.2005, |
| "step": 5740 |
| }, |
| { |
| "epoch": 4.326561324303988, |
| "grad_norm": 8.469480514526367, |
| "learning_rate": 8.70746593094223e-06, |
| "loss": 0.22, |
| "step": 5750 |
| }, |
| { |
| "epoch": 4.334085778781039, |
| "grad_norm": 9.64450454711914, |
| "learning_rate": 8.703285678454979e-06, |
| "loss": 0.2411, |
| "step": 5760 |
| }, |
| { |
| "epoch": 4.341610233258089, |
| "grad_norm": 11.526230812072754, |
| "learning_rate": 8.69910542596773e-06, |
| "loss": 0.2353, |
| "step": 5770 |
| }, |
| { |
| "epoch": 4.349134687735139, |
| "grad_norm": 5.962429523468018, |
| "learning_rate": 8.694925173480478e-06, |
| "loss": 0.1535, |
| "step": 5780 |
| }, |
| { |
| "epoch": 4.356659142212189, |
| "grad_norm": 5.385570526123047, |
| "learning_rate": 8.690744920993228e-06, |
| "loss": 0.2185, |
| "step": 5790 |
| }, |
| { |
| "epoch": 4.36418359668924, |
| "grad_norm": 5.992058753967285, |
| "learning_rate": 8.686564668505979e-06, |
| "loss": 0.2033, |
| "step": 5800 |
| }, |
| { |
| "epoch": 4.371708051166291, |
| "grad_norm": 6.653234481811523, |
| "learning_rate": 8.682384416018729e-06, |
| "loss": 0.1728, |
| "step": 5810 |
| }, |
| { |
| "epoch": 4.3792325056433405, |
| "grad_norm": 6.744475841522217, |
| "learning_rate": 8.678204163531478e-06, |
| "loss": 0.1967, |
| "step": 5820 |
| }, |
| { |
| "epoch": 4.386756960120391, |
| "grad_norm": 9.5464448928833, |
| "learning_rate": 8.674023911044228e-06, |
| "loss": 0.2279, |
| "step": 5830 |
| }, |
| { |
| "epoch": 4.394281414597442, |
| "grad_norm": 8.616525650024414, |
| "learning_rate": 8.669843658556979e-06, |
| "loss": 0.2534, |
| "step": 5840 |
| }, |
| { |
| "epoch": 4.401805869074492, |
| "grad_norm": 4.971557140350342, |
| "learning_rate": 8.665663406069727e-06, |
| "loss": 0.1697, |
| "step": 5850 |
| }, |
| { |
| "epoch": 4.409330323551543, |
| "grad_norm": 10.338679313659668, |
| "learning_rate": 8.661483153582478e-06, |
| "loss": 0.1967, |
| "step": 5860 |
| }, |
| { |
| "epoch": 4.416854778028593, |
| "grad_norm": 7.161668300628662, |
| "learning_rate": 8.657302901095226e-06, |
| "loss": 0.1669, |
| "step": 5870 |
| }, |
| { |
| "epoch": 4.424379232505643, |
| "grad_norm": 4.379268646240234, |
| "learning_rate": 8.653122648607977e-06, |
| "loss": 0.131, |
| "step": 5880 |
| }, |
| { |
| "epoch": 4.431903686982694, |
| "grad_norm": 8.893242835998535, |
| "learning_rate": 8.648942396120727e-06, |
| "loss": 0.2316, |
| "step": 5890 |
| }, |
| { |
| "epoch": 4.439428141459744, |
| "grad_norm": 6.582338333129883, |
| "learning_rate": 8.644762143633476e-06, |
| "loss": 0.2434, |
| "step": 5900 |
| }, |
| { |
| "epoch": 4.446952595936795, |
| "grad_norm": 6.187091827392578, |
| "learning_rate": 8.640581891146226e-06, |
| "loss": 0.1217, |
| "step": 5910 |
| }, |
| { |
| "epoch": 4.454477050413845, |
| "grad_norm": 8.071149826049805, |
| "learning_rate": 8.636401638658975e-06, |
| "loss": 0.1744, |
| "step": 5920 |
| }, |
| { |
| "epoch": 4.462001504890895, |
| "grad_norm": 12.656167030334473, |
| "learning_rate": 8.632221386171725e-06, |
| "loss": 0.2249, |
| "step": 5930 |
| }, |
| { |
| "epoch": 4.469525959367946, |
| "grad_norm": 9.815470695495605, |
| "learning_rate": 8.628041133684476e-06, |
| "loss": 0.1969, |
| "step": 5940 |
| }, |
| { |
| "epoch": 4.477050413844996, |
| "grad_norm": 8.92492389678955, |
| "learning_rate": 8.623860881197224e-06, |
| "loss": 0.1994, |
| "step": 5950 |
| }, |
| { |
| "epoch": 4.4845748683220465, |
| "grad_norm": 5.436546802520752, |
| "learning_rate": 8.619680628709975e-06, |
| "loss": 0.1291, |
| "step": 5960 |
| }, |
| { |
| "epoch": 4.492099322799097, |
| "grad_norm": 3.1124415397644043, |
| "learning_rate": 8.615500376222724e-06, |
| "loss": 0.1929, |
| "step": 5970 |
| }, |
| { |
| "epoch": 4.499623777276147, |
| "grad_norm": 9.723775863647461, |
| "learning_rate": 8.611320123735476e-06, |
| "loss": 0.1891, |
| "step": 5980 |
| }, |
| { |
| "epoch": 4.507148231753198, |
| "grad_norm": 9.545393943786621, |
| "learning_rate": 8.607139871248224e-06, |
| "loss": 0.1785, |
| "step": 5990 |
| }, |
| { |
| "epoch": 4.514672686230249, |
| "grad_norm": 5.5943098068237305, |
| "learning_rate": 8.602959618760975e-06, |
| "loss": 0.1526, |
| "step": 6000 |
| }, |
| { |
| "epoch": 4.5221971407072985, |
| "grad_norm": 5.557697772979736, |
| "learning_rate": 8.598779366273723e-06, |
| "loss": 0.2211, |
| "step": 6010 |
| }, |
| { |
| "epoch": 4.529721595184349, |
| "grad_norm": 3.947209596633911, |
| "learning_rate": 8.594599113786474e-06, |
| "loss": 0.1528, |
| "step": 6020 |
| }, |
| { |
| "epoch": 4.5372460496614, |
| "grad_norm": 7.38191556930542, |
| "learning_rate": 8.590418861299224e-06, |
| "loss": 0.1562, |
| "step": 6030 |
| }, |
| { |
| "epoch": 4.54477050413845, |
| "grad_norm": 10.425215721130371, |
| "learning_rate": 8.586238608811973e-06, |
| "loss": 0.1897, |
| "step": 6040 |
| }, |
| { |
| "epoch": 4.552294958615501, |
| "grad_norm": 5.334704399108887, |
| "learning_rate": 8.582058356324723e-06, |
| "loss": 0.174, |
| "step": 6050 |
| }, |
| { |
| "epoch": 4.5598194130925505, |
| "grad_norm": 6.482622146606445, |
| "learning_rate": 8.577878103837472e-06, |
| "loss": 0.1774, |
| "step": 6060 |
| }, |
| { |
| "epoch": 4.567343867569601, |
| "grad_norm": 9.02678394317627, |
| "learning_rate": 8.573697851350222e-06, |
| "loss": 0.2446, |
| "step": 6070 |
| }, |
| { |
| "epoch": 4.574868322046652, |
| "grad_norm": 3.656137466430664, |
| "learning_rate": 8.569517598862973e-06, |
| "loss": 0.1866, |
| "step": 6080 |
| }, |
| { |
| "epoch": 4.582392776523702, |
| "grad_norm": 6.158934593200684, |
| "learning_rate": 8.565337346375721e-06, |
| "loss": 0.2248, |
| "step": 6090 |
| }, |
| { |
| "epoch": 4.5899172310007526, |
| "grad_norm": 10.076424598693848, |
| "learning_rate": 8.561157093888472e-06, |
| "loss": 0.2325, |
| "step": 6100 |
| }, |
| { |
| "epoch": 4.597441685477802, |
| "grad_norm": 8.406209945678711, |
| "learning_rate": 8.55697684140122e-06, |
| "loss": 0.2106, |
| "step": 6110 |
| }, |
| { |
| "epoch": 4.604966139954853, |
| "grad_norm": 7.778573989868164, |
| "learning_rate": 8.552796588913971e-06, |
| "loss": 0.1637, |
| "step": 6120 |
| }, |
| { |
| "epoch": 4.612490594431904, |
| "grad_norm": 7.563833713531494, |
| "learning_rate": 8.54861633642672e-06, |
| "loss": 0.1741, |
| "step": 6130 |
| }, |
| { |
| "epoch": 4.620015048908954, |
| "grad_norm": 8.280135154724121, |
| "learning_rate": 8.54443608393947e-06, |
| "loss": 0.1773, |
| "step": 6140 |
| }, |
| { |
| "epoch": 4.6275395033860045, |
| "grad_norm": 9.068934440612793, |
| "learning_rate": 8.54025583145222e-06, |
| "loss": 0.2054, |
| "step": 6150 |
| }, |
| { |
| "epoch": 4.635063957863055, |
| "grad_norm": 6.354267597198486, |
| "learning_rate": 8.53607557896497e-06, |
| "loss": 0.201, |
| "step": 6160 |
| }, |
| { |
| "epoch": 4.642588412340105, |
| "grad_norm": 8.397027969360352, |
| "learning_rate": 8.531895326477721e-06, |
| "loss": 0.2111, |
| "step": 6170 |
| }, |
| { |
| "epoch": 4.650112866817156, |
| "grad_norm": 8.86711597442627, |
| "learning_rate": 8.52771507399047e-06, |
| "loss": 0.1677, |
| "step": 6180 |
| }, |
| { |
| "epoch": 4.657637321294207, |
| "grad_norm": 8.797131538391113, |
| "learning_rate": 8.52353482150322e-06, |
| "loss": 0.2505, |
| "step": 6190 |
| }, |
| { |
| "epoch": 4.6651617757712565, |
| "grad_norm": 7.0519208908081055, |
| "learning_rate": 8.519354569015969e-06, |
| "loss": 0.1844, |
| "step": 6200 |
| }, |
| { |
| "epoch": 4.672686230248307, |
| "grad_norm": 4.867956161499023, |
| "learning_rate": 8.51517431652872e-06, |
| "loss": 0.2102, |
| "step": 6210 |
| }, |
| { |
| "epoch": 4.680210684725357, |
| "grad_norm": 4.225649833679199, |
| "learning_rate": 8.510994064041468e-06, |
| "loss": 0.2158, |
| "step": 6220 |
| }, |
| { |
| "epoch": 4.687735139202408, |
| "grad_norm": 6.078823566436768, |
| "learning_rate": 8.506813811554219e-06, |
| "loss": 0.1992, |
| "step": 6230 |
| }, |
| { |
| "epoch": 4.6952595936794586, |
| "grad_norm": 10.225565910339355, |
| "learning_rate": 8.502633559066969e-06, |
| "loss": 0.2692, |
| "step": 6240 |
| }, |
| { |
| "epoch": 4.702784048156508, |
| "grad_norm": 4.66995096206665, |
| "learning_rate": 8.498453306579718e-06, |
| "loss": 0.1973, |
| "step": 6250 |
| }, |
| { |
| "epoch": 4.710308502633559, |
| "grad_norm": 6.222870826721191, |
| "learning_rate": 8.494273054092468e-06, |
| "loss": 0.2045, |
| "step": 6260 |
| }, |
| { |
| "epoch": 4.717832957110609, |
| "grad_norm": 8.30414867401123, |
| "learning_rate": 8.490092801605217e-06, |
| "loss": 0.2066, |
| "step": 6270 |
| }, |
| { |
| "epoch": 4.72535741158766, |
| "grad_norm": 9.2549467086792, |
| "learning_rate": 8.485912549117967e-06, |
| "loss": 0.2159, |
| "step": 6280 |
| }, |
| { |
| "epoch": 4.7328818660647105, |
| "grad_norm": 7.878752708435059, |
| "learning_rate": 8.481732296630718e-06, |
| "loss": 0.1806, |
| "step": 6290 |
| }, |
| { |
| "epoch": 4.74040632054176, |
| "grad_norm": 11.239638328552246, |
| "learning_rate": 8.477552044143466e-06, |
| "loss": 0.1698, |
| "step": 6300 |
| }, |
| { |
| "epoch": 4.747930775018811, |
| "grad_norm": 5.1768670082092285, |
| "learning_rate": 8.473371791656217e-06, |
| "loss": 0.1374, |
| "step": 6310 |
| }, |
| { |
| "epoch": 4.755455229495862, |
| "grad_norm": 9.750138282775879, |
| "learning_rate": 8.469191539168965e-06, |
| "loss": 0.167, |
| "step": 6320 |
| }, |
| { |
| "epoch": 4.762979683972912, |
| "grad_norm": 7.808964252471924, |
| "learning_rate": 8.465011286681716e-06, |
| "loss": 0.2278, |
| "step": 6330 |
| }, |
| { |
| "epoch": 4.7705041384499625, |
| "grad_norm": 5.109748840332031, |
| "learning_rate": 8.460831034194466e-06, |
| "loss": 0.1781, |
| "step": 6340 |
| }, |
| { |
| "epoch": 4.778028592927013, |
| "grad_norm": 8.040872573852539, |
| "learning_rate": 8.456650781707217e-06, |
| "loss": 0.2723, |
| "step": 6350 |
| }, |
| { |
| "epoch": 4.785553047404063, |
| "grad_norm": 8.216675758361816, |
| "learning_rate": 8.452470529219965e-06, |
| "loss": 0.1732, |
| "step": 6360 |
| }, |
| { |
| "epoch": 4.793077501881114, |
| "grad_norm": 10.798053741455078, |
| "learning_rate": 8.448290276732716e-06, |
| "loss": 0.208, |
| "step": 6370 |
| }, |
| { |
| "epoch": 4.800601956358164, |
| "grad_norm": 6.185002326965332, |
| "learning_rate": 8.444110024245466e-06, |
| "loss": 0.1798, |
| "step": 6380 |
| }, |
| { |
| "epoch": 4.808126410835214, |
| "grad_norm": 5.082795143127441, |
| "learning_rate": 8.439929771758215e-06, |
| "loss": 0.166, |
| "step": 6390 |
| }, |
| { |
| "epoch": 4.815650865312265, |
| "grad_norm": 7.006508827209473, |
| "learning_rate": 8.435749519270965e-06, |
| "loss": 0.176, |
| "step": 6400 |
| }, |
| { |
| "epoch": 4.823175319789315, |
| "grad_norm": 8.347260475158691, |
| "learning_rate": 8.431569266783714e-06, |
| "loss": 0.1977, |
| "step": 6410 |
| }, |
| { |
| "epoch": 4.830699774266366, |
| "grad_norm": 9.687190055847168, |
| "learning_rate": 8.427389014296464e-06, |
| "loss": 0.281, |
| "step": 6420 |
| }, |
| { |
| "epoch": 4.838224228743416, |
| "grad_norm": 5.852231979370117, |
| "learning_rate": 8.423208761809215e-06, |
| "loss": 0.2346, |
| "step": 6430 |
| }, |
| { |
| "epoch": 4.845748683220466, |
| "grad_norm": 11.33639144897461, |
| "learning_rate": 8.419028509321963e-06, |
| "loss": 0.2193, |
| "step": 6440 |
| }, |
| { |
| "epoch": 4.853273137697517, |
| "grad_norm": 6.299645900726318, |
| "learning_rate": 8.414848256834714e-06, |
| "loss": 0.1589, |
| "step": 6450 |
| }, |
| { |
| "epoch": 4.860797592174567, |
| "grad_norm": 10.101712226867676, |
| "learning_rate": 8.410668004347462e-06, |
| "loss": 0.2342, |
| "step": 6460 |
| }, |
| { |
| "epoch": 4.868322046651618, |
| "grad_norm": 6.40759801864624, |
| "learning_rate": 8.406487751860213e-06, |
| "loss": 0.2052, |
| "step": 6470 |
| }, |
| { |
| "epoch": 4.8758465011286685, |
| "grad_norm": 9.549158096313477, |
| "learning_rate": 8.402307499372963e-06, |
| "loss": 0.201, |
| "step": 6480 |
| }, |
| { |
| "epoch": 4.883370955605718, |
| "grad_norm": 9.75588321685791, |
| "learning_rate": 8.398127246885712e-06, |
| "loss": 0.1658, |
| "step": 6490 |
| }, |
| { |
| "epoch": 4.890895410082769, |
| "grad_norm": 8.68608283996582, |
| "learning_rate": 8.393946994398462e-06, |
| "loss": 0.1837, |
| "step": 6500 |
| }, |
| { |
| "epoch": 4.89841986455982, |
| "grad_norm": 9.34019660949707, |
| "learning_rate": 8.389766741911211e-06, |
| "loss": 0.1958, |
| "step": 6510 |
| }, |
| { |
| "epoch": 4.90594431903687, |
| "grad_norm": 8.452191352844238, |
| "learning_rate": 8.385586489423961e-06, |
| "loss": 0.2024, |
| "step": 6520 |
| }, |
| { |
| "epoch": 4.91346877351392, |
| "grad_norm": 8.62346076965332, |
| "learning_rate": 8.381406236936712e-06, |
| "loss": 0.2091, |
| "step": 6530 |
| }, |
| { |
| "epoch": 4.92099322799097, |
| "grad_norm": 6.744731903076172, |
| "learning_rate": 8.377225984449462e-06, |
| "loss": 0.215, |
| "step": 6540 |
| }, |
| { |
| "epoch": 4.928517682468021, |
| "grad_norm": 7.814108371734619, |
| "learning_rate": 8.373045731962211e-06, |
| "loss": 0.1593, |
| "step": 6550 |
| }, |
| { |
| "epoch": 4.936042136945072, |
| "grad_norm": 11.607344627380371, |
| "learning_rate": 8.368865479474961e-06, |
| "loss": 0.225, |
| "step": 6560 |
| }, |
| { |
| "epoch": 4.943566591422122, |
| "grad_norm": 9.131011009216309, |
| "learning_rate": 8.364685226987712e-06, |
| "loss": 0.1857, |
| "step": 6570 |
| }, |
| { |
| "epoch": 4.951091045899172, |
| "grad_norm": 5.4191999435424805, |
| "learning_rate": 8.36050497450046e-06, |
| "loss": 0.1724, |
| "step": 6580 |
| }, |
| { |
| "epoch": 4.958615500376222, |
| "grad_norm": 5.488987922668457, |
| "learning_rate": 8.35632472201321e-06, |
| "loss": 0.1378, |
| "step": 6590 |
| }, |
| { |
| "epoch": 4.966139954853273, |
| "grad_norm": 5.251589775085449, |
| "learning_rate": 8.35214446952596e-06, |
| "loss": 0.2182, |
| "step": 6600 |
| }, |
| { |
| "epoch": 4.973664409330324, |
| "grad_norm": 5.991082191467285, |
| "learning_rate": 8.34796421703871e-06, |
| "loss": 0.1919, |
| "step": 6610 |
| }, |
| { |
| "epoch": 4.981188863807374, |
| "grad_norm": 10.733721733093262, |
| "learning_rate": 8.34378396455146e-06, |
| "loss": 0.1839, |
| "step": 6620 |
| }, |
| { |
| "epoch": 4.988713318284424, |
| "grad_norm": 11.137850761413574, |
| "learning_rate": 8.339603712064209e-06, |
| "loss": 0.2389, |
| "step": 6630 |
| }, |
| { |
| "epoch": 4.996237772761475, |
| "grad_norm": 11.486577987670898, |
| "learning_rate": 8.33542345957696e-06, |
| "loss": 0.2458, |
| "step": 6640 |
| }, |
| { |
| "epoch": 5.003762227238525, |
| "grad_norm": 5.592317581176758, |
| "learning_rate": 8.331243207089708e-06, |
| "loss": 0.1371, |
| "step": 6650 |
| }, |
| { |
| "epoch": 5.011286681715576, |
| "grad_norm": 6.482915878295898, |
| "learning_rate": 8.327062954602458e-06, |
| "loss": 0.177, |
| "step": 6660 |
| }, |
| { |
| "epoch": 5.018811136192626, |
| "grad_norm": 7.077815055847168, |
| "learning_rate": 8.322882702115209e-06, |
| "loss": 0.1495, |
| "step": 6670 |
| }, |
| { |
| "epoch": 5.026335590669676, |
| "grad_norm": 7.756783962249756, |
| "learning_rate": 8.318702449627958e-06, |
| "loss": 0.1427, |
| "step": 6680 |
| }, |
| { |
| "epoch": 5.033860045146727, |
| "grad_norm": 6.053231239318848, |
| "learning_rate": 8.314522197140708e-06, |
| "loss": 0.1547, |
| "step": 6690 |
| }, |
| { |
| "epoch": 5.041384499623777, |
| "grad_norm": 6.568417072296143, |
| "learning_rate": 8.310341944653457e-06, |
| "loss": 0.1753, |
| "step": 6700 |
| }, |
| { |
| "epoch": 5.048908954100828, |
| "grad_norm": 5.332026958465576, |
| "learning_rate": 8.306161692166209e-06, |
| "loss": 0.1305, |
| "step": 6710 |
| }, |
| { |
| "epoch": 5.056433408577878, |
| "grad_norm": 9.05020809173584, |
| "learning_rate": 8.301981439678957e-06, |
| "loss": 0.1973, |
| "step": 6720 |
| }, |
| { |
| "epoch": 5.063957863054928, |
| "grad_norm": 8.415604591369629, |
| "learning_rate": 8.297801187191708e-06, |
| "loss": 0.2205, |
| "step": 6730 |
| }, |
| { |
| "epoch": 5.071482317531979, |
| "grad_norm": 5.819530487060547, |
| "learning_rate": 8.293620934704457e-06, |
| "loss": 0.1862, |
| "step": 6740 |
| }, |
| { |
| "epoch": 5.07900677200903, |
| "grad_norm": 7.946175575256348, |
| "learning_rate": 8.289440682217207e-06, |
| "loss": 0.1132, |
| "step": 6750 |
| }, |
| { |
| "epoch": 5.08653122648608, |
| "grad_norm": 10.299882888793945, |
| "learning_rate": 8.285260429729957e-06, |
| "loss": 0.1745, |
| "step": 6760 |
| }, |
| { |
| "epoch": 5.09405568096313, |
| "grad_norm": 7.671544551849365, |
| "learning_rate": 8.281080177242706e-06, |
| "loss": 0.1829, |
| "step": 6770 |
| }, |
| { |
| "epoch": 5.10158013544018, |
| "grad_norm": 9.563514709472656, |
| "learning_rate": 8.276899924755456e-06, |
| "loss": 0.1952, |
| "step": 6780 |
| }, |
| { |
| "epoch": 5.109104589917231, |
| "grad_norm": 5.524237632751465, |
| "learning_rate": 8.272719672268205e-06, |
| "loss": 0.1327, |
| "step": 6790 |
| }, |
| { |
| "epoch": 5.116629044394282, |
| "grad_norm": 7.356444835662842, |
| "learning_rate": 8.268539419780956e-06, |
| "loss": 0.1503, |
| "step": 6800 |
| }, |
| { |
| "epoch": 5.1241534988713315, |
| "grad_norm": 10.332460403442383, |
| "learning_rate": 8.264359167293706e-06, |
| "loss": 0.1583, |
| "step": 6810 |
| }, |
| { |
| "epoch": 5.131677953348382, |
| "grad_norm": 8.877423286437988, |
| "learning_rate": 8.260178914806455e-06, |
| "loss": 0.2441, |
| "step": 6820 |
| }, |
| { |
| "epoch": 5.139202407825433, |
| "grad_norm": 11.976480484008789, |
| "learning_rate": 8.255998662319205e-06, |
| "loss": 0.2151, |
| "step": 6830 |
| }, |
| { |
| "epoch": 5.146726862302483, |
| "grad_norm": 8.162457466125488, |
| "learning_rate": 8.251818409831954e-06, |
| "loss": 0.1607, |
| "step": 6840 |
| }, |
| { |
| "epoch": 5.154251316779534, |
| "grad_norm": 8.65699577331543, |
| "learning_rate": 8.247638157344704e-06, |
| "loss": 0.1581, |
| "step": 6850 |
| }, |
| { |
| "epoch": 5.1617757712565835, |
| "grad_norm": 8.015094757080078, |
| "learning_rate": 8.243457904857453e-06, |
| "loss": 0.2031, |
| "step": 6860 |
| }, |
| { |
| "epoch": 5.169300225733634, |
| "grad_norm": 8.649307250976562, |
| "learning_rate": 8.239277652370203e-06, |
| "loss": 0.1524, |
| "step": 6870 |
| }, |
| { |
| "epoch": 5.176824680210685, |
| "grad_norm": 11.47341537475586, |
| "learning_rate": 8.235097399882954e-06, |
| "loss": 0.1759, |
| "step": 6880 |
| }, |
| { |
| "epoch": 5.184349134687735, |
| "grad_norm": 11.23306941986084, |
| "learning_rate": 8.230917147395704e-06, |
| "loss": 0.1342, |
| "step": 6890 |
| }, |
| { |
| "epoch": 5.191873589164786, |
| "grad_norm": 4.827985763549805, |
| "learning_rate": 8.226736894908454e-06, |
| "loss": 0.2029, |
| "step": 6900 |
| }, |
| { |
| "epoch": 5.199398043641836, |
| "grad_norm": 6.810028076171875, |
| "learning_rate": 8.222556642421203e-06, |
| "loss": 0.1814, |
| "step": 6910 |
| }, |
| { |
| "epoch": 5.206922498118886, |
| "grad_norm": 9.269369125366211, |
| "learning_rate": 8.218376389933954e-06, |
| "loss": 0.1445, |
| "step": 6920 |
| }, |
| { |
| "epoch": 5.214446952595937, |
| "grad_norm": 4.727482318878174, |
| "learning_rate": 8.214196137446702e-06, |
| "loss": 0.1737, |
| "step": 6930 |
| }, |
| { |
| "epoch": 5.221971407072987, |
| "grad_norm": 7.885960102081299, |
| "learning_rate": 8.210015884959453e-06, |
| "loss": 0.1524, |
| "step": 6940 |
| }, |
| { |
| "epoch": 5.2294958615500375, |
| "grad_norm": 9.722622871398926, |
| "learning_rate": 8.205835632472201e-06, |
| "loss": 0.213, |
| "step": 6950 |
| }, |
| { |
| "epoch": 5.237020316027088, |
| "grad_norm": 7.540585994720459, |
| "learning_rate": 8.201655379984952e-06, |
| "loss": 0.1726, |
| "step": 6960 |
| }, |
| { |
| "epoch": 5.244544770504138, |
| "grad_norm": 9.993691444396973, |
| "learning_rate": 8.197475127497702e-06, |
| "loss": 0.1434, |
| "step": 6970 |
| }, |
| { |
| "epoch": 5.252069224981189, |
| "grad_norm": 11.053601264953613, |
| "learning_rate": 8.19329487501045e-06, |
| "loss": 0.1883, |
| "step": 6980 |
| }, |
| { |
| "epoch": 5.25959367945824, |
| "grad_norm": 8.340205192565918, |
| "learning_rate": 8.189114622523201e-06, |
| "loss": 0.1497, |
| "step": 6990 |
| }, |
| { |
| "epoch": 5.2671181339352895, |
| "grad_norm": 8.768745422363281, |
| "learning_rate": 8.18493437003595e-06, |
| "loss": 0.2163, |
| "step": 7000 |
| }, |
| { |
| "epoch": 5.27464258841234, |
| "grad_norm": 7.16154146194458, |
| "learning_rate": 8.1807541175487e-06, |
| "loss": 0.1395, |
| "step": 7010 |
| }, |
| { |
| "epoch": 5.282167042889391, |
| "grad_norm": 8.094215393066406, |
| "learning_rate": 8.17657386506145e-06, |
| "loss": 0.1749, |
| "step": 7020 |
| }, |
| { |
| "epoch": 5.289691497366441, |
| "grad_norm": 9.044081687927246, |
| "learning_rate": 8.1723936125742e-06, |
| "loss": 0.1954, |
| "step": 7030 |
| }, |
| { |
| "epoch": 5.297215951843492, |
| "grad_norm": 7.638929843902588, |
| "learning_rate": 8.16821336008695e-06, |
| "loss": 0.1637, |
| "step": 7040 |
| }, |
| { |
| "epoch": 5.3047404063205414, |
| "grad_norm": 9.528214454650879, |
| "learning_rate": 8.164033107599699e-06, |
| "loss": 0.2195, |
| "step": 7050 |
| }, |
| { |
| "epoch": 5.312264860797592, |
| "grad_norm": 8.045819282531738, |
| "learning_rate": 8.159852855112449e-06, |
| "loss": 0.16, |
| "step": 7060 |
| }, |
| { |
| "epoch": 5.319789315274643, |
| "grad_norm": 8.95510196685791, |
| "learning_rate": 8.1556726026252e-06, |
| "loss": 0.161, |
| "step": 7070 |
| }, |
| { |
| "epoch": 5.327313769751693, |
| "grad_norm": 8.730019569396973, |
| "learning_rate": 8.15149235013795e-06, |
| "loss": 0.1873, |
| "step": 7080 |
| }, |
| { |
| "epoch": 5.3348382242287435, |
| "grad_norm": 6.827531814575195, |
| "learning_rate": 8.147312097650698e-06, |
| "loss": 0.1695, |
| "step": 7090 |
| }, |
| { |
| "epoch": 5.342362678705793, |
| "grad_norm": 6.7226104736328125, |
| "learning_rate": 8.143131845163449e-06, |
| "loss": 0.1567, |
| "step": 7100 |
| }, |
| { |
| "epoch": 5.349887133182844, |
| "grad_norm": 7.858177661895752, |
| "learning_rate": 8.1389515926762e-06, |
| "loss": 0.1619, |
| "step": 7110 |
| }, |
| { |
| "epoch": 5.357411587659895, |
| "grad_norm": 7.37957763671875, |
| "learning_rate": 8.134771340188948e-06, |
| "loss": 0.1582, |
| "step": 7120 |
| }, |
| { |
| "epoch": 5.364936042136945, |
| "grad_norm": 9.118856430053711, |
| "learning_rate": 8.130591087701698e-06, |
| "loss": 0.2016, |
| "step": 7130 |
| }, |
| { |
| "epoch": 5.3724604966139955, |
| "grad_norm": 11.332245826721191, |
| "learning_rate": 8.126410835214447e-06, |
| "loss": 0.1848, |
| "step": 7140 |
| }, |
| { |
| "epoch": 5.379984951091046, |
| "grad_norm": 8.34212589263916, |
| "learning_rate": 8.122230582727197e-06, |
| "loss": 0.1948, |
| "step": 7150 |
| }, |
| { |
| "epoch": 5.387509405568096, |
| "grad_norm": 8.121428489685059, |
| "learning_rate": 8.118050330239948e-06, |
| "loss": 0.2094, |
| "step": 7160 |
| }, |
| { |
| "epoch": 5.395033860045147, |
| "grad_norm": 8.244184494018555, |
| "learning_rate": 8.113870077752696e-06, |
| "loss": 0.1341, |
| "step": 7170 |
| }, |
| { |
| "epoch": 5.402558314522198, |
| "grad_norm": 5.444628715515137, |
| "learning_rate": 8.109689825265447e-06, |
| "loss": 0.1488, |
| "step": 7180 |
| }, |
| { |
| "epoch": 5.4100827689992474, |
| "grad_norm": 7.506073951721191, |
| "learning_rate": 8.105509572778196e-06, |
| "loss": 0.1999, |
| "step": 7190 |
| }, |
| { |
| "epoch": 5.417607223476298, |
| "grad_norm": 4.846856594085693, |
| "learning_rate": 8.101329320290946e-06, |
| "loss": 0.2043, |
| "step": 7200 |
| }, |
| { |
| "epoch": 5.425131677953348, |
| "grad_norm": 11.258270263671875, |
| "learning_rate": 8.097149067803696e-06, |
| "loss": 0.1968, |
| "step": 7210 |
| }, |
| { |
| "epoch": 5.432656132430399, |
| "grad_norm": 4.456273078918457, |
| "learning_rate": 8.092968815316445e-06, |
| "loss": 0.1815, |
| "step": 7220 |
| }, |
| { |
| "epoch": 5.4401805869074495, |
| "grad_norm": 5.355717182159424, |
| "learning_rate": 8.088788562829195e-06, |
| "loss": 0.1554, |
| "step": 7230 |
| }, |
| { |
| "epoch": 5.447705041384499, |
| "grad_norm": 8.809466361999512, |
| "learning_rate": 8.084608310341944e-06, |
| "loss": 0.2145, |
| "step": 7240 |
| }, |
| { |
| "epoch": 5.45522949586155, |
| "grad_norm": 8.372282981872559, |
| "learning_rate": 8.080428057854695e-06, |
| "loss": 0.2094, |
| "step": 7250 |
| }, |
| { |
| "epoch": 5.4627539503386, |
| "grad_norm": 7.424798488616943, |
| "learning_rate": 8.076247805367445e-06, |
| "loss": 0.1853, |
| "step": 7260 |
| }, |
| { |
| "epoch": 5.470278404815651, |
| "grad_norm": 8.875120162963867, |
| "learning_rate": 8.072067552880195e-06, |
| "loss": 0.1997, |
| "step": 7270 |
| }, |
| { |
| "epoch": 5.4778028592927015, |
| "grad_norm": 7.930933475494385, |
| "learning_rate": 8.067887300392944e-06, |
| "loss": 0.212, |
| "step": 7280 |
| }, |
| { |
| "epoch": 5.485327313769751, |
| "grad_norm": 5.328012466430664, |
| "learning_rate": 8.063707047905694e-06, |
| "loss": 0.158, |
| "step": 7290 |
| }, |
| { |
| "epoch": 5.492851768246802, |
| "grad_norm": 6.645898818969727, |
| "learning_rate": 8.059526795418445e-06, |
| "loss": 0.168, |
| "step": 7300 |
| }, |
| { |
| "epoch": 5.500376222723853, |
| "grad_norm": 9.081933975219727, |
| "learning_rate": 8.055346542931194e-06, |
| "loss": 0.1814, |
| "step": 7310 |
| }, |
| { |
| "epoch": 5.507900677200903, |
| "grad_norm": 7.437675476074219, |
| "learning_rate": 8.051166290443944e-06, |
| "loss": 0.2142, |
| "step": 7320 |
| }, |
| { |
| "epoch": 5.5154251316779535, |
| "grad_norm": 6.354644775390625, |
| "learning_rate": 8.046986037956693e-06, |
| "loss": 0.165, |
| "step": 7330 |
| }, |
| { |
| "epoch": 5.522949586155004, |
| "grad_norm": 7.42842435836792, |
| "learning_rate": 8.042805785469443e-06, |
| "loss": 0.1831, |
| "step": 7340 |
| }, |
| { |
| "epoch": 5.530474040632054, |
| "grad_norm": 9.511542320251465, |
| "learning_rate": 8.038625532982193e-06, |
| "loss": 0.1782, |
| "step": 7350 |
| }, |
| { |
| "epoch": 5.537998495109105, |
| "grad_norm": 7.416419982910156, |
| "learning_rate": 8.034445280494942e-06, |
| "loss": 0.1983, |
| "step": 7360 |
| }, |
| { |
| "epoch": 5.545522949586155, |
| "grad_norm": 8.564135551452637, |
| "learning_rate": 8.030265028007693e-06, |
| "loss": 0.1695, |
| "step": 7370 |
| }, |
| { |
| "epoch": 5.553047404063205, |
| "grad_norm": 6.778014659881592, |
| "learning_rate": 8.026084775520441e-06, |
| "loss": 0.1599, |
| "step": 7380 |
| }, |
| { |
| "epoch": 5.560571858540256, |
| "grad_norm": 6.545214653015137, |
| "learning_rate": 8.021904523033192e-06, |
| "loss": 0.1395, |
| "step": 7390 |
| }, |
| { |
| "epoch": 5.568096313017306, |
| "grad_norm": 3.063966989517212, |
| "learning_rate": 8.017724270545942e-06, |
| "loss": 0.1383, |
| "step": 7400 |
| }, |
| { |
| "epoch": 5.575620767494357, |
| "grad_norm": 5.198939800262451, |
| "learning_rate": 8.01354401805869e-06, |
| "loss": 0.176, |
| "step": 7410 |
| }, |
| { |
| "epoch": 5.583145221971407, |
| "grad_norm": 8.018445014953613, |
| "learning_rate": 8.009363765571441e-06, |
| "loss": 0.1773, |
| "step": 7420 |
| }, |
| { |
| "epoch": 5.590669676448457, |
| "grad_norm": 8.4862060546875, |
| "learning_rate": 8.00518351308419e-06, |
| "loss": 0.2351, |
| "step": 7430 |
| }, |
| { |
| "epoch": 5.598194130925508, |
| "grad_norm": 3.4017207622528076, |
| "learning_rate": 8.001003260596942e-06, |
| "loss": 0.1826, |
| "step": 7440 |
| }, |
| { |
| "epoch": 5.605718585402558, |
| "grad_norm": 8.308197021484375, |
| "learning_rate": 7.99682300810969e-06, |
| "loss": 0.2002, |
| "step": 7450 |
| }, |
| { |
| "epoch": 5.613243039879609, |
| "grad_norm": 6.5446038246154785, |
| "learning_rate": 7.992642755622441e-06, |
| "loss": 0.1472, |
| "step": 7460 |
| }, |
| { |
| "epoch": 5.6207674943566595, |
| "grad_norm": 12.85771369934082, |
| "learning_rate": 7.98846250313519e-06, |
| "loss": 0.2382, |
| "step": 7470 |
| }, |
| { |
| "epoch": 5.628291948833709, |
| "grad_norm": 7.108580589294434, |
| "learning_rate": 7.98428225064794e-06, |
| "loss": 0.1629, |
| "step": 7480 |
| }, |
| { |
| "epoch": 5.63581640331076, |
| "grad_norm": 10.055646896362305, |
| "learning_rate": 7.98010199816069e-06, |
| "loss": 0.2107, |
| "step": 7490 |
| }, |
| { |
| "epoch": 5.643340857787811, |
| "grad_norm": 3.524488687515259, |
| "learning_rate": 7.97592174567344e-06, |
| "loss": 0.1614, |
| "step": 7500 |
| }, |
| { |
| "epoch": 5.650865312264861, |
| "grad_norm": 8.577242851257324, |
| "learning_rate": 7.97174149318619e-06, |
| "loss": 0.1752, |
| "step": 7510 |
| }, |
| { |
| "epoch": 5.658389766741911, |
| "grad_norm": 4.436498641967773, |
| "learning_rate": 7.967561240698938e-06, |
| "loss": 0.2289, |
| "step": 7520 |
| }, |
| { |
| "epoch": 5.665914221218961, |
| "grad_norm": 9.47412395477295, |
| "learning_rate": 7.963380988211689e-06, |
| "loss": 0.1646, |
| "step": 7530 |
| }, |
| { |
| "epoch": 5.673438675696012, |
| "grad_norm": 11.120841026306152, |
| "learning_rate": 7.959200735724439e-06, |
| "loss": 0.1805, |
| "step": 7540 |
| }, |
| { |
| "epoch": 5.680963130173063, |
| "grad_norm": 8.272597312927246, |
| "learning_rate": 7.955020483237188e-06, |
| "loss": 0.2055, |
| "step": 7550 |
| }, |
| { |
| "epoch": 5.688487584650113, |
| "grad_norm": 7.943882465362549, |
| "learning_rate": 7.950840230749938e-06, |
| "loss": 0.1849, |
| "step": 7560 |
| }, |
| { |
| "epoch": 5.696012039127163, |
| "grad_norm": 9.238296508789062, |
| "learning_rate": 7.946659978262687e-06, |
| "loss": 0.1651, |
| "step": 7570 |
| }, |
| { |
| "epoch": 5.703536493604213, |
| "grad_norm": 11.098034858703613, |
| "learning_rate": 7.942479725775437e-06, |
| "loss": 0.216, |
| "step": 7580 |
| }, |
| { |
| "epoch": 5.711060948081264, |
| "grad_norm": 11.07455825805664, |
| "learning_rate": 7.938299473288188e-06, |
| "loss": 0.2109, |
| "step": 7590 |
| }, |
| { |
| "epoch": 5.718585402558315, |
| "grad_norm": 6.9234819412231445, |
| "learning_rate": 7.934119220800936e-06, |
| "loss": 0.1456, |
| "step": 7600 |
| }, |
| { |
| "epoch": 5.726109857035365, |
| "grad_norm": 5.608709812164307, |
| "learning_rate": 7.929938968313687e-06, |
| "loss": 0.1424, |
| "step": 7610 |
| }, |
| { |
| "epoch": 5.733634311512415, |
| "grad_norm": 13.218008995056152, |
| "learning_rate": 7.925758715826437e-06, |
| "loss": 0.1895, |
| "step": 7620 |
| }, |
| { |
| "epoch": 5.741158765989466, |
| "grad_norm": 6.975159168243408, |
| "learning_rate": 7.921578463339188e-06, |
| "loss": 0.1616, |
| "step": 7630 |
| }, |
| { |
| "epoch": 5.748683220466516, |
| "grad_norm": 8.910100936889648, |
| "learning_rate": 7.917398210851936e-06, |
| "loss": 0.2042, |
| "step": 7640 |
| }, |
| { |
| "epoch": 5.756207674943567, |
| "grad_norm": 8.068119049072266, |
| "learning_rate": 7.913217958364687e-06, |
| "loss": 0.1748, |
| "step": 7650 |
| }, |
| { |
| "epoch": 5.763732129420617, |
| "grad_norm": 9.351244926452637, |
| "learning_rate": 7.909037705877435e-06, |
| "loss": 0.169, |
| "step": 7660 |
| }, |
| { |
| "epoch": 5.771256583897667, |
| "grad_norm": 7.046148300170898, |
| "learning_rate": 7.904857453390186e-06, |
| "loss": 0.2126, |
| "step": 7670 |
| }, |
| { |
| "epoch": 5.778781038374718, |
| "grad_norm": 7.9526686668396, |
| "learning_rate": 7.900677200902936e-06, |
| "loss": 0.1893, |
| "step": 7680 |
| }, |
| { |
| "epoch": 5.786305492851768, |
| "grad_norm": 10.231925964355469, |
| "learning_rate": 7.896496948415685e-06, |
| "loss": 0.2103, |
| "step": 7690 |
| }, |
| { |
| "epoch": 5.793829947328819, |
| "grad_norm": 9.099306106567383, |
| "learning_rate": 7.892316695928435e-06, |
| "loss": 0.1875, |
| "step": 7700 |
| }, |
| { |
| "epoch": 5.801354401805869, |
| "grad_norm": 9.167414665222168, |
| "learning_rate": 7.888136443441184e-06, |
| "loss": 0.1893, |
| "step": 7710 |
| }, |
| { |
| "epoch": 5.808878856282919, |
| "grad_norm": 12.738779067993164, |
| "learning_rate": 7.883956190953934e-06, |
| "loss": 0.2392, |
| "step": 7720 |
| }, |
| { |
| "epoch": 5.81640331075997, |
| "grad_norm": 7.125742435455322, |
| "learning_rate": 7.879775938466683e-06, |
| "loss": 0.1697, |
| "step": 7730 |
| }, |
| { |
| "epoch": 5.82392776523702, |
| "grad_norm": 6.5781450271606445, |
| "learning_rate": 7.875595685979433e-06, |
| "loss": 0.1694, |
| "step": 7740 |
| }, |
| { |
| "epoch": 5.831452219714071, |
| "grad_norm": 4.691234588623047, |
| "learning_rate": 7.871415433492184e-06, |
| "loss": 0.2257, |
| "step": 7750 |
| }, |
| { |
| "epoch": 5.838976674191121, |
| "grad_norm": 8.69082260131836, |
| "learning_rate": 7.867235181004933e-06, |
| "loss": 0.2275, |
| "step": 7760 |
| }, |
| { |
| "epoch": 5.846501128668171, |
| "grad_norm": 9.822325706481934, |
| "learning_rate": 7.863054928517683e-06, |
| "loss": 0.1628, |
| "step": 7770 |
| }, |
| { |
| "epoch": 5.854025583145222, |
| "grad_norm": 4.987610816955566, |
| "learning_rate": 7.858874676030432e-06, |
| "loss": 0.1633, |
| "step": 7780 |
| }, |
| { |
| "epoch": 5.861550037622273, |
| "grad_norm": 7.953482151031494, |
| "learning_rate": 7.854694423543182e-06, |
| "loss": 0.1631, |
| "step": 7790 |
| }, |
| { |
| "epoch": 5.8690744920993225, |
| "grad_norm": 9.97362995147705, |
| "learning_rate": 7.850514171055932e-06, |
| "loss": 0.2077, |
| "step": 7800 |
| }, |
| { |
| "epoch": 5.876598946576373, |
| "grad_norm": 10.382735252380371, |
| "learning_rate": 7.846333918568683e-06, |
| "loss": 0.1484, |
| "step": 7810 |
| }, |
| { |
| "epoch": 5.884123401053424, |
| "grad_norm": 11.967596054077148, |
| "learning_rate": 7.842153666081432e-06, |
| "loss": 0.1525, |
| "step": 7820 |
| }, |
| { |
| "epoch": 5.891647855530474, |
| "grad_norm": 6.947834014892578, |
| "learning_rate": 7.837973413594182e-06, |
| "loss": 0.1685, |
| "step": 7830 |
| }, |
| { |
| "epoch": 5.899172310007525, |
| "grad_norm": 9.017217636108398, |
| "learning_rate": 7.833793161106932e-06, |
| "loss": 0.1737, |
| "step": 7840 |
| }, |
| { |
| "epoch": 5.9066967644845745, |
| "grad_norm": 9.230958938598633, |
| "learning_rate": 7.829612908619681e-06, |
| "loss": 0.142, |
| "step": 7850 |
| }, |
| { |
| "epoch": 5.914221218961625, |
| "grad_norm": 7.715651035308838, |
| "learning_rate": 7.825432656132431e-06, |
| "loss": 0.1249, |
| "step": 7860 |
| }, |
| { |
| "epoch": 5.921745673438676, |
| "grad_norm": 2.6331450939178467, |
| "learning_rate": 7.82125240364518e-06, |
| "loss": 0.1421, |
| "step": 7870 |
| }, |
| { |
| "epoch": 5.929270127915726, |
| "grad_norm": 4.872918128967285, |
| "learning_rate": 7.81707215115793e-06, |
| "loss": 0.1519, |
| "step": 7880 |
| }, |
| { |
| "epoch": 5.936794582392777, |
| "grad_norm": 11.077669143676758, |
| "learning_rate": 7.812891898670681e-06, |
| "loss": 0.1566, |
| "step": 7890 |
| }, |
| { |
| "epoch": 5.944319036869827, |
| "grad_norm": 9.00078296661377, |
| "learning_rate": 7.80871164618343e-06, |
| "loss": 0.1939, |
| "step": 7900 |
| }, |
| { |
| "epoch": 5.951843491346877, |
| "grad_norm": 9.66382122039795, |
| "learning_rate": 7.80453139369618e-06, |
| "loss": 0.1994, |
| "step": 7910 |
| }, |
| { |
| "epoch": 5.959367945823928, |
| "grad_norm": 6.525386333465576, |
| "learning_rate": 7.800351141208929e-06, |
| "loss": 0.1517, |
| "step": 7920 |
| }, |
| { |
| "epoch": 5.966892400300978, |
| "grad_norm": 5.410614967346191, |
| "learning_rate": 7.796170888721679e-06, |
| "loss": 0.1293, |
| "step": 7930 |
| }, |
| { |
| "epoch": 5.9744168547780285, |
| "grad_norm": 8.867262840270996, |
| "learning_rate": 7.79199063623443e-06, |
| "loss": 0.1892, |
| "step": 7940 |
| }, |
| { |
| "epoch": 5.981941309255079, |
| "grad_norm": 8.707972526550293, |
| "learning_rate": 7.787810383747178e-06, |
| "loss": 0.1489, |
| "step": 7950 |
| }, |
| { |
| "epoch": 5.989465763732129, |
| "grad_norm": 10.210127830505371, |
| "learning_rate": 7.783630131259929e-06, |
| "loss": 0.1879, |
| "step": 7960 |
| }, |
| { |
| "epoch": 5.99699021820918, |
| "grad_norm": 5.96549129486084, |
| "learning_rate": 7.779449878772677e-06, |
| "loss": 0.1821, |
| "step": 7970 |
| }, |
| { |
| "epoch": 6.004514672686231, |
| "grad_norm": 7.090506553649902, |
| "learning_rate": 7.77526962628543e-06, |
| "loss": 0.1218, |
| "step": 7980 |
| }, |
| { |
| "epoch": 6.0120391271632805, |
| "grad_norm": 8.773550033569336, |
| "learning_rate": 7.771089373798178e-06, |
| "loss": 0.1558, |
| "step": 7990 |
| }, |
| { |
| "epoch": 6.019563581640331, |
| "grad_norm": 8.615151405334473, |
| "learning_rate": 7.766909121310929e-06, |
| "loss": 0.1811, |
| "step": 8000 |
| }, |
| { |
| "epoch": 6.027088036117381, |
| "grad_norm": 4.9068922996521, |
| "learning_rate": 7.762728868823677e-06, |
| "loss": 0.2012, |
| "step": 8010 |
| }, |
| { |
| "epoch": 6.034612490594432, |
| "grad_norm": 9.907990455627441, |
| "learning_rate": 7.758548616336428e-06, |
| "loss": 0.1682, |
| "step": 8020 |
| }, |
| { |
| "epoch": 6.042136945071483, |
| "grad_norm": 5.487863063812256, |
| "learning_rate": 7.754368363849178e-06, |
| "loss": 0.1742, |
| "step": 8030 |
| }, |
| { |
| "epoch": 6.049661399548532, |
| "grad_norm": 8.537084579467773, |
| "learning_rate": 7.750188111361927e-06, |
| "loss": 0.1652, |
| "step": 8040 |
| }, |
| { |
| "epoch": 6.057185854025583, |
| "grad_norm": 10.835139274597168, |
| "learning_rate": 7.746007858874677e-06, |
| "loss": 0.1993, |
| "step": 8050 |
| }, |
| { |
| "epoch": 6.064710308502634, |
| "grad_norm": 7.074265003204346, |
| "learning_rate": 7.741827606387426e-06, |
| "loss": 0.2075, |
| "step": 8060 |
| }, |
| { |
| "epoch": 6.072234762979684, |
| "grad_norm": 6.608835697174072, |
| "learning_rate": 7.737647353900176e-06, |
| "loss": 0.144, |
| "step": 8070 |
| }, |
| { |
| "epoch": 6.0797592174567345, |
| "grad_norm": 11.262980461120605, |
| "learning_rate": 7.733467101412927e-06, |
| "loss": 0.1397, |
| "step": 8080 |
| }, |
| { |
| "epoch": 6.087283671933784, |
| "grad_norm": 6.517685890197754, |
| "learning_rate": 7.729286848925675e-06, |
| "loss": 0.1875, |
| "step": 8090 |
| }, |
| { |
| "epoch": 6.094808126410835, |
| "grad_norm": 6.834563255310059, |
| "learning_rate": 7.725106596438426e-06, |
| "loss": 0.1883, |
| "step": 8100 |
| }, |
| { |
| "epoch": 6.102332580887886, |
| "grad_norm": 7.841973304748535, |
| "learning_rate": 7.720926343951174e-06, |
| "loss": 0.1722, |
| "step": 8110 |
| }, |
| { |
| "epoch": 6.109857035364936, |
| "grad_norm": 7.006568431854248, |
| "learning_rate": 7.716746091463925e-06, |
| "loss": 0.1157, |
| "step": 8120 |
| }, |
| { |
| "epoch": 6.1173814898419865, |
| "grad_norm": 7.466751575469971, |
| "learning_rate": 7.712565838976675e-06, |
| "loss": 0.122, |
| "step": 8130 |
| }, |
| { |
| "epoch": 6.124905944319037, |
| "grad_norm": 6.685294151306152, |
| "learning_rate": 7.708385586489424e-06, |
| "loss": 0.1509, |
| "step": 8140 |
| }, |
| { |
| "epoch": 6.132430398796087, |
| "grad_norm": 9.151078224182129, |
| "learning_rate": 7.704205334002174e-06, |
| "loss": 0.1428, |
| "step": 8150 |
| }, |
| { |
| "epoch": 6.139954853273138, |
| "grad_norm": 9.963911056518555, |
| "learning_rate": 7.700025081514925e-06, |
| "loss": 0.1362, |
| "step": 8160 |
| }, |
| { |
| "epoch": 6.147479307750188, |
| "grad_norm": 7.9521942138671875, |
| "learning_rate": 7.695844829027675e-06, |
| "loss": 0.2037, |
| "step": 8170 |
| }, |
| { |
| "epoch": 6.155003762227238, |
| "grad_norm": 10.13383960723877, |
| "learning_rate": 7.691664576540424e-06, |
| "loss": 0.1683, |
| "step": 8180 |
| }, |
| { |
| "epoch": 6.162528216704289, |
| "grad_norm": 7.142587184906006, |
| "learning_rate": 7.687484324053174e-06, |
| "loss": 0.1904, |
| "step": 8190 |
| }, |
| { |
| "epoch": 6.170052671181339, |
| "grad_norm": 5.827476501464844, |
| "learning_rate": 7.683304071565923e-06, |
| "loss": 0.1182, |
| "step": 8200 |
| }, |
| { |
| "epoch": 6.17757712565839, |
| "grad_norm": 3.5819246768951416, |
| "learning_rate": 7.679123819078673e-06, |
| "loss": 0.1291, |
| "step": 8210 |
| }, |
| { |
| "epoch": 6.1851015801354405, |
| "grad_norm": 10.036810874938965, |
| "learning_rate": 7.674943566591424e-06, |
| "loss": 0.1531, |
| "step": 8220 |
| }, |
| { |
| "epoch": 6.19262603461249, |
| "grad_norm": 10.45486068725586, |
| "learning_rate": 7.670763314104172e-06, |
| "loss": 0.1943, |
| "step": 8230 |
| }, |
| { |
| "epoch": 6.200150489089541, |
| "grad_norm": 2.4924612045288086, |
| "learning_rate": 7.666583061616923e-06, |
| "loss": 0.148, |
| "step": 8240 |
| }, |
| { |
| "epoch": 6.207674943566591, |
| "grad_norm": 4.770188808441162, |
| "learning_rate": 7.662402809129671e-06, |
| "loss": 0.14, |
| "step": 8250 |
| }, |
| { |
| "epoch": 6.215199398043642, |
| "grad_norm": 11.903753280639648, |
| "learning_rate": 7.658222556642422e-06, |
| "loss": 0.1417, |
| "step": 8260 |
| }, |
| { |
| "epoch": 6.2227238525206925, |
| "grad_norm": 2.068080425262451, |
| "learning_rate": 7.654042304155172e-06, |
| "loss": 0.1679, |
| "step": 8270 |
| }, |
| { |
| "epoch": 6.230248306997742, |
| "grad_norm": 7.85164737701416, |
| "learning_rate": 7.649862051667921e-06, |
| "loss": 0.1778, |
| "step": 8280 |
| }, |
| { |
| "epoch": 6.237772761474793, |
| "grad_norm": 5.108582019805908, |
| "learning_rate": 7.645681799180671e-06, |
| "loss": 0.1471, |
| "step": 8290 |
| }, |
| { |
| "epoch": 6.245297215951844, |
| "grad_norm": 4.792969703674316, |
| "learning_rate": 7.64150154669342e-06, |
| "loss": 0.126, |
| "step": 8300 |
| }, |
| { |
| "epoch": 6.252821670428894, |
| "grad_norm": 8.351136207580566, |
| "learning_rate": 7.63732129420617e-06, |
| "loss": 0.1144, |
| "step": 8310 |
| }, |
| { |
| "epoch": 6.260346124905944, |
| "grad_norm": 9.570772171020508, |
| "learning_rate": 7.63314104171892e-06, |
| "loss": 0.2101, |
| "step": 8320 |
| }, |
| { |
| "epoch": 6.267870579382995, |
| "grad_norm": 10.960972785949707, |
| "learning_rate": 7.6289607892316695e-06, |
| "loss": 0.2095, |
| "step": 8330 |
| }, |
| { |
| "epoch": 6.275395033860045, |
| "grad_norm": 8.7171049118042, |
| "learning_rate": 7.624780536744421e-06, |
| "loss": 0.1396, |
| "step": 8340 |
| }, |
| { |
| "epoch": 6.282919488337096, |
| "grad_norm": 6.793865203857422, |
| "learning_rate": 7.62060028425717e-06, |
| "loss": 0.1524, |
| "step": 8350 |
| }, |
| { |
| "epoch": 6.290443942814146, |
| "grad_norm": 13.218555450439453, |
| "learning_rate": 7.61642003176992e-06, |
| "loss": 0.1649, |
| "step": 8360 |
| }, |
| { |
| "epoch": 6.297968397291196, |
| "grad_norm": 5.084217071533203, |
| "learning_rate": 7.6122397792826694e-06, |
| "loss": 0.144, |
| "step": 8370 |
| }, |
| { |
| "epoch": 6.305492851768247, |
| "grad_norm": 9.322858810424805, |
| "learning_rate": 7.608059526795419e-06, |
| "loss": 0.1576, |
| "step": 8380 |
| }, |
| { |
| "epoch": 6.313017306245297, |
| "grad_norm": 7.766299247741699, |
| "learning_rate": 7.603879274308169e-06, |
| "loss": 0.1581, |
| "step": 8390 |
| }, |
| { |
| "epoch": 6.320541760722348, |
| "grad_norm": 10.082528114318848, |
| "learning_rate": 7.599699021820919e-06, |
| "loss": 0.1198, |
| "step": 8400 |
| }, |
| { |
| "epoch": 6.328066215199398, |
| "grad_norm": 6.375280857086182, |
| "learning_rate": 7.5955187693336685e-06, |
| "loss": 0.1053, |
| "step": 8410 |
| }, |
| { |
| "epoch": 6.335590669676448, |
| "grad_norm": 8.377439498901367, |
| "learning_rate": 7.591338516846418e-06, |
| "loss": 0.1259, |
| "step": 8420 |
| }, |
| { |
| "epoch": 6.343115124153499, |
| "grad_norm": 7.567884922027588, |
| "learning_rate": 7.5871582643591676e-06, |
| "loss": 0.1555, |
| "step": 8430 |
| }, |
| { |
| "epoch": 6.350639578630549, |
| "grad_norm": 9.82593059539795, |
| "learning_rate": 7.582978011871918e-06, |
| "loss": 0.228, |
| "step": 8440 |
| }, |
| { |
| "epoch": 6.3581640331076, |
| "grad_norm": 3.6331682205200195, |
| "learning_rate": 7.5787977593846675e-06, |
| "loss": 0.1114, |
| "step": 8450 |
| }, |
| { |
| "epoch": 6.3656884875846504, |
| "grad_norm": 6.77184534072876, |
| "learning_rate": 7.574617506897417e-06, |
| "loss": 0.1114, |
| "step": 8460 |
| }, |
| { |
| "epoch": 6.3732129420617, |
| "grad_norm": 9.455371856689453, |
| "learning_rate": 7.570437254410167e-06, |
| "loss": 0.1958, |
| "step": 8470 |
| }, |
| { |
| "epoch": 6.380737396538751, |
| "grad_norm": 9.314596176147461, |
| "learning_rate": 7.566257001922916e-06, |
| "loss": 0.1667, |
| "step": 8480 |
| }, |
| { |
| "epoch": 6.388261851015802, |
| "grad_norm": 7.729313850402832, |
| "learning_rate": 7.5620767494356666e-06, |
| "loss": 0.1515, |
| "step": 8490 |
| }, |
| { |
| "epoch": 6.395786305492852, |
| "grad_norm": 6.675351619720459, |
| "learning_rate": 7.557896496948416e-06, |
| "loss": 0.1819, |
| "step": 8500 |
| }, |
| { |
| "epoch": 6.403310759969902, |
| "grad_norm": 8.883378028869629, |
| "learning_rate": 7.553716244461166e-06, |
| "loss": 0.1788, |
| "step": 8510 |
| }, |
| { |
| "epoch": 6.410835214446952, |
| "grad_norm": 5.213517665863037, |
| "learning_rate": 7.549535991973915e-06, |
| "loss": 0.1769, |
| "step": 8520 |
| }, |
| { |
| "epoch": 6.418359668924003, |
| "grad_norm": 7.817873954772949, |
| "learning_rate": 7.5453557394866664e-06, |
| "loss": 0.1976, |
| "step": 8530 |
| }, |
| { |
| "epoch": 6.425884123401054, |
| "grad_norm": 10.222846984863281, |
| "learning_rate": 7.541175486999416e-06, |
| "loss": 0.209, |
| "step": 8540 |
| }, |
| { |
| "epoch": 6.433408577878104, |
| "grad_norm": 7.177674770355225, |
| "learning_rate": 7.5369952345121655e-06, |
| "loss": 0.1755, |
| "step": 8550 |
| }, |
| { |
| "epoch": 6.440933032355154, |
| "grad_norm": 8.744006156921387, |
| "learning_rate": 7.532814982024915e-06, |
| "loss": 0.1434, |
| "step": 8560 |
| }, |
| { |
| "epoch": 6.448457486832204, |
| "grad_norm": 7.118343353271484, |
| "learning_rate": 7.528634729537665e-06, |
| "loss": 0.1724, |
| "step": 8570 |
| }, |
| { |
| "epoch": 6.455981941309255, |
| "grad_norm": 10.586155891418457, |
| "learning_rate": 7.524454477050415e-06, |
| "loss": 0.1731, |
| "step": 8580 |
| }, |
| { |
| "epoch": 6.463506395786306, |
| "grad_norm": 4.590671539306641, |
| "learning_rate": 7.520274224563165e-06, |
| "loss": 0.1541, |
| "step": 8590 |
| }, |
| { |
| "epoch": 6.471030850263356, |
| "grad_norm": 5.464056015014648, |
| "learning_rate": 7.516093972075914e-06, |
| "loss": 0.1574, |
| "step": 8600 |
| }, |
| { |
| "epoch": 6.478555304740406, |
| "grad_norm": 8.129666328430176, |
| "learning_rate": 7.511913719588664e-06, |
| "loss": 0.1433, |
| "step": 8610 |
| }, |
| { |
| "epoch": 6.486079759217457, |
| "grad_norm": 5.378341197967529, |
| "learning_rate": 7.507733467101413e-06, |
| "loss": 0.1796, |
| "step": 8620 |
| }, |
| { |
| "epoch": 6.493604213694507, |
| "grad_norm": 6.7451605796813965, |
| "learning_rate": 7.503553214614163e-06, |
| "loss": 0.1559, |
| "step": 8630 |
| }, |
| { |
| "epoch": 6.501128668171558, |
| "grad_norm": 8.130012512207031, |
| "learning_rate": 7.499372962126913e-06, |
| "loss": 0.1444, |
| "step": 8640 |
| }, |
| { |
| "epoch": 6.508653122648608, |
| "grad_norm": 12.124320983886719, |
| "learning_rate": 7.495192709639663e-06, |
| "loss": 0.1965, |
| "step": 8650 |
| }, |
| { |
| "epoch": 6.516177577125658, |
| "grad_norm": 6.617115020751953, |
| "learning_rate": 7.491012457152412e-06, |
| "loss": 0.1638, |
| "step": 8660 |
| }, |
| { |
| "epoch": 6.523702031602709, |
| "grad_norm": 9.720376968383789, |
| "learning_rate": 7.486832204665162e-06, |
| "loss": 0.1831, |
| "step": 8670 |
| }, |
| { |
| "epoch": 6.531226486079759, |
| "grad_norm": 9.170286178588867, |
| "learning_rate": 7.482651952177911e-06, |
| "loss": 0.1484, |
| "step": 8680 |
| }, |
| { |
| "epoch": 6.53875094055681, |
| "grad_norm": 7.603338241577148, |
| "learning_rate": 7.478471699690662e-06, |
| "loss": 0.1604, |
| "step": 8690 |
| }, |
| { |
| "epoch": 6.54627539503386, |
| "grad_norm": 10.253371238708496, |
| "learning_rate": 7.474291447203411e-06, |
| "loss": 0.1618, |
| "step": 8700 |
| }, |
| { |
| "epoch": 6.55379984951091, |
| "grad_norm": 7.7780890464782715, |
| "learning_rate": 7.470111194716162e-06, |
| "loss": 0.1819, |
| "step": 8710 |
| }, |
| { |
| "epoch": 6.561324303987961, |
| "grad_norm": 8.585005760192871, |
| "learning_rate": 7.465930942228912e-06, |
| "loss": 0.1276, |
| "step": 8720 |
| }, |
| { |
| "epoch": 6.568848758465011, |
| "grad_norm": 7.06712532043457, |
| "learning_rate": 7.461750689741662e-06, |
| "loss": 0.1413, |
| "step": 8730 |
| }, |
| { |
| "epoch": 6.576373212942062, |
| "grad_norm": 8.646645545959473, |
| "learning_rate": 7.457570437254411e-06, |
| "loss": 0.1467, |
| "step": 8740 |
| }, |
| { |
| "epoch": 6.583897667419112, |
| "grad_norm": 4.02337646484375, |
| "learning_rate": 7.453390184767161e-06, |
| "loss": 0.1563, |
| "step": 8750 |
| }, |
| { |
| "epoch": 6.591422121896162, |
| "grad_norm": 8.277909278869629, |
| "learning_rate": 7.44920993227991e-06, |
| "loss": 0.1693, |
| "step": 8760 |
| }, |
| { |
| "epoch": 6.598946576373213, |
| "grad_norm": 10.024460792541504, |
| "learning_rate": 7.44502967979266e-06, |
| "loss": 0.1785, |
| "step": 8770 |
| }, |
| { |
| "epoch": 6.606471030850264, |
| "grad_norm": 8.2098388671875, |
| "learning_rate": 7.44084942730541e-06, |
| "loss": 0.1633, |
| "step": 8780 |
| }, |
| { |
| "epoch": 6.6139954853273135, |
| "grad_norm": 11.487617492675781, |
| "learning_rate": 7.43666917481816e-06, |
| "loss": 0.142, |
| "step": 8790 |
| }, |
| { |
| "epoch": 6.621519939804364, |
| "grad_norm": 10.144819259643555, |
| "learning_rate": 7.432488922330909e-06, |
| "loss": 0.1462, |
| "step": 8800 |
| }, |
| { |
| "epoch": 6.629044394281415, |
| "grad_norm": 7.244460582733154, |
| "learning_rate": 7.428308669843659e-06, |
| "loss": 0.1517, |
| "step": 8810 |
| }, |
| { |
| "epoch": 6.636568848758465, |
| "grad_norm": 2.242067575454712, |
| "learning_rate": 7.4241284173564084e-06, |
| "loss": 0.1339, |
| "step": 8820 |
| }, |
| { |
| "epoch": 6.644093303235516, |
| "grad_norm": 6.76652193069458, |
| "learning_rate": 7.419948164869159e-06, |
| "loss": 0.1485, |
| "step": 8830 |
| }, |
| { |
| "epoch": 6.6516177577125655, |
| "grad_norm": 10.644311904907227, |
| "learning_rate": 7.415767912381908e-06, |
| "loss": 0.1635, |
| "step": 8840 |
| }, |
| { |
| "epoch": 6.659142212189616, |
| "grad_norm": 10.187577247619629, |
| "learning_rate": 7.411587659894658e-06, |
| "loss": 0.1605, |
| "step": 8850 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 4.452393054962158, |
| "learning_rate": 7.4074074074074075e-06, |
| "loss": 0.1381, |
| "step": 8860 |
| }, |
| { |
| "epoch": 6.674191121143717, |
| "grad_norm": 4.998362064361572, |
| "learning_rate": 7.403227154920157e-06, |
| "loss": 0.1349, |
| "step": 8870 |
| }, |
| { |
| "epoch": 6.681715575620768, |
| "grad_norm": 6.7441534996032715, |
| "learning_rate": 7.3990469024329074e-06, |
| "loss": 0.1463, |
| "step": 8880 |
| }, |
| { |
| "epoch": 6.689240030097817, |
| "grad_norm": 7.791492462158203, |
| "learning_rate": 7.394866649945658e-06, |
| "loss": 0.1352, |
| "step": 8890 |
| }, |
| { |
| "epoch": 6.696764484574868, |
| "grad_norm": 4.74707555770874, |
| "learning_rate": 7.390686397458407e-06, |
| "loss": 0.1588, |
| "step": 8900 |
| }, |
| { |
| "epoch": 6.704288939051919, |
| "grad_norm": 6.361732482910156, |
| "learning_rate": 7.386506144971157e-06, |
| "loss": 0.194, |
| "step": 8910 |
| }, |
| { |
| "epoch": 6.711813393528969, |
| "grad_norm": 5.4700093269348145, |
| "learning_rate": 7.382325892483907e-06, |
| "loss": 0.0973, |
| "step": 8920 |
| }, |
| { |
| "epoch": 6.7193378480060195, |
| "grad_norm": 6.188055992126465, |
| "learning_rate": 7.378145639996657e-06, |
| "loss": 0.1872, |
| "step": 8930 |
| }, |
| { |
| "epoch": 6.72686230248307, |
| "grad_norm": 9.050508499145508, |
| "learning_rate": 7.373965387509406e-06, |
| "loss": 0.1253, |
| "step": 8940 |
| }, |
| { |
| "epoch": 6.73438675696012, |
| "grad_norm": 7.569061756134033, |
| "learning_rate": 7.369785135022156e-06, |
| "loss": 0.14, |
| "step": 8950 |
| }, |
| { |
| "epoch": 6.741911211437171, |
| "grad_norm": 4.738898754119873, |
| "learning_rate": 7.3656048825349055e-06, |
| "loss": 0.1375, |
| "step": 8960 |
| }, |
| { |
| "epoch": 6.749435665914222, |
| "grad_norm": 8.632952690124512, |
| "learning_rate": 7.361424630047656e-06, |
| "loss": 0.1495, |
| "step": 8970 |
| }, |
| { |
| "epoch": 6.7569601203912715, |
| "grad_norm": 7.746445655822754, |
| "learning_rate": 7.3572443775604055e-06, |
| "loss": 0.1338, |
| "step": 8980 |
| }, |
| { |
| "epoch": 6.764484574868322, |
| "grad_norm": 8.78629207611084, |
| "learning_rate": 7.353064125073155e-06, |
| "loss": 0.1672, |
| "step": 8990 |
| }, |
| { |
| "epoch": 6.772009029345372, |
| "grad_norm": 10.222789764404297, |
| "learning_rate": 7.3488838725859046e-06, |
| "loss": 0.2174, |
| "step": 9000 |
| }, |
| { |
| "epoch": 6.779533483822423, |
| "grad_norm": 9.454453468322754, |
| "learning_rate": 7.344703620098654e-06, |
| "loss": 0.2156, |
| "step": 9010 |
| }, |
| { |
| "epoch": 6.787057938299474, |
| "grad_norm": 9.131001472473145, |
| "learning_rate": 7.340523367611404e-06, |
| "loss": 0.1334, |
| "step": 9020 |
| }, |
| { |
| "epoch": 6.794582392776523, |
| "grad_norm": 9.862558364868164, |
| "learning_rate": 7.336343115124154e-06, |
| "loss": 0.1254, |
| "step": 9030 |
| }, |
| { |
| "epoch": 6.802106847253574, |
| "grad_norm": 12.024687767028809, |
| "learning_rate": 7.332162862636904e-06, |
| "loss": 0.1612, |
| "step": 9040 |
| }, |
| { |
| "epoch": 6.809631301730624, |
| "grad_norm": 6.9240946769714355, |
| "learning_rate": 7.327982610149653e-06, |
| "loss": 0.1136, |
| "step": 9050 |
| }, |
| { |
| "epoch": 6.817155756207675, |
| "grad_norm": 6.730030536651611, |
| "learning_rate": 7.323802357662403e-06, |
| "loss": 0.1415, |
| "step": 9060 |
| }, |
| { |
| "epoch": 6.8246802106847255, |
| "grad_norm": 10.351944923400879, |
| "learning_rate": 7.319622105175154e-06, |
| "loss": 0.1725, |
| "step": 9070 |
| }, |
| { |
| "epoch": 6.832204665161775, |
| "grad_norm": 5.732717514038086, |
| "learning_rate": 7.3154418526879035e-06, |
| "loss": 0.1599, |
| "step": 9080 |
| }, |
| { |
| "epoch": 6.839729119638826, |
| "grad_norm": 10.521687507629395, |
| "learning_rate": 7.311261600200653e-06, |
| "loss": 0.1625, |
| "step": 9090 |
| }, |
| { |
| "epoch": 6.847253574115877, |
| "grad_norm": 9.526365280151367, |
| "learning_rate": 7.307081347713403e-06, |
| "loss": 0.1078, |
| "step": 9100 |
| }, |
| { |
| "epoch": 6.854778028592927, |
| "grad_norm": 2.771315097808838, |
| "learning_rate": 7.302901095226152e-06, |
| "loss": 0.123, |
| "step": 9110 |
| }, |
| { |
| "epoch": 6.8623024830699775, |
| "grad_norm": 9.39211654663086, |
| "learning_rate": 7.2987208427389025e-06, |
| "loss": 0.1554, |
| "step": 9120 |
| }, |
| { |
| "epoch": 6.869826937547028, |
| "grad_norm": 9.30453109741211, |
| "learning_rate": 7.294540590251652e-06, |
| "loss": 0.1153, |
| "step": 9130 |
| }, |
| { |
| "epoch": 6.877351392024078, |
| "grad_norm": 8.631791114807129, |
| "learning_rate": 7.290360337764402e-06, |
| "loss": 0.1256, |
| "step": 9140 |
| }, |
| { |
| "epoch": 6.884875846501129, |
| "grad_norm": 7.919106960296631, |
| "learning_rate": 7.286180085277151e-06, |
| "loss": 0.226, |
| "step": 9150 |
| }, |
| { |
| "epoch": 6.89240030097818, |
| "grad_norm": 12.256912231445312, |
| "learning_rate": 7.281999832789901e-06, |
| "loss": 0.1843, |
| "step": 9160 |
| }, |
| { |
| "epoch": 6.899924755455229, |
| "grad_norm": 9.76773452758789, |
| "learning_rate": 7.277819580302651e-06, |
| "loss": 0.1758, |
| "step": 9170 |
| }, |
| { |
| "epoch": 6.90744920993228, |
| "grad_norm": 7.5923075675964355, |
| "learning_rate": 7.273639327815401e-06, |
| "loss": 0.1876, |
| "step": 9180 |
| }, |
| { |
| "epoch": 6.91497366440933, |
| "grad_norm": 9.641352653503418, |
| "learning_rate": 7.26945907532815e-06, |
| "loss": 0.1493, |
| "step": 9190 |
| }, |
| { |
| "epoch": 6.922498118886381, |
| "grad_norm": 7.658628940582275, |
| "learning_rate": 7.2652788228409e-06, |
| "loss": 0.1506, |
| "step": 9200 |
| }, |
| { |
| "epoch": 6.9300225733634315, |
| "grad_norm": 10.023941040039062, |
| "learning_rate": 7.261098570353649e-06, |
| "loss": 0.1427, |
| "step": 9210 |
| }, |
| { |
| "epoch": 6.937547027840481, |
| "grad_norm": 10.135645866394043, |
| "learning_rate": 7.2569183178664e-06, |
| "loss": 0.1777, |
| "step": 9220 |
| }, |
| { |
| "epoch": 6.945071482317532, |
| "grad_norm": 8.289971351623535, |
| "learning_rate": 7.252738065379149e-06, |
| "loss": 0.1751, |
| "step": 9230 |
| }, |
| { |
| "epoch": 6.952595936794582, |
| "grad_norm": 8.860151290893555, |
| "learning_rate": 7.248557812891899e-06, |
| "loss": 0.1426, |
| "step": 9240 |
| }, |
| { |
| "epoch": 6.960120391271633, |
| "grad_norm": 6.4370903968811035, |
| "learning_rate": 7.244377560404649e-06, |
| "loss": 0.1784, |
| "step": 9250 |
| }, |
| { |
| "epoch": 6.9676448457486835, |
| "grad_norm": 12.02550220489502, |
| "learning_rate": 7.2401973079174e-06, |
| "loss": 0.1894, |
| "step": 9260 |
| }, |
| { |
| "epoch": 6.975169300225733, |
| "grad_norm": 10.728776931762695, |
| "learning_rate": 7.236017055430149e-06, |
| "loss": 0.1437, |
| "step": 9270 |
| }, |
| { |
| "epoch": 6.982693754702784, |
| "grad_norm": 6.684060096740723, |
| "learning_rate": 7.231836802942899e-06, |
| "loss": 0.1483, |
| "step": 9280 |
| }, |
| { |
| "epoch": 6.990218209179835, |
| "grad_norm": 9.681809425354004, |
| "learning_rate": 7.227656550455648e-06, |
| "loss": 0.1908, |
| "step": 9290 |
| }, |
| { |
| "epoch": 6.997742663656885, |
| "grad_norm": 7.41337251663208, |
| "learning_rate": 7.223476297968398e-06, |
| "loss": 0.1655, |
| "step": 9300 |
| }, |
| { |
| "epoch": 7.005267118133935, |
| "grad_norm": 9.84195327758789, |
| "learning_rate": 7.219296045481148e-06, |
| "loss": 0.1548, |
| "step": 9310 |
| }, |
| { |
| "epoch": 7.012791572610985, |
| "grad_norm": 4.892616271972656, |
| "learning_rate": 7.215115792993898e-06, |
| "loss": 0.1249, |
| "step": 9320 |
| }, |
| { |
| "epoch": 7.020316027088036, |
| "grad_norm": 5.397826671600342, |
| "learning_rate": 7.210935540506647e-06, |
| "loss": 0.1382, |
| "step": 9330 |
| }, |
| { |
| "epoch": 7.027840481565087, |
| "grad_norm": 5.65526008605957, |
| "learning_rate": 7.206755288019397e-06, |
| "loss": 0.1215, |
| "step": 9340 |
| }, |
| { |
| "epoch": 7.035364936042137, |
| "grad_norm": 10.712868690490723, |
| "learning_rate": 7.202575035532146e-06, |
| "loss": 0.1923, |
| "step": 9350 |
| }, |
| { |
| "epoch": 7.042889390519187, |
| "grad_norm": 5.386087417602539, |
| "learning_rate": 7.198394783044897e-06, |
| "loss": 0.1403, |
| "step": 9360 |
| }, |
| { |
| "epoch": 7.050413844996238, |
| "grad_norm": 5.49284029006958, |
| "learning_rate": 7.194214530557646e-06, |
| "loss": 0.1359, |
| "step": 9370 |
| }, |
| { |
| "epoch": 7.057938299473288, |
| "grad_norm": 10.279863357543945, |
| "learning_rate": 7.190034278070396e-06, |
| "loss": 0.1786, |
| "step": 9380 |
| }, |
| { |
| "epoch": 7.065462753950339, |
| "grad_norm": 1.2782478332519531, |
| "learning_rate": 7.185854025583145e-06, |
| "loss": 0.1316, |
| "step": 9390 |
| }, |
| { |
| "epoch": 7.072987208427389, |
| "grad_norm": 10.440459251403809, |
| "learning_rate": 7.181673773095895e-06, |
| "loss": 0.1618, |
| "step": 9400 |
| }, |
| { |
| "epoch": 7.080511662904439, |
| "grad_norm": 2.517453908920288, |
| "learning_rate": 7.1774935206086445e-06, |
| "loss": 0.1037, |
| "step": 9410 |
| }, |
| { |
| "epoch": 7.08803611738149, |
| "grad_norm": 13.917778968811035, |
| "learning_rate": 7.173313268121395e-06, |
| "loss": 0.1435, |
| "step": 9420 |
| }, |
| { |
| "epoch": 7.09556057185854, |
| "grad_norm": 12.317310333251953, |
| "learning_rate": 7.169133015634145e-06, |
| "loss": 0.1318, |
| "step": 9430 |
| }, |
| { |
| "epoch": 7.103085026335591, |
| "grad_norm": 9.198356628417969, |
| "learning_rate": 7.164952763146895e-06, |
| "loss": 0.1214, |
| "step": 9440 |
| }, |
| { |
| "epoch": 7.110609480812641, |
| "grad_norm": 9.149123191833496, |
| "learning_rate": 7.160772510659645e-06, |
| "loss": 0.1442, |
| "step": 9450 |
| }, |
| { |
| "epoch": 7.118133935289691, |
| "grad_norm": 7.893470287322998, |
| "learning_rate": 7.156592258172395e-06, |
| "loss": 0.1711, |
| "step": 9460 |
| }, |
| { |
| "epoch": 7.125658389766742, |
| "grad_norm": 11.737761497497559, |
| "learning_rate": 7.152412005685144e-06, |
| "loss": 0.1204, |
| "step": 9470 |
| }, |
| { |
| "epoch": 7.133182844243792, |
| "grad_norm": 7.861152648925781, |
| "learning_rate": 7.148231753197894e-06, |
| "loss": 0.1883, |
| "step": 9480 |
| }, |
| { |
| "epoch": 7.140707298720843, |
| "grad_norm": 4.260904312133789, |
| "learning_rate": 7.1440515007106434e-06, |
| "loss": 0.1673, |
| "step": 9490 |
| }, |
| { |
| "epoch": 7.148231753197893, |
| "grad_norm": 4.621326923370361, |
| "learning_rate": 7.139871248223393e-06, |
| "loss": 0.1511, |
| "step": 9500 |
| }, |
| { |
| "epoch": 7.155756207674943, |
| "grad_norm": 6.470260143280029, |
| "learning_rate": 7.135690995736143e-06, |
| "loss": 0.1465, |
| "step": 9510 |
| }, |
| { |
| "epoch": 7.163280662151994, |
| "grad_norm": 13.358270645141602, |
| "learning_rate": 7.131510743248893e-06, |
| "loss": 0.1431, |
| "step": 9520 |
| }, |
| { |
| "epoch": 7.170805116629045, |
| "grad_norm": 6.210266590118408, |
| "learning_rate": 7.1273304907616425e-06, |
| "loss": 0.141, |
| "step": 9530 |
| }, |
| { |
| "epoch": 7.178329571106095, |
| "grad_norm": 11.789570808410645, |
| "learning_rate": 7.123150238274392e-06, |
| "loss": 0.1504, |
| "step": 9540 |
| }, |
| { |
| "epoch": 7.185854025583145, |
| "grad_norm": 6.791030406951904, |
| "learning_rate": 7.118969985787142e-06, |
| "loss": 0.1321, |
| "step": 9550 |
| }, |
| { |
| "epoch": 7.193378480060195, |
| "grad_norm": 4.915945529937744, |
| "learning_rate": 7.114789733299892e-06, |
| "loss": 0.1531, |
| "step": 9560 |
| }, |
| { |
| "epoch": 7.200902934537246, |
| "grad_norm": 5.299534797668457, |
| "learning_rate": 7.1106094808126415e-06, |
| "loss": 0.1585, |
| "step": 9570 |
| }, |
| { |
| "epoch": 7.208427389014297, |
| "grad_norm": 7.799785137176514, |
| "learning_rate": 7.106429228325391e-06, |
| "loss": 0.1507, |
| "step": 9580 |
| }, |
| { |
| "epoch": 7.2159518434913466, |
| "grad_norm": 6.652139663696289, |
| "learning_rate": 7.102248975838141e-06, |
| "loss": 0.1303, |
| "step": 9590 |
| }, |
| { |
| "epoch": 7.223476297968397, |
| "grad_norm": 8.317214965820312, |
| "learning_rate": 7.09806872335089e-06, |
| "loss": 0.1529, |
| "step": 9600 |
| }, |
| { |
| "epoch": 7.231000752445448, |
| "grad_norm": 7.286659240722656, |
| "learning_rate": 7.093888470863641e-06, |
| "loss": 0.1251, |
| "step": 9610 |
| }, |
| { |
| "epoch": 7.238525206922498, |
| "grad_norm": 5.869454860687256, |
| "learning_rate": 7.089708218376391e-06, |
| "loss": 0.1439, |
| "step": 9620 |
| }, |
| { |
| "epoch": 7.246049661399549, |
| "grad_norm": 7.951076984405518, |
| "learning_rate": 7.0855279658891405e-06, |
| "loss": 0.1603, |
| "step": 9630 |
| }, |
| { |
| "epoch": 7.253574115876599, |
| "grad_norm": 2.182021141052246, |
| "learning_rate": 7.08134771340189e-06, |
| "loss": 0.1119, |
| "step": 9640 |
| }, |
| { |
| "epoch": 7.261098570353649, |
| "grad_norm": 11.444308280944824, |
| "learning_rate": 7.0771674609146405e-06, |
| "loss": 0.1395, |
| "step": 9650 |
| }, |
| { |
| "epoch": 7.2686230248307, |
| "grad_norm": 9.718505859375, |
| "learning_rate": 7.07298720842739e-06, |
| "loss": 0.1535, |
| "step": 9660 |
| }, |
| { |
| "epoch": 7.27614747930775, |
| "grad_norm": 6.41453218460083, |
| "learning_rate": 7.0688069559401396e-06, |
| "loss": 0.1113, |
| "step": 9670 |
| }, |
| { |
| "epoch": 7.283671933784801, |
| "grad_norm": 11.704097747802734, |
| "learning_rate": 7.064626703452889e-06, |
| "loss": 0.1497, |
| "step": 9680 |
| }, |
| { |
| "epoch": 7.291196388261851, |
| "grad_norm": 8.959087371826172, |
| "learning_rate": 7.060446450965639e-06, |
| "loss": 0.1714, |
| "step": 9690 |
| }, |
| { |
| "epoch": 7.298720842738901, |
| "grad_norm": 4.717805862426758, |
| "learning_rate": 7.056266198478389e-06, |
| "loss": 0.1232, |
| "step": 9700 |
| }, |
| { |
| "epoch": 7.306245297215952, |
| "grad_norm": 6.030142307281494, |
| "learning_rate": 7.052085945991139e-06, |
| "loss": 0.0906, |
| "step": 9710 |
| }, |
| { |
| "epoch": 7.313769751693002, |
| "grad_norm": 3.481350898742676, |
| "learning_rate": 7.047905693503888e-06, |
| "loss": 0.1353, |
| "step": 9720 |
| }, |
| { |
| "epoch": 7.3212942061700526, |
| "grad_norm": 12.188994407653809, |
| "learning_rate": 7.043725441016638e-06, |
| "loss": 0.2042, |
| "step": 9730 |
| }, |
| { |
| "epoch": 7.328818660647103, |
| "grad_norm": 4.677647113800049, |
| "learning_rate": 7.039545188529387e-06, |
| "loss": 0.1216, |
| "step": 9740 |
| }, |
| { |
| "epoch": 7.336343115124153, |
| "grad_norm": 9.761384010314941, |
| "learning_rate": 7.035364936042137e-06, |
| "loss": 0.1193, |
| "step": 9750 |
| }, |
| { |
| "epoch": 7.343867569601204, |
| "grad_norm": 8.550495147705078, |
| "learning_rate": 7.031184683554887e-06, |
| "loss": 0.1348, |
| "step": 9760 |
| }, |
| { |
| "epoch": 7.351392024078255, |
| "grad_norm": 8.725384712219238, |
| "learning_rate": 7.027004431067637e-06, |
| "loss": 0.1579, |
| "step": 9770 |
| }, |
| { |
| "epoch": 7.3589164785553045, |
| "grad_norm": 7.324394702911377, |
| "learning_rate": 7.022824178580386e-06, |
| "loss": 0.1552, |
| "step": 9780 |
| }, |
| { |
| "epoch": 7.366440933032355, |
| "grad_norm": 10.771610260009766, |
| "learning_rate": 7.018643926093136e-06, |
| "loss": 0.095, |
| "step": 9790 |
| }, |
| { |
| "epoch": 7.373965387509406, |
| "grad_norm": 9.422534942626953, |
| "learning_rate": 7.014463673605887e-06, |
| "loss": 0.1488, |
| "step": 9800 |
| }, |
| { |
| "epoch": 7.381489841986456, |
| "grad_norm": 10.947734832763672, |
| "learning_rate": 7.010283421118637e-06, |
| "loss": 0.1326, |
| "step": 9810 |
| }, |
| { |
| "epoch": 7.389014296463507, |
| "grad_norm": 7.345083236694336, |
| "learning_rate": 7.006103168631386e-06, |
| "loss": 0.1573, |
| "step": 9820 |
| }, |
| { |
| "epoch": 7.3965387509405565, |
| "grad_norm": 10.11557674407959, |
| "learning_rate": 7.001922916144136e-06, |
| "loss": 0.1321, |
| "step": 9830 |
| }, |
| { |
| "epoch": 7.404063205417607, |
| "grad_norm": 7.984189033508301, |
| "learning_rate": 6.997742663656886e-06, |
| "loss": 0.152, |
| "step": 9840 |
| }, |
| { |
| "epoch": 7.411587659894658, |
| "grad_norm": 12.054664611816406, |
| "learning_rate": 6.993562411169636e-06, |
| "loss": 0.1605, |
| "step": 9850 |
| }, |
| { |
| "epoch": 7.419112114371708, |
| "grad_norm": 5.764801025390625, |
| "learning_rate": 6.989382158682385e-06, |
| "loss": 0.1356, |
| "step": 9860 |
| }, |
| { |
| "epoch": 7.426636568848759, |
| "grad_norm": 6.675129413604736, |
| "learning_rate": 6.985201906195135e-06, |
| "loss": 0.1324, |
| "step": 9870 |
| }, |
| { |
| "epoch": 7.434161023325808, |
| "grad_norm": 8.892173767089844, |
| "learning_rate": 6.981021653707884e-06, |
| "loss": 0.1916, |
| "step": 9880 |
| }, |
| { |
| "epoch": 7.441685477802859, |
| "grad_norm": 8.714239120483398, |
| "learning_rate": 6.976841401220634e-06, |
| "loss": 0.15, |
| "step": 9890 |
| }, |
| { |
| "epoch": 7.44920993227991, |
| "grad_norm": 8.057106971740723, |
| "learning_rate": 6.972661148733384e-06, |
| "loss": 0.22, |
| "step": 9900 |
| }, |
| { |
| "epoch": 7.45673438675696, |
| "grad_norm": 4.123317718505859, |
| "learning_rate": 6.968480896246134e-06, |
| "loss": 0.1306, |
| "step": 9910 |
| }, |
| { |
| "epoch": 7.4642588412340105, |
| "grad_norm": 3.9179821014404297, |
| "learning_rate": 6.964300643758883e-06, |
| "loss": 0.1524, |
| "step": 9920 |
| }, |
| { |
| "epoch": 7.471783295711061, |
| "grad_norm": 3.3642544746398926, |
| "learning_rate": 6.960120391271633e-06, |
| "loss": 0.1418, |
| "step": 9930 |
| }, |
| { |
| "epoch": 7.479307750188111, |
| "grad_norm": 9.933600425720215, |
| "learning_rate": 6.9559401387843825e-06, |
| "loss": 0.1309, |
| "step": 9940 |
| }, |
| { |
| "epoch": 7.486832204665162, |
| "grad_norm": 7.034138202667236, |
| "learning_rate": 6.951759886297133e-06, |
| "loss": 0.1102, |
| "step": 9950 |
| }, |
| { |
| "epoch": 7.494356659142213, |
| "grad_norm": 6.040769100189209, |
| "learning_rate": 6.947579633809882e-06, |
| "loss": 0.169, |
| "step": 9960 |
| }, |
| { |
| "epoch": 7.5018811136192625, |
| "grad_norm": 6.807097434997559, |
| "learning_rate": 6.943399381322632e-06, |
| "loss": 0.1343, |
| "step": 9970 |
| }, |
| { |
| "epoch": 7.509405568096313, |
| "grad_norm": 8.873719215393066, |
| "learning_rate": 6.939219128835382e-06, |
| "loss": 0.2087, |
| "step": 9980 |
| }, |
| { |
| "epoch": 7.516930022573363, |
| "grad_norm": 6.61569356918335, |
| "learning_rate": 6.935038876348133e-06, |
| "loss": 0.1491, |
| "step": 9990 |
| }, |
| { |
| "epoch": 7.524454477050414, |
| "grad_norm": 7.067235946655273, |
| "learning_rate": 6.930858623860882e-06, |
| "loss": 0.1299, |
| "step": 10000 |
| }, |
| { |
| "epoch": 7.531978931527465, |
| "grad_norm": 6.512937068939209, |
| "learning_rate": 6.926678371373632e-06, |
| "loss": 0.1534, |
| "step": 10010 |
| }, |
| { |
| "epoch": 7.539503386004514, |
| "grad_norm": 6.557222843170166, |
| "learning_rate": 6.922498118886381e-06, |
| "loss": 0.1079, |
| "step": 10020 |
| }, |
| { |
| "epoch": 7.547027840481565, |
| "grad_norm": 11.565508842468262, |
| "learning_rate": 6.918317866399131e-06, |
| "loss": 0.1389, |
| "step": 10030 |
| }, |
| { |
| "epoch": 7.554552294958615, |
| "grad_norm": 6.491377830505371, |
| "learning_rate": 6.914137613911881e-06, |
| "loss": 0.1371, |
| "step": 10040 |
| }, |
| { |
| "epoch": 7.562076749435666, |
| "grad_norm": 7.546027660369873, |
| "learning_rate": 6.909957361424631e-06, |
| "loss": 0.1332, |
| "step": 10050 |
| }, |
| { |
| "epoch": 7.5696012039127165, |
| "grad_norm": 7.451356887817383, |
| "learning_rate": 6.9057771089373804e-06, |
| "loss": 0.1036, |
| "step": 10060 |
| }, |
| { |
| "epoch": 7.577125658389766, |
| "grad_norm": 11.477706909179688, |
| "learning_rate": 6.90159685645013e-06, |
| "loss": 0.1742, |
| "step": 10070 |
| }, |
| { |
| "epoch": 7.584650112866817, |
| "grad_norm": 6.118583679199219, |
| "learning_rate": 6.8974166039628795e-06, |
| "loss": 0.1317, |
| "step": 10080 |
| }, |
| { |
| "epoch": 7.592174567343868, |
| "grad_norm": 8.546781539916992, |
| "learning_rate": 6.89323635147563e-06, |
| "loss": 0.1236, |
| "step": 10090 |
| }, |
| { |
| "epoch": 7.599699021820918, |
| "grad_norm": 8.164753913879395, |
| "learning_rate": 6.8890560989883795e-06, |
| "loss": 0.1189, |
| "step": 10100 |
| }, |
| { |
| "epoch": 7.6072234762979685, |
| "grad_norm": 14.010082244873047, |
| "learning_rate": 6.884875846501129e-06, |
| "loss": 0.1578, |
| "step": 10110 |
| }, |
| { |
| "epoch": 7.614747930775019, |
| "grad_norm": 3.0297963619232178, |
| "learning_rate": 6.8806955940138786e-06, |
| "loss": 0.1092, |
| "step": 10120 |
| }, |
| { |
| "epoch": 7.622272385252069, |
| "grad_norm": 9.559354782104492, |
| "learning_rate": 6.876515341526628e-06, |
| "loss": 0.1473, |
| "step": 10130 |
| }, |
| { |
| "epoch": 7.62979683972912, |
| "grad_norm": 5.298492431640625, |
| "learning_rate": 6.872335089039378e-06, |
| "loss": 0.1332, |
| "step": 10140 |
| }, |
| { |
| "epoch": 7.63732129420617, |
| "grad_norm": 3.04228138923645, |
| "learning_rate": 6.868154836552128e-06, |
| "loss": 0.1232, |
| "step": 10150 |
| }, |
| { |
| "epoch": 7.64484574868322, |
| "grad_norm": 2.9913089275360107, |
| "learning_rate": 6.8639745840648785e-06, |
| "loss": 0.1245, |
| "step": 10160 |
| }, |
| { |
| "epoch": 7.652370203160271, |
| "grad_norm": 5.246922969818115, |
| "learning_rate": 6.859794331577628e-06, |
| "loss": 0.1365, |
| "step": 10170 |
| }, |
| { |
| "epoch": 7.659894657637321, |
| "grad_norm": 13.609551429748535, |
| "learning_rate": 6.855614079090378e-06, |
| "loss": 0.1861, |
| "step": 10180 |
| }, |
| { |
| "epoch": 7.667419112114372, |
| "grad_norm": 7.830902576446533, |
| "learning_rate": 6.851433826603128e-06, |
| "loss": 0.1085, |
| "step": 10190 |
| }, |
| { |
| "epoch": 7.674943566591422, |
| "grad_norm": 9.319975852966309, |
| "learning_rate": 6.8472535741158775e-06, |
| "loss": 0.147, |
| "step": 10200 |
| }, |
| { |
| "epoch": 7.682468021068472, |
| "grad_norm": 5.328279495239258, |
| "learning_rate": 6.843073321628627e-06, |
| "loss": 0.1406, |
| "step": 10210 |
| }, |
| { |
| "epoch": 7.689992475545523, |
| "grad_norm": 8.353121757507324, |
| "learning_rate": 6.838893069141377e-06, |
| "loss": 0.1533, |
| "step": 10220 |
| }, |
| { |
| "epoch": 7.697516930022573, |
| "grad_norm": 7.57094669342041, |
| "learning_rate": 6.834712816654126e-06, |
| "loss": 0.1177, |
| "step": 10230 |
| }, |
| { |
| "epoch": 7.705041384499624, |
| "grad_norm": 7.212436199188232, |
| "learning_rate": 6.8305325641668765e-06, |
| "loss": 0.1369, |
| "step": 10240 |
| }, |
| { |
| "epoch": 7.7125658389766745, |
| "grad_norm": 4.2448320388793945, |
| "learning_rate": 6.826352311679626e-06, |
| "loss": 0.1551, |
| "step": 10250 |
| }, |
| { |
| "epoch": 7.720090293453724, |
| "grad_norm": 8.300276756286621, |
| "learning_rate": 6.822172059192376e-06, |
| "loss": 0.188, |
| "step": 10260 |
| }, |
| { |
| "epoch": 7.727614747930775, |
| "grad_norm": 9.391825675964355, |
| "learning_rate": 6.817991806705125e-06, |
| "loss": 0.1468, |
| "step": 10270 |
| }, |
| { |
| "epoch": 7.735139202407826, |
| "grad_norm": 9.549453735351562, |
| "learning_rate": 6.813811554217875e-06, |
| "loss": 0.1597, |
| "step": 10280 |
| }, |
| { |
| "epoch": 7.742663656884876, |
| "grad_norm": 8.50312328338623, |
| "learning_rate": 6.809631301730625e-06, |
| "loss": 0.1622, |
| "step": 10290 |
| }, |
| { |
| "epoch": 7.750188111361926, |
| "grad_norm": 7.836225509643555, |
| "learning_rate": 6.805451049243375e-06, |
| "loss": 0.1124, |
| "step": 10300 |
| }, |
| { |
| "epoch": 7.757712565838977, |
| "grad_norm": 6.063055515289307, |
| "learning_rate": 6.801270796756124e-06, |
| "loss": 0.1202, |
| "step": 10310 |
| }, |
| { |
| "epoch": 7.765237020316027, |
| "grad_norm": 7.710981369018555, |
| "learning_rate": 6.797090544268874e-06, |
| "loss": 0.1505, |
| "step": 10320 |
| }, |
| { |
| "epoch": 7.772761474793078, |
| "grad_norm": 4.136016845703125, |
| "learning_rate": 6.792910291781623e-06, |
| "loss": 0.0856, |
| "step": 10330 |
| }, |
| { |
| "epoch": 7.780285929270128, |
| "grad_norm": 5.1353983879089355, |
| "learning_rate": 6.7887300392943746e-06, |
| "loss": 0.1399, |
| "step": 10340 |
| }, |
| { |
| "epoch": 7.787810383747178, |
| "grad_norm": 7.657298564910889, |
| "learning_rate": 6.784549786807124e-06, |
| "loss": 0.1707, |
| "step": 10350 |
| }, |
| { |
| "epoch": 7.795334838224229, |
| "grad_norm": 8.755562782287598, |
| "learning_rate": 6.780369534319874e-06, |
| "loss": 0.1414, |
| "step": 10360 |
| }, |
| { |
| "epoch": 7.802859292701279, |
| "grad_norm": 9.025842666625977, |
| "learning_rate": 6.776189281832623e-06, |
| "loss": 0.1517, |
| "step": 10370 |
| }, |
| { |
| "epoch": 7.81038374717833, |
| "grad_norm": 8.332140922546387, |
| "learning_rate": 6.772009029345374e-06, |
| "loss": 0.152, |
| "step": 10380 |
| }, |
| { |
| "epoch": 7.81790820165538, |
| "grad_norm": 5.564815998077393, |
| "learning_rate": 6.767828776858123e-06, |
| "loss": 0.1647, |
| "step": 10390 |
| }, |
| { |
| "epoch": 7.82543265613243, |
| "grad_norm": 9.147259712219238, |
| "learning_rate": 6.763648524370873e-06, |
| "loss": 0.1692, |
| "step": 10400 |
| }, |
| { |
| "epoch": 7.832957110609481, |
| "grad_norm": 3.85477614402771, |
| "learning_rate": 6.759468271883622e-06, |
| "loss": 0.1395, |
| "step": 10410 |
| }, |
| { |
| "epoch": 7.840481565086531, |
| "grad_norm": 10.106462478637695, |
| "learning_rate": 6.755288019396372e-06, |
| "loss": 0.1652, |
| "step": 10420 |
| }, |
| { |
| "epoch": 7.848006019563582, |
| "grad_norm": 9.62307357788086, |
| "learning_rate": 6.751107766909122e-06, |
| "loss": 0.1578, |
| "step": 10430 |
| }, |
| { |
| "epoch": 7.855530474040632, |
| "grad_norm": 10.081789016723633, |
| "learning_rate": 6.746927514421872e-06, |
| "loss": 0.1153, |
| "step": 10440 |
| }, |
| { |
| "epoch": 7.863054928517682, |
| "grad_norm": 8.883935928344727, |
| "learning_rate": 6.742747261934621e-06, |
| "loss": 0.1476, |
| "step": 10450 |
| }, |
| { |
| "epoch": 7.870579382994733, |
| "grad_norm": 4.969008922576904, |
| "learning_rate": 6.738567009447371e-06, |
| "loss": 0.1149, |
| "step": 10460 |
| }, |
| { |
| "epoch": 7.878103837471784, |
| "grad_norm": 6.76956033706665, |
| "learning_rate": 6.73438675696012e-06, |
| "loss": 0.1159, |
| "step": 10470 |
| }, |
| { |
| "epoch": 7.885628291948834, |
| "grad_norm": 11.153620719909668, |
| "learning_rate": 6.730206504472871e-06, |
| "loss": 0.1176, |
| "step": 10480 |
| }, |
| { |
| "epoch": 7.893152746425884, |
| "grad_norm": 9.627654075622559, |
| "learning_rate": 6.72602625198562e-06, |
| "loss": 0.1475, |
| "step": 10490 |
| }, |
| { |
| "epoch": 7.900677200902934, |
| "grad_norm": 5.48689603805542, |
| "learning_rate": 6.72184599949837e-06, |
| "loss": 0.1462, |
| "step": 10500 |
| }, |
| { |
| "epoch": 7.908201655379985, |
| "grad_norm": 9.38402271270752, |
| "learning_rate": 6.7176657470111194e-06, |
| "loss": 0.1272, |
| "step": 10510 |
| }, |
| { |
| "epoch": 7.915726109857036, |
| "grad_norm": 4.6745452880859375, |
| "learning_rate": 6.713485494523871e-06, |
| "loss": 0.1367, |
| "step": 10520 |
| }, |
| { |
| "epoch": 7.923250564334086, |
| "grad_norm": 9.271729469299316, |
| "learning_rate": 6.70930524203662e-06, |
| "loss": 0.1049, |
| "step": 10530 |
| }, |
| { |
| "epoch": 7.930775018811136, |
| "grad_norm": 8.473658561706543, |
| "learning_rate": 6.70512498954937e-06, |
| "loss": 0.1422, |
| "step": 10540 |
| }, |
| { |
| "epoch": 7.938299473288186, |
| "grad_norm": 11.029475212097168, |
| "learning_rate": 6.700944737062119e-06, |
| "loss": 0.141, |
| "step": 10550 |
| }, |
| { |
| "epoch": 7.945823927765237, |
| "grad_norm": 3.050215005874634, |
| "learning_rate": 6.696764484574869e-06, |
| "loss": 0.1018, |
| "step": 10560 |
| }, |
| { |
| "epoch": 7.953348382242288, |
| "grad_norm": 9.241053581237793, |
| "learning_rate": 6.692584232087619e-06, |
| "loss": 0.2095, |
| "step": 10570 |
| }, |
| { |
| "epoch": 7.9608728367193375, |
| "grad_norm": 6.521198749542236, |
| "learning_rate": 6.688403979600369e-06, |
| "loss": 0.1362, |
| "step": 10580 |
| }, |
| { |
| "epoch": 7.968397291196388, |
| "grad_norm": 6.801877021789551, |
| "learning_rate": 6.684223727113118e-06, |
| "loss": 0.1676, |
| "step": 10590 |
| }, |
| { |
| "epoch": 7.975921745673439, |
| "grad_norm": 1.8709025382995605, |
| "learning_rate": 6.680043474625868e-06, |
| "loss": 0.1454, |
| "step": 10600 |
| }, |
| { |
| "epoch": 7.983446200150489, |
| "grad_norm": 8.10856819152832, |
| "learning_rate": 6.6758632221386175e-06, |
| "loss": 0.1387, |
| "step": 10610 |
| }, |
| { |
| "epoch": 7.99097065462754, |
| "grad_norm": 8.119205474853516, |
| "learning_rate": 6.671682969651367e-06, |
| "loss": 0.1514, |
| "step": 10620 |
| }, |
| { |
| "epoch": 7.99849510910459, |
| "grad_norm": 5.9093918800354, |
| "learning_rate": 6.667502717164117e-06, |
| "loss": 0.1428, |
| "step": 10630 |
| }, |
| { |
| "epoch": 8.00601956358164, |
| "grad_norm": 6.897785186767578, |
| "learning_rate": 6.663322464676867e-06, |
| "loss": 0.1647, |
| "step": 10640 |
| }, |
| { |
| "epoch": 8.01354401805869, |
| "grad_norm": 6.557318210601807, |
| "learning_rate": 6.6591422121896165e-06, |
| "loss": 0.1131, |
| "step": 10650 |
| }, |
| { |
| "epoch": 8.021068472535742, |
| "grad_norm": 9.699820518493652, |
| "learning_rate": 6.654961959702366e-06, |
| "loss": 0.1507, |
| "step": 10660 |
| }, |
| { |
| "epoch": 8.028592927012792, |
| "grad_norm": 4.894190311431885, |
| "learning_rate": 6.650781707215116e-06, |
| "loss": 0.1215, |
| "step": 10670 |
| }, |
| { |
| "epoch": 8.036117381489841, |
| "grad_norm": 4.050377368927002, |
| "learning_rate": 6.646601454727866e-06, |
| "loss": 0.1528, |
| "step": 10680 |
| }, |
| { |
| "epoch": 8.043641835966893, |
| "grad_norm": 6.824169158935547, |
| "learning_rate": 6.6424212022406156e-06, |
| "loss": 0.1206, |
| "step": 10690 |
| }, |
| { |
| "epoch": 8.051166290443943, |
| "grad_norm": 8.67491340637207, |
| "learning_rate": 6.638240949753366e-06, |
| "loss": 0.1241, |
| "step": 10700 |
| }, |
| { |
| "epoch": 8.058690744920993, |
| "grad_norm": 3.6065561771392822, |
| "learning_rate": 6.6340606972661155e-06, |
| "loss": 0.125, |
| "step": 10710 |
| }, |
| { |
| "epoch": 8.066215199398044, |
| "grad_norm": 6.743816375732422, |
| "learning_rate": 6.629880444778866e-06, |
| "loss": 0.1568, |
| "step": 10720 |
| }, |
| { |
| "epoch": 8.073739653875094, |
| "grad_norm": 9.00485897064209, |
| "learning_rate": 6.6257001922916154e-06, |
| "loss": 0.2092, |
| "step": 10730 |
| }, |
| { |
| "epoch": 8.081264108352144, |
| "grad_norm": 6.077624797821045, |
| "learning_rate": 6.621519939804365e-06, |
| "loss": 0.1088, |
| "step": 10740 |
| }, |
| { |
| "epoch": 8.088788562829194, |
| "grad_norm": 11.94995403289795, |
| "learning_rate": 6.6173396873171145e-06, |
| "loss": 0.1527, |
| "step": 10750 |
| }, |
| { |
| "epoch": 8.096313017306246, |
| "grad_norm": 2.177652597427368, |
| "learning_rate": 6.613159434829864e-06, |
| "loss": 0.0979, |
| "step": 10760 |
| }, |
| { |
| "epoch": 8.103837471783295, |
| "grad_norm": 6.277168273925781, |
| "learning_rate": 6.6089791823426145e-06, |
| "loss": 0.0971, |
| "step": 10770 |
| }, |
| { |
| "epoch": 8.111361926260345, |
| "grad_norm": 4.3780646324157715, |
| "learning_rate": 6.604798929855364e-06, |
| "loss": 0.1324, |
| "step": 10780 |
| }, |
| { |
| "epoch": 8.118886380737397, |
| "grad_norm": 6.6845903396606445, |
| "learning_rate": 6.600618677368114e-06, |
| "loss": 0.2028, |
| "step": 10790 |
| }, |
| { |
| "epoch": 8.126410835214447, |
| "grad_norm": 7.709301471710205, |
| "learning_rate": 6.596438424880863e-06, |
| "loss": 0.1619, |
| "step": 10800 |
| }, |
| { |
| "epoch": 8.133935289691497, |
| "grad_norm": 4.897986888885498, |
| "learning_rate": 6.592258172393613e-06, |
| "loss": 0.1379, |
| "step": 10810 |
| }, |
| { |
| "epoch": 8.141459744168548, |
| "grad_norm": 5.920723915100098, |
| "learning_rate": 6.588077919906363e-06, |
| "loss": 0.1152, |
| "step": 10820 |
| }, |
| { |
| "epoch": 8.148984198645598, |
| "grad_norm": 7.485513687133789, |
| "learning_rate": 6.583897667419113e-06, |
| "loss": 0.1393, |
| "step": 10830 |
| }, |
| { |
| "epoch": 8.156508653122648, |
| "grad_norm": 7.579458236694336, |
| "learning_rate": 6.579717414931862e-06, |
| "loss": 0.1251, |
| "step": 10840 |
| }, |
| { |
| "epoch": 8.1640331075997, |
| "grad_norm": 5.181578636169434, |
| "learning_rate": 6.575537162444612e-06, |
| "loss": 0.0906, |
| "step": 10850 |
| }, |
| { |
| "epoch": 8.17155756207675, |
| "grad_norm": 6.30539083480835, |
| "learning_rate": 6.571356909957361e-06, |
| "loss": 0.1349, |
| "step": 10860 |
| }, |
| { |
| "epoch": 8.1790820165538, |
| "grad_norm": 10.169859886169434, |
| "learning_rate": 6.567176657470111e-06, |
| "loss": 0.1416, |
| "step": 10870 |
| }, |
| { |
| "epoch": 8.186606471030851, |
| "grad_norm": 13.061054229736328, |
| "learning_rate": 6.562996404982861e-06, |
| "loss": 0.1334, |
| "step": 10880 |
| }, |
| { |
| "epoch": 8.194130925507901, |
| "grad_norm": 6.3449482917785645, |
| "learning_rate": 6.558816152495612e-06, |
| "loss": 0.1587, |
| "step": 10890 |
| }, |
| { |
| "epoch": 8.20165537998495, |
| "grad_norm": 7.411153316497803, |
| "learning_rate": 6.554635900008361e-06, |
| "loss": 0.1223, |
| "step": 10900 |
| }, |
| { |
| "epoch": 8.209179834462002, |
| "grad_norm": 6.868873119354248, |
| "learning_rate": 6.5504556475211116e-06, |
| "loss": 0.1375, |
| "step": 10910 |
| }, |
| { |
| "epoch": 8.216704288939052, |
| "grad_norm": 6.650938510894775, |
| "learning_rate": 6.546275395033861e-06, |
| "loss": 0.1059, |
| "step": 10920 |
| }, |
| { |
| "epoch": 8.224228743416102, |
| "grad_norm": 3.331028699874878, |
| "learning_rate": 6.542095142546611e-06, |
| "loss": 0.1155, |
| "step": 10930 |
| }, |
| { |
| "epoch": 8.231753197893152, |
| "grad_norm": 5.483583927154541, |
| "learning_rate": 6.53791489005936e-06, |
| "loss": 0.1507, |
| "step": 10940 |
| }, |
| { |
| "epoch": 8.239277652370204, |
| "grad_norm": 7.282931327819824, |
| "learning_rate": 6.53373463757211e-06, |
| "loss": 0.1207, |
| "step": 10950 |
| }, |
| { |
| "epoch": 8.246802106847253, |
| "grad_norm": 5.5015435218811035, |
| "learning_rate": 6.52955438508486e-06, |
| "loss": 0.1597, |
| "step": 10960 |
| }, |
| { |
| "epoch": 8.254326561324303, |
| "grad_norm": 7.288213729858398, |
| "learning_rate": 6.52537413259761e-06, |
| "loss": 0.1198, |
| "step": 10970 |
| }, |
| { |
| "epoch": 8.261851015801355, |
| "grad_norm": 5.3646745681762695, |
| "learning_rate": 6.521193880110359e-06, |
| "loss": 0.1356, |
| "step": 10980 |
| }, |
| { |
| "epoch": 8.269375470278405, |
| "grad_norm": 10.937861442565918, |
| "learning_rate": 6.517013627623109e-06, |
| "loss": 0.1249, |
| "step": 10990 |
| }, |
| { |
| "epoch": 8.276899924755455, |
| "grad_norm": 9.01375961303711, |
| "learning_rate": 6.512833375135858e-06, |
| "loss": 0.1456, |
| "step": 11000 |
| }, |
| { |
| "epoch": 8.284424379232506, |
| "grad_norm": 8.625186920166016, |
| "learning_rate": 6.508653122648608e-06, |
| "loss": 0.1496, |
| "step": 11010 |
| }, |
| { |
| "epoch": 8.291948833709556, |
| "grad_norm": 7.765505313873291, |
| "learning_rate": 6.504472870161358e-06, |
| "loss": 0.108, |
| "step": 11020 |
| }, |
| { |
| "epoch": 8.299473288186606, |
| "grad_norm": 10.0932035446167, |
| "learning_rate": 6.500292617674108e-06, |
| "loss": 0.1414, |
| "step": 11030 |
| }, |
| { |
| "epoch": 8.306997742663658, |
| "grad_norm": 9.906752586364746, |
| "learning_rate": 6.496112365186857e-06, |
| "loss": 0.136, |
| "step": 11040 |
| }, |
| { |
| "epoch": 8.314522197140708, |
| "grad_norm": 3.120361328125, |
| "learning_rate": 6.491932112699607e-06, |
| "loss": 0.095, |
| "step": 11050 |
| }, |
| { |
| "epoch": 8.322046651617757, |
| "grad_norm": 13.94308090209961, |
| "learning_rate": 6.4877518602123565e-06, |
| "loss": 0.1316, |
| "step": 11060 |
| }, |
| { |
| "epoch": 8.329571106094807, |
| "grad_norm": 9.929215431213379, |
| "learning_rate": 6.483571607725108e-06, |
| "loss": 0.1213, |
| "step": 11070 |
| }, |
| { |
| "epoch": 8.337095560571859, |
| "grad_norm": 7.462619781494141, |
| "learning_rate": 6.479391355237857e-06, |
| "loss": 0.1175, |
| "step": 11080 |
| }, |
| { |
| "epoch": 8.344620015048909, |
| "grad_norm": 7.091246128082275, |
| "learning_rate": 6.475211102750607e-06, |
| "loss": 0.1412, |
| "step": 11090 |
| }, |
| { |
| "epoch": 8.352144469525959, |
| "grad_norm": 7.457467555999756, |
| "learning_rate": 6.471030850263356e-06, |
| "loss": 0.1843, |
| "step": 11100 |
| }, |
| { |
| "epoch": 8.35966892400301, |
| "grad_norm": 5.405447959899902, |
| "learning_rate": 6.466850597776107e-06, |
| "loss": 0.1798, |
| "step": 11110 |
| }, |
| { |
| "epoch": 8.36719337848006, |
| "grad_norm": 8.421366691589355, |
| "learning_rate": 6.462670345288856e-06, |
| "loss": 0.1321, |
| "step": 11120 |
| }, |
| { |
| "epoch": 8.37471783295711, |
| "grad_norm": 9.620317459106445, |
| "learning_rate": 6.458490092801606e-06, |
| "loss": 0.1528, |
| "step": 11130 |
| }, |
| { |
| "epoch": 8.382242287434162, |
| "grad_norm": 6.394067764282227, |
| "learning_rate": 6.454309840314355e-06, |
| "loss": 0.1401, |
| "step": 11140 |
| }, |
| { |
| "epoch": 8.389766741911211, |
| "grad_norm": 5.651474952697754, |
| "learning_rate": 6.450129587827105e-06, |
| "loss": 0.1092, |
| "step": 11150 |
| }, |
| { |
| "epoch": 8.397291196388261, |
| "grad_norm": 8.883152961730957, |
| "learning_rate": 6.445949335339855e-06, |
| "loss": 0.1388, |
| "step": 11160 |
| }, |
| { |
| "epoch": 8.404815650865313, |
| "grad_norm": 2.737121820449829, |
| "learning_rate": 6.441769082852605e-06, |
| "loss": 0.1235, |
| "step": 11170 |
| }, |
| { |
| "epoch": 8.412340105342363, |
| "grad_norm": 3.6822330951690674, |
| "learning_rate": 6.4375888303653545e-06, |
| "loss": 0.0854, |
| "step": 11180 |
| }, |
| { |
| "epoch": 8.419864559819413, |
| "grad_norm": 12.290861129760742, |
| "learning_rate": 6.433408577878104e-06, |
| "loss": 0.1463, |
| "step": 11190 |
| }, |
| { |
| "epoch": 8.427389014296464, |
| "grad_norm": 2.9195001125335693, |
| "learning_rate": 6.4292283253908536e-06, |
| "loss": 0.1494, |
| "step": 11200 |
| }, |
| { |
| "epoch": 8.434913468773514, |
| "grad_norm": 6.521566867828369, |
| "learning_rate": 6.425048072903604e-06, |
| "loss": 0.1082, |
| "step": 11210 |
| }, |
| { |
| "epoch": 8.442437923250564, |
| "grad_norm": 5.775163650512695, |
| "learning_rate": 6.4208678204163535e-06, |
| "loss": 0.1312, |
| "step": 11220 |
| }, |
| { |
| "epoch": 8.449962377727616, |
| "grad_norm": 6.358558177947998, |
| "learning_rate": 6.416687567929103e-06, |
| "loss": 0.1382, |
| "step": 11230 |
| }, |
| { |
| "epoch": 8.457486832204665, |
| "grad_norm": 9.709700584411621, |
| "learning_rate": 6.412507315441853e-06, |
| "loss": 0.1261, |
| "step": 11240 |
| }, |
| { |
| "epoch": 8.465011286681715, |
| "grad_norm": 7.283178329467773, |
| "learning_rate": 6.408327062954604e-06, |
| "loss": 0.1698, |
| "step": 11250 |
| }, |
| { |
| "epoch": 8.472535741158765, |
| "grad_norm": 10.301934242248535, |
| "learning_rate": 6.404146810467353e-06, |
| "loss": 0.1708, |
| "step": 11260 |
| }, |
| { |
| "epoch": 8.480060195635817, |
| "grad_norm": 10.021185874938965, |
| "learning_rate": 6.399966557980103e-06, |
| "loss": 0.0992, |
| "step": 11270 |
| }, |
| { |
| "epoch": 8.487584650112867, |
| "grad_norm": 10.817153930664062, |
| "learning_rate": 6.3957863054928525e-06, |
| "loss": 0.1722, |
| "step": 11280 |
| }, |
| { |
| "epoch": 8.495109104589917, |
| "grad_norm": 8.073854446411133, |
| "learning_rate": 6.391606053005602e-06, |
| "loss": 0.1598, |
| "step": 11290 |
| }, |
| { |
| "epoch": 8.502633559066968, |
| "grad_norm": 10.416369438171387, |
| "learning_rate": 6.3874258005183524e-06, |
| "loss": 0.108, |
| "step": 11300 |
| }, |
| { |
| "epoch": 8.510158013544018, |
| "grad_norm": 6.909933567047119, |
| "learning_rate": 6.383245548031102e-06, |
| "loss": 0.1307, |
| "step": 11310 |
| }, |
| { |
| "epoch": 8.517682468021068, |
| "grad_norm": 6.960227966308594, |
| "learning_rate": 6.3790652955438515e-06, |
| "loss": 0.1179, |
| "step": 11320 |
| }, |
| { |
| "epoch": 8.52520692249812, |
| "grad_norm": 8.188361167907715, |
| "learning_rate": 6.374885043056601e-06, |
| "loss": 0.1314, |
| "step": 11330 |
| }, |
| { |
| "epoch": 8.53273137697517, |
| "grad_norm": 7.911260604858398, |
| "learning_rate": 6.370704790569351e-06, |
| "loss": 0.1338, |
| "step": 11340 |
| }, |
| { |
| "epoch": 8.54025583145222, |
| "grad_norm": 5.278087139129639, |
| "learning_rate": 6.3665245380821e-06, |
| "loss": 0.0957, |
| "step": 11350 |
| }, |
| { |
| "epoch": 8.54778028592927, |
| "grad_norm": 8.951353073120117, |
| "learning_rate": 6.3623442855948506e-06, |
| "loss": 0.126, |
| "step": 11360 |
| }, |
| { |
| "epoch": 8.55530474040632, |
| "grad_norm": 5.146210670471191, |
| "learning_rate": 6.3581640331076e-06, |
| "loss": 0.1068, |
| "step": 11370 |
| }, |
| { |
| "epoch": 8.56282919488337, |
| "grad_norm": 9.495402336120605, |
| "learning_rate": 6.35398378062035e-06, |
| "loss": 0.0963, |
| "step": 11380 |
| }, |
| { |
| "epoch": 8.57035364936042, |
| "grad_norm": 10.180619239807129, |
| "learning_rate": 6.349803528133099e-06, |
| "loss": 0.1814, |
| "step": 11390 |
| }, |
| { |
| "epoch": 8.577878103837472, |
| "grad_norm": 5.210803508758545, |
| "learning_rate": 6.345623275645849e-06, |
| "loss": 0.1345, |
| "step": 11400 |
| }, |
| { |
| "epoch": 8.585402558314522, |
| "grad_norm": 4.041356563568115, |
| "learning_rate": 6.341443023158599e-06, |
| "loss": 0.0962, |
| "step": 11410 |
| }, |
| { |
| "epoch": 8.592927012791572, |
| "grad_norm": 12.49039077758789, |
| "learning_rate": 6.337262770671349e-06, |
| "loss": 0.1345, |
| "step": 11420 |
| }, |
| { |
| "epoch": 8.600451467268623, |
| "grad_norm": 1.2120592594146729, |
| "learning_rate": 6.333082518184099e-06, |
| "loss": 0.1211, |
| "step": 11430 |
| }, |
| { |
| "epoch": 8.607975921745673, |
| "grad_norm": 6.367912292480469, |
| "learning_rate": 6.328902265696849e-06, |
| "loss": 0.0813, |
| "step": 11440 |
| }, |
| { |
| "epoch": 8.615500376222723, |
| "grad_norm": 2.9278907775878906, |
| "learning_rate": 6.324722013209599e-06, |
| "loss": 0.1357, |
| "step": 11450 |
| }, |
| { |
| "epoch": 8.623024830699775, |
| "grad_norm": 12.494004249572754, |
| "learning_rate": 6.320541760722349e-06, |
| "loss": 0.1315, |
| "step": 11460 |
| }, |
| { |
| "epoch": 8.630549285176825, |
| "grad_norm": 9.539612770080566, |
| "learning_rate": 6.316361508235098e-06, |
| "loss": 0.1288, |
| "step": 11470 |
| }, |
| { |
| "epoch": 8.638073739653874, |
| "grad_norm": 9.347472190856934, |
| "learning_rate": 6.312181255747848e-06, |
| "loss": 0.1031, |
| "step": 11480 |
| }, |
| { |
| "epoch": 8.645598194130926, |
| "grad_norm": 7.605668067932129, |
| "learning_rate": 6.308001003260597e-06, |
| "loss": 0.1058, |
| "step": 11490 |
| }, |
| { |
| "epoch": 8.653122648607976, |
| "grad_norm": 10.471912384033203, |
| "learning_rate": 6.303820750773348e-06, |
| "loss": 0.1145, |
| "step": 11500 |
| }, |
| { |
| "epoch": 8.660647103085026, |
| "grad_norm": 4.992396354675293, |
| "learning_rate": 6.299640498286097e-06, |
| "loss": 0.1255, |
| "step": 11510 |
| }, |
| { |
| "epoch": 8.668171557562077, |
| "grad_norm": 11.861599922180176, |
| "learning_rate": 6.295460245798847e-06, |
| "loss": 0.1305, |
| "step": 11520 |
| }, |
| { |
| "epoch": 8.675696012039127, |
| "grad_norm": 1.827236294746399, |
| "learning_rate": 6.291279993311596e-06, |
| "loss": 0.1086, |
| "step": 11530 |
| }, |
| { |
| "epoch": 8.683220466516177, |
| "grad_norm": 9.01653003692627, |
| "learning_rate": 6.287099740824346e-06, |
| "loss": 0.1412, |
| "step": 11540 |
| }, |
| { |
| "epoch": 8.690744920993229, |
| "grad_norm": 3.50248122215271, |
| "learning_rate": 6.282919488337096e-06, |
| "loss": 0.1062, |
| "step": 11550 |
| }, |
| { |
| "epoch": 8.698269375470279, |
| "grad_norm": 5.9312567710876465, |
| "learning_rate": 6.278739235849846e-06, |
| "loss": 0.1023, |
| "step": 11560 |
| }, |
| { |
| "epoch": 8.705793829947329, |
| "grad_norm": 7.17442512512207, |
| "learning_rate": 6.274558983362595e-06, |
| "loss": 0.1337, |
| "step": 11570 |
| }, |
| { |
| "epoch": 8.713318284424378, |
| "grad_norm": 10.798535346984863, |
| "learning_rate": 6.270378730875345e-06, |
| "loss": 0.1254, |
| "step": 11580 |
| }, |
| { |
| "epoch": 8.72084273890143, |
| "grad_norm": 9.032575607299805, |
| "learning_rate": 6.266198478388094e-06, |
| "loss": 0.1385, |
| "step": 11590 |
| }, |
| { |
| "epoch": 8.72836719337848, |
| "grad_norm": 7.200125694274902, |
| "learning_rate": 6.262018225900845e-06, |
| "loss": 0.1088, |
| "step": 11600 |
| }, |
| { |
| "epoch": 8.73589164785553, |
| "grad_norm": 6.259599685668945, |
| "learning_rate": 6.257837973413595e-06, |
| "loss": 0.1281, |
| "step": 11610 |
| }, |
| { |
| "epoch": 8.743416102332581, |
| "grad_norm": 12.889073371887207, |
| "learning_rate": 6.253657720926345e-06, |
| "loss": 0.1614, |
| "step": 11620 |
| }, |
| { |
| "epoch": 8.750940556809631, |
| "grad_norm": 6.001211643218994, |
| "learning_rate": 6.249477468439094e-06, |
| "loss": 0.1526, |
| "step": 11630 |
| }, |
| { |
| "epoch": 8.758465011286681, |
| "grad_norm": 5.817295074462891, |
| "learning_rate": 6.245297215951845e-06, |
| "loss": 0.1266, |
| "step": 11640 |
| }, |
| { |
| "epoch": 8.765989465763733, |
| "grad_norm": 6.75652551651001, |
| "learning_rate": 6.241116963464594e-06, |
| "loss": 0.1105, |
| "step": 11650 |
| }, |
| { |
| "epoch": 8.773513920240783, |
| "grad_norm": 6.990323543548584, |
| "learning_rate": 6.236936710977344e-06, |
| "loss": 0.1269, |
| "step": 11660 |
| }, |
| { |
| "epoch": 8.781038374717832, |
| "grad_norm": 9.484482765197754, |
| "learning_rate": 6.232756458490093e-06, |
| "loss": 0.1584, |
| "step": 11670 |
| }, |
| { |
| "epoch": 8.788562829194884, |
| "grad_norm": 6.757113456726074, |
| "learning_rate": 6.228576206002843e-06, |
| "loss": 0.1423, |
| "step": 11680 |
| }, |
| { |
| "epoch": 8.796087283671934, |
| "grad_norm": 10.376631736755371, |
| "learning_rate": 6.224395953515593e-06, |
| "loss": 0.1294, |
| "step": 11690 |
| }, |
| { |
| "epoch": 8.803611738148984, |
| "grad_norm": 9.998665809631348, |
| "learning_rate": 6.220215701028343e-06, |
| "loss": 0.1348, |
| "step": 11700 |
| }, |
| { |
| "epoch": 8.811136192626035, |
| "grad_norm": 5.319802284240723, |
| "learning_rate": 6.216035448541092e-06, |
| "loss": 0.1504, |
| "step": 11710 |
| }, |
| { |
| "epoch": 8.818660647103085, |
| "grad_norm": 6.685969352722168, |
| "learning_rate": 6.211855196053842e-06, |
| "loss": 0.0937, |
| "step": 11720 |
| }, |
| { |
| "epoch": 8.826185101580135, |
| "grad_norm": 11.51011848449707, |
| "learning_rate": 6.2076749435665915e-06, |
| "loss": 0.1496, |
| "step": 11730 |
| }, |
| { |
| "epoch": 8.833709556057187, |
| "grad_norm": 7.66890287399292, |
| "learning_rate": 6.203494691079341e-06, |
| "loss": 0.1438, |
| "step": 11740 |
| }, |
| { |
| "epoch": 8.841234010534237, |
| "grad_norm": 8.105353355407715, |
| "learning_rate": 6.1993144385920914e-06, |
| "loss": 0.1031, |
| "step": 11750 |
| }, |
| { |
| "epoch": 8.848758465011286, |
| "grad_norm": 9.982457160949707, |
| "learning_rate": 6.195134186104841e-06, |
| "loss": 0.126, |
| "step": 11760 |
| }, |
| { |
| "epoch": 8.856282919488336, |
| "grad_norm": 6.818399429321289, |
| "learning_rate": 6.1909539336175905e-06, |
| "loss": 0.1298, |
| "step": 11770 |
| }, |
| { |
| "epoch": 8.863807373965388, |
| "grad_norm": 6.4164299964904785, |
| "learning_rate": 6.18677368113034e-06, |
| "loss": 0.1298, |
| "step": 11780 |
| }, |
| { |
| "epoch": 8.871331828442438, |
| "grad_norm": 9.575235366821289, |
| "learning_rate": 6.182593428643091e-06, |
| "loss": 0.1284, |
| "step": 11790 |
| }, |
| { |
| "epoch": 8.878856282919488, |
| "grad_norm": 5.450047492980957, |
| "learning_rate": 6.178413176155841e-06, |
| "loss": 0.1272, |
| "step": 11800 |
| }, |
| { |
| "epoch": 8.88638073739654, |
| "grad_norm": 10.198444366455078, |
| "learning_rate": 6.17423292366859e-06, |
| "loss": 0.0938, |
| "step": 11810 |
| }, |
| { |
| "epoch": 8.89390519187359, |
| "grad_norm": 6.001352310180664, |
| "learning_rate": 6.17005267118134e-06, |
| "loss": 0.0924, |
| "step": 11820 |
| }, |
| { |
| "epoch": 8.901429646350639, |
| "grad_norm": 6.211798191070557, |
| "learning_rate": 6.1658724186940895e-06, |
| "loss": 0.1193, |
| "step": 11830 |
| }, |
| { |
| "epoch": 8.90895410082769, |
| "grad_norm": 4.294961929321289, |
| "learning_rate": 6.16169216620684e-06, |
| "loss": 0.1484, |
| "step": 11840 |
| }, |
| { |
| "epoch": 8.91647855530474, |
| "grad_norm": 7.163177013397217, |
| "learning_rate": 6.1575119137195895e-06, |
| "loss": 0.0928, |
| "step": 11850 |
| }, |
| { |
| "epoch": 8.92400300978179, |
| "grad_norm": 7.8336591720581055, |
| "learning_rate": 6.153331661232339e-06, |
| "loss": 0.1004, |
| "step": 11860 |
| }, |
| { |
| "epoch": 8.931527464258842, |
| "grad_norm": 6.162928104400635, |
| "learning_rate": 6.1491514087450886e-06, |
| "loss": 0.1435, |
| "step": 11870 |
| }, |
| { |
| "epoch": 8.939051918735892, |
| "grad_norm": 7.090447902679443, |
| "learning_rate": 6.144971156257838e-06, |
| "loss": 0.1394, |
| "step": 11880 |
| }, |
| { |
| "epoch": 8.946576373212942, |
| "grad_norm": 6.38063907623291, |
| "learning_rate": 6.1407909037705885e-06, |
| "loss": 0.1458, |
| "step": 11890 |
| }, |
| { |
| "epoch": 8.954100827689992, |
| "grad_norm": 8.057353019714355, |
| "learning_rate": 6.136610651283338e-06, |
| "loss": 0.1522, |
| "step": 11900 |
| }, |
| { |
| "epoch": 8.961625282167043, |
| "grad_norm": 6.432565689086914, |
| "learning_rate": 6.132430398796088e-06, |
| "loss": 0.1279, |
| "step": 11910 |
| }, |
| { |
| "epoch": 8.969149736644093, |
| "grad_norm": 9.430665969848633, |
| "learning_rate": 6.128250146308837e-06, |
| "loss": 0.1376, |
| "step": 11920 |
| }, |
| { |
| "epoch": 8.976674191121143, |
| "grad_norm": 6.492280960083008, |
| "learning_rate": 6.124069893821587e-06, |
| "loss": 0.0997, |
| "step": 11930 |
| }, |
| { |
| "epoch": 8.984198645598195, |
| "grad_norm": 7.028443813323975, |
| "learning_rate": 6.119889641334337e-06, |
| "loss": 0.1294, |
| "step": 11940 |
| }, |
| { |
| "epoch": 8.991723100075244, |
| "grad_norm": 8.465397834777832, |
| "learning_rate": 6.115709388847087e-06, |
| "loss": 0.1444, |
| "step": 11950 |
| }, |
| { |
| "epoch": 8.999247554552294, |
| "grad_norm": 9.109210968017578, |
| "learning_rate": 6.111529136359836e-06, |
| "loss": 0.1535, |
| "step": 11960 |
| }, |
| { |
| "epoch": 9.006772009029346, |
| "grad_norm": 5.075557708740234, |
| "learning_rate": 6.107348883872587e-06, |
| "loss": 0.068, |
| "step": 11970 |
| }, |
| { |
| "epoch": 9.014296463506396, |
| "grad_norm": 4.022278785705566, |
| "learning_rate": 6.103168631385337e-06, |
| "loss": 0.1538, |
| "step": 11980 |
| }, |
| { |
| "epoch": 9.021820917983446, |
| "grad_norm": 7.8916850090026855, |
| "learning_rate": 6.0989883788980865e-06, |
| "loss": 0.1137, |
| "step": 11990 |
| }, |
| { |
| "epoch": 9.029345372460497, |
| "grad_norm": 8.236541748046875, |
| "learning_rate": 6.094808126410836e-06, |
| "loss": 0.1207, |
| "step": 12000 |
| }, |
| { |
| "epoch": 9.036869826937547, |
| "grad_norm": 5.704281806945801, |
| "learning_rate": 6.090627873923586e-06, |
| "loss": 0.1249, |
| "step": 12010 |
| }, |
| { |
| "epoch": 9.044394281414597, |
| "grad_norm": 4.6430277824401855, |
| "learning_rate": 6.086447621436335e-06, |
| "loss": 0.1156, |
| "step": 12020 |
| }, |
| { |
| "epoch": 9.051918735891649, |
| "grad_norm": 11.825648307800293, |
| "learning_rate": 6.082267368949086e-06, |
| "loss": 0.1083, |
| "step": 12030 |
| }, |
| { |
| "epoch": 9.059443190368698, |
| "grad_norm": 10.094679832458496, |
| "learning_rate": 6.078087116461835e-06, |
| "loss": 0.1102, |
| "step": 12040 |
| }, |
| { |
| "epoch": 9.066967644845748, |
| "grad_norm": 9.167254447937012, |
| "learning_rate": 6.073906863974585e-06, |
| "loss": 0.1148, |
| "step": 12050 |
| }, |
| { |
| "epoch": 9.0744920993228, |
| "grad_norm": 5.975985050201416, |
| "learning_rate": 6.069726611487334e-06, |
| "loss": 0.1222, |
| "step": 12060 |
| }, |
| { |
| "epoch": 9.08201655379985, |
| "grad_norm": 11.157576560974121, |
| "learning_rate": 6.065546359000084e-06, |
| "loss": 0.1458, |
| "step": 12070 |
| }, |
| { |
| "epoch": 9.0895410082769, |
| "grad_norm": 3.7663228511810303, |
| "learning_rate": 6.061366106512834e-06, |
| "loss": 0.1524, |
| "step": 12080 |
| }, |
| { |
| "epoch": 9.09706546275395, |
| "grad_norm": 4.96702766418457, |
| "learning_rate": 6.057185854025584e-06, |
| "loss": 0.1043, |
| "step": 12090 |
| }, |
| { |
| "epoch": 9.104589917231001, |
| "grad_norm": 9.441642761230469, |
| "learning_rate": 6.053005601538333e-06, |
| "loss": 0.1026, |
| "step": 12100 |
| }, |
| { |
| "epoch": 9.112114371708051, |
| "grad_norm": 5.106550693511963, |
| "learning_rate": 6.048825349051083e-06, |
| "loss": 0.111, |
| "step": 12110 |
| }, |
| { |
| "epoch": 9.119638826185101, |
| "grad_norm": 12.366744995117188, |
| "learning_rate": 6.044645096563832e-06, |
| "loss": 0.1293, |
| "step": 12120 |
| }, |
| { |
| "epoch": 9.127163280662153, |
| "grad_norm": 10.476715087890625, |
| "learning_rate": 6.040464844076582e-06, |
| "loss": 0.1035, |
| "step": 12130 |
| }, |
| { |
| "epoch": 9.134687735139202, |
| "grad_norm": 11.363449096679688, |
| "learning_rate": 6.036284591589332e-06, |
| "loss": 0.1441, |
| "step": 12140 |
| }, |
| { |
| "epoch": 9.142212189616252, |
| "grad_norm": 4.014682292938232, |
| "learning_rate": 6.032104339102082e-06, |
| "loss": 0.1294, |
| "step": 12150 |
| }, |
| { |
| "epoch": 9.149736644093304, |
| "grad_norm": 7.98138952255249, |
| "learning_rate": 6.027924086614832e-06, |
| "loss": 0.096, |
| "step": 12160 |
| }, |
| { |
| "epoch": 9.157261098570354, |
| "grad_norm": 8.680583000183105, |
| "learning_rate": 6.023743834127583e-06, |
| "loss": 0.1076, |
| "step": 12170 |
| }, |
| { |
| "epoch": 9.164785553047404, |
| "grad_norm": 8.325934410095215, |
| "learning_rate": 6.019563581640332e-06, |
| "loss": 0.0796, |
| "step": 12180 |
| }, |
| { |
| "epoch": 9.172310007524455, |
| "grad_norm": 5.094624996185303, |
| "learning_rate": 6.015383329153082e-06, |
| "loss": 0.0804, |
| "step": 12190 |
| }, |
| { |
| "epoch": 9.179834462001505, |
| "grad_norm": 4.378869533538818, |
| "learning_rate": 6.011203076665831e-06, |
| "loss": 0.111, |
| "step": 12200 |
| }, |
| { |
| "epoch": 9.187358916478555, |
| "grad_norm": 8.656767845153809, |
| "learning_rate": 6.007022824178581e-06, |
| "loss": 0.1048, |
| "step": 12210 |
| }, |
| { |
| "epoch": 9.194883370955605, |
| "grad_norm": 4.8737592697143555, |
| "learning_rate": 6.00284257169133e-06, |
| "loss": 0.1315, |
| "step": 12220 |
| }, |
| { |
| "epoch": 9.202407825432656, |
| "grad_norm": 7.291375637054443, |
| "learning_rate": 5.998662319204081e-06, |
| "loss": 0.1414, |
| "step": 12230 |
| }, |
| { |
| "epoch": 9.209932279909706, |
| "grad_norm": 6.14476203918457, |
| "learning_rate": 5.99448206671683e-06, |
| "loss": 0.1087, |
| "step": 12240 |
| }, |
| { |
| "epoch": 9.217456734386756, |
| "grad_norm": 9.752476692199707, |
| "learning_rate": 5.99030181422958e-06, |
| "loss": 0.1536, |
| "step": 12250 |
| }, |
| { |
| "epoch": 9.224981188863808, |
| "grad_norm": 8.39211368560791, |
| "learning_rate": 5.9861215617423294e-06, |
| "loss": 0.1192, |
| "step": 12260 |
| }, |
| { |
| "epoch": 9.232505643340858, |
| "grad_norm": 10.023391723632812, |
| "learning_rate": 5.981941309255079e-06, |
| "loss": 0.1397, |
| "step": 12270 |
| }, |
| { |
| "epoch": 9.240030097817908, |
| "grad_norm": 7.0415730476379395, |
| "learning_rate": 5.977761056767829e-06, |
| "loss": 0.1331, |
| "step": 12280 |
| }, |
| { |
| "epoch": 9.24755455229496, |
| "grad_norm": 7.50435733795166, |
| "learning_rate": 5.973580804280579e-06, |
| "loss": 0.1104, |
| "step": 12290 |
| }, |
| { |
| "epoch": 9.255079006772009, |
| "grad_norm": 2.4626896381378174, |
| "learning_rate": 5.9694005517933285e-06, |
| "loss": 0.1293, |
| "step": 12300 |
| }, |
| { |
| "epoch": 9.262603461249059, |
| "grad_norm": 8.800031661987305, |
| "learning_rate": 5.965220299306078e-06, |
| "loss": 0.0948, |
| "step": 12310 |
| }, |
| { |
| "epoch": 9.27012791572611, |
| "grad_norm": 5.665508270263672, |
| "learning_rate": 5.9610400468188276e-06, |
| "loss": 0.082, |
| "step": 12320 |
| }, |
| { |
| "epoch": 9.27765237020316, |
| "grad_norm": 6.743051052093506, |
| "learning_rate": 5.956859794331578e-06, |
| "loss": 0.1377, |
| "step": 12330 |
| }, |
| { |
| "epoch": 9.28517682468021, |
| "grad_norm": 2.95204496383667, |
| "learning_rate": 5.952679541844328e-06, |
| "loss": 0.1225, |
| "step": 12340 |
| }, |
| { |
| "epoch": 9.292701279157262, |
| "grad_norm": 8.680118560791016, |
| "learning_rate": 5.948499289357078e-06, |
| "loss": 0.1105, |
| "step": 12350 |
| }, |
| { |
| "epoch": 9.300225733634312, |
| "grad_norm": 7.5288615226745605, |
| "learning_rate": 5.9443190368698275e-06, |
| "loss": 0.1021, |
| "step": 12360 |
| }, |
| { |
| "epoch": 9.307750188111362, |
| "grad_norm": 5.6351637840271, |
| "learning_rate": 5.940138784382578e-06, |
| "loss": 0.0972, |
| "step": 12370 |
| }, |
| { |
| "epoch": 9.315274642588413, |
| "grad_norm": 6.20802640914917, |
| "learning_rate": 5.935958531895327e-06, |
| "loss": 0.1096, |
| "step": 12380 |
| }, |
| { |
| "epoch": 9.322799097065463, |
| "grad_norm": 12.660746574401855, |
| "learning_rate": 5.931778279408077e-06, |
| "loss": 0.1233, |
| "step": 12390 |
| }, |
| { |
| "epoch": 9.330323551542513, |
| "grad_norm": 6.66998291015625, |
| "learning_rate": 5.9275980269208265e-06, |
| "loss": 0.1361, |
| "step": 12400 |
| }, |
| { |
| "epoch": 9.337848006019563, |
| "grad_norm": 4.0259318351745605, |
| "learning_rate": 5.923417774433576e-06, |
| "loss": 0.1011, |
| "step": 12410 |
| }, |
| { |
| "epoch": 9.345372460496614, |
| "grad_norm": 4.9495744705200195, |
| "learning_rate": 5.9192375219463264e-06, |
| "loss": 0.1337, |
| "step": 12420 |
| }, |
| { |
| "epoch": 9.352896914973664, |
| "grad_norm": 7.259575366973877, |
| "learning_rate": 5.915057269459076e-06, |
| "loss": 0.0662, |
| "step": 12430 |
| }, |
| { |
| "epoch": 9.360421369450714, |
| "grad_norm": 3.8569116592407227, |
| "learning_rate": 5.9108770169718255e-06, |
| "loss": 0.1211, |
| "step": 12440 |
| }, |
| { |
| "epoch": 9.367945823927766, |
| "grad_norm": 2.6672203540802, |
| "learning_rate": 5.906696764484575e-06, |
| "loss": 0.1019, |
| "step": 12450 |
| }, |
| { |
| "epoch": 9.375470278404816, |
| "grad_norm": 6.545354843139648, |
| "learning_rate": 5.902516511997325e-06, |
| "loss": 0.0804, |
| "step": 12460 |
| }, |
| { |
| "epoch": 9.382994732881865, |
| "grad_norm": 10.449742317199707, |
| "learning_rate": 5.898336259510074e-06, |
| "loss": 0.1515, |
| "step": 12470 |
| }, |
| { |
| "epoch": 9.390519187358917, |
| "grad_norm": 5.685015678405762, |
| "learning_rate": 5.894156007022825e-06, |
| "loss": 0.1063, |
| "step": 12480 |
| }, |
| { |
| "epoch": 9.398043641835967, |
| "grad_norm": 7.485517978668213, |
| "learning_rate": 5.889975754535574e-06, |
| "loss": 0.1443, |
| "step": 12490 |
| }, |
| { |
| "epoch": 9.405568096313017, |
| "grad_norm": 6.86757230758667, |
| "learning_rate": 5.885795502048324e-06, |
| "loss": 0.1088, |
| "step": 12500 |
| }, |
| { |
| "epoch": 9.413092550790068, |
| "grad_norm": 9.321928977966309, |
| "learning_rate": 5.881615249561073e-06, |
| "loss": 0.1346, |
| "step": 12510 |
| }, |
| { |
| "epoch": 9.420617005267118, |
| "grad_norm": 4.986440181732178, |
| "learning_rate": 5.8774349970738245e-06, |
| "loss": 0.1582, |
| "step": 12520 |
| }, |
| { |
| "epoch": 9.428141459744168, |
| "grad_norm": 7.16530179977417, |
| "learning_rate": 5.873254744586574e-06, |
| "loss": 0.0851, |
| "step": 12530 |
| }, |
| { |
| "epoch": 9.43566591422122, |
| "grad_norm": 8.802489280700684, |
| "learning_rate": 5.8690744920993236e-06, |
| "loss": 0.117, |
| "step": 12540 |
| }, |
| { |
| "epoch": 9.44319036869827, |
| "grad_norm": 7.261384010314941, |
| "learning_rate": 5.864894239612073e-06, |
| "loss": 0.184, |
| "step": 12550 |
| }, |
| { |
| "epoch": 9.45071482317532, |
| "grad_norm": 11.29928207397461, |
| "learning_rate": 5.860713987124823e-06, |
| "loss": 0.0921, |
| "step": 12560 |
| }, |
| { |
| "epoch": 9.45823927765237, |
| "grad_norm": 9.241240501403809, |
| "learning_rate": 5.856533734637573e-06, |
| "loss": 0.1232, |
| "step": 12570 |
| }, |
| { |
| "epoch": 9.465763732129421, |
| "grad_norm": 8.743525505065918, |
| "learning_rate": 5.852353482150323e-06, |
| "loss": 0.109, |
| "step": 12580 |
| }, |
| { |
| "epoch": 9.47328818660647, |
| "grad_norm": 7.939570426940918, |
| "learning_rate": 5.848173229663072e-06, |
| "loss": 0.1403, |
| "step": 12590 |
| }, |
| { |
| "epoch": 9.48081264108352, |
| "grad_norm": 11.364997863769531, |
| "learning_rate": 5.843992977175822e-06, |
| "loss": 0.182, |
| "step": 12600 |
| }, |
| { |
| "epoch": 9.488337095560572, |
| "grad_norm": 6.514659881591797, |
| "learning_rate": 5.839812724688571e-06, |
| "loss": 0.1144, |
| "step": 12610 |
| }, |
| { |
| "epoch": 9.495861550037622, |
| "grad_norm": 5.120135307312012, |
| "learning_rate": 5.835632472201322e-06, |
| "loss": 0.1045, |
| "step": 12620 |
| }, |
| { |
| "epoch": 9.503386004514672, |
| "grad_norm": 6.3493733406066895, |
| "learning_rate": 5.831452219714071e-06, |
| "loss": 0.0998, |
| "step": 12630 |
| }, |
| { |
| "epoch": 9.510910458991724, |
| "grad_norm": 10.599377632141113, |
| "learning_rate": 5.827271967226821e-06, |
| "loss": 0.1201, |
| "step": 12640 |
| }, |
| { |
| "epoch": 9.518434913468774, |
| "grad_norm": 5.005101203918457, |
| "learning_rate": 5.82309171473957e-06, |
| "loss": 0.0909, |
| "step": 12650 |
| }, |
| { |
| "epoch": 9.525959367945823, |
| "grad_norm": 5.786133289337158, |
| "learning_rate": 5.81891146225232e-06, |
| "loss": 0.1231, |
| "step": 12660 |
| }, |
| { |
| "epoch": 9.533483822422875, |
| "grad_norm": 8.286248207092285, |
| "learning_rate": 5.81473120976507e-06, |
| "loss": 0.0995, |
| "step": 12670 |
| }, |
| { |
| "epoch": 9.541008276899925, |
| "grad_norm": 6.8975653648376465, |
| "learning_rate": 5.81055095727782e-06, |
| "loss": 0.1253, |
| "step": 12680 |
| }, |
| { |
| "epoch": 9.548532731376975, |
| "grad_norm": 7.303131580352783, |
| "learning_rate": 5.806370704790569e-06, |
| "loss": 0.1029, |
| "step": 12690 |
| }, |
| { |
| "epoch": 9.556057185854026, |
| "grad_norm": 3.548121213912964, |
| "learning_rate": 5.80219045230332e-06, |
| "loss": 0.106, |
| "step": 12700 |
| }, |
| { |
| "epoch": 9.563581640331076, |
| "grad_norm": 5.769835472106934, |
| "learning_rate": 5.79801019981607e-06, |
| "loss": 0.1213, |
| "step": 12710 |
| }, |
| { |
| "epoch": 9.571106094808126, |
| "grad_norm": 5.858428955078125, |
| "learning_rate": 5.79382994732882e-06, |
| "loss": 0.1673, |
| "step": 12720 |
| }, |
| { |
| "epoch": 9.578630549285176, |
| "grad_norm": 8.080878257751465, |
| "learning_rate": 5.789649694841569e-06, |
| "loss": 0.1614, |
| "step": 12730 |
| }, |
| { |
| "epoch": 9.586155003762228, |
| "grad_norm": 3.0256166458129883, |
| "learning_rate": 5.785469442354319e-06, |
| "loss": 0.1086, |
| "step": 12740 |
| }, |
| { |
| "epoch": 9.593679458239277, |
| "grad_norm": 9.315362930297852, |
| "learning_rate": 5.781289189867068e-06, |
| "loss": 0.1029, |
| "step": 12750 |
| }, |
| { |
| "epoch": 9.601203912716327, |
| "grad_norm": 11.079126358032227, |
| "learning_rate": 5.777108937379819e-06, |
| "loss": 0.1252, |
| "step": 12760 |
| }, |
| { |
| "epoch": 9.608728367193379, |
| "grad_norm": 5.017213821411133, |
| "learning_rate": 5.772928684892568e-06, |
| "loss": 0.1493, |
| "step": 12770 |
| }, |
| { |
| "epoch": 9.616252821670429, |
| "grad_norm": 7.71808385848999, |
| "learning_rate": 5.768748432405318e-06, |
| "loss": 0.1598, |
| "step": 12780 |
| }, |
| { |
| "epoch": 9.623777276147479, |
| "grad_norm": 6.4492363929748535, |
| "learning_rate": 5.764568179918067e-06, |
| "loss": 0.1126, |
| "step": 12790 |
| }, |
| { |
| "epoch": 9.63130173062453, |
| "grad_norm": 9.06157112121582, |
| "learning_rate": 5.760387927430817e-06, |
| "loss": 0.1009, |
| "step": 12800 |
| }, |
| { |
| "epoch": 9.63882618510158, |
| "grad_norm": 7.8551154136657715, |
| "learning_rate": 5.756207674943567e-06, |
| "loss": 0.1184, |
| "step": 12810 |
| }, |
| { |
| "epoch": 9.64635063957863, |
| "grad_norm": 5.052379608154297, |
| "learning_rate": 5.752027422456317e-06, |
| "loss": 0.0983, |
| "step": 12820 |
| }, |
| { |
| "epoch": 9.653875094055682, |
| "grad_norm": 6.5726518630981445, |
| "learning_rate": 5.747847169969066e-06, |
| "loss": 0.088, |
| "step": 12830 |
| }, |
| { |
| "epoch": 9.661399548532732, |
| "grad_norm": 7.628342628479004, |
| "learning_rate": 5.743666917481816e-06, |
| "loss": 0.0957, |
| "step": 12840 |
| }, |
| { |
| "epoch": 9.668924003009781, |
| "grad_norm": 9.543529510498047, |
| "learning_rate": 5.7394866649945655e-06, |
| "loss": 0.1356, |
| "step": 12850 |
| }, |
| { |
| "epoch": 9.676448457486833, |
| "grad_norm": 7.6209397315979, |
| "learning_rate": 5.735306412507315e-06, |
| "loss": 0.1546, |
| "step": 12860 |
| }, |
| { |
| "epoch": 9.683972911963883, |
| "grad_norm": 7.236486434936523, |
| "learning_rate": 5.7311261600200655e-06, |
| "loss": 0.1506, |
| "step": 12870 |
| }, |
| { |
| "epoch": 9.691497366440933, |
| "grad_norm": 7.329847812652588, |
| "learning_rate": 5.726945907532816e-06, |
| "loss": 0.1615, |
| "step": 12880 |
| }, |
| { |
| "epoch": 9.699021820917984, |
| "grad_norm": 4.845203399658203, |
| "learning_rate": 5.722765655045565e-06, |
| "loss": 0.1202, |
| "step": 12890 |
| }, |
| { |
| "epoch": 9.706546275395034, |
| "grad_norm": 9.867792129516602, |
| "learning_rate": 5.718585402558316e-06, |
| "loss": 0.1367, |
| "step": 12900 |
| }, |
| { |
| "epoch": 9.714070729872084, |
| "grad_norm": 11.250028610229492, |
| "learning_rate": 5.714405150071065e-06, |
| "loss": 0.1792, |
| "step": 12910 |
| }, |
| { |
| "epoch": 9.721595184349134, |
| "grad_norm": 5.1288886070251465, |
| "learning_rate": 5.710224897583815e-06, |
| "loss": 0.1051, |
| "step": 12920 |
| }, |
| { |
| "epoch": 9.729119638826186, |
| "grad_norm": 7.018002033233643, |
| "learning_rate": 5.7060446450965644e-06, |
| "loss": 0.0986, |
| "step": 12930 |
| }, |
| { |
| "epoch": 9.736644093303235, |
| "grad_norm": 4.279899597167969, |
| "learning_rate": 5.701864392609314e-06, |
| "loss": 0.1085, |
| "step": 12940 |
| }, |
| { |
| "epoch": 9.744168547780285, |
| "grad_norm": 3.208705186843872, |
| "learning_rate": 5.6976841401220635e-06, |
| "loss": 0.1091, |
| "step": 12950 |
| }, |
| { |
| "epoch": 9.751693002257337, |
| "grad_norm": 8.875226974487305, |
| "learning_rate": 5.693503887634814e-06, |
| "loss": 0.1267, |
| "step": 12960 |
| }, |
| { |
| "epoch": 9.759217456734387, |
| "grad_norm": 8.384805679321289, |
| "learning_rate": 5.6893236351475635e-06, |
| "loss": 0.1048, |
| "step": 12970 |
| }, |
| { |
| "epoch": 9.766741911211437, |
| "grad_norm": 6.142811298370361, |
| "learning_rate": 5.685143382660313e-06, |
| "loss": 0.1153, |
| "step": 12980 |
| }, |
| { |
| "epoch": 9.774266365688488, |
| "grad_norm": 9.738375663757324, |
| "learning_rate": 5.680963130173063e-06, |
| "loss": 0.1123, |
| "step": 12990 |
| }, |
| { |
| "epoch": 9.781790820165538, |
| "grad_norm": 5.795185089111328, |
| "learning_rate": 5.676782877685812e-06, |
| "loss": 0.1143, |
| "step": 13000 |
| }, |
| { |
| "epoch": 9.789315274642588, |
| "grad_norm": 8.338446617126465, |
| "learning_rate": 5.6726026251985625e-06, |
| "loss": 0.1388, |
| "step": 13010 |
| }, |
| { |
| "epoch": 9.79683972911964, |
| "grad_norm": 3.7553884983062744, |
| "learning_rate": 5.668422372711312e-06, |
| "loss": 0.1147, |
| "step": 13020 |
| }, |
| { |
| "epoch": 9.80436418359669, |
| "grad_norm": 4.050734043121338, |
| "learning_rate": 5.664242120224062e-06, |
| "loss": 0.1068, |
| "step": 13030 |
| }, |
| { |
| "epoch": 9.81188863807374, |
| "grad_norm": 9.254073143005371, |
| "learning_rate": 5.660061867736811e-06, |
| "loss": 0.1267, |
| "step": 13040 |
| }, |
| { |
| "epoch": 9.81941309255079, |
| "grad_norm": 10.481110572814941, |
| "learning_rate": 5.655881615249561e-06, |
| "loss": 0.1394, |
| "step": 13050 |
| }, |
| { |
| "epoch": 9.82693754702784, |
| "grad_norm": 5.782297611236572, |
| "learning_rate": 5.651701362762312e-06, |
| "loss": 0.094, |
| "step": 13060 |
| }, |
| { |
| "epoch": 9.83446200150489, |
| "grad_norm": 4.115938663482666, |
| "learning_rate": 5.6475211102750615e-06, |
| "loss": 0.1009, |
| "step": 13070 |
| }, |
| { |
| "epoch": 9.84198645598194, |
| "grad_norm": 9.126145362854004, |
| "learning_rate": 5.643340857787811e-06, |
| "loss": 0.1249, |
| "step": 13080 |
| }, |
| { |
| "epoch": 9.849510910458992, |
| "grad_norm": 5.1168131828308105, |
| "learning_rate": 5.639160605300561e-06, |
| "loss": 0.1231, |
| "step": 13090 |
| }, |
| { |
| "epoch": 9.857035364936042, |
| "grad_norm": 3.9374754428863525, |
| "learning_rate": 5.634980352813311e-06, |
| "loss": 0.1204, |
| "step": 13100 |
| }, |
| { |
| "epoch": 9.864559819413092, |
| "grad_norm": 6.21238899230957, |
| "learning_rate": 5.6308001003260606e-06, |
| "loss": 0.1009, |
| "step": 13110 |
| }, |
| { |
| "epoch": 9.872084273890144, |
| "grad_norm": 3.8661928176879883, |
| "learning_rate": 5.62661984783881e-06, |
| "loss": 0.1324, |
| "step": 13120 |
| }, |
| { |
| "epoch": 9.879608728367193, |
| "grad_norm": 9.107396125793457, |
| "learning_rate": 5.62243959535156e-06, |
| "loss": 0.12, |
| "step": 13130 |
| }, |
| { |
| "epoch": 9.887133182844243, |
| "grad_norm": 9.252467155456543, |
| "learning_rate": 5.618259342864309e-06, |
| "loss": 0.1414, |
| "step": 13140 |
| }, |
| { |
| "epoch": 9.894657637321295, |
| "grad_norm": 5.15799617767334, |
| "learning_rate": 5.61407909037706e-06, |
| "loss": 0.0885, |
| "step": 13150 |
| }, |
| { |
| "epoch": 9.902182091798345, |
| "grad_norm": 7.307941913604736, |
| "learning_rate": 5.609898837889809e-06, |
| "loss": 0.1298, |
| "step": 13160 |
| }, |
| { |
| "epoch": 9.909706546275395, |
| "grad_norm": 7.1728644371032715, |
| "learning_rate": 5.605718585402559e-06, |
| "loss": 0.082, |
| "step": 13170 |
| }, |
| { |
| "epoch": 9.917231000752446, |
| "grad_norm": 6.501073360443115, |
| "learning_rate": 5.601538332915308e-06, |
| "loss": 0.1097, |
| "step": 13180 |
| }, |
| { |
| "epoch": 9.924755455229496, |
| "grad_norm": 5.394291877746582, |
| "learning_rate": 5.597358080428058e-06, |
| "loss": 0.1422, |
| "step": 13190 |
| }, |
| { |
| "epoch": 9.932279909706546, |
| "grad_norm": 3.971874952316284, |
| "learning_rate": 5.593177827940808e-06, |
| "loss": 0.094, |
| "step": 13200 |
| }, |
| { |
| "epoch": 9.939804364183598, |
| "grad_norm": 6.758007526397705, |
| "learning_rate": 5.588997575453558e-06, |
| "loss": 0.1511, |
| "step": 13210 |
| }, |
| { |
| "epoch": 9.947328818660647, |
| "grad_norm": 12.515239715576172, |
| "learning_rate": 5.584817322966307e-06, |
| "loss": 0.1404, |
| "step": 13220 |
| }, |
| { |
| "epoch": 9.954853273137697, |
| "grad_norm": 10.174310684204102, |
| "learning_rate": 5.580637070479057e-06, |
| "loss": 0.1463, |
| "step": 13230 |
| }, |
| { |
| "epoch": 9.962377727614747, |
| "grad_norm": 6.401690483093262, |
| "learning_rate": 5.576456817991808e-06, |
| "loss": 0.1249, |
| "step": 13240 |
| }, |
| { |
| "epoch": 9.969902182091799, |
| "grad_norm": 9.405531883239746, |
| "learning_rate": 5.572276565504558e-06, |
| "loss": 0.1145, |
| "step": 13250 |
| }, |
| { |
| "epoch": 9.977426636568849, |
| "grad_norm": 8.298341751098633, |
| "learning_rate": 5.568096313017307e-06, |
| "loss": 0.1074, |
| "step": 13260 |
| }, |
| { |
| "epoch": 9.984951091045899, |
| "grad_norm": 6.206060409545898, |
| "learning_rate": 5.563916060530057e-06, |
| "loss": 0.1025, |
| "step": 13270 |
| }, |
| { |
| "epoch": 9.99247554552295, |
| "grad_norm": 6.96009635925293, |
| "learning_rate": 5.559735808042806e-06, |
| "loss": 0.12, |
| "step": 13280 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.6092808842658997, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.1308, |
| "step": 13290 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 26580, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|