| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.483729966002914, |
| "eval_steps": 100, |
| "global_step": 17900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0013876361617983764, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3867488443759631e-06, |
| "loss": 1.5686, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0027752723235967528, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9275808936825885e-06, |
| "loss": 1.5599, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.004162908485395129, |
| "grad_norm": 0.0, |
| "learning_rate": 4.468412942989214e-06, |
| "loss": 1.6107, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0055505446471935055, |
| "grad_norm": 0.0, |
| "learning_rate": 6.0092449922958395e-06, |
| "loss": 1.5163, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0069381808089918826, |
| "grad_norm": 0.0, |
| "learning_rate": 7.550077041602466e-06, |
| "loss": 1.5544, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.008325816970790259, |
| "grad_norm": 0.0, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 1.5103, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.009713453132588635, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0631741140215717e-05, |
| "loss": 1.5622, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.011101089294387011, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2172573189522343e-05, |
| "loss": 1.6018, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.012488725456185389, |
| "grad_norm": 0.0, |
| "learning_rate": 1.371340523882897e-05, |
| "loss": 1.6178, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.013876361617983765, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5254237288135596e-05, |
| "loss": 1.4605, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.013876361617983765, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 852.3731, |
| "eval_samples_per_second": 15.031, |
| "eval_steps_per_second": 1.879, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.015263997779782141, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6795069337442222e-05, |
| "loss": 1.6031, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.016651633941580517, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8335901386748848e-05, |
| "loss": 1.5871, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.018039270103378895, |
| "grad_norm": 0.0, |
| "learning_rate": 1.987673343605547e-05, |
| "loss": 1.6041, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.01942690626517727, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1417565485362097e-05, |
| "loss": 1.5313, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.020814542426975648, |
| "grad_norm": 0.0, |
| "learning_rate": 2.295839753466872e-05, |
| "loss": 1.5775, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.022202178588774022, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4499229583975346e-05, |
| "loss": 1.5831, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0235898147505724, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6040061633281976e-05, |
| "loss": 1.5453, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.024977450912370778, |
| "grad_norm": 0.0, |
| "learning_rate": 2.75808936825886e-05, |
| "loss": 1.5362, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.026365087074169152, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9121725731895228e-05, |
| "loss": 1.48, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.02775272323596753, |
| "grad_norm": 0.0, |
| "learning_rate": 3.066255778120185e-05, |
| "loss": 1.5183, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.02775272323596753, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 853.1246, |
| "eval_samples_per_second": 15.018, |
| "eval_steps_per_second": 1.878, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.029140359397765905, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2203389830508473e-05, |
| "loss": 1.4961, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.030527995559564283, |
| "grad_norm": 0.0, |
| "learning_rate": 3.37442218798151e-05, |
| "loss": 1.5633, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.03191563172136266, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5285053929121726e-05, |
| "loss": 1.5646, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.033303267883161035, |
| "grad_norm": 0.0, |
| "learning_rate": 3.682588597842835e-05, |
| "loss": 1.464, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.03469090404495941, |
| "grad_norm": 0.0, |
| "learning_rate": 3.836671802773498e-05, |
| "loss": 1.5711, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.03607854020675779, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9907550077041604e-05, |
| "loss": 1.5541, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.03746617636855616, |
| "grad_norm": 0.0, |
| "learning_rate": 4.144838212634823e-05, |
| "loss": 1.6075, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.03885381253035454, |
| "grad_norm": 0.0, |
| "learning_rate": 4.298921417565486e-05, |
| "loss": 1.5286, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.04024144869215292, |
| "grad_norm": 0.0, |
| "learning_rate": 4.453004622496148e-05, |
| "loss": 1.5678, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.041629084853951295, |
| "grad_norm": 0.0, |
| "learning_rate": 4.607087827426811e-05, |
| "loss": 1.5208, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.041629084853951295, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 856.7103, |
| "eval_samples_per_second": 14.955, |
| "eval_steps_per_second": 1.87, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.04301672101574967, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7611710323574735e-05, |
| "loss": 1.5492, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.044404357177548044, |
| "grad_norm": 0.0, |
| "learning_rate": 4.915254237288136e-05, |
| "loss": 1.5481, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.04579199333934642, |
| "grad_norm": 0.0, |
| "learning_rate": 5.069337442218799e-05, |
| "loss": 1.5389, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0471796295011448, |
| "grad_norm": 0.0, |
| "learning_rate": 5.223420647149461e-05, |
| "loss": 1.5258, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.04856726566294318, |
| "grad_norm": 0.0, |
| "learning_rate": 5.377503852080123e-05, |
| "loss": 1.4987, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.049954901824741556, |
| "grad_norm": 0.0, |
| "learning_rate": 5.5315870570107866e-05, |
| "loss": 1.5953, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.05134253798653993, |
| "grad_norm": 0.0, |
| "learning_rate": 5.685670261941448e-05, |
| "loss": 1.5349, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.052730174148338305, |
| "grad_norm": 0.0, |
| "learning_rate": 5.839753466872111e-05, |
| "loss": 1.5716, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.05411781031013668, |
| "grad_norm": 0.0, |
| "learning_rate": 5.993836671802774e-05, |
| "loss": 1.4449, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.05550544647193506, |
| "grad_norm": 0.0, |
| "learning_rate": 6.147919876733436e-05, |
| "loss": 1.5694, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05550544647193506, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.6963, |
| "eval_samples_per_second": 14.99, |
| "eval_steps_per_second": 1.874, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05689308263373344, |
| "grad_norm": 0.0, |
| "learning_rate": 6.302003081664099e-05, |
| "loss": 1.588, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.05828071879553181, |
| "grad_norm": 0.0, |
| "learning_rate": 6.456086286594762e-05, |
| "loss": 1.5056, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.05966835495733019, |
| "grad_norm": 0.0, |
| "learning_rate": 6.610169491525424e-05, |
| "loss": 1.6029, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.061055991119128565, |
| "grad_norm": 0.0, |
| "learning_rate": 6.764252696456087e-05, |
| "loss": 1.54, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.06244362728092694, |
| "grad_norm": 0.0, |
| "learning_rate": 6.918335901386748e-05, |
| "loss": 1.4744, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.06383126344272531, |
| "grad_norm": 0.0, |
| "learning_rate": 7.072419106317412e-05, |
| "loss": 1.6046, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.06521889960452369, |
| "grad_norm": 0.0, |
| "learning_rate": 7.226502311248075e-05, |
| "loss": 1.5482, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.06660653576632207, |
| "grad_norm": 0.0, |
| "learning_rate": 7.380585516178737e-05, |
| "loss": 1.5734, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.06799417192812045, |
| "grad_norm": 0.0, |
| "learning_rate": 7.534668721109399e-05, |
| "loss": 1.515, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.06938180808991883, |
| "grad_norm": 0.0, |
| "learning_rate": 7.688751926040063e-05, |
| "loss": 1.515, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06938180808991883, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 860.2669, |
| "eval_samples_per_second": 14.893, |
| "eval_steps_per_second": 1.862, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0707694442517172, |
| "grad_norm": 0.0, |
| "learning_rate": 7.842835130970725e-05, |
| "loss": 1.5486, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.07215708041351558, |
| "grad_norm": 0.0, |
| "learning_rate": 7.996918335901386e-05, |
| "loss": 1.5361, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.07354471657531396, |
| "grad_norm": 0.0, |
| "learning_rate": 8.151001540832049e-05, |
| "loss": 1.7084, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.07493235273711232, |
| "grad_norm": 0.0, |
| "learning_rate": 8.305084745762712e-05, |
| "loss": 1.5039, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.0763199888989107, |
| "grad_norm": 0.0, |
| "learning_rate": 8.459167950693376e-05, |
| "loss": 1.5072, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.07770762506070908, |
| "grad_norm": 0.0, |
| "learning_rate": 8.613251155624037e-05, |
| "loss": 1.5539, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.07909526122250746, |
| "grad_norm": 0.0, |
| "learning_rate": 8.7673343605547e-05, |
| "loss": 1.5531, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.08048289738430583, |
| "grad_norm": 0.0, |
| "learning_rate": 8.921417565485362e-05, |
| "loss": 1.5783, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.08187053354610421, |
| "grad_norm": 0.0, |
| "learning_rate": 9.075500770416026e-05, |
| "loss": 1.584, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.08325816970790259, |
| "grad_norm": 0.0, |
| "learning_rate": 9.229583975346687e-05, |
| "loss": 1.5598, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08325816970790259, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 859.4877, |
| "eval_samples_per_second": 14.907, |
| "eval_steps_per_second": 1.864, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08464580586970097, |
| "grad_norm": 0.0, |
| "learning_rate": 9.38366718027735e-05, |
| "loss": 1.5914, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.08603344203149935, |
| "grad_norm": 0.0, |
| "learning_rate": 9.537750385208013e-05, |
| "loss": 1.509, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.08742107819329771, |
| "grad_norm": 0.0, |
| "learning_rate": 9.691833590138675e-05, |
| "loss": 1.5504, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.08880871435509609, |
| "grad_norm": 0.0, |
| "learning_rate": 9.845916795069338e-05, |
| "loss": 1.5355, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.09019635051689447, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001, |
| "loss": 1.5586, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.09158398667869284, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010154083204930662, |
| "loss": 1.5139, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.09297162284049122, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010308166409861326, |
| "loss": 1.5743, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.0943592590022896, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010462249614791988, |
| "loss": 1.5265, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.09574689516408798, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001061633281972265, |
| "loss": 1.554, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.09713453132588636, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010770416024653314, |
| "loss": 1.5693, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.09713453132588636, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 853.3735, |
| "eval_samples_per_second": 15.013, |
| "eval_steps_per_second": 1.877, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.09852216748768473, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010924499229583975, |
| "loss": 1.6233, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.09990980364948311, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011078582434514639, |
| "loss": 1.5869, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.10129743981128148, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011232665639445301, |
| "loss": 1.5084, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.10268507597307985, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011386748844375963, |
| "loss": 1.448, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.10407271213487823, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011540832049306627, |
| "loss": 1.5386, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.10546034829667661, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011694915254237289, |
| "loss": 1.4706, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.10684798445847499, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001184899845916795, |
| "loss": 1.6041, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.10823562062027337, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012003081664098615, |
| "loss": 1.5857, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.10962325678207174, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012157164869029276, |
| "loss": 1.5199, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.11101089294387012, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001231124807395994, |
| "loss": 1.5045, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.11101089294387012, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 852.1951, |
| "eval_samples_per_second": 15.034, |
| "eval_steps_per_second": 1.88, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.1123985291056685, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000124653312788906, |
| "loss": 1.5783, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.11378616526746688, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012619414483821262, |
| "loss": 1.5467, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.11517380142926524, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012773497688751926, |
| "loss": 1.5322, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.11656143759106362, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001292758089368259, |
| "loss": 1.5253, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.117949073752862, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013081664098613251, |
| "loss": 1.5313, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.11933670991466037, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013235747303543915, |
| "loss": 1.5569, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.12072434607645875, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013389830508474577, |
| "loss": 1.566, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.12211198223825713, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013543913713405238, |
| "loss": 1.5446, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.12349961840005551, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013697996918335902, |
| "loss": 1.5445, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.12488725456185389, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013852080123266563, |
| "loss": 1.4359, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.12488725456185389, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 855.2417, |
| "eval_samples_per_second": 14.981, |
| "eval_steps_per_second": 1.873, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.12627489072365225, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014006163328197227, |
| "loss": 1.5106, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.12766252688545063, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001416024653312789, |
| "loss": 1.4681, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.129050163047249, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014314329738058552, |
| "loss": 1.4964, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.13043779920904738, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014468412942989216, |
| "loss": 1.4468, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.13182543537084576, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014622496147919878, |
| "loss": 1.4915, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.13321307153264414, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001477657935285054, |
| "loss": 1.5481, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.13460070769444252, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014930662557781203, |
| "loss": 1.5247, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.1359883438562409, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015084745762711864, |
| "loss": 1.5824, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.13737598001803927, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015238828967642525, |
| "loss": 1.5115, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.13876361617983765, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001539291217257319, |
| "loss": 1.4536, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.13876361617983765, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 859.8289, |
| "eval_samples_per_second": 14.901, |
| "eval_steps_per_second": 1.863, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.14015125234163603, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015546995377503853, |
| "loss": 1.5573, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.1415388885034344, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015701078582434517, |
| "loss": 1.5338, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.14292652466523278, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015855161787365179, |
| "loss": 1.5633, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.14431416082703116, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001600924499229584, |
| "loss": 1.5012, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.14570179698882954, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016163328197226504, |
| "loss": 1.5334, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.14708943315062792, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016317411402157165, |
| "loss": 1.5824, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.14847706931242627, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016471494607087826, |
| "loss": 1.5572, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.14986470547422465, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001662557781201849, |
| "loss": 1.5407, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.15125234163602302, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016779661016949154, |
| "loss": 1.4918, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.1526399777978214, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016933744221879818, |
| "loss": 1.5496, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.1526399777978214, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 850.8719, |
| "eval_samples_per_second": 15.057, |
| "eval_steps_per_second": 1.883, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.15402761395961978, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001708782742681048, |
| "loss": 1.6318, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.15541525012141816, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001724191063174114, |
| "loss": 1.5595, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.15680288628321654, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017395993836671805, |
| "loss": 1.5493, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.15819052244501491, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017550077041602466, |
| "loss": 1.5708, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.1595781586068133, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017704160246533127, |
| "loss": 1.5479, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.16096579476861167, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001785824345146379, |
| "loss": 1.5459, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.16235343093041005, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018012326656394453, |
| "loss": 1.5629, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.16374106709220843, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018166409861325116, |
| "loss": 1.5617, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.1651287032540068, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001832049306625578, |
| "loss": 1.5623, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.16651633941580518, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018474576271186442, |
| "loss": 1.5399, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.16651633941580518, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 851.5599, |
| "eval_samples_per_second": 15.045, |
| "eval_steps_per_second": 1.881, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.16790397557760356, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018628659476117106, |
| "loss": 1.5636, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.16929161173940194, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018782742681047767, |
| "loss": 1.5902, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.17067924790120031, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018936825885978428, |
| "loss": 1.6026, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.1720668840629987, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019090909090909092, |
| "loss": 1.5568, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.17345452022479707, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019244992295839753, |
| "loss": 1.568, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.17484215638659542, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019399075500770417, |
| "loss": 1.5191, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.1762297925483938, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019553158705701081, |
| "loss": 1.5366, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.17761742871019218, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019707241910631743, |
| "loss": 1.553, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.17900506487199055, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019861325115562404, |
| "loss": 1.528, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.18039270103378893, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019999999880520304, |
| "loss": 1.5596, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.18039270103378893, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 846.8538, |
| "eval_samples_per_second": 15.129, |
| "eval_steps_per_second": 1.892, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.1817803371955873, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019999985542960055, |
| "loss": 1.5126, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.1831679733573857, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001999994730949955, |
| "loss": 1.5304, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.18455560951918407, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001999988518023016, |
| "loss": 1.523, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.18594324568098244, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019999799155300343, |
| "loss": 1.5223, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.18733088184278082, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019999689234915667, |
| "loss": 1.5163, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.1887185180045792, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019999555419338794, |
| "loss": 1.5422, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.19010615416637758, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001999939770888949, |
| "loss": 1.5268, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.19149379032817596, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019999216103944617, |
| "loss": 1.5667, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.19288142648997433, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019999010604938145, |
| "loss": 1.557, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.1942690626517727, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019998781212361122, |
| "loss": 1.52, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.1942690626517727, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 855.6828, |
| "eval_samples_per_second": 14.973, |
| "eval_steps_per_second": 1.872, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.1956566988135711, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001999852792676171, |
| "loss": 1.5547, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.19704433497536947, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019998250748745155, |
| "loss": 1.5801, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.19843197113716785, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019997949678973804, |
| "loss": 1.4726, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.19981960729896622, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019997624718167087, |
| "loss": 1.5853, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.2012072434607646, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001999727586710153, |
| "loss": 1.536, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.20259487962256295, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019996903126610747, |
| "loss": 1.5915, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.20398251578436133, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019996506497585434, |
| "loss": 1.5097, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.2053701519461597, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019996085980973375, |
| "loss": 1.5386, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.20675778810795808, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001999564157777943, |
| "loss": 1.5985, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.20814542426975646, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019995173289065544, |
| "loss": 1.58, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.20814542426975646, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 852.5849, |
| "eval_samples_per_second": 15.027, |
| "eval_steps_per_second": 1.879, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.20953306043155484, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001999468111595074, |
| "loss": 1.549, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.21092069659335322, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019994165059611106, |
| "loss": 1.6138, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.2123083327551516, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001999362512127981, |
| "loss": 1.5108, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.21369596891694997, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019993061302247086, |
| "loss": 1.5541, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.21508360507874835, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019992473603860228, |
| "loss": 1.5873, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.21647124124054673, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019991862027523603, |
| "loss": 1.5918, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.2178588774023451, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019991226574698624, |
| "loss": 1.5176, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.21924651356414349, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001999056724690377, |
| "loss": 1.5385, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.22063414972594186, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001998988404571456, |
| "loss": 1.565, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.22202178588774024, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019989176972763572, |
| "loss": 1.5304, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.22202178588774024, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 852.2663, |
| "eval_samples_per_second": 15.033, |
| "eval_steps_per_second": 1.88, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.22340942204953862, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019988446029740422, |
| "loss": 1.5396, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.224797058211337, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019987691218391763, |
| "loss": 1.5072, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.22618469437313538, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019986912540521294, |
| "loss": 1.4956, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.22757233053493375, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019986109997989732, |
| "loss": 1.5727, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.2289599666967321, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019985283592714834, |
| "loss": 1.6416, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.23034760285853048, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019984433326671364, |
| "loss": 1.5467, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.23173523902032886, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019983559201891123, |
| "loss": 1.5827, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.23312287518212724, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019982661220462907, |
| "loss": 1.5114, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.23451051134392562, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019981739384532525, |
| "loss": 1.52, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.235898147505724, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001998079369630279, |
| "loss": 1.572, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.235898147505724, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 850.3193, |
| "eval_samples_per_second": 15.067, |
| "eval_steps_per_second": 1.884, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.23728578366752237, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019979824158033522, |
| "loss": 1.5659, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.23867341982932075, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019978830772041513, |
| "loss": 1.5661, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.24006105599111913, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019977813540700555, |
| "loss": 1.4954, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.2414486921529175, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019976772466441417, |
| "loss": 1.5238, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.24283632831471588, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019975707551751846, |
| "loss": 1.4822, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.24422396447651426, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019974618799176551, |
| "loss": 1.5299, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.24561160063831264, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019973506211317212, |
| "loss": 1.5403, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.24699923680011102, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001997236979083246, |
| "loss": 1.606, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.2483868729619094, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019971209540437873, |
| "loss": 1.5796, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.24977450912370777, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001997002546290599, |
| "loss": 1.5143, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.24977450912370777, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 848.5406, |
| "eval_samples_per_second": 15.099, |
| "eval_steps_per_second": 1.888, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.2511621452855061, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019968817561066262, |
| "loss": 1.431, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.2525497814473045, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019967585837805092, |
| "loss": 1.5829, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.2539374176091029, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001996633029606579, |
| "loss": 1.6282, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.25532505377090126, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019965050938848598, |
| "loss": 1.5562, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.25671268993269963, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019963747769210661, |
| "loss": 1.6189, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.258100326094498, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019962420790266015, |
| "loss": 1.5318, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.2594879622562964, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019961070005185608, |
| "loss": 1.5493, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.26087559841809477, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019959695417197263, |
| "loss": 1.52, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.26226323457989315, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019958297029585691, |
| "loss": 1.54, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.2636508707416915, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019956874845692465, |
| "loss": 1.4546, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2636508707416915, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 850.5993, |
| "eval_samples_per_second": 15.062, |
| "eval_steps_per_second": 1.883, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2650385069034899, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019955428868916029, |
| "loss": 1.5184, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.2664261430652883, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001995395910271168, |
| "loss": 1.5142, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.26781377922708666, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001995246555059156, |
| "loss": 1.513, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.26920141538888503, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019950948216124656, |
| "loss": 1.5488, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.2705890515506834, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019949407102936776, |
| "loss": 1.584, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.2719766877124818, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001994784221471055, |
| "loss": 1.5061, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.27336432387428017, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019946253555185435, |
| "loss": 1.6085, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.27475196003607855, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019944641128157674, |
| "loss": 1.6, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.2761395961978769, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019943004937480314, |
| "loss": 1.5589, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.2775272323596753, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019941344987063186, |
| "loss": 1.5565, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.2775272323596753, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 858.9389, |
| "eval_samples_per_second": 14.916, |
| "eval_steps_per_second": 1.865, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.2789148685214737, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019939661280872896, |
| "loss": 1.5592, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.28030250468327206, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001993795382293282, |
| "loss": 1.5324, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.28169014084507044, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019936222617323085, |
| "loss": 1.512, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.2830777770068688, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019934467668180573, |
| "loss": 1.5857, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.2844654131686672, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019932688979698893, |
| "loss": 1.5779, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.28585304933046557, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001993088655612839, |
| "loss": 1.5252, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.28724068549226395, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019929060401776126, |
| "loss": 1.522, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.2886283216540623, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019927210521005861, |
| "loss": 1.5391, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.2900159578158607, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019925336918238067, |
| "loss": 1.5029, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.2914035939776591, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019923439597949888, |
| "loss": 1.5913, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.2914035939776591, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 846.883, |
| "eval_samples_per_second": 15.128, |
| "eval_steps_per_second": 1.892, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.29279123013945746, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019921518564675145, |
| "loss": 1.5026, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.29417886630125584, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019919573823004333, |
| "loss": 1.6166, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.2955665024630542, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001991760537758459, |
| "loss": 1.5524, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.29695413862485254, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019915613233119705, |
| "loss": 1.5988, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.2983417747866509, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019913597394370086, |
| "loss": 1.5131, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.2997294109484493, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019911557866152775, |
| "loss": 1.573, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.30111704711024767, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019909494653341412, |
| "loss": 1.526, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.30250468327204605, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019907407760866237, |
| "loss": 1.6097, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.3038923194338444, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019905297193714073, |
| "loss": 1.5376, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.3052799555956428, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019903162956928322, |
| "loss": 1.6057, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.3052799555956428, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 864.1262, |
| "eval_samples_per_second": 14.827, |
| "eval_steps_per_second": 1.854, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.3066675917574412, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001990100505560894, |
| "loss": 1.5049, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.30805522791923956, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019898823494912432, |
| "loss": 1.5858, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.30944286408103794, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019896618280051845, |
| "loss": 1.5559, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.3108305002428363, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019894389416296742, |
| "loss": 1.5449, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.3122181364046347, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019892136908973205, |
| "loss": 1.5155, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.31360577256643307, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001988986076346381, |
| "loss": 1.5085, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.31499340872823145, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019887560985207614, |
| "loss": 1.5528, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.31638104489002983, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001988523757970016, |
| "loss": 1.5497, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.3177686810518282, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019882890552493437, |
| "loss": 1.5805, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.3191563172136266, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019880519909195893, |
| "loss": 1.547, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3191563172136266, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 850.4335, |
| "eval_samples_per_second": 15.065, |
| "eval_steps_per_second": 1.884, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.32054395337542496, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019878125655472395, |
| "loss": 1.5136, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.32193158953722334, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019875707797044237, |
| "loss": 1.4932, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.3233192256990217, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001987326633968912, |
| "loss": 1.5067, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.3247068618608201, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001987080128924113, |
| "loss": 1.5771, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.3260944980226185, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019868312651590743, |
| "loss": 1.5148, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.32748213418441685, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019865800432684787, |
| "loss": 1.5113, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.32886977034621523, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019863264638526445, |
| "loss": 1.6249, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.3302574065080136, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001986070527517523, |
| "loss": 1.51, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.331645042669812, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019858122348746985, |
| "loss": 1.5531, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.33303267883161036, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019855515865413855, |
| "loss": 1.4905, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.33303267883161036, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 855.2173, |
| "eval_samples_per_second": 14.981, |
| "eval_steps_per_second": 1.873, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.33442031499340874, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019852885831404268, |
| "loss": 1.5991, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.3358079511552071, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019850232253002946, |
| "loss": 1.5615, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.3371955873170055, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019847555136550852, |
| "loss": 1.5065, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.3385832234788039, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019844854488445214, |
| "loss": 1.538, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.33997085964060225, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019842130315139483, |
| "loss": 1.5472, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.34135849580240063, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019839382623143323, |
| "loss": 1.6322, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.342746131964199, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019836611419022605, |
| "loss": 1.5959, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.3441337681259974, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019833816709399372, |
| "loss": 1.6385, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.34552140428779576, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001983099850095185, |
| "loss": 1.5804, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.34690904044959414, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001982815680041441, |
| "loss": 1.6041, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.34690904044959414, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 858.6524, |
| "eval_samples_per_second": 14.921, |
| "eval_steps_per_second": 1.866, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.3482966766113925, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019825291614577561, |
| "loss": 1.602, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.34968431277319084, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019822402950287935, |
| "loss": 1.5554, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.3510719489349892, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001981949081444826, |
| "loss": 1.5689, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.3524595850967876, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019816555214017363, |
| "loss": 1.5012, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.353847221258586, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001981359615601013, |
| "loss": 1.5127, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.35523485742038435, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019810613647497512, |
| "loss": 1.5374, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.35662249358218273, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019807607695606486, |
| "loss": 1.4944, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.3580101297439811, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001980457830752006, |
| "loss": 1.5708, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.3593977659057795, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019801525490477237, |
| "loss": 1.5308, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.36078540206757787, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019798449251773014, |
| "loss": 1.6061, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.36078540206757787, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 855.3026, |
| "eval_samples_per_second": 14.979, |
| "eval_steps_per_second": 1.873, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.36217303822937624, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019795349598758345, |
| "loss": 1.4946, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.3635606743911746, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019792226538840142, |
| "loss": 1.5214, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.364948310552973, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019789080079481245, |
| "loss": 1.5454, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.3663359467147714, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019785910228200423, |
| "loss": 1.5109, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.36772358287656975, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019782716992572323, |
| "loss": 1.5324, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.36911121903836813, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019779500380227486, |
| "loss": 1.5535, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.3704988552001665, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019776260398852302, |
| "loss": 1.4757, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.3718864913619649, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001977299705618901, |
| "loss": 1.5326, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.37327412752376327, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019769710360035677, |
| "loss": 1.5597, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.37466176368556164, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001976640031824617, |
| "loss": 1.4312, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.37466176368556164, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 852.4079, |
| "eval_samples_per_second": 15.03, |
| "eval_steps_per_second": 1.879, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.37604939984736, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019763066938730138, |
| "loss": 1.4829, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.3774370360091584, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001975971022945301, |
| "loss": 1.6724, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.3788246721709568, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001975633019843595, |
| "loss": 1.5867, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.38021230833275516, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019752926853755864, |
| "loss": 1.592, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.38159994449455353, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019749500203545357, |
| "loss": 1.5938, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.3829875806563519, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019746050255992735, |
| "loss": 1.56, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.3843752168181503, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019742577019341966, |
| "loss": 1.5429, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.38576285297994867, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019739080501892674, |
| "loss": 1.5171, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.38715048914174705, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019735560712000116, |
| "loss": 1.6044, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.3885381253035454, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001973201765807516, |
| "loss": 1.5709, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3885381253035454, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 850.963, |
| "eval_samples_per_second": 15.056, |
| "eval_steps_per_second": 1.883, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3899257614653438, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019728451348584262, |
| "loss": 1.5263, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.3913133976271422, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019724861792049455, |
| "loss": 1.5946, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.39270103378894056, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019721248997048315, |
| "loss": 1.5997, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.39408866995073893, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001971761297221396, |
| "loss": 1.5798, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.3954763061125373, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019713953726235004, |
| "loss": 1.5016, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.3968639422743357, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001971027126785556, |
| "loss": 1.6087, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.39825157843613407, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001970656560587521, |
| "loss": 1.5384, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.39963921459793245, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019702836749148977, |
| "loss": 1.5269, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.4010268507597308, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001969908470658731, |
| "loss": 1.5338, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.4024144869215292, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001969530948715607, |
| "loss": 1.6045, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.4024144869215292, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 848.916, |
| "eval_samples_per_second": 15.092, |
| "eval_steps_per_second": 1.887, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.4038021230833275, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019691511099876493, |
| "loss": 1.5226, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.4051897592451259, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001968768955382519, |
| "loss": 1.5307, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.4065773954069243, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019683844858134087, |
| "loss": 1.5173, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.40796503156872266, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001967997702199045, |
| "loss": 1.5411, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.40935266773052104, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019676086054636844, |
| "loss": 1.5955, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.4107403038923194, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019672171965371088, |
| "loss": 1.5243, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.4121279400541178, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001966823476354627, |
| "loss": 1.6218, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.41351557621591617, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000196642744585707, |
| "loss": 1.4745, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.41490321237771455, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019660291059907893, |
| "loss": 1.4971, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.4162908485395129, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001965628457707656, |
| "loss": 1.4997, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4162908485395129, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.1371, |
| "eval_samples_per_second": 15.0, |
| "eval_steps_per_second": 1.876, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4176784847013113, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019652255019650565, |
| "loss": 1.5693, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.4190661208631097, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019648202397258904, |
| "loss": 1.5664, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.42045375702490806, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019644126719585705, |
| "loss": 1.516, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.42184139318670644, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001964002799637018, |
| "loss": 1.5675, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.4232290293485048, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001963590623740661, |
| "loss": 1.4753, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.4246166655103032, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019631761452544323, |
| "loss": 1.6459, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.42600430167210157, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019627593651687668, |
| "loss": 1.5182, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.42739193783389995, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019623402844795996, |
| "loss": 1.5366, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.4287795739956983, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019619189041883637, |
| "loss": 1.5423, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.4301672101574967, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019614952253019862, |
| "loss": 1.6168, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.4301672101574967, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 862.9987, |
| "eval_samples_per_second": 14.846, |
| "eval_steps_per_second": 1.856, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.4315548463192951, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019610692488328873, |
| "loss": 1.6113, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.43294248248109346, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019606409757989777, |
| "loss": 1.5321, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.43433011864289184, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001960210407223656, |
| "loss": 1.5487, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.4357177548046902, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001959777544135806, |
| "loss": 1.4904, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.4371053909664886, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019593423875697945, |
| "loss": 1.5505, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.43849302712828697, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019589049385654685, |
| "loss": 1.5197, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.43988066329008535, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001958465198168154, |
| "loss": 1.5095, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.4412682994518837, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001958023167428651, |
| "loss": 1.4931, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.4426559356136821, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019575788474032336, |
| "loss": 1.5362, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.4440435717754805, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019571322391536463, |
| "loss": 1.5893, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.4440435717754805, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 856.8213, |
| "eval_samples_per_second": 14.953, |
| "eval_steps_per_second": 1.87, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.44543120793727886, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001956683343747101, |
| "loss": 1.5172, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.44681884409907724, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019562321622562754, |
| "loss": 1.6077, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.4482064802608756, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019557786957593093, |
| "loss": 1.5728, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.449594116422674, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001955322945339804, |
| "loss": 1.5058, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.4509817525844724, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019548649120868175, |
| "loss": 1.551, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.45236938874627075, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019544045970948628, |
| "loss": 1.5116, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.45375702490806913, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019539420014639058, |
| "loss": 1.4633, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.4551446610698675, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019534771262993622, |
| "loss": 1.5815, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.4565322972316659, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019530099727120944, |
| "loss": 1.5479, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.4579199333934642, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019525405418184094, |
| "loss": 1.5511, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.4579199333934642, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.7008, |
| "eval_samples_per_second": 14.99, |
| "eval_steps_per_second": 1.874, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.4593075695552626, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019520688347400564, |
| "loss": 1.5653, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.46069520571706096, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019515948526042237, |
| "loss": 1.5735, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.46208284187885934, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019511185965435363, |
| "loss": 1.4936, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.4634704780406577, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019506400676960514, |
| "loss": 1.5324, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.4648581142024561, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019501592672052596, |
| "loss": 1.5694, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.4662457503642545, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019496761962200777, |
| "loss": 1.5417, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.46763338652605285, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019491908558948498, |
| "loss": 1.5219, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.46902102268785123, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019487032473893413, |
| "loss": 1.5854, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.4704086588496496, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019482133718687388, |
| "loss": 1.5353, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.471796295011448, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001947721230503645, |
| "loss": 1.5644, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.471796295011448, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 849.4372, |
| "eval_samples_per_second": 15.083, |
| "eval_steps_per_second": 1.886, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.47318393117324636, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019472268244700788, |
| "loss": 1.5277, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.47457156733504474, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019467301549494685, |
| "loss": 1.4834, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.4759592034968431, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001946231223128653, |
| "loss": 1.5184, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.4773468396586415, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019457300301998763, |
| "loss": 1.557, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.4787344758204399, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019452265773607855, |
| "loss": 1.5981, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.48012211198223825, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001944720865814429, |
| "loss": 1.6308, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.48150974814403663, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001944212896769251, |
| "loss": 1.5711, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.482897384305835, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019437026714390915, |
| "loss": 1.5795, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.4842850204676334, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019431901910431812, |
| "loss": 1.4727, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.48567265662943176, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019426754568061406, |
| "loss": 1.5256, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.48567265662943176, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 856.6448, |
| "eval_samples_per_second": 14.956, |
| "eval_steps_per_second": 1.87, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.48706029279123014, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019421584699579747, |
| "loss": 1.5841, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.4884479289530285, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001941639231734072, |
| "loss": 1.5792, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.4898355651148269, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001941117743375201, |
| "loss": 1.5351, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.4912232012766253, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019405940061275066, |
| "loss": 1.6002, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.49261083743842365, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019400680212425077, |
| "loss": 1.5695, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.49399847360022203, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001939539789977095, |
| "loss": 1.6573, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.4953861097620204, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019390093135935262, |
| "loss": 1.6026, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.4967737459238188, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001938476593359424, |
| "loss": 1.5125, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.49816138208561717, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019379416305477734, |
| "loss": 1.4628, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.49954901824741554, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019374044264369183, |
| "loss": 1.4841, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.49954901824741554, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 853.8457, |
| "eval_samples_per_second": 15.005, |
| "eval_steps_per_second": 1.876, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.5009366544092139, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001936864982310558, |
| "loss": 1.4635, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.5023242905710122, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019363232994577438, |
| "loss": 1.5569, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.5037119267328106, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019357793791728787, |
| "loss": 1.5724, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.505099562894609, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019352332227557105, |
| "loss": 1.593, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.5064871990564074, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019346848315113314, |
| "loss": 1.516, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.5078748352182058, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019341342067501728, |
| "loss": 1.5541, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.5092624713800041, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001933581349788005, |
| "loss": 1.5348, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.5106501075418025, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019330262619459305, |
| "loss": 1.4744, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.5120377437036009, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001932468944550384, |
| "loss": 1.5221, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.5134253798653993, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019319093989331277, |
| "loss": 1.597, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5134253798653993, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 850.2547, |
| "eval_samples_per_second": 15.068, |
| "eval_steps_per_second": 1.884, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5148130160271976, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001931347626431248, |
| "loss": 1.5977, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.516200652188996, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019307836283871525, |
| "loss": 1.5051, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.5175882883507944, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019302174061485675, |
| "loss": 1.4407, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.5189759245125928, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019296489610685344, |
| "loss": 1.4938, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.5203635606743912, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001929078294505405, |
| "loss": 1.5282, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.5217511968361895, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001928505407822841, |
| "loss": 1.546, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.5231388329979879, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019279303023898086, |
| "loss": 1.568, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.5245264691597863, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001927352979580576, |
| "loss": 1.5893, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.5259141053215847, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019267734407747095, |
| "loss": 1.5501, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.527301741483383, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001926191687357072, |
| "loss": 1.5555, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.527301741483383, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.0196, |
| "eval_samples_per_second": 15.002, |
| "eval_steps_per_second": 1.876, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.5286893776451814, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019256077207178174, |
| "loss": 1.6144, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.5300770138069798, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019250215422523883, |
| "loss": 1.6102, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.5314646499687782, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019244331533615133, |
| "loss": 1.5673, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.5328522861305766, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019238425554512025, |
| "loss": 1.4782, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.5342399222923749, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001923249749932745, |
| "loss": 1.559, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.5356275584541733, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019226547382227046, |
| "loss": 1.5148, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.5370151946159717, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019220575217429174, |
| "loss": 1.5362, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.5384028307777701, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001921458101920489, |
| "loss": 1.5881, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.5397904669395684, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001920856480187788, |
| "loss": 1.6019, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.5411781031013668, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001920252657982446, |
| "loss": 1.5708, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.5411781031013668, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 851.0746, |
| "eval_samples_per_second": 15.054, |
| "eval_steps_per_second": 1.882, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.5425657392631652, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001919646636747353, |
| "loss": 1.5765, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.5439533754249636, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019190384179306526, |
| "loss": 1.5805, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.545341011586762, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019184280029857417, |
| "loss": 1.5299, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.5467286477485603, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019178153933712626, |
| "loss": 1.5059, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.5481162839103587, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019172005905511043, |
| "loss": 1.5024, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.5495039200721571, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001916583595994395, |
| "loss": 1.5994, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.5508915562339555, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019159644111755005, |
| "loss": 1.5222, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.5522791923957538, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019153430375740222, |
| "loss": 1.5622, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.5536668285575522, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001914719476674789, |
| "loss": 1.5364, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.5550544647193506, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001914093729967859, |
| "loss": 1.5421, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5550544647193506, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 851.1421, |
| "eval_samples_per_second": 15.053, |
| "eval_steps_per_second": 1.882, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.556442100881149, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019134657989485114, |
| "loss": 1.5533, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.5578297370429474, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001912835685117247, |
| "loss": 1.5798, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.5592173732047457, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001912203389979781, |
| "loss": 1.5972, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.5606050093665441, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019115689150470423, |
| "loss": 1.5845, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.5619926455283425, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019109322618351678, |
| "loss": 1.5489, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.5633802816901409, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019102934318654998, |
| "loss": 1.5561, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.5647679178519392, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019096524266645824, |
| "loss": 1.5311, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.5661555540137376, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019090092477641574, |
| "loss": 1.491, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.567543190175536, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001908363896701161, |
| "loss": 1.5083, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.5689308263373344, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019077163750177195, |
| "loss": 1.4945, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.5689308263373344, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 853.7869, |
| "eval_samples_per_second": 15.006, |
| "eval_steps_per_second": 1.876, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.5703184624991328, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019070666842611468, |
| "loss": 1.5692, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.5717060986609311, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000190641482598394, |
| "loss": 1.4878, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.5730937348227295, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019057608017437744, |
| "loss": 1.4801, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.5744813709845279, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019051046131035032, |
| "loss": 1.4968, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.5758690071463263, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000190444626163115, |
| "loss": 1.5686, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.5772566433081247, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019037857488999067, |
| "loss": 1.5768, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.578644279469923, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019031230764881313, |
| "loss": 1.5261, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.5800319156317214, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019024582459793406, |
| "loss": 1.6063, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.5814195517935198, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019017912589622092, |
| "loss": 1.5674, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.5828071879553182, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00019011221170305657, |
| "loss": 1.6424, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.5828071879553182, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 856.6655, |
| "eval_samples_per_second": 14.956, |
| "eval_steps_per_second": 1.87, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.5841948241171165, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001900450821783387, |
| "loss": 1.5284, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.5855824602789149, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018997773748247955, |
| "loss": 1.5112, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.5869700964407133, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018991017777640555, |
| "loss": 1.5168, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.5883577326025117, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018984240322155702, |
| "loss": 1.5474, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.58974536876431, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001897744139798875, |
| "loss": 1.5246, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.5911330049261084, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018970621021386372, |
| "loss": 1.5666, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.5925206410879067, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001896377920864649, |
| "loss": 1.5671, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.5939082772497051, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018956915976118256, |
| "loss": 1.5047, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.5952959134115035, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018950031340202007, |
| "loss": 1.5048, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.5966835495733018, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018943125317349226, |
| "loss": 1.4808, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.5966835495733018, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 848.2106, |
| "eval_samples_per_second": 15.105, |
| "eval_steps_per_second": 1.889, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.5980711857351002, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018936197924062493, |
| "loss": 1.6047, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.5994588218968986, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001892924917689547, |
| "loss": 1.5637, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.600846458058697, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018922279092452836, |
| "loss": 1.5631, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.6022340942204953, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018915287687390256, |
| "loss": 1.5161, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.6036217303822937, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018908274978414344, |
| "loss": 1.5428, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.6050093665440921, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001890124098228263, |
| "loss": 1.4677, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.6063970027058905, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018894185715803504, |
| "loss": 1.558, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.6077846388676889, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018887109195836184, |
| "loss": 1.5269, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.6091722750294872, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018880011439290675, |
| "loss": 1.5649, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.6105599111912856, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018872892463127726, |
| "loss": 1.6088, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.6105599111912856, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.5624, |
| "eval_samples_per_second": 14.992, |
| "eval_steps_per_second": 1.875, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.611947547353084, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018865752284358807, |
| "loss": 1.5703, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.6133351835148824, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018858590920046032, |
| "loss": 1.627, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.6147228196766807, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018851408387302154, |
| "loss": 1.5132, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.6161104558384791, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001884420470329051, |
| "loss": 1.5088, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.6174980920002775, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018836979885224968, |
| "loss": 1.5003, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.6188857281620759, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018829733950369914, |
| "loss": 1.5189, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.6202733643238743, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018822466916040183, |
| "loss": 1.4951, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.6216610004856726, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018815178799601036, |
| "loss": 1.5847, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.623048636647471, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018807869618468103, |
| "loss": 1.5797, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.6244362728092694, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001880053939010736, |
| "loss": 1.5155, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.6244362728092694, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 849.8599, |
| "eval_samples_per_second": 15.075, |
| "eval_steps_per_second": 1.885, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.6258239089710678, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018793188132035072, |
| "loss": 1.4795, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.6272115451328661, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018785815861817762, |
| "loss": 1.5742, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.6285991812946645, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018778422597072147, |
| "loss": 1.6734, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.6299868174564629, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018771008355465135, |
| "loss": 1.5631, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.6313744536182613, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018763573154713744, |
| "loss": 1.5465, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.6327620897800597, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018756117012585084, |
| "loss": 1.5793, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.634149725941858, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018748639946896304, |
| "loss": 1.5554, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.6355373621036564, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018741141975514545, |
| "loss": 1.5383, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.6369249982654548, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018733623116356919, |
| "loss": 1.5009, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.6383126344272532, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018726083387390435, |
| "loss": 1.5424, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.6383126344272532, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 858.6756, |
| "eval_samples_per_second": 14.921, |
| "eval_steps_per_second": 1.866, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.6397002705890515, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001871852280663199, |
| "loss": 1.5264, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.6410879067508499, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001871094139214829, |
| "loss": 1.5278, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.6424755429126483, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018703339162055838, |
| "loss": 1.5058, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.6438631790744467, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001869571613452087, |
| "loss": 1.5311, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.6452508152362451, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018688072327759328, |
| "loss": 1.4861, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.6466384513980434, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018680407760036797, |
| "loss": 1.5559, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.6480260875598418, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001867272244966848, |
| "loss": 1.5407, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.6494137237216402, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018665016415019147, |
| "loss": 1.5442, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.6508013598834386, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018657289674503085, |
| "loss": 1.458, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.652188996045237, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018649542246584067, |
| "loss": 1.5634, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.652188996045237, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.275, |
| "eval_samples_per_second": 14.998, |
| "eval_steps_per_second": 1.875, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.6535766322070353, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001864177414977529, |
| "loss": 1.5132, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.6549642683688337, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018633985402639351, |
| "loss": 1.5802, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.6563519045306321, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001862617602378819, |
| "loss": 1.5437, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.6577395406924305, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001861834603188305, |
| "loss": 1.4459, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.6591271768542288, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018610495445634423, |
| "loss": 1.5517, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.6605148130160272, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018602624283802022, |
| "loss": 1.5395, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.6619024491778256, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018594732565194722, |
| "loss": 1.5535, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.663290085339624, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018586820308670525, |
| "loss": 1.518, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.6646777215014223, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018578887533136505, |
| "loss": 1.5607, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.6660653576632207, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018570934257548772, |
| "loss": 1.5749, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6660653576632207, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 851.7238, |
| "eval_samples_per_second": 15.042, |
| "eval_steps_per_second": 1.881, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6674529938250191, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018562960500912424, |
| "loss": 1.5956, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.6688406299868175, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001855496628228149, |
| "loss": 1.6314, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.6702282661486159, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018546951620758913, |
| "loss": 1.5407, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.6716159023104142, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018538916535496472, |
| "loss": 1.4737, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.6730035384722126, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018530861045694752, |
| "loss": 1.5509, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.674391174634011, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018522785170603097, |
| "loss": 1.5355, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.6757788107958094, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001851468892951957, |
| "loss": 1.5318, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.6771664469576077, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018506572341790898, |
| "loss": 1.5004, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.6785540831194061, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018498435426812418, |
| "loss": 1.4296, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.6799417192812045, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018490278204028057, |
| "loss": 1.5449, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.6799417192812045, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 851.692, |
| "eval_samples_per_second": 15.043, |
| "eval_steps_per_second": 1.881, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.6813293554430029, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001848210069293026, |
| "loss": 1.5653, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.6827169916048013, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018473902913059947, |
| "loss": 1.5689, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.6841046277665996, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018465684884006484, |
| "loss": 1.602, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.685492263928398, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018457446625407627, |
| "loss": 1.5003, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.6868799000901964, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018449188156949452, |
| "loss": 1.5839, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.6882675362519948, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001844090949836635, |
| "loss": 1.5864, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018432610669440948, |
| "loss": 1.5096, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.6910428085755915, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018424291690004072, |
| "loss": 1.5729, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.6924304447373899, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000184159525799347, |
| "loss": 1.5004, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.6938180808991883, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018407593359159917, |
| "loss": 1.5744, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6938180808991883, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 852.3835, |
| "eval_samples_per_second": 15.031, |
| "eval_steps_per_second": 1.879, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6952057170609867, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018399214047654864, |
| "loss": 1.5931, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.696593353222785, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018390814665442687, |
| "loss": 1.5866, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.6979809893845834, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018382395232594497, |
| "loss": 1.5144, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.6993686255463817, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018373955769229313, |
| "loss": 1.5864, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.7007562617081801, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001836549629551402, |
| "loss": 1.5325, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.7021438978699784, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018357016831663326, |
| "loss": 1.5071, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.7035315340317768, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018348517397939702, |
| "loss": 1.5151, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.7049191701935752, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018339998014653338, |
| "loss": 1.5249, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.7063068063553736, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018331458702162094, |
| "loss": 1.5084, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.707694442517172, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018322899480871461, |
| "loss": 1.4721, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.707694442517172, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 853.7906, |
| "eval_samples_per_second": 15.006, |
| "eval_steps_per_second": 1.876, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.7090820786789703, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018314320371234493, |
| "loss": 1.5513, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.7104697148407687, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018305721393751777, |
| "loss": 1.5281, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.7118573510025671, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018297102568971376, |
| "loss": 1.5382, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.7132449871643655, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018288463917488773, |
| "loss": 1.5517, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.7146326233261638, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018279805459946836, |
| "loss": 1.536, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.7160202594879622, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001827112721703576, |
| "loss": 1.5671, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.7174078956497606, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018262429209493013, |
| "loss": 1.4578, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.718795531811559, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000182537114581033, |
| "loss": 1.5988, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.7201831679733574, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018244973983698505, |
| "loss": 1.5489, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.7215708041351557, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018236216807157636, |
| "loss": 1.4934, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.7215708041351557, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 853.1134, |
| "eval_samples_per_second": 15.018, |
| "eval_steps_per_second": 1.878, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.7229584402969541, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018227439949406786, |
| "loss": 1.5401, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.7243460764587525, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018218643431419077, |
| "loss": 1.6144, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.7257337126205509, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018209827274214612, |
| "loss": 1.5523, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.7271213487823492, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018200991498860424, |
| "loss": 1.5574, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.7285089849441476, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001819213612647042, |
| "loss": 1.5118, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.729896621105946, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018183261178205345, |
| "loss": 1.5509, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.7312842572677444, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001817436667527271, |
| "loss": 1.5251, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.7326718934295428, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001816545263892677, |
| "loss": 1.6438, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.7340595295913411, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001815651909046845, |
| "loss": 1.5911, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.7354471657531395, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018147566051245287, |
| "loss": 1.5005, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.7354471657531395, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 860.5659, |
| "eval_samples_per_second": 14.888, |
| "eval_steps_per_second": 1.862, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.7368348019149379, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001813859354265141, |
| "loss": 1.482, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.7382224380767363, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018129601586127476, |
| "loss": 1.4996, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.7396100742385346, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018120590203160594, |
| "loss": 1.6339, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.740997710400333, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001811155941528431, |
| "loss": 1.4808, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.7423853465621314, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018102509244078538, |
| "loss": 1.4793, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.7437729827239298, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018093439711169506, |
| "loss": 1.571, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.7451606188857282, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018084350838229712, |
| "loss": 1.5976, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.7465482550475265, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018075242646977863, |
| "loss": 1.5838, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.7479358912093249, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018066115159178842, |
| "loss": 1.5638, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.7493235273711233, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018056968396643628, |
| "loss": 1.5642, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.7493235273711233, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 851.0477, |
| "eval_samples_per_second": 15.054, |
| "eval_steps_per_second": 1.882, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.7507111635329217, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018047802381229263, |
| "loss": 1.6306, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.75209879969472, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018038617134838805, |
| "loss": 1.5442, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.7534864358565184, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018029412679421253, |
| "loss": 1.5067, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.7548740720183168, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00018020189036971516, |
| "loss": 1.5358, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.7562617081801152, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001801094622953035, |
| "loss": 1.4645, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.7576493443419136, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001800168427918431, |
| "loss": 1.4765, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.7590369805037119, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017992403208065685, |
| "loss": 1.5688, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.7604246166655103, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017983103038352467, |
| "loss": 1.5129, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.7618122528273087, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017973783792268278, |
| "loss": 1.5572, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.7631998889891071, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017964445492082333, |
| "loss": 1.572, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.7631998889891071, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 852.5786, |
| "eval_samples_per_second": 15.027, |
| "eval_steps_per_second": 1.879, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.7645875251509054, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001795508816010937, |
| "loss": 1.4957, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.7659751613127038, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001794571181870961, |
| "loss": 1.5166, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.7673627974745022, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017936316490288693, |
| "loss": 1.5996, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.7687504336363006, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017926902197297636, |
| "loss": 1.5391, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.770138069798099, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017917468962232777, |
| "loss": 1.5454, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.7715257059598973, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017908016807635706, |
| "loss": 1.5633, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.7729133421216957, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017898545756093233, |
| "loss": 1.5768, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.7743009782834941, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017889055830237326, |
| "loss": 1.5428, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.7756886144452925, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017879547052745043, |
| "loss": 1.5687, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.7770762506070908, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017870019446338496, |
| "loss": 1.5115, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7770762506070908, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 850.6758, |
| "eval_samples_per_second": 15.061, |
| "eval_steps_per_second": 1.883, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7784638867688892, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017860473033784796, |
| "loss": 1.5663, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.7798515229306876, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001785090783789599, |
| "loss": 1.4973, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.781239159092486, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017841323881529, |
| "loss": 1.5756, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.7826267952542844, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017831721187585594, |
| "loss": 1.4844, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.7840144314160827, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017822099779012297, |
| "loss": 1.5505, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.7854020675778811, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017812459678800374, |
| "loss": 1.593, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.7867897037396795, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001780280090998574, |
| "loss": 1.5968, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.7881773399014779, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017793123495648926, |
| "loss": 1.5763, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.7895649760632762, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017783427458915022, |
| "loss": 1.5819, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.7909526122250746, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017773712822953612, |
| "loss": 1.5361, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.7909526122250746, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 855.1786, |
| "eval_samples_per_second": 14.982, |
| "eval_steps_per_second": 1.873, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.792340248386873, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017763979610978728, |
| "loss": 1.5174, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.7937278845486714, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017754227846248784, |
| "loss": 1.4874, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.7951155207104698, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017744457552066538, |
| "loss": 1.5176, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.7965031568722681, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017734668751779025, |
| "loss": 1.5795, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.7978907930340665, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001772486146877749, |
| "loss": 1.505, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.7992784291958649, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001771503572649736, |
| "loss": 1.4841, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.8006660653576633, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001770519154841816, |
| "loss": 1.444, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.8020537015194616, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017695328958063477, |
| "loss": 1.585, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.80344133768126, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001768544797900089, |
| "loss": 1.5706, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.8048289738430584, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017675548634841923, |
| "loss": 1.6169, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.8048289738430584, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.03, |
| "eval_samples_per_second": 15.002, |
| "eval_steps_per_second": 1.876, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.8062166100048568, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017665630949241984, |
| "loss": 1.6402, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.807604246166655, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001765569494590031, |
| "loss": 1.5333, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.8089918823284534, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017645740648559912, |
| "loss": 1.4236, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.8103795184902518, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001763576808100751, |
| "loss": 1.5217, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.8117671546520502, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017625777267073488, |
| "loss": 1.521, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.8131547908138486, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017615768230631832, |
| "loss": 1.5287, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.8145424269756469, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001760574099560007, |
| "loss": 1.6059, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.8159300631374453, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017595695585939213, |
| "loss": 1.5461, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.8173176992992437, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017585632025653711, |
| "loss": 1.5032, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.8187053354610421, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001757555033879138, |
| "loss": 1.627, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.8187053354610421, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 851.9761, |
| "eval_samples_per_second": 15.038, |
| "eval_steps_per_second": 1.88, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.8200929716228404, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017565450549443355, |
| "loss": 1.5245, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.8214806077846388, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017555332681744022, |
| "loss": 1.5135, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.8228682439464372, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017545196759870976, |
| "loss": 1.6013, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.8242558801082356, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001753504280804495, |
| "loss": 1.5353, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.825643516270034, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001752487085052976, |
| "loss": 1.5625, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.8270311524318323, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001751468091163225, |
| "loss": 1.5264, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.8284187885936307, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001750447301570223, |
| "loss": 1.5596, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.8298064247554291, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017494247187132422, |
| "loss": 1.5619, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.8311940609172275, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000174840034503584, |
| "loss": 1.5473, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.8325816970790259, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001747374182985853, |
| "loss": 1.5031, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.8325816970790259, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 852.2783, |
| "eval_samples_per_second": 15.033, |
| "eval_steps_per_second": 1.88, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.8339693332408242, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017463462350153912, |
| "loss": 1.5695, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.8353569694026226, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001745316503580833, |
| "loss": 1.4705, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.836744605564421, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017442849911428172, |
| "loss": 1.5375, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.8381322417262194, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017432517001662393, |
| "loss": 1.5085, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.8395198778880177, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001742216633120245, |
| "loss": 1.6365, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.8409075140498161, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017411797924782237, |
| "loss": 1.5646, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.8422951502116145, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017401411807178028, |
| "loss": 1.5703, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.8436827863734129, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017391008003208422, |
| "loss": 1.5321, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.8450704225352113, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017380586537734286, |
| "loss": 1.5316, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.8464580586970096, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017370147435658682, |
| "loss": 1.5733, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.8464580586970096, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 849.0242, |
| "eval_samples_per_second": 15.09, |
| "eval_steps_per_second": 1.887, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.847845694858808, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001735969072192682, |
| "loss": 1.5344, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.8492333310206064, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017349216421525993, |
| "loss": 1.5371, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.8506209671824048, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017338724559485527, |
| "loss": 1.5302, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.8520086033442031, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017328215160876702, |
| "loss": 1.5218, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.8533962395060015, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017317688250812708, |
| "loss": 1.5614, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.8547838756677999, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017307143854448587, |
| "loss": 1.527, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.8561715118295983, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017296581996981153, |
| "loss": 1.531, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.8575591479913967, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017286002703648955, |
| "loss": 1.5872, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.858946784153195, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017275405999732203, |
| "loss": 1.5583, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.8603344203149934, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017264791910552712, |
| "loss": 1.5403, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.8603344203149934, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 863.4297, |
| "eval_samples_per_second": 14.838, |
| "eval_steps_per_second": 1.855, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.8617220564767918, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001725416046147384, |
| "loss": 1.5541, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.8631096926385902, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017243511677900424, |
| "loss": 1.5928, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.8644973288003885, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017232845585278734, |
| "loss": 1.551, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.8658849649621869, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001722216220909639, |
| "loss": 1.4659, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.8672726011239853, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017211461574882325, |
| "loss": 1.5259, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.8686602372857837, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000172007437082067, |
| "loss": 1.4713, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.870047873447582, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017190008634680861, |
| "loss": 1.5781, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.8714355096093804, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017179256379957266, |
| "loss": 1.5383, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.8728231457711788, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001716848696972944, |
| "loss": 1.5393, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.8742107819329772, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001715770042973189, |
| "loss": 1.5745, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.8742107819329772, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 856.7944, |
| "eval_samples_per_second": 14.953, |
| "eval_steps_per_second": 1.87, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.8755984180947756, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017146896785740062, |
| "loss": 1.5361, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.8769860542565739, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017136076063570274, |
| "loss": 1.514, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.8783736904183723, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017125238289079654, |
| "loss": 1.5195, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.8797613265801707, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017114383488166075, |
| "loss": 1.5063, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.8811489627419691, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017103511686768102, |
| "loss": 1.5504, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.8825365989037675, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017092622910864916, |
| "loss": 1.477, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.8839242350655658, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017081717186476268, |
| "loss": 1.5396, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.8853118712273642, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017070794539662408, |
| "loss": 1.6266, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.8866995073891626, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017059854996524017, |
| "loss": 1.4967, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.888087143550961, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001704889858320216, |
| "loss": 1.522, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.888087143550961, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 862.0033, |
| "eval_samples_per_second": 14.863, |
| "eval_steps_per_second": 1.858, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.8894747797127593, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017037925325878205, |
| "loss": 1.5414, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.8908624158745577, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017026935250773784, |
| "loss": 1.5846, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.8922500520363561, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00017015928384150702, |
| "loss": 1.5889, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.8936376881981545, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000170049047523109, |
| "loss": 1.555, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.8950253243599529, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016993864381596374, |
| "loss": 1.5805, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.8964129605217512, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016982807298389124, |
| "loss": 1.5322, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.8978005966835496, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016971733529111088, |
| "loss": 1.5423, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.899188232845348, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001696064310022406, |
| "loss": 1.5465, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.9005758690071464, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001694953603822967, |
| "loss": 1.5719, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.9019635051689447, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016938412369669272, |
| "loss": 1.5242, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.9019635051689447, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 853.5172, |
| "eval_samples_per_second": 15.011, |
| "eval_steps_per_second": 1.877, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.9033511413307431, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016927272121123918, |
| "loss": 1.5607, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.9047387774925415, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001691611531921427, |
| "loss": 1.6793, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.9061264136543399, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001690494199060055, |
| "loss": 1.5096, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.9075140498161383, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016893752161982467, |
| "loss": 1.5335, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.9089016859779366, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016882545860099173, |
| "loss": 1.5678, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.910289322139735, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016871323111729161, |
| "loss": 1.5634, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.9116769583015334, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016860083943690248, |
| "loss": 1.5648, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.9130645944633318, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001684882838283947, |
| "loss": 1.4455, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.91445223062513, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016837556456073048, |
| "loss": 1.5331, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.9158398667869284, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016826268190326296, |
| "loss": 1.5532, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.9158398667869284, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 858.7079, |
| "eval_samples_per_second": 14.92, |
| "eval_steps_per_second": 1.866, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.9172275029487268, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001681496361257359, |
| "loss": 1.5508, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.9186151391105252, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016803642749828267, |
| "loss": 1.5596, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.9200027752723235, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001679230562914259, |
| "loss": 1.5676, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.9213904114341219, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001678095227760767, |
| "loss": 1.5076, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.9227780475959203, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016769582722353402, |
| "loss": 1.5471, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.9241656837577187, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016758196990548395, |
| "loss": 1.5696, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.9255533199195171, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001674679510939993, |
| "loss": 1.5948, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.9269409560813154, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001673537710615386, |
| "loss": 1.507, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.9283285922431138, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016723943008094573, |
| "loss": 1.549, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.9297162284049122, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016712492842544914, |
| "loss": 1.5729, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.9297162284049122, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 855.2661, |
| "eval_samples_per_second": 14.98, |
| "eval_steps_per_second": 1.873, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.9311038645667106, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016701026636866127, |
| "loss": 1.5305, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.932491500728509, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016689544418457778, |
| "loss": 1.5285, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.9338791368903073, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016678046214757707, |
| "loss": 1.546, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.9352667730521057, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016666532053241943, |
| "loss": 1.5479, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.9366544092139041, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016655001961424652, |
| "loss": 1.5483, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.9380420453757025, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016643455966858063, |
| "loss": 1.5553, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.9394296815375008, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016631894097132418, |
| "loss": 1.5295, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.9408173176992992, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016620316379875876, |
| "loss": 1.5047, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.9422049538610976, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016608722842754477, |
| "loss": 1.5155, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.943592590022896, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016597113513472066, |
| "loss": 1.575, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.943592590022896, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.8633, |
| "eval_samples_per_second": 14.987, |
| "eval_steps_per_second": 1.874, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.9449802261846943, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016585488419770217, |
| "loss": 1.613, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.9463678623464927, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001657384758942818, |
| "loss": 1.5741, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.9477554985082911, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016562191050262804, |
| "loss": 1.5771, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.9491431346700895, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001655051883012848, |
| "loss": 1.5545, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.9505307708318879, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016538830956917074, |
| "loss": 1.4783, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.9519184069936862, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016527127458557846, |
| "loss": 1.5369, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.9533060431554846, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000165154083630174, |
| "loss": 1.5902, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.954693679317283, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016503673698299617, |
| "loss": 1.5219, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.9560813154790814, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016491923492445562, |
| "loss": 1.5369, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.9574689516408798, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016480157773533463, |
| "loss": 1.4893, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.9574689516408798, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 851.7875, |
| "eval_samples_per_second": 15.041, |
| "eval_steps_per_second": 1.881, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.9588565878026781, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000164683765696786, |
| "loss": 1.567, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.9602442239644765, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001645657990903326, |
| "loss": 1.5598, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.9616318601262749, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016444767819786667, |
| "loss": 1.5641, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.9630194962880733, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001643294033016492, |
| "loss": 1.5821, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.9644071324498716, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016421097468430896, |
| "loss": 1.5396, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.96579476861167, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001640923926288423, |
| "loss": 1.5183, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.9671824047734684, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016397365741861215, |
| "loss": 1.5675, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.9685700409352668, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016385476933734728, |
| "loss": 1.5301, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.9699576770970652, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000163735728669142, |
| "loss": 1.5445, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.9713453132588635, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016361653569845508, |
| "loss": 1.565, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.9713453132588635, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 859.3889, |
| "eval_samples_per_second": 14.908, |
| "eval_steps_per_second": 1.864, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.9727329494206619, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016349719071010921, |
| "loss": 1.5189, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.9741205855824603, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016337769398929046, |
| "loss": 1.4959, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.9755082217442587, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016325804582154738, |
| "loss": 1.5076, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.976895857906057, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016313824649279046, |
| "loss": 1.5328, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.9782834940678554, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001630182962892914, |
| "loss": 1.6521, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.9796711302296538, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016289819549768239, |
| "loss": 1.588, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.9810587663914522, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016277794440495557, |
| "loss": 1.5902, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.9824464025532506, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001626575432984621, |
| "loss": 1.5927, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.9838340387150489, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001625369924659117, |
| "loss": 1.6044, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.9852216748768473, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001624162921953719, |
| "loss": 1.531, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.9852216748768473, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.8233, |
| "eval_samples_per_second": 14.988, |
| "eval_steps_per_second": 1.874, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.9866093110386457, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016229544277526718, |
| "loss": 1.6236, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.9879969472004441, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016217444449437862, |
| "loss": 1.5169, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.9893845833622424, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016205329764184287, |
| "loss": 1.5986, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.9907722195240408, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016193200250715168, |
| "loss": 1.5644, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.9921598556858392, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001618105593801511, |
| "loss": 1.5578, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.9935474918476376, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016168896855104086, |
| "loss": 1.627, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.994935128009436, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001615672303103736, |
| "loss": 1.58, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.9963227641712343, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016144534494905418, |
| "loss": 1.5433, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.9977104003330327, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016132331275833917, |
| "loss": 1.5081, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.9990980364948311, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001612011340298358, |
| "loss": 1.5054, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.9990980364948311, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 857.3623, |
| "eval_samples_per_second": 14.944, |
| "eval_steps_per_second": 1.869, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.0004162908485394, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001610788090555016, |
| "loss": 1.5776, |
| "step": 7210 |
| }, |
| { |
| "epoch": 1.0018039270103378, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001609563381276435, |
| "loss": 1.6051, |
| "step": 7220 |
| }, |
| { |
| "epoch": 1.0031915631721362, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001608337215389173, |
| "loss": 1.6271, |
| "step": 7230 |
| }, |
| { |
| "epoch": 1.0045791993339346, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016071095958232676, |
| "loss": 1.5657, |
| "step": 7240 |
| }, |
| { |
| "epoch": 1.005966835495733, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016058805255122303, |
| "loss": 1.5736, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.0073544716575313, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016046500073930398, |
| "loss": 1.512, |
| "step": 7260 |
| }, |
| { |
| "epoch": 1.0087421078193297, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001603418044406134, |
| "loss": 1.4812, |
| "step": 7270 |
| }, |
| { |
| "epoch": 1.010129743981128, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00016021846394954036, |
| "loss": 1.5701, |
| "step": 7280 |
| }, |
| { |
| "epoch": 1.0115173801429265, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001600949795608185, |
| "loss": 1.5056, |
| "step": 7290 |
| }, |
| { |
| "epoch": 1.0129050163047248, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015997135156952535, |
| "loss": 1.4953, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.0129050163047248, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 862.1027, |
| "eval_samples_per_second": 14.861, |
| "eval_steps_per_second": 1.858, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.0142926524665232, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001598475802710815, |
| "loss": 1.504, |
| "step": 7310 |
| }, |
| { |
| "epoch": 1.0156802886283216, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015972366596125003, |
| "loss": 1.568, |
| "step": 7320 |
| }, |
| { |
| "epoch": 1.01706792479012, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001595996089361358, |
| "loss": 1.5443, |
| "step": 7330 |
| }, |
| { |
| "epoch": 1.0184555609519184, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015947540949218467, |
| "loss": 1.5165, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.0198431971137167, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001593510679261828, |
| "loss": 1.5269, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.0212308332755151, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000159226584535256, |
| "loss": 1.5448, |
| "step": 7360 |
| }, |
| { |
| "epoch": 1.0226184694373135, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015910195961686897, |
| "loss": 1.5586, |
| "step": 7370 |
| }, |
| { |
| "epoch": 1.0240061055991119, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015897719346882457, |
| "loss": 1.5269, |
| "step": 7380 |
| }, |
| { |
| "epoch": 1.0253937417609102, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015885228638926323, |
| "loss": 1.5522, |
| "step": 7390 |
| }, |
| { |
| "epoch": 1.0267813779227086, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015872723867666207, |
| "loss": 1.5639, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.0267813779227086, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 856.4609, |
| "eval_samples_per_second": 14.959, |
| "eval_steps_per_second": 1.87, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.028169014084507, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015860205062983427, |
| "loss": 1.5362, |
| "step": 7410 |
| }, |
| { |
| "epoch": 1.0295566502463054, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015847672254792837, |
| "loss": 1.6302, |
| "step": 7420 |
| }, |
| { |
| "epoch": 1.0309442864081038, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015835125473042755, |
| "loss": 1.5936, |
| "step": 7430 |
| }, |
| { |
| "epoch": 1.0323319225699021, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001582256474771489, |
| "loss": 1.5836, |
| "step": 7440 |
| }, |
| { |
| "epoch": 1.0337195587317005, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015809990108824268, |
| "loss": 1.499, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.035107194893499, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015797401586419168, |
| "loss": 1.5871, |
| "step": 7460 |
| }, |
| { |
| "epoch": 1.0364948310552973, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001578479921058103, |
| "loss": 1.5045, |
| "step": 7470 |
| }, |
| { |
| "epoch": 1.0378824672170957, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001577218301142442, |
| "loss": 1.6008, |
| "step": 7480 |
| }, |
| { |
| "epoch": 1.039270103378894, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015759553019096924, |
| "loss": 1.5677, |
| "step": 7490 |
| }, |
| { |
| "epoch": 1.0406577395406924, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015746909263779086, |
| "loss": 1.4854, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.0406577395406924, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 854.7095, |
| "eval_samples_per_second": 14.99, |
| "eval_steps_per_second": 1.874, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.0420453757024908, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015734251775684338, |
| "loss": 1.5282, |
| "step": 7510 |
| }, |
| { |
| "epoch": 1.0434330118642892, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001572158058505894, |
| "loss": 1.5321, |
| "step": 7520 |
| }, |
| { |
| "epoch": 1.0448206480260875, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001570889572218188, |
| "loss": 1.46, |
| "step": 7530 |
| }, |
| { |
| "epoch": 1.046208284187886, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015696197217364826, |
| "loss": 1.5914, |
| "step": 7540 |
| }, |
| { |
| "epoch": 1.0475959203496843, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015683485100952043, |
| "loss": 1.503, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.0489835565114827, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015670759403320318, |
| "loss": 1.539, |
| "step": 7560 |
| }, |
| { |
| "epoch": 1.050371192673281, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000156580201548789, |
| "loss": 1.4814, |
| "step": 7570 |
| }, |
| { |
| "epoch": 1.0517588288350794, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001564526738606941, |
| "loss": 1.5348, |
| "step": 7580 |
| }, |
| { |
| "epoch": 1.0531464649968778, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001563250112736578, |
| "loss": 1.5778, |
| "step": 7590 |
| }, |
| { |
| "epoch": 1.0545341011586762, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015619721409274186, |
| "loss": 1.5437, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.0545341011586762, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 853.4619, |
| "eval_samples_per_second": 15.012, |
| "eval_steps_per_second": 1.877, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.0559217373204746, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015606928262332952, |
| "loss": 1.5839, |
| "step": 7610 |
| }, |
| { |
| "epoch": 1.057309373482273, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015594121717112499, |
| "loss": 1.6073, |
| "step": 7620 |
| }, |
| { |
| "epoch": 1.0586970096440713, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015581301804215269, |
| "loss": 1.5089, |
| "step": 7630 |
| }, |
| { |
| "epoch": 1.0600846458058697, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015568468554275636, |
| "loss": 1.5612, |
| "step": 7640 |
| }, |
| { |
| "epoch": 1.061472281967668, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015555621997959853, |
| "loss": 1.5754, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.0628599181294665, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001554276216596597, |
| "loss": 1.5358, |
| "step": 7660 |
| }, |
| { |
| "epoch": 1.0642475542912648, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015529889089023753, |
| "loss": 1.5362, |
| "step": 7670 |
| }, |
| { |
| "epoch": 1.0656351904530632, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015517002797894627, |
| "loss": 1.5111, |
| "step": 7680 |
| }, |
| { |
| "epoch": 1.0670228266148616, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015504103323371585, |
| "loss": 1.4885, |
| "step": 7690 |
| }, |
| { |
| "epoch": 1.06841046277666, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001549119069627913, |
| "loss": 1.5007, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.06841046277666, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 859.3958, |
| "eval_samples_per_second": 14.908, |
| "eval_steps_per_second": 1.864, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.0697980989384583, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015478264947473193, |
| "loss": 1.6073, |
| "step": 7710 |
| }, |
| { |
| "epoch": 1.0711857351002567, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015465326107841056, |
| "loss": 1.5889, |
| "step": 7720 |
| }, |
| { |
| "epoch": 1.072573371262055, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015452374208301285, |
| "loss": 1.5805, |
| "step": 7730 |
| }, |
| { |
| "epoch": 1.0739610074238535, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001543940927980366, |
| "loss": 1.5473, |
| "step": 7740 |
| }, |
| { |
| "epoch": 1.0753486435856519, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015426431353329081, |
| "loss": 1.5056, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.0767362797474502, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015413440459889524, |
| "loss": 1.5471, |
| "step": 7760 |
| }, |
| { |
| "epoch": 1.0781239159092486, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015400436630527941, |
| "loss": 1.5527, |
| "step": 7770 |
| }, |
| { |
| "epoch": 1.079511552071047, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000153874198963182, |
| "loss": 1.5679, |
| "step": 7780 |
| }, |
| { |
| "epoch": 1.0808991882328454, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015374390288364997, |
| "loss": 1.652, |
| "step": 7790 |
| }, |
| { |
| "epoch": 1.0822868243946437, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001536134783780381, |
| "loss": 1.5538, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.0822868243946437, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 860.0182, |
| "eval_samples_per_second": 14.897, |
| "eval_steps_per_second": 1.863, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.0836744605564421, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001534829257580078, |
| "loss": 1.5828, |
| "step": 7810 |
| }, |
| { |
| "epoch": 1.0850620967182405, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015335224533552687, |
| "loss": 1.4658, |
| "step": 7820 |
| }, |
| { |
| "epoch": 1.0864497328800389, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015322143742286831, |
| "loss": 1.5252, |
| "step": 7830 |
| }, |
| { |
| "epoch": 1.0878373690418373, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015309050233260993, |
| "loss": 1.5765, |
| "step": 7840 |
| }, |
| { |
| "epoch": 1.0892250052036356, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015295944037763335, |
| "loss": 1.5868, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.090612641365434, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001528282518711233, |
| "loss": 1.6112, |
| "step": 7860 |
| }, |
| { |
| "epoch": 1.0920002775272324, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001526969371265671, |
| "loss": 1.5436, |
| "step": 7870 |
| }, |
| { |
| "epoch": 1.0933879136890308, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015256549645775347, |
| "loss": 1.553, |
| "step": 7880 |
| }, |
| { |
| "epoch": 1.0947755498508291, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001524339301787723, |
| "loss": 1.5098, |
| "step": 7890 |
| }, |
| { |
| "epoch": 1.0961631860126275, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001523022386040134, |
| "loss": 1.5736, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.0961631860126275, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 859.1865, |
| "eval_samples_per_second": 14.912, |
| "eval_steps_per_second": 1.865, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.097550822174426, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001521704220481662, |
| "loss": 1.622, |
| "step": 7910 |
| }, |
| { |
| "epoch": 1.0989384583362243, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015203848082621867, |
| "loss": 1.5086, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.1003260944980227, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015190641525345656, |
| "loss": 1.5271, |
| "step": 7930 |
| }, |
| { |
| "epoch": 1.101713730659821, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015177422564546306, |
| "loss": 1.4512, |
| "step": 7940 |
| }, |
| { |
| "epoch": 1.1031013668216194, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015164191231811753, |
| "loss": 1.4596, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.1044890029834178, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015150947558759502, |
| "loss": 1.5792, |
| "step": 7960 |
| }, |
| { |
| "epoch": 1.1058766391452162, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001513769157703655, |
| "loss": 1.5391, |
| "step": 7970 |
| }, |
| { |
| "epoch": 1.1072642753070145, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001512442331831931, |
| "loss": 1.5552, |
| "step": 7980 |
| }, |
| { |
| "epoch": 1.108651911468813, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015111142814313517, |
| "loss": 1.4679, |
| "step": 7990 |
| }, |
| { |
| "epoch": 1.1100395476306113, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015097850096754177, |
| "loss": 1.5157, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.1100395476306113, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 858.8743, |
| "eval_samples_per_second": 14.917, |
| "eval_steps_per_second": 1.865, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.1114271837924097, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015084545197405493, |
| "loss": 1.527, |
| "step": 8010 |
| }, |
| { |
| "epoch": 1.112814819954208, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001507122814806075, |
| "loss": 1.5263, |
| "step": 8020 |
| }, |
| { |
| "epoch": 1.1142024561160064, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015057898980542293, |
| "loss": 1.5607, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.1155900922778048, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015044557726701408, |
| "loss": 1.5861, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.1169777284396032, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015031204418418275, |
| "loss": 1.547, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.1183653646014016, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015017839087601867, |
| "loss": 1.6436, |
| "step": 8060 |
| }, |
| { |
| "epoch": 1.1197530007632, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00015004461766189895, |
| "loss": 1.5417, |
| "step": 8070 |
| }, |
| { |
| "epoch": 1.1211406369249983, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001499107248614871, |
| "loss": 1.5294, |
| "step": 8080 |
| }, |
| { |
| "epoch": 1.1225282730867967, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014977671279473262, |
| "loss": 1.4875, |
| "step": 8090 |
| }, |
| { |
| "epoch": 1.123915909248595, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014964258178186976, |
| "loss": 1.5403, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.123915909248595, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 853.3464, |
| "eval_samples_per_second": 15.014, |
| "eval_steps_per_second": 1.877, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.1253035454103935, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014950833214341715, |
| "loss": 1.6199, |
| "step": 8110 |
| }, |
| { |
| "epoch": 1.1266911815721918, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014937396420017688, |
| "loss": 1.5757, |
| "step": 8120 |
| }, |
| { |
| "epoch": 1.1280788177339902, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001492394782732337, |
| "loss": 1.5729, |
| "step": 8130 |
| }, |
| { |
| "epoch": 1.1294664538957886, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014910487468395425, |
| "loss": 1.5811, |
| "step": 8140 |
| }, |
| { |
| "epoch": 1.130854090057587, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001489701537539864, |
| "loss": 1.5804, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.1322417262193853, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001488353158052585, |
| "loss": 1.4874, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.1336293623811837, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014870036115997834, |
| "loss": 1.5255, |
| "step": 8170 |
| }, |
| { |
| "epoch": 1.135016998542982, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001485652901406327, |
| "loss": 1.5675, |
| "step": 8180 |
| }, |
| { |
| "epoch": 1.1364046347047805, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001484301030699864, |
| "loss": 1.6013, |
| "step": 8190 |
| }, |
| { |
| "epoch": 1.1377922708665789, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014829480027108157, |
| "loss": 1.511, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.1377922708665789, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 859.6956, |
| "eval_samples_per_second": 14.903, |
| "eval_steps_per_second": 1.863, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.1391799070283772, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001481593820672369, |
| "loss": 1.5034, |
| "step": 8210 |
| }, |
| { |
| "epoch": 1.1405675431901756, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014802384878204685, |
| "loss": 1.4766, |
| "step": 8220 |
| }, |
| { |
| "epoch": 1.141955179351974, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001478882007393809, |
| "loss": 1.5747, |
| "step": 8230 |
| }, |
| { |
| "epoch": 1.1433428155137724, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001477524382633827, |
| "loss": 1.5423, |
| "step": 8240 |
| }, |
| { |
| "epoch": 1.1447304516755707, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014761656167846935, |
| "loss": 1.5247, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.1461180878373691, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014748057130933067, |
| "loss": 1.547, |
| "step": 8260 |
| }, |
| { |
| "epoch": 1.1475057239991675, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014734446748092842, |
| "loss": 1.5177, |
| "step": 8270 |
| }, |
| { |
| "epoch": 1.1488933601609659, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014720825051849534, |
| "loss": 1.5871, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.1502809963227643, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014707192074753467, |
| "loss": 1.5082, |
| "step": 8290 |
| }, |
| { |
| "epoch": 1.1516686324845626, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014693547849381906, |
| "loss": 1.5675, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.1516686324845626, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 858.871, |
| "eval_samples_per_second": 14.917, |
| "eval_steps_per_second": 1.865, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.153056268646361, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014679892408339012, |
| "loss": 1.5986, |
| "step": 8310 |
| }, |
| { |
| "epoch": 1.1544439048081594, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014666225784255724, |
| "loss": 1.5578, |
| "step": 8320 |
| }, |
| { |
| "epoch": 1.1558315409699578, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014652548009789736, |
| "loss": 1.5968, |
| "step": 8330 |
| }, |
| { |
| "epoch": 1.1572191771317561, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014638859117625358, |
| "loss": 1.5689, |
| "step": 8340 |
| }, |
| { |
| "epoch": 1.1586068132935545, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014625159140473475, |
| "loss": 1.4963, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.1599944494553527, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001461144811107147, |
| "loss": 1.4889, |
| "step": 8360 |
| }, |
| { |
| "epoch": 1.1613820856171513, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001459772606218312, |
| "loss": 1.5006, |
| "step": 8370 |
| }, |
| { |
| "epoch": 1.1627697217789494, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014583993026598557, |
| "loss": 1.5204, |
| "step": 8380 |
| }, |
| { |
| "epoch": 1.164157357940748, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014570249037134143, |
| "loss": 1.499, |
| "step": 8390 |
| }, |
| { |
| "epoch": 1.1655449941025462, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001455649412663243, |
| "loss": 1.5346, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.1655449941025462, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 864.8064, |
| "eval_samples_per_second": 14.815, |
| "eval_steps_per_second": 1.852, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.1669326302643448, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001454272832796206, |
| "loss": 1.5715, |
| "step": 8410 |
| }, |
| { |
| "epoch": 1.168320266426143, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000145289516740177, |
| "loss": 1.4989, |
| "step": 8420 |
| }, |
| { |
| "epoch": 1.1697079025879416, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014515164197719945, |
| "loss": 1.5598, |
| "step": 8430 |
| }, |
| { |
| "epoch": 1.1710955387497397, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014501365932015266, |
| "loss": 1.6222, |
| "step": 8440 |
| }, |
| { |
| "epoch": 1.1724831749115383, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014487556909875907, |
| "loss": 1.5452, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.1738708110733365, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014473737164299815, |
| "loss": 1.5969, |
| "step": 8460 |
| }, |
| { |
| "epoch": 1.175258447235135, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014459906728310563, |
| "loss": 1.5379, |
| "step": 8470 |
| }, |
| { |
| "epoch": 1.1766460833969332, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014446065634957276, |
| "loss": 1.4996, |
| "step": 8480 |
| }, |
| { |
| "epoch": 1.1780337195587318, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014432213917314534, |
| "loss": 1.5666, |
| "step": 8490 |
| }, |
| { |
| "epoch": 1.17942135572053, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014418351608482314, |
| "loss": 1.489, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.17942135572053, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 870.4535, |
| "eval_samples_per_second": 14.719, |
| "eval_steps_per_second": 1.84, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.1808089918823286, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014404478741585902, |
| "loss": 1.4908, |
| "step": 8510 |
| }, |
| { |
| "epoch": 1.1821966280441267, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014390595349775804, |
| "loss": 1.5845, |
| "step": 8520 |
| }, |
| { |
| "epoch": 1.1835842642059253, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014376701466227687, |
| "loss": 1.5, |
| "step": 8530 |
| }, |
| { |
| "epoch": 1.1849719003677235, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014362797124142283, |
| "loss": 1.5638, |
| "step": 8540 |
| }, |
| { |
| "epoch": 1.1863595365295219, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014348882356745319, |
| "loss": 1.5205, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.1877471726913202, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001433495719728743, |
| "loss": 1.5674, |
| "step": 8560 |
| }, |
| { |
| "epoch": 1.1891348088531186, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014321021679044086, |
| "loss": 1.6119, |
| "step": 8570 |
| }, |
| { |
| "epoch": 1.190522445014917, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014307075835315515, |
| "loss": 1.5422, |
| "step": 8580 |
| }, |
| { |
| "epoch": 1.1919100811767154, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014293119699426604, |
| "loss": 1.6046, |
| "step": 8590 |
| }, |
| { |
| "epoch": 1.1932977173385138, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014279153304726857, |
| "loss": 1.4951, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.1932977173385138, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 864.037, |
| "eval_samples_per_second": 14.828, |
| "eval_steps_per_second": 1.854, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.1946853535003121, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014265176684590274, |
| "loss": 1.5689, |
| "step": 8610 |
| }, |
| { |
| "epoch": 1.1960729896621105, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014251189872415294, |
| "loss": 1.5362, |
| "step": 8620 |
| }, |
| { |
| "epoch": 1.1974606258239089, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014237192901624712, |
| "loss": 1.5422, |
| "step": 8630 |
| }, |
| { |
| "epoch": 1.1988482619857073, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014223185805665604, |
| "loss": 1.5692, |
| "step": 8640 |
| }, |
| { |
| "epoch": 1.2002358981475056, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014209168618009227, |
| "loss": 1.5128, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.201623534309304, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014195141372150966, |
| "loss": 1.5975, |
| "step": 8660 |
| }, |
| { |
| "epoch": 1.2030111704711024, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001418110410161024, |
| "loss": 1.5617, |
| "step": 8670 |
| }, |
| { |
| "epoch": 1.2043988066329008, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014167056839930407, |
| "loss": 1.5745, |
| "step": 8680 |
| }, |
| { |
| "epoch": 1.2057864427946992, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014152999620678722, |
| "loss": 1.579, |
| "step": 8690 |
| }, |
| { |
| "epoch": 1.2071740789564975, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014138932477446222, |
| "loss": 1.5969, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.2071740789564975, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 861.7557, |
| "eval_samples_per_second": 14.867, |
| "eval_steps_per_second": 1.859, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.208561715118296, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014124855443847662, |
| "loss": 1.5638, |
| "step": 8710 |
| }, |
| { |
| "epoch": 1.2099493512800943, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014110768553521425, |
| "loss": 1.5384, |
| "step": 8720 |
| }, |
| { |
| "epoch": 1.2113369874418927, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001409667184012946, |
| "loss": 1.5118, |
| "step": 8730 |
| }, |
| { |
| "epoch": 1.212724623603691, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014082565337357174, |
| "loss": 1.5268, |
| "step": 8740 |
| }, |
| { |
| "epoch": 1.2141122597654894, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001406844907891338, |
| "loss": 1.565, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.2154998959272878, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001405432309853019, |
| "loss": 1.6024, |
| "step": 8760 |
| }, |
| { |
| "epoch": 1.2168875320890862, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014040187429962964, |
| "loss": 1.5062, |
| "step": 8770 |
| }, |
| { |
| "epoch": 1.2182751682508846, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014026042106990194, |
| "loss": 1.6006, |
| "step": 8780 |
| }, |
| { |
| "epoch": 1.219662804412683, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00014011887163413456, |
| "loss": 1.5469, |
| "step": 8790 |
| }, |
| { |
| "epoch": 1.2210504405744813, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013997722633057313, |
| "loss": 1.6013, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.2210504405744813, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 864.0925, |
| "eval_samples_per_second": 14.827, |
| "eval_steps_per_second": 1.854, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.2224380767362797, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001398354854976923, |
| "loss": 1.5087, |
| "step": 8810 |
| }, |
| { |
| "epoch": 1.223825712898078, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013969364947419508, |
| "loss": 1.6569, |
| "step": 8820 |
| }, |
| { |
| "epoch": 1.2252133490598764, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013955171859901183, |
| "loss": 1.5641, |
| "step": 8830 |
| }, |
| { |
| "epoch": 1.2266009852216748, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013940969321129978, |
| "loss": 1.5895, |
| "step": 8840 |
| }, |
| { |
| "epoch": 1.2279886213834732, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013926757365044173, |
| "loss": 1.5392, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.2293762575452716, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013912536025604576, |
| "loss": 1.5429, |
| "step": 8860 |
| }, |
| { |
| "epoch": 1.23076389370707, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000138983053367944, |
| "loss": 1.5436, |
| "step": 8870 |
| }, |
| { |
| "epoch": 1.2321515298688683, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013884065332619213, |
| "loss": 1.5116, |
| "step": 8880 |
| }, |
| { |
| "epoch": 1.2335391660306667, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001386981604710683, |
| "loss": 1.5252, |
| "step": 8890 |
| }, |
| { |
| "epoch": 1.234926802192465, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001385555751430725, |
| "loss": 1.5872, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.234926802192465, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 863.0711, |
| "eval_samples_per_second": 14.845, |
| "eval_steps_per_second": 1.856, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.2363144383542635, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013841289768292574, |
| "loss": 1.5248, |
| "step": 8910 |
| }, |
| { |
| "epoch": 1.2377020745160618, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013827012843156913, |
| "loss": 1.5388, |
| "step": 8920 |
| }, |
| { |
| "epoch": 1.2390897106778602, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001381272677301632, |
| "loss": 1.5608, |
| "step": 8930 |
| }, |
| { |
| "epoch": 1.2404773468396586, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013798431592008684, |
| "loss": 1.494, |
| "step": 8940 |
| }, |
| { |
| "epoch": 1.241864983001457, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001378412733429369, |
| "loss": 1.5242, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.2432526191632554, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001376981403405268, |
| "loss": 1.5133, |
| "step": 8960 |
| }, |
| { |
| "epoch": 1.2446402553250537, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013755491725488646, |
| "loss": 1.5281, |
| "step": 8970 |
| }, |
| { |
| "epoch": 1.2460278914868521, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013741160442826063, |
| "loss": 1.5155, |
| "step": 8980 |
| }, |
| { |
| "epoch": 1.2474155276486505, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013726820220310882, |
| "loss": 1.641, |
| "step": 8990 |
| }, |
| { |
| "epoch": 1.2488031638104489, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013712471092210403, |
| "loss": 1.5412, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.2488031638104489, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 879.3561, |
| "eval_samples_per_second": 14.57, |
| "eval_steps_per_second": 1.822, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.2501907999722472, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013698113092813205, |
| "loss": 1.4897, |
| "step": 9010 |
| }, |
| { |
| "epoch": 1.2515784361340456, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013683746256429078, |
| "loss": 1.4696, |
| "step": 9020 |
| }, |
| { |
| "epoch": 1.252966072295844, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001366937061738891, |
| "loss": 1.5857, |
| "step": 9030 |
| }, |
| { |
| "epoch": 1.2543537084576424, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013654986210044645, |
| "loss": 1.6397, |
| "step": 9040 |
| }, |
| { |
| "epoch": 1.2557413446194408, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013640593068769158, |
| "loss": 1.5742, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.2571289807812391, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013626191227956216, |
| "loss": 1.5915, |
| "step": 9060 |
| }, |
| { |
| "epoch": 1.2585166169430375, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013611780722020357, |
| "loss": 1.4973, |
| "step": 9070 |
| }, |
| { |
| "epoch": 1.259904253104836, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013597361585396836, |
| "loss": 1.5331, |
| "step": 9080 |
| }, |
| { |
| "epoch": 1.2612918892666343, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013582933852541524, |
| "loss": 1.6072, |
| "step": 9090 |
| }, |
| { |
| "epoch": 1.2626795254284326, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001356849755793084, |
| "loss": 1.4745, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.2626795254284326, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 872.9402, |
| "eval_samples_per_second": 14.677, |
| "eval_steps_per_second": 1.835, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.264067161590231, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001355405273606166, |
| "loss": 1.4407, |
| "step": 9110 |
| }, |
| { |
| "epoch": 1.2654547977520294, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013539599421451235, |
| "loss": 1.5321, |
| "step": 9120 |
| }, |
| { |
| "epoch": 1.2668424339138278, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001352513764863711, |
| "loss": 1.5258, |
| "step": 9130 |
| }, |
| { |
| "epoch": 1.2682300700756262, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013510667452177043, |
| "loss": 1.5099, |
| "step": 9140 |
| }, |
| { |
| "epoch": 1.2696177062374245, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013496188866648926, |
| "loss": 1.5893, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.271005342399223, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013481701926650687, |
| "loss": 1.5482, |
| "step": 9160 |
| }, |
| { |
| "epoch": 1.2723929785610213, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013467206666800227, |
| "loss": 1.5634, |
| "step": 9170 |
| }, |
| { |
| "epoch": 1.2737806147228197, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013452703121735323, |
| "loss": 1.6012, |
| "step": 9180 |
| }, |
| { |
| "epoch": 1.275168250884618, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001343819132611355, |
| "loss": 1.5565, |
| "step": 9190 |
| }, |
| { |
| "epoch": 1.2765558870464164, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013423671314612202, |
| "loss": 1.5516, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.2765558870464164, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 870.4397, |
| "eval_samples_per_second": 14.719, |
| "eval_steps_per_second": 1.84, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.2779435232082148, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013409143121928207, |
| "loss": 1.5608, |
| "step": 9210 |
| }, |
| { |
| "epoch": 1.2793311593700132, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013394606782778033, |
| "loss": 1.5208, |
| "step": 9220 |
| }, |
| { |
| "epoch": 1.2807187955318116, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013380062331897624, |
| "loss": 1.561, |
| "step": 9230 |
| }, |
| { |
| "epoch": 1.28210643169361, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013365509804042308, |
| "loss": 1.5254, |
| "step": 9240 |
| }, |
| { |
| "epoch": 1.2834940678554083, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013350949233986706, |
| "loss": 1.5598, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.2848817040172067, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013336380656524664, |
| "loss": 1.587, |
| "step": 9260 |
| }, |
| { |
| "epoch": 1.286269340179005, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013321804106469158, |
| "loss": 1.5162, |
| "step": 9270 |
| }, |
| { |
| "epoch": 1.2876569763408034, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001330721961865222, |
| "loss": 1.5323, |
| "step": 9280 |
| }, |
| { |
| "epoch": 1.2890446125026018, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013292627227924844, |
| "loss": 1.5477, |
| "step": 9290 |
| }, |
| { |
| "epoch": 1.2904322486644002, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013278026969156914, |
| "loss": 1.5326, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.2904322486644002, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 887.9291, |
| "eval_samples_per_second": 14.429, |
| "eval_steps_per_second": 1.804, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.2918198848261986, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013263418877237107, |
| "loss": 1.5434, |
| "step": 9310 |
| }, |
| { |
| "epoch": 1.293207520987997, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013248802987072836, |
| "loss": 1.5252, |
| "step": 9320 |
| }, |
| { |
| "epoch": 1.2945951571497953, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001323417933359013, |
| "loss": 1.5959, |
| "step": 9330 |
| }, |
| { |
| "epoch": 1.2959827933115937, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001321954795173357, |
| "loss": 1.5702, |
| "step": 9340 |
| }, |
| { |
| "epoch": 1.297370429473392, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001320490887646622, |
| "loss": 1.5861, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.2987580656351905, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013190262142769515, |
| "loss": 1.5369, |
| "step": 9360 |
| }, |
| { |
| "epoch": 1.3001457017969889, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013175607785643196, |
| "loss": 1.597, |
| "step": 9370 |
| }, |
| { |
| "epoch": 1.3015333379587872, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013160945840105215, |
| "loss": 1.5199, |
| "step": 9380 |
| }, |
| { |
| "epoch": 1.3029209741205856, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013146276341191669, |
| "loss": 1.5969, |
| "step": 9390 |
| }, |
| { |
| "epoch": 1.304308610282384, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013131599323956686, |
| "loss": 1.5627, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.304308610282384, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 883.1047, |
| "eval_samples_per_second": 14.508, |
| "eval_steps_per_second": 1.814, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.3056962464441824, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013116914823472383, |
| "loss": 1.5583, |
| "step": 9410 |
| }, |
| { |
| "epoch": 1.3070838826059807, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013102222874828738, |
| "loss": 1.5026, |
| "step": 9420 |
| }, |
| { |
| "epoch": 1.3084715187677791, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001308752351313354, |
| "loss": 1.5962, |
| "step": 9430 |
| }, |
| { |
| "epoch": 1.3098591549295775, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013072816773512287, |
| "loss": 1.5441, |
| "step": 9440 |
| }, |
| { |
| "epoch": 1.3112467910913759, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013058102691108106, |
| "loss": 1.5238, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.3126344272531743, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013043381301081674, |
| "loss": 1.5385, |
| "step": 9460 |
| }, |
| { |
| "epoch": 1.3140220634149726, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001302865263861113, |
| "loss": 1.5158, |
| "step": 9470 |
| }, |
| { |
| "epoch": 1.315409699576771, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00013013916738891985, |
| "loss": 1.5261, |
| "step": 9480 |
| }, |
| { |
| "epoch": 1.3167973357385694, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012999173637137052, |
| "loss": 1.579, |
| "step": 9490 |
| }, |
| { |
| "epoch": 1.3181849719003678, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012984423368576353, |
| "loss": 1.6033, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.3181849719003678, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 874.2116, |
| "eval_samples_per_second": 14.655, |
| "eval_steps_per_second": 1.833, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.3195726080621661, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001296966596845703, |
| "loss": 1.537, |
| "step": 9510 |
| }, |
| { |
| "epoch": 1.3209602442239645, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012954901472043273, |
| "loss": 1.4632, |
| "step": 9520 |
| }, |
| { |
| "epoch": 1.322347880385763, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012940129914616223, |
| "loss": 1.5292, |
| "step": 9530 |
| }, |
| { |
| "epoch": 1.3237355165475613, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012925351331473904, |
| "loss": 1.5498, |
| "step": 9540 |
| }, |
| { |
| "epoch": 1.3251231527093597, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012910565757931114, |
| "loss": 1.5441, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.326510788871158, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012895773229319366, |
| "loss": 1.511, |
| "step": 9560 |
| }, |
| { |
| "epoch": 1.3278984250329564, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001288097378098679, |
| "loss": 1.5437, |
| "step": 9570 |
| }, |
| { |
| "epoch": 1.3292860611947548, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012866167448298053, |
| "loss": 1.5918, |
| "step": 9580 |
| }, |
| { |
| "epoch": 1.3306736973565532, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001285135426663427, |
| "loss": 1.5121, |
| "step": 9590 |
| }, |
| { |
| "epoch": 1.3320613335183515, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001283653427139292, |
| "loss": 1.4988, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.3320613335183515, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 873.8623, |
| "eval_samples_per_second": 14.661, |
| "eval_steps_per_second": 1.833, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.33344896968015, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012821707497987772, |
| "loss": 1.544, |
| "step": 9610 |
| }, |
| { |
| "epoch": 1.3348366058419483, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012806873981848784, |
| "loss": 1.5734, |
| "step": 9620 |
| }, |
| { |
| "epoch": 1.3362242420037467, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012792033758422032, |
| "loss": 1.5773, |
| "step": 9630 |
| }, |
| { |
| "epoch": 1.337611878165545, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012777186863169613, |
| "loss": 1.5198, |
| "step": 9640 |
| }, |
| { |
| "epoch": 1.3389995143273434, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012762333331569576, |
| "loss": 1.5376, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.3403871504891418, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012747473199115823, |
| "loss": 1.587, |
| "step": 9660 |
| }, |
| { |
| "epoch": 1.3417747866509402, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012732606501318023, |
| "loss": 1.5984, |
| "step": 9670 |
| }, |
| { |
| "epoch": 1.3431624228127386, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012717733273701548, |
| "loss": 1.6267, |
| "step": 9680 |
| }, |
| { |
| "epoch": 1.344550058974537, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012702853551807357, |
| "loss": 1.6223, |
| "step": 9690 |
| }, |
| { |
| "epoch": 1.3459376951363353, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012687967371191947, |
| "loss": 1.5781, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.3459376951363353, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 873.0946, |
| "eval_samples_per_second": 14.674, |
| "eval_steps_per_second": 1.835, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.3473253312981337, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001267307476742723, |
| "loss": 1.6014, |
| "step": 9710 |
| }, |
| { |
| "epoch": 1.348712967459932, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001265817577610048, |
| "loss": 1.5637, |
| "step": 9720 |
| }, |
| { |
| "epoch": 1.3501006036217305, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012643270432814225, |
| "loss": 1.5874, |
| "step": 9730 |
| }, |
| { |
| "epoch": 1.3514882397835288, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001262835877318618, |
| "loss": 1.5469, |
| "step": 9740 |
| }, |
| { |
| "epoch": 1.3528758759453272, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012613440832849146, |
| "loss": 1.5123, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.3542635121071256, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012598516647450942, |
| "loss": 1.5328, |
| "step": 9760 |
| }, |
| { |
| "epoch": 1.355651148268924, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012583586252654298, |
| "loss": 1.5016, |
| "step": 9770 |
| }, |
| { |
| "epoch": 1.3570387844307223, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001256864968413679, |
| "loss": 1.5429, |
| "step": 9780 |
| }, |
| { |
| "epoch": 1.3584264205925207, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012553706977590744, |
| "loss": 1.5475, |
| "step": 9790 |
| }, |
| { |
| "epoch": 1.359814056754319, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012538758168723156, |
| "loss": 1.5815, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.359814056754319, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 873.1306, |
| "eval_samples_per_second": 14.674, |
| "eval_steps_per_second": 1.835, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.3612016929161173, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012523803293255602, |
| "loss": 1.5421, |
| "step": 9810 |
| }, |
| { |
| "epoch": 1.3625893290779159, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012508842386924152, |
| "loss": 1.4994, |
| "step": 9820 |
| }, |
| { |
| "epoch": 1.363976965239714, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012493875485479296, |
| "loss": 1.5126, |
| "step": 9830 |
| }, |
| { |
| "epoch": 1.3653646014015126, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012478902624685838, |
| "loss": 1.4954, |
| "step": 9840 |
| }, |
| { |
| "epoch": 1.3667522375633108, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012463923840322832, |
| "loss": 1.5676, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.3681398737251094, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001244893916818348, |
| "loss": 1.5358, |
| "step": 9860 |
| }, |
| { |
| "epoch": 1.3695275098869075, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001243394864407506, |
| "loss": 1.5445, |
| "step": 9870 |
| }, |
| { |
| "epoch": 1.3709151460487061, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012418952303818834, |
| "loss": 1.4529, |
| "step": 9880 |
| }, |
| { |
| "epoch": 1.3723027822105043, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012403950183249952, |
| "loss": 1.555, |
| "step": 9890 |
| }, |
| { |
| "epoch": 1.3736904183723029, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012388942318217384, |
| "loss": 1.5182, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.3736904183723029, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 880.2398, |
| "eval_samples_per_second": 14.555, |
| "eval_steps_per_second": 1.82, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.375078054534101, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001237392874458383, |
| "loss": 1.4459, |
| "step": 9910 |
| }, |
| { |
| "epoch": 1.3764656906958996, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001235890949822563, |
| "loss": 1.5648, |
| "step": 9920 |
| }, |
| { |
| "epoch": 1.3778533268576978, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001234388461503267, |
| "loss": 1.6218, |
| "step": 9930 |
| }, |
| { |
| "epoch": 1.3792409630194964, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012328854130908319, |
| "loss": 1.6005, |
| "step": 9940 |
| }, |
| { |
| "epoch": 1.3806285991812945, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001231381808176932, |
| "loss": 1.5869, |
| "step": 9950 |
| }, |
| { |
| "epoch": 1.3820162353430931, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001229877650354572, |
| "loss": 1.6159, |
| "step": 9960 |
| }, |
| { |
| "epoch": 1.3834038715048913, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012283729432180775, |
| "loss": 1.5293, |
| "step": 9970 |
| }, |
| { |
| "epoch": 1.38479150766669, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001226867690363087, |
| "loss": 1.5321, |
| "step": 9980 |
| }, |
| { |
| "epoch": 1.386179143828488, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012253618953865427, |
| "loss": 1.5327, |
| "step": 9990 |
| }, |
| { |
| "epoch": 1.3875667799902867, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012238555618866826, |
| "loss": 1.6345, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.3875667799902867, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 875.5953, |
| "eval_samples_per_second": 14.632, |
| "eval_steps_per_second": 1.83, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.3889544161520848, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012223486934630315, |
| "loss": 1.5467, |
| "step": 10010 |
| }, |
| { |
| "epoch": 1.3903420523138834, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012208412937163922, |
| "loss": 1.5573, |
| "step": 10020 |
| }, |
| { |
| "epoch": 1.3917296884756816, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012193333662488376, |
| "loss": 1.6015, |
| "step": 10030 |
| }, |
| { |
| "epoch": 1.3931173246374802, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012178249146637009, |
| "loss": 1.5711, |
| "step": 10040 |
| }, |
| { |
| "epoch": 1.3945049607992783, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012163159425655682, |
| "loss": 1.5398, |
| "step": 10050 |
| }, |
| { |
| "epoch": 1.395892596961077, |
| "grad_norm": 0.0, |
| "learning_rate": 0.000121480645356027, |
| "loss": 1.518, |
| "step": 10060 |
| }, |
| { |
| "epoch": 1.397280233122875, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012132964512548705, |
| "loss": 1.5997, |
| "step": 10070 |
| }, |
| { |
| "epoch": 1.3986678692846737, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012117859392576623, |
| "loss": 1.5428, |
| "step": 10080 |
| }, |
| { |
| "epoch": 1.4000555054464718, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012102749211781539, |
| "loss": 1.5063, |
| "step": 10090 |
| }, |
| { |
| "epoch": 1.4014431416082704, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012087634006270654, |
| "loss": 1.5924, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.4014431416082704, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 866.8356, |
| "eval_samples_per_second": 14.78, |
| "eval_steps_per_second": 1.848, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.4028307777700686, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012072513812163157, |
| "loss": 1.5603, |
| "step": 10110 |
| }, |
| { |
| "epoch": 1.4042184139318672, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012057388665590171, |
| "loss": 1.5096, |
| "step": 10120 |
| }, |
| { |
| "epoch": 1.4056060500936653, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012042258602694643, |
| "loss": 1.5635, |
| "step": 10130 |
| }, |
| { |
| "epoch": 1.406993686255464, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012027123659631272, |
| "loss": 1.508, |
| "step": 10140 |
| }, |
| { |
| "epoch": 1.408381322417262, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00012011983872566422, |
| "loss": 1.5646, |
| "step": 10150 |
| }, |
| { |
| "epoch": 1.4097689585790607, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001199683927767803, |
| "loss": 1.5866, |
| "step": 10160 |
| }, |
| { |
| "epoch": 1.4111565947408589, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011981689911155518, |
| "loss": 1.5451, |
| "step": 10170 |
| }, |
| { |
| "epoch": 1.4125442309026575, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011966535809199715, |
| "loss": 1.5747, |
| "step": 10180 |
| }, |
| { |
| "epoch": 1.4139318670644556, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011951377008022766, |
| "loss": 1.5125, |
| "step": 10190 |
| }, |
| { |
| "epoch": 1.415319503226254, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011936213543848043, |
| "loss": 1.4432, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.415319503226254, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 877.7234, |
| "eval_samples_per_second": 14.597, |
| "eval_steps_per_second": 1.825, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.4167071393880524, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011921045452910057, |
| "loss": 1.5242, |
| "step": 10210 |
| }, |
| { |
| "epoch": 1.4180947755498508, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011905872771454387, |
| "loss": 1.5433, |
| "step": 10220 |
| }, |
| { |
| "epoch": 1.4194824117116491, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011890695535737564, |
| "loss": 1.5994, |
| "step": 10230 |
| }, |
| { |
| "epoch": 1.4208700478734475, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011875513782027021, |
| "loss": 1.5355, |
| "step": 10240 |
| }, |
| { |
| "epoch": 1.4222576840352459, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011860327546600969, |
| "loss": 1.5373, |
| "step": 10250 |
| }, |
| { |
| "epoch": 1.4236453201970443, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011845136865748345, |
| "loss": 1.5215, |
| "step": 10260 |
| }, |
| { |
| "epoch": 1.4250329563588426, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011829941775768694, |
| "loss": 1.6019, |
| "step": 10270 |
| }, |
| { |
| "epoch": 1.426420592520641, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011814742312972109, |
| "loss": 1.5299, |
| "step": 10280 |
| }, |
| { |
| "epoch": 1.4278082286824394, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011799538513679127, |
| "loss": 1.5122, |
| "step": 10290 |
| }, |
| { |
| "epoch": 1.4291958648442378, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011784330414220643, |
| "loss": 1.5991, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.4291958648442378, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 872.4699, |
| "eval_samples_per_second": 14.685, |
| "eval_steps_per_second": 1.836, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.4305835010060362, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011769118050937837, |
| "loss": 1.6097, |
| "step": 10310 |
| }, |
| { |
| "epoch": 1.4319711371678345, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001175390146018207, |
| "loss": 1.5588, |
| "step": 10320 |
| }, |
| { |
| "epoch": 1.433358773329633, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011738680678314813, |
| "loss": 1.5589, |
| "step": 10330 |
| }, |
| { |
| "epoch": 1.4347464094914313, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011723455741707541, |
| "loss": 1.5128, |
| "step": 10340 |
| }, |
| { |
| "epoch": 1.4361340456532297, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011708226686741666, |
| "loss": 1.5162, |
| "step": 10350 |
| }, |
| { |
| "epoch": 1.437521681815028, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001169299354980844, |
| "loss": 1.5494, |
| "step": 10360 |
| }, |
| { |
| "epoch": 1.4389093179768264, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011677756367308866, |
| "loss": 1.5198, |
| "step": 10370 |
| }, |
| { |
| "epoch": 1.4402969541386248, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011662515175653615, |
| "loss": 1.5008, |
| "step": 10380 |
| }, |
| { |
| "epoch": 1.4416845903004232, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011647270011262939, |
| "loss": 1.4987, |
| "step": 10390 |
| }, |
| { |
| "epoch": 1.4430722264622216, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011632020910566586, |
| "loss": 1.5658, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.4430722264622216, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 872.8447, |
| "eval_samples_per_second": 14.678, |
| "eval_steps_per_second": 1.835, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.44445986262402, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011616767910003706, |
| "loss": 1.5572, |
| "step": 10410 |
| }, |
| { |
| "epoch": 1.4458474987858183, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011601511046022768, |
| "loss": 1.582, |
| "step": 10420 |
| }, |
| { |
| "epoch": 1.4472351349476167, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011586250355081481, |
| "loss": 1.535, |
| "step": 10430 |
| }, |
| { |
| "epoch": 1.448622771109415, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011570985873646685, |
| "loss": 1.5764, |
| "step": 10440 |
| }, |
| { |
| "epoch": 1.4500104072712134, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011555717638194288, |
| "loss": 1.5277, |
| "step": 10450 |
| }, |
| { |
| "epoch": 1.4513980434330118, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011540445685209167, |
| "loss": 1.5242, |
| "step": 10460 |
| }, |
| { |
| "epoch": 1.4527856795948102, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011525170051185081, |
| "loss": 1.5387, |
| "step": 10470 |
| }, |
| { |
| "epoch": 1.4541733157566086, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011509890772624586, |
| "loss": 1.5014, |
| "step": 10480 |
| }, |
| { |
| "epoch": 1.455560951918407, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011494607886038946, |
| "loss": 1.5327, |
| "step": 10490 |
| }, |
| { |
| "epoch": 1.4569485880802053, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011479321427948044, |
| "loss": 1.574, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.4569485880802053, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 877.3378, |
| "eval_samples_per_second": 14.603, |
| "eval_steps_per_second": 1.826, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.4583362242420037, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011464031434880305, |
| "loss": 1.4931, |
| "step": 10510 |
| }, |
| { |
| "epoch": 1.459723860403802, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011448737943372594, |
| "loss": 1.6437, |
| "step": 10520 |
| }, |
| { |
| "epoch": 1.4611114965656005, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011433440989970141, |
| "loss": 1.5004, |
| "step": 10530 |
| }, |
| { |
| "epoch": 1.4624991327273988, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011418140611226445, |
| "loss": 1.5678, |
| "step": 10540 |
| }, |
| { |
| "epoch": 1.4638867688891972, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011402836843703189, |
| "loss": 1.5217, |
| "step": 10550 |
| }, |
| { |
| "epoch": 1.4652744050509956, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001138752972397016, |
| "loss": 1.5304, |
| "step": 10560 |
| }, |
| { |
| "epoch": 1.466662041212794, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011372219288605148, |
| "loss": 1.567, |
| "step": 10570 |
| }, |
| { |
| "epoch": 1.4680496773745924, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011356905574193872, |
| "loss": 1.5239, |
| "step": 10580 |
| }, |
| { |
| "epoch": 1.4694373135363907, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011341588617329882, |
| "loss": 1.5732, |
| "step": 10590 |
| }, |
| { |
| "epoch": 1.470824949698189, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011326268454614481, |
| "loss": 1.4986, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.470824949698189, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 869.8756, |
| "eval_samples_per_second": 14.729, |
| "eval_steps_per_second": 1.842, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.4722125858599875, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011310945122656626, |
| "loss": 1.6011, |
| "step": 10610 |
| }, |
| { |
| "epoch": 1.4736002220217859, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011295618658072858, |
| "loss": 1.4677, |
| "step": 10620 |
| }, |
| { |
| "epoch": 1.4749878581835842, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011280289097487189, |
| "loss": 1.5537, |
| "step": 10630 |
| }, |
| { |
| "epoch": 1.4763754943453826, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001126495647753104, |
| "loss": 1.5312, |
| "step": 10640 |
| }, |
| { |
| "epoch": 1.477763130507181, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011249620834843145, |
| "loss": 1.5691, |
| "step": 10650 |
| }, |
| { |
| "epoch": 1.4791507666689794, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011234282206069444, |
| "loss": 1.5955, |
| "step": 10660 |
| }, |
| { |
| "epoch": 1.4805384028307778, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011218940627863037, |
| "loss": 1.6063, |
| "step": 10670 |
| }, |
| { |
| "epoch": 1.4819260389925761, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011203596136884049, |
| "loss": 1.5566, |
| "step": 10680 |
| }, |
| { |
| "epoch": 1.4833136751543745, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011188248769799584, |
| "loss": 1.563, |
| "step": 10690 |
| }, |
| { |
| "epoch": 1.484701311316173, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011172898563283601, |
| "loss": 1.5032, |
| "step": 10700 |
| }, |
| { |
| "epoch": 1.484701311316173, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 875.308, |
| "eval_samples_per_second": 14.637, |
| "eval_steps_per_second": 1.83, |
| "step": 10700 |
| }, |
| { |
| "epoch": 1.4860889474779713, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011157545554016864, |
| "loss": 1.5129, |
| "step": 10710 |
| }, |
| { |
| "epoch": 1.4874765836397696, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011142189778686814, |
| "loss": 1.5841, |
| "step": 10720 |
| }, |
| { |
| "epoch": 1.488864219801568, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001112683127398752, |
| "loss": 1.5418, |
| "step": 10730 |
| }, |
| { |
| "epoch": 1.4902518559633664, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011111470076619557, |
| "loss": 1.5277, |
| "step": 10740 |
| }, |
| { |
| "epoch": 1.4916394921251648, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011096106223289951, |
| "loss": 1.666, |
| "step": 10750 |
| }, |
| { |
| "epoch": 1.4930271282869632, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011080739750712057, |
| "loss": 1.594, |
| "step": 10760 |
| }, |
| { |
| "epoch": 1.4944147644487615, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011065370695605502, |
| "loss": 1.6252, |
| "step": 10770 |
| }, |
| { |
| "epoch": 1.49580240061056, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001104999909469608, |
| "loss": 1.568, |
| "step": 10780 |
| }, |
| { |
| "epoch": 1.4971900367723583, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011034624984715667, |
| "loss": 1.4767, |
| "step": 10790 |
| }, |
| { |
| "epoch": 1.4985776729341567, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011019248402402136, |
| "loss": 1.4872, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.4985776729341567, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 874.6368, |
| "eval_samples_per_second": 14.648, |
| "eval_steps_per_second": 1.832, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.499965309095955, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00011003869384499268, |
| "loss": 1.4777, |
| "step": 10810 |
| }, |
| { |
| "epoch": 1.5013529452577534, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010988487967756666, |
| "loss": 1.5252, |
| "step": 10820 |
| }, |
| { |
| "epoch": 1.5027405814195518, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001097310418892966, |
| "loss": 1.5309, |
| "step": 10830 |
| }, |
| { |
| "epoch": 1.5041282175813502, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010957718084779229, |
| "loss": 1.5586, |
| "step": 10840 |
| }, |
| { |
| "epoch": 1.5055158537431486, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010942329692071909, |
| "loss": 1.6205, |
| "step": 10850 |
| }, |
| { |
| "epoch": 1.506903489904947, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010926939047579699, |
| "loss": 1.5137, |
| "step": 10860 |
| }, |
| { |
| "epoch": 1.5082911260667453, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010911546188079986, |
| "loss": 1.5804, |
| "step": 10870 |
| }, |
| { |
| "epoch": 1.5096787622285437, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010896151150355445, |
| "loss": 1.4934, |
| "step": 10880 |
| }, |
| { |
| "epoch": 1.511066398390342, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010880753971193957, |
| "loss": 1.4905, |
| "step": 10890 |
| }, |
| { |
| "epoch": 1.5124540345521404, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010865354687388522, |
| "loss": 1.5298, |
| "step": 10900 |
| }, |
| { |
| "epoch": 1.5124540345521404, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 880.7605, |
| "eval_samples_per_second": 14.547, |
| "eval_steps_per_second": 1.819, |
| "step": 10900 |
| }, |
| { |
| "epoch": 1.5138416707139388, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010849953335737173, |
| "loss": 1.5989, |
| "step": 10910 |
| }, |
| { |
| "epoch": 1.5152293068757372, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010834549953042869, |
| "loss": 1.5482, |
| "step": 10920 |
| }, |
| { |
| "epoch": 1.5166169430375356, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010819144576113442, |
| "loss": 1.5114, |
| "step": 10930 |
| }, |
| { |
| "epoch": 1.518004579199334, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010803737241761474, |
| "loss": 1.4501, |
| "step": 10940 |
| }, |
| { |
| "epoch": 1.5193922153611323, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010788327986804237, |
| "loss": 1.4869, |
| "step": 10950 |
| }, |
| { |
| "epoch": 1.5207798515229307, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010772916848063581, |
| "loss": 1.4995, |
| "step": 10960 |
| }, |
| { |
| "epoch": 1.522167487684729, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010757503862365865, |
| "loss": 1.6415, |
| "step": 10970 |
| }, |
| { |
| "epoch": 1.5235551238465275, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010742089066541859, |
| "loss": 1.513, |
| "step": 10980 |
| }, |
| { |
| "epoch": 1.5249427600083258, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010726672497426656, |
| "loss": 1.6119, |
| "step": 10990 |
| }, |
| { |
| "epoch": 1.5263303961701242, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010711254191859595, |
| "loss": 1.5535, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.5263303961701242, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 866.7615, |
| "eval_samples_per_second": 14.781, |
| "eval_steps_per_second": 1.848, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.5277180323319226, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001069583418668415, |
| "loss": 1.5341, |
| "step": 11010 |
| }, |
| { |
| "epoch": 1.529105668493721, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010680412518747873, |
| "loss": 1.6164, |
| "step": 11020 |
| }, |
| { |
| "epoch": 1.5304933046555194, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010664989224902276, |
| "loss": 1.6643, |
| "step": 11030 |
| }, |
| { |
| "epoch": 1.5318809408173177, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010649564342002763, |
| "loss": 1.515, |
| "step": 11040 |
| }, |
| { |
| "epoch": 1.5332685769791161, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010634137906908534, |
| "loss": 1.476, |
| "step": 11050 |
| }, |
| { |
| "epoch": 1.5346562131409145, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010618709956482498, |
| "loss": 1.5775, |
| "step": 11060 |
| }, |
| { |
| "epoch": 1.5360438493027129, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010603280527591182, |
| "loss": 1.4662, |
| "step": 11070 |
| }, |
| { |
| "epoch": 1.5374314854645112, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010587849657104653, |
| "loss": 1.5799, |
| "step": 11080 |
| }, |
| { |
| "epoch": 1.5388191216263096, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010572417381896418, |
| "loss": 1.5966, |
| "step": 11090 |
| }, |
| { |
| "epoch": 1.540206757788108, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010556983738843335, |
| "loss": 1.5935, |
| "step": 11100 |
| }, |
| { |
| "epoch": 1.540206757788108, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 875.7742, |
| "eval_samples_per_second": 14.629, |
| "eval_steps_per_second": 1.829, |
| "step": 11100 |
| }, |
| { |
| "epoch": 1.5415943939499064, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010541548764825544, |
| "loss": 1.5677, |
| "step": 11110 |
| }, |
| { |
| "epoch": 1.5429820301117048, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010526112496726354, |
| "loss": 1.5742, |
| "step": 11120 |
| }, |
| { |
| "epoch": 1.5443696662735031, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010510674971432174, |
| "loss": 1.5705, |
| "step": 11130 |
| }, |
| { |
| "epoch": 1.5457573024353015, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001049523622583241, |
| "loss": 1.5183, |
| "step": 11140 |
| }, |
| { |
| "epoch": 1.5471449385971, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010479796296819393, |
| "loss": 1.48, |
| "step": 11150 |
| }, |
| { |
| "epoch": 1.548532574758898, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010464355221288271, |
| "loss": 1.5553, |
| "step": 11160 |
| }, |
| { |
| "epoch": 1.5499202109206967, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001044891303613694, |
| "loss": 1.5698, |
| "step": 11170 |
| }, |
| { |
| "epoch": 1.5513078470824948, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010433469778265945, |
| "loss": 1.5182, |
| "step": 11180 |
| }, |
| { |
| "epoch": 1.5526954832442934, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010418025484578396, |
| "loss": 1.5826, |
| "step": 11190 |
| }, |
| { |
| "epoch": 1.5540831194060916, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010402580191979873, |
| "loss": 1.5437, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.5540831194060916, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 877.2662, |
| "eval_samples_per_second": 14.604, |
| "eval_steps_per_second": 1.826, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.5554707555678902, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010387133937378348, |
| "loss": 1.5353, |
| "step": 11210 |
| }, |
| { |
| "epoch": 1.5568583917296883, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010371686757684092, |
| "loss": 1.5471, |
| "step": 11220 |
| }, |
| { |
| "epoch": 1.558246027891487, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010356238689809579, |
| "loss": 1.6254, |
| "step": 11230 |
| }, |
| { |
| "epoch": 1.559633664053285, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010340789770669421, |
| "loss": 1.5478, |
| "step": 11240 |
| }, |
| { |
| "epoch": 1.5610213002150837, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010325340037180244, |
| "loss": 1.5716, |
| "step": 11250 |
| }, |
| { |
| "epoch": 1.5624089363768818, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010309889526260639, |
| "loss": 1.59, |
| "step": 11260 |
| }, |
| { |
| "epoch": 1.5637965725386804, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010294438274831042, |
| "loss": 1.53, |
| "step": 11270 |
| }, |
| { |
| "epoch": 1.5651842087004786, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010278986319813663, |
| "loss": 1.4877, |
| "step": 11280 |
| }, |
| { |
| "epoch": 1.5665718448622772, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010263533698132393, |
| "loss": 1.5286, |
| "step": 11290 |
| }, |
| { |
| "epoch": 1.5679594810240753, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010248080446712716, |
| "loss": 1.4785, |
| "step": 11300 |
| }, |
| { |
| "epoch": 1.5679594810240753, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 869.2963, |
| "eval_samples_per_second": 14.738, |
| "eval_steps_per_second": 1.843, |
| "step": 11300 |
| }, |
| { |
| "epoch": 1.569347117185874, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010232626602481623, |
| "loss": 1.5692, |
| "step": 11310 |
| }, |
| { |
| "epoch": 1.570734753347672, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010217172202367515, |
| "loss": 1.4877, |
| "step": 11320 |
| }, |
| { |
| "epoch": 1.5721223895094707, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001020171728330013, |
| "loss": 1.5381, |
| "step": 11330 |
| }, |
| { |
| "epoch": 1.5735100256712689, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010186261882210437, |
| "loss": 1.4638, |
| "step": 11340 |
| }, |
| { |
| "epoch": 1.5748976618330675, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010170806036030568, |
| "loss": 1.5052, |
| "step": 11350 |
| }, |
| { |
| "epoch": 1.5762852979948656, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010155349781693708, |
| "loss": 1.5806, |
| "step": 11360 |
| }, |
| { |
| "epoch": 1.5776729341566642, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010139893156134024, |
| "loss": 1.5457, |
| "step": 11370 |
| }, |
| { |
| "epoch": 1.5790605703184624, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010124436196286561, |
| "loss": 1.5623, |
| "step": 11380 |
| }, |
| { |
| "epoch": 1.580448206480261, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010108978939087181, |
| "loss": 1.6168, |
| "step": 11390 |
| }, |
| { |
| "epoch": 1.5818358426420591, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010093521421472436, |
| "loss": 1.5384, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.5818358426420591, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 870.7754, |
| "eval_samples_per_second": 14.713, |
| "eval_steps_per_second": 1.84, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.5832234788038577, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010078063680379513, |
| "loss": 1.6739, |
| "step": 11410 |
| }, |
| { |
| "epoch": 1.5846111149656559, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010062605752746128, |
| "loss": 1.4739, |
| "step": 11420 |
| }, |
| { |
| "epoch": 1.5859987511274545, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010047147675510442, |
| "loss": 1.5422, |
| "step": 11430 |
| }, |
| { |
| "epoch": 1.5873863872892526, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010031689485610982, |
| "loss": 1.5205, |
| "step": 11440 |
| }, |
| { |
| "epoch": 1.5887740234510512, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010016231219986529, |
| "loss": 1.5092, |
| "step": 11450 |
| }, |
| { |
| "epoch": 1.5901616596128494, |
| "grad_norm": 0.0, |
| "learning_rate": 0.00010000772915576064, |
| "loss": 1.5224, |
| "step": 11460 |
| }, |
| { |
| "epoch": 1.591549295774648, |
| "grad_norm": 0.0, |
| "learning_rate": 9.985314609318644e-05, |
| "loss": 1.6409, |
| "step": 11470 |
| }, |
| { |
| "epoch": 1.5929369319364461, |
| "grad_norm": 0.0, |
| "learning_rate": 9.969856338153334e-05, |
| "loss": 1.5212, |
| "step": 11480 |
| }, |
| { |
| "epoch": 1.5943245680982447, |
| "grad_norm": 0.0, |
| "learning_rate": 9.954398139019123e-05, |
| "loss": 1.5068, |
| "step": 11490 |
| }, |
| { |
| "epoch": 1.595712204260043, |
| "grad_norm": 0.0, |
| "learning_rate": 9.938940048854822e-05, |
| "loss": 1.5086, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.595712204260043, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 863.1857, |
| "eval_samples_per_second": 14.843, |
| "eval_steps_per_second": 1.856, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.5970998404218415, |
| "grad_norm": 0.0, |
| "learning_rate": 9.923482104598986e-05, |
| "loss": 1.4987, |
| "step": 11510 |
| }, |
| { |
| "epoch": 1.5984874765836397, |
| "grad_norm": 0.0, |
| "learning_rate": 9.908024343189809e-05, |
| "loss": 1.5908, |
| "step": 11520 |
| }, |
| { |
| "epoch": 1.5998751127454383, |
| "grad_norm": 0.0, |
| "learning_rate": 9.892566801565061e-05, |
| "loss": 1.5734, |
| "step": 11530 |
| }, |
| { |
| "epoch": 1.6012627489072364, |
| "grad_norm": 0.0, |
| "learning_rate": 9.877109516661991e-05, |
| "loss": 1.5493, |
| "step": 11540 |
| }, |
| { |
| "epoch": 1.602650385069035, |
| "grad_norm": 0.0, |
| "learning_rate": 9.861652525417213e-05, |
| "loss": 1.5222, |
| "step": 11550 |
| }, |
| { |
| "epoch": 1.6040380212308332, |
| "grad_norm": 0.0, |
| "learning_rate": 9.846195864766656e-05, |
| "loss": 1.4728, |
| "step": 11560 |
| }, |
| { |
| "epoch": 1.6054256573926318, |
| "grad_norm": 0.0, |
| "learning_rate": 9.830739571645459e-05, |
| "loss": 1.5139, |
| "step": 11570 |
| }, |
| { |
| "epoch": 1.60681329355443, |
| "grad_norm": 0.0, |
| "learning_rate": 9.815283682987883e-05, |
| "loss": 1.5696, |
| "step": 11580 |
| }, |
| { |
| "epoch": 1.6082009297162285, |
| "grad_norm": 0.0, |
| "learning_rate": 9.799828235727209e-05, |
| "loss": 1.5483, |
| "step": 11590 |
| }, |
| { |
| "epoch": 1.6095885658780267, |
| "grad_norm": 0.0, |
| "learning_rate": 9.784373266795679e-05, |
| "loss": 1.5584, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.6095885658780267, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 866.2587, |
| "eval_samples_per_second": 14.79, |
| "eval_steps_per_second": 1.849, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.6109762020398253, |
| "grad_norm": 0.0, |
| "learning_rate": 9.768918813124384e-05, |
| "loss": 1.6297, |
| "step": 11610 |
| }, |
| { |
| "epoch": 1.6123638382016234, |
| "grad_norm": 0.0, |
| "learning_rate": 9.75346491164319e-05, |
| "loss": 1.5784, |
| "step": 11620 |
| }, |
| { |
| "epoch": 1.613751474363422, |
| "grad_norm": 0.0, |
| "learning_rate": 9.738011599280632e-05, |
| "loss": 1.553, |
| "step": 11630 |
| }, |
| { |
| "epoch": 1.6151391105252202, |
| "grad_norm": 0.0, |
| "learning_rate": 9.722558912963848e-05, |
| "loss": 1.5311, |
| "step": 11640 |
| }, |
| { |
| "epoch": 1.6165267466870188, |
| "grad_norm": 0.0, |
| "learning_rate": 9.707106889618481e-05, |
| "loss": 1.5268, |
| "step": 11650 |
| }, |
| { |
| "epoch": 1.617914382848817, |
| "grad_norm": 0.0, |
| "learning_rate": 9.691655566168576e-05, |
| "loss": 1.433, |
| "step": 11660 |
| }, |
| { |
| "epoch": 1.6193020190106155, |
| "grad_norm": 0.0, |
| "learning_rate": 9.676204979536521e-05, |
| "loss": 1.5892, |
| "step": 11670 |
| }, |
| { |
| "epoch": 1.6206896551724137, |
| "grad_norm": 0.0, |
| "learning_rate": 9.660755166642934e-05, |
| "loss": 1.4965, |
| "step": 11680 |
| }, |
| { |
| "epoch": 1.6220772913342123, |
| "grad_norm": 0.0, |
| "learning_rate": 9.645306164406594e-05, |
| "loss": 1.6108, |
| "step": 11690 |
| }, |
| { |
| "epoch": 1.6234649274960105, |
| "grad_norm": 0.0, |
| "learning_rate": 9.629858009744327e-05, |
| "loss": 1.5417, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.6234649274960105, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 868.366, |
| "eval_samples_per_second": 14.754, |
| "eval_steps_per_second": 1.845, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.624852563657809, |
| "grad_norm": 0.0, |
| "learning_rate": 9.614410739570947e-05, |
| "loss": 1.5061, |
| "step": 11710 |
| }, |
| { |
| "epoch": 1.6262401998196072, |
| "grad_norm": 0.0, |
| "learning_rate": 9.598964390799147e-05, |
| "loss": 1.5252, |
| "step": 11720 |
| }, |
| { |
| "epoch": 1.6276278359814058, |
| "grad_norm": 0.0, |
| "learning_rate": 9.583519000339429e-05, |
| "loss": 1.592, |
| "step": 11730 |
| }, |
| { |
| "epoch": 1.629015472143204, |
| "grad_norm": 0.0, |
| "learning_rate": 9.568074605099989e-05, |
| "loss": 1.6131, |
| "step": 11740 |
| }, |
| { |
| "epoch": 1.6304031083050026, |
| "grad_norm": 0.0, |
| "learning_rate": 9.552631241986657e-05, |
| "loss": 1.6038, |
| "step": 11750 |
| }, |
| { |
| "epoch": 1.6317907444668007, |
| "grad_norm": 0.0, |
| "learning_rate": 9.537188947902795e-05, |
| "loss": 1.5402, |
| "step": 11760 |
| }, |
| { |
| "epoch": 1.6331783806285993, |
| "grad_norm": 0.0, |
| "learning_rate": 9.521747759749202e-05, |
| "loss": 1.5562, |
| "step": 11770 |
| }, |
| { |
| "epoch": 1.6345660167903975, |
| "grad_norm": 0.0, |
| "learning_rate": 9.506307714424044e-05, |
| "loss": 1.56, |
| "step": 11780 |
| }, |
| { |
| "epoch": 1.635953652952196, |
| "grad_norm": 0.0, |
| "learning_rate": 9.490868848822752e-05, |
| "loss": 1.5394, |
| "step": 11790 |
| }, |
| { |
| "epoch": 1.6373412891139942, |
| "grad_norm": 0.0, |
| "learning_rate": 9.475431199837944e-05, |
| "loss": 1.5144, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.6373412891139942, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 878.0467, |
| "eval_samples_per_second": 14.591, |
| "eval_steps_per_second": 1.825, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.6387289252757928, |
| "grad_norm": 0.0, |
| "learning_rate": 9.459994804359317e-05, |
| "loss": 1.5351, |
| "step": 11810 |
| }, |
| { |
| "epoch": 1.640116561437591, |
| "grad_norm": 0.0, |
| "learning_rate": 9.444559699273583e-05, |
| "loss": 1.4772, |
| "step": 11820 |
| }, |
| { |
| "epoch": 1.6415041975993896, |
| "grad_norm": 0.0, |
| "learning_rate": 9.429125921464371e-05, |
| "loss": 1.555, |
| "step": 11830 |
| }, |
| { |
| "epoch": 1.6428918337611877, |
| "grad_norm": 0.0, |
| "learning_rate": 9.413693507812139e-05, |
| "loss": 1.5026, |
| "step": 11840 |
| }, |
| { |
| "epoch": 1.6442794699229863, |
| "grad_norm": 0.0, |
| "learning_rate": 9.398262495194074e-05, |
| "loss": 1.5238, |
| "step": 11850 |
| }, |
| { |
| "epoch": 1.6456671060847845, |
| "grad_norm": 0.0, |
| "learning_rate": 9.382832920484026e-05, |
| "loss": 1.5083, |
| "step": 11860 |
| }, |
| { |
| "epoch": 1.647054742246583, |
| "grad_norm": 0.0, |
| "learning_rate": 9.367404820552412e-05, |
| "loss": 1.5808, |
| "step": 11870 |
| }, |
| { |
| "epoch": 1.6484423784083813, |
| "grad_norm": 0.0, |
| "learning_rate": 9.35197823226611e-05, |
| "loss": 1.49, |
| "step": 11880 |
| }, |
| { |
| "epoch": 1.6498300145701799, |
| "grad_norm": 0.0, |
| "learning_rate": 9.336553192488398e-05, |
| "loss": 1.5472, |
| "step": 11890 |
| }, |
| { |
| "epoch": 1.651217650731978, |
| "grad_norm": 0.0, |
| "learning_rate": 9.321129738078853e-05, |
| "loss": 1.5131, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.651217650731978, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 867.5789, |
| "eval_samples_per_second": 14.768, |
| "eval_steps_per_second": 1.847, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.6526052868937766, |
| "grad_norm": 0.0, |
| "learning_rate": 9.305707905893263e-05, |
| "loss": 1.5538, |
| "step": 11910 |
| }, |
| { |
| "epoch": 1.6539929230555748, |
| "grad_norm": 0.0, |
| "learning_rate": 9.29028773278353e-05, |
| "loss": 1.4998, |
| "step": 11920 |
| }, |
| { |
| "epoch": 1.6553805592173734, |
| "grad_norm": 0.0, |
| "learning_rate": 9.274869255597603e-05, |
| "loss": 1.5737, |
| "step": 11930 |
| }, |
| { |
| "epoch": 1.6567681953791715, |
| "grad_norm": 0.0, |
| "learning_rate": 9.259452511179374e-05, |
| "loss": 1.5355, |
| "step": 11940 |
| }, |
| { |
| "epoch": 1.65815583154097, |
| "grad_norm": 0.0, |
| "learning_rate": 9.244037536368602e-05, |
| "loss": 1.508, |
| "step": 11950 |
| }, |
| { |
| "epoch": 1.6595434677027683, |
| "grad_norm": 0.0, |
| "learning_rate": 9.228624368000798e-05, |
| "loss": 1.4927, |
| "step": 11960 |
| }, |
| { |
| "epoch": 1.6609311038645667, |
| "grad_norm": 0.0, |
| "learning_rate": 9.213213042907176e-05, |
| "loss": 1.5265, |
| "step": 11970 |
| }, |
| { |
| "epoch": 1.662318740026365, |
| "grad_norm": 0.0, |
| "learning_rate": 9.197803597914541e-05, |
| "loss": 1.6099, |
| "step": 11980 |
| }, |
| { |
| "epoch": 1.6637063761881634, |
| "grad_norm": 0.0, |
| "learning_rate": 9.182396069845192e-05, |
| "loss": 1.4851, |
| "step": 11990 |
| }, |
| { |
| "epoch": 1.6650940123499618, |
| "grad_norm": 0.0, |
| "learning_rate": 9.166990495516866e-05, |
| "loss": 1.5752, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.6650940123499618, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 875.5831, |
| "eval_samples_per_second": 14.633, |
| "eval_steps_per_second": 1.83, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.6664816485117602, |
| "grad_norm": 0.0, |
| "learning_rate": 9.151586911742617e-05, |
| "loss": 1.5579, |
| "step": 12010 |
| }, |
| { |
| "epoch": 1.6678692846735585, |
| "grad_norm": 0.0, |
| "learning_rate": 9.136185355330759e-05, |
| "loss": 1.5998, |
| "step": 12020 |
| }, |
| { |
| "epoch": 1.669256920835357, |
| "grad_norm": 0.0, |
| "learning_rate": 9.120785863084738e-05, |
| "loss": 1.6114, |
| "step": 12030 |
| }, |
| { |
| "epoch": 1.6706445569971553, |
| "grad_norm": 0.0, |
| "learning_rate": 9.105388471803087e-05, |
| "loss": 1.5483, |
| "step": 12040 |
| }, |
| { |
| "epoch": 1.6720321931589537, |
| "grad_norm": 0.0, |
| "learning_rate": 9.08999321827931e-05, |
| "loss": 1.4476, |
| "step": 12050 |
| }, |
| { |
| "epoch": 1.673419829320752, |
| "grad_norm": 0.0, |
| "learning_rate": 9.07460013930181e-05, |
| "loss": 1.5994, |
| "step": 12060 |
| }, |
| { |
| "epoch": 1.6748074654825504, |
| "grad_norm": 0.0, |
| "learning_rate": 9.059209271653779e-05, |
| "loss": 1.5157, |
| "step": 12070 |
| }, |
| { |
| "epoch": 1.6761951016443488, |
| "grad_norm": 0.0, |
| "learning_rate": 9.043820652113138e-05, |
| "loss": 1.5291, |
| "step": 12080 |
| }, |
| { |
| "epoch": 1.6775827378061472, |
| "grad_norm": 0.0, |
| "learning_rate": 9.028434317452437e-05, |
| "loss": 1.5008, |
| "step": 12090 |
| }, |
| { |
| "epoch": 1.6789703739679456, |
| "grad_norm": 0.0, |
| "learning_rate": 9.013050304438751e-05, |
| "loss": 1.4394, |
| "step": 12100 |
| }, |
| { |
| "epoch": 1.6789703739679456, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 864.7788, |
| "eval_samples_per_second": 14.815, |
| "eval_steps_per_second": 1.852, |
| "step": 12100 |
| }, |
| { |
| "epoch": 1.680358010129744, |
| "grad_norm": 0.0, |
| "learning_rate": 8.997668649833623e-05, |
| "loss": 1.5515, |
| "step": 12110 |
| }, |
| { |
| "epoch": 1.6817456462915423, |
| "grad_norm": 0.0, |
| "learning_rate": 8.982289390392954e-05, |
| "loss": 1.5852, |
| "step": 12120 |
| }, |
| { |
| "epoch": 1.6831332824533407, |
| "grad_norm": 0.0, |
| "learning_rate": 8.966912562866926e-05, |
| "loss": 1.5582, |
| "step": 12130 |
| }, |
| { |
| "epoch": 1.684520918615139, |
| "grad_norm": 0.0, |
| "learning_rate": 8.951538203999897e-05, |
| "loss": 1.5402, |
| "step": 12140 |
| }, |
| { |
| "epoch": 1.6859085547769375, |
| "grad_norm": 0.0, |
| "learning_rate": 8.936166350530341e-05, |
| "loss": 1.5596, |
| "step": 12150 |
| }, |
| { |
| "epoch": 1.6872961909387358, |
| "grad_norm": 0.0, |
| "learning_rate": 8.920797039190736e-05, |
| "loss": 1.5621, |
| "step": 12160 |
| }, |
| { |
| "epoch": 1.6886838271005342, |
| "grad_norm": 0.0, |
| "learning_rate": 8.905430306707495e-05, |
| "loss": 1.5881, |
| "step": 12170 |
| }, |
| { |
| "epoch": 1.6900714632623326, |
| "grad_norm": 0.0, |
| "learning_rate": 8.890066189800851e-05, |
| "loss": 1.5164, |
| "step": 12180 |
| }, |
| { |
| "epoch": 1.691459099424131, |
| "grad_norm": 0.0, |
| "learning_rate": 8.874704725184803e-05, |
| "loss": 1.5392, |
| "step": 12190 |
| }, |
| { |
| "epoch": 1.6928467355859294, |
| "grad_norm": 0.0, |
| "learning_rate": 8.859345949567012e-05, |
| "loss": 1.5654, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.6928467355859294, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 880.6667, |
| "eval_samples_per_second": 14.548, |
| "eval_steps_per_second": 1.819, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.6942343717477277, |
| "grad_norm": 0.0, |
| "learning_rate": 8.843989899648697e-05, |
| "loss": 1.5485, |
| "step": 12210 |
| }, |
| { |
| "epoch": 1.695622007909526, |
| "grad_norm": 0.0, |
| "learning_rate": 8.82863661212458e-05, |
| "loss": 1.6134, |
| "step": 12220 |
| }, |
| { |
| "epoch": 1.6970096440713245, |
| "grad_norm": 0.0, |
| "learning_rate": 8.813286123682777e-05, |
| "loss": 1.5393, |
| "step": 12230 |
| }, |
| { |
| "epoch": 1.6983972802331229, |
| "grad_norm": 0.0, |
| "learning_rate": 8.797938471004722e-05, |
| "loss": 1.544, |
| "step": 12240 |
| }, |
| { |
| "epoch": 1.6997849163949212, |
| "grad_norm": 0.0, |
| "learning_rate": 8.782593690765054e-05, |
| "loss": 1.5545, |
| "step": 12250 |
| }, |
| { |
| "epoch": 1.7011725525567196, |
| "grad_norm": 0.0, |
| "learning_rate": 8.767251819631562e-05, |
| "loss": 1.5748, |
| "step": 12260 |
| }, |
| { |
| "epoch": 1.702560188718518, |
| "grad_norm": 0.0, |
| "learning_rate": 8.751912894265088e-05, |
| "loss": 1.4681, |
| "step": 12270 |
| }, |
| { |
| "epoch": 1.7039478248803164, |
| "grad_norm": 0.0, |
| "learning_rate": 8.736576951319424e-05, |
| "loss": 1.537, |
| "step": 12280 |
| }, |
| { |
| "epoch": 1.7053354610421148, |
| "grad_norm": 0.0, |
| "learning_rate": 8.721244027441238e-05, |
| "loss": 1.4676, |
| "step": 12290 |
| }, |
| { |
| "epoch": 1.7067230972039131, |
| "grad_norm": 0.0, |
| "learning_rate": 8.705914159269985e-05, |
| "loss": 1.5405, |
| "step": 12300 |
| }, |
| { |
| "epoch": 1.7067230972039131, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 872.2109, |
| "eval_samples_per_second": 14.689, |
| "eval_steps_per_second": 1.837, |
| "step": 12300 |
| }, |
| { |
| "epoch": 1.7081107333657115, |
| "grad_norm": 0.0, |
| "learning_rate": 8.690587383437825e-05, |
| "loss": 1.5012, |
| "step": 12310 |
| }, |
| { |
| "epoch": 1.7094983695275099, |
| "grad_norm": 0.0, |
| "learning_rate": 8.67526373656951e-05, |
| "loss": 1.5688, |
| "step": 12320 |
| }, |
| { |
| "epoch": 1.7108860056893083, |
| "grad_norm": 0.0, |
| "learning_rate": 8.659943255282333e-05, |
| "loss": 1.4852, |
| "step": 12330 |
| }, |
| { |
| "epoch": 1.7122736418511066, |
| "grad_norm": 0.0, |
| "learning_rate": 8.644625976186018e-05, |
| "loss": 1.5695, |
| "step": 12340 |
| }, |
| { |
| "epoch": 1.713661278012905, |
| "grad_norm": 0.0, |
| "learning_rate": 8.629311935882634e-05, |
| "loss": 1.5267, |
| "step": 12350 |
| }, |
| { |
| "epoch": 1.7150489141747034, |
| "grad_norm": 0.0, |
| "learning_rate": 8.614001170966508e-05, |
| "loss": 1.5443, |
| "step": 12360 |
| }, |
| { |
| "epoch": 1.7164365503365018, |
| "grad_norm": 0.0, |
| "learning_rate": 8.598693718024147e-05, |
| "loss": 1.5144, |
| "step": 12370 |
| }, |
| { |
| "epoch": 1.7178241864983002, |
| "grad_norm": 0.0, |
| "learning_rate": 8.583389613634142e-05, |
| "loss": 1.532, |
| "step": 12380 |
| }, |
| { |
| "epoch": 1.7192118226600985, |
| "grad_norm": 0.0, |
| "learning_rate": 8.56808889436708e-05, |
| "loss": 1.5548, |
| "step": 12390 |
| }, |
| { |
| "epoch": 1.720599458821897, |
| "grad_norm": 0.0, |
| "learning_rate": 8.552791596785458e-05, |
| "loss": 1.5301, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.720599458821897, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 874.1704, |
| "eval_samples_per_second": 14.656, |
| "eval_steps_per_second": 1.833, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.7219870949836953, |
| "grad_norm": 0.0, |
| "learning_rate": 8.5374977574436e-05, |
| "loss": 1.5205, |
| "step": 12410 |
| }, |
| { |
| "epoch": 1.7233747311454937, |
| "grad_norm": 0.0, |
| "learning_rate": 8.522207412887568e-05, |
| "loss": 1.564, |
| "step": 12420 |
| }, |
| { |
| "epoch": 1.724762367307292, |
| "grad_norm": 0.0, |
| "learning_rate": 8.506920599655064e-05, |
| "loss": 1.6104, |
| "step": 12430 |
| }, |
| { |
| "epoch": 1.7261500034690904, |
| "grad_norm": 0.0, |
| "learning_rate": 8.491637354275358e-05, |
| "loss": 1.5709, |
| "step": 12440 |
| }, |
| { |
| "epoch": 1.7275376396308888, |
| "grad_norm": 0.0, |
| "learning_rate": 8.4763577132692e-05, |
| "loss": 1.4994, |
| "step": 12450 |
| }, |
| { |
| "epoch": 1.7289252757926872, |
| "grad_norm": 0.0, |
| "learning_rate": 8.461081713148715e-05, |
| "loss": 1.5472, |
| "step": 12460 |
| }, |
| { |
| "epoch": 1.7303129119544856, |
| "grad_norm": 0.0, |
| "learning_rate": 8.445809390417332e-05, |
| "loss": 1.5493, |
| "step": 12470 |
| }, |
| { |
| "epoch": 1.731700548116284, |
| "grad_norm": 0.0, |
| "learning_rate": 8.430540781569696e-05, |
| "loss": 1.5698, |
| "step": 12480 |
| }, |
| { |
| "epoch": 1.7330881842780823, |
| "grad_norm": 0.0, |
| "learning_rate": 8.415275923091577e-05, |
| "loss": 1.6143, |
| "step": 12490 |
| }, |
| { |
| "epoch": 1.7344758204398807, |
| "grad_norm": 0.0, |
| "learning_rate": 8.400014851459779e-05, |
| "loss": 1.5465, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.7344758204398807, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 885.8145, |
| "eval_samples_per_second": 14.464, |
| "eval_steps_per_second": 1.809, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.735863456601679, |
| "grad_norm": 0.0, |
| "learning_rate": 8.384757603142059e-05, |
| "loss": 1.4982, |
| "step": 12510 |
| }, |
| { |
| "epoch": 1.7372510927634774, |
| "grad_norm": 0.0, |
| "learning_rate": 8.369504214597039e-05, |
| "loss": 1.5044, |
| "step": 12520 |
| }, |
| { |
| "epoch": 1.7386387289252758, |
| "grad_norm": 0.0, |
| "learning_rate": 8.354254722274117e-05, |
| "loss": 1.5543, |
| "step": 12530 |
| }, |
| { |
| "epoch": 1.7400263650870742, |
| "grad_norm": 0.0, |
| "learning_rate": 8.339009162613379e-05, |
| "loss": 1.5783, |
| "step": 12540 |
| }, |
| { |
| "epoch": 1.7414140012488726, |
| "grad_norm": 0.0, |
| "learning_rate": 8.323767572045515e-05, |
| "loss": 1.4921, |
| "step": 12550 |
| }, |
| { |
| "epoch": 1.742801637410671, |
| "grad_norm": 0.0, |
| "learning_rate": 8.308529986991736e-05, |
| "loss": 1.4773, |
| "step": 12560 |
| }, |
| { |
| "epoch": 1.7441892735724693, |
| "grad_norm": 0.0, |
| "learning_rate": 8.293296443863668e-05, |
| "loss": 1.5926, |
| "step": 12570 |
| }, |
| { |
| "epoch": 1.7455769097342677, |
| "grad_norm": 0.0, |
| "learning_rate": 8.27806697906329e-05, |
| "loss": 1.5717, |
| "step": 12580 |
| }, |
| { |
| "epoch": 1.746964545896066, |
| "grad_norm": 0.0, |
| "learning_rate": 8.262841628982833e-05, |
| "loss": 1.6031, |
| "step": 12590 |
| }, |
| { |
| "epoch": 1.7483521820578645, |
| "grad_norm": 0.0, |
| "learning_rate": 8.24762043000469e-05, |
| "loss": 1.5612, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.7483521820578645, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 882.2732, |
| "eval_samples_per_second": 14.522, |
| "eval_steps_per_second": 1.816, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.7497398182196628, |
| "grad_norm": 0.0, |
| "learning_rate": 8.232403418501344e-05, |
| "loss": 1.592, |
| "step": 12610 |
| }, |
| { |
| "epoch": 1.7511274543814612, |
| "grad_norm": 0.0, |
| "learning_rate": 8.217190630835262e-05, |
| "loss": 1.585, |
| "step": 12620 |
| }, |
| { |
| "epoch": 1.7525150905432596, |
| "grad_norm": 0.0, |
| "learning_rate": 8.201982103358826e-05, |
| "loss": 1.5278, |
| "step": 12630 |
| }, |
| { |
| "epoch": 1.753902726705058, |
| "grad_norm": 0.0, |
| "learning_rate": 8.186777872414233e-05, |
| "loss": 1.5234, |
| "step": 12640 |
| }, |
| { |
| "epoch": 1.7552903628668564, |
| "grad_norm": 0.0, |
| "learning_rate": 8.171577974333411e-05, |
| "loss": 1.51, |
| "step": 12650 |
| }, |
| { |
| "epoch": 1.7566779990286547, |
| "grad_norm": 0.0, |
| "learning_rate": 8.156382445437942e-05, |
| "loss": 1.5068, |
| "step": 12660 |
| }, |
| { |
| "epoch": 1.7580656351904531, |
| "grad_norm": 0.0, |
| "learning_rate": 8.141191322038958e-05, |
| "loss": 1.4468, |
| "step": 12670 |
| }, |
| { |
| "epoch": 1.7594532713522515, |
| "grad_norm": 0.0, |
| "learning_rate": 8.126004640437073e-05, |
| "loss": 1.5458, |
| "step": 12680 |
| }, |
| { |
| "epoch": 1.7608409075140499, |
| "grad_norm": 0.0, |
| "learning_rate": 8.11082243692228e-05, |
| "loss": 1.5736, |
| "step": 12690 |
| }, |
| { |
| "epoch": 1.7622285436758482, |
| "grad_norm": 0.0, |
| "learning_rate": 8.095644747773874e-05, |
| "loss": 1.554, |
| "step": 12700 |
| }, |
| { |
| "epoch": 1.7622285436758482, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 874.4978, |
| "eval_samples_per_second": 14.651, |
| "eval_steps_per_second": 1.832, |
| "step": 12700 |
| }, |
| { |
| "epoch": 1.7636161798376464, |
| "grad_norm": 0.0, |
| "learning_rate": 8.080471609260361e-05, |
| "loss": 1.5562, |
| "step": 12710 |
| }, |
| { |
| "epoch": 1.765003815999445, |
| "grad_norm": 0.0, |
| "learning_rate": 8.065303057639377e-05, |
| "loss": 1.5142, |
| "step": 12720 |
| }, |
| { |
| "epoch": 1.7663914521612432, |
| "grad_norm": 0.0, |
| "learning_rate": 8.050139129157592e-05, |
| "loss": 1.5234, |
| "step": 12730 |
| }, |
| { |
| "epoch": 1.7677790883230418, |
| "grad_norm": 0.0, |
| "learning_rate": 8.034979860050627e-05, |
| "loss": 1.5836, |
| "step": 12740 |
| }, |
| { |
| "epoch": 1.76916672448484, |
| "grad_norm": 0.0, |
| "learning_rate": 8.019825286542979e-05, |
| "loss": 1.5147, |
| "step": 12750 |
| }, |
| { |
| "epoch": 1.7705543606466385, |
| "grad_norm": 0.0, |
| "learning_rate": 8.004675444847914e-05, |
| "loss": 1.5478, |
| "step": 12760 |
| }, |
| { |
| "epoch": 1.7719419968084367, |
| "grad_norm": 0.0, |
| "learning_rate": 7.989530371167397e-05, |
| "loss": 1.5685, |
| "step": 12770 |
| }, |
| { |
| "epoch": 1.7733296329702353, |
| "grad_norm": 0.0, |
| "learning_rate": 7.974390101691993e-05, |
| "loss": 1.5897, |
| "step": 12780 |
| }, |
| { |
| "epoch": 1.7747172691320334, |
| "grad_norm": 0.0, |
| "learning_rate": 7.959254672600799e-05, |
| "loss": 1.559, |
| "step": 12790 |
| }, |
| { |
| "epoch": 1.776104905293832, |
| "grad_norm": 0.0, |
| "learning_rate": 7.944124120061332e-05, |
| "loss": 1.5369, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.776104905293832, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 875.101, |
| "eval_samples_per_second": 14.641, |
| "eval_steps_per_second": 1.831, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.7774925414556302, |
| "grad_norm": 0.0, |
| "learning_rate": 7.928998480229461e-05, |
| "loss": 1.5482, |
| "step": 12810 |
| }, |
| { |
| "epoch": 1.7788801776174288, |
| "grad_norm": 0.0, |
| "learning_rate": 7.913877789249319e-05, |
| "loss": 1.5226, |
| "step": 12820 |
| }, |
| { |
| "epoch": 1.780267813779227, |
| "grad_norm": 0.0, |
| "learning_rate": 7.89876208325321e-05, |
| "loss": 1.5313, |
| "step": 12830 |
| }, |
| { |
| "epoch": 1.7816554499410255, |
| "grad_norm": 0.0, |
| "learning_rate": 7.883651398361529e-05, |
| "loss": 1.542, |
| "step": 12840 |
| }, |
| { |
| "epoch": 1.7830430861028237, |
| "grad_norm": 0.0, |
| "learning_rate": 7.868545770682663e-05, |
| "loss": 1.5335, |
| "step": 12850 |
| }, |
| { |
| "epoch": 1.7844307222646223, |
| "grad_norm": 0.0, |
| "learning_rate": 7.853445236312931e-05, |
| "loss": 1.56, |
| "step": 12860 |
| }, |
| { |
| "epoch": 1.7858183584264204, |
| "grad_norm": 0.0, |
| "learning_rate": 7.838349831336461e-05, |
| "loss": 1.5989, |
| "step": 12870 |
| }, |
| { |
| "epoch": 1.787205994588219, |
| "grad_norm": 0.0, |
| "learning_rate": 7.823259591825144e-05, |
| "loss": 1.5369, |
| "step": 12880 |
| }, |
| { |
| "epoch": 1.7885936307500172, |
| "grad_norm": 0.0, |
| "learning_rate": 7.80817455383851e-05, |
| "loss": 1.6173, |
| "step": 12890 |
| }, |
| { |
| "epoch": 1.7899812669118158, |
| "grad_norm": 0.0, |
| "learning_rate": 7.793094753423674e-05, |
| "loss": 1.5219, |
| "step": 12900 |
| }, |
| { |
| "epoch": 1.7899812669118158, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 880.1196, |
| "eval_samples_per_second": 14.557, |
| "eval_steps_per_second": 1.82, |
| "step": 12900 |
| }, |
| { |
| "epoch": 1.791368903073614, |
| "grad_norm": 0.0, |
| "learning_rate": 7.778020226615225e-05, |
| "loss": 1.565, |
| "step": 12910 |
| }, |
| { |
| "epoch": 1.7927565392354126, |
| "grad_norm": 0.0, |
| "learning_rate": 7.762951009435154e-05, |
| "loss": 1.4944, |
| "step": 12920 |
| }, |
| { |
| "epoch": 1.7941441753972107, |
| "grad_norm": 0.0, |
| "learning_rate": 7.747887137892762e-05, |
| "loss": 1.5121, |
| "step": 12930 |
| }, |
| { |
| "epoch": 1.7955318115590093, |
| "grad_norm": 0.0, |
| "learning_rate": 7.732828647984586e-05, |
| "loss": 1.5269, |
| "step": 12940 |
| }, |
| { |
| "epoch": 1.7969194477208075, |
| "grad_norm": 0.0, |
| "learning_rate": 7.717775575694288e-05, |
| "loss": 1.5544, |
| "step": 12950 |
| }, |
| { |
| "epoch": 1.798307083882606, |
| "grad_norm": 0.0, |
| "learning_rate": 7.702727956992593e-05, |
| "loss": 1.475, |
| "step": 12960 |
| }, |
| { |
| "epoch": 1.7996947200444042, |
| "grad_norm": 0.0, |
| "learning_rate": 7.687685827837196e-05, |
| "loss": 1.5259, |
| "step": 12970 |
| }, |
| { |
| "epoch": 1.8010823562062028, |
| "grad_norm": 0.0, |
| "learning_rate": 7.672649224172667e-05, |
| "loss": 1.4519, |
| "step": 12980 |
| }, |
| { |
| "epoch": 1.802469992368001, |
| "grad_norm": 0.0, |
| "learning_rate": 7.657618181930375e-05, |
| "loss": 1.5886, |
| "step": 12990 |
| }, |
| { |
| "epoch": 1.8038576285297996, |
| "grad_norm": 0.0, |
| "learning_rate": 7.642592737028403e-05, |
| "loss": 1.6084, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.8038576285297996, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 877.6719, |
| "eval_samples_per_second": 14.598, |
| "eval_steps_per_second": 1.825, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.8052452646915977, |
| "grad_norm": 0.0, |
| "learning_rate": 7.627572925371458e-05, |
| "loss": 1.5702, |
| "step": 13010 |
| }, |
| { |
| "epoch": 1.8066329008533963, |
| "grad_norm": 0.0, |
| "learning_rate": 7.612558782850781e-05, |
| "loss": 1.6286, |
| "step": 13020 |
| }, |
| { |
| "epoch": 1.8080205370151945, |
| "grad_norm": 0.0, |
| "learning_rate": 7.597550345344068e-05, |
| "loss": 1.5058, |
| "step": 13030 |
| }, |
| { |
| "epoch": 1.809408173176993, |
| "grad_norm": 0.0, |
| "learning_rate": 7.582547648715385e-05, |
| "loss": 1.4792, |
| "step": 13040 |
| }, |
| { |
| "epoch": 1.8107958093387913, |
| "grad_norm": 0.0, |
| "learning_rate": 7.567550728815085e-05, |
| "loss": 1.5139, |
| "step": 13050 |
| }, |
| { |
| "epoch": 1.8121834455005899, |
| "grad_norm": 0.0, |
| "learning_rate": 7.552559621479697e-05, |
| "loss": 1.5075, |
| "step": 13060 |
| }, |
| { |
| "epoch": 1.813571081662388, |
| "grad_norm": 0.0, |
| "learning_rate": 7.53757436253188e-05, |
| "loss": 1.5765, |
| "step": 13070 |
| }, |
| { |
| "epoch": 1.8149587178241866, |
| "grad_norm": 0.0, |
| "learning_rate": 7.522594987780312e-05, |
| "loss": 1.5871, |
| "step": 13080 |
| }, |
| { |
| "epoch": 1.8163463539859848, |
| "grad_norm": 0.0, |
| "learning_rate": 7.50762153301961e-05, |
| "loss": 1.5039, |
| "step": 13090 |
| }, |
| { |
| "epoch": 1.8177339901477834, |
| "grad_norm": 0.0, |
| "learning_rate": 7.492654034030238e-05, |
| "loss": 1.5577, |
| "step": 13100 |
| }, |
| { |
| "epoch": 1.8177339901477834, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 874.1669, |
| "eval_samples_per_second": 14.656, |
| "eval_steps_per_second": 1.833, |
| "step": 13100 |
| }, |
| { |
| "epoch": 1.8191216263095815, |
| "grad_norm": 0.0, |
| "learning_rate": 7.477692526578439e-05, |
| "loss": 1.5594, |
| "step": 13110 |
| }, |
| { |
| "epoch": 1.8205092624713801, |
| "grad_norm": 0.0, |
| "learning_rate": 7.462737046416136e-05, |
| "loss": 1.5738, |
| "step": 13120 |
| }, |
| { |
| "epoch": 1.8218968986331783, |
| "grad_norm": 0.0, |
| "learning_rate": 7.44778762928084e-05, |
| "loss": 1.5344, |
| "step": 13130 |
| }, |
| { |
| "epoch": 1.8232845347949769, |
| "grad_norm": 0.0, |
| "learning_rate": 7.432844310895584e-05, |
| "loss": 1.5409, |
| "step": 13140 |
| }, |
| { |
| "epoch": 1.824672170956775, |
| "grad_norm": 0.0, |
| "learning_rate": 7.417907126968823e-05, |
| "loss": 1.5455, |
| "step": 13150 |
| }, |
| { |
| "epoch": 1.8260598071185736, |
| "grad_norm": 0.0, |
| "learning_rate": 7.40297611319436e-05, |
| "loss": 1.5883, |
| "step": 13160 |
| }, |
| { |
| "epoch": 1.8274474432803718, |
| "grad_norm": 0.0, |
| "learning_rate": 7.38805130525124e-05, |
| "loss": 1.5222, |
| "step": 13170 |
| }, |
| { |
| "epoch": 1.8288350794421704, |
| "grad_norm": 0.0, |
| "learning_rate": 7.373132738803692e-05, |
| "loss": 1.5567, |
| "step": 13180 |
| }, |
| { |
| "epoch": 1.8302227156039685, |
| "grad_norm": 0.0, |
| "learning_rate": 7.358220449501025e-05, |
| "loss": 1.5775, |
| "step": 13190 |
| }, |
| { |
| "epoch": 1.8316103517657671, |
| "grad_norm": 0.0, |
| "learning_rate": 7.343314472977545e-05, |
| "loss": 1.5057, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.8316103517657671, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 872.9713, |
| "eval_samples_per_second": 14.676, |
| "eval_steps_per_second": 1.835, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.8329979879275653, |
| "grad_norm": 0.0, |
| "learning_rate": 7.328414844852478e-05, |
| "loss": 1.5522, |
| "step": 13210 |
| }, |
| { |
| "epoch": 1.834385624089364, |
| "grad_norm": 0.0, |
| "learning_rate": 7.313521600729878e-05, |
| "loss": 1.5101, |
| "step": 13220 |
| }, |
| { |
| "epoch": 1.835773260251162, |
| "grad_norm": 0.0, |
| "learning_rate": 7.298634776198548e-05, |
| "loss": 1.5026, |
| "step": 13230 |
| }, |
| { |
| "epoch": 1.8371608964129607, |
| "grad_norm": 0.0, |
| "learning_rate": 7.28375440683194e-05, |
| "loss": 1.5102, |
| "step": 13240 |
| }, |
| { |
| "epoch": 1.8385485325747588, |
| "grad_norm": 0.0, |
| "learning_rate": 7.26888052818809e-05, |
| "loss": 1.5971, |
| "step": 13250 |
| }, |
| { |
| "epoch": 1.8399361687365574, |
| "grad_norm": 0.0, |
| "learning_rate": 7.254013175809523e-05, |
| "loss": 1.5755, |
| "step": 13260 |
| }, |
| { |
| "epoch": 1.8413238048983556, |
| "grad_norm": 0.0, |
| "learning_rate": 7.239152385223171e-05, |
| "loss": 1.6045, |
| "step": 13270 |
| }, |
| { |
| "epoch": 1.8427114410601542, |
| "grad_norm": 0.0, |
| "learning_rate": 7.224298191940272e-05, |
| "loss": 1.5351, |
| "step": 13280 |
| }, |
| { |
| "epoch": 1.8440990772219523, |
| "grad_norm": 0.0, |
| "learning_rate": 7.209450631456318e-05, |
| "loss": 1.552, |
| "step": 13290 |
| }, |
| { |
| "epoch": 1.845486713383751, |
| "grad_norm": 0.0, |
| "learning_rate": 7.194609739250944e-05, |
| "loss": 1.508, |
| "step": 13300 |
| }, |
| { |
| "epoch": 1.845486713383751, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 875.2085, |
| "eval_samples_per_second": 14.639, |
| "eval_steps_per_second": 1.83, |
| "step": 13300 |
| }, |
| { |
| "epoch": 1.846874349545549, |
| "grad_norm": 0.0, |
| "learning_rate": 7.17977555078784e-05, |
| "loss": 1.5756, |
| "step": 13310 |
| }, |
| { |
| "epoch": 1.8482619857073477, |
| "grad_norm": 0.0, |
| "learning_rate": 7.164948101514692e-05, |
| "loss": 1.5041, |
| "step": 13320 |
| }, |
| { |
| "epoch": 1.8496496218691458, |
| "grad_norm": 0.0, |
| "learning_rate": 7.150127426863076e-05, |
| "loss": 1.5462, |
| "step": 13330 |
| }, |
| { |
| "epoch": 1.8510372580309444, |
| "grad_norm": 0.0, |
| "learning_rate": 7.135313562248383e-05, |
| "loss": 1.5198, |
| "step": 13340 |
| }, |
| { |
| "epoch": 1.8524248941927426, |
| "grad_norm": 0.0, |
| "learning_rate": 7.120506543069718e-05, |
| "loss": 1.5542, |
| "step": 13350 |
| }, |
| { |
| "epoch": 1.8538125303545412, |
| "grad_norm": 0.0, |
| "learning_rate": 7.105706404709843e-05, |
| "loss": 1.5656, |
| "step": 13360 |
| }, |
| { |
| "epoch": 1.8552001665163393, |
| "grad_norm": 0.0, |
| "learning_rate": 7.090913182535073e-05, |
| "loss": 1.5231, |
| "step": 13370 |
| }, |
| { |
| "epoch": 1.856587802678138, |
| "grad_norm": 0.0, |
| "learning_rate": 7.076126911895197e-05, |
| "loss": 1.4914, |
| "step": 13380 |
| }, |
| { |
| "epoch": 1.857975438839936, |
| "grad_norm": 0.0, |
| "learning_rate": 7.061347628123385e-05, |
| "loss": 1.6405, |
| "step": 13390 |
| }, |
| { |
| "epoch": 1.8593630750017347, |
| "grad_norm": 0.0, |
| "learning_rate": 7.04657536653612e-05, |
| "loss": 1.5911, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.8593630750017347, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 876.3422, |
| "eval_samples_per_second": 14.62, |
| "eval_steps_per_second": 1.828, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.8607507111635329, |
| "grad_norm": 0.0, |
| "learning_rate": 7.031810162433106e-05, |
| "loss": 1.4791, |
| "step": 13410 |
| }, |
| { |
| "epoch": 1.8621383473253315, |
| "grad_norm": 0.0, |
| "learning_rate": 7.01705205109717e-05, |
| "loss": 1.5813, |
| "step": 13420 |
| }, |
| { |
| "epoch": 1.8635259834871296, |
| "grad_norm": 0.0, |
| "learning_rate": 7.002301067794204e-05, |
| "loss": 1.5423, |
| "step": 13430 |
| }, |
| { |
| "epoch": 1.8649136196489282, |
| "grad_norm": 0.0, |
| "learning_rate": 6.98755724777306e-05, |
| "loss": 1.5634, |
| "step": 13440 |
| }, |
| { |
| "epoch": 1.8663012558107264, |
| "grad_norm": 0.0, |
| "learning_rate": 6.97282062626548e-05, |
| "loss": 1.4679, |
| "step": 13450 |
| }, |
| { |
| "epoch": 1.867688891972525, |
| "grad_norm": 0.0, |
| "learning_rate": 6.958091238485989e-05, |
| "loss": 1.5489, |
| "step": 13460 |
| }, |
| { |
| "epoch": 1.8690765281343231, |
| "grad_norm": 0.0, |
| "learning_rate": 6.943369119631841e-05, |
| "loss": 1.4743, |
| "step": 13470 |
| }, |
| { |
| "epoch": 1.8704641642961217, |
| "grad_norm": 0.0, |
| "learning_rate": 6.928654304882916e-05, |
| "loss": 1.5471, |
| "step": 13480 |
| }, |
| { |
| "epoch": 1.8718518004579199, |
| "grad_norm": 0.0, |
| "learning_rate": 6.91394682940164e-05, |
| "loss": 1.5452, |
| "step": 13490 |
| }, |
| { |
| "epoch": 1.8732394366197183, |
| "grad_norm": 0.0, |
| "learning_rate": 6.899246728332895e-05, |
| "loss": 1.5628, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.8732394366197183, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 878.2498, |
| "eval_samples_per_second": 14.588, |
| "eval_steps_per_second": 1.824, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.8746270727815166, |
| "grad_norm": 0.0, |
| "learning_rate": 6.884554036803952e-05, |
| "loss": 1.5329, |
| "step": 13510 |
| }, |
| { |
| "epoch": 1.876014708943315, |
| "grad_norm": 0.0, |
| "learning_rate": 6.869868789924372e-05, |
| "loss": 1.5281, |
| "step": 13520 |
| }, |
| { |
| "epoch": 1.8774023451051134, |
| "grad_norm": 0.0, |
| "learning_rate": 6.855191022785918e-05, |
| "loss": 1.5502, |
| "step": 13530 |
| }, |
| { |
| "epoch": 1.8787899812669118, |
| "grad_norm": 0.0, |
| "learning_rate": 6.840520770462494e-05, |
| "loss": 1.4811, |
| "step": 13540 |
| }, |
| { |
| "epoch": 1.8801776174287101, |
| "grad_norm": 0.0, |
| "learning_rate": 6.825858068010034e-05, |
| "loss": 1.5324, |
| "step": 13550 |
| }, |
| { |
| "epoch": 1.8815652535905085, |
| "grad_norm": 0.0, |
| "learning_rate": 6.811202950466442e-05, |
| "loss": 1.5537, |
| "step": 13560 |
| }, |
| { |
| "epoch": 1.882952889752307, |
| "grad_norm": 0.0, |
| "learning_rate": 6.796555452851485e-05, |
| "loss": 1.4715, |
| "step": 13570 |
| }, |
| { |
| "epoch": 1.8843405259141053, |
| "grad_norm": 0.0, |
| "learning_rate": 6.781915610166731e-05, |
| "loss": 1.5999, |
| "step": 13580 |
| }, |
| { |
| "epoch": 1.8857281620759037, |
| "grad_norm": 0.0, |
| "learning_rate": 6.767283457395453e-05, |
| "loss": 1.5633, |
| "step": 13590 |
| }, |
| { |
| "epoch": 1.887115798237702, |
| "grad_norm": 0.0, |
| "learning_rate": 6.752659029502548e-05, |
| "loss": 1.4866, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.887115798237702, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 884.2082, |
| "eval_samples_per_second": 14.49, |
| "eval_steps_per_second": 1.812, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.8885034343995004, |
| "grad_norm": 0.0, |
| "learning_rate": 6.738042361434451e-05, |
| "loss": 1.5527, |
| "step": 13610 |
| }, |
| { |
| "epoch": 1.8898910705612988, |
| "grad_norm": 0.0, |
| "learning_rate": 6.72343348811906e-05, |
| "loss": 1.5674, |
| "step": 13620 |
| }, |
| { |
| "epoch": 1.8912787067230972, |
| "grad_norm": 0.0, |
| "learning_rate": 6.708832444465644e-05, |
| "loss": 1.573, |
| "step": 13630 |
| }, |
| { |
| "epoch": 1.8926663428848955, |
| "grad_norm": 0.0, |
| "learning_rate": 6.694239265364756e-05, |
| "loss": 1.5562, |
| "step": 13640 |
| }, |
| { |
| "epoch": 1.894053979046694, |
| "grad_norm": 0.0, |
| "learning_rate": 6.679653985688165e-05, |
| "loss": 1.6307, |
| "step": 13650 |
| }, |
| { |
| "epoch": 1.8954416152084923, |
| "grad_norm": 0.0, |
| "learning_rate": 6.665076640288761e-05, |
| "loss": 1.5187, |
| "step": 13660 |
| }, |
| { |
| "epoch": 1.8968292513702907, |
| "grad_norm": 0.0, |
| "learning_rate": 6.650507264000476e-05, |
| "loss": 1.5494, |
| "step": 13670 |
| }, |
| { |
| "epoch": 1.898216887532089, |
| "grad_norm": 0.0, |
| "learning_rate": 6.63594589163819e-05, |
| "loss": 1.506, |
| "step": 13680 |
| }, |
| { |
| "epoch": 1.8996045236938874, |
| "grad_norm": 0.0, |
| "learning_rate": 6.621392557997667e-05, |
| "loss": 1.5752, |
| "step": 13690 |
| }, |
| { |
| "epoch": 1.9009921598556858, |
| "grad_norm": 0.0, |
| "learning_rate": 6.606847297855459e-05, |
| "loss": 1.5399, |
| "step": 13700 |
| }, |
| { |
| "epoch": 1.9009921598556858, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 887.0642, |
| "eval_samples_per_second": 14.443, |
| "eval_steps_per_second": 1.806, |
| "step": 13700 |
| }, |
| { |
| "epoch": 1.9023797960174842, |
| "grad_norm": 0.0, |
| "learning_rate": 6.592310145968828e-05, |
| "loss": 1.5826, |
| "step": 13710 |
| }, |
| { |
| "epoch": 1.9037674321792826, |
| "grad_norm": 0.0, |
| "learning_rate": 6.577781137075647e-05, |
| "loss": 1.5534, |
| "step": 13720 |
| }, |
| { |
| "epoch": 1.905155068341081, |
| "grad_norm": 0.0, |
| "learning_rate": 6.563260305894349e-05, |
| "loss": 1.665, |
| "step": 13730 |
| }, |
| { |
| "epoch": 1.9065427045028793, |
| "grad_norm": 0.0, |
| "learning_rate": 6.54874768712382e-05, |
| "loss": 1.4964, |
| "step": 13740 |
| }, |
| { |
| "epoch": 1.9079303406646777, |
| "grad_norm": 0.0, |
| "learning_rate": 6.534243315443311e-05, |
| "loss": 1.5646, |
| "step": 13750 |
| }, |
| { |
| "epoch": 1.909317976826476, |
| "grad_norm": 0.0, |
| "learning_rate": 6.519747225512377e-05, |
| "loss": 1.5572, |
| "step": 13760 |
| }, |
| { |
| "epoch": 1.9107056129882745, |
| "grad_norm": 0.0, |
| "learning_rate": 6.505259451970782e-05, |
| "loss": 1.5625, |
| "step": 13770 |
| }, |
| { |
| "epoch": 1.9120932491500728, |
| "grad_norm": 0.0, |
| "learning_rate": 6.490780029438417e-05, |
| "loss": 1.4955, |
| "step": 13780 |
| }, |
| { |
| "epoch": 1.9134808853118712, |
| "grad_norm": 0.0, |
| "learning_rate": 6.47630899251521e-05, |
| "loss": 1.5368, |
| "step": 13790 |
| }, |
| { |
| "epoch": 1.9148685214736696, |
| "grad_norm": 0.0, |
| "learning_rate": 6.461846375781058e-05, |
| "loss": 1.494, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.9148685214736696, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 922.4284, |
| "eval_samples_per_second": 13.889, |
| "eval_steps_per_second": 1.737, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.916256157635468, |
| "grad_norm": 0.0, |
| "learning_rate": 6.447392213795737e-05, |
| "loss": 1.5269, |
| "step": 13810 |
| }, |
| { |
| "epoch": 1.9176437937972663, |
| "grad_norm": 0.0, |
| "learning_rate": 6.432946541098823e-05, |
| "loss": 1.5747, |
| "step": 13820 |
| }, |
| { |
| "epoch": 1.9190314299590647, |
| "grad_norm": 0.0, |
| "learning_rate": 6.418509392209593e-05, |
| "loss": 1.5638, |
| "step": 13830 |
| }, |
| { |
| "epoch": 1.920419066120863, |
| "grad_norm": 0.0, |
| "learning_rate": 6.404080801626966e-05, |
| "loss": 1.556, |
| "step": 13840 |
| }, |
| { |
| "epoch": 1.9218067022826615, |
| "grad_norm": 0.0, |
| "learning_rate": 6.389660803829414e-05, |
| "loss": 1.5173, |
| "step": 13850 |
| }, |
| { |
| "epoch": 1.9231943384444599, |
| "grad_norm": 0.0, |
| "learning_rate": 6.375249433274861e-05, |
| "loss": 1.5467, |
| "step": 13860 |
| }, |
| { |
| "epoch": 1.9245819746062582, |
| "grad_norm": 0.0, |
| "learning_rate": 6.360846724400628e-05, |
| "loss": 1.6175, |
| "step": 13870 |
| }, |
| { |
| "epoch": 1.9259696107680566, |
| "grad_norm": 0.0, |
| "learning_rate": 6.346452711623334e-05, |
| "loss": 1.5365, |
| "step": 13880 |
| }, |
| { |
| "epoch": 1.927357246929855, |
| "grad_norm": 0.0, |
| "learning_rate": 6.332067429338824e-05, |
| "loss": 1.5158, |
| "step": 13890 |
| }, |
| { |
| "epoch": 1.9287448830916534, |
| "grad_norm": 0.0, |
| "learning_rate": 6.317690911922063e-05, |
| "loss": 1.5795, |
| "step": 13900 |
| }, |
| { |
| "epoch": 1.9287448830916534, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 925.1585, |
| "eval_samples_per_second": 13.848, |
| "eval_steps_per_second": 1.732, |
| "step": 13900 |
| }, |
| { |
| "epoch": 1.9301325192534518, |
| "grad_norm": 0.0, |
| "learning_rate": 6.30332319372709e-05, |
| "loss": 1.5182, |
| "step": 13910 |
| }, |
| { |
| "epoch": 1.9315201554152501, |
| "grad_norm": 0.0, |
| "learning_rate": 6.28896430908691e-05, |
| "loss": 1.5741, |
| "step": 13920 |
| }, |
| { |
| "epoch": 1.9329077915770485, |
| "grad_norm": 0.0, |
| "learning_rate": 6.274614292313425e-05, |
| "loss": 1.5026, |
| "step": 13930 |
| }, |
| { |
| "epoch": 1.9342954277388469, |
| "grad_norm": 0.0, |
| "learning_rate": 6.260273177697334e-05, |
| "loss": 1.5389, |
| "step": 13940 |
| }, |
| { |
| "epoch": 1.9356830639006453, |
| "grad_norm": 0.0, |
| "learning_rate": 6.245940999508071e-05, |
| "loss": 1.5421, |
| "step": 13950 |
| }, |
| { |
| "epoch": 1.9370707000624436, |
| "grad_norm": 0.0, |
| "learning_rate": 6.231617791993724e-05, |
| "loss": 1.5632, |
| "step": 13960 |
| }, |
| { |
| "epoch": 1.938458336224242, |
| "grad_norm": 0.0, |
| "learning_rate": 6.217303589380925e-05, |
| "loss": 1.5596, |
| "step": 13970 |
| }, |
| { |
| "epoch": 1.9398459723860404, |
| "grad_norm": 0.0, |
| "learning_rate": 6.202998425874806e-05, |
| "loss": 1.5452, |
| "step": 13980 |
| }, |
| { |
| "epoch": 1.9412336085478388, |
| "grad_norm": 0.0, |
| "learning_rate": 6.188702335658892e-05, |
| "loss": 1.4568, |
| "step": 13990 |
| }, |
| { |
| "epoch": 1.9426212447096372, |
| "grad_norm": 0.0, |
| "learning_rate": 6.174415352895029e-05, |
| "loss": 1.5651, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.9426212447096372, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 935.3872, |
| "eval_samples_per_second": 13.697, |
| "eval_steps_per_second": 1.713, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.9440088808714355, |
| "grad_norm": 0.0, |
| "learning_rate": 6.160137511723291e-05, |
| "loss": 1.541, |
| "step": 14010 |
| }, |
| { |
| "epoch": 1.945396517033234, |
| "grad_norm": 0.0, |
| "learning_rate": 6.14586884626192e-05, |
| "loss": 1.6313, |
| "step": 14020 |
| }, |
| { |
| "epoch": 1.9467841531950323, |
| "grad_norm": 0.0, |
| "learning_rate": 6.131609390607223e-05, |
| "loss": 1.5859, |
| "step": 14030 |
| }, |
| { |
| "epoch": 1.9481717893568307, |
| "grad_norm": 0.0, |
| "learning_rate": 6.117359178833508e-05, |
| "loss": 1.5797, |
| "step": 14040 |
| }, |
| { |
| "epoch": 1.949559425518629, |
| "grad_norm": 0.0, |
| "learning_rate": 6.103118244992978e-05, |
| "loss": 1.5444, |
| "step": 14050 |
| }, |
| { |
| "epoch": 1.9509470616804274, |
| "grad_norm": 0.0, |
| "learning_rate": 6.0888866231156836e-05, |
| "loss": 1.4658, |
| "step": 14060 |
| }, |
| { |
| "epoch": 1.9523346978422258, |
| "grad_norm": 0.0, |
| "learning_rate": 6.0746643472094155e-05, |
| "loss": 1.606, |
| "step": 14070 |
| }, |
| { |
| "epoch": 1.9537223340040242, |
| "grad_norm": 0.0, |
| "learning_rate": 6.060451451259627e-05, |
| "loss": 1.4978, |
| "step": 14080 |
| }, |
| { |
| "epoch": 1.9551099701658226, |
| "grad_norm": 0.0, |
| "learning_rate": 6.0462479692293616e-05, |
| "loss": 1.5408, |
| "step": 14090 |
| }, |
| { |
| "epoch": 1.956497606327621, |
| "grad_norm": 0.0, |
| "learning_rate": 6.032053935059172e-05, |
| "loss": 1.5434, |
| "step": 14100 |
| }, |
| { |
| "epoch": 1.956497606327621, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 871.9972, |
| "eval_samples_per_second": 14.693, |
| "eval_steps_per_second": 1.837, |
| "step": 14100 |
| }, |
| { |
| "epoch": 1.9578852424894193, |
| "grad_norm": 0.0, |
| "learning_rate": 6.01786938266703e-05, |
| "loss": 1.4944, |
| "step": 14110 |
| }, |
| { |
| "epoch": 1.9592728786512177, |
| "grad_norm": 0.0, |
| "learning_rate": 6.003694345948243e-05, |
| "loss": 1.61, |
| "step": 14120 |
| }, |
| { |
| "epoch": 1.960660514813016, |
| "grad_norm": 0.0, |
| "learning_rate": 5.989528858775391e-05, |
| "loss": 1.5269, |
| "step": 14130 |
| }, |
| { |
| "epoch": 1.9620481509748144, |
| "grad_norm": 0.0, |
| "learning_rate": 5.975372954998228e-05, |
| "loss": 1.5771, |
| "step": 14140 |
| }, |
| { |
| "epoch": 1.9634357871366128, |
| "grad_norm": 0.0, |
| "learning_rate": 5.9612266684436136e-05, |
| "loss": 1.5687, |
| "step": 14150 |
| }, |
| { |
| "epoch": 1.9648234232984112, |
| "grad_norm": 0.0, |
| "learning_rate": 5.9470900329154164e-05, |
| "loss": 1.533, |
| "step": 14160 |
| }, |
| { |
| "epoch": 1.9662110594602096, |
| "grad_norm": 0.0, |
| "learning_rate": 5.932963082194449e-05, |
| "loss": 1.5323, |
| "step": 14170 |
| }, |
| { |
| "epoch": 1.967598695622008, |
| "grad_norm": 0.0, |
| "learning_rate": 5.918845850038388e-05, |
| "loss": 1.556, |
| "step": 14180 |
| }, |
| { |
| "epoch": 1.9689863317838063, |
| "grad_norm": 0.0, |
| "learning_rate": 5.9047383701816684e-05, |
| "loss": 1.5338, |
| "step": 14190 |
| }, |
| { |
| "epoch": 1.9703739679456047, |
| "grad_norm": 0.0, |
| "learning_rate": 5.890640676335439e-05, |
| "loss": 1.557, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.9703739679456047, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 872.3008, |
| "eval_samples_per_second": 14.688, |
| "eval_steps_per_second": 1.837, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.971761604107403, |
| "grad_norm": 0.0, |
| "learning_rate": 5.876552802187454e-05, |
| "loss": 1.551, |
| "step": 14210 |
| }, |
| { |
| "epoch": 1.9731492402692015, |
| "grad_norm": 0.0, |
| "learning_rate": 5.862474781402012e-05, |
| "loss": 1.4758, |
| "step": 14220 |
| }, |
| { |
| "epoch": 1.9745368764309998, |
| "grad_norm": 0.0, |
| "learning_rate": 5.8484066476198506e-05, |
| "loss": 1.5223, |
| "step": 14230 |
| }, |
| { |
| "epoch": 1.9759245125927982, |
| "grad_norm": 0.0, |
| "learning_rate": 5.834348434458097e-05, |
| "loss": 1.5176, |
| "step": 14240 |
| }, |
| { |
| "epoch": 1.9773121487545966, |
| "grad_norm": 0.0, |
| "learning_rate": 5.8203001755101616e-05, |
| "loss": 1.5878, |
| "step": 14250 |
| }, |
| { |
| "epoch": 1.9786997849163948, |
| "grad_norm": 0.0, |
| "learning_rate": 5.8062619043456775e-05, |
| "loss": 1.5916, |
| "step": 14260 |
| }, |
| { |
| "epoch": 1.9800874210781934, |
| "grad_norm": 0.0, |
| "learning_rate": 5.792233654510399e-05, |
| "loss": 1.6007, |
| "step": 14270 |
| }, |
| { |
| "epoch": 1.9814750572399915, |
| "grad_norm": 0.0, |
| "learning_rate": 5.778215459526145e-05, |
| "loss": 1.6015, |
| "step": 14280 |
| }, |
| { |
| "epoch": 1.98286269340179, |
| "grad_norm": 0.0, |
| "learning_rate": 5.764207352890702e-05, |
| "loss": 1.624, |
| "step": 14290 |
| }, |
| { |
| "epoch": 1.9842503295635883, |
| "grad_norm": 0.0, |
| "learning_rate": 5.750209368077754e-05, |
| "loss": 1.5387, |
| "step": 14300 |
| }, |
| { |
| "epoch": 1.9842503295635883, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 896.0816, |
| "eval_samples_per_second": 14.298, |
| "eval_steps_per_second": 1.788, |
| "step": 14300 |
| }, |
| { |
| "epoch": 1.9856379657253869, |
| "grad_norm": 0.0, |
| "learning_rate": 5.736221538536786e-05, |
| "loss": 1.5593, |
| "step": 14310 |
| }, |
| { |
| "epoch": 1.987025601887185, |
| "grad_norm": 0.0, |
| "learning_rate": 5.7222438976930295e-05, |
| "loss": 1.5958, |
| "step": 14320 |
| }, |
| { |
| "epoch": 1.9884132380489836, |
| "grad_norm": 0.0, |
| "learning_rate": 5.708276478947362e-05, |
| "loss": 1.5773, |
| "step": 14330 |
| }, |
| { |
| "epoch": 1.9898008742107818, |
| "grad_norm": 0.0, |
| "learning_rate": 5.694319315676242e-05, |
| "loss": 1.5789, |
| "step": 14340 |
| }, |
| { |
| "epoch": 1.9911885103725804, |
| "grad_norm": 0.0, |
| "learning_rate": 5.6803724412316074e-05, |
| "loss": 1.5386, |
| "step": 14350 |
| }, |
| { |
| "epoch": 1.9925761465343785, |
| "grad_norm": 0.0, |
| "learning_rate": 5.666435888940822e-05, |
| "loss": 1.6013, |
| "step": 14360 |
| }, |
| { |
| "epoch": 1.9939637826961771, |
| "grad_norm": 0.0, |
| "learning_rate": 5.6525096921065844e-05, |
| "loss": 1.5647, |
| "step": 14370 |
| }, |
| { |
| "epoch": 1.9953514188579753, |
| "grad_norm": 0.0, |
| "learning_rate": 5.6385938840068374e-05, |
| "loss": 1.6151, |
| "step": 14380 |
| }, |
| { |
| "epoch": 1.996739055019774, |
| "grad_norm": 0.0, |
| "learning_rate": 5.624688497894708e-05, |
| "loss": 1.5482, |
| "step": 14390 |
| }, |
| { |
| "epoch": 1.998126691181572, |
| "grad_norm": 0.0, |
| "learning_rate": 5.610793566998414e-05, |
| "loss": 1.5079, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.998126691181572, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 887.1687, |
| "eval_samples_per_second": 14.441, |
| "eval_steps_per_second": 1.806, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.9995143273433706, |
| "grad_norm": 0.0, |
| "learning_rate": 5.5969091245211994e-05, |
| "loss": 1.474, |
| "step": 14410 |
| }, |
| { |
| "epoch": 2.0009713453132587, |
| "grad_norm": 0.0, |
| "learning_rate": 5.583035203641227e-05, |
| "loss": 1.6786, |
| "step": 14420 |
| }, |
| { |
| "epoch": 2.0023589814750573, |
| "grad_norm": 0.0, |
| "learning_rate": 5.5691718375115334e-05, |
| "loss": 1.632, |
| "step": 14430 |
| }, |
| { |
| "epoch": 2.0037466176368555, |
| "grad_norm": 0.0, |
| "learning_rate": 5.5553190592599295e-05, |
| "loss": 1.5874, |
| "step": 14440 |
| }, |
| { |
| "epoch": 2.005134253798654, |
| "grad_norm": 0.0, |
| "learning_rate": 5.541476901988918e-05, |
| "loss": 1.5561, |
| "step": 14450 |
| }, |
| { |
| "epoch": 2.006521889960452, |
| "grad_norm": 0.0, |
| "learning_rate": 5.5276453987756314e-05, |
| "loss": 1.575, |
| "step": 14460 |
| }, |
| { |
| "epoch": 2.007909526122251, |
| "grad_norm": 0.0, |
| "learning_rate": 5.5138245826717394e-05, |
| "loss": 1.493, |
| "step": 14470 |
| }, |
| { |
| "epoch": 2.009297162284049, |
| "grad_norm": 0.0, |
| "learning_rate": 5.5000144867033776e-05, |
| "loss": 1.6124, |
| "step": 14480 |
| }, |
| { |
| "epoch": 2.0106847984458476, |
| "grad_norm": 0.0, |
| "learning_rate": 5.486215143871053e-05, |
| "loss": 1.5919, |
| "step": 14490 |
| }, |
| { |
| "epoch": 2.0120724346076457, |
| "grad_norm": 0.0, |
| "learning_rate": 5.472426587149591e-05, |
| "loss": 1.5504, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.0120724346076457, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 881.8543, |
| "eval_samples_per_second": 14.528, |
| "eval_steps_per_second": 1.817, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.0134600707694443, |
| "grad_norm": 0.0, |
| "learning_rate": 5.458648849488037e-05, |
| "loss": 1.4898, |
| "step": 14510 |
| }, |
| { |
| "epoch": 2.0148477069312425, |
| "grad_norm": 0.0, |
| "learning_rate": 5.44488196380958e-05, |
| "loss": 1.515, |
| "step": 14520 |
| }, |
| { |
| "epoch": 2.016235343093041, |
| "grad_norm": 0.0, |
| "learning_rate": 5.431125963011481e-05, |
| "loss": 1.5875, |
| "step": 14530 |
| }, |
| { |
| "epoch": 2.0176229792548392, |
| "grad_norm": 0.0, |
| "learning_rate": 5.4173808799649905e-05, |
| "loss": 1.6191, |
| "step": 14540 |
| }, |
| { |
| "epoch": 2.019010615416638, |
| "grad_norm": 0.0, |
| "learning_rate": 5.403646747515274e-05, |
| "loss": 1.5824, |
| "step": 14550 |
| }, |
| { |
| "epoch": 2.020398251578436, |
| "grad_norm": 0.0, |
| "learning_rate": 5.3899235984813166e-05, |
| "loss": 1.5268, |
| "step": 14560 |
| }, |
| { |
| "epoch": 2.0217858877402346, |
| "grad_norm": 0.0, |
| "learning_rate": 5.376211465655871e-05, |
| "loss": 1.4775, |
| "step": 14570 |
| }, |
| { |
| "epoch": 2.0231735239020328, |
| "grad_norm": 0.0, |
| "learning_rate": 5.362510381805357e-05, |
| "loss": 1.5118, |
| "step": 14580 |
| }, |
| { |
| "epoch": 2.0245611600638314, |
| "grad_norm": 0.0, |
| "learning_rate": 5.348820379669801e-05, |
| "loss": 1.58, |
| "step": 14590 |
| }, |
| { |
| "epoch": 2.0259487962256295, |
| "grad_norm": 0.0, |
| "learning_rate": 5.335141491962736e-05, |
| "loss": 1.5622, |
| "step": 14600 |
| }, |
| { |
| "epoch": 2.0259487962256295, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 886.1254, |
| "eval_samples_per_second": 14.458, |
| "eval_steps_per_second": 1.808, |
| "step": 14600 |
| }, |
| { |
| "epoch": 2.027336432387428, |
| "grad_norm": 0.0, |
| "learning_rate": 5.321473751371147e-05, |
| "loss": 1.4896, |
| "step": 14610 |
| }, |
| { |
| "epoch": 2.0287240685492263, |
| "grad_norm": 0.0, |
| "learning_rate": 5.30781719055538e-05, |
| "loss": 1.554, |
| "step": 14620 |
| }, |
| { |
| "epoch": 2.030111704711025, |
| "grad_norm": 0.0, |
| "learning_rate": 5.294171842149056e-05, |
| "loss": 1.4881, |
| "step": 14630 |
| }, |
| { |
| "epoch": 2.031499340872823, |
| "grad_norm": 0.0, |
| "learning_rate": 5.280537738759015e-05, |
| "loss": 1.5954, |
| "step": 14640 |
| }, |
| { |
| "epoch": 2.0328869770346216, |
| "grad_norm": 0.0, |
| "learning_rate": 5.266914912965222e-05, |
| "loss": 1.5027, |
| "step": 14650 |
| }, |
| { |
| "epoch": 2.03427461319642, |
| "grad_norm": 0.0, |
| "learning_rate": 5.2533033973206945e-05, |
| "loss": 1.5843, |
| "step": 14660 |
| }, |
| { |
| "epoch": 2.0356622493582184, |
| "grad_norm": 0.0, |
| "learning_rate": 5.2397032243514174e-05, |
| "loss": 1.5423, |
| "step": 14670 |
| }, |
| { |
| "epoch": 2.0370498855200165, |
| "grad_norm": 0.0, |
| "learning_rate": 5.2261144265562766e-05, |
| "loss": 1.5753, |
| "step": 14680 |
| }, |
| { |
| "epoch": 2.038437521681815, |
| "grad_norm": 0.0, |
| "learning_rate": 5.212537036406975e-05, |
| "loss": 1.5654, |
| "step": 14690 |
| }, |
| { |
| "epoch": 2.0398251578436133, |
| "grad_norm": 0.0, |
| "learning_rate": 5.19897108634796e-05, |
| "loss": 1.5264, |
| "step": 14700 |
| }, |
| { |
| "epoch": 2.0398251578436133, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 934.2204, |
| "eval_samples_per_second": 13.714, |
| "eval_steps_per_second": 1.715, |
| "step": 14700 |
| }, |
| { |
| "epoch": 2.041212794005412, |
| "grad_norm": 0.0, |
| "learning_rate": 5.18541660879633e-05, |
| "loss": 1.5304, |
| "step": 14710 |
| }, |
| { |
| "epoch": 2.04260043016721, |
| "grad_norm": 0.0, |
| "learning_rate": 5.1718736361417786e-05, |
| "loss": 1.5799, |
| "step": 14720 |
| }, |
| { |
| "epoch": 2.0439880663290086, |
| "grad_norm": 0.0, |
| "learning_rate": 5.158342200746511e-05, |
| "loss": 1.507, |
| "step": 14730 |
| }, |
| { |
| "epoch": 2.045375702490807, |
| "grad_norm": 0.0, |
| "learning_rate": 5.144822334945146e-05, |
| "loss": 1.6232, |
| "step": 14740 |
| }, |
| { |
| "epoch": 2.0467633386526054, |
| "grad_norm": 0.0, |
| "learning_rate": 5.131314071044675e-05, |
| "loss": 1.62, |
| "step": 14750 |
| }, |
| { |
| "epoch": 2.0481509748144036, |
| "grad_norm": 0.0, |
| "learning_rate": 5.117817441324353e-05, |
| "loss": 1.4876, |
| "step": 14760 |
| }, |
| { |
| "epoch": 2.049538610976202, |
| "grad_norm": 0.0, |
| "learning_rate": 5.104332478035645e-05, |
| "loss": 1.6082, |
| "step": 14770 |
| }, |
| { |
| "epoch": 2.0509262471380003, |
| "grad_norm": 0.0, |
| "learning_rate": 5.090859213402124e-05, |
| "loss": 1.4934, |
| "step": 14780 |
| }, |
| { |
| "epoch": 2.052313883299799, |
| "grad_norm": 0.0, |
| "learning_rate": 5.077397679619416e-05, |
| "loss": 1.6092, |
| "step": 14790 |
| }, |
| { |
| "epoch": 2.053701519461597, |
| "grad_norm": 0.0, |
| "learning_rate": 5.063947908855118e-05, |
| "loss": 1.5959, |
| "step": 14800 |
| }, |
| { |
| "epoch": 2.053701519461597, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 935.4436, |
| "eval_samples_per_second": 13.696, |
| "eval_steps_per_second": 1.713, |
| "step": 14800 |
| }, |
| { |
| "epoch": 2.0550891556233957, |
| "grad_norm": 0.0, |
| "learning_rate": 5.0505099332487146e-05, |
| "loss": 1.5665, |
| "step": 14810 |
| }, |
| { |
| "epoch": 2.056476791785194, |
| "grad_norm": 0.0, |
| "learning_rate": 5.037083784911502e-05, |
| "loss": 1.58, |
| "step": 14820 |
| }, |
| { |
| "epoch": 2.0578644279469924, |
| "grad_norm": 0.0, |
| "learning_rate": 5.023669495926516e-05, |
| "loss": 1.5067, |
| "step": 14830 |
| }, |
| { |
| "epoch": 2.0592520641087906, |
| "grad_norm": 0.0, |
| "learning_rate": 5.0102670983484604e-05, |
| "loss": 1.5376, |
| "step": 14840 |
| }, |
| { |
| "epoch": 2.060639700270589, |
| "grad_norm": 0.0, |
| "learning_rate": 4.996876624203608e-05, |
| "loss": 1.5834, |
| "step": 14850 |
| }, |
| { |
| "epoch": 2.0620273364323873, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9834981054897535e-05, |
| "loss": 1.5117, |
| "step": 14860 |
| }, |
| { |
| "epoch": 2.063414972594186, |
| "grad_norm": 0.0, |
| "learning_rate": 4.970131574176117e-05, |
| "loss": 1.5067, |
| "step": 14870 |
| }, |
| { |
| "epoch": 2.064802608755984, |
| "grad_norm": 0.0, |
| "learning_rate": 4.956777062203278e-05, |
| "loss": 1.5413, |
| "step": 14880 |
| }, |
| { |
| "epoch": 2.0661902449177827, |
| "grad_norm": 0.0, |
| "learning_rate": 4.943434601483087e-05, |
| "loss": 1.5114, |
| "step": 14890 |
| }, |
| { |
| "epoch": 2.067577881079581, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9301042238986005e-05, |
| "loss": 1.5587, |
| "step": 14900 |
| }, |
| { |
| "epoch": 2.067577881079581, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 877.1569, |
| "eval_samples_per_second": 14.606, |
| "eval_steps_per_second": 1.826, |
| "step": 14900 |
| }, |
| { |
| "epoch": 2.0689655172413794, |
| "grad_norm": 0.0, |
| "learning_rate": 4.916785961304008e-05, |
| "loss": 1.5112, |
| "step": 14910 |
| }, |
| { |
| "epoch": 2.0703531534031776, |
| "grad_norm": 0.0, |
| "learning_rate": 4.903479845524535e-05, |
| "loss": 1.5256, |
| "step": 14920 |
| }, |
| { |
| "epoch": 2.071740789564976, |
| "grad_norm": 0.0, |
| "learning_rate": 4.890185908356393e-05, |
| "loss": 1.5042, |
| "step": 14930 |
| }, |
| { |
| "epoch": 2.0731284257267744, |
| "grad_norm": 0.0, |
| "learning_rate": 4.876904181566686e-05, |
| "loss": 1.5899, |
| "step": 14940 |
| }, |
| { |
| "epoch": 2.074516061888573, |
| "grad_norm": 0.0, |
| "learning_rate": 4.863634696893349e-05, |
| "loss": 1.5495, |
| "step": 14950 |
| }, |
| { |
| "epoch": 2.075903698050371, |
| "grad_norm": 0.0, |
| "learning_rate": 4.850377486045045e-05, |
| "loss": 1.5809, |
| "step": 14960 |
| }, |
| { |
| "epoch": 2.0772913342121697, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8371325807011234e-05, |
| "loss": 1.6356, |
| "step": 14970 |
| }, |
| { |
| "epoch": 2.078678970373968, |
| "grad_norm": 0.0, |
| "learning_rate": 4.823900012511524e-05, |
| "loss": 1.5579, |
| "step": 14980 |
| }, |
| { |
| "epoch": 2.0800666065357665, |
| "grad_norm": 0.0, |
| "learning_rate": 4.81067981309671e-05, |
| "loss": 1.5337, |
| "step": 14990 |
| }, |
| { |
| "epoch": 2.0814542426975646, |
| "grad_norm": 0.0, |
| "learning_rate": 4.797472014047576e-05, |
| "loss": 1.5839, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.0814542426975646, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 879.4065, |
| "eval_samples_per_second": 14.569, |
| "eval_steps_per_second": 1.822, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.0827031152431834, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7842766469253945e-05, |
| "loss": 1.5855, |
| "step": 15010 |
| }, |
| { |
| "epoch": 2.0840907514049816, |
| "grad_norm": 0.0, |
| "learning_rate": 4.771093743261734e-05, |
| "loss": 1.6072, |
| "step": 15020 |
| }, |
| { |
| "epoch": 2.08547838756678, |
| "grad_norm": 0.0, |
| "learning_rate": 4.757923334558367e-05, |
| "loss": 1.5583, |
| "step": 15030 |
| }, |
| { |
| "epoch": 2.0868660237285783, |
| "grad_norm": 0.0, |
| "learning_rate": 4.744765452287221e-05, |
| "loss": 1.5175, |
| "step": 15040 |
| }, |
| { |
| "epoch": 2.088253659890377, |
| "grad_norm": 0.0, |
| "learning_rate": 4.731620127890284e-05, |
| "loss": 1.5471, |
| "step": 15050 |
| }, |
| { |
| "epoch": 2.089641296052175, |
| "grad_norm": 0.0, |
| "learning_rate": 4.718487392779543e-05, |
| "loss": 1.529, |
| "step": 15060 |
| }, |
| { |
| "epoch": 2.0910289322139737, |
| "grad_norm": 0.0, |
| "learning_rate": 4.705367278336888e-05, |
| "loss": 1.531, |
| "step": 15070 |
| }, |
| { |
| "epoch": 2.092416568375772, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6922598159140616e-05, |
| "loss": 1.5668, |
| "step": 15080 |
| }, |
| { |
| "epoch": 2.0938042045375704, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6791650368325715e-05, |
| "loss": 1.5357, |
| "step": 15090 |
| }, |
| { |
| "epoch": 2.0951918406993686, |
| "grad_norm": 0.0, |
| "learning_rate": 4.666082972383621e-05, |
| "loss": 1.5436, |
| "step": 15100 |
| }, |
| { |
| "epoch": 2.0951918406993686, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 916.3967, |
| "eval_samples_per_second": 13.981, |
| "eval_steps_per_second": 1.748, |
| "step": 15100 |
| }, |
| { |
| "epoch": 2.096579476861167, |
| "grad_norm": 0.0, |
| "learning_rate": 4.653013653828018e-05, |
| "loss": 1.5593, |
| "step": 15110 |
| }, |
| { |
| "epoch": 2.0979671130229653, |
| "grad_norm": 0.0, |
| "learning_rate": 4.639957112396123e-05, |
| "loss": 1.6224, |
| "step": 15120 |
| }, |
| { |
| "epoch": 2.099354749184764, |
| "grad_norm": 0.0, |
| "learning_rate": 4.626913379287768e-05, |
| "loss": 1.5973, |
| "step": 15130 |
| }, |
| { |
| "epoch": 2.100742385346562, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6138824856721654e-05, |
| "loss": 1.5058, |
| "step": 15140 |
| }, |
| { |
| "epoch": 2.1021300215083607, |
| "grad_norm": 0.0, |
| "learning_rate": 4.600864462687855e-05, |
| "loss": 1.488, |
| "step": 15150 |
| }, |
| { |
| "epoch": 2.103517657670159, |
| "grad_norm": 0.0, |
| "learning_rate": 4.587859341442622e-05, |
| "loss": 1.4917, |
| "step": 15160 |
| }, |
| { |
| "epoch": 2.1049052938319575, |
| "grad_norm": 0.0, |
| "learning_rate": 4.574867153013421e-05, |
| "loss": 1.532, |
| "step": 15170 |
| }, |
| { |
| "epoch": 2.1062929299937556, |
| "grad_norm": 0.0, |
| "learning_rate": 4.561887928446296e-05, |
| "loss": 1.5549, |
| "step": 15180 |
| }, |
| { |
| "epoch": 2.107680566155554, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5489216987563176e-05, |
| "loss": 1.5537, |
| "step": 15190 |
| }, |
| { |
| "epoch": 2.1090682023173524, |
| "grad_norm": 0.0, |
| "learning_rate": 4.535968494927507e-05, |
| "loss": 1.6042, |
| "step": 15200 |
| }, |
| { |
| "epoch": 2.1090682023173524, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 953.681, |
| "eval_samples_per_second": 13.434, |
| "eval_steps_per_second": 1.68, |
| "step": 15200 |
| }, |
| { |
| "epoch": 2.110455838479151, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5230283479127575e-05, |
| "loss": 1.4479, |
| "step": 15210 |
| }, |
| { |
| "epoch": 2.111843474640949, |
| "grad_norm": 0.0, |
| "learning_rate": 4.510101288633753e-05, |
| "loss": 1.5663, |
| "step": 15220 |
| }, |
| { |
| "epoch": 2.1132311108027477, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4971873479809147e-05, |
| "loss": 1.5425, |
| "step": 15230 |
| }, |
| { |
| "epoch": 2.114618746964546, |
| "grad_norm": 0.0, |
| "learning_rate": 4.484286556813314e-05, |
| "loss": 1.5906, |
| "step": 15240 |
| }, |
| { |
| "epoch": 2.1160063831263445, |
| "grad_norm": 0.0, |
| "learning_rate": 4.471398945958589e-05, |
| "loss": 1.5221, |
| "step": 15250 |
| }, |
| { |
| "epoch": 2.1173940192881426, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4585245462128956e-05, |
| "loss": 1.5014, |
| "step": 15260 |
| }, |
| { |
| "epoch": 2.1187816554499412, |
| "grad_norm": 0.0, |
| "learning_rate": 4.445663388340815e-05, |
| "loss": 1.5336, |
| "step": 15270 |
| }, |
| { |
| "epoch": 2.1201692916117394, |
| "grad_norm": 0.0, |
| "learning_rate": 4.43281550307529e-05, |
| "loss": 1.6044, |
| "step": 15280 |
| }, |
| { |
| "epoch": 2.121556927773538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4199809211175344e-05, |
| "loss": 1.5086, |
| "step": 15290 |
| }, |
| { |
| "epoch": 2.122944563935336, |
| "grad_norm": 0.0, |
| "learning_rate": 4.407159673136988e-05, |
| "loss": 1.5354, |
| "step": 15300 |
| }, |
| { |
| "epoch": 2.122944563935336, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 926.3099, |
| "eval_samples_per_second": 13.831, |
| "eval_steps_per_second": 1.729, |
| "step": 15300 |
| }, |
| { |
| "epoch": 2.1243322000971343, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3943517897712206e-05, |
| "loss": 1.495, |
| "step": 15310 |
| }, |
| { |
| "epoch": 2.125719836258933, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3815573016258696e-05, |
| "loss": 1.4791, |
| "step": 15320 |
| }, |
| { |
| "epoch": 2.1271074724207315, |
| "grad_norm": 0.0, |
| "learning_rate": 4.368776239274554e-05, |
| "loss": 1.4759, |
| "step": 15330 |
| }, |
| { |
| "epoch": 2.1284951085825297, |
| "grad_norm": 0.0, |
| "learning_rate": 4.356008633258819e-05, |
| "loss": 1.5037, |
| "step": 15340 |
| }, |
| { |
| "epoch": 2.129882744744328, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3432545140880584e-05, |
| "loss": 1.4478, |
| "step": 15350 |
| }, |
| { |
| "epoch": 2.1312703809061264, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3305139122394235e-05, |
| "loss": 1.4855, |
| "step": 15360 |
| }, |
| { |
| "epoch": 2.132658017067925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3177868581577786e-05, |
| "loss": 1.5878, |
| "step": 15370 |
| }, |
| { |
| "epoch": 2.134045653229723, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3050733822556075e-05, |
| "loss": 1.4945, |
| "step": 15380 |
| }, |
| { |
| "epoch": 2.1354332893915213, |
| "grad_norm": 0.0, |
| "learning_rate": 4.292373514912954e-05, |
| "loss": 1.54, |
| "step": 15390 |
| }, |
| { |
| "epoch": 2.13682092555332, |
| "grad_norm": 0.0, |
| "learning_rate": 4.279687286477331e-05, |
| "loss": 1.5607, |
| "step": 15400 |
| }, |
| { |
| "epoch": 2.13682092555332, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 938.5818, |
| "eval_samples_per_second": 13.65, |
| "eval_steps_per_second": 1.707, |
| "step": 15400 |
| }, |
| { |
| "epoch": 2.1382085617151185, |
| "grad_norm": 0.0, |
| "learning_rate": 4.267014727263671e-05, |
| "loss": 1.4647, |
| "step": 15410 |
| }, |
| { |
| "epoch": 2.1395961978769167, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2543558675542374e-05, |
| "loss": 1.55, |
| "step": 15420 |
| }, |
| { |
| "epoch": 2.140983834038715, |
| "grad_norm": 0.0, |
| "learning_rate": 4.241710737598564e-05, |
| "loss": 1.4891, |
| "step": 15430 |
| }, |
| { |
| "epoch": 2.1423714702005134, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2290793676133634e-05, |
| "loss": 1.5599, |
| "step": 15440 |
| }, |
| { |
| "epoch": 2.1437591063623116, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2164617877824776e-05, |
| "loss": 1.5297, |
| "step": 15450 |
| }, |
| { |
| "epoch": 2.14514674252411, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2038580282567975e-05, |
| "loss": 1.5349, |
| "step": 15460 |
| }, |
| { |
| "epoch": 2.1465343786859084, |
| "grad_norm": 0.0, |
| "learning_rate": 4.191268119154178e-05, |
| "loss": 1.5351, |
| "step": 15470 |
| }, |
| { |
| "epoch": 2.147922014847707, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1786920905593864e-05, |
| "loss": 1.6173, |
| "step": 15480 |
| }, |
| { |
| "epoch": 2.149309651009505, |
| "grad_norm": 0.0, |
| "learning_rate": 4.166129972524019e-05, |
| "loss": 1.5345, |
| "step": 15490 |
| }, |
| { |
| "epoch": 2.1506972871713037, |
| "grad_norm": 0.0, |
| "learning_rate": 4.153581795066435e-05, |
| "loss": 1.4677, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.1506972871713037, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 929.9599, |
| "eval_samples_per_second": 13.777, |
| "eval_steps_per_second": 1.723, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.152084923333102, |
| "grad_norm": 0.0, |
| "learning_rate": 4.14104758817167e-05, |
| "loss": 1.5162, |
| "step": 15510 |
| }, |
| { |
| "epoch": 2.1534725594949005, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1285273817913874e-05, |
| "loss": 1.6178, |
| "step": 15520 |
| }, |
| { |
| "epoch": 2.1548601956566986, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1160212058437886e-05, |
| "loss": 1.6055, |
| "step": 15530 |
| }, |
| { |
| "epoch": 2.156247831818497, |
| "grad_norm": 0.0, |
| "learning_rate": 4.103529090213556e-05, |
| "loss": 1.5298, |
| "step": 15540 |
| }, |
| { |
| "epoch": 2.1576354679802954, |
| "grad_norm": 0.0, |
| "learning_rate": 4.09105106475176e-05, |
| "loss": 1.6078, |
| "step": 15550 |
| }, |
| { |
| "epoch": 2.159023104142094, |
| "grad_norm": 0.0, |
| "learning_rate": 4.078587159275811e-05, |
| "loss": 1.5427, |
| "step": 15560 |
| }, |
| { |
| "epoch": 2.160410740303892, |
| "grad_norm": 0.0, |
| "learning_rate": 4.066137403569381e-05, |
| "loss": 1.5159, |
| "step": 15570 |
| }, |
| { |
| "epoch": 2.1617983764656907, |
| "grad_norm": 0.0, |
| "learning_rate": 4.053701827382319e-05, |
| "loss": 1.5719, |
| "step": 15580 |
| }, |
| { |
| "epoch": 2.163186012627489, |
| "grad_norm": 0.0, |
| "learning_rate": 4.041280460430598e-05, |
| "loss": 1.5559, |
| "step": 15590 |
| }, |
| { |
| "epoch": 2.1645736487892875, |
| "grad_norm": 0.0, |
| "learning_rate": 4.028873332396237e-05, |
| "loss": 1.5674, |
| "step": 15600 |
| }, |
| { |
| "epoch": 2.1645736487892875, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 944.6173, |
| "eval_samples_per_second": 13.563, |
| "eval_steps_per_second": 1.696, |
| "step": 15600 |
| }, |
| { |
| "epoch": 2.1659612849510856, |
| "grad_norm": 0.0, |
| "learning_rate": 4.016480472927232e-05, |
| "loss": 1.5766, |
| "step": 15610 |
| }, |
| { |
| "epoch": 2.1673489211128842, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0041019116374714e-05, |
| "loss": 1.5224, |
| "step": 15620 |
| }, |
| { |
| "epoch": 2.1687365572746824, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9917376781066874e-05, |
| "loss": 1.6476, |
| "step": 15630 |
| }, |
| { |
| "epoch": 2.170124193436481, |
| "grad_norm": 0.0, |
| "learning_rate": 3.979387801880373e-05, |
| "loss": 1.5762, |
| "step": 15640 |
| }, |
| { |
| "epoch": 2.171511829598279, |
| "grad_norm": 0.0, |
| "learning_rate": 3.967052312469716e-05, |
| "loss": 1.5226, |
| "step": 15650 |
| }, |
| { |
| "epoch": 2.1728994657600778, |
| "grad_norm": 0.0, |
| "learning_rate": 3.954731239351512e-05, |
| "loss": 1.5913, |
| "step": 15660 |
| }, |
| { |
| "epoch": 2.174287101921876, |
| "grad_norm": 0.0, |
| "learning_rate": 3.942424611968123e-05, |
| "loss": 1.5287, |
| "step": 15670 |
| }, |
| { |
| "epoch": 2.1756747380836745, |
| "grad_norm": 0.0, |
| "learning_rate": 3.930132459727388e-05, |
| "loss": 1.5339, |
| "step": 15680 |
| }, |
| { |
| "epoch": 2.1770623742454727, |
| "grad_norm": 0.0, |
| "learning_rate": 3.917854812002547e-05, |
| "loss": 1.5704, |
| "step": 15690 |
| }, |
| { |
| "epoch": 2.1784500104072713, |
| "grad_norm": 0.0, |
| "learning_rate": 3.905591698132189e-05, |
| "loss": 1.4806, |
| "step": 15700 |
| }, |
| { |
| "epoch": 2.1784500104072713, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 927.5263, |
| "eval_samples_per_second": 13.813, |
| "eval_steps_per_second": 1.727, |
| "step": 15700 |
| }, |
| { |
| "epoch": 2.1798376465690694, |
| "grad_norm": 0.0, |
| "learning_rate": 3.893343147420174e-05, |
| "loss": 1.591, |
| "step": 15710 |
| }, |
| { |
| "epoch": 2.181225282730868, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8811091891355614e-05, |
| "loss": 1.537, |
| "step": 15720 |
| }, |
| { |
| "epoch": 2.182612918892666, |
| "grad_norm": 0.0, |
| "learning_rate": 3.868889852512528e-05, |
| "loss": 1.5321, |
| "step": 15730 |
| }, |
| { |
| "epoch": 2.1840005550544648, |
| "grad_norm": 0.0, |
| "learning_rate": 3.856685166750329e-05, |
| "loss": 1.4817, |
| "step": 15740 |
| }, |
| { |
| "epoch": 2.185388191216263, |
| "grad_norm": 0.0, |
| "learning_rate": 3.844495161013198e-05, |
| "loss": 1.5511, |
| "step": 15750 |
| }, |
| { |
| "epoch": 2.1867758273780615, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8323198644303005e-05, |
| "loss": 1.5125, |
| "step": 15760 |
| }, |
| { |
| "epoch": 2.1881634635398597, |
| "grad_norm": 0.0, |
| "learning_rate": 3.820159306095635e-05, |
| "loss": 1.5082, |
| "step": 15770 |
| }, |
| { |
| "epoch": 2.1895510997016583, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8080135150679996e-05, |
| "loss": 1.5499, |
| "step": 15780 |
| }, |
| { |
| "epoch": 2.1909387358634564, |
| "grad_norm": 0.0, |
| "learning_rate": 3.795882520370898e-05, |
| "loss": 1.5333, |
| "step": 15790 |
| }, |
| { |
| "epoch": 2.192326372025255, |
| "grad_norm": 0.0, |
| "learning_rate": 3.78376635099247e-05, |
| "loss": 1.5596, |
| "step": 15800 |
| }, |
| { |
| "epoch": 2.192326372025255, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 933.9213, |
| "eval_samples_per_second": 13.719, |
| "eval_steps_per_second": 1.715, |
| "step": 15800 |
| }, |
| { |
| "epoch": 2.193714008187053, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7716650358854386e-05, |
| "loss": 1.5425, |
| "step": 15810 |
| }, |
| { |
| "epoch": 2.195101644348852, |
| "grad_norm": 0.0, |
| "learning_rate": 3.759578603967028e-05, |
| "loss": 1.5336, |
| "step": 15820 |
| }, |
| { |
| "epoch": 2.19648928051065, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7475070841189e-05, |
| "loss": 1.5973, |
| "step": 15830 |
| }, |
| { |
| "epoch": 2.1978769166724486, |
| "grad_norm": 0.0, |
| "learning_rate": 3.735450505187072e-05, |
| "loss": 1.524, |
| "step": 15840 |
| }, |
| { |
| "epoch": 2.1992645528342467, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7234088959818715e-05, |
| "loss": 1.5217, |
| "step": 15850 |
| }, |
| { |
| "epoch": 2.2006521889960453, |
| "grad_norm": 0.0, |
| "learning_rate": 3.711382285277847e-05, |
| "loss": 1.5523, |
| "step": 15860 |
| }, |
| { |
| "epoch": 2.2020398251578435, |
| "grad_norm": 0.0, |
| "learning_rate": 3.699370701813715e-05, |
| "loss": 1.5474, |
| "step": 15870 |
| }, |
| { |
| "epoch": 2.203427461319642, |
| "grad_norm": 0.0, |
| "learning_rate": 3.687374174292268e-05, |
| "loss": 1.6118, |
| "step": 15880 |
| }, |
| { |
| "epoch": 2.2048150974814402, |
| "grad_norm": 0.0, |
| "learning_rate": 3.675392731380336e-05, |
| "loss": 1.4827, |
| "step": 15890 |
| }, |
| { |
| "epoch": 2.206202733643239, |
| "grad_norm": 0.0, |
| "learning_rate": 3.663426401708698e-05, |
| "loss": 1.5971, |
| "step": 15900 |
| }, |
| { |
| "epoch": 2.206202733643239, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 932.2836, |
| "eval_samples_per_second": 13.743, |
| "eval_steps_per_second": 1.718, |
| "step": 15900 |
| }, |
| { |
| "epoch": 2.207590369805037, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6514752138720124e-05, |
| "loss": 1.5733, |
| "step": 15910 |
| }, |
| { |
| "epoch": 2.2089780059668356, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6395391964287606e-05, |
| "loss": 1.5794, |
| "step": 15920 |
| }, |
| { |
| "epoch": 2.2103656421286337, |
| "grad_norm": 0.0, |
| "learning_rate": 3.627618377901174e-05, |
| "loss": 1.5671, |
| "step": 15930 |
| }, |
| { |
| "epoch": 2.2117532782904323, |
| "grad_norm": 0.0, |
| "learning_rate": 3.615712786775165e-05, |
| "loss": 1.5647, |
| "step": 15940 |
| }, |
| { |
| "epoch": 2.2131409144522305, |
| "grad_norm": 0.0, |
| "learning_rate": 3.603822451500252e-05, |
| "loss": 1.5265, |
| "step": 15950 |
| }, |
| { |
| "epoch": 2.214528550614029, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5919474004895027e-05, |
| "loss": 1.5581, |
| "step": 15960 |
| }, |
| { |
| "epoch": 2.2159161867758272, |
| "grad_norm": 0.0, |
| "learning_rate": 3.580087662119464e-05, |
| "loss": 1.6019, |
| "step": 15970 |
| }, |
| { |
| "epoch": 2.217303822937626, |
| "grad_norm": 0.0, |
| "learning_rate": 3.568243264730092e-05, |
| "loss": 1.5216, |
| "step": 15980 |
| }, |
| { |
| "epoch": 2.218691459099424, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5564142366246755e-05, |
| "loss": 1.5791, |
| "step": 15990 |
| }, |
| { |
| "epoch": 2.2200790952612226, |
| "grad_norm": 0.0, |
| "learning_rate": 3.544600606069785e-05, |
| "loss": 1.5498, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.2200790952612226, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 973.7526, |
| "eval_samples_per_second": 13.157, |
| "eval_steps_per_second": 1.645, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.2214667314230208, |
| "grad_norm": 0.0, |
| "learning_rate": 3.532802401295199e-05, |
| "loss": 1.5221, |
| "step": 16010 |
| }, |
| { |
| "epoch": 2.2228543675848194, |
| "grad_norm": 0.0, |
| "learning_rate": 3.521019650493824e-05, |
| "loss": 1.5488, |
| "step": 16020 |
| }, |
| { |
| "epoch": 2.2242420037466175, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5092523818216486e-05, |
| "loss": 1.4977, |
| "step": 16030 |
| }, |
| { |
| "epoch": 2.225629639908416, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4975006233976595e-05, |
| "loss": 1.5192, |
| "step": 16040 |
| }, |
| { |
| "epoch": 2.2270172760702143, |
| "grad_norm": 0.0, |
| "learning_rate": 3.485764403303787e-05, |
| "loss": 1.5094, |
| "step": 16050 |
| }, |
| { |
| "epoch": 2.228404912232013, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4740437495848186e-05, |
| "loss": 1.6359, |
| "step": 16060 |
| }, |
| { |
| "epoch": 2.229792548393811, |
| "grad_norm": 0.0, |
| "learning_rate": 3.462338690248356e-05, |
| "loss": 1.5899, |
| "step": 16070 |
| }, |
| { |
| "epoch": 2.2311801845556096, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4506492532647315e-05, |
| "loss": 1.5689, |
| "step": 16080 |
| }, |
| { |
| "epoch": 2.232567820717408, |
| "grad_norm": 0.0, |
| "learning_rate": 3.438975466566953e-05, |
| "loss": 1.5154, |
| "step": 16090 |
| }, |
| { |
| "epoch": 2.2339554568792064, |
| "grad_norm": 0.0, |
| "learning_rate": 3.427317358050616e-05, |
| "loss": 1.5396, |
| "step": 16100 |
| }, |
| { |
| "epoch": 2.2339554568792064, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 996.8854, |
| "eval_samples_per_second": 12.852, |
| "eval_steps_per_second": 1.607, |
| "step": 16100 |
| }, |
| { |
| "epoch": 2.2353430930410045, |
| "grad_norm": 0.0, |
| "learning_rate": 3.415674955573864e-05, |
| "loss": 1.5667, |
| "step": 16110 |
| }, |
| { |
| "epoch": 2.236730729202803, |
| "grad_norm": 0.0, |
| "learning_rate": 3.404048286957312e-05, |
| "loss": 1.5438, |
| "step": 16120 |
| }, |
| { |
| "epoch": 2.2381183653646013, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3924373799839615e-05, |
| "loss": 1.5788, |
| "step": 16130 |
| }, |
| { |
| "epoch": 2.2395060015264, |
| "grad_norm": 0.0, |
| "learning_rate": 3.380842262399166e-05, |
| "loss": 1.5115, |
| "step": 16140 |
| }, |
| { |
| "epoch": 2.240893637688198, |
| "grad_norm": 0.0, |
| "learning_rate": 3.369262961910542e-05, |
| "loss": 1.5161, |
| "step": 16150 |
| }, |
| { |
| "epoch": 2.2422812738499966, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3576995061879145e-05, |
| "loss": 1.5062, |
| "step": 16160 |
| }, |
| { |
| "epoch": 2.243668910011795, |
| "grad_norm": 0.0, |
| "learning_rate": 3.346151922863233e-05, |
| "loss": 1.504, |
| "step": 16170 |
| }, |
| { |
| "epoch": 2.2450565461735934, |
| "grad_norm": 0.0, |
| "learning_rate": 3.334620239530534e-05, |
| "loss": 1.5609, |
| "step": 16180 |
| }, |
| { |
| "epoch": 2.2464441823353916, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3231044837458495e-05, |
| "loss": 1.5546, |
| "step": 16190 |
| }, |
| { |
| "epoch": 2.24783181849719, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3116046830271594e-05, |
| "loss": 1.5937, |
| "step": 16200 |
| }, |
| { |
| "epoch": 2.24783181849719, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 1004.3974, |
| "eval_samples_per_second": 12.756, |
| "eval_steps_per_second": 1.595, |
| "step": 16200 |
| }, |
| { |
| "epoch": 2.2492194546589883, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3001208648543055e-05, |
| "loss": 1.552, |
| "step": 16210 |
| }, |
| { |
| "epoch": 2.250607090820787, |
| "grad_norm": 0.0, |
| "learning_rate": 3.288653056668949e-05, |
| "loss": 1.4559, |
| "step": 16220 |
| }, |
| { |
| "epoch": 2.251994726982585, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2772012858744916e-05, |
| "loss": 1.5379, |
| "step": 16230 |
| }, |
| { |
| "epoch": 2.2533823631443837, |
| "grad_norm": 0.0, |
| "learning_rate": 3.265765579836004e-05, |
| "loss": 1.5939, |
| "step": 16240 |
| }, |
| { |
| "epoch": 2.254769999306182, |
| "grad_norm": 0.0, |
| "learning_rate": 3.254345965880179e-05, |
| "loss": 1.5655, |
| "step": 16250 |
| }, |
| { |
| "epoch": 2.2561576354679804, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2429424712952494e-05, |
| "loss": 1.5769, |
| "step": 16260 |
| }, |
| { |
| "epoch": 2.2575452716297786, |
| "grad_norm": 0.0, |
| "learning_rate": 3.231555123330937e-05, |
| "loss": 1.6075, |
| "step": 16270 |
| }, |
| { |
| "epoch": 2.258932907791577, |
| "grad_norm": 0.0, |
| "learning_rate": 3.220183949198368e-05, |
| "loss": 1.5309, |
| "step": 16280 |
| }, |
| { |
| "epoch": 2.2603205439533753, |
| "grad_norm": 0.0, |
| "learning_rate": 3.208828976070027e-05, |
| "loss": 1.5075, |
| "step": 16290 |
| }, |
| { |
| "epoch": 2.261708180115174, |
| "grad_norm": 0.0, |
| "learning_rate": 3.197490231079685e-05, |
| "loss": 1.5901, |
| "step": 16300 |
| }, |
| { |
| "epoch": 2.261708180115174, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 991.4676, |
| "eval_samples_per_second": 12.922, |
| "eval_steps_per_second": 1.616, |
| "step": 16300 |
| }, |
| { |
| "epoch": 2.263095816276972, |
| "grad_norm": 0.0, |
| "learning_rate": 3.186167741322337e-05, |
| "loss": 1.4599, |
| "step": 16310 |
| }, |
| { |
| "epoch": 2.2644834524387707, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1748615338541224e-05, |
| "loss": 1.4669, |
| "step": 16320 |
| }, |
| { |
| "epoch": 2.265871088600569, |
| "grad_norm": 0.0, |
| "learning_rate": 3.163571635692286e-05, |
| "loss": 1.5273, |
| "step": 16330 |
| }, |
| { |
| "epoch": 2.2672587247623675, |
| "grad_norm": 0.0, |
| "learning_rate": 3.152298073815096e-05, |
| "loss": 1.5162, |
| "step": 16340 |
| }, |
| { |
| "epoch": 2.2686463609241656, |
| "grad_norm": 0.0, |
| "learning_rate": 3.141040875161779e-05, |
| "loss": 1.5584, |
| "step": 16350 |
| }, |
| { |
| "epoch": 2.270033997085964, |
| "grad_norm": 0.0, |
| "learning_rate": 3.129800066632463e-05, |
| "loss": 1.5418, |
| "step": 16360 |
| }, |
| { |
| "epoch": 2.2714216332477624, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1185756750881143e-05, |
| "loss": 1.5496, |
| "step": 16370 |
| }, |
| { |
| "epoch": 2.272809269409561, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1073677273504666e-05, |
| "loss": 1.5761, |
| "step": 16380 |
| }, |
| { |
| "epoch": 2.274196905571359, |
| "grad_norm": 0.0, |
| "learning_rate": 3.096176250201953e-05, |
| "loss": 1.6242, |
| "step": 16390 |
| }, |
| { |
| "epoch": 2.2755845417331577, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0850012703856567e-05, |
| "loss": 1.5603, |
| "step": 16400 |
| }, |
| { |
| "epoch": 2.2755845417331577, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 887.9758, |
| "eval_samples_per_second": 14.428, |
| "eval_steps_per_second": 1.804, |
| "step": 16400 |
| }, |
| { |
| "epoch": 2.276972177894956, |
| "grad_norm": 0.0, |
| "learning_rate": 3.073842814605239e-05, |
| "loss": 1.5251, |
| "step": 16410 |
| }, |
| { |
| "epoch": 2.2783598140567545, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0627009095248734e-05, |
| "loss": 1.6049, |
| "step": 16420 |
| }, |
| { |
| "epoch": 2.2797474502185526, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0515755817691794e-05, |
| "loss": 1.5205, |
| "step": 16430 |
| }, |
| { |
| "epoch": 2.2811350863803512, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0404668579231686e-05, |
| "loss": 1.4944, |
| "step": 16440 |
| }, |
| { |
| "epoch": 2.2825227225421494, |
| "grad_norm": 0.0, |
| "learning_rate": 3.029374764532181e-05, |
| "loss": 1.5663, |
| "step": 16450 |
| }, |
| { |
| "epoch": 2.283910358703948, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0182993281018034e-05, |
| "loss": 1.5988, |
| "step": 16460 |
| }, |
| { |
| "epoch": 2.285297994865746, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0072405750978283e-05, |
| "loss": 1.5206, |
| "step": 16470 |
| }, |
| { |
| "epoch": 2.2866856310275447, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9961985319461804e-05, |
| "loss": 1.5273, |
| "step": 16480 |
| }, |
| { |
| "epoch": 2.288073267189343, |
| "grad_norm": 0.0, |
| "learning_rate": 2.985173225032858e-05, |
| "loss": 1.4942, |
| "step": 16490 |
| }, |
| { |
| "epoch": 2.2894609033511415, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9741646807038558e-05, |
| "loss": 1.5831, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.2894609033511415, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 897.0246, |
| "eval_samples_per_second": 14.283, |
| "eval_steps_per_second": 1.786, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.2908485395129397, |
| "grad_norm": 0.0, |
| "learning_rate": 2.963172925265123e-05, |
| "loss": 1.5637, |
| "step": 16510 |
| }, |
| { |
| "epoch": 2.2922361756747383, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9521979849824855e-05, |
| "loss": 1.5101, |
| "step": 16520 |
| }, |
| { |
| "epoch": 2.2936238118365364, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9412398860815936e-05, |
| "loss": 1.6035, |
| "step": 16530 |
| }, |
| { |
| "epoch": 2.295011447998335, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9302986547478416e-05, |
| "loss": 1.5706, |
| "step": 16540 |
| }, |
| { |
| "epoch": 2.296399084160133, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9193743171263288e-05, |
| "loss": 1.5611, |
| "step": 16550 |
| }, |
| { |
| "epoch": 2.2977867203219318, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9084668993217832e-05, |
| "loss": 1.5604, |
| "step": 16560 |
| }, |
| { |
| "epoch": 2.29917435648373, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8975764273984953e-05, |
| "loss": 1.5665, |
| "step": 16570 |
| }, |
| { |
| "epoch": 2.3005619926455285, |
| "grad_norm": 0.0, |
| "learning_rate": 2.886702927380266e-05, |
| "loss": 1.5259, |
| "step": 16580 |
| }, |
| { |
| "epoch": 2.3019496288073267, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8758464252503402e-05, |
| "loss": 1.5501, |
| "step": 16590 |
| }, |
| { |
| "epoch": 2.3033372649691253, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8650069469513497e-05, |
| "loss": 1.5792, |
| "step": 16600 |
| }, |
| { |
| "epoch": 2.3033372649691253, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 889.8596, |
| "eval_samples_per_second": 14.398, |
| "eval_steps_per_second": 1.8, |
| "step": 16600 |
| }, |
| { |
| "epoch": 2.3047249011309234, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8541845183852345e-05, |
| "loss": 1.5979, |
| "step": 16610 |
| }, |
| { |
| "epoch": 2.306112537292722, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8433791654132013e-05, |
| "loss": 1.5241, |
| "step": 16620 |
| }, |
| { |
| "epoch": 2.30750017345452, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8325909138556515e-05, |
| "loss": 1.5629, |
| "step": 16630 |
| }, |
| { |
| "epoch": 2.308887809616319, |
| "grad_norm": 0.0, |
| "learning_rate": 2.821819789492125e-05, |
| "loss": 1.563, |
| "step": 16640 |
| }, |
| { |
| "epoch": 2.310275445778117, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8110658180612226e-05, |
| "loss": 1.5398, |
| "step": 16650 |
| }, |
| { |
| "epoch": 2.3116630819399155, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8003290252605685e-05, |
| "loss": 1.5296, |
| "step": 16660 |
| }, |
| { |
| "epoch": 2.3130507181017137, |
| "grad_norm": 0.0, |
| "learning_rate": 2.789609436746734e-05, |
| "loss": 1.5364, |
| "step": 16670 |
| }, |
| { |
| "epoch": 2.3144383542635123, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7789070781351745e-05, |
| "loss": 1.5231, |
| "step": 16680 |
| }, |
| { |
| "epoch": 2.3158259904253105, |
| "grad_norm": 0.0, |
| "learning_rate": 2.768221975000177e-05, |
| "loss": 1.503, |
| "step": 16690 |
| }, |
| { |
| "epoch": 2.317213626587109, |
| "grad_norm": 0.0, |
| "learning_rate": 2.757554152874796e-05, |
| "loss": 1.6102, |
| "step": 16700 |
| }, |
| { |
| "epoch": 2.317213626587109, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 893.7238, |
| "eval_samples_per_second": 14.336, |
| "eval_steps_per_second": 1.793, |
| "step": 16700 |
| }, |
| { |
| "epoch": 2.318601262748907, |
| "grad_norm": 0.0, |
| "learning_rate": 2.746903637250793e-05, |
| "loss": 1.5784, |
| "step": 16710 |
| }, |
| { |
| "epoch": 2.3199888989107054, |
| "grad_norm": 0.0, |
| "learning_rate": 2.736270453578562e-05, |
| "loss": 1.5198, |
| "step": 16720 |
| }, |
| { |
| "epoch": 2.321376535072504, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7256546272670946e-05, |
| "loss": 1.4816, |
| "step": 16730 |
| }, |
| { |
| "epoch": 2.3227641712343026, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7150561836838994e-05, |
| "loss": 1.5082, |
| "step": 16740 |
| }, |
| { |
| "epoch": 2.3241518073961007, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7044751481549525e-05, |
| "loss": 1.5617, |
| "step": 16750 |
| }, |
| { |
| "epoch": 2.325539443557899, |
| "grad_norm": 0.0, |
| "learning_rate": 2.693911545964619e-05, |
| "loss": 1.5301, |
| "step": 16760 |
| }, |
| { |
| "epoch": 2.3269270797196975, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6833654023556177e-05, |
| "loss": 1.5309, |
| "step": 16770 |
| }, |
| { |
| "epoch": 2.328314715881496, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6728367425289493e-05, |
| "loss": 1.5755, |
| "step": 16780 |
| }, |
| { |
| "epoch": 2.3297023520432942, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6623255916438217e-05, |
| "loss": 1.5467, |
| "step": 16790 |
| }, |
| { |
| "epoch": 2.3310899882050924, |
| "grad_norm": 0.0, |
| "learning_rate": 2.651831974817619e-05, |
| "loss": 1.5429, |
| "step": 16800 |
| }, |
| { |
| "epoch": 2.3310899882050924, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 897.8065, |
| "eval_samples_per_second": 14.27, |
| "eval_steps_per_second": 1.784, |
| "step": 16800 |
| }, |
| { |
| "epoch": 2.332477624366891, |
| "grad_norm": 0.0, |
| "learning_rate": 2.641355917125816e-05, |
| "loss": 1.4949, |
| "step": 16810 |
| }, |
| { |
| "epoch": 2.3338652605286896, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6308974436019375e-05, |
| "loss": 1.5429, |
| "step": 16820 |
| }, |
| { |
| "epoch": 2.3352528966904877, |
| "grad_norm": 0.0, |
| "learning_rate": 2.620456579237476e-05, |
| "loss": 1.5654, |
| "step": 16830 |
| }, |
| { |
| "epoch": 2.336640532852286, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6100333489818572e-05, |
| "loss": 1.5314, |
| "step": 16840 |
| }, |
| { |
| "epoch": 2.3380281690140845, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5996277777423628e-05, |
| "loss": 1.5607, |
| "step": 16850 |
| }, |
| { |
| "epoch": 2.339415805175883, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5892398903840832e-05, |
| "loss": 1.5424, |
| "step": 16860 |
| }, |
| { |
| "epoch": 2.3408034413376813, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5788697117298377e-05, |
| "loss": 1.5879, |
| "step": 16870 |
| }, |
| { |
| "epoch": 2.3421910774994794, |
| "grad_norm": 0.0, |
| "learning_rate": 2.568517266560141e-05, |
| "loss": 1.6442, |
| "step": 16880 |
| }, |
| { |
| "epoch": 2.343578713661278, |
| "grad_norm": 0.0, |
| "learning_rate": 2.558182579613133e-05, |
| "loss": 1.604, |
| "step": 16890 |
| }, |
| { |
| "epoch": 2.3449663498230766, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5478656755845077e-05, |
| "loss": 1.5769, |
| "step": 16900 |
| }, |
| { |
| "epoch": 2.3449663498230766, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 892.889, |
| "eval_samples_per_second": 14.349, |
| "eval_steps_per_second": 1.794, |
| "step": 16900 |
| }, |
| { |
| "epoch": 2.3463539859848748, |
| "grad_norm": 0.0, |
| "learning_rate": 2.537566579127475e-05, |
| "loss": 1.6231, |
| "step": 16910 |
| }, |
| { |
| "epoch": 2.347741622146673, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5272853148526876e-05, |
| "loss": 1.5962, |
| "step": 16920 |
| }, |
| { |
| "epoch": 2.3491292583084715, |
| "grad_norm": 0.0, |
| "learning_rate": 2.517021907328191e-05, |
| "loss": 1.5489, |
| "step": 16930 |
| }, |
| { |
| "epoch": 2.35051689447027, |
| "grad_norm": 0.0, |
| "learning_rate": 2.506776381079351e-05, |
| "loss": 1.607, |
| "step": 16940 |
| }, |
| { |
| "epoch": 2.3519045306320683, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4965487605888137e-05, |
| "loss": 1.5333, |
| "step": 16950 |
| }, |
| { |
| "epoch": 2.3532921667938664, |
| "grad_norm": 0.0, |
| "learning_rate": 2.486339070296434e-05, |
| "loss": 1.5035, |
| "step": 16960 |
| }, |
| { |
| "epoch": 2.354679802955665, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4761473345992247e-05, |
| "loss": 1.5349, |
| "step": 16970 |
| }, |
| { |
| "epoch": 2.3560674391174636, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4659735778512836e-05, |
| "loss": 1.4888, |
| "step": 16980 |
| }, |
| { |
| "epoch": 2.357455075279262, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4558178243637587e-05, |
| "loss": 1.5775, |
| "step": 16990 |
| }, |
| { |
| "epoch": 2.35884271144106, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4456800984047736e-05, |
| "loss": 1.4904, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.35884271144106, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 891.2441, |
| "eval_samples_per_second": 14.375, |
| "eval_steps_per_second": 1.797, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.3602303476028585, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4355604241993656e-05, |
| "loss": 1.6108, |
| "step": 17010 |
| }, |
| { |
| "epoch": 2.361617983764657, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4254588259294465e-05, |
| "loss": 1.4995, |
| "step": 17020 |
| }, |
| { |
| "epoch": 2.3630056199264553, |
| "grad_norm": 0.0, |
| "learning_rate": 2.41537532773373e-05, |
| "loss": 1.5408, |
| "step": 17030 |
| }, |
| { |
| "epoch": 2.3643932560882535, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4053099537076794e-05, |
| "loss": 1.5079, |
| "step": 17040 |
| }, |
| { |
| "epoch": 2.365780892250052, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3952627279034424e-05, |
| "loss": 1.4928, |
| "step": 17050 |
| }, |
| { |
| "epoch": 2.3671685284118507, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3852336743298053e-05, |
| "loss": 1.5503, |
| "step": 17060 |
| }, |
| { |
| "epoch": 2.368556164573649, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3752228169521307e-05, |
| "loss": 1.5749, |
| "step": 17070 |
| }, |
| { |
| "epoch": 2.369943800735447, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3652301796923003e-05, |
| "loss": 1.5013, |
| "step": 17080 |
| }, |
| { |
| "epoch": 2.3713314368972456, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3552557864286495e-05, |
| "loss": 1.516, |
| "step": 17090 |
| }, |
| { |
| "epoch": 2.3727190730590437, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3452996609959253e-05, |
| "loss": 1.533, |
| "step": 17100 |
| }, |
| { |
| "epoch": 2.3727190730590437, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 890.5528, |
| "eval_samples_per_second": 14.387, |
| "eval_steps_per_second": 1.799, |
| "step": 17100 |
| }, |
| { |
| "epoch": 2.3741067092208423, |
| "grad_norm": 0.0, |
| "learning_rate": 2.335361827185224e-05, |
| "loss": 1.4783, |
| "step": 17110 |
| }, |
| { |
| "epoch": 2.3754943453826405, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3254423087439237e-05, |
| "loss": 1.4655, |
| "step": 17120 |
| }, |
| { |
| "epoch": 2.376881981544439, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3155411293756412e-05, |
| "loss": 1.5946, |
| "step": 17130 |
| }, |
| { |
| "epoch": 2.3782696177062372, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3056583127401733e-05, |
| "loss": 1.6442, |
| "step": 17140 |
| }, |
| { |
| "epoch": 2.379657253868036, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2957938824534343e-05, |
| "loss": 1.5603, |
| "step": 17150 |
| }, |
| { |
| "epoch": 2.381044890029834, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2859478620873975e-05, |
| "loss": 1.6026, |
| "step": 17160 |
| }, |
| { |
| "epoch": 2.3824325261916326, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2761202751700528e-05, |
| "loss": 1.6088, |
| "step": 17170 |
| }, |
| { |
| "epoch": 2.3838201623534307, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2663111451853368e-05, |
| "loss": 1.5083, |
| "step": 17180 |
| }, |
| { |
| "epoch": 2.3852077985152293, |
| "grad_norm": 0.0, |
| "learning_rate": 2.256520495573087e-05, |
| "loss": 1.5413, |
| "step": 17190 |
| }, |
| { |
| "epoch": 2.3865954346770275, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2467483497289677e-05, |
| "loss": 1.5837, |
| "step": 17200 |
| }, |
| { |
| "epoch": 2.3865954346770275, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 889.6194, |
| "eval_samples_per_second": 14.402, |
| "eval_steps_per_second": 1.801, |
| "step": 17200 |
| }, |
| { |
| "epoch": 2.387983070838826, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2369947310044392e-05, |
| "loss": 1.5767, |
| "step": 17210 |
| }, |
| { |
| "epoch": 2.3893707070006243, |
| "grad_norm": 0.0, |
| "learning_rate": 2.227259662706689e-05, |
| "loss": 1.5455, |
| "step": 17220 |
| }, |
| { |
| "epoch": 2.390758343162423, |
| "grad_norm": 0.0, |
| "learning_rate": 2.217543168098565e-05, |
| "loss": 1.5668, |
| "step": 17230 |
| }, |
| { |
| "epoch": 2.392145979324221, |
| "grad_norm": 0.0, |
| "learning_rate": 2.207845270398544e-05, |
| "loss": 1.6349, |
| "step": 17240 |
| }, |
| { |
| "epoch": 2.3935336154860196, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1981659927806576e-05, |
| "loss": 1.5506, |
| "step": 17250 |
| }, |
| { |
| "epoch": 2.3949212516478178, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1885053583744485e-05, |
| "loss": 1.5319, |
| "step": 17260 |
| }, |
| { |
| "epoch": 2.3963088878096164, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1788633902648992e-05, |
| "loss": 1.5557, |
| "step": 17270 |
| }, |
| { |
| "epoch": 2.3976965239714145, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1692401114923975e-05, |
| "loss": 1.5497, |
| "step": 17280 |
| }, |
| { |
| "epoch": 2.399084160133213, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1596355450526673e-05, |
| "loss": 1.5477, |
| "step": 17290 |
| }, |
| { |
| "epoch": 2.4004717962950113, |
| "grad_norm": 0.0, |
| "learning_rate": 2.15004971389672e-05, |
| "loss": 1.5048, |
| "step": 17300 |
| }, |
| { |
| "epoch": 2.4004717962950113, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 884.422, |
| "eval_samples_per_second": 14.486, |
| "eval_steps_per_second": 1.811, |
| "step": 17300 |
| }, |
| { |
| "epoch": 2.40185943245681, |
| "grad_norm": 0.0, |
| "learning_rate": 2.140482640930791e-05, |
| "loss": 1.6127, |
| "step": 17310 |
| }, |
| { |
| "epoch": 2.403247068618608, |
| "grad_norm": 0.0, |
| "learning_rate": 2.130934349016297e-05, |
| "loss": 1.5357, |
| "step": 17320 |
| }, |
| { |
| "epoch": 2.4046347047804066, |
| "grad_norm": 0.0, |
| "learning_rate": 2.121404860969778e-05, |
| "loss": 1.5203, |
| "step": 17330 |
| }, |
| { |
| "epoch": 2.406022340942205, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1118941995628294e-05, |
| "loss": 1.5295, |
| "step": 17340 |
| }, |
| { |
| "epoch": 2.4074099771040034, |
| "grad_norm": 0.0, |
| "learning_rate": 2.10240238752207e-05, |
| "loss": 1.52, |
| "step": 17350 |
| }, |
| { |
| "epoch": 2.4087976132658016, |
| "grad_norm": 0.0, |
| "learning_rate": 2.092929447529072e-05, |
| "loss": 1.6179, |
| "step": 17360 |
| }, |
| { |
| "epoch": 2.4101852494276, |
| "grad_norm": 0.0, |
| "learning_rate": 2.083475402220312e-05, |
| "loss": 1.5312, |
| "step": 17370 |
| }, |
| { |
| "epoch": 2.4115728855893983, |
| "grad_norm": 0.0, |
| "learning_rate": 2.074040274187111e-05, |
| "loss": 1.5649, |
| "step": 17380 |
| }, |
| { |
| "epoch": 2.412960521751197, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0646240859755926e-05, |
| "loss": 1.5582, |
| "step": 17390 |
| }, |
| { |
| "epoch": 2.414348157912995, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0552268600866174e-05, |
| "loss": 1.4689, |
| "step": 17400 |
| }, |
| { |
| "epoch": 2.414348157912995, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 881.4697, |
| "eval_samples_per_second": 14.535, |
| "eval_steps_per_second": 1.817, |
| "step": 17400 |
| }, |
| { |
| "epoch": 2.4157357940747937, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0458486189757385e-05, |
| "loss": 1.5304, |
| "step": 17410 |
| }, |
| { |
| "epoch": 2.417123430236592, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0364893850531342e-05, |
| "loss": 1.5111, |
| "step": 17420 |
| }, |
| { |
| "epoch": 2.4185110663983904, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0271491806835717e-05, |
| "loss": 1.5422, |
| "step": 17430 |
| }, |
| { |
| "epoch": 2.4198987025601886, |
| "grad_norm": 0.0, |
| "learning_rate": 2.017828028186345e-05, |
| "loss": 1.5842, |
| "step": 17440 |
| }, |
| { |
| "epoch": 2.421286338721987, |
| "grad_norm": 0.0, |
| "learning_rate": 2.008525949835214e-05, |
| "loss": 1.5357, |
| "step": 17450 |
| }, |
| { |
| "epoch": 2.4226739748837853, |
| "grad_norm": 0.0, |
| "learning_rate": 1.999242967858367e-05, |
| "loss": 1.5144, |
| "step": 17460 |
| }, |
| { |
| "epoch": 2.424061611045584, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9899791044383575e-05, |
| "loss": 1.553, |
| "step": 17470 |
| }, |
| { |
| "epoch": 2.425449247207382, |
| "grad_norm": 0.0, |
| "learning_rate": 1.980734381712056e-05, |
| "loss": 1.5922, |
| "step": 17480 |
| }, |
| { |
| "epoch": 2.4268368833691807, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9715088217705856e-05, |
| "loss": 1.5451, |
| "step": 17490 |
| }, |
| { |
| "epoch": 2.428224519530979, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9623024466592877e-05, |
| "loss": 1.5234, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.428224519530979, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 873.5127, |
| "eval_samples_per_second": 14.667, |
| "eval_steps_per_second": 1.834, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.4296121556927774, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9531152783776553e-05, |
| "loss": 1.5719, |
| "step": 17510 |
| }, |
| { |
| "epoch": 2.4309997918545756, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9439473388792895e-05, |
| "loss": 1.6198, |
| "step": 17520 |
| }, |
| { |
| "epoch": 2.432387428016374, |
| "grad_norm": 0.0, |
| "learning_rate": 1.934798650071833e-05, |
| "loss": 1.5601, |
| "step": 17530 |
| }, |
| { |
| "epoch": 2.4337750641781724, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9256692338169345e-05, |
| "loss": 1.5462, |
| "step": 17540 |
| }, |
| { |
| "epoch": 2.435162700339971, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9165591119301918e-05, |
| "loss": 1.4857, |
| "step": 17550 |
| }, |
| { |
| "epoch": 2.436550336501769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9074683061810873e-05, |
| "loss": 1.5428, |
| "step": 17560 |
| }, |
| { |
| "epoch": 2.4379379726635677, |
| "grad_norm": 0.0, |
| "learning_rate": 1.898396838292953e-05, |
| "loss": 1.5551, |
| "step": 17570 |
| }, |
| { |
| "epoch": 2.439325608825366, |
| "grad_norm": 0.0, |
| "learning_rate": 1.889344729942909e-05, |
| "loss": 1.5344, |
| "step": 17580 |
| }, |
| { |
| "epoch": 2.4407132449871645, |
| "grad_norm": 0.0, |
| "learning_rate": 1.880312002761818e-05, |
| "loss": 1.4525, |
| "step": 17590 |
| }, |
| { |
| "epoch": 2.4421008811489626, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8712986783342213e-05, |
| "loss": 1.5635, |
| "step": 17600 |
| }, |
| { |
| "epoch": 2.4421008811489626, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 874.238, |
| "eval_samples_per_second": 14.655, |
| "eval_steps_per_second": 1.832, |
| "step": 17600 |
| }, |
| { |
| "epoch": 2.443488517310761, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8623047781983015e-05, |
| "loss": 1.5471, |
| "step": 17610 |
| }, |
| { |
| "epoch": 2.4448761534725594, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8533303238458242e-05, |
| "loss": 1.5249, |
| "step": 17620 |
| }, |
| { |
| "epoch": 2.446263789634358, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8443753367220895e-05, |
| "loss": 1.6026, |
| "step": 17630 |
| }, |
| { |
| "epoch": 2.447651425796156, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8354398382258718e-05, |
| "loss": 1.5442, |
| "step": 17640 |
| }, |
| { |
| "epoch": 2.4490390619579547, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8265238497093818e-05, |
| "loss": 1.5441, |
| "step": 17650 |
| }, |
| { |
| "epoch": 2.450426698119753, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8176273924782105e-05, |
| "loss": 1.5742, |
| "step": 17660 |
| }, |
| { |
| "epoch": 2.4518143342815515, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8087504877912685e-05, |
| "loss": 1.517, |
| "step": 17670 |
| }, |
| { |
| "epoch": 2.4532019704433496, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7998931568607515e-05, |
| "loss": 1.4875, |
| "step": 17680 |
| }, |
| { |
| "epoch": 2.4545896066051482, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7910554208520814e-05, |
| "loss": 1.5491, |
| "step": 17690 |
| }, |
| { |
| "epoch": 2.4559772427669464, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7822373008838555e-05, |
| "loss": 1.5338, |
| "step": 17700 |
| }, |
| { |
| "epoch": 2.4559772427669464, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 877.3156, |
| "eval_samples_per_second": 14.604, |
| "eval_steps_per_second": 1.826, |
| "step": 17700 |
| }, |
| { |
| "epoch": 2.457364878928745, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7734388180277916e-05, |
| "loss": 1.5268, |
| "step": 17710 |
| }, |
| { |
| "epoch": 2.458752515090543, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7646599933086892e-05, |
| "loss": 1.5664, |
| "step": 17720 |
| }, |
| { |
| "epoch": 2.4601401512523418, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7559008477043715e-05, |
| "loss": 1.5986, |
| "step": 17730 |
| }, |
| { |
| "epoch": 2.46152778741414, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7471614021456363e-05, |
| "loss": 1.494, |
| "step": 17740 |
| }, |
| { |
| "epoch": 2.4629154235759385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7384416775162015e-05, |
| "loss": 1.5648, |
| "step": 17750 |
| }, |
| { |
| "epoch": 2.4643030597377367, |
| "grad_norm": 0.0, |
| "learning_rate": 1.729741694652668e-05, |
| "loss": 1.54, |
| "step": 17760 |
| }, |
| { |
| "epoch": 2.4656906958995353, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7210614743444588e-05, |
| "loss": 1.5335, |
| "step": 17770 |
| }, |
| { |
| "epoch": 2.4670783320613334, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7124010373337684e-05, |
| "loss": 1.5415, |
| "step": 17780 |
| }, |
| { |
| "epoch": 2.468465968223132, |
| "grad_norm": 0.0, |
| "learning_rate": 1.703760404315522e-05, |
| "loss": 1.5377, |
| "step": 17790 |
| }, |
| { |
| "epoch": 2.46985360438493, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6951395959373194e-05, |
| "loss": 1.5515, |
| "step": 17800 |
| }, |
| { |
| "epoch": 2.46985360438493, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 878.8065, |
| "eval_samples_per_second": 14.579, |
| "eval_steps_per_second": 1.823, |
| "step": 17800 |
| }, |
| { |
| "epoch": 2.471241240546729, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6865386327993927e-05, |
| "loss": 1.5407, |
| "step": 17810 |
| }, |
| { |
| "epoch": 2.472628876708527, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6779575354545406e-05, |
| "loss": 1.5614, |
| "step": 17820 |
| }, |
| { |
| "epoch": 2.4740165128703255, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6693963244081002e-05, |
| "loss": 1.4807, |
| "step": 17830 |
| }, |
| { |
| "epoch": 2.4754041490321237, |
| "grad_norm": 0.0, |
| "learning_rate": 1.660855020117885e-05, |
| "loss": 1.5384, |
| "step": 17840 |
| }, |
| { |
| "epoch": 2.4767917851939223, |
| "grad_norm": 0.0, |
| "learning_rate": 1.652333642994144e-05, |
| "loss": 1.5561, |
| "step": 17850 |
| }, |
| { |
| "epoch": 2.4781794213557204, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6438322133994986e-05, |
| "loss": 1.5725, |
| "step": 17860 |
| }, |
| { |
| "epoch": 2.479567057517519, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6353507516489118e-05, |
| "loss": 1.5946, |
| "step": 17870 |
| }, |
| { |
| "epoch": 2.480954693679317, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6268892780096322e-05, |
| "loss": 1.5852, |
| "step": 17880 |
| }, |
| { |
| "epoch": 2.482342329841116, |
| "grad_norm": 0.0, |
| "learning_rate": 1.618447812701137e-05, |
| "loss": 1.6317, |
| "step": 17890 |
| }, |
| { |
| "epoch": 2.483729966002914, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6100263758950995e-05, |
| "loss": 1.5009, |
| "step": 17900 |
| }, |
| { |
| "epoch": 2.483729966002914, |
| "eval_loss": 1.5553832054138184, |
| "eval_runtime": 901.7145, |
| "eval_samples_per_second": 14.208, |
| "eval_steps_per_second": 1.777, |
| "step": 17900 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 21621, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.391892638486782e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |