{
  "best_global_step": 3400,
  "best_metric": 0.7816377282142639,
  "best_model_checkpoint": "./lfm_kokoro_complete/checkpoint-3400",
  "epoch": 2.936096718480138,
  "eval_steps": 100,
  "global_step": 3400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008635578583765112,
      "grad_norm": 5.131196975708008,
      "learning_rate": 0.0,
      "loss": 2.8308,
      "step": 1
    },
    {
      "epoch": 0.008635578583765112,
      "grad_norm": 5.2136335372924805,
      "learning_rate": 5.172413793103448e-06,
      "loss": 2.6503,
      "step": 10
    },
    {
      "epoch": 0.017271157167530225,
      "grad_norm": 2.5024237632751465,
      "learning_rate": 1.091954022988506e-05,
      "loss": 2.6409,
      "step": 20
    },
    {
      "epoch": 0.025906735751295335,
      "grad_norm": 1.3332571983337402,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 2.505,
      "step": 30
    },
    {
      "epoch": 0.03454231433506045,
      "grad_norm": 1.121747374534607,
      "learning_rate": 2.2413793103448276e-05,
      "loss": 2.3877,
      "step": 40
    },
    {
      "epoch": 0.04317789291882556,
      "grad_norm": 0.5361054539680481,
      "learning_rate": 2.8160919540229884e-05,
      "loss": 2.2456,
      "step": 50
    },
    {
      "epoch": 0.05181347150259067,
      "grad_norm": 0.4509966969490051,
      "learning_rate": 3.390804597701149e-05,
      "loss": 2.18,
      "step": 60
    },
    {
      "epoch": 0.06044905008635579,
      "grad_norm": 0.3262108266353607,
      "learning_rate": 3.965517241379311e-05,
      "loss": 2.1533,
      "step": 70
    },
    {
      "epoch": 0.0690846286701209,
      "grad_norm": 0.31236183643341064,
      "learning_rate": 4.5402298850574716e-05,
      "loss": 2.1012,
      "step": 80
    },
    {
      "epoch": 0.07772020725388601,
      "grad_norm": 0.2791730463504791,
      "learning_rate": 5.1149425287356324e-05,
      "loss": 2.0615,
      "step": 90
    },
    {
      "epoch": 0.08635578583765112,
      "grad_norm": 0.29012593626976013,
      "learning_rate": 5.689655172413794e-05,
      "loss": 2.0758,
      "step": 100
    },
    {
      "epoch": 0.08635578583765112,
      "eval_loss": 2.024885654449463,
      "eval_runtime": 74.288,
      "eval_samples_per_second": 31.163,
      "eval_steps_per_second": 3.904,
      "step": 100
    },
    {
      "epoch": 0.09499136442141623,
      "grad_norm": 0.27982184290885925,
      "learning_rate": 6.264367816091954e-05,
      "loss": 1.9746,
      "step": 110
    },
    {
      "epoch": 0.10362694300518134,
      "grad_norm": 0.3128826320171356,
      "learning_rate": 6.839080459770116e-05,
      "loss": 2.0059,
      "step": 120
    },
    {
      "epoch": 0.11226252158894647,
      "grad_norm": 0.29881423711776733,
      "learning_rate": 7.413793103448277e-05,
      "loss": 2.0007,
      "step": 130
    },
    {
      "epoch": 0.12089810017271158,
      "grad_norm": 0.3187066316604614,
      "learning_rate": 7.988505747126437e-05,
      "loss": 1.9892,
      "step": 140
    },
    {
      "epoch": 0.12953367875647667,
      "grad_norm": 0.2999899983406067,
      "learning_rate": 8.563218390804599e-05,
      "loss": 1.9454,
      "step": 150
    },
    {
      "epoch": 0.1381692573402418,
      "grad_norm": 0.32296231389045715,
      "learning_rate": 9.137931034482759e-05,
      "loss": 1.9215,
      "step": 160
    },
    {
      "epoch": 0.14680483592400692,
      "grad_norm": 0.3282780051231384,
      "learning_rate": 9.71264367816092e-05,
      "loss": 1.9189,
      "step": 170
    },
    {
      "epoch": 0.15544041450777202,
      "grad_norm": 0.3565793037414551,
      "learning_rate": 0.0001028735632183908,
      "loss": 1.9356,
      "step": 180
    },
    {
      "epoch": 0.16407599309153714,
      "grad_norm": 0.35819345712661743,
      "learning_rate": 0.00010862068965517242,
      "loss": 1.877,
      "step": 190
    },
    {
      "epoch": 0.17271157167530224,
      "grad_norm": 0.38044115900993347,
      "learning_rate": 0.00011436781609195404,
      "loss": 1.9072,
      "step": 200
    },
    {
      "epoch": 0.17271157167530224,
      "eval_loss": 1.8915574550628662,
      "eval_runtime": 74.2599,
      "eval_samples_per_second": 31.174,
      "eval_steps_per_second": 3.905,
      "step": 200
    },
    {
      "epoch": 0.18134715025906736,
      "grad_norm": 0.38403257727622986,
      "learning_rate": 0.00012011494252873562,
      "loss": 1.9223,
      "step": 210
    },
    {
      "epoch": 0.18998272884283246,
      "grad_norm": 0.40475621819496155,
      "learning_rate": 0.00012586206896551724,
      "loss": 1.8787,
      "step": 220
    },
    {
      "epoch": 0.19861830742659758,
      "grad_norm": 0.37040725350379944,
      "learning_rate": 0.00013160919540229887,
      "loss": 1.8916,
      "step": 230
    },
    {
      "epoch": 0.20725388601036268,
      "grad_norm": 0.41001173853874207,
      "learning_rate": 0.00013735632183908047,
      "loss": 1.8767,
      "step": 240
    },
    {
      "epoch": 0.2158894645941278,
      "grad_norm": 0.42713987827301025,
      "learning_rate": 0.0001431034482758621,
      "loss": 1.8756,
      "step": 250
    },
    {
      "epoch": 0.22452504317789293,
      "grad_norm": 0.42754629254341125,
      "learning_rate": 0.00014885057471264367,
      "loss": 1.8208,
      "step": 260
    },
    {
      "epoch": 0.23316062176165803,
      "grad_norm": 0.45471611618995667,
      "learning_rate": 0.0001545977011494253,
      "loss": 1.8424,
      "step": 270
    },
    {
      "epoch": 0.24179620034542315,
      "grad_norm": 0.419595330953598,
      "learning_rate": 0.0001603448275862069,
      "loss": 1.7868,
      "step": 280
    },
    {
      "epoch": 0.2504317789291883,
      "grad_norm": 0.40803587436676025,
      "learning_rate": 0.0001660919540229885,
      "loss": 1.8174,
      "step": 290
    },
    {
      "epoch": 0.25906735751295334,
      "grad_norm": 0.4197799265384674,
      "learning_rate": 0.00017183908045977013,
      "loss": 1.8143,
      "step": 300
    },
    {
      "epoch": 0.25906735751295334,
      "eval_loss": 1.8168917894363403,
      "eval_runtime": 74.2622,
      "eval_samples_per_second": 31.173,
      "eval_steps_per_second": 3.905,
      "step": 300
    },
    {
      "epoch": 0.26770293609671847,
      "grad_norm": 0.494597464799881,
      "learning_rate": 0.00017758620689655173,
      "loss": 1.8581,
      "step": 310
    },
    {
      "epoch": 0.2763385146804836,
      "grad_norm": 0.41333019733428955,
      "learning_rate": 0.00018333333333333334,
      "loss": 1.7674,
      "step": 320
    },
    {
      "epoch": 0.2849740932642487,
      "grad_norm": 0.38664960861206055,
      "learning_rate": 0.00018908045977011494,
      "loss": 1.8403,
      "step": 330
    },
    {
      "epoch": 0.29360967184801384,
      "grad_norm": 0.5136725902557373,
      "learning_rate": 0.00019482758620689657,
      "loss": 1.8034,
      "step": 340
    },
    {
      "epoch": 0.3022452504317789,
      "grad_norm": 0.4233579635620117,
      "learning_rate": 0.00019999994949995492,
      "loss": 1.8085,
      "step": 350
    },
    {
      "epoch": 0.31088082901554404,
      "grad_norm": 0.47240543365478516,
      "learning_rate": 0.0001999938895562612,
      "loss": 1.7553,
      "step": 360
    },
    {
      "epoch": 0.31951640759930916,
      "grad_norm": 0.42770665884017944,
      "learning_rate": 0.00019997773030485974,
      "loss": 1.81,
      "step": 370
    },
    {
      "epoch": 0.3281519861830743,
      "grad_norm": 0.41995662450790405,
      "learning_rate": 0.00019995147337782283,
      "loss": 1.7934,
      "step": 380
    },
    {
      "epoch": 0.33678756476683935,
      "grad_norm": 0.4032181203365326,
      "learning_rate": 0.00019991512142708033,
      "loss": 1.7521,
      "step": 390
    },
    {
      "epoch": 0.3454231433506045,
      "grad_norm": 0.4150594174861908,
      "learning_rate": 0.00019986867812415198,
      "loss": 1.7813,
      "step": 400
    },
    {
      "epoch": 0.3454231433506045,
      "eval_loss": 1.749611735343933,
      "eval_runtime": 74.4513,
      "eval_samples_per_second": 31.094,
      "eval_steps_per_second": 3.895,
      "step": 400
    },
    {
      "epoch": 0.3540587219343696,
      "grad_norm": 0.47208553552627563,
      "learning_rate": 0.00019981214815977647,
      "loss": 1.7951,
      "step": 410
    },
    {
      "epoch": 0.3626943005181347,
      "grad_norm": 0.38943392038345337,
      "learning_rate": 0.00019974553724343773,
      "loss": 1.7224,
      "step": 420
    },
    {
      "epoch": 0.37132987910189985,
      "grad_norm": 0.39383020997047424,
      "learning_rate": 0.00019966885210278822,
      "loss": 1.7327,
      "step": 430
    },
    {
      "epoch": 0.3799654576856649,
      "grad_norm": 0.4155268669128418,
      "learning_rate": 0.00019958210048296956,
      "loss": 1.7447,
      "step": 440
    },
    {
      "epoch": 0.38860103626943004,
      "grad_norm": 0.393373042345047,
      "learning_rate": 0.00019948529114583013,
      "loss": 1.7679,
      "step": 450
    },
    {
      "epoch": 0.39723661485319517,
      "grad_norm": 0.4065350592136383,
      "learning_rate": 0.0001993784338690403,
      "loss": 1.7041,
      "step": 460
    },
    {
      "epoch": 0.4058721934369603,
      "grad_norm": 0.42317506670951843,
      "learning_rate": 0.0001992615394451047,
      "loss": 1.662,
      "step": 470
    },
    {
      "epoch": 0.41450777202072536,
      "grad_norm": 0.460857629776001,
      "learning_rate": 0.00019913461968027227,
      "loss": 1.7228,
      "step": 480
    },
    {
      "epoch": 0.4231433506044905,
      "grad_norm": 0.47063159942626953,
      "learning_rate": 0.00019899768739334393,
      "loss": 1.759,
      "step": 490
    },
    {
      "epoch": 0.4317789291882556,
      "grad_norm": 0.44678857922554016,
      "learning_rate": 0.00019885075641437776,
      "loss": 1.6796,
      "step": 500
    },
    {
      "epoch": 0.4317789291882556,
      "eval_loss": 1.684213638305664,
      "eval_runtime": 75.8643,
      "eval_samples_per_second": 30.515,
      "eval_steps_per_second": 3.823,
      "step": 500
    },
    {
      "epoch": 0.44041450777202074,
      "grad_norm": 0.44266021251678467,
      "learning_rate": 0.00019869384158329223,
      "loss": 1.7078,
      "step": 510
    },
    {
      "epoch": 0.44905008635578586,
      "grad_norm": 0.5028413534164429,
      "learning_rate": 0.00019852695874836735,
      "loss": 1.6464,
      "step": 520
    },
    {
      "epoch": 0.45768566493955093,
      "grad_norm": 0.40895670652389526,
      "learning_rate": 0.00019835012476464406,
      "loss": 1.6559,
      "step": 530
    },
    {
      "epoch": 0.46632124352331605,
      "grad_norm": 0.5382914543151855,
      "learning_rate": 0.00019816335749222187,
      "loss": 1.6413,
      "step": 540
    },
    {
      "epoch": 0.4749568221070812,
      "grad_norm": 0.5184707045555115,
      "learning_rate": 0.00019796667579445492,
      "loss": 1.6706,
      "step": 550
    },
    {
      "epoch": 0.4835924006908463,
      "grad_norm": 0.4206818640232086,
      "learning_rate": 0.00019776009953604692,
      "loss": 1.6429,
      "step": 560
    },
    {
      "epoch": 0.49222797927461137,
      "grad_norm": 0.5006670355796814,
      "learning_rate": 0.0001975436495810447,
      "loss": 1.6556,
      "step": 570
    },
    {
      "epoch": 0.5008635578583766,
      "grad_norm": 0.4992609918117523,
      "learning_rate": 0.0001973173477907311,
      "loss": 1.6296,
      "step": 580
    },
    {
      "epoch": 0.5094991364421416,
      "grad_norm": 0.488678902387619,
      "learning_rate": 0.0001970812170214169,
      "loss": 1.6366,
      "step": 590
    },
    {
      "epoch": 0.5181347150259067,
      "grad_norm": 0.5439748167991638,
      "learning_rate": 0.00019683528112213235,
      "loss": 1.6546,
      "step": 600
    },
    {
      "epoch": 0.5181347150259067,
      "eval_loss": 1.6274060010910034,
      "eval_runtime": 74.2854,
      "eval_samples_per_second": 31.164,
      "eval_steps_per_second": 3.904,
      "step": 600
    },
    {
      "epoch": 0.5267702936096719,
      "grad_norm": 0.49797549843788147,
      "learning_rate": 0.00019657956493221844,
      "loss": 1.6206,
      "step": 610
    },
    {
      "epoch": 0.5354058721934369,
      "grad_norm": 0.5434479117393494,
      "learning_rate": 0.00019631409427881832,
      "loss": 1.6198,
      "step": 620
    },
    {
      "epoch": 0.5440414507772021,
      "grad_norm": 0.48510608077049255,
      "learning_rate": 0.00019603889597426838,
      "loss": 1.5839,
      "step": 630
    },
    {
      "epoch": 0.5526770293609672,
      "grad_norm": 0.4494125545024872,
      "learning_rate": 0.00019575399781339065,
      "loss": 1.6295,
      "step": 640
    },
    {
      "epoch": 0.5613126079447323,
      "grad_norm": 0.45310178399086,
      "learning_rate": 0.00019545942857068527,
      "loss": 1.6039,
      "step": 650
    },
    {
      "epoch": 0.5699481865284974,
      "grad_norm": 0.5253885984420776,
      "learning_rate": 0.00019515521799742444,
      "loss": 1.5997,
      "step": 660
    },
    {
      "epoch": 0.5785837651122625,
      "grad_norm": 0.48614782094955444,
      "learning_rate": 0.00019484139681864745,
      "loss": 1.5761,
      "step": 670
    },
    {
      "epoch": 0.5872193436960277,
      "grad_norm": 0.502662718296051,
      "learning_rate": 0.00019451799673005757,
      "loss": 1.5793,
      "step": 680
    },
    {
      "epoch": 0.5958549222797928,
      "grad_norm": 0.5655169486999512,
      "learning_rate": 0.00019418505039482068,
      "loss": 1.5643,
      "step": 690
    },
    {
      "epoch": 0.6044905008635578,
      "grad_norm": 0.507977306842804,
      "learning_rate": 0.00019384259144026653,
      "loss": 1.5549,
      "step": 700
    },
    {
      "epoch": 0.6044905008635578,
      "eval_loss": 1.5656999349594116,
      "eval_runtime": 74.2824,
      "eval_samples_per_second": 31.165,
      "eval_steps_per_second": 3.904,
      "step": 700
    },
    {
      "epoch": 0.613126079447323,
      "grad_norm": 0.5120140910148621,
      "learning_rate": 0.00019349065445449214,
      "loss": 1.5388,
      "step": 710
    },
    {
      "epoch": 0.6217616580310881,
      "grad_norm": 0.5686282515525818,
      "learning_rate": 0.00019312927498286867,
      "loss": 1.5975,
      "step": 720
    },
    {
      "epoch": 0.6303972366148531,
      "grad_norm": 0.5706737637519836,
      "learning_rate": 0.00019275848952445115,
      "loss": 1.5062,
      "step": 730
    },
    {
      "epoch": 0.6390328151986183,
      "grad_norm": 0.4991269111633301,
      "learning_rate": 0.0001923783355282923,
      "loss": 1.5513,
      "step": 740
    },
    {
      "epoch": 0.6476683937823834,
      "grad_norm": 0.6073980927467346,
      "learning_rate": 0.00019198885138966009,
      "loss": 1.5004,
      "step": 750
    },
    {
      "epoch": 0.6563039723661486,
      "grad_norm": 0.5857861042022705,
      "learning_rate": 0.00019159007644615981,
      "loss": 1.5607,
      "step": 760
    },
    {
      "epoch": 0.6649395509499136,
      "grad_norm": 0.5783904194831848,
      "learning_rate": 0.00019118205097376113,
      "loss": 1.5616,
      "step": 770
    },
    {
      "epoch": 0.6735751295336787,
      "grad_norm": 0.5480038523674011,
      "learning_rate": 0.00019076481618273018,
      "loss": 1.5609,
      "step": 780
    },
    {
      "epoch": 0.6822107081174439,
      "grad_norm": 0.6719979047775269,
      "learning_rate": 0.00019033841421346734,
      "loss": 1.5448,
      "step": 790
    },
    {
      "epoch": 0.690846286701209,
      "grad_norm": 0.6396545171737671,
      "learning_rate": 0.00018990288813225105,
      "loss": 1.4898,
      "step": 800
    },
    {
      "epoch": 0.690846286701209,
      "eval_loss": 1.5024266242980957,
      "eval_runtime": 74.2858,
      "eval_samples_per_second": 31.163,
      "eval_steps_per_second": 3.904,
      "step": 800
    },
    {
      "epoch": 0.6994818652849741,
      "grad_norm": 0.6165493130683899,
      "learning_rate": 0.0001894582819268883,
      "loss": 1.4581,
      "step": 810
    },
    {
      "epoch": 0.7081174438687392,
      "grad_norm": 0.5979147553443909,
      "learning_rate": 0.00018900464050227169,
      "loss": 1.5436,
      "step": 820
    },
    {
      "epoch": 0.7167530224525043,
      "grad_norm": 0.6082155108451843,
      "learning_rate": 0.0001885420096758443,
      "loss": 1.5205,
      "step": 830
    },
    {
      "epoch": 0.7253886010362695,
      "grad_norm": 0.6365352272987366,
      "learning_rate": 0.0001880704361729719,
      "loss": 1.5159,
      "step": 840
    },
    {
      "epoch": 0.7340241796200345,
      "grad_norm": 0.6347801685333252,
      "learning_rate": 0.000187589967622224,
      "loss": 1.4908,
      "step": 850
    },
    {
      "epoch": 0.7426597582037997,
      "grad_norm": 0.5811082720756531,
      "learning_rate": 0.00018710065255056314,
      "loss": 1.4738,
      "step": 860
    },
    {
      "epoch": 0.7512953367875648,
      "grad_norm": 0.6715326905250549,
      "learning_rate": 0.00018660254037844388,
      "loss": 1.4448,
      "step": 870
    },
    {
      "epoch": 0.7599309153713298,
      "grad_norm": 0.661300778388977,
      "learning_rate": 0.00018609568141482132,
      "loss": 1.4712,
      "step": 880
    },
    {
      "epoch": 0.768566493955095,
      "grad_norm": 0.5695850253105164,
      "learning_rate": 0.00018558012685206997,
      "loss": 1.4348,
      "step": 890
    },
    {
      "epoch": 0.7772020725388601,
      "grad_norm": 0.610674262046814,
      "learning_rate": 0.00018505592876081318,
      "loss": 1.504,
      "step": 900
    },
    {
      "epoch": 0.7772020725388601,
      "eval_loss": 1.4339938163757324,
      "eval_runtime": 74.5243,
      "eval_samples_per_second": 31.064,
      "eval_steps_per_second": 3.891,
      "step": 900
    },
    {
      "epoch": 0.7858376511226253,
      "grad_norm": 0.6574162244796753,
      "learning_rate": 0.00018452314008466432,
      "loss": 1.4541,
      "step": 910
    },
    {
      "epoch": 0.7944732297063903,
      "grad_norm": 0.622951090335846,
      "learning_rate": 0.00018398181463487933,
      "loss": 1.4335,
      "step": 920
    },
    {
      "epoch": 0.8031088082901554,
      "grad_norm": 0.7158159017562866,
      "learning_rate": 0.0001834320070849219,
      "loss": 1.3933,
      "step": 930
    },
    {
      "epoch": 0.8117443868739206,
      "grad_norm": 0.6937190294265747,
      "learning_rate": 0.0001828737729649414,
      "loss": 1.4129,
      "step": 940
    },
    {
      "epoch": 0.8203799654576857,
      "grad_norm": 0.6910032629966736,
      "learning_rate": 0.00018230716865616452,
      "loss": 1.4415,
      "step": 950
    },
    {
      "epoch": 0.8290155440414507,
      "grad_norm": 0.7496052980422974,
      "learning_rate": 0.00018173225138520065,
      "loss": 1.3115,
      "step": 960
    },
    {
      "epoch": 0.8376511226252159,
      "grad_norm": 0.8548805117607117,
      "learning_rate": 0.00018114907921826215,
      "loss": 1.3782,
      "step": 970
    },
    {
      "epoch": 0.846286701208981,
      "grad_norm": 0.7024548053741455,
      "learning_rate": 0.0001805577110552997,
      "loss": 1.3649,
      "step": 980
    },
    {
      "epoch": 0.8549222797927462,
      "grad_norm": 0.6912006139755249,
      "learning_rate": 0.0001799582066240534,
      "loss": 1.3884,
      "step": 990
    },
    {
      "epoch": 0.8635578583765112,
      "grad_norm": 0.7504778504371643,
      "learning_rate": 0.0001793506264740203,
      "loss": 1.4177,
      "step": 1000
    },
    {
      "epoch": 0.8635578583765112,
      "eval_loss": 1.371172308921814,
      "eval_runtime": 74.2999,
      "eval_samples_per_second": 31.157,
      "eval_steps_per_second": 3.903,
      "step": 1000
    },
    {
      "epoch": 0.8721934369602763,
      "grad_norm": 0.7364081740379333,
      "learning_rate": 0.00017873503197033902,
      "loss": 1.3732,
      "step": 1010
    },
    {
      "epoch": 0.8808290155440415,
      "grad_norm": 0.7568293809890747,
      "learning_rate": 0.00017811148528759183,
      "loss": 1.3572,
      "step": 1020
    },
    {
      "epoch": 0.8894645941278065,
      "grad_norm": 0.8201608657836914,
      "learning_rate": 0.00017748004940352518,
      "loss": 1.3735,
      "step": 1030
    },
    {
      "epoch": 0.8981001727115717,
      "grad_norm": 0.7080292701721191,
      "learning_rate": 0.00017684078809268887,
      "loss": 1.3454,
      "step": 1040
    },
    {
      "epoch": 0.9067357512953368,
      "grad_norm": 0.870185911655426,
      "learning_rate": 0.00017619376591999493,
      "loss": 1.3371,
      "step": 1050
    },
    {
      "epoch": 0.9153713298791019,
      "grad_norm": 0.767082691192627,
      "learning_rate": 0.00017553904823419667,
      "loss": 1.3524,
      "step": 1060
    },
    {
      "epoch": 0.924006908462867,
      "grad_norm": 0.6791857481002808,
      "learning_rate": 0.00017487670116128832,
      "loss": 1.3515,
      "step": 1070
    },
    {
      "epoch": 0.9326424870466321,
      "grad_norm": 0.897812008857727,
      "learning_rate": 0.0001742067915978266,
      "loss": 1.3075,
      "step": 1080
    },
    {
      "epoch": 0.9412780656303973,
      "grad_norm": 0.8470781445503235,
      "learning_rate": 0.00017352938720417398,
      "loss": 1.2876,
      "step": 1090
    },
    {
      "epoch": 0.9499136442141624,
      "grad_norm": 0.7665865421295166,
      "learning_rate": 0.0001728445563976652,
      "loss": 1.4049,
      "step": 1100
    },
    {
      "epoch": 0.9499136442141624,
      "eval_loss": 1.3132154941558838,
      "eval_runtime": 74.4946,
      "eval_samples_per_second": 31.076,
      "eval_steps_per_second": 3.893,
      "step": 1100
    },
    {
      "epoch": 0.9585492227979274,
      "grad_norm": 0.709002673625946,
      "learning_rate": 0.0001721523683456972,
      "loss": 1.3671,
      "step": 1110
    },
    {
      "epoch": 0.9671848013816926,
      "grad_norm": 0.7099783420562744,
      "learning_rate": 0.00017145289295874302,
      "loss": 1.3471,
      "step": 1120
    },
    {
      "epoch": 0.9758203799654577,
      "grad_norm": 0.6939783096313477,
      "learning_rate": 0.00017074620088329122,
      "loss": 1.3012,
      "step": 1130
    },
    {
      "epoch": 0.9844559585492227,
      "grad_norm": 0.8194535374641418,
      "learning_rate": 0.00017003236349471035,
      "loss": 1.2853,
      "step": 1140
    },
    {
      "epoch": 0.9930915371329879,
      "grad_norm": 0.7694395184516907,
      "learning_rate": 0.00016931145289004023,
      "loss": 1.3093,
      "step": 1150
    },
    {
      "epoch": 1.001727115716753,
      "grad_norm": 0.7333533763885498,
      "learning_rate": 0.0001685835418807103,
      "loss": 1.3436,
      "step": 1160
    },
    {
      "epoch": 1.0103626943005182,
      "grad_norm": 0.7282711863517761,
      "learning_rate": 0.00016784870398518545,
      "loss": 1.3019,
      "step": 1170
    },
    {
      "epoch": 1.0189982728842832,
      "grad_norm": 0.8324429392814636,
      "learning_rate": 0.00016710701342154106,
      "loss": 1.2171,
      "step": 1180
    },
    {
      "epoch": 1.0276338514680483,
      "grad_norm": 0.7838461995124817,
      "learning_rate": 0.00016635854509996668,
      "loss": 1.2805,
      "step": 1190
    },
    {
      "epoch": 1.0362694300518134,
      "grad_norm": 0.9009427428245544,
      "learning_rate": 0.00016560337461520036,
      "loss": 1.2174,
      "step": 1200
    },
    {
      "epoch": 1.0362694300518134,
      "eval_loss": 1.247739315032959,
      "eval_runtime": 74.3416,
      "eval_samples_per_second": 31.14,
      "eval_steps_per_second": 3.901,
      "step": 1200
    },
    {
      "epoch": 1.0449050086355787,
      "grad_norm": 0.817688524723053,
      "learning_rate": 0.00016484157823889363,
      "loss": 1.3382,
      "step": 1210
    },
    {
      "epoch": 1.0535405872193437,
      "grad_norm": 0.9377408623695374,
      "learning_rate": 0.00016407323291190803,
      "loss": 1.187,
      "step": 1220
    },
    {
      "epoch": 1.0621761658031088,
      "grad_norm": 0.7849322557449341,
      "learning_rate": 0.00016329841623654434,
      "loss": 1.2647,
      "step": 1230
    },
    {
      "epoch": 1.0708117443868739,
      "grad_norm": 0.8397180438041687,
      "learning_rate": 0.00016251720646870443,
      "loss": 1.2102,
      "step": 1240
    },
    {
      "epoch": 1.079447322970639,
      "grad_norm": 0.9595755934715271,
      "learning_rate": 0.00016172968250998792,
      "loss": 1.1938,
      "step": 1250
    },
    {
      "epoch": 1.0880829015544042,
      "grad_norm": 0.7337958216667175,
      "learning_rate": 0.00016093592389972286,
      "loss": 1.2553,
      "step": 1260
    },
    {
      "epoch": 1.0967184801381693,
      "grad_norm": 0.7563393115997314,
      "learning_rate": 0.0001601360108069324,
      "loss": 1.2577,
      "step": 1270
    },
    {
      "epoch": 1.1053540587219344,
      "grad_norm": 0.8453429937362671,
      "learning_rate": 0.0001593300240222379,
      "loss": 1.2466,
      "step": 1280
    },
    {
      "epoch": 1.1139896373056994,
      "grad_norm": 0.8459578156471252,
      "learning_rate": 0.00015851804494969893,
      "loss": 1.2145,
      "step": 1290
    },
    {
      "epoch": 1.1226252158894645,
      "grad_norm": 0.9956552982330322,
      "learning_rate": 0.00015770015559859172,
      "loss": 1.1838,
      "step": 1300
    },
    {
      "epoch": 1.1226252158894645,
      "eval_loss": 1.1956804990768433,
      "eval_runtime": 74.2605,
      "eval_samples_per_second": 31.174,
      "eval_steps_per_second": 3.905,
      "step": 1300
    },
    {
      "epoch": 1.1312607944732298,
      "grad_norm": 1.0404267311096191,
      "learning_rate": 0.00015687643857512616,
      "loss": 1.2361,
      "step": 1310
    },
    {
      "epoch": 1.1398963730569949,
      "grad_norm": 1.0246553421020508,
      "learning_rate": 0.00015604697707410255,
      "loss": 1.1873,
      "step": 1320
    },
    {
      "epoch": 1.14853195164076,
      "grad_norm": 0.8831927180290222,
      "learning_rate": 0.0001552118548705094,
      "loss": 1.1783,
      "step": 1330
    },
    {
      "epoch": 1.157167530224525,
      "grad_norm": 0.9147486686706543,
      "learning_rate": 0.0001543711563110616,
      "loss": 1.1853,
      "step": 1340
    },
    {
      "epoch": 1.16580310880829,
      "grad_norm": 0.9496821165084839,
      "learning_rate": 0.000153524966305682,
      "loss": 1.1501,
      "step": 1350
    },
    {
      "epoch": 1.1744386873920551,
      "grad_norm": 0.9167485237121582,
      "learning_rate": 0.00015267337031892527,
      "loss": 1.2301,
      "step": 1360
    },
    {
      "epoch": 1.1830742659758204,
      "grad_norm": 0.861179769039154,
      "learning_rate": 0.0001518164543613462,
      "loss": 1.1827,
      "step": 1370
    },
    {
      "epoch": 1.1917098445595855,
      "grad_norm": 1.0012174844741821,
      "learning_rate": 0.00015095430498081257,
      "loss": 1.1598,
      "step": 1380
    },
    {
      "epoch": 1.2003454231433506,
      "grad_norm": 0.9084812998771667,
      "learning_rate": 0.000150087009253764,
      "loss": 1.1446,
      "step": 1390
    },
    {
      "epoch": 1.2089810017271156,
      "grad_norm": 0.9342795610427856,
      "learning_rate": 0.0001492146547764172,
      "loss": 1.1408,
      "step": 1400
    },
    {
      "epoch": 1.2089810017271156,
      "eval_loss": 1.1498360633850098,
      "eval_runtime": 74.2845,
      "eval_samples_per_second": 31.164,
      "eval_steps_per_second": 3.904,
      "step": 1400
    },
    {
      "epoch": 1.2176165803108807,
      "grad_norm": 1.0500714778900146,
      "learning_rate": 0.00014833732965591887,
      "loss": 1.1475,
      "step": 1410
    },
    {
      "epoch": 1.226252158894646,
      "grad_norm": 1.031998872756958,
      "learning_rate": 0.00014745512250144695,
      "loss": 1.121,
      "step": 1420
    },
    {
      "epoch": 1.234887737478411,
      "grad_norm": 1.0070405006408691,
      "learning_rate": 0.00014656812241526117,
      "loss": 1.1167,
      "step": 1430
    },
    {
      "epoch": 1.2435233160621761,
      "grad_norm": 1.0366291999816895,
      "learning_rate": 0.0001456764189837037,
      "loss": 1.1365,
      "step": 1440
    },
    {
      "epoch": 1.2521588946459412,
      "grad_norm": 0.9328962564468384,
      "learning_rate": 0.000144780102268151,
      "loss": 1.1804,
      "step": 1450
    },
    {
      "epoch": 1.2607944732297063,
      "grad_norm": 0.875531017780304,
      "learning_rate": 0.000143879262795918,
      "loss": 1.1061,
      "step": 1460
    },
    {
      "epoch": 1.2694300518134716,
      "grad_norm": 1.023848533630371,
      "learning_rate": 0.00014297399155111432,
      "loss": 1.0955,
      "step": 1470
    },
    {
      "epoch": 1.2780656303972366,
      "grad_norm": 0.9239136576652527,
      "learning_rate": 0.00014206437996545554,
      "loss": 1.1792,
      "step": 1480
    },
    {
      "epoch": 1.2867012089810017,
      "grad_norm": 0.9566736221313477,
      "learning_rate": 0.0001411505199090283,
      "loss": 1.1599,
      "step": 1490
    },
    {
      "epoch": 1.2953367875647668,
      "grad_norm": 0.8936079740524292,
      "learning_rate": 0.00014023250368101157,
      "loss": 1.0861,
      "step": 1500
    },
    {
      "epoch": 1.2953367875647668,
      "eval_loss": 1.0975605249404907,
      "eval_runtime": 74.2879,
      "eval_samples_per_second": 31.163,
      "eval_steps_per_second": 3.904,
      "step": 1500
    },
    {
      "epoch": 1.3039723661485318,
      "grad_norm": 0.7882747650146484,
      "learning_rate": 0.00013931042400035462,
      "loss": 1.0991,
      "step": 1510
    },
    {
      "epoch": 1.3126079447322971,
      "grad_norm": 1.0932565927505493,
      "learning_rate": 0.00013838437399641226,
      "loss": 1.1312,
      "step": 1520
    },
    {
      "epoch": 1.3212435233160622,
      "grad_norm": 0.849192202091217,
      "learning_rate": 0.00013745444719953908,
      "loss": 1.1094,
      "step": 1530
    },
    {
      "epoch": 1.3298791018998273,
      "grad_norm": 1.168214201927185,
      "learning_rate": 0.0001365207375316428,
      "loss": 1.0642,
      "step": 1540
    },
    {
      "epoch": 1.3385146804835923,
      "grad_norm": 0.8027725219726562,
      "learning_rate": 0.00013558333929669826,
      "loss": 1.0682,
      "step": 1550
    },
    {
      "epoch": 1.3471502590673574,
      "grad_norm": 0.9536592960357666,
      "learning_rate": 0.0001346423471712228,
      "loss": 1.1241,
      "step": 1560
    },
    {
      "epoch": 1.3557858376511227,
      "grad_norm": 1.0561705827713013,
      "learning_rate": 0.00013369785619471398,
      "loss": 1.1582,
      "step": 1570
    },
    {
      "epoch": 1.3644214162348878,
      "grad_norm": 1.3560823202133179,
      "learning_rate": 0.0001327499617600508,
      "loss": 1.0265,
      "step": 1580
    },
    {
      "epoch": 1.3730569948186528,
      "grad_norm": 1.0698766708374023,
      "learning_rate": 0.00013179875960385885,
      "loss": 1.0433,
      "step": 1590
    },
    {
      "epoch": 1.381692573402418,
      "grad_norm": 1.011797308921814,
      "learning_rate": 0.00013084434579684114,
      "loss": 1.0428,
      "step": 1600
    },
    {
      "epoch": 1.381692573402418,
      "eval_loss": 1.050079345703125,
      "eval_runtime": 74.2925,
      "eval_samples_per_second": 31.161,
      "eval_steps_per_second": 3.903,
      "step": 1600
    },
    {
      "epoch": 1.390328151986183,
      "grad_norm": 1.052328109741211,
      "learning_rate": 0.00012988681673407502,
      "loss": 1.0955,
      "step": 1610
    },
    {
      "epoch": 1.3989637305699483,
      "grad_norm": 0.975304126739502,
      "learning_rate": 0.0001289262691252763,
      "loss": 1.0776,
      "step": 1620
    },
    {
      "epoch": 1.4075993091537133,
      "grad_norm": 0.9506198763847351,
      "learning_rate": 0.00012796279998503174,
      "loss": 1.0708,
      "step": 1630
    },
    {
      "epoch": 1.4162348877374784,
      "grad_norm": 0.9697166085243225,
      "learning_rate": 0.0001269965066230005,
      "loss": 1.0098,
      "step": 1640
    },
    {
      "epoch": 1.4248704663212435,
      "grad_norm": 1.0433659553527832,
      "learning_rate": 0.00012602748663408613,
      "loss": 1.0346,
      "step": 1650
    },
    {
      "epoch": 1.4335060449050085,
      "grad_norm": 1.0530465841293335,
      "learning_rate": 0.00012505583788857924,
      "loss": 1.1224,
      "step": 1660
    },
    {
      "epoch": 1.4421416234887738,
      "grad_norm": 0.9902591705322266,
      "learning_rate": 0.0001240816585222731,
      "loss": 1.1215,
      "step": 1670
    },
    {
      "epoch": 1.450777202072539,
      "grad_norm": 0.9624248147010803,
      "learning_rate": 0.00012310504692655166,
      "loss": 1.028,
      "step": 1680
    },
    {
      "epoch": 1.459412780656304,
      "grad_norm": 1.3916789293289185,
      "learning_rate": 0.0001221261017384522,
      "loss": 1.0322,
      "step": 1690
    },
    {
      "epoch": 1.468048359240069,
      "grad_norm": 1.3031835556030273,
      "learning_rate": 0.00012114492183070323,
      "loss": 0.9959,
      "step": 1700
    },
    {
      "epoch": 1.468048359240069,
      "eval_loss": 1.0126487016677856,
      "eval_runtime": 74.2735,
      "eval_samples_per_second": 31.169,
      "eval_steps_per_second": 3.904,
      "step": 1700
    },
    {
      "epoch": 1.4766839378238341,
      "grad_norm": 0.9306958913803101,
      "learning_rate": 0.00012016160630173807,
      "loss": 1.0158,
      "step": 1710
    },
    {
      "epoch": 1.4853195164075994,
      "grad_norm": 1.1065701246261597,
      "learning_rate": 0.00011917625446568626,
      "loss": 1.0134,
      "step": 1720
    },
    {
      "epoch": 1.4939550949913645,
      "grad_norm": 0.9095447659492493,
      "learning_rate": 0.00011818896584234287,
      "loss": 1.0405,
      "step": 1730
    },
    {
      "epoch": 1.5025906735751295,
      "grad_norm": 1.1259651184082031,
      "learning_rate": 0.00011719984014711693,
      "loss": 1.0454,
      "step": 1740
    },
    {
      "epoch": 1.5112262521588946,
      "grad_norm": 1.0899256467819214,
      "learning_rate": 0.00011620897728096047,
      "loss": 1.0925,
      "step": 1750
    },
    {
      "epoch": 1.5198618307426597,
      "grad_norm": 1.173726201057434,
      "learning_rate": 0.00011521647732027843,
      "loss": 1.0111,
      "step": 1760
    },
    {
      "epoch": 1.528497409326425,
      "grad_norm": 0.9733538031578064,
      "learning_rate": 0.00011422244050682097,
      "loss": 1.0432,
      "step": 1770
    },
    {
      "epoch": 1.5371329879101898,
      "grad_norm": 1.2745634317398071,
      "learning_rate": 0.00011322696723755935,
      "loss": 1.035,
      "step": 1780
    },
    {
      "epoch": 1.545768566493955,
      "grad_norm": 0.9993700385093689,
      "learning_rate": 0.00011223015805454573,
      "loss": 1.0128,
      "step": 1790
    },
    {
      "epoch": 1.5544041450777202,
      "grad_norm": 1.0131609439849854,
      "learning_rate": 0.00011123211363475863,
      "loss": 1.0223,
      "step": 1800
    },
    {
      "epoch": 1.5544041450777202,
      "eval_loss": 0.9773589372634888,
      "eval_runtime": 74.3225,
      "eval_samples_per_second": 31.148,
      "eval_steps_per_second": 3.902,
      "step": 1800
    },
    {
      "epoch": 1.5630397236614852,
      "grad_norm": 0.9971020221710205,
      "learning_rate": 0.00011023293477993446,
      "loss": 1.0477,
      "step": 1810
    },
    {
      "epoch": 1.5716753022452505,
      "grad_norm": 0.9657288193702698,
      "learning_rate": 0.00010923272240638676,
      "loss": 1.0412,
      "step": 1820
    },
    {
      "epoch": 1.5803108808290154,
      "grad_norm": 1.0480608940124512,
      "learning_rate": 0.00010823157753481367,
      "loss": 1.0009,
      "step": 1830
    },
    {
      "epoch": 1.5889464594127807,
      "grad_norm": 1.0142576694488525,
      "learning_rate": 0.00010722960128009491,
      "loss": 1.0039,
      "step": 1840
    },
    {
      "epoch": 1.5975820379965457,
      "grad_norm": 1.158823847770691,
      "learning_rate": 0.00010622689484107935,
      "loss": 1.033,
      "step": 1850
    },
    {
      "epoch": 1.6062176165803108,
      "grad_norm": 1.025612473487854,
      "learning_rate": 0.00010522355949036386,
      "loss": 0.9911,
      "step": 1860
    },
    {
      "epoch": 1.614853195164076,
      "grad_norm": 1.2156877517700195,
      "learning_rate": 0.00010421969656406495,
      "loss": 0.9672,
      "step": 1870
    },
    {
      "epoch": 1.623488773747841,
      "grad_norm": 1.1628742218017578,
      "learning_rate": 0.00010321540745158382,
      "loss": 0.9499,
      "step": 1880
    },
    {
      "epoch": 1.6321243523316062,
      "grad_norm": 0.9938153624534607,
      "learning_rate": 0.00010221079358536619,
      "loss": 0.972,
      "step": 1890
    },
    {
      "epoch": 1.6407599309153713,
      "grad_norm": 1.0305359363555908,
      "learning_rate": 0.00010120595643065769,
      "loss": 0.9733,
      "step": 1900
    },
    {
      "epoch": 1.6407599309153713,
      "eval_loss": 0.9481803774833679,
      "eval_runtime": 74.2826,
      "eval_samples_per_second": 31.165,
      "eval_steps_per_second": 3.904,
      "step": 1900
    },
    {
      "epoch": 1.6493955094991364,
      "grad_norm": 1.048543930053711,
      "learning_rate": 0.00010020099747525586,
      "loss": 0.9864,
      "step": 1910
    },
    {
      "epoch": 1.6580310880829017,
      "grad_norm": 0.9962440729141235,
      "learning_rate": 9.919601821926009e-05,
      "loss": 0.9375,
      "step": 1920
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.939709484577179,
      "learning_rate": 9.819112016482001e-05,
      "loss": 1.0237,
      "step": 1930
    },
    {
      "epoch": 1.6753022452504318,
      "grad_norm": 0.9809345602989197,
      "learning_rate": 9.718640480588409e-05,
      "loss": 0.9283,
      "step": 1940
    },
    {
      "epoch": 1.6839378238341969,
      "grad_norm": 1.0643101930618286,
      "learning_rate": 9.618197361794854e-05,
      "loss": 0.9252,
      "step": 1950
    },
    {
      "epoch": 1.692573402417962,
      "grad_norm": 1.0364254713058472,
      "learning_rate": 9.517792804780867e-05,
      "loss": 0.9815,
      "step": 1960
    },
    {
      "epoch": 1.7012089810017272,
      "grad_norm": 1.0348941087722778,
      "learning_rate": 9.417436950331256e-05,
      "loss": 0.9443,
      "step": 1970
    },
    {
      "epoch": 1.709844559585492,
      "grad_norm": 0.9953681230545044,
      "learning_rate": 9.31713993431191e-05,
      "loss": 0.8284,
      "step": 1980
    },
    {
      "epoch": 1.7184801381692574,
      "grad_norm": 1.092307209968567,
      "learning_rate": 9.216911886646085e-05,
      "loss": 0.9878,
      "step": 1990
    },
    {
      "epoch": 1.7271157167530224,
      "grad_norm": 1.0429028272628784,
      "learning_rate": 9.116762930291282e-05,
      "loss": 0.9367,
      "step": 2000
    },
    {
      "epoch": 1.7271157167530224,
      "eval_loss": 0.9206886887550354,
      "eval_runtime": 76.0782,
      "eval_samples_per_second": 30.429,
      "eval_steps_per_second": 3.812,
      "step": 2000
    },
    {
      "epoch": 1.7357512953367875,
      "grad_norm": 1.346091389656067,
      "learning_rate": 9.016703180216834e-05,
      "loss": 0.9867,
      "step": 2010
    },
    {
      "epoch": 1.7443868739205528,
      "grad_norm": 1.3426605463027954,
      "learning_rate": 8.916742742382316e-05,
      "loss": 0.9452,
      "step": 2020
    },
    {
      "epoch": 1.7530224525043177,
      "grad_norm": 1.1281945705413818,
      "learning_rate": 8.816891712716834e-05,
      "loss": 0.9285,
      "step": 2030
    },
    {
      "epoch": 1.761658031088083,
      "grad_norm": 1.0097074508666992,
      "learning_rate": 8.717160176099358e-05,
      "loss": 0.9402,
      "step": 2040
    },
    {
      "epoch": 1.770293609671848,
      "grad_norm": 0.9986193180084229,
      "learning_rate": 8.617558205340144e-05,
      "loss": 0.9679,
      "step": 2050
    },
    {
      "epoch": 1.778929188255613,
      "grad_norm": 0.9868291020393372,
      "learning_rate": 8.518095860163395e-05,
      "loss": 0.9703,
      "step": 2060
    },
    {
      "epoch": 1.7875647668393784,
      "grad_norm": 1.131056785583496,
      "learning_rate": 8.418783186191236e-05,
      "loss": 0.9633,
      "step": 2070
    },
    {
      "epoch": 1.7962003454231432,
      "grad_norm": 1.214119791984558,
      "learning_rate": 8.31963021392911e-05,
      "loss": 1.0013,
      "step": 2080
    },
    {
      "epoch": 1.8048359240069085,
      "grad_norm": 1.0295566320419312,
      "learning_rate": 8.220646957752716e-05,
      "loss": 0.924,
      "step": 2090
    },
    {
      "epoch": 1.8134715025906736,
      "grad_norm": 1.0361146926879883,
      "learning_rate": 8.121843414896547e-05,
      "loss": 0.9298,
      "step": 2100
    },
    {
      "epoch": 1.8134715025906736,
      "eval_loss": 0.8968186974525452,
      "eval_runtime": 74.3145,
      "eval_samples_per_second": 31.151,
      "eval_steps_per_second": 3.902,
      "step": 2100
    },
    {
      "epoch": 1.8221070811744386,
      "grad_norm": 1.1014063358306885,
      "learning_rate": 8.023229564444188e-05,
      "loss": 0.868,
      "step": 2110
    },
    {
      "epoch": 1.830742659758204,
      "grad_norm": 1.1780112981796265,
      "learning_rate": 7.924815366320434e-05,
      "loss": 0.904,
      "step": 2120
    },
    {
      "epoch": 1.8393782383419688,
      "grad_norm": 1.1275811195373535,
      "learning_rate": 7.826610760285343e-05,
      "loss": 0.8838,
      "step": 2130
    },
    {
      "epoch": 1.848013816925734,
      "grad_norm": 1.2763252258300781,
      "learning_rate": 7.728625664930336e-05,
      "loss": 0.8688,
      "step": 2140
    },
    {
      "epoch": 1.8566493955094991,
      "grad_norm": 1.1182912588119507,
      "learning_rate": 7.630869976676413e-05,
      "loss": 0.9025,
      "step": 2150
    },
    {
      "epoch": 1.8652849740932642,
      "grad_norm": 1.165228247642517,
      "learning_rate": 7.533353568774634e-05,
      "loss": 0.9962,
      "step": 2160
    },
    {
      "epoch": 1.8739205526770295,
      "grad_norm": 1.0472362041473389,
      "learning_rate": 7.436086290308919e-05,
      "loss": 0.9682,
      "step": 2170
    },
    {
      "epoch": 1.8825561312607944,
      "grad_norm": 1.0311408042907715,
      "learning_rate": 7.339077965201305e-05,
      "loss": 0.9335,
      "step": 2180
    },
    {
      "epoch": 1.8911917098445596,
      "grad_norm": 1.0582579374313354,
      "learning_rate": 7.242338391219734e-05,
      "loss": 0.985,
      "step": 2190
    },
    {
      "epoch": 1.8998272884283247,
      "grad_norm": 0.923521876335144,
      "learning_rate": 7.145877338988487e-05,
      "loss": 0.9738,
      "step": 2200
    },
    {
      "epoch": 1.8998272884283247,
      "eval_loss": 0.872840166091919,
      "eval_runtime": 74.2854,
      "eval_samples_per_second": 31.164,
      "eval_steps_per_second": 3.904,
      "step": 2200
    },
    {
      "epoch": 1.9084628670120898,
      "grad_norm": 1.0986098051071167,
      "learning_rate": 7.049704551001358e-05,
      "loss": 0.9313,
      "step": 2210
    },
    {
      "epoch": 1.917098445595855,
      "grad_norm": 1.050269365310669,
      "learning_rate": 6.953829740637662e-05,
      "loss": 0.8601,
      "step": 2220
    },
    {
      "epoch": 1.92573402417962,
      "grad_norm": 1.2400585412979126,
      "learning_rate": 6.858262591181206e-05,
      "loss": 0.8907,
      "step": 2230
    },
    {
      "epoch": 1.9343696027633852,
      "grad_norm": 1.0596503019332886,
      "learning_rate": 6.763012754842277e-05,
      "loss": 0.9053,
      "step": 2240
    },
    {
      "epoch": 1.9430051813471503,
      "grad_norm": 0.9167270064353943,
      "learning_rate": 6.668089851782769e-05,
      "loss": 0.9776,
      "step": 2250
    },
    {
      "epoch": 1.9516407599309153,
      "grad_norm": 1.2059139013290405,
      "learning_rate": 6.573503469144566e-05,
      "loss": 0.8575,
      "step": 2260
    },
    {
      "epoch": 1.9602763385146806,
      "grad_norm": 1.0621919631958008,
      "learning_rate": 6.479263160081242e-05,
      "loss": 0.9437,
      "step": 2270
    },
    {
      "epoch": 1.9689119170984455,
      "grad_norm": 1.1505554914474487,
      "learning_rate": 6.385378442793188e-05,
      "loss": 0.8951,
      "step": 2280
    },
    {
      "epoch": 1.9775474956822108,
      "grad_norm": 1.052738904953003,
      "learning_rate": 6.29185879956632e-05,
      "loss": 0.8694,
      "step": 2290
    },
    {
      "epoch": 1.9861830742659758,
      "grad_norm": 0.9112501740455627,
      "learning_rate": 6.198713675814318e-05,
      "loss": 0.8679,
      "step": 2300
    },
    {
      "epoch": 1.9861830742659758,
      "eval_loss": 0.8568958640098572,
      "eval_runtime": 74.2813,
      "eval_samples_per_second": 31.165,
      "eval_steps_per_second": 3.904,
      "step": 2300
    },
    {
      "epoch": 1.994818652849741,
      "grad_norm": 0.9624518156051636,
      "learning_rate": 6.105952479124696e-05,
      "loss": 0.9374,
      "step": 2310
    },
    {
      "epoch": 2.003454231433506,
      "grad_norm": 1.1013827323913574,
      "learning_rate": 6.0135845783086145e-05,
      "loss": 0.8569,
      "step": 2320
    },
    {
      "epoch": 2.012089810017271,
      "grad_norm": 1.0752055644989014,
      "learning_rate": 5.921619302454645e-05,
      "loss": 0.9713,
      "step": 2330
    },
    {
      "epoch": 2.0207253886010363,
      "grad_norm": 1.123271107673645,
      "learning_rate": 5.830065939986553e-05,
      "loss": 0.8359,
      "step": 2340
    },
    {
      "epoch": 2.029360967184801,
      "grad_norm": 1.0255523920059204,
      "learning_rate": 5.73893373772515e-05,
      "loss": 0.8339,
      "step": 2350
    },
    {
      "epoch": 2.0379965457685665,
      "grad_norm": 1.169399619102478,
      "learning_rate": 5.6482318999543807e-05,
      "loss": 0.8717,
      "step": 2360
    },
    {
      "epoch": 2.0466321243523318,
      "grad_norm": 1.0524979829788208,
      "learning_rate": 5.5579695874917115e-05,
      "loss": 0.8328,
      "step": 2370
    },
    {
      "epoch": 2.0552677029360966,
      "grad_norm": 0.9218592047691345,
      "learning_rate": 5.468155916762869e-05,
      "loss": 0.8556,
      "step": 2380
    },
    {
      "epoch": 2.063903281519862,
      "grad_norm": 1.2179642915725708,
      "learning_rate": 5.3787999588811136e-05,
      "loss": 0.8256,
      "step": 2390
    },
    {
      "epoch": 2.0725388601036268,
      "grad_norm": 1.2335243225097656,
      "learning_rate": 5.28991073873105e-05,
      "loss": 0.891,
      "step": 2400
    },
    {
      "epoch": 2.0725388601036268,
      "eval_loss": 0.8398398756980896,
      "eval_runtime": 74.3373,
      "eval_samples_per_second": 31.142,
      "eval_steps_per_second": 3.901,
      "step": 2400
    },
    {
      "epoch": 2.081174438687392,
      "grad_norm": 1.2605432271957397,
      "learning_rate": 5.201497234057111e-05,
      "loss": 0.7942,
      "step": 2410
    },
    {
      "epoch": 2.0898100172711573,
      "grad_norm": 1.0830875635147095,
      "learning_rate": 5.1135683745568455e-05,
      "loss": 0.8772,
      "step": 2420
    },
    {
      "epoch": 2.098445595854922,
      "grad_norm": 0.9472030997276306,
      "learning_rate": 5.02613304097898e-05,
      "loss": 0.926,
      "step": 2430
    },
    {
      "epoch": 2.1070811744386875,
      "grad_norm": 1.091093897819519,
      "learning_rate": 4.939200064226509e-05,
      "loss": 0.8607,
      "step": 2440
    },
    {
      "epoch": 2.1157167530224523,
      "grad_norm": 1.186557412147522,
      "learning_rate": 4.8527782244647656e-05,
      "loss": 0.9168,
      "step": 2450
    },
    {
      "epoch": 2.1243523316062176,
      "grad_norm": 1.1364177465438843,
      "learning_rate": 4.766876250234621e-05,
      "loss": 0.8785,
      "step": 2460
    },
    {
      "epoch": 2.132987910189983,
      "grad_norm": 1.1587848663330078,
      "learning_rate": 4.681502817570929e-05,
      "loss": 0.8479,
      "step": 2470
    },
    {
      "epoch": 2.1416234887737478,
      "grad_norm": 1.1975603103637695,
      "learning_rate": 4.59666654912623e-05,
      "loss": 0.9014,
      "step": 2480
    },
    {
      "epoch": 2.150259067357513,
      "grad_norm": 1.0785084962844849,
      "learning_rate": 4.512376013299895e-05,
      "loss": 0.8464,
      "step": 2490
    },
    {
      "epoch": 2.158894645941278,
      "grad_norm": 1.0377299785614014,
      "learning_rate": 4.428639723372706e-05,
      "loss": 0.8461,
      "step": 2500
    },
    {
      "epoch": 2.158894645941278,
      "eval_loss": 0.8259330987930298,
      "eval_runtime": 74.3192,
      "eval_samples_per_second": 31.149,
      "eval_steps_per_second": 3.902,
      "step": 2500
    },
    {
      "epoch": 2.167530224525043,
      "grad_norm": 1.2178107500076294,
      "learning_rate": 4.345466136647018e-05,
      "loss": 0.7985,
      "step": 2510
    },
    {
      "epoch": 2.1761658031088085,
      "grad_norm": 1.0462040901184082,
      "learning_rate": 4.2628636535926005e-05,
      "loss": 0.8091,
      "step": 2520
    },
    {
      "epoch": 2.1848013816925733,
      "grad_norm": 1.0872950553894043,
      "learning_rate": 4.180840616998164e-05,
      "loss": 0.8729,
      "step": 2530
    },
    {
      "epoch": 2.1934369602763386,
      "grad_norm": 1.2299045324325562,
      "learning_rate": 4.099405311128774e-05,
      "loss": 0.8864,
      "step": 2540
    },
    {
      "epoch": 2.2020725388601035,
      "grad_norm": 1.0725489854812622,
      "learning_rate": 4.018565960889137e-05,
      "loss": 0.9033,
      "step": 2550
    },
    {
      "epoch": 2.2107081174438687,
      "grad_norm": 1.1338095664978027,
      "learning_rate": 3.9383307309928744e-05,
      "loss": 0.8792,
      "step": 2560
    },
    {
      "epoch": 2.219343696027634,
      "grad_norm": 1.0339998006820679,
      "learning_rate": 3.858707725137921e-05,
      "loss": 0.8888,
      "step": 2570
    },
    {
      "epoch": 2.227979274611399,
      "grad_norm": 1.1130526065826416,
      "learning_rate": 3.7797049851880325e-05,
      "loss": 0.7557,
      "step": 2580
    },
    {
      "epoch": 2.236614853195164,
      "grad_norm": 1.013401746749878,
      "learning_rate": 3.701330490360583e-05,
      "loss": 0.8868,
      "step": 2590
    },
    {
      "epoch": 2.245250431778929,
      "grad_norm": 1.0278291702270508,
      "learning_rate": 3.623592156420661e-05,
      "loss": 0.8474,
      "step": 2600
    },
    {
      "epoch": 2.245250431778929,
      "eval_loss": 0.8156528472900391,
      "eval_runtime": 74.3107,
      "eval_samples_per_second": 31.153,
      "eval_steps_per_second": 3.903,
      "step": 2600
    },
    {
      "epoch": 2.2538860103626943,
      "grad_norm": 1.040278673171997,
      "learning_rate": 3.546497834881572e-05,
      "loss": 0.8268,
      "step": 2610
    },
    {
      "epoch": 2.2625215889464596,
      "grad_norm": 1.1358767747879028,
      "learning_rate": 3.4700553122118714e-05,
      "loss": 0.8267,
      "step": 2620
    },
    {
      "epoch": 2.2711571675302245,
      "grad_norm": 1.1165881156921387,
      "learning_rate": 3.394272309048895e-05,
      "loss": 0.9085,
      "step": 2630
    },
    {
      "epoch": 2.2797927461139897,
      "grad_norm": 0.9508546590805054,
      "learning_rate": 3.319156479419032e-05,
      "loss": 0.8471,
      "step": 2640
    },
    {
      "epoch": 2.2884283246977546,
      "grad_norm": 1.192872166633606,
      "learning_rate": 3.244715409964625e-05,
      "loss": 0.8641,
      "step": 2650
    },
    {
      "epoch": 2.29706390328152,
      "grad_norm": 1.092782735824585,
      "learning_rate": 3.170956619177749e-05,
      "loss": 0.8154,
      "step": 2660
    },
    {
      "epoch": 2.305699481865285,
      "grad_norm": 1.425848126411438,
      "learning_rate": 3.097887556640855e-05,
      "loss": 0.8828,
      "step": 2670
    },
    {
      "epoch": 2.31433506044905,
      "grad_norm": 1.099098801612854,
      "learning_rate": 3.025515602274346e-05,
      "loss": 0.8424,
      "step": 2680
    },
    {
      "epoch": 2.3229706390328153,
      "grad_norm": 1.2581557035446167,
      "learning_rate": 2.9538480655912415e-05,
      "loss": 0.8606,
      "step": 2690
    },
    {
      "epoch": 2.33160621761658,
      "grad_norm": 1.152100682258606,
      "learning_rate": 2.8828921849588898e-05,
      "loss": 0.8429,
      "step": 2700
    },
    {
      "epoch": 2.33160621761658,
      "eval_loss": 0.8052871227264404,
      "eval_runtime": 74.353,
      "eval_samples_per_second": 31.135,
      "eval_steps_per_second": 3.9,
      "step": 2700
    },
    {
      "epoch": 2.3402417962003454,
      "grad_norm": 1.0980738401412964,
      "learning_rate": 2.8126551268679134e-05,
      "loss": 0.8846,
      "step": 2710
    },
    {
      "epoch": 2.3488773747841103,
      "grad_norm": 0.9057421684265137,
      "learning_rate": 2.7431439852084072e-05,
      "loss": 0.8655,
      "step": 2720
    },
    {
      "epoch": 2.3575129533678756,
      "grad_norm": 1.0247701406478882,
      "learning_rate": 2.6743657805534396e-05,
      "loss": 0.7428,
      "step": 2730
    },
    {
      "epoch": 2.366148531951641,
      "grad_norm": 1.1374061107635498,
      "learning_rate": 2.6063274594500086e-05,
      "loss": 0.8294,
      "step": 2740
    },
    {
      "epoch": 2.3747841105354057,
      "grad_norm": 1.0613446235656738,
      "learning_rate": 2.5390358937174165e-05,
      "loss": 0.8164,
      "step": 2750
    },
    {
      "epoch": 2.383419689119171,
      "grad_norm": 1.053713083267212,
      "learning_rate": 2.472497879753235e-05,
      "loss": 0.8204,
      "step": 2760
    },
    {
      "epoch": 2.3920552677029363,
      "grad_norm": 1.1319301128387451,
      "learning_rate": 2.4067201378468807e-05,
      "loss": 0.7942,
      "step": 2770
    },
    {
      "epoch": 2.400690846286701,
      "grad_norm": 1.3451365232467651,
      "learning_rate": 2.3417093115008525e-05,
      "loss": 0.8676,
      "step": 2780
    },
    {
      "epoch": 2.4093264248704664,
      "grad_norm": 0.9686126112937927,
      "learning_rate": 2.277471966759771e-05,
      "loss": 0.8581,
      "step": 2790
    },
    {
      "epoch": 2.4179620034542313,
      "grad_norm": 1.1396652460098267,
      "learning_rate": 2.2140145915471778e-05,
      "loss": 0.9025,
      "step": 2800
    },
    {
      "epoch": 2.4179620034542313,
      "eval_loss": 0.7989487051963806,
      "eval_runtime": 74.4139,
      "eval_samples_per_second": 31.11,
      "eval_steps_per_second": 3.897,
      "step": 2800
    },
    {
      "epoch": 2.4265975820379966,
      "grad_norm": 1.4245641231536865,
      "learning_rate": 2.1513435950102924e-05,
      "loss": 0.793,
      "step": 2810
    },
    {
      "epoch": 2.4352331606217614,
      "grad_norm": 1.1148205995559692,
      "learning_rate": 2.0894653068726688e-05,
      "loss": 0.8414,
      "step": 2820
    },
    {
      "epoch": 2.4438687392055267,
      "grad_norm": 1.168946385383606,
      "learning_rate": 2.0283859767949078e-05,
      "loss": 0.8287,
      "step": 2830
    },
    {
      "epoch": 2.452504317789292,
      "grad_norm": 1.1215981245040894,
      "learning_rate": 1.9681117737434606e-05,
      "loss": 0.8029,
      "step": 2840
    },
    {
      "epoch": 2.461139896373057,
      "grad_norm": 1.2339212894439697,
      "learning_rate": 1.9086487853675382e-05,
      "loss": 0.8861,
      "step": 2850
    },
    {
      "epoch": 2.469775474956822,
      "grad_norm": 1.1001675128936768,
      "learning_rate": 1.8500030173842885e-05,
      "loss": 0.865,
      "step": 2860
    },
    {
      "epoch": 2.4784110535405874,
      "grad_norm": 1.1429920196533203,
      "learning_rate": 1.7921803929722082e-05,
      "loss": 0.8753,
      "step": 2870
    },
    {
      "epoch": 2.4870466321243523,
      "grad_norm": 1.0880659818649292,
      "learning_rate": 1.7351867521729072e-05,
      "loss": 0.8774,
      "step": 2880
    },
    {
      "epoch": 2.4956822107081176,
      "grad_norm": 0.9570063948631287,
      "learning_rate": 1.6790278513012925e-05,
      "loss": 0.8067,
      "step": 2890
    },
    {
      "epoch": 2.5043177892918824,
      "grad_norm": 1.0723425149917603,
      "learning_rate": 1.6237093623641443e-05,
      "loss": 0.801,
      "step": 2900
    },
    {
      "epoch": 2.5043177892918824,
      "eval_loss": 0.7922360301017761,
      "eval_runtime": 74.2817,
      "eval_samples_per_second": 31.165,
      "eval_steps_per_second": 3.904,
      "step": 2900
    },
    {
      "epoch": 2.5129533678756477,
      "grad_norm": 1.1429632902145386,
      "learning_rate": 1.569236872487283e-05,
      "loss": 0.7751,
      "step": 2910
    },
    {
      "epoch": 2.5215889464594126,
      "grad_norm": 1.1071122884750366,
      "learning_rate": 1.5156158833512523e-05,
      "loss": 0.7598,
      "step": 2920
    },
    {
      "epoch": 2.530224525043178,
      "grad_norm": 1.057666301727295,
      "learning_rate": 1.462851810635658e-05,
      "loss": 0.8577,
      "step": 2930
    },
    {
      "epoch": 2.538860103626943,
      "grad_norm": 1.0120848417282104,
      "learning_rate": 1.410949983472205e-05,
      "loss": 0.8649,
      "step": 2940
    },
    {
      "epoch": 2.547495682210708,
      "grad_norm": 1.1220532655715942,
      "learning_rate": 1.3599156439064309e-05,
      "loss": 0.8426,
      "step": 2950
    },
    {
      "epoch": 2.5561312607944733,
      "grad_norm": 1.1136960983276367,
      "learning_rate": 1.3097539463682874e-05,
      "loss": 0.8952,
      "step": 2960
    },
    {
      "epoch": 2.5647668393782386,
      "grad_norm": 1.004520297050476,
      "learning_rate": 1.26046995715153e-05,
      "loss": 0.8849,
      "step": 2970
    },
    {
      "epoch": 2.5734024179620034,
      "grad_norm": 1.01373291015625,
      "learning_rate": 1.2120686539020376e-05,
      "loss": 0.8147,
      "step": 2980
    },
    {
      "epoch": 2.5820379965457687,
      "grad_norm": 1.0711711645126343,
      "learning_rate": 1.1645549251150711e-05,
      "loss": 0.7414,
      "step": 2990
    },
    {
      "epoch": 2.5906735751295336,
      "grad_norm": 1.0850844383239746,
      "learning_rate": 1.1179335696415306e-05,
      "loss": 0.8152,
      "step": 3000
    },
    {
      "epoch": 2.5906735751295336,
      "eval_loss": 0.7877171635627747,
      "eval_runtime": 74.2634,
      "eval_samples_per_second": 31.173,
      "eval_steps_per_second": 3.905,
      "step": 3000
    },
    {
      "epoch": 2.599309153713299,
      "grad_norm": 1.0988068580627441,
      "learning_rate": 1.0722092962032927e-05,
      "loss": 0.7355,
      "step": 3010
    },
    {
      "epoch": 2.6079447322970637,
      "grad_norm": 1.098163366317749,
      "learning_rate": 1.0273867229176094e-05,
      "loss": 0.7886,
      "step": 3020
    },
    {
      "epoch": 2.616580310880829,
      "grad_norm": 1.2323061227798462,
      "learning_rate": 9.834703768307063e-06,
      "loss": 0.7491,
      "step": 3030
    },
    {
      "epoch": 2.6252158894645943,
      "grad_norm": 0.9928609728813171,
      "learning_rate": 9.404646934605399e-06,
      "loss": 0.921,
      "step": 3040
    },
    {
      "epoch": 2.633851468048359,
      "grad_norm": 1.285148024559021,
      "learning_rate": 8.983740163488107e-06,
      "loss": 0.8066,
      "step": 3050
    },
    {
      "epoch": 2.6424870466321244,
      "grad_norm": 1.1035611629486084,
      "learning_rate": 8.572025966222841e-06,
      "loss": 0.8209,
      "step": 3060
    },
    {
      "epoch": 2.6511226252158897,
      "grad_norm": 0.8766908049583435,
      "learning_rate": 8.169545925634115e-06,
      "loss": 0.8807,
      "step": 3070
    },
    {
      "epoch": 2.6597582037996546,
      "grad_norm": 1.2940890789031982,
      "learning_rate": 7.776340691903604e-06,
      "loss": 0.875,
      "step": 3080
    },
    {
      "epoch": 2.66839378238342,
      "grad_norm": 1.0446336269378662,
      "learning_rate": 7.392449978464478e-06,
      "loss": 0.789,
      "step": 3090
    },
    {
      "epoch": 2.6770293609671847,
      "grad_norm": 1.1468182802200317,
      "learning_rate": 7.0179125579902915e-06,
      "loss": 0.8416,
      "step": 3100
    },
    {
      "epoch": 2.6770293609671847,
      "eval_loss": 0.7846628427505493,
      "eval_runtime": 74.271,
      "eval_samples_per_second": 31.17,
      "eval_steps_per_second": 3.905,
      "step": 3100
    },
    {
      "epoch": 2.68566493955095,
      "grad_norm": 0.9985455870628357,
      "learning_rate": 6.652766258479126e-06,
      "loss": 0.7779,
      "step": 3110
    },
    {
      "epoch": 2.694300518134715,
      "grad_norm": 1.0844076871871948,
      "learning_rate": 6.2970479594328e-06,
      "loss": 0.7998,
      "step": 3120
    },
    {
      "epoch": 2.70293609671848,
      "grad_norm": 1.0988919734954834,
      "learning_rate": 5.950793588132253e-06,
      "loss": 0.7566,
      "step": 3130
    },
    {
      "epoch": 2.7115716753022454,
      "grad_norm": 1.1833341121673584,
      "learning_rate": 5.614038116008824e-06,
      "loss": 0.7846,
      "step": 3140
    },
    {
      "epoch": 2.7202072538860103,
      "grad_norm": 1.0258663892745972,
      "learning_rate": 5.286815555112101e-06,
      "loss": 0.7837,
      "step": 3150
    },
    {
      "epoch": 2.7288428324697755,
      "grad_norm": 1.0312175750732422,
      "learning_rate": 4.969158954674902e-06,
      "loss": 0.8348,
      "step": 3160
    },
    {
      "epoch": 2.737478411053541,
      "grad_norm": 1.152564287185669,
      "learning_rate": 4.6611003977751425e-06,
      "loss": 0.8359,
      "step": 3170
    },
    {
      "epoch": 2.7461139896373057,
      "grad_norm": 1.0136488676071167,
      "learning_rate": 4.362670998095597e-06,
      "loss": 0.8209,
      "step": 3180
    },
    {
      "epoch": 2.754749568221071,
      "grad_norm": 0.9231327176094055,
      "learning_rate": 4.073900896781402e-06,
      "loss": 0.8148,
      "step": 3190
    },
    {
      "epoch": 2.763385146804836,
      "grad_norm": 0.9632856845855713,
      "learning_rate": 3.7948192593957877e-06,
      "loss": 0.7827,
      "step": 3200
    },
    {
      "epoch": 2.763385146804836,
      "eval_loss": 0.7829655408859253,
      "eval_runtime": 74.3256,
      "eval_samples_per_second": 31.147,
      "eval_steps_per_second": 3.902,
      "step": 3200
    },
    {
      "epoch": 2.772020725388601,
      "grad_norm": 0.9402443170547485,
      "learning_rate": 3.525454272974427e-06,
      "loss": 0.8453,
      "step": 3210
    },
    {
      "epoch": 2.780656303972366,
      "grad_norm": 1.1812800168991089,
      "learning_rate": 3.265833143178543e-06,
      "loss": 0.8018,
      "step": 3220
    },
    {
      "epoch": 2.7892918825561313,
      "grad_norm": 0.9228396415710449,
      "learning_rate": 3.0159820915471426e-06,
      "loss": 0.7994,
      "step": 3230
    },
    {
      "epoch": 2.7979274611398965,
      "grad_norm": 0.9304484724998474,
      "learning_rate": 2.7759263528487345e-06,
      "loss": 0.744,
      "step": 3240
    },
    {
      "epoch": 2.8065630397236614,
      "grad_norm": 1.064064621925354,
      "learning_rate": 2.5456901725325224e-06,
      "loss": 0.8863,
      "step": 3250
    },
    {
      "epoch": 2.8151986183074267,
      "grad_norm": 1.0431768894195557,
      "learning_rate": 2.3252968042797083e-06,
      "loss": 0.8184,
      "step": 3260
    },
    {
      "epoch": 2.823834196891192,
      "grad_norm": 1.1347994804382324,
      "learning_rate": 2.114768507654885e-06,
      "loss": 0.8135,
      "step": 3270
    },
    {
      "epoch": 2.832469775474957,
      "grad_norm": 1.2335035800933838,
      "learning_rate": 1.9141265458578196e-06,
      "loss": 0.8298,
      "step": 3280
    },
    {
      "epoch": 2.8411053540587217,
      "grad_norm": 0.9481128454208374,
      "learning_rate": 1.7233911835758843e-06,
      "loss": 0.8401,
      "step": 3290
    },
    {
      "epoch": 2.849740932642487,
      "grad_norm": 1.182274580001831,
      "learning_rate": 1.5425816849373386e-06,
      "loss": 0.7997,
      "step": 3300
    },
    {
      "epoch": 2.849740932642487,
      "eval_loss": 0.7820163369178772,
      "eval_runtime": 74.5484,
      "eval_samples_per_second": 31.054,
      "eval_steps_per_second": 3.89,
      "step": 3300
    },
    {
      "epoch": 2.8583765112262522,
      "grad_norm": 1.0886763334274292,
      "learning_rate": 1.3717163115656962e-06,
      "loss": 0.78,
      "step": 3310
    },
    {
      "epoch": 2.867012089810017,
      "grad_norm": 0.9863650798797607,
      "learning_rate": 1.2108123207352662e-06,
      "loss": 0.8321,
      "step": 3320
    },
    {
      "epoch": 2.8756476683937824,
      "grad_norm": 1.0511281490325928,
      "learning_rate": 1.0598859636282156e-06,
      "loss": 0.8616,
      "step": 3330
    },
    {
      "epoch": 2.8842832469775477,
      "grad_norm": 1.1429880857467651,
      "learning_rate": 9.189524836932029e-07,
      "loss": 0.8199,
      "step": 3340
    },
    {
      "epoch": 2.8929188255613125,
      "grad_norm": 1.071950912475586,
      "learning_rate": 7.88026115105811e-07,
      "loss": 0.8046,
      "step": 3350
    },
    {
      "epoch": 2.901554404145078,
      "grad_norm": 1.19888174533844,
      "learning_rate": 6.671200813308742e-07,
      "loss": 0.8907,
      "step": 3360
    },
    {
      "epoch": 2.910189982728843,
      "grad_norm": 0.9800041317939758,
      "learning_rate": 5.562465937869577e-07,
      "loss": 0.8213,
      "step": 3370
    },
    {
      "epoch": 2.918825561312608,
      "grad_norm": 1.047254204750061,
      "learning_rate": 4.5541685061299964e-07,
      "loss": 0.8474,
      "step": 3380
    },
    {
      "epoch": 2.927461139896373,
      "grad_norm": 1.0879237651824951,
      "learning_rate": 3.646410355372831e-07,
      "loss": 0.7963,
      "step": 3390
    },
    {
      "epoch": 2.936096718480138,
      "grad_norm": 1.0805079936981201,
      "learning_rate": 2.8392831684891374e-07,
      "loss": 0.8146,
      "step": 3400
    },
    {
      "epoch": 2.936096718480138,
      "eval_loss": 0.7816377282142639,
      "eval_runtime": 74.3018,
      "eval_samples_per_second": 31.157,
      "eval_steps_per_second": 3.903,
      "step": 3400
    }
  ],
  "logging_steps": 10,
  "max_steps": 3474,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6308483890777948e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}