| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 12.0, |
| "eval_steps": 500, |
| "global_step": 5616, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002136752136752137, |
| "grad_norm": 6.609935760498047, |
| "learning_rate": 9.998219373219375e-05, |
| "loss": 10.1057, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.10683760683760683, |
| "grad_norm": 15.156593322753906, |
| "learning_rate": 9.910968660968662e-05, |
| "loss": 6.382, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.21367521367521367, |
| "grad_norm": 12.51414966583252, |
| "learning_rate": 9.821937321937323e-05, |
| "loss": 3.4357, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.32051282051282054, |
| "grad_norm": 2.1484034061431885, |
| "learning_rate": 9.732905982905983e-05, |
| "loss": 1.354, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.42735042735042733, |
| "grad_norm": 0.6900449395179749, |
| "learning_rate": 9.643874643874644e-05, |
| "loss": 0.7456, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5341880341880342, |
| "grad_norm": 0.8403804302215576, |
| "learning_rate": 9.554843304843305e-05, |
| "loss": 0.7155, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "grad_norm": 0.5271189212799072, |
| "learning_rate": 9.465811965811966e-05, |
| "loss": 0.6685, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7478632478632479, |
| "grad_norm": 0.5689147114753723, |
| "learning_rate": 9.376780626780627e-05, |
| "loss": 0.6531, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8547008547008547, |
| "grad_norm": 0.6126474738121033, |
| "learning_rate": 9.287749287749287e-05, |
| "loss": 0.6391, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9615384615384616, |
| "grad_norm": 0.4521735906600952, |
| "learning_rate": 9.198717948717949e-05, |
| "loss": 0.6862, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_bleu": 0.04672201686543051, |
| "eval_loss": 0.5959261655807495, |
| "eval_rouge1": 0.1002624774361735, |
| "eval_rouge2": 0.04186894193526817, |
| "eval_rougeL": 0.0864802858192733, |
| "eval_rougeLsum": 0.08708246891026986, |
| "eval_runtime": 119.4408, |
| "eval_sacrebleu": 4.6722016865430485, |
| "eval_samples_per_second": 3.483, |
| "eval_steps_per_second": 0.435, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0683760683760684, |
| "grad_norm": 0.5592613220214844, |
| "learning_rate": 9.10968660968661e-05, |
| "loss": 0.6163, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1752136752136753, |
| "grad_norm": 0.5470952391624451, |
| "learning_rate": 9.020655270655272e-05, |
| "loss": 0.5398, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.282051282051282, |
| "grad_norm": 0.8431143164634705, |
| "learning_rate": 8.931623931623932e-05, |
| "loss": 0.5341, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 0.952499508857727, |
| "learning_rate": 8.842592592592593e-05, |
| "loss": 0.5174, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4957264957264957, |
| "grad_norm": 0.6722558736801147, |
| "learning_rate": 8.753561253561254e-05, |
| "loss": 0.485, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.6025641025641026, |
| "grad_norm": 0.6445268392562866, |
| "learning_rate": 8.664529914529916e-05, |
| "loss": 0.5493, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.7094017094017095, |
| "grad_norm": 0.6697048544883728, |
| "learning_rate": 8.575498575498576e-05, |
| "loss": 0.5587, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8162393162393162, |
| "grad_norm": 0.39390167593955994, |
| "learning_rate": 8.486467236467237e-05, |
| "loss": 0.5105, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 0.6466375589370728, |
| "learning_rate": 8.397435897435898e-05, |
| "loss": 0.5139, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_bleu": 0.016857307800214223, |
| "eval_loss": 0.5903982520103455, |
| "eval_rouge1": 0.1420201153596669, |
| "eval_rouge2": 0.0579418436167993, |
| "eval_rougeL": 0.12100579265783531, |
| "eval_rougeLsum": 0.12159562644037325, |
| "eval_runtime": 118.3383, |
| "eval_sacrebleu": 1.6857307800214218, |
| "eval_samples_per_second": 3.515, |
| "eval_steps_per_second": 0.439, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.02991452991453, |
| "grad_norm": 1.3548823595046997, |
| "learning_rate": 8.308404558404559e-05, |
| "loss": 0.4904, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.1367521367521367, |
| "grad_norm": 0.5524053573608398, |
| "learning_rate": 8.21937321937322e-05, |
| "loss": 0.422, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.2435897435897436, |
| "grad_norm": 0.6330443024635315, |
| "learning_rate": 8.13034188034188e-05, |
| "loss": 0.3609, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.3504273504273505, |
| "grad_norm": 0.4444356858730316, |
| "learning_rate": 8.041310541310541e-05, |
| "loss": 0.3737, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.4572649572649574, |
| "grad_norm": 0.47359105944633484, |
| "learning_rate": 7.952279202279203e-05, |
| "loss": 0.4244, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.564102564102564, |
| "grad_norm": 0.9119735956192017, |
| "learning_rate": 7.863247863247864e-05, |
| "loss": 0.4327, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.6709401709401708, |
| "grad_norm": 0.565905749797821, |
| "learning_rate": 7.774216524216525e-05, |
| "loss": 0.4251, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 0.8833394050598145, |
| "learning_rate": 7.685185185185185e-05, |
| "loss": 0.3912, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.8846153846153846, |
| "grad_norm": 0.5193057656288147, |
| "learning_rate": 7.596153846153846e-05, |
| "loss": 0.3983, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.9914529914529915, |
| "grad_norm": 0.5576235055923462, |
| "learning_rate": 7.507122507122507e-05, |
| "loss": 0.4458, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_bleu": 0.02090423126560895, |
| "eval_loss": 0.6054907441139221, |
| "eval_rouge1": 0.14516790628410786, |
| "eval_rouge2": 0.06127825912334307, |
| "eval_rougeL": 0.12493260250258728, |
| "eval_rougeLsum": 0.125945256795481, |
| "eval_runtime": 117.4378, |
| "eval_sacrebleu": 2.090423126560895, |
| "eval_samples_per_second": 3.542, |
| "eval_steps_per_second": 0.443, |
| "step": 1404 |
| }, |
| { |
| "epoch": 3.0982905982905984, |
| "grad_norm": 0.548673152923584, |
| "learning_rate": 7.418091168091168e-05, |
| "loss": 0.3028, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.2051282051282053, |
| "grad_norm": 0.5551701188087463, |
| "learning_rate": 7.32905982905983e-05, |
| "loss": 0.3121, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.3119658119658117, |
| "grad_norm": 0.77285236120224, |
| "learning_rate": 7.240028490028491e-05, |
| "loss": 0.3426, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.4188034188034186, |
| "grad_norm": 0.5966659784317017, |
| "learning_rate": 7.150997150997152e-05, |
| "loss": 0.316, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.5256410256410255, |
| "grad_norm": 0.5765504240989685, |
| "learning_rate": 7.061965811965813e-05, |
| "loss": 0.3229, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.6324786324786325, |
| "grad_norm": 0.4516245722770691, |
| "learning_rate": 6.972934472934474e-05, |
| "loss": 0.3153, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.7393162393162394, |
| "grad_norm": 0.6088860034942627, |
| "learning_rate": 6.883903133903134e-05, |
| "loss": 0.3197, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 0.5132762789726257, |
| "learning_rate": 6.794871794871795e-05, |
| "loss": 0.3189, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.952991452991453, |
| "grad_norm": 0.9502617716789246, |
| "learning_rate": 6.705840455840457e-05, |
| "loss": 0.3306, |
| "step": 1850 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_bleu": 0.011934404928163667, |
| "eval_loss": 0.6525160074234009, |
| "eval_rouge1": 0.08905472709315013, |
| "eval_rouge2": 0.03442774422531769, |
| "eval_rougeL": 0.07354540899487223, |
| "eval_rougeLsum": 0.07383306372734402, |
| "eval_runtime": 118.2147, |
| "eval_sacrebleu": 1.1934404928163673, |
| "eval_samples_per_second": 3.519, |
| "eval_steps_per_second": 0.44, |
| "step": 1872 |
| }, |
| { |
| "epoch": 4.05982905982906, |
| "grad_norm": 0.5066898465156555, |
| "learning_rate": 6.616809116809118e-05, |
| "loss": 0.2767, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 0.7272780537605286, |
| "learning_rate": 6.527777777777778e-05, |
| "loss": 0.2513, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.273504273504273, |
| "grad_norm": 0.7428900599479675, |
| "learning_rate": 6.438746438746439e-05, |
| "loss": 0.2388, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.380341880341881, |
| "grad_norm": 0.6635400652885437, |
| "learning_rate": 6.3497150997151e-05, |
| "loss": 0.2486, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.487179487179487, |
| "grad_norm": 1.197430968284607, |
| "learning_rate": 6.260683760683761e-05, |
| "loss": 0.2518, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.594017094017094, |
| "grad_norm": 0.6132161021232605, |
| "learning_rate": 6.171652421652422e-05, |
| "loss": 0.261, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.700854700854701, |
| "grad_norm": 0.8142279982566833, |
| "learning_rate": 6.082621082621083e-05, |
| "loss": 0.2483, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.8076923076923075, |
| "grad_norm": 0.6046409606933594, |
| "learning_rate": 5.9935897435897434e-05, |
| "loss": 0.2468, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.914529914529915, |
| "grad_norm": 0.6721770167350769, |
| "learning_rate": 5.9045584045584046e-05, |
| "loss": 0.2362, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_bleu": 0.01982264823992269, |
| "eval_loss": 0.6933444142341614, |
| "eval_rouge1": 0.1119075273549715, |
| "eval_rouge2": 0.04113128900127839, |
| "eval_rougeL": 0.09342164825270136, |
| "eval_rougeLsum": 0.09348866145324419, |
| "eval_runtime": 117.21, |
| "eval_sacrebleu": 1.982264823992269, |
| "eval_samples_per_second": 3.549, |
| "eval_steps_per_second": 0.444, |
| "step": 2340 |
| }, |
| { |
| "epoch": 5.021367521367521, |
| "grad_norm": 0.5348103642463684, |
| "learning_rate": 5.815527065527066e-05, |
| "loss": 0.2422, |
| "step": 2350 |
| }, |
| { |
| "epoch": 5.128205128205128, |
| "grad_norm": 0.676667332649231, |
| "learning_rate": 5.726495726495726e-05, |
| "loss": 0.1809, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.235042735042735, |
| "grad_norm": 0.5570082664489746, |
| "learning_rate": 5.6374643874643875e-05, |
| "loss": 0.2015, |
| "step": 2450 |
| }, |
| { |
| "epoch": 5.3418803418803416, |
| "grad_norm": 0.5290353298187256, |
| "learning_rate": 5.548433048433048e-05, |
| "loss": 0.202, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.448717948717949, |
| "grad_norm": 0.5909162163734436, |
| "learning_rate": 5.459401709401709e-05, |
| "loss": 0.1949, |
| "step": 2550 |
| }, |
| { |
| "epoch": 5.555555555555555, |
| "grad_norm": 0.7708612680435181, |
| "learning_rate": 5.370370370370371e-05, |
| "loss": 0.196, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.662393162393163, |
| "grad_norm": 0.7512525320053101, |
| "learning_rate": 5.281339031339032e-05, |
| "loss": 0.1814, |
| "step": 2650 |
| }, |
| { |
| "epoch": 5.769230769230769, |
| "grad_norm": 0.5676941275596619, |
| "learning_rate": 5.192307692307693e-05, |
| "loss": 0.1882, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.8760683760683765, |
| "grad_norm": 0.6577980518341064, |
| "learning_rate": 5.103276353276354e-05, |
| "loss": 0.1808, |
| "step": 2750 |
| }, |
| { |
| "epoch": 5.982905982905983, |
| "grad_norm": 0.8404538631439209, |
| "learning_rate": 5.0142450142450145e-05, |
| "loss": 0.1873, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_bleu": 0.01596588236585614, |
| "eval_loss": 0.7384710311889648, |
| "eval_rouge1": 0.09046890592711473, |
| "eval_rouge2": 0.033374593261045735, |
| "eval_rougeL": 0.0744312633791386, |
| "eval_rougeLsum": 0.07484231387423937, |
| "eval_runtime": 119.5498, |
| "eval_sacrebleu": 1.5965882365856137, |
| "eval_samples_per_second": 3.48, |
| "eval_steps_per_second": 0.435, |
| "step": 2808 |
| }, |
| { |
| "epoch": 6.089743589743589, |
| "grad_norm": 0.4537138044834137, |
| "learning_rate": 4.925213675213676e-05, |
| "loss": 0.1509, |
| "step": 2850 |
| }, |
| { |
| "epoch": 6.196581196581197, |
| "grad_norm": 0.9405523538589478, |
| "learning_rate": 4.836182336182337e-05, |
| "loss": 0.1451, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.303418803418803, |
| "grad_norm": 0.690613865852356, |
| "learning_rate": 4.7471509971509974e-05, |
| "loss": 0.1544, |
| "step": 2950 |
| }, |
| { |
| "epoch": 6.410256410256411, |
| "grad_norm": 0.5879324078559875, |
| "learning_rate": 4.6581196581196586e-05, |
| "loss": 0.1501, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.517094017094017, |
| "grad_norm": 0.5145525336265564, |
| "learning_rate": 4.569088319088319e-05, |
| "loss": 0.1472, |
| "step": 3050 |
| }, |
| { |
| "epoch": 6.6239316239316235, |
| "grad_norm": 0.7620146870613098, |
| "learning_rate": 4.48005698005698e-05, |
| "loss": 0.1499, |
| "step": 3100 |
| }, |
| { |
| "epoch": 6.730769230769231, |
| "grad_norm": 0.6944196820259094, |
| "learning_rate": 4.3910256410256415e-05, |
| "loss": 0.1381, |
| "step": 3150 |
| }, |
| { |
| "epoch": 6.837606837606837, |
| "grad_norm": 0.8817417621612549, |
| "learning_rate": 4.301994301994302e-05, |
| "loss": 0.1445, |
| "step": 3200 |
| }, |
| { |
| "epoch": 6.944444444444445, |
| "grad_norm": 0.4323042929172516, |
| "learning_rate": 4.212962962962963e-05, |
| "loss": 0.1446, |
| "step": 3250 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_bleu": 0.01693650686803547, |
| "eval_loss": 0.7932016849517822, |
| "eval_rouge1": 0.09734769451765327, |
| "eval_rouge2": 0.035070400258041395, |
| "eval_rougeL": 0.07970926682147024, |
| "eval_rougeLsum": 0.08024775367668704, |
| "eval_runtime": 117.0761, |
| "eval_sacrebleu": 1.693650686803547, |
| "eval_samples_per_second": 3.553, |
| "eval_steps_per_second": 0.444, |
| "step": 3276 |
| }, |
| { |
| "epoch": 7.051282051282051, |
| "grad_norm": 0.49853190779685974, |
| "learning_rate": 4.123931623931624e-05, |
| "loss": 0.1209, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.1581196581196584, |
| "grad_norm": 0.7226719260215759, |
| "learning_rate": 4.034900284900285e-05, |
| "loss": 0.1065, |
| "step": 3350 |
| }, |
| { |
| "epoch": 7.264957264957265, |
| "grad_norm": 0.4723590910434723, |
| "learning_rate": 3.945868945868946e-05, |
| "loss": 0.1074, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.371794871794872, |
| "grad_norm": 0.7136197090148926, |
| "learning_rate": 3.856837606837607e-05, |
| "loss": 0.109, |
| "step": 3450 |
| }, |
| { |
| "epoch": 7.478632478632479, |
| "grad_norm": 0.7125486135482788, |
| "learning_rate": 3.767806267806268e-05, |
| "loss": 0.1141, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.585470085470085, |
| "grad_norm": 0.6564122438430786, |
| "learning_rate": 3.678774928774929e-05, |
| "loss": 0.1079, |
| "step": 3550 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 0.8024164438247681, |
| "learning_rate": 3.58974358974359e-05, |
| "loss": 0.1256, |
| "step": 3600 |
| }, |
| { |
| "epoch": 7.799145299145299, |
| "grad_norm": 0.5174832940101624, |
| "learning_rate": 3.500712250712251e-05, |
| "loss": 0.1221, |
| "step": 3650 |
| }, |
| { |
| "epoch": 7.905982905982906, |
| "grad_norm": 0.5609320402145386, |
| "learning_rate": 3.411680911680912e-05, |
| "loss": 0.1241, |
| "step": 3700 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_bleu": 0.01642080481375883, |
| "eval_loss": 0.8241382837295532, |
| "eval_rouge1": 0.10111779789655295, |
| "eval_rouge2": 0.03756803371781556, |
| "eval_rougeL": 0.08381874904742523, |
| "eval_rougeLsum": 0.08407042216117307, |
| "eval_runtime": 118.3205, |
| "eval_sacrebleu": 1.6420804813758827, |
| "eval_samples_per_second": 3.516, |
| "eval_steps_per_second": 0.439, |
| "step": 3744 |
| }, |
| { |
| "epoch": 8.012820512820513, |
| "grad_norm": 0.42624956369400024, |
| "learning_rate": 3.3226495726495725e-05, |
| "loss": 0.114, |
| "step": 3750 |
| }, |
| { |
| "epoch": 8.11965811965812, |
| "grad_norm": 0.5220057368278503, |
| "learning_rate": 3.2336182336182337e-05, |
| "loss": 0.0839, |
| "step": 3800 |
| }, |
| { |
| "epoch": 8.226495726495726, |
| "grad_norm": 0.6766859889030457, |
| "learning_rate": 3.144586894586894e-05, |
| "loss": 0.0932, |
| "step": 3850 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 0.5749387741088867, |
| "learning_rate": 3.055555555555556e-05, |
| "loss": 0.1015, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.44017094017094, |
| "grad_norm": 0.5146998763084412, |
| "learning_rate": 2.966524216524217e-05, |
| "loss": 0.0841, |
| "step": 3950 |
| }, |
| { |
| "epoch": 8.547008547008547, |
| "grad_norm": 0.6653253436088562, |
| "learning_rate": 2.8774928774928778e-05, |
| "loss": 0.0943, |
| "step": 4000 |
| }, |
| { |
| "epoch": 8.653846153846153, |
| "grad_norm": 0.5048075914382935, |
| "learning_rate": 2.7884615384615386e-05, |
| "loss": 0.0922, |
| "step": 4050 |
| }, |
| { |
| "epoch": 8.760683760683762, |
| "grad_norm": 0.6959784626960754, |
| "learning_rate": 2.6994301994301995e-05, |
| "loss": 0.0934, |
| "step": 4100 |
| }, |
| { |
| "epoch": 8.867521367521368, |
| "grad_norm": 0.44051143527030945, |
| "learning_rate": 2.6103988603988607e-05, |
| "loss": 0.091, |
| "step": 4150 |
| }, |
| { |
| "epoch": 8.974358974358974, |
| "grad_norm": 0.49819517135620117, |
| "learning_rate": 2.5213675213675215e-05, |
| "loss": 0.0931, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_bleu": 0.014140141904671576, |
| "eval_loss": 0.8493850231170654, |
| "eval_rouge1": 0.10066028182849125, |
| "eval_rouge2": 0.038329211239954836, |
| "eval_rougeL": 0.0832079573438359, |
| "eval_rougeLsum": 0.08395177587376235, |
| "eval_runtime": 116.8091, |
| "eval_sacrebleu": 1.414014190467158, |
| "eval_samples_per_second": 3.561, |
| "eval_steps_per_second": 0.445, |
| "step": 4212 |
| }, |
| { |
| "epoch": 9.081196581196581, |
| "grad_norm": 0.41481533646583557, |
| "learning_rate": 2.4323361823361824e-05, |
| "loss": 0.0834, |
| "step": 4250 |
| }, |
| { |
| "epoch": 9.188034188034187, |
| "grad_norm": 0.4956184923648834, |
| "learning_rate": 2.3433048433048436e-05, |
| "loss": 0.0798, |
| "step": 4300 |
| }, |
| { |
| "epoch": 9.294871794871796, |
| "grad_norm": 0.45922133326530457, |
| "learning_rate": 2.2542735042735044e-05, |
| "loss": 0.0721, |
| "step": 4350 |
| }, |
| { |
| "epoch": 9.401709401709402, |
| "grad_norm": 0.5544161796569824, |
| "learning_rate": 2.1652421652421653e-05, |
| "loss": 0.0726, |
| "step": 4400 |
| }, |
| { |
| "epoch": 9.508547008547009, |
| "grad_norm": 0.6368575692176819, |
| "learning_rate": 2.076210826210826e-05, |
| "loss": 0.0794, |
| "step": 4450 |
| }, |
| { |
| "epoch": 9.615384615384615, |
| "grad_norm": 0.4519352614879608, |
| "learning_rate": 1.987179487179487e-05, |
| "loss": 0.0754, |
| "step": 4500 |
| }, |
| { |
| "epoch": 9.722222222222221, |
| "grad_norm": 0.4205610752105713, |
| "learning_rate": 1.8981481481481482e-05, |
| "loss": 0.0725, |
| "step": 4550 |
| }, |
| { |
| "epoch": 9.82905982905983, |
| "grad_norm": 0.5720301866531372, |
| "learning_rate": 1.8091168091168094e-05, |
| "loss": 0.0723, |
| "step": 4600 |
| }, |
| { |
| "epoch": 9.935897435897436, |
| "grad_norm": 0.7100991010665894, |
| "learning_rate": 1.7200854700854702e-05, |
| "loss": 0.0709, |
| "step": 4650 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_bleu": 0.013252890319355445, |
| "eval_loss": 0.8691538572311401, |
| "eval_rouge1": 0.10388912458002297, |
| "eval_rouge2": 0.038333106011199644, |
| "eval_rougeL": 0.08534074309444875, |
| "eval_rougeLsum": 0.08616990109781715, |
| "eval_runtime": 118.036, |
| "eval_sacrebleu": 1.3252890319355448, |
| "eval_samples_per_second": 3.524, |
| "eval_steps_per_second": 0.441, |
| "step": 4680 |
| }, |
| { |
| "epoch": 10.042735042735043, |
| "grad_norm": 0.6815042495727539, |
| "learning_rate": 1.631054131054131e-05, |
| "loss": 0.0736, |
| "step": 4700 |
| }, |
| { |
| "epoch": 10.149572649572649, |
| "grad_norm": 0.4753289222717285, |
| "learning_rate": 1.542022792022792e-05, |
| "loss": 0.0654, |
| "step": 4750 |
| }, |
| { |
| "epoch": 10.256410256410255, |
| "grad_norm": 0.5216003656387329, |
| "learning_rate": 1.4529914529914531e-05, |
| "loss": 0.0662, |
| "step": 4800 |
| }, |
| { |
| "epoch": 10.363247863247864, |
| "grad_norm": 1.007856011390686, |
| "learning_rate": 1.3639601139601142e-05, |
| "loss": 0.0644, |
| "step": 4850 |
| }, |
| { |
| "epoch": 10.47008547008547, |
| "grad_norm": 0.612964391708374, |
| "learning_rate": 1.274928774928775e-05, |
| "loss": 0.0643, |
| "step": 4900 |
| }, |
| { |
| "epoch": 10.576923076923077, |
| "grad_norm": 0.6739233732223511, |
| "learning_rate": 1.1858974358974359e-05, |
| "loss": 0.0639, |
| "step": 4950 |
| }, |
| { |
| "epoch": 10.683760683760683, |
| "grad_norm": 1.0595338344573975, |
| "learning_rate": 1.0968660968660969e-05, |
| "loss": 0.0656, |
| "step": 5000 |
| }, |
| { |
| "epoch": 10.790598290598291, |
| "grad_norm": 0.5138745307922363, |
| "learning_rate": 1.007834757834758e-05, |
| "loss": 0.0612, |
| "step": 5050 |
| }, |
| { |
| "epoch": 10.897435897435898, |
| "grad_norm": 0.6354550123214722, |
| "learning_rate": 9.18803418803419e-06, |
| "loss": 0.0659, |
| "step": 5100 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_bleu": 0.013803126905796853, |
| "eval_loss": 0.8812764286994934, |
| "eval_rouge1": 0.11534416583758171, |
| "eval_rouge2": 0.04454497187398293, |
| "eval_rougeL": 0.0963692302601049, |
| "eval_rougeLsum": 0.09670221754303093, |
| "eval_runtime": 119.7112, |
| "eval_sacrebleu": 1.3803126905796859, |
| "eval_samples_per_second": 3.475, |
| "eval_steps_per_second": 0.434, |
| "step": 5148 |
| }, |
| { |
| "epoch": 11.004273504273504, |
| "grad_norm": 0.3222929537296295, |
| "learning_rate": 8.297720797720798e-06, |
| "loss": 0.0657, |
| "step": 5150 |
| }, |
| { |
| "epoch": 11.11111111111111, |
| "grad_norm": 0.604082465171814, |
| "learning_rate": 7.4074074074074075e-06, |
| "loss": 0.0644, |
| "step": 5200 |
| }, |
| { |
| "epoch": 11.217948717948717, |
| "grad_norm": 0.4696787893772125, |
| "learning_rate": 6.517094017094018e-06, |
| "loss": 0.064, |
| "step": 5250 |
| }, |
| { |
| "epoch": 11.324786324786325, |
| "grad_norm": 0.4635153114795685, |
| "learning_rate": 5.626780626780627e-06, |
| "loss": 0.0604, |
| "step": 5300 |
| }, |
| { |
| "epoch": 11.431623931623932, |
| "grad_norm": 0.48816901445388794, |
| "learning_rate": 4.7364672364672365e-06, |
| "loss": 0.0584, |
| "step": 5350 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "grad_norm": 0.4458998739719391, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 0.0547, |
| "step": 5400 |
| }, |
| { |
| "epoch": 11.645299145299145, |
| "grad_norm": 0.4526476263999939, |
| "learning_rate": 2.955840455840456e-06, |
| "loss": 0.0576, |
| "step": 5450 |
| }, |
| { |
| "epoch": 11.752136752136753, |
| "grad_norm": 0.4015202224254608, |
| "learning_rate": 2.0655270655270656e-06, |
| "loss": 0.0583, |
| "step": 5500 |
| }, |
| { |
| "epoch": 11.85897435897436, |
| "grad_norm": 0.8102580308914185, |
| "learning_rate": 1.1752136752136752e-06, |
| "loss": 0.0573, |
| "step": 5550 |
| }, |
| { |
| "epoch": 11.965811965811966, |
| "grad_norm": 0.40322017669677734, |
| "learning_rate": 2.8490028490028494e-07, |
| "loss": 0.0534, |
| "step": 5600 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 5616, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 12, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.215745681784832e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|