| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 8.596543209876543, |
| "eval_steps": 256, |
| "global_step": 17408, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12641975308641976, |
| "grad_norm": 0.49028417468070984, |
| "learning_rate": 0.000498046875, |
| "loss": 1.2244819402694702, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.12641975308641976, |
| "eval_bleu": 0.023714342494794382, |
| "eval_loss": 1.1823350772625063, |
| "eval_mse_loss": 1.1823350772625063, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.12641975308641976, |
| "eval_bleu": 0.023714342494794382, |
| "eval_loss": 1.1823350772625063, |
| "eval_mse_loss": 1.1823350772625063, |
| "eval_runtime": 7.3875, |
| "eval_samples_per_second": 354.383, |
| "eval_steps_per_second": 5.55, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.2528395061728395, |
| "grad_norm": 0.578209638595581, |
| "learning_rate": 0.000998046875, |
| "loss": 1.1352403163909912, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2528395061728395, |
| "eval_bleu": 0.022467213421973074, |
| "eval_loss": 1.0758944168323423, |
| "eval_mse_loss": 1.0758944168323423, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2528395061728395, |
| "eval_bleu": 0.022467213421973074, |
| "eval_loss": 1.0758944168323423, |
| "eval_mse_loss": 1.0758944168323423, |
| "eval_runtime": 7.512, |
| "eval_samples_per_second": 348.51, |
| "eval_steps_per_second": 5.458, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.37925925925925924, |
| "grad_norm": 1.1504969596862793, |
| "learning_rate": 0.0009995882304697813, |
| "loss": 1.0155020952224731, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.37925925925925924, |
| "eval_bleu": 0.03303487719486685, |
| "eval_loss": 0.9521724465416699, |
| "eval_mse_loss": 0.9521724465416699, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.37925925925925924, |
| "eval_bleu": 0.03303487719486685, |
| "eval_loss": 0.9521724465416699, |
| "eval_mse_loss": 0.9521724465416699, |
| "eval_runtime": 7.3269, |
| "eval_samples_per_second": 357.313, |
| "eval_steps_per_second": 5.596, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.505679012345679, |
| "grad_norm": 0.9992234706878662, |
| "learning_rate": 0.0009983471408586747, |
| "loss": 0.9070967435836792, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.505679012345679, |
| "eval_bleu": 0.05283670615976002, |
| "eval_loss": 0.8704100134896069, |
| "eval_mse_loss": 0.8704100134896069, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.505679012345679, |
| "eval_bleu": 0.05283670615976002, |
| "eval_loss": 0.8704100134896069, |
| "eval_mse_loss": 0.8704100134896069, |
| "eval_runtime": 7.9933, |
| "eval_samples_per_second": 327.523, |
| "eval_steps_per_second": 5.129, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6320987654320988, |
| "grad_norm": 0.931308925151825, |
| "learning_rate": 0.000996278785066807, |
| "loss": 0.8445582389831543, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6320987654320988, |
| "eval_bleu": 0.062080457666779255, |
| "eval_loss": 0.8229870461836094, |
| "eval_mse_loss": 0.8229870461836094, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6320987654320988, |
| "eval_bleu": 0.062080457666779255, |
| "eval_loss": 0.8229870461836094, |
| "eval_mse_loss": 0.8229870461836094, |
| "eval_runtime": 8.2397, |
| "eval_samples_per_second": 317.731, |
| "eval_steps_per_second": 4.976, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7585185185185185, |
| "grad_norm": 1.1758294105529785, |
| "learning_rate": 0.0009933865966059944, |
| "loss": 0.7974789142608643, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.7585185185185185, |
| "eval_bleu": 0.08761458974204005, |
| "eval_loss": 0.7755879759788513, |
| "eval_mse_loss": 0.7755879759788513, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.7585185185185185, |
| "eval_bleu": 0.08761458974204005, |
| "eval_loss": 0.7755879759788513, |
| "eval_mse_loss": 0.7755879759788513, |
| "eval_runtime": 7.2349, |
| "eval_samples_per_second": 361.856, |
| "eval_steps_per_second": 5.667, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.8849382716049383, |
| "grad_norm": 1.6121838092803955, |
| "learning_rate": 0.0009896753765666925, |
| "loss": 0.7649396657943726, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.8849382716049383, |
| "eval_bleu": 0.09849079764244906, |
| "eval_loss": 0.7508636625801645, |
| "eval_mse_loss": 0.7508636625801645, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.8849382716049383, |
| "eval_bleu": 0.09849079764244906, |
| "eval_loss": 0.7508636625801645, |
| "eval_mse_loss": 0.7508636625801645, |
| "eval_runtime": 7.7694, |
| "eval_samples_per_second": 336.963, |
| "eval_steps_per_second": 5.277, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.011358024691358, |
| "grad_norm": 1.966202974319458, |
| "learning_rate": 0.0009851512856480936, |
| "loss": 0.7395287156105042, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.011358024691358, |
| "eval_bleu": 0.11072005270411464, |
| "eval_loss": 0.7292228166649981, |
| "eval_mse_loss": 0.7292228166649981, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.011358024691358, |
| "eval_bleu": 0.11072005270411464, |
| "eval_loss": 0.7292228166649981, |
| "eval_mse_loss": 0.7292228166649981, |
| "eval_runtime": 7.9736, |
| "eval_samples_per_second": 328.334, |
| "eval_steps_per_second": 5.142, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.1377777777777778, |
| "grad_norm": 1.720017433166504, |
| "learning_rate": 0.0009798218339312412, |
| "loss": 0.7159179449081421, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.1377777777777778, |
| "eval_bleu": 0.11064736251702126, |
| "eval_loss": 0.7017144793417396, |
| "eval_mse_loss": 0.7017144793417396, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.1377777777777778, |
| "eval_bleu": 0.11064736251702126, |
| "eval_loss": 0.7017144793417396, |
| "eval_mse_loss": 0.7017144793417396, |
| "eval_runtime": 7.2741, |
| "eval_samples_per_second": 359.908, |
| "eval_steps_per_second": 5.636, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.2641975308641975, |
| "grad_norm": 1.0777727365493774, |
| "learning_rate": 0.0009736958684121462, |
| "loss": 0.6964433789253235, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.2641975308641975, |
| "eval_bleu": 0.1276517391343738, |
| "eval_loss": 0.6854300062830855, |
| "eval_mse_loss": 0.6854300062830855, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.2641975308641975, |
| "eval_bleu": 0.1276517391343738, |
| "eval_loss": 0.6854300062830855, |
| "eval_mse_loss": 0.6854300062830855, |
| "eval_runtime": 7.5038, |
| "eval_samples_per_second": 348.888, |
| "eval_steps_per_second": 5.464, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.3906172839506172, |
| "grad_norm": 2.31036639213562, |
| "learning_rate": 0.0009667835583155982, |
| "loss": 0.6787664890289307, |
| "step": 2816 |
| }, |
| { |
| "epoch": 1.3906172839506172, |
| "eval_bleu": 0.14606448546092324, |
| "eval_loss": 0.6666799565640892, |
| "eval_mse_loss": 0.6666799565640892, |
| "step": 2816 |
| }, |
| { |
| "epoch": 1.3906172839506172, |
| "eval_bleu": 0.14606448546092324, |
| "eval_loss": 0.6666799565640892, |
| "eval_mse_loss": 0.6666799565640892, |
| "eval_runtime": 8.1414, |
| "eval_samples_per_second": 321.565, |
| "eval_steps_per_second": 5.036, |
| "step": 2816 |
| }, |
| { |
| "epoch": 1.5170370370370372, |
| "grad_norm": 1.8440918922424316, |
| "learning_rate": 0.0009590963782140483, |
| "loss": 0.6634169220924377, |
| "step": 3072 |
| }, |
| { |
| "epoch": 1.5170370370370372, |
| "eval_bleu": 0.14900832500200872, |
| "eval_loss": 0.6537455029603911, |
| "eval_mse_loss": 0.6537455029603911, |
| "step": 3072 |
| }, |
| { |
| "epoch": 1.5170370370370372, |
| "eval_bleu": 0.14900832500200872, |
| "eval_loss": 0.6537455029603911, |
| "eval_mse_loss": 0.6537455029603911, |
| "eval_runtime": 8.5409, |
| "eval_samples_per_second": 306.525, |
| "eval_steps_per_second": 4.8, |
| "step": 3072 |
| }, |
| { |
| "epoch": 1.643456790123457, |
| "grad_norm": 2.082489013671875, |
| "learning_rate": 0.0009506470889795917, |
| "loss": 0.6479848027229309, |
| "step": 3328 |
| }, |
| { |
| "epoch": 1.643456790123457, |
| "eval_bleu": 0.15870707580244742, |
| "eval_loss": 0.6412253728727015, |
| "eval_mse_loss": 0.6412253728727015, |
| "step": 3328 |
| }, |
| { |
| "epoch": 1.643456790123457, |
| "eval_bleu": 0.15870707580244742, |
| "eval_loss": 0.6412253728727015, |
| "eval_mse_loss": 0.6412253728727015, |
| "eval_runtime": 7.43, |
| "eval_samples_per_second": 352.355, |
| "eval_steps_per_second": 5.518, |
| "step": 3328 |
| }, |
| { |
| "epoch": 1.7698765432098766, |
| "grad_norm": 2.5578348636627197, |
| "learning_rate": 0.0009414497166006652, |
| "loss": 0.6374112963676453, |
| "step": 3584 |
| }, |
| { |
| "epoch": 1.7698765432098766, |
| "eval_bleu": 0.16517790474378305, |
| "eval_loss": 0.6340285481476202, |
| "eval_mse_loss": 0.6340285481476202, |
| "step": 3584 |
| }, |
| { |
| "epoch": 1.7698765432098766, |
| "eval_bleu": 0.16517790474378305, |
| "eval_loss": 0.6340285481476202, |
| "eval_mse_loss": 0.6340285481476202, |
| "eval_runtime": 7.8092, |
| "eval_samples_per_second": 335.246, |
| "eval_steps_per_second": 5.25, |
| "step": 3584 |
| }, |
| { |
| "epoch": 1.8962962962962964, |
| "grad_norm": 1.9383974075317383, |
| "learning_rate": 0.0009315195288986269, |
| "loss": 0.623887836933136, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.8962962962962964, |
| "eval_bleu": 0.17847564897785617, |
| "eval_loss": 0.6204298661976326, |
| "eval_mse_loss": 0.6204298661976326, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.8962962962962964, |
| "eval_bleu": 0.17847564897785617, |
| "eval_loss": 0.6204298661976326, |
| "eval_mse_loss": 0.6204298661976326, |
| "eval_runtime": 7.8808, |
| "eval_samples_per_second": 332.2, |
| "eval_steps_per_second": 5.203, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.022716049382716, |
| "grad_norm": 2.074370861053467, |
| "learning_rate": 0.0009208730101828687, |
| "loss": 0.612588107585907, |
| "step": 4096 |
| }, |
| { |
| "epoch": 2.022716049382716, |
| "eval_bleu": 0.20019494813992078, |
| "eval_loss": 0.6014744639396667, |
| "eval_mse_loss": 0.6014744639396667, |
| "step": 4096 |
| }, |
| { |
| "epoch": 2.022716049382716, |
| "eval_bleu": 0.20019494813992078, |
| "eval_loss": 0.6014744639396667, |
| "eval_mse_loss": 0.6014744639396667, |
| "eval_runtime": 8.3023, |
| "eval_samples_per_second": 315.333, |
| "eval_steps_per_second": 4.938, |
| "step": 4096 |
| }, |
| { |
| "epoch": 2.149135802469136, |
| "grad_norm": 2.1671674251556396, |
| "learning_rate": 0.0009095278338865343, |
| "loss": 0.5997830629348755, |
| "step": 4352 |
| }, |
| { |
| "epoch": 2.149135802469136, |
| "eval_bleu": 0.1987893921535938, |
| "eval_loss": 0.5986893453249117, |
| "eval_mse_loss": 0.5986893453249117, |
| "step": 4352 |
| }, |
| { |
| "epoch": 2.149135802469136, |
| "eval_bleu": 0.1987893921535938, |
| "eval_loss": 0.5986893453249117, |
| "eval_mse_loss": 0.5986893453249117, |
| "eval_runtime": 7.4027, |
| "eval_samples_per_second": 353.653, |
| "eval_steps_per_second": 5.538, |
| "step": 4352 |
| }, |
| { |
| "epoch": 2.2755555555555556, |
| "grad_norm": 3.125169038772583, |
| "learning_rate": 0.0008975028332282684, |
| "loss": 0.5991740822792053, |
| "step": 4608 |
| }, |
| { |
| "epoch": 2.2755555555555556, |
| "eval_bleu": 0.20658330468877353, |
| "eval_loss": 0.6155260525098661, |
| "eval_mse_loss": 0.6155260525098661, |
| "step": 4608 |
| }, |
| { |
| "epoch": 2.2755555555555556, |
| "eval_bleu": 0.20658330468877353, |
| "eval_loss": 0.6155260525098661, |
| "eval_mse_loss": 0.6155260525098661, |
| "eval_runtime": 7.4818, |
| "eval_samples_per_second": 349.915, |
| "eval_steps_per_second": 5.48, |
| "step": 4608 |
| }, |
| { |
| "epoch": 2.4019753086419753, |
| "grad_norm": 4.6324992179870605, |
| "learning_rate": 0.0008848179699486985, |
| "loss": 0.6436169147491455, |
| "step": 4864 |
| }, |
| { |
| "epoch": 2.4019753086419753, |
| "eval_bleu": 0.19786110402686427, |
| "eval_loss": 0.6434223041301821, |
| "eval_mse_loss": 0.6434223041301821, |
| "step": 4864 |
| }, |
| { |
| "epoch": 2.4019753086419753, |
| "eval_bleu": 0.19786110402686427, |
| "eval_loss": 0.6434223041301821, |
| "eval_mse_loss": 0.6434223041301821, |
| "eval_runtime": 8.06, |
| "eval_samples_per_second": 324.813, |
| "eval_steps_per_second": 5.087, |
| "step": 4864 |
| }, |
| { |
| "epoch": 2.528395061728395, |
| "grad_norm": 4.647606372833252, |
| "learning_rate": 0.0008714943011735476, |
| "loss": 0.6646981239318848, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.528395061728395, |
| "eval_bleu": 0.17652369495305426, |
| "eval_loss": 0.6581660363732315, |
| "eval_mse_loss": 0.6581660363732315, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.528395061728395, |
| "eval_bleu": 0.17652369495305426, |
| "eval_loss": 0.6581660363732315, |
| "eval_mse_loss": 0.6581660363732315, |
| "eval_runtime": 8.4642, |
| "eval_samples_per_second": 309.304, |
| "eval_steps_per_second": 4.844, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.6548148148148147, |
| "grad_norm": 4.655036926269531, |
| "learning_rate": 0.000857553944458386, |
| "loss": 0.670133650302887, |
| "step": 5376 |
| }, |
| { |
| "epoch": 2.6548148148148147, |
| "eval_bleu": 0.16665801459747773, |
| "eval_loss": 0.6649525950594646, |
| "eval_mse_loss": 0.6649525950594646, |
| "step": 5376 |
| }, |
| { |
| "epoch": 2.6548148148148147, |
| "eval_bleu": 0.16665801459747773, |
| "eval_loss": 0.6649525950594646, |
| "eval_mse_loss": 0.6649525950594646, |
| "eval_runtime": 7.3082, |
| "eval_samples_per_second": 358.23, |
| "eval_steps_per_second": 5.61, |
| "step": 5376 |
| }, |
| { |
| "epoch": 2.7812345679012345, |
| "grad_norm": 6.748809814453125, |
| "learning_rate": 0.000843020041073049, |
| "loss": 0.6625139117240906, |
| "step": 5632 |
| }, |
| { |
| "epoch": 2.7812345679012345, |
| "eval_bleu": 0.173997037607852, |
| "eval_loss": 0.6807852968937014, |
| "eval_mse_loss": 0.6807852968937014, |
| "step": 5632 |
| }, |
| { |
| "epoch": 2.7812345679012345, |
| "eval_bleu": 0.173997037607852, |
| "eval_loss": 0.6807852968937014, |
| "eval_mse_loss": 0.6807852968937014, |
| "eval_runtime": 7.5532, |
| "eval_samples_per_second": 346.607, |
| "eval_steps_per_second": 5.428, |
| "step": 5632 |
| }, |
| { |
| "epoch": 2.907654320987654, |
| "grad_norm": 4.406154632568359, |
| "learning_rate": 0.0008279167175866678, |
| "loss": 0.6519719958305359, |
| "step": 5888 |
| }, |
| { |
| "epoch": 2.907654320987654, |
| "eval_bleu": 0.19570266564584513, |
| "eval_loss": 0.6394063074414323, |
| "eval_mse_loss": 0.6394063074414323, |
| "step": 5888 |
| }, |
| { |
| "epoch": 2.907654320987654, |
| "eval_bleu": 0.19570266564584513, |
| "eval_loss": 0.6394063074414323, |
| "eval_mse_loss": 0.6394063074414323, |
| "eval_runtime": 7.9229, |
| "eval_samples_per_second": 330.434, |
| "eval_steps_per_second": 5.175, |
| "step": 5888 |
| }, |
| { |
| "epoch": 3.034074074074074, |
| "grad_norm": 4.702131271362305, |
| "learning_rate": 0.0008122690458170833, |
| "loss": 0.6394258737564087, |
| "step": 6144 |
| }, |
| { |
| "epoch": 3.034074074074074, |
| "eval_bleu": 0.18025210346558498, |
| "eval_loss": 0.6386287488588472, |
| "eval_mse_loss": 0.6386287488588472, |
| "step": 6144 |
| }, |
| { |
| "epoch": 3.034074074074074, |
| "eval_bleu": 0.18025210346558498, |
| "eval_loss": 0.6386287488588472, |
| "eval_mse_loss": 0.6386287488588472, |
| "eval_runtime": 8.212, |
| "eval_samples_per_second": 318.804, |
| "eval_steps_per_second": 4.993, |
| "step": 6144 |
| }, |
| { |
| "epoch": 3.1604938271604937, |
| "grad_norm": 3.3306374549865723, |
| "learning_rate": 0.0007961030012111305, |
| "loss": 0.6313198208808899, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.1604938271604937, |
| "eval_bleu": 0.19600803362588348, |
| "eval_loss": 0.6227354814366597, |
| "eval_mse_loss": 0.6227354814366597, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.1604938271604937, |
| "eval_bleu": 0.19600803362588348, |
| "eval_loss": 0.6227354814366597, |
| "eval_mse_loss": 0.6227354814366597, |
| "eval_runtime": 7.9462, |
| "eval_samples_per_second": 329.467, |
| "eval_steps_per_second": 5.16, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.286913580246914, |
| "grad_norm": 4.519600868225098, |
| "learning_rate": 0.0007794454197248784, |
| "loss": 0.6270281672477722, |
| "step": 6656 |
| }, |
| { |
| "epoch": 3.286913580246914, |
| "eval_bleu": 0.21406015637359221, |
| "eval_loss": 0.6298785456796971, |
| "eval_mse_loss": 0.6298785456796971, |
| "step": 6656 |
| }, |
| { |
| "epoch": 3.286913580246914, |
| "eval_bleu": 0.21406015637359221, |
| "eval_loss": 0.6298785456796971, |
| "eval_mse_loss": 0.6298785456796971, |
| "eval_runtime": 7.1729, |
| "eval_samples_per_second": 364.985, |
| "eval_steps_per_second": 5.716, |
| "step": 6656 |
| }, |
| { |
| "epoch": 3.413333333333333, |
| "grad_norm": 4.557300090789795, |
| "learning_rate": 0.0007623239532754083, |
| "loss": 0.6133501529693604, |
| "step": 6912 |
| }, |
| { |
| "epoch": 3.413333333333333, |
| "eval_bleu": 0.21008851882230098, |
| "eval_loss": 0.6231076019566234, |
| "eval_mse_loss": 0.6231076019566234, |
| "step": 6912 |
| }, |
| { |
| "epoch": 3.413333333333333, |
| "eval_bleu": 0.21008851882230098, |
| "eval_loss": 0.6231076019566234, |
| "eval_mse_loss": 0.6231076019566234, |
| "eval_runtime": 7.9356, |
| "eval_samples_per_second": 329.904, |
| "eval_steps_per_second": 5.167, |
| "step": 6912 |
| }, |
| { |
| "epoch": 3.5397530864197533, |
| "grad_norm": 4.322099208831787, |
| "learning_rate": 0.0007447670238380815, |
| "loss": 0.6112697720527649, |
| "step": 7168 |
| }, |
| { |
| "epoch": 3.5397530864197533, |
| "eval_bleu": 0.2060799623946906, |
| "eval_loss": 0.6111486045325675, |
| "eval_mse_loss": 0.6111486045325675, |
| "step": 7168 |
| }, |
| { |
| "epoch": 3.5397530864197533, |
| "eval_bleu": 0.2060799623946906, |
| "eval_loss": 0.6111486045325675, |
| "eval_mse_loss": 0.6111486045325675, |
| "eval_runtime": 8.1866, |
| "eval_samples_per_second": 319.793, |
| "eval_steps_per_second": 5.008, |
| "step": 7168 |
| }, |
| { |
| "epoch": 3.6661728395061726, |
| "grad_norm": 4.115531921386719, |
| "learning_rate": 0.0007268037762654929, |
| "loss": 0.6039376854896545, |
| "step": 7424 |
| }, |
| { |
| "epoch": 3.6661728395061726, |
| "eval_bleu": 0.21503632161601938, |
| "eval_loss": 0.6128283637325939, |
| "eval_mse_loss": 0.6128283637325939, |
| "step": 7424 |
| }, |
| { |
| "epoch": 3.6661728395061726, |
| "eval_bleu": 0.21503632161601938, |
| "eval_loss": 0.6128283637325939, |
| "eval_mse_loss": 0.6128283637325939, |
| "eval_runtime": 8.2856, |
| "eval_samples_per_second": 315.97, |
| "eval_steps_per_second": 4.948, |
| "step": 7424 |
| }, |
| { |
| "epoch": 3.7925925925925927, |
| "grad_norm": 4.654777526855469, |
| "learning_rate": 0.0007084640299064357, |
| "loss": 0.5996431708335876, |
| "step": 7680 |
| }, |
| { |
| "epoch": 3.7925925925925927, |
| "eval_bleu": 0.22869408936536856, |
| "eval_loss": 0.5915597095722105, |
| "eval_mse_loss": 0.5915597095722105, |
| "step": 7680 |
| }, |
| { |
| "epoch": 3.7925925925925927, |
| "eval_bleu": 0.22869408936536856, |
| "eval_loss": 0.5915597095722105, |
| "eval_mse_loss": 0.5915597095722105, |
| "eval_runtime": 8.2255, |
| "eval_samples_per_second": 318.278, |
| "eval_steps_per_second": 4.984, |
| "step": 7680 |
| }, |
| { |
| "epoch": 3.9190123456790125, |
| "grad_norm": 3.9882354736328125, |
| "learning_rate": 0.0006897782291051889, |
| "loss": 0.5931000709533691, |
| "step": 7936 |
| }, |
| { |
| "epoch": 3.9190123456790125, |
| "eval_bleu": 0.2349496066220062, |
| "eval_loss": 0.5926558040991062, |
| "eval_mse_loss": 0.5926558040991062, |
| "step": 7936 |
| }, |
| { |
| "epoch": 3.9190123456790125, |
| "eval_bleu": 0.2349496066220062, |
| "eval_loss": 0.5926558040991062, |
| "eval_mse_loss": 0.5926558040991062, |
| "eval_runtime": 8.1758, |
| "eval_samples_per_second": 320.212, |
| "eval_steps_per_second": 5.015, |
| "step": 7936 |
| }, |
| { |
| "epoch": 4.045432098765432, |
| "grad_norm": 4.266123294830322, |
| "learning_rate": 0.000670777392663298, |
| "loss": 0.5860158205032349, |
| "step": 8192 |
| }, |
| { |
| "epoch": 4.045432098765432, |
| "eval_bleu": 0.25634671575676854, |
| "eval_loss": 0.5731269004868298, |
| "eval_mse_loss": 0.5731269004868298, |
| "step": 8192 |
| }, |
| { |
| "epoch": 4.045432098765432, |
| "eval_bleu": 0.25634671575676854, |
| "eval_loss": 0.5731269004868298, |
| "eval_mse_loss": 0.5731269004868298, |
| "eval_runtime": 7.4662, |
| "eval_samples_per_second": 350.648, |
| "eval_steps_per_second": 5.491, |
| "step": 8192 |
| }, |
| { |
| "epoch": 4.1718518518518515, |
| "grad_norm": 5.411952018737793, |
| "learning_rate": 0.0006514930623477486, |
| "loss": 0.5818273425102234, |
| "step": 8448 |
| }, |
| { |
| "epoch": 4.1718518518518515, |
| "eval_bleu": 0.2520441554433453, |
| "eval_loss": 0.5782059794519006, |
| "eval_mse_loss": 0.5782059794519006, |
| "step": 8448 |
| }, |
| { |
| "epoch": 4.1718518518518515, |
| "eval_bleu": 0.2520441554433453, |
| "eval_loss": 0.5782059794519006, |
| "eval_mse_loss": 0.5782059794519006, |
| "eval_runtime": 7.4057, |
| "eval_samples_per_second": 353.512, |
| "eval_steps_per_second": 5.536, |
| "step": 8448 |
| }, |
| { |
| "epoch": 4.298271604938272, |
| "grad_norm": 3.5435891151428223, |
| "learning_rate": 0.0006319572505310022, |
| "loss": 0.5773241519927979, |
| "step": 8704 |
| }, |
| { |
| "epoch": 4.298271604938272, |
| "eval_bleu": 0.2534190472706376, |
| "eval_loss": 0.5589999033183586, |
| "eval_mse_loss": 0.5589999033183586, |
| "step": 8704 |
| }, |
| { |
| "epoch": 4.298271604938272, |
| "eval_bleu": 0.2534190472706376, |
| "eval_loss": 0.5589999033183586, |
| "eval_mse_loss": 0.5589999033183586, |
| "eval_runtime": 7.7623, |
| "eval_samples_per_second": 337.273, |
| "eval_steps_per_second": 5.282, |
| "step": 8704 |
| }, |
| { |
| "epoch": 4.424691358024692, |
| "grad_norm": 3.660297155380249, |
| "learning_rate": 0.000612202387049823, |
| "loss": 0.5752817988395691, |
| "step": 8960 |
| }, |
| { |
| "epoch": 4.424691358024692, |
| "eval_bleu": 0.25405581690850026, |
| "eval_loss": 0.5717670982930718, |
| "eval_mse_loss": 0.5717670982930718, |
| "step": 8960 |
| }, |
| { |
| "epoch": 4.424691358024692, |
| "eval_bleu": 0.25405581690850026, |
| "eval_loss": 0.5717670982930718, |
| "eval_mse_loss": 0.5717670982930718, |
| "eval_runtime": 8.0671, |
| "eval_samples_per_second": 324.527, |
| "eval_steps_per_second": 5.082, |
| "step": 8960 |
| }, |
| { |
| "epoch": 4.551111111111111, |
| "grad_norm": 4.61627197265625, |
| "learning_rate": 0.0005922612653711009, |
| "loss": 0.5677421689033508, |
| "step": 9216 |
| }, |
| { |
| "epoch": 4.551111111111111, |
| "eval_bleu": 0.2616680879763428, |
| "eval_loss": 0.5714971786592065, |
| "eval_mse_loss": 0.5714971786592065, |
| "step": 9216 |
| }, |
| { |
| "epoch": 4.551111111111111, |
| "eval_bleu": 0.2616680879763428, |
| "eval_loss": 0.5714971786592065, |
| "eval_mse_loss": 0.5714971786592065, |
| "eval_runtime": 8.2747, |
| "eval_samples_per_second": 316.384, |
| "eval_steps_per_second": 4.955, |
| "step": 9216 |
| }, |
| { |
| "epoch": 4.67753086419753, |
| "grad_norm": 4.289632797241211, |
| "learning_rate": 0.0005721669881540442, |
| "loss": 0.5655048489570618, |
| "step": 9472 |
| }, |
| { |
| "epoch": 4.67753086419753, |
| "eval_bleu": 0.27326453853008387, |
| "eval_loss": 0.5655288245619797, |
| "eval_mse_loss": 0.5655288245619797, |
| "step": 9472 |
| }, |
| { |
| "epoch": 4.67753086419753, |
| "eval_bleu": 0.27326453853008387, |
| "eval_loss": 0.5655288245619797, |
| "eval_mse_loss": 0.5655288245619797, |
| "eval_runtime": 8.1079, |
| "eval_samples_per_second": 322.895, |
| "eval_steps_per_second": 5.057, |
| "step": 9472 |
| }, |
| { |
| "epoch": 4.803950617283951, |
| "grad_norm": 4.95919132232666, |
| "learning_rate": 0.0005519529122991056, |
| "loss": 0.5713540315628052, |
| "step": 9728 |
| }, |
| { |
| "epoch": 4.803950617283951, |
| "eval_bleu": 0.2627683773991363, |
| "eval_loss": 0.5628622283295888, |
| "eval_mse_loss": 0.5628622283295888, |
| "step": 9728 |
| }, |
| { |
| "epoch": 4.803950617283951, |
| "eval_bleu": 0.2627683773991363, |
| "eval_loss": 0.5628622283295888, |
| "eval_mse_loss": 0.5628622283295888, |
| "eval_runtime": 7.9584, |
| "eval_samples_per_second": 328.962, |
| "eval_steps_per_second": 5.152, |
| "step": 9728 |
| }, |
| { |
| "epoch": 4.930370370370371, |
| "grad_norm": 6.370813369750977, |
| "learning_rate": 0.0005316525935748631, |
| "loss": 0.5834425687789917, |
| "step": 9984 |
| }, |
| { |
| "epoch": 4.930370370370371, |
| "eval_bleu": 0.26641828019601, |
| "eval_loss": 0.5816229349229394, |
| "eval_mse_loss": 0.5816229349229394, |
| "step": 9984 |
| }, |
| { |
| "epoch": 4.930370370370371, |
| "eval_bleu": 0.26641828019601, |
| "eval_loss": 0.5816229349229394, |
| "eval_mse_loss": 0.5816229349229394, |
| "eval_runtime": 7.1012, |
| "eval_samples_per_second": 368.672, |
| "eval_steps_per_second": 5.774, |
| "step": 9984 |
| }, |
| { |
| "epoch": 5.05679012345679, |
| "grad_norm": 6.872233867645264, |
| "learning_rate": 0.0005112997309147753, |
| "loss": 0.5829513669013977, |
| "step": 10240 |
| }, |
| { |
| "epoch": 5.05679012345679, |
| "eval_bleu": 0.2556546592309415, |
| "eval_loss": 0.5965675159198481, |
| "eval_mse_loss": 0.5965675159198481, |
| "step": 10240 |
| }, |
| { |
| "epoch": 5.05679012345679, |
| "eval_bleu": 0.2556546592309415, |
| "eval_loss": 0.5965675159198481, |
| "eval_mse_loss": 0.5965675159198481, |
| "eval_runtime": 7.6184, |
| "eval_samples_per_second": 343.643, |
| "eval_steps_per_second": 5.382, |
| "step": 10240 |
| }, |
| { |
| "epoch": 5.18320987654321, |
| "grad_norm": 6.554806232452393, |
| "learning_rate": 0.0004909281104762817, |
| "loss": 0.5979596376419067, |
| "step": 10496 |
| }, |
| { |
| "epoch": 5.18320987654321, |
| "eval_bleu": 0.24496216459175849, |
| "eval_loss": 0.5925210903330547, |
| "eval_mse_loss": 0.5925210903330547, |
| "step": 10496 |
| }, |
| { |
| "epoch": 5.18320987654321, |
| "eval_bleu": 0.24496216459175849, |
| "eval_loss": 0.5925210903330547, |
| "eval_mse_loss": 0.5925210903330547, |
| "eval_runtime": 8.1186, |
| "eval_samples_per_second": 322.468, |
| "eval_steps_per_second": 5.05, |
| "step": 10496 |
| }, |
| { |
| "epoch": 5.3096296296296295, |
| "grad_norm": 8.099956512451172, |
| "learning_rate": 0.0004705715495551068, |
| "loss": 0.5981637835502625, |
| "step": 10752 |
| }, |
| { |
| "epoch": 5.3096296296296295, |
| "eval_bleu": 0.24892557597714382, |
| "eval_loss": 0.6025734194895116, |
| "eval_mse_loss": 0.6025734194895116, |
| "step": 10752 |
| }, |
| { |
| "epoch": 5.3096296296296295, |
| "eval_bleu": 0.24892557597714382, |
| "eval_loss": 0.6025734194895116, |
| "eval_mse_loss": 0.6025734194895116, |
| "eval_runtime": 8.3839, |
| "eval_samples_per_second": 312.265, |
| "eval_steps_per_second": 4.89, |
| "step": 10752 |
| }, |
| { |
| "epoch": 5.43604938271605, |
| "grad_norm": 6.063875675201416, |
| "learning_rate": 0.00045026384044787715, |
| "loss": 0.600553572177887, |
| "step": 11008 |
| }, |
| { |
| "epoch": 5.43604938271605, |
| "eval_bleu": 0.2584749312819124, |
| "eval_loss": 0.5978762725504433, |
| "eval_mse_loss": 0.5978762725504433, |
| "step": 11008 |
| }, |
| { |
| "epoch": 5.43604938271605, |
| "eval_bleu": 0.2584749312819124, |
| "eval_loss": 0.5978762725504433, |
| "eval_mse_loss": 0.5978762725504433, |
| "eval_runtime": 8.4473, |
| "eval_samples_per_second": 309.921, |
| "eval_steps_per_second": 4.854, |
| "step": 11008 |
| }, |
| { |
| "epoch": 5.562469135802469, |
| "grad_norm": 6.819189071655273, |
| "learning_rate": 0.0004300386943562342, |
| "loss": 0.6050165891647339, |
| "step": 11264 |
| }, |
| { |
| "epoch": 5.562469135802469, |
| "eval_bleu": 0.23936172268263006, |
| "eval_loss": 0.6197842156014791, |
| "eval_mse_loss": 0.6197842156014791, |
| "step": 11264 |
| }, |
| { |
| "epoch": 5.562469135802469, |
| "eval_bleu": 0.23936172268263006, |
| "eval_loss": 0.6197842156014791, |
| "eval_mse_loss": 0.6197842156014791, |
| "eval_runtime": 7.275, |
| "eval_samples_per_second": 359.864, |
| "eval_steps_per_second": 5.636, |
| "step": 11264 |
| }, |
| { |
| "epoch": 5.688888888888889, |
| "grad_norm": 8.074504852294922, |
| "learning_rate": 0.0004099296854255696, |
| "loss": 0.6032764315605164, |
| "step": 11520 |
| }, |
| { |
| "epoch": 5.688888888888889, |
| "eval_bleu": 0.2563038856939558, |
| "eval_loss": 0.5922774854229718, |
| "eval_mse_loss": 0.5922774854229718, |
| "step": 11520 |
| }, |
| { |
| "epoch": 5.688888888888889, |
| "eval_bleu": 0.2563038856939558, |
| "eval_loss": 0.5922774854229718, |
| "eval_mse_loss": 0.5922774854229718, |
| "eval_runtime": 7.7562, |
| "eval_samples_per_second": 337.535, |
| "eval_steps_per_second": 5.286, |
| "step": 11520 |
| }, |
| { |
| "epoch": 5.815308641975308, |
| "grad_norm": 7.949519157409668, |
| "learning_rate": 0.00038997019501127406, |
| "loss": 0.5983560681343079, |
| "step": 11776 |
| }, |
| { |
| "epoch": 5.815308641975308, |
| "eval_bleu": 0.25476493381302434, |
| "eval_loss": 0.6024370585999838, |
| "eval_mse_loss": 0.6024370585999838, |
| "step": 11776 |
| }, |
| { |
| "epoch": 5.815308641975308, |
| "eval_bleu": 0.25476493381302434, |
| "eval_loss": 0.6024370585999838, |
| "eval_mse_loss": 0.6024370585999838, |
| "eval_runtime": 7.9866, |
| "eval_samples_per_second": 327.799, |
| "eval_steps_per_second": 5.134, |
| "step": 11776 |
| }, |
| { |
| "epoch": 5.941728395061729, |
| "grad_norm": 7.21051025390625, |
| "learning_rate": 0.00037019335626502263, |
| "loss": 0.5926205515861511, |
| "step": 12032 |
| }, |
| { |
| "epoch": 5.941728395061729, |
| "eval_bleu": 0.2579463063130564, |
| "eval_loss": 0.5958762241572868, |
| "eval_mse_loss": 0.5958762241572868, |
| "step": 12032 |
| }, |
| { |
| "epoch": 5.941728395061729, |
| "eval_bleu": 0.2579463063130564, |
| "eval_loss": 0.5958762241572868, |
| "eval_mse_loss": 0.5958762241572868, |
| "eval_runtime": 8.1465, |
| "eval_samples_per_second": 321.364, |
| "eval_steps_per_second": 5.033, |
| "step": 12032 |
| }, |
| { |
| "epoch": 6.068148148148148, |
| "grad_norm": 6.125677108764648, |
| "learning_rate": 0.0003506319991330833, |
| "loss": 0.5896713137626648, |
| "step": 12288 |
| }, |
| { |
| "epoch": 6.068148148148148, |
| "eval_bleu": 0.2633499094850468, |
| "eval_loss": 0.5901360758920995, |
| "eval_mse_loss": 0.5901360758920995, |
| "step": 12288 |
| }, |
| { |
| "epoch": 6.068148148148148, |
| "eval_bleu": 0.2633499094850468, |
| "eval_loss": 0.5901360758920995, |
| "eval_mse_loss": 0.5901360758920995, |
| "eval_runtime": 7.4076, |
| "eval_samples_per_second": 353.421, |
| "eval_steps_per_second": 5.535, |
| "step": 12288 |
| }, |
| { |
| "epoch": 6.194567901234568, |
| "grad_norm": 8.257464408874512, |
| "learning_rate": 0.00033131859585795183, |
| "loss": 0.5833750367164612, |
| "step": 12544 |
| }, |
| { |
| "epoch": 6.194567901234568, |
| "eval_bleu": 0.2632131293368136, |
| "eval_loss": 0.5901335754045626, |
| "eval_mse_loss": 0.5901335754045626, |
| "step": 12544 |
| }, |
| { |
| "epoch": 6.194567901234568, |
| "eval_bleu": 0.2632131293368136, |
| "eval_loss": 0.5901335754045626, |
| "eval_mse_loss": 0.5901335754045626, |
| "eval_runtime": 7.4243, |
| "eval_samples_per_second": 352.625, |
| "eval_steps_per_second": 5.522, |
| "step": 12544 |
| }, |
| { |
| "epoch": 6.320987654320987, |
| "grad_norm": 4.863134384155273, |
| "learning_rate": 0.0003122852070737825, |
| "loss": 0.581442654132843, |
| "step": 12800 |
| }, |
| { |
| "epoch": 6.320987654320987, |
| "eval_bleu": 0.26764936587915455, |
| "eval_loss": 0.5768222677998427, |
| "eval_mse_loss": 0.5768222677998427, |
| "step": 12800 |
| }, |
| { |
| "epoch": 6.320987654320987, |
| "eval_bleu": 0.26764936587915455, |
| "eval_loss": 0.5768222677998427, |
| "eval_mse_loss": 0.5768222677998427, |
| "eval_runtime": 7.9069, |
| "eval_samples_per_second": 331.105, |
| "eval_steps_per_second": 5.185, |
| "step": 12800 |
| }, |
| { |
| "epoch": 6.4474074074074075, |
| "grad_norm": 8.697264671325684, |
| "learning_rate": 0.00029356342858509677, |
| "loss": 0.5772220492362976, |
| "step": 13056 |
| }, |
| { |
| "epoch": 6.4474074074074075, |
| "eval_bleu": 0.27568777700132663, |
| "eval_loss": 0.584543146738192, |
| "eval_mse_loss": 0.584543146738192, |
| "step": 13056 |
| }, |
| { |
| "epoch": 6.4474074074074075, |
| "eval_bleu": 0.27568777700132663, |
| "eval_loss": 0.584543146738192, |
| "eval_mse_loss": 0.584543146738192, |
| "eval_runtime": 8.0062, |
| "eval_samples_per_second": 326.995, |
| "eval_steps_per_second": 5.121, |
| "step": 13056 |
| }, |
| { |
| "epoch": 6.573827160493828, |
| "grad_norm": 6.309488773345947, |
| "learning_rate": 0.0002751843389171185, |
| "loss": 0.5722501277923584, |
| "step": 13312 |
| }, |
| { |
| "epoch": 6.573827160493828, |
| "eval_bleu": 0.27306378384191476, |
| "eval_loss": 0.5755636982801484, |
| "eval_mse_loss": 0.5755636982801484, |
| "step": 13312 |
| }, |
| { |
| "epoch": 6.573827160493828, |
| "eval_bleu": 0.27306378384191476, |
| "eval_loss": 0.5755636982801484, |
| "eval_mse_loss": 0.5755636982801484, |
| "eval_runtime": 8.591, |
| "eval_samples_per_second": 304.738, |
| "eval_steps_per_second": 4.772, |
| "step": 13312 |
| }, |
| { |
| "epoch": 6.700246913580247, |
| "grad_norm": 5.390190601348877, |
| "learning_rate": 0.0002571784477248029, |
| "loss": 0.5714833736419678, |
| "step": 13568 |
| }, |
| { |
| "epoch": 6.700246913580247, |
| "eval_bleu": 0.2848300030499236, |
| "eval_loss": 0.570485861563101, |
| "eval_mse_loss": 0.570485861563101, |
| "step": 13568 |
| }, |
| { |
| "epoch": 6.700246913580247, |
| "eval_bleu": 0.2848300030499236, |
| "eval_loss": 0.570485861563101, |
| "eval_mse_loss": 0.570485861563101, |
| "eval_runtime": 7.2761, |
| "eval_samples_per_second": 359.808, |
| "eval_steps_per_second": 5.635, |
| "step": 13568 |
| }, |
| { |
| "epoch": 6.826666666666666, |
| "grad_norm": 7.215004920959473, |
| "learning_rate": 0.0002395756451462014, |
| "loss": 0.5674658417701721, |
| "step": 13824 |
| }, |
| { |
| "epoch": 6.826666666666666, |
| "eval_bleu": 0.2756130503152812, |
| "eval_loss": 0.5792907963438731, |
| "eval_mse_loss": 0.5792907963438731, |
| "step": 13824 |
| }, |
| { |
| "epoch": 6.826666666666666, |
| "eval_bleu": 0.2756130503152812, |
| "eval_loss": 0.5792907963438731, |
| "eval_mse_loss": 0.5792907963438731, |
| "eval_runtime": 7.3879, |
| "eval_samples_per_second": 354.362, |
| "eval_steps_per_second": 5.55, |
| "step": 13824 |
| }, |
| { |
| "epoch": 6.953086419753086, |
| "grad_norm": 6.6079816818237305, |
| "learning_rate": 0.00022240515218423758, |
| "loss": 0.5638896226882935, |
| "step": 14080 |
| }, |
| { |
| "epoch": 6.953086419753086, |
| "eval_bleu": 0.2958995495334816, |
| "eval_loss": 0.5594088427904176, |
| "eval_mse_loss": 0.5594088427904176, |
| "step": 14080 |
| }, |
| { |
| "epoch": 6.953086419753086, |
| "eval_bleu": 0.2958995495334816, |
| "eval_loss": 0.5594088427904176, |
| "eval_mse_loss": 0.5594088427904176, |
| "eval_runtime": 7.9094, |
| "eval_samples_per_second": 330.998, |
| "eval_steps_per_second": 5.184, |
| "step": 14080 |
| }, |
| { |
| "epoch": 7.079506172839507, |
| "grad_norm": 6.562555313110352, |
| "learning_rate": 0.00020569547219925934, |
| "loss": 0.5631716251373291, |
| "step": 14336 |
| }, |
| { |
| "epoch": 7.079506172839507, |
| "eval_bleu": 0.2905136606672008, |
| "eval_loss": 0.5631622737500726, |
| "eval_mse_loss": 0.5631622737500726, |
| "step": 14336 |
| }, |
| { |
| "epoch": 7.079506172839507, |
| "eval_bleu": 0.2905136606672008, |
| "eval_loss": 0.5631622737500726, |
| "eval_mse_loss": 0.5631622737500726, |
| "eval_runtime": 8.3002, |
| "eval_samples_per_second": 315.415, |
| "eval_steps_per_second": 4.94, |
| "step": 14336 |
| }, |
| { |
| "epoch": 7.205925925925926, |
| "grad_norm": 5.195821285247803, |
| "learning_rate": 0.00018947434359289434, |
| "loss": 0.5603777170181274, |
| "step": 14592 |
| }, |
| { |
| "epoch": 7.205925925925926, |
| "eval_bleu": 0.29437709495493225, |
| "eval_loss": 0.5465006559360318, |
| "eval_mse_loss": 0.5465006559360318, |
| "step": 14592 |
| }, |
| { |
| "epoch": 7.205925925925926, |
| "eval_bleu": 0.29437709495493225, |
| "eval_loss": 0.5465006559360318, |
| "eval_mse_loss": 0.5465006559360318, |
| "eval_runtime": 7.1426, |
| "eval_samples_per_second": 366.535, |
| "eval_steps_per_second": 5.74, |
| "step": 14592 |
| }, |
| { |
| "epoch": 7.332345679012346, |
| "grad_norm": 8.156927108764648, |
| "learning_rate": 0.0001737686937617491, |
| "loss": 0.5557982325553894, |
| "step": 14848 |
| }, |
| { |
| "epoch": 7.332345679012346, |
| "eval_bleu": 0.29449963629268144, |
| "eval_loss": 0.557792792959911, |
| "eval_mse_loss": 0.557792792959911, |
| "step": 14848 |
| }, |
| { |
| "epoch": 7.332345679012346, |
| "eval_bleu": 0.29449963629268144, |
| "eval_loss": 0.557792792959911, |
| "eval_mse_loss": 0.557792792959911, |
| "eval_runtime": 7.7275, |
| "eval_samples_per_second": 338.789, |
| "eval_steps_per_second": 5.306, |
| "step": 14848 |
| }, |
| { |
| "epoch": 7.458765432098765, |
| "grad_norm": 5.172349452972412, |
| "learning_rate": 0.00015860459439739582, |
| "loss": 0.5558417439460754, |
| "step": 15104 |
| }, |
| { |
| "epoch": 7.458765432098765, |
| "eval_bleu": 0.31102631980865114, |
| "eval_loss": 0.5469118814642836, |
| "eval_mse_loss": 0.5469118814642836, |
| "step": 15104 |
| }, |
| { |
| "epoch": 7.458765432098765, |
| "eval_bleu": 0.31102631980865114, |
| "eval_loss": 0.5469118814642836, |
| "eval_mse_loss": 0.5469118814642836, |
| "eval_runtime": 7.9676, |
| "eval_samples_per_second": 328.58, |
| "eval_steps_per_second": 5.146, |
| "step": 15104 |
| }, |
| { |
| "epoch": 7.5851851851851855, |
| "grad_norm": 5.335220813751221, |
| "learning_rate": 0.0001440072182068436, |
| "loss": 0.5527888536453247, |
| "step": 15360 |
| }, |
| { |
| "epoch": 7.5851851851851855, |
| "eval_bleu": 0.3040853321881768, |
| "eval_loss": 0.5505978399660529, |
| "eval_mse_loss": 0.5505978399660529, |
| "step": 15360 |
| }, |
| { |
| "epoch": 7.5851851851851855, |
| "eval_bleu": 0.3040853321881768, |
| "eval_loss": 0.5505978399660529, |
| "eval_mse_loss": 0.5505978399660529, |
| "eval_runtime": 8.0279, |
| "eval_samples_per_second": 326.113, |
| "eval_steps_per_second": 5.107, |
| "step": 15360 |
| }, |
| { |
| "epoch": 7.711604938271605, |
| "grad_norm": 3.120297908782959, |
| "learning_rate": 0.00013000079712534475, |
| "loss": 0.5498585104942322, |
| "step": 15616 |
| }, |
| { |
| "epoch": 7.711604938271605, |
| "eval_bleu": 0.30098409224195205, |
| "eval_loss": 0.54582195819878, |
| "eval_mse_loss": 0.54582195819878, |
| "step": 15616 |
| }, |
| { |
| "epoch": 7.711604938271605, |
| "eval_bleu": 0.30098409224195205, |
| "eval_loss": 0.54582195819878, |
| "eval_mse_loss": 0.54582195819878, |
| "eval_runtime": 8.0703, |
| "eval_samples_per_second": 324.401, |
| "eval_steps_per_second": 5.08, |
| "step": 15616 |
| }, |
| { |
| "epoch": 7.838024691358025, |
| "grad_norm": 4.802690029144287, |
| "learning_rate": 0.00011660858209089819, |
| "loss": 0.5511511564254761, |
| "step": 15872 |
| }, |
| { |
| "epoch": 7.838024691358025, |
| "eval_bleu": 0.3068134406523234, |
| "eval_loss": 0.5499871998298459, |
| "eval_mse_loss": 0.5499871998298459, |
| "step": 15872 |
| }, |
| { |
| "epoch": 7.838024691358025, |
| "eval_bleu": 0.3068134406523234, |
| "eval_loss": 0.5499871998298459, |
| "eval_mse_loss": 0.5499871998298459, |
| "eval_runtime": 7.2713, |
| "eval_samples_per_second": 360.046, |
| "eval_steps_per_second": 5.639, |
| "step": 15872 |
| }, |
| { |
| "epoch": 7.964444444444444, |
| "grad_norm": 3.471189260482788, |
| "learning_rate": 0.00010385280444723056, |
| "loss": 0.5520000457763672, |
| "step": 16128 |
| }, |
| { |
| "epoch": 7.964444444444444, |
| "eval_bleu": 0.3052295052303889, |
| "eval_loss": 0.5441080797009352, |
| "eval_mse_loss": 0.5441080797009352, |
| "step": 16128 |
| }, |
| { |
| "epoch": 7.964444444444444, |
| "eval_bleu": 0.3052295052303889, |
| "eval_loss": 0.5441080797009352, |
| "eval_mse_loss": 0.5441080797009352, |
| "eval_runtime": 7.7809, |
| "eval_samples_per_second": 336.463, |
| "eval_steps_per_second": 5.269, |
| "step": 16128 |
| }, |
| { |
| "epoch": 8.090864197530864, |
| "grad_norm": 5.1973466873168945, |
| "learning_rate": 9.175463903932168e-05, |
| "loss": 0.5478553771972656, |
| "step": 16384 |
| }, |
| { |
| "epoch": 8.090864197530864, |
| "eval_bleu": 0.2907079883758542, |
| "eval_loss": 0.5581492403658425, |
| "eval_mse_loss": 0.5581492403658425, |
| "step": 16384 |
| }, |
| { |
| "epoch": 8.090864197530864, |
| "eval_bleu": 0.2907079883758542, |
| "eval_loss": 0.5581492403658425, |
| "eval_mse_loss": 0.5581492403658425, |
| "eval_runtime": 8.0444, |
| "eval_samples_per_second": 325.444, |
| "eval_steps_per_second": 5.097, |
| "step": 16384 |
| }, |
| { |
| "epoch": 8.217283950617285, |
| "grad_norm": 3.1891634464263916, |
| "learning_rate": 8.033416906274093e-05, |
| "loss": 0.5465660095214844, |
| "step": 16640 |
| }, |
| { |
| "epoch": 8.217283950617285, |
| "eval_bleu": 0.3151093389207819, |
| "eval_loss": 0.5382588927338763, |
| "eval_mse_loss": 0.5382588927338763, |
| "step": 16640 |
| }, |
| { |
| "epoch": 8.217283950617285, |
| "eval_bleu": 0.3151093389207819, |
| "eval_loss": 0.5382588927338763, |
| "eval_mse_loss": 0.5382588927338763, |
| "eval_runtime": 8.1703, |
| "eval_samples_per_second": 320.43, |
| "eval_steps_per_second": 5.018, |
| "step": 16640 |
| }, |
| { |
| "epoch": 8.343703703703703, |
| "grad_norm": 3.5024101734161377, |
| "learning_rate": 6.961035272514177e-05, |
| "loss": 0.5455322265625, |
| "step": 16896 |
| }, |
| { |
| "epoch": 8.343703703703703, |
| "eval_bleu": 0.31440992086262937, |
| "eval_loss": 0.5380844590140552, |
| "eval_mse_loss": 0.5380844590140552, |
| "step": 16896 |
| }, |
| { |
| "epoch": 8.343703703703703, |
| "eval_bleu": 0.31440992086262937, |
| "eval_loss": 0.5380844590140552, |
| "eval_mse_loss": 0.5380844590140552, |
| "eval_runtime": 7.1273, |
| "eval_samples_per_second": 367.32, |
| "eval_steps_per_second": 5.753, |
| "step": 16896 |
| }, |
| { |
| "epoch": 8.470123456790123, |
| "grad_norm": 3.242410659790039, |
| "learning_rate": 5.960099177526024e-05, |
| "loss": 0.5413248538970947, |
| "step": 17152 |
| }, |
| { |
| "epoch": 8.470123456790123, |
| "eval_bleu": 0.3030533280572146, |
| "eval_loss": 0.5433335965726434, |
| "eval_mse_loss": 0.5433335965726434, |
| "step": 17152 |
| }, |
| { |
| "epoch": 8.470123456790123, |
| "eval_bleu": 0.3030533280572146, |
| "eval_loss": 0.5433335965726434, |
| "eval_mse_loss": 0.5433335965726434, |
| "eval_runtime": 8.049, |
| "eval_samples_per_second": 325.26, |
| "eval_steps_per_second": 5.094, |
| "step": 17152 |
| }, |
| { |
| "epoch": 8.596543209876543, |
| "grad_norm": 3.3194503784179688, |
| "learning_rate": 5.032270195165667e-05, |
| "loss": 0.5402602553367615, |
| "step": 17408 |
| }, |
| { |
| "epoch": 8.596543209876543, |
| "eval_bleu": 0.3284021854773732, |
| "eval_loss": 0.5290337989969951, |
| "eval_mse_loss": 0.5290337989969951, |
| "step": 17408 |
| }, |
| { |
| "epoch": 8.596543209876543, |
| "eval_bleu": 0.3284021854773732, |
| "eval_loss": 0.5290337989969951, |
| "eval_mse_loss": 0.5290337989969951, |
| "eval_runtime": 7.8967, |
| "eval_samples_per_second": 331.532, |
| "eval_steps_per_second": 5.192, |
| "step": 17408 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 20250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 256, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|