| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.44131273299286744, |
| "eval_steps": 1024, |
| "global_step": 10240, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011032818324821687, |
| "grad_norm": 0.20388168096542358, |
| "learning_rate": 0.000498046875, |
| "loss": 2.512310028076172, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.022065636649643373, |
| "grad_norm": 0.3460715115070343, |
| "learning_rate": 0.000998046875, |
| "loss": 2.018148422241211, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03309845497446506, |
| "grad_norm": 0.5453425645828247, |
| "learning_rate": 0.000999688448778502, |
| "loss": 1.8114819526672363, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 0.9051710367202759, |
| "learning_rate": 0.0009987492950653055, |
| "loss": 1.75458824634552, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9399749239907662, |
| "eval_cos_loss": 0.47715059564566054, |
| "eval_dec_loss": 0.10647435713885055, |
| "eval_loss": 1.73655072051579, |
| "eval_mse2_loss": 0.1675516524389863, |
| "eval_mse_loss": 1.3505616757407117, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.522849405561683, |
| "flow/improvement_ratio": 0.8910323953323527, |
| "flow/mag_ratio_mean": 0.5448383128465112, |
| "flow/mag_ratio_std": 0.23550461588510826, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9399749239907662, |
| "eval_cos_loss": 0.47715059564566054, |
| "eval_dec_loss": 0.10647435713885055, |
| "eval_loss": 1.73655072051579, |
| "eval_mse2_loss": 0.1675516524389863, |
| "eval_mse_loss": 1.3505616757407117, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.3582, |
| "eval_samples_per_second": 196.904, |
| "eval_steps_per_second": 3.078, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.522849405561683, |
| "flow/improvement_ratio": 0.8910323953323527, |
| "flow/mag_ratio_mean": 0.5448383128465112, |
| "flow/mag_ratio_std": 0.23550461588510826, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05516409162410843, |
| "grad_norm": 0.5948837399482727, |
| "learning_rate": 0.0009971837136430763, |
| "loss": 1.732498049736023, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.06619690994893011, |
| "grad_norm": 0.6182620525360107, |
| "learning_rate": 0.0009949936708776692, |
| "loss": 1.7030788660049438, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.07722972827375181, |
| "grad_norm": 1.142866611480713, |
| "learning_rate": 0.0009921819174566252, |
| "loss": 1.7001720666885376, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 0.8506317734718323, |
| "learning_rate": 0.000988751984934317, |
| "loss": 1.6855676174163818, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.9434877818736953, |
| "eval_cos_loss": 0.46131489716613217, |
| "eval_dec_loss": 0.09124524126659388, |
| "eval_loss": 1.6677722422553023, |
| "eval_mse2_loss": 0.15732173794812992, |
| "eval_mse_loss": 1.3088257922800874, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5386851041683002, |
| "flow/improvement_ratio": 0.8871048878250855, |
| "flow/mag_ratio_mean": 0.5622577369848548, |
| "flow/mag_ratio_std": 0.24945266208033573, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.9434877818736953, |
| "eval_cos_loss": 0.46131489716613217, |
| "eval_dec_loss": 0.09124524126659388, |
| "eval_loss": 1.6677722422553023, |
| "eval_mse2_loss": 0.15732173794812992, |
| "eval_mse_loss": 1.3088257922800874, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.2408, |
| "eval_samples_per_second": 199.68, |
| "eval_steps_per_second": 3.122, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5386851041683002, |
| "flow/improvement_ratio": 0.8871048878250855, |
| "flow/mag_ratio_mean": 0.5622577369848548, |
| "flow/mag_ratio_std": 0.24945266208033573, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09929536492339518, |
| "grad_norm": 0.7440093159675598, |
| "learning_rate": 0.0009847081812963268, |
| "loss": 1.6802997589111328, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.11032818324821686, |
| "grad_norm": 0.9319222569465637, |
| "learning_rate": 0.0009800555855486275, |
| "loss": 1.6744197607040405, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.12136100157303854, |
| "grad_norm": 0.8629500865936279, |
| "learning_rate": 0.0009748000413383664, |
| "loss": 1.6740639209747314, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 0.9893732666969299, |
| "learning_rate": 0.0009689481496142604, |
| "loss": 1.664785623550415, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9404068449586629, |
| "eval_cos_loss": 0.4534218231243874, |
| "eval_dec_loss": 0.10443712005824614, |
| "eval_loss": 1.6572931651621738, |
| "eval_mse2_loss": 0.1550013455850229, |
| "eval_mse_loss": 1.288264540212749, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5465781726816824, |
| "flow/improvement_ratio": 0.8946911343125138, |
| "flow/mag_ratio_mean": 0.5628405101517878, |
| "flow/mag_ratio_std": 0.24253392000315285, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9404068449586629, |
| "eval_cos_loss": 0.4534218231243874, |
| "eval_dec_loss": 0.10443712005824614, |
| "eval_loss": 1.6572931651621738, |
| "eval_mse2_loss": 0.1550013455850229, |
| "eval_mse_loss": 1.288264540212749, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 153.5602, |
| "eval_samples_per_second": 195.363, |
| "eval_steps_per_second": 3.054, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5465781726816824, |
| "flow/improvement_ratio": 0.8946911343125138, |
| "flow/mag_ratio_mean": 0.5628405101517878, |
| "flow/mag_ratio_std": 0.24253392000315285, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.14342663822268192, |
| "grad_norm": 0.9933224320411682, |
| "learning_rate": 0.0009625072603358231, |
| "loss": 1.6605451107025146, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.15445945654750362, |
| "grad_norm": 1.221793532371521, |
| "learning_rate": 0.0009554854632418371, |
| "loss": 1.6490036249160767, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.1654922748723253, |
| "grad_norm": 0.8394345045089722, |
| "learning_rate": 0.000947891577689663, |
| "loss": 1.649448275566101, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 1.245514154434204, |
| "learning_rate": 0.0009397351415781539, |
| "loss": 1.6489267349243164, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.942338221101898, |
| "eval_cos_loss": 0.450266804585833, |
| "eval_dec_loss": 0.1002394597608048, |
| "eval_loss": 1.6427197324187517, |
| "eval_mse2_loss": 0.15268841918025697, |
| "eval_mse_loss": 1.280517189741643, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5497331933172018, |
| "flow/improvement_ratio": 0.8926444684010325, |
| "flow/mag_ratio_mean": 0.5659083429175907, |
| "flow/mag_ratio_std": 0.24314757854318314, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.942338221101898, |
| "eval_cos_loss": 0.450266804585833, |
| "eval_dec_loss": 0.1002394597608048, |
| "eval_loss": 1.6427197324187517, |
| "eval_mse2_loss": 0.15268841918025697, |
| "eval_mse_loss": 1.280517189741643, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.0027, |
| "eval_samples_per_second": 197.365, |
| "eval_steps_per_second": 3.085, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5497331933172018, |
| "flow/improvement_ratio": 0.8926444684010325, |
| "flow/mag_ratio_mean": 0.5659083429175907, |
| "flow/mag_ratio_std": 0.24314757854318314, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18755791152196866, |
| "grad_norm": 1.0416312217712402, |
| "learning_rate": 0.000931026399368079, |
| "loss": 1.6447768211364746, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.19859072984679035, |
| "grad_norm": 1.1173036098480225, |
| "learning_rate": 0.0009217762892151117, |
| "loss": 1.6489276885986328, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.20962354817161205, |
| "grad_norm": 0.930402934551239, |
| "learning_rate": 0.0009119964292315354, |
| "loss": 1.6420283317565918, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 0.9209682941436768, |
| "learning_rate": 0.0009016991028939279, |
| "loss": 1.6357425451278687, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.9431814239032791, |
| "eval_cos_loss": 0.4463651056991203, |
| "eval_dec_loss": 0.09822412853889755, |
| "eval_loss": 1.628653102846288, |
| "eval_mse2_loss": 0.14985301353529826, |
| "eval_mse_loss": 1.2716914730539708, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5536348958259453, |
| "flow/improvement_ratio": 0.8939987117293547, |
| "flow/mag_ratio_mean": 0.576050937048662, |
| "flow/mag_ratio_std": 0.2491114061397276, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.9431814239032791, |
| "eval_cos_loss": 0.4463651056991203, |
| "eval_dec_loss": 0.09822412853889755, |
| "eval_loss": 1.628653102846288, |
| "eval_mse2_loss": 0.14985301353529826, |
| "eval_mse_loss": 1.2716914730539708, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.4792, |
| "eval_samples_per_second": 199.363, |
| "eval_steps_per_second": 3.117, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5536348958259453, |
| "flow/improvement_ratio": 0.8939987117293547, |
| "flow/mag_ratio_mean": 0.576050937048662, |
| "flow/mag_ratio_std": 0.2491114061397276, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.23168918482125542, |
| "grad_norm": 0.6372450590133667, |
| "learning_rate": 0.0008908972436151494, |
| "loss": 1.6375595331192017, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2427220031460771, |
| "grad_norm": 0.833997368812561, |
| "learning_rate": 0.0008796044185000127, |
| "loss": 1.6372840404510498, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.2537548214708988, |
| "grad_norm": 0.6318811178207397, |
| "learning_rate": 0.0008678348113050368, |
| "loss": 1.628332257270813, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 0.7464238405227661, |
| "learning_rate": 0.0008556032046236897, |
| "loss": 1.6342945098876953, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.941904927802284, |
| "eval_cos_loss": 0.44474837520737637, |
| "eval_dec_loss": 0.102102437767504, |
| "eval_loss": 1.628202569764306, |
| "eval_mse2_loss": 0.15077992649411343, |
| "eval_mse_loss": 1.266597391954109, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5552516249197124, |
| "flow/improvement_ratio": 0.8912770662988935, |
| "flow/mag_ratio_mean": 0.5855364076364269, |
| "flow/mag_ratio_std": 0.25087345015011364, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.941904927802284, |
| "eval_cos_loss": 0.44474837520737637, |
| "eval_dec_loss": 0.102102437767504, |
| "eval_loss": 1.628202569764306, |
| "eval_mse2_loss": 0.15077992649411343, |
| "eval_mse_loss": 1.266597391954109, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.0104, |
| "eval_samples_per_second": 198.662, |
| "eval_steps_per_second": 3.106, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5552516249197124, |
| "flow/improvement_ratio": 0.8912770662988935, |
| "flow/mag_ratio_mean": 0.5855364076364269, |
| "flow/mag_ratio_std": 0.25087345015011364, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2758204581205422, |
| "grad_norm": 1.1199694871902466, |
| "learning_rate": 0.000842924961319492, |
| "loss": 1.6266489028930664, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.28685327644536385, |
| "grad_norm": 0.8668828010559082, |
| "learning_rate": 0.0008298160052303045, |
| "loss": 1.62454092502594, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.2978860947701855, |
| "grad_norm": 0.8108460307121277, |
| "learning_rate": 0.0008162928011680314, |
| "loss": 1.624453067779541, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 0.8465085625648499, |
| "learning_rate": 0.000802372334238864, |
| "loss": 1.6209194660186768, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.942268661485615, |
| "eval_cos_loss": 0.4415781778186115, |
| "eval_dec_loss": 0.09969639404813872, |
| "eval_loss": 1.61412800298825, |
| "eval_mse2_loss": 0.14795255090699774, |
| "eval_mse_loss": 1.2580732640935415, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5584218213553114, |
| "flow/improvement_ratio": 0.8916484548338949, |
| "flow/mag_ratio_mean": 0.5767976182864419, |
| "flow/mag_ratio_std": 0.2534670445011623, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.942268661485615, |
| "eval_cos_loss": 0.4415781778186115, |
| "eval_dec_loss": 0.09969639404813872, |
| "eval_loss": 1.61412800298825, |
| "eval_mse2_loss": 0.14795255090699774, |
| "eval_mse_loss": 1.2580732640935415, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.313, |
| "eval_samples_per_second": 199.584, |
| "eval_steps_per_second": 3.12, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5584218213553114, |
| "flow/improvement_ratio": 0.8916484548338949, |
| "flow/mag_ratio_mean": 0.5767976182864419, |
| "flow/mag_ratio_std": 0.2534670445011623, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3199517314198289, |
| "grad_norm": 1.8415089845657349, |
| "learning_rate": 0.0007880720885100349, |
| "loss": 1.6192532777786255, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3309845497446506, |
| "grad_norm": 0.7575666904449463, |
| "learning_rate": 0.0007734100250498788, |
| "loss": 1.6192028522491455, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3420173680694723, |
| "grad_norm": 1.108810544013977, |
| "learning_rate": 0.000758404559368781, |
| "loss": 1.614426851272583, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "grad_norm": 1.224976897239685, |
| "learning_rate": 0.0007430745382893488, |
| "loss": 1.612691879272461, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9417854724917855, |
| "eval_cos_loss": 0.4400326657905253, |
| "eval_dec_loss": 0.1005224303852743, |
| "eval_loss": 1.6120708153954446, |
| "eval_mse2_loss": 0.14864133708258429, |
| "eval_mse_loss": 1.254655804715431, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5599673331927643, |
| "flow/improvement_ratio": 0.8927549306771903, |
| "flow/mag_ratio_mean": 0.5825493686488951, |
| "flow/mag_ratio_std": 0.25123427366651196, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9417854724917855, |
| "eval_cos_loss": 0.4400326657905253, |
| "eval_dec_loss": 0.1005224303852743, |
| "eval_loss": 1.6120708153954446, |
| "eval_mse2_loss": 0.14864133708258429, |
| "eval_mse_loss": 1.254655804715431, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 153.0586, |
| "eval_samples_per_second": 196.003, |
| "eval_steps_per_second": 3.064, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5599673331927643, |
| "flow/improvement_ratio": 0.8927549306771903, |
| "flow/mag_ratio_mean": 0.5825493686488951, |
| "flow/mag_ratio_std": 0.25123427366651196, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.36408300471911564, |
| "grad_norm": 1.2766138315200806, |
| "learning_rate": 0.0007274392162748551, |
| "loss": 1.6162679195404053, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.3751158230439373, |
| "grad_norm": 0.862872302532196, |
| "learning_rate": 0.000711518231245687, |
| "loss": 1.6088062524795532, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.38614864136875904, |
| "grad_norm": 0.7975575923919678, |
| "learning_rate": 0.0006953315799141723, |
| "loss": 1.6033779382705688, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "grad_norm": 1.822509765625, |
| "learning_rate": 0.0006788995926687669, |
| "loss": 1.6062895059585571, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9410773493346376, |
| "eval_cos_loss": 0.4370425479498499, |
| "eval_dec_loss": 0.10427019556861188, |
| "eval_loss": 1.6075624590997757, |
| "eval_mse2_loss": 0.1481303899272927, |
| "eval_mse_loss": 1.247209641470838, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5629574498896406, |
| "flow/improvement_ratio": 0.8962287469459241, |
| "flow/mag_ratio_mean": 0.5783110863364327, |
| "flow/mag_ratio_std": 0.2480927841432059, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9410773493346376, |
| "eval_cos_loss": 0.4370425479498499, |
| "eval_dec_loss": 0.10427019556861188, |
| "eval_loss": 1.6075624590997757, |
| "eval_mse2_loss": 0.1481303899272927, |
| "eval_mse_loss": 1.247209641470838, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 159.7751, |
| "eval_samples_per_second": 187.764, |
| "eval_steps_per_second": 2.935, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5629574498896406, |
| "flow/improvement_ratio": 0.8962287469459241, |
| "flow/mag_ratio_mean": 0.5783110863364327, |
| "flow/mag_ratio_std": 0.2480927841432059, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4082142780184024, |
| "grad_norm": 1.1795552968978882, |
| "learning_rate": 0.0006622429080391422, |
| "loss": 1.6098705530166626, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.4192470963432241, |
| "grad_norm": 0.8205899000167847, |
| "learning_rate": 0.0006453824467742515, |
| "loss": 1.6050623655319214, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.43027991466804577, |
| "grad_norm": 0.6470943093299866, |
| "learning_rate": 0.0006283393855659275, |
| "loss": 1.61065673828125, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "grad_norm": 0.9093553423881531, |
| "learning_rate": 0.0006111351304510173, |
| "loss": 1.6007680892944336, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.9417572640186486, |
| "eval_cos_loss": 0.4365569194242644, |
| "eval_dec_loss": 0.10090916226905927, |
| "eval_loss": 1.6032250524838088, |
| "eval_mse2_loss": 0.14803322787478027, |
| "eval_mse_loss": 1.2463789934288465, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5634430805757356, |
| "flow/improvement_ratio": 0.8962225704304954, |
| "flow/mag_ratio_mean": 0.5846807780042131, |
| "flow/mag_ratio_std": 0.25302369241267125, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.9417572640186486, |
| "eval_cos_loss": 0.4365569194242644, |
| "eval_dec_loss": 0.10090916226905927, |
| "eval_loss": 1.6032250524838088, |
| "eval_mse2_loss": 0.14803322787478027, |
| "eval_mse_loss": 1.2463789934288465, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.6772, |
| "eval_samples_per_second": 196.493, |
| "eval_steps_per_second": 3.072, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5634430805757356, |
| "flow/improvement_ratio": 0.8962225704304954, |
| "flow/mag_ratio_mean": 0.5846807780042131, |
| "flow/mag_ratio_std": 0.25302369241267125, |
| "step": 10240 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|