| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1024, |
| "global_step": 23204, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011032818324821687, |
| "grad_norm": 0.3007344603538513, |
| "learning_rate": 0.000498046875, |
| "loss": 1.9607043266296387, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.022065636649643373, |
| "grad_norm": 0.38754719495773315, |
| "learning_rate": 0.000998046875, |
| "loss": 1.8510947227478027, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03309845497446506, |
| "grad_norm": 0.41651925444602966, |
| "learning_rate": 0.000999688448778502, |
| "loss": 1.7883503437042236, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 0.4576423764228821, |
| "learning_rate": 0.0009987492950653055, |
| "loss": 1.7437095642089844, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.7290582309773348, |
| "eval_cos_loss": 0.6748774711257105, |
| "eval_dec_loss": 0.0016125108924325007, |
| "eval_loss": 1.7118930867485909, |
| "eval_mse2_loss": 0.23725909220257294, |
| "eval_mse_loss": 1.7118930867485909, |
| "eval_rec_loss": 0.05790480172861296, |
| "eval_var_loss": 0.029386979561529435, |
| "flow/cos_sim": 0.32512253071707703, |
| "flow/improvement_ratio": 0.773648498536173, |
| "flow/mag_ratio_mean": 0.3315794987083752, |
| "flow/mag_ratio_std": 0.19750540018844198, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.7290582309773348, |
| "eval_cos_loss": 0.6748774711257105, |
| "eval_dec_loss": 0.0016125108924325007, |
| "eval_loss": 1.7118930867485909, |
| "eval_mse2_loss": 0.23725909220257294, |
| "eval_mse_loss": 1.7118930867485909, |
| "eval_rec_loss": 0.05790480172861296, |
| "eval_runtime": 103.0234, |
| "eval_samples_per_second": 291.196, |
| "eval_steps_per_second": 4.552, |
| "eval_var_loss": 0.029386979561529435, |
| "flow/cos_sim": 0.32512253071707703, |
| "flow/improvement_ratio": 0.773648498536173, |
| "flow/mag_ratio_mean": 0.3315794987083752, |
| "flow/mag_ratio_std": 0.19750540018844198, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05516409162410843, |
| "grad_norm": 0.46116578578948975, |
| "learning_rate": 0.0009971837136430763, |
| "loss": 1.6916401386260986, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.06619690994893011, |
| "grad_norm": 0.4674736559391022, |
| "learning_rate": 0.0009949936708776692, |
| "loss": 1.6614705324172974, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.07722972827375181, |
| "grad_norm": 0.4964284598827362, |
| "learning_rate": 0.0009921819174566252, |
| "loss": 1.6426620483398438, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 0.5635536313056946, |
| "learning_rate": 0.000988751984934317, |
| "loss": 1.6190364360809326, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.7361472993029746, |
| "eval_cos_loss": 0.6193856069528217, |
| "eval_dec_loss": 0.0013400374704601193, |
| "eval_loss": 1.6149477191062878, |
| "eval_mse2_loss": 0.2126978265959571, |
| "eval_mse_loss": 1.6149477191062878, |
| "eval_rec_loss": 0.055882355892288085, |
| "eval_var_loss": 0.02890209875492526, |
| "flow/cos_sim": 0.3806143929836338, |
| "flow/improvement_ratio": 0.8031272210800318, |
| "flow/mag_ratio_mean": 0.38922329186630655, |
| "flow/mag_ratio_std": 0.2309291490168968, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.7361472993029746, |
| "eval_cos_loss": 0.6193856069528217, |
| "eval_dec_loss": 0.0013400374704601193, |
| "eval_loss": 1.6149477191062878, |
| "eval_mse2_loss": 0.2126978265959571, |
| "eval_mse_loss": 1.6149477191062878, |
| "eval_rec_loss": 0.055882355892288085, |
| "eval_runtime": 102.671, |
| "eval_samples_per_second": 292.196, |
| "eval_steps_per_second": 4.568, |
| "eval_var_loss": 0.02890209875492526, |
| "flow/cos_sim": 0.3806143929836338, |
| "flow/improvement_ratio": 0.8031272210800318, |
| "flow/mag_ratio_mean": 0.38922329186630655, |
| "flow/mag_ratio_std": 0.2309291490168968, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09929536492339518, |
| "grad_norm": 0.5123931169509888, |
| "learning_rate": 0.0009847081812963268, |
| "loss": 1.6022895574569702, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.11032818324821686, |
| "grad_norm": 0.5382006764411926, |
| "learning_rate": 0.0009800555855486275, |
| "loss": 1.5898725986480713, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.12136100157303854, |
| "grad_norm": 0.5514854192733765, |
| "learning_rate": 0.0009748000413383664, |
| "loss": 1.5826457738876343, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 0.5678655505180359, |
| "learning_rate": 0.0009689481496142604, |
| "loss": 1.5685003995895386, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.7259279887313845, |
| "eval_cos_loss": 0.5904010080579501, |
| "eval_dec_loss": 0.001531593777309569, |
| "eval_loss": 1.5695596038659752, |
| "eval_mse2_loss": 0.20936787379448857, |
| "eval_mse_loss": 1.5695596038659752, |
| "eval_rec_loss": 0.060287337766082555, |
| "eval_var_loss": 0.02963222060868862, |
| "flow/cos_sim": 0.409598992768127, |
| "flow/improvement_ratio": 0.8170475746268657, |
| "flow/mag_ratio_mean": 0.4326363442294887, |
| "flow/mag_ratio_std": 0.22964929263474845, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.7259279887313845, |
| "eval_cos_loss": 0.5904010080579501, |
| "eval_dec_loss": 0.001531593777309569, |
| "eval_loss": 1.5695596038659752, |
| "eval_mse2_loss": 0.20936787379448857, |
| "eval_mse_loss": 1.5695596038659752, |
| "eval_rec_loss": 0.060287337766082555, |
| "eval_runtime": 103.2177, |
| "eval_samples_per_second": 290.648, |
| "eval_steps_per_second": 4.544, |
| "eval_var_loss": 0.02963222060868862, |
| "flow/cos_sim": 0.409598992768127, |
| "flow/improvement_ratio": 0.8170475746268657, |
| "flow/mag_ratio_mean": 0.4326363442294887, |
| "flow/mag_ratio_std": 0.22964929263474845, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.14342663822268192, |
| "grad_norm": 0.5561569333076477, |
| "learning_rate": 0.0009625072603358231, |
| "loss": 1.5565699338912964, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.15445945654750362, |
| "grad_norm": 0.5337810516357422, |
| "learning_rate": 0.0009554854632418371, |
| "loss": 1.556112289428711, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.1654922748723253, |
| "grad_norm": 0.6065189242362976, |
| "learning_rate": 0.000947891577689663, |
| "loss": 1.5425442457199097, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 0.6436013579368591, |
| "learning_rate": 0.0009397351415781539, |
| "loss": 1.5381078720092773, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.7609616675139879, |
| "eval_cos_loss": 0.5725635654255271, |
| "eval_dec_loss": 0.0013841146930163827, |
| "eval_loss": 1.5326353372541317, |
| "eval_mse2_loss": 0.20037362373459822, |
| "eval_mse_loss": 1.5326353372541317, |
| "eval_rec_loss": 0.05205997703934529, |
| "eval_var_loss": 0.029761007865831288, |
| "flow/cos_sim": 0.42743643495573924, |
| "flow/improvement_ratio": 0.8228500355789656, |
| "flow/mag_ratio_mean": 0.44373360606653095, |
| "flow/mag_ratio_std": 0.2451275099060937, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.7609616675139879, |
| "eval_cos_loss": 0.5725635654255271, |
| "eval_dec_loss": 0.0013841146930163827, |
| "eval_loss": 1.5326353372541317, |
| "eval_mse2_loss": 0.20037362373459822, |
| "eval_mse_loss": 1.5326353372541317, |
| "eval_rec_loss": 0.05205997703934529, |
| "eval_runtime": 103.0729, |
| "eval_samples_per_second": 291.056, |
| "eval_steps_per_second": 4.55, |
| "eval_var_loss": 0.029761007865831288, |
| "flow/cos_sim": 0.42743643495573924, |
| "flow/improvement_ratio": 0.8228500355789656, |
| "flow/mag_ratio_mean": 0.44373360606653095, |
| "flow/mag_ratio_std": 0.2451275099060937, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18755791152196866, |
| "grad_norm": 0.5533596873283386, |
| "learning_rate": 0.000931026399368079, |
| "loss": 1.5355464220046997, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.19859072984679035, |
| "grad_norm": 0.6129039525985718, |
| "learning_rate": 0.0009217762892151117, |
| "loss": 1.526825189590454, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.20962354817161205, |
| "grad_norm": 0.6128653287887573, |
| "learning_rate": 0.0009119964292315354, |
| "loss": 1.5186046361923218, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 0.6013854146003723, |
| "learning_rate": 0.0009016991028939279, |
| "loss": 1.5184156894683838, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.7313483153096204, |
| "eval_cos_loss": 0.565770648880554, |
| "eval_dec_loss": 0.0013971831941510986, |
| "eval_loss": 1.5208095035064957, |
| "eval_mse2_loss": 0.19698964767872906, |
| "eval_mse_loss": 1.5208095035064957, |
| "eval_rec_loss": 0.058415787606271724, |
| "eval_var_loss": 0.029480641187508223, |
| "flow/cos_sim": 0.4342293481328594, |
| "flow/improvement_ratio": 0.8310012437387316, |
| "flow/mag_ratio_mean": 0.4485036200170578, |
| "flow/mag_ratio_std": 0.24114183547781476, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.7313483153096204, |
| "eval_cos_loss": 0.565770648880554, |
| "eval_dec_loss": 0.0013971831941510986, |
| "eval_loss": 1.5208095035064957, |
| "eval_mse2_loss": 0.19698964767872906, |
| "eval_mse_loss": 1.5208095035064957, |
| "eval_rec_loss": 0.058415787606271724, |
| "eval_runtime": 104.1181, |
| "eval_samples_per_second": 288.134, |
| "eval_steps_per_second": 4.505, |
| "eval_var_loss": 0.029480641187508223, |
| "flow/cos_sim": 0.4342293481328594, |
| "flow/improvement_ratio": 0.8310012437387316, |
| "flow/mag_ratio_mean": 0.4485036200170578, |
| "flow/mag_ratio_std": 0.24114183547781476, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.23168918482125542, |
| "grad_norm": 0.5818307995796204, |
| "learning_rate": 0.0008908972436151494, |
| "loss": 1.5146307945251465, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2427220031460771, |
| "grad_norm": 0.5968588590621948, |
| "learning_rate": 0.0008796044185000127, |
| "loss": 1.5090495347976685, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.2537548214708988, |
| "grad_norm": 0.645140528678894, |
| "learning_rate": 0.0008678348113050368, |
| "loss": 1.5024750232696533, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 0.6232675313949585, |
| "learning_rate": 0.0008556032046236897, |
| "loss": 1.499906301498413, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.7912708006589123, |
| "eval_cos_loss": 0.5567008724598996, |
| "eval_dec_loss": 0.0014503563844457592, |
| "eval_loss": 1.5032868258226146, |
| "eval_mse2_loss": 0.19748503063469808, |
| "eval_mse_loss": 1.5032868258226146, |
| "eval_rec_loss": 0.05662109937145512, |
| "eval_var_loss": 0.029432428198487265, |
| "flow/cos_sim": 0.44329912652339, |
| "flow/improvement_ratio": 0.8293576759061834, |
| "flow/mag_ratio_mean": 0.4766448940803756, |
| "flow/mag_ratio_std": 0.2504093461771255, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.7912708006589123, |
| "eval_cos_loss": 0.5567008724598996, |
| "eval_dec_loss": 0.0014503563844457592, |
| "eval_loss": 1.5032868258226146, |
| "eval_mse2_loss": 0.19748503063469808, |
| "eval_mse_loss": 1.5032868258226146, |
| "eval_rec_loss": 0.05662109937145512, |
| "eval_runtime": 102.8737, |
| "eval_samples_per_second": 291.62, |
| "eval_steps_per_second": 4.559, |
| "eval_var_loss": 0.029432428198487265, |
| "flow/cos_sim": 0.44329912652339, |
| "flow/improvement_ratio": 0.8293576759061834, |
| "flow/mag_ratio_mean": 0.4766448940803756, |
| "flow/mag_ratio_std": 0.2504093461771255, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2758204581205422, |
| "grad_norm": 0.6002918481826782, |
| "learning_rate": 0.000842924961319492, |
| "loss": 1.5013189315795898, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.28685327644536385, |
| "grad_norm": 0.6131093502044678, |
| "learning_rate": 0.0008298160052303045, |
| "loss": 1.491563320159912, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.2978860947701855, |
| "grad_norm": 0.6153339743614197, |
| "learning_rate": 0.0008162928011680314, |
| "loss": 1.4890822172164917, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 0.5415698885917664, |
| "learning_rate": 0.000802372334238864, |
| "loss": 1.4869613647460938, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.6991471025766374, |
| "eval_cos_loss": 0.5479137339571646, |
| "eval_dec_loss": 0.0014181479605397324, |
| "eval_loss": 1.4831991663365476, |
| "eval_mse2_loss": 0.19134751513505033, |
| "eval_mse_loss": 1.4831991663365476, |
| "eval_rec_loss": 0.059703294130197086, |
| "eval_var_loss": 0.029254676190330023, |
| "flow/cos_sim": 0.45208626534384705, |
| "flow/improvement_ratio": 0.8373922797154262, |
| "flow/mag_ratio_mean": 0.46522473710686413, |
| "flow/mag_ratio_std": 0.24233753331053232, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.6991471025766374, |
| "eval_cos_loss": 0.5479137339571646, |
| "eval_dec_loss": 0.0014181479605397324, |
| "eval_loss": 1.4831991663365476, |
| "eval_mse2_loss": 0.19134751513505033, |
| "eval_mse_loss": 1.4831991663365476, |
| "eval_rec_loss": 0.059703294130197086, |
| "eval_runtime": 102.4636, |
| "eval_samples_per_second": 292.787, |
| "eval_steps_per_second": 4.577, |
| "eval_var_loss": 0.029254676190330023, |
| "flow/cos_sim": 0.45208626534384705, |
| "flow/improvement_ratio": 0.8373922797154262, |
| "flow/mag_ratio_mean": 0.46522473710686413, |
| "flow/mag_ratio_std": 0.24233753331053232, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3199517314198289, |
| "grad_norm": 0.6683939695358276, |
| "learning_rate": 0.0007880720885100349, |
| "loss": 1.476445198059082, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3309845497446506, |
| "grad_norm": 0.5962712168693542, |
| "learning_rate": 0.0007734100250498788, |
| "loss": 1.4769901037216187, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3420173680694723, |
| "grad_norm": 0.5617682933807373, |
| "learning_rate": 0.000758404559368781, |
| "loss": 1.4828119277954102, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "grad_norm": 0.7243582606315613, |
| "learning_rate": 0.0007430745382893488, |
| "loss": 1.4768471717834473, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.8304965060986523, |
| "eval_cos_loss": 0.540344935744556, |
| "eval_dec_loss": 0.001444703027248449, |
| "eval_loss": 1.4680257085035604, |
| "eval_mse2_loss": 0.1896642409979916, |
| "eval_mse_loss": 1.4680257085035604, |
| "eval_rec_loss": 0.056618061303885886, |
| "eval_var_loss": 0.02941279357144319, |
| "flow/cos_sim": 0.45965506501797676, |
| "flow/improvement_ratio": 0.8406627575980067, |
| "flow/mag_ratio_mean": 0.478700284002178, |
| "flow/mag_ratio_std": 0.25183968741629426, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.8304965060986523, |
| "eval_cos_loss": 0.540344935744556, |
| "eval_dec_loss": 0.001444703027248449, |
| "eval_loss": 1.4680257085035604, |
| "eval_mse2_loss": 0.1896642409979916, |
| "eval_mse_loss": 1.4680257085035604, |
| "eval_rec_loss": 0.056618061303885886, |
| "eval_runtime": 102.5949, |
| "eval_samples_per_second": 292.412, |
| "eval_steps_per_second": 4.571, |
| "eval_var_loss": 0.02941279357144319, |
| "flow/cos_sim": 0.45965506501797676, |
| "flow/improvement_ratio": 0.8406627575980067, |
| "flow/mag_ratio_mean": 0.478700284002178, |
| "flow/mag_ratio_std": 0.25183968741629426, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.36408300471911564, |
| "grad_norm": 0.6018216013908386, |
| "learning_rate": 0.0007274392162748551, |
| "loss": 1.4694677591323853, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.3751158230439373, |
| "grad_norm": 0.5599421262741089, |
| "learning_rate": 0.000711518231245687, |
| "loss": 1.4721711874008179, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.38614864136875904, |
| "grad_norm": 0.6241788864135742, |
| "learning_rate": 0.0006953315799141723, |
| "loss": 1.459176778793335, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "grad_norm": 0.6998386383056641, |
| "learning_rate": 0.0006788995926687669, |
| "loss": 1.4632288217544556, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.7369729060948697, |
| "eval_cos_loss": 0.5354188728942546, |
| "eval_dec_loss": 0.0013972995771112035, |
| "eval_loss": 1.456240051336634, |
| "eval_mse2_loss": 0.18758021689045912, |
| "eval_mse_loss": 1.456240051336634, |
| "eval_rec_loss": 0.05933690067730161, |
| "eval_var_loss": 0.029272472025203045, |
| "flow/cos_sim": 0.4645811278047338, |
| "flow/improvement_ratio": 0.841912091286706, |
| "flow/mag_ratio_mean": 0.4744996659791292, |
| "flow/mag_ratio_std": 0.25510632248321324, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.7369729060948697, |
| "eval_cos_loss": 0.5354188728942546, |
| "eval_dec_loss": 0.0013972995771112035, |
| "eval_loss": 1.456240051336634, |
| "eval_mse2_loss": 0.18758021689045912, |
| "eval_mse_loss": 1.456240051336634, |
| "eval_rec_loss": 0.05933690067730161, |
| "eval_runtime": 102.7869, |
| "eval_samples_per_second": 291.866, |
| "eval_steps_per_second": 4.563, |
| "eval_var_loss": 0.029272472025203045, |
| "flow/cos_sim": 0.4645811278047338, |
| "flow/improvement_ratio": 0.841912091286706, |
| "flow/mag_ratio_mean": 0.4744996659791292, |
| "flow/mag_ratio_std": 0.25510632248321324, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4082142780184024, |
| "grad_norm": 0.5962811708450317, |
| "learning_rate": 0.0006622429080391422, |
| "loss": 1.4640510082244873, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.4192470963432241, |
| "grad_norm": 0.588157594203949, |
| "learning_rate": 0.0006453824467742515, |
| "loss": 1.4573228359222412, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.43027991466804577, |
| "grad_norm": 0.5932533740997314, |
| "learning_rate": 0.0006283393855659275, |
| "loss": 1.45904541015625, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "grad_norm": 0.6125295162200928, |
| "learning_rate": 0.0006111351304510173, |
| "loss": 1.455463171005249, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.782062866367082, |
| "eval_cos_loss": 0.5326331170128861, |
| "eval_dec_loss": 0.0014520329028074289, |
| "eval_loss": 1.453022389777942, |
| "eval_mse2_loss": 0.18840382176675777, |
| "eval_mse_loss": 1.453022389777942, |
| "eval_rec_loss": 0.05694365586195864, |
| "eval_var_loss": 0.030047652452612227, |
| "flow/cos_sim": 0.4673668822881255, |
| "flow/improvement_ratio": 0.8451325959488273, |
| "flow/mag_ratio_mean": 0.4745017219581075, |
| "flow/mag_ratio_std": 0.2538460113092272, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.782062866367082, |
| "eval_cos_loss": 0.5326331170128861, |
| "eval_dec_loss": 0.0014520329028074289, |
| "eval_loss": 1.453022389777942, |
| "eval_mse2_loss": 0.18840382176675777, |
| "eval_mse_loss": 1.453022389777942, |
| "eval_rec_loss": 0.05694365586195864, |
| "eval_runtime": 102.6396, |
| "eval_samples_per_second": 292.285, |
| "eval_steps_per_second": 4.569, |
| "eval_var_loss": 0.030047652452612227, |
| "flow/cos_sim": 0.4673668822881255, |
| "flow/improvement_ratio": 0.8451325959488273, |
| "flow/mag_ratio_mean": 0.4745017219581075, |
| "flow/mag_ratio_std": 0.2538460113092272, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.45234555131768917, |
| "grad_norm": 0.6636393666267395, |
| "learning_rate": 0.0005937912899254605, |
| "loss": 1.449182152748108, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.46337836964251083, |
| "grad_norm": 0.5821182727813721, |
| "learning_rate": 0.0005763296478040787, |
| "loss": 1.4548357725143433, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.4744111879673325, |
| "grad_norm": 0.6481524109840393, |
| "learning_rate": 0.0005587721358601663, |
| "loss": 1.4508562088012695, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "grad_norm": 0.653151273727417, |
| "learning_rate": 0.0005411408062792448, |
| "loss": 1.4442917108535767, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.7210051310934674, |
| "eval_cos_loss": 0.5317811018495417, |
| "eval_dec_loss": 0.001381080663882877, |
| "eval_loss": 1.4509358108679116, |
| "eval_mse2_loss": 0.18439998461811274, |
| "eval_mse_loss": 1.4509358108679116, |
| "eval_rec_loss": 0.06008440565301983, |
| "eval_var_loss": 0.02928201055952481, |
| "flow/cos_sim": 0.46821889872235783, |
| "flow/improvement_ratio": 0.8448605187920365, |
| "flow/mag_ratio_mean": 0.4716693379604486, |
| "flow/mag_ratio_std": 0.25824843223161026, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.7210051310934674, |
| "eval_cos_loss": 0.5317811018495417, |
| "eval_dec_loss": 0.001381080663882877, |
| "eval_loss": 1.4509358108679116, |
| "eval_mse2_loss": 0.18439998461811274, |
| "eval_mse_loss": 1.4509358108679116, |
| "eval_rec_loss": 0.06008440565301983, |
| "eval_runtime": 102.2168, |
| "eval_samples_per_second": 293.494, |
| "eval_steps_per_second": 4.588, |
| "eval_var_loss": 0.02928201055952481, |
| "flow/cos_sim": 0.46821889872235783, |
| "flow/improvement_ratio": 0.8448605187920365, |
| "flow/mag_ratio_mean": 0.4716693379604486, |
| "flow/mag_ratio_std": 0.25824843223161026, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4964768246169759, |
| "grad_norm": 0.6343415379524231, |
| "learning_rate": 0.0005234578039615789, |
| "loss": 1.439915418624878, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5075096429417976, |
| "grad_norm": 0.7004493474960327, |
| "learning_rate": 0.0005057453387082458, |
| "loss": 1.4451959133148193, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5185424612666193, |
| "grad_norm": 0.7312789559364319, |
| "learning_rate": 0.0004880256573256866, |
| "loss": 1.4458304643630981, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "grad_norm": 0.6173807382583618, |
| "learning_rate": 0.0004703210156837805, |
| "loss": 1.4372222423553467, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.7895861883551821, |
| "eval_cos_loss": 0.5291873634751163, |
| "eval_dec_loss": 0.0014323489154225587, |
| "eval_loss": 1.4454485322875001, |
| "eval_mse2_loss": 0.18589616244408622, |
| "eval_mse_loss": 1.4454485322875001, |
| "eval_rec_loss": 0.05557121256036736, |
| "eval_var_loss": 0.02942733407052341, |
| "flow/cos_sim": 0.4708126370967832, |
| "flow/improvement_ratio": 0.8446106520542966, |
| "flow/mag_ratio_mean": 0.4888702236385996, |
| "flow/mag_ratio_std": 0.2535232830403456, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.7895861883551821, |
| "eval_cos_loss": 0.5291873634751163, |
| "eval_dec_loss": 0.0014323489154225587, |
| "eval_loss": 1.4454485322875001, |
| "eval_mse2_loss": 0.18589616244408622, |
| "eval_mse_loss": 1.4454485322875001, |
| "eval_rec_loss": 0.05557121256036736, |
| "eval_runtime": 103.43, |
| "eval_samples_per_second": 290.051, |
| "eval_steps_per_second": 4.534, |
| "eval_var_loss": 0.02942733407052341, |
| "flow/cos_sim": 0.4708126370967832, |
| "flow/improvement_ratio": 0.8446106520542966, |
| "flow/mag_ratio_mean": 0.4888702236385996, |
| "flow/mag_ratio_std": 0.2535232830403456, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5406080979162626, |
| "grad_norm": 0.6357247233390808, |
| "learning_rate": 0.0004526536507625343, |
| "loss": 1.4381682872772217, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5516409162410844, |
| "grad_norm": 0.6554076671600342, |
| "learning_rate": 0.00043504575272249973, |
| "loss": 1.433600664138794, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.562673734565906, |
| "grad_norm": 0.6298866271972656, |
| "learning_rate": 0.0004175194370339921, |
| "loss": 1.4380649328231812, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "grad_norm": 0.6736286282539368, |
| "learning_rate": 0.0004000967167001243, |
| "loss": 1.4344258308410645, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.754198615923969, |
| "eval_cos_loss": 0.5237328007912585, |
| "eval_dec_loss": 0.0013661543356946239, |
| "eval_loss": 1.4330016496593256, |
| "eval_mse2_loss": 0.18149238913806517, |
| "eval_mse_loss": 1.4330016496593256, |
| "eval_rec_loss": 0.05589268211104564, |
| "eval_var_loss": 0.029215975571225194, |
| "flow/cos_sim": 0.47626719946291907, |
| "flow/improvement_ratio": 0.8467817164179104, |
| "flow/mag_ratio_mean": 0.48472079412261054, |
| "flow/mag_ratio_std": 0.25520913404569445, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.754198615923969, |
| "eval_cos_loss": 0.5237328007912585, |
| "eval_dec_loss": 0.0013661543356946239, |
| "eval_loss": 1.4330016496593256, |
| "eval_mse2_loss": 0.18149238913806517, |
| "eval_mse_loss": 1.4330016496593256, |
| "eval_rec_loss": 0.05589268211104564, |
| "eval_runtime": 104.5844, |
| "eval_samples_per_second": 286.85, |
| "eval_steps_per_second": 4.484, |
| "eval_var_loss": 0.029215975571225194, |
| "flow/cos_sim": 0.47626719946291907, |
| "flow/improvement_ratio": 0.8467817164179104, |
| "flow/mag_ratio_mean": 0.48472079412261054, |
| "flow/mag_ratio_std": 0.25520913404569445, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5847393712155494, |
| "grad_norm": 0.6239475011825562, |
| "learning_rate": 0.00038279947460853446, |
| "loss": 1.4331660270690918, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.595772189540371, |
| "grad_norm": 0.6627410054206848, |
| "learning_rate": 0.00036564943604654345, |
| "loss": 1.4354665279388428, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6068050078651928, |
| "grad_norm": 0.6042789816856384, |
| "learning_rate": 0.00034866814141425254, |
| "loss": 1.4358711242675781, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "grad_norm": 0.6111028790473938, |
| "learning_rate": 0.0003318769191698637, |
| "loss": 1.4299204349517822, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.7007889817819709, |
| "eval_cos_loss": 0.5228769168543663, |
| "eval_dec_loss": 0.001358627397164917, |
| "eval_loss": 1.4326896403135776, |
| "eval_mse2_loss": 0.18112752599312043, |
| "eval_mse_loss": 1.4326896403135776, |
| "eval_rec_loss": 0.05488209239939954, |
| "eval_var_loss": 0.02930486012401103, |
| "flow/cos_sim": 0.4771230810486686, |
| "flow/improvement_ratio": 0.8480699183081767, |
| "flow/mag_ratio_mean": 0.48936520539112943, |
| "flow/mag_ratio_std": 0.2627385834386862, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.7007889817819709, |
| "eval_cos_loss": 0.5228769168543663, |
| "eval_dec_loss": 0.001358627397164917, |
| "eval_loss": 1.4326896403135776, |
| "eval_mse2_loss": 0.18112752599312043, |
| "eval_mse_loss": 1.4326896403135776, |
| "eval_rec_loss": 0.05488209239939954, |
| "eval_runtime": 103.0204, |
| "eval_samples_per_second": 291.205, |
| "eval_steps_per_second": 4.552, |
| "eval_var_loss": 0.02930486012401103, |
| "flow/cos_sim": 0.4771230810486686, |
| "flow/improvement_ratio": 0.8480699183081767, |
| "flow/mag_ratio_mean": 0.48936520539112943, |
| "flow/mag_ratio_std": 0.2627385834386862, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6288706445148361, |
| "grad_norm": 0.6931398510932922, |
| "learning_rate": 0.00031529685904119485, |
| "loss": 1.4271036386489868, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6399034628396578, |
| "grad_norm": 0.616621196269989, |
| "learning_rate": 0.0002989487855370421, |
| "loss": 1.4223978519439697, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6509362811644795, |
| "grad_norm": 0.7069717645645142, |
| "learning_rate": 0.00028285323179165424, |
| "loss": 1.4210408926010132, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "grad_norm": 0.5767509937286377, |
| "learning_rate": 0.0002670304137751759, |
| "loss": 1.4249491691589355, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.7712555700160785, |
| "eval_cos_loss": 0.520311662700893, |
| "eval_dec_loss": 0.0013948907095809597, |
| "eval_loss": 1.4238692244995377, |
| "eval_mse2_loss": 0.1801110237900382, |
| "eval_mse_loss": 1.4238692244995377, |
| "eval_rec_loss": 0.05672604351370002, |
| "eval_var_loss": 0.029106232196664507, |
| "flow/cos_sim": 0.4796883367907518, |
| "flow/improvement_ratio": 0.8454879620181981, |
| "flow/mag_ratio_mean": 0.4905342829507043, |
| "flow/mag_ratio_std": 0.2601209406786636, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.7712555700160785, |
| "eval_cos_loss": 0.520311662700893, |
| "eval_dec_loss": 0.0013948907095809597, |
| "eval_loss": 1.4238692244995377, |
| "eval_mse2_loss": 0.1801110237900382, |
| "eval_mse_loss": 1.4238692244995377, |
| "eval_rec_loss": 0.05672604351370002, |
| "eval_runtime": 102.9224, |
| "eval_samples_per_second": 291.482, |
| "eval_steps_per_second": 4.557, |
| "eval_var_loss": 0.029106232196664507, |
| "flow/cos_sim": 0.4796883367907518, |
| "flow/improvement_ratio": 0.8454879620181981, |
| "flow/mag_ratio_mean": 0.4905342829507043, |
| "flow/mag_ratio_std": 0.2601209406786636, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6730019178141229, |
| "grad_norm": 0.7135971784591675, |
| "learning_rate": 0.0002515002049024435, |
| "loss": 1.4220284223556519, |
| "step": 15616 |
| }, |
| { |
| "epoch": 0.6840347361389446, |
| "grad_norm": 0.6657771468162537, |
| "learning_rate": 0.00023628211107203429, |
| "loss": 1.421180248260498, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.6950675544637662, |
| "grad_norm": 0.6840319037437439, |
| "learning_rate": 0.00022139524616691188, |
| "loss": 1.4254897832870483, |
| "step": 16128 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "grad_norm": 0.6978499889373779, |
| "learning_rate": 0.000206858308047443, |
| "loss": 1.4185926914215088, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.7788876579155211, |
| "eval_cos_loss": 0.5166550292643403, |
| "eval_dec_loss": 0.0013616397724124983, |
| "eval_loss": 1.4177445305435896, |
| "eval_mse2_loss": 0.17684134553426872, |
| "eval_mse_loss": 1.4177445305435896, |
| "eval_rec_loss": 0.05370217473951103, |
| "eval_var_loss": 0.02986719635233823, |
| "flow/cos_sim": 0.48334496971894936, |
| "flow/improvement_ratio": 0.851445895522388, |
| "flow/mag_ratio_mean": 0.49312538899846675, |
| "flow/mag_ratio_std": 0.2614598782586136, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.7788876579155211, |
| "eval_cos_loss": 0.5166550292643403, |
| "eval_dec_loss": 0.0013616397724124983, |
| "eval_loss": 1.4177445305435896, |
| "eval_mse2_loss": 0.17684134553426872, |
| "eval_mse_loss": 1.4177445305435896, |
| "eval_rec_loss": 0.05370217473951103, |
| "eval_runtime": 103.4098, |
| "eval_samples_per_second": 290.108, |
| "eval_steps_per_second": 4.535, |
| "eval_var_loss": 0.02986719635233823, |
| "flow/cos_sim": 0.48334496971894936, |
| "flow/improvement_ratio": 0.851445895522388, |
| "flow/mag_ratio_mean": 0.49312538899846675, |
| "flow/mag_ratio_std": 0.2614598782586136, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7171331911134096, |
| "grad_norm": 0.7240028977394104, |
| "learning_rate": 0.00019268955506693798, |
| "loss": 1.4189178943634033, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.7281660094382313, |
| "grad_norm": 0.6644338369369507, |
| "learning_rate": 0.00017890678313921, |
| "loss": 1.4202007055282593, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.739198827763053, |
| "grad_norm": 0.8413478136062622, |
| "learning_rate": 0.00016552730338695792, |
| "loss": 1.419106364250183, |
| "step": 17152 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "grad_norm": 0.741065263748169, |
| "learning_rate": 0.00015256792039904465, |
| "loss": 1.415405511856079, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.7637354358631164, |
| "eval_cos_loss": 0.5136227607727051, |
| "eval_dec_loss": 0.0013235103740173923, |
| "eval_loss": 1.4103716327183282, |
| "eval_mse2_loss": 0.17683548507278662, |
| "eval_mse_loss": 1.4103716327183282, |
| "eval_rec_loss": 0.05761792201366125, |
| "eval_var_loss": 0.03023185586926144, |
| "flow/cos_sim": 0.4863772399898277, |
| "flow/improvement_ratio": 0.8529339908028463, |
| "flow/mag_ratio_mean": 0.4943711748128253, |
| "flow/mag_ratio_std": 0.2643810258046396, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.7637354358631164, |
| "eval_cos_loss": 0.5136227607727051, |
| "eval_dec_loss": 0.0013235103740173923, |
| "eval_loss": 1.4103716327183282, |
| "eval_mse2_loss": 0.17683548507278662, |
| "eval_mse_loss": 1.4103716327183282, |
| "eval_rec_loss": 0.05761792201366125, |
| "eval_runtime": 103.2171, |
| "eval_samples_per_second": 290.65, |
| "eval_steps_per_second": 4.544, |
| "eval_var_loss": 0.03023185586926144, |
| "flow/cos_sim": 0.4863772399898277, |
| "flow/improvement_ratio": 0.8529339908028463, |
| "flow/mag_ratio_mean": 0.4943711748128253, |
| "flow/mag_ratio_std": 0.2643810258046396, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7612644644126964, |
| "grad_norm": 0.5421018600463867, |
| "learning_rate": 0.00014004491112398103, |
| "loss": 1.4142208099365234, |
| "step": 17664 |
| }, |
| { |
| "epoch": 0.7722972827375181, |
| "grad_norm": 0.665582537651062, |
| "learning_rate": 0.00012797400442612433, |
| "loss": 1.411756992340088, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.7833301010623397, |
| "grad_norm": 0.6837579607963562, |
| "learning_rate": 0.00011637036133026895, |
| "loss": 1.4075802564620972, |
| "step": 18176 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "grad_norm": 0.7160040736198425, |
| "learning_rate": 0.00010524855597944216, |
| "loss": 1.4070231914520264, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.8024029342579875, |
| "eval_cos_loss": 0.5127464083593283, |
| "eval_dec_loss": 0.0013179335473900858, |
| "eval_loss": 1.4091586799763922, |
| "eval_mse2_loss": 0.17562630394501472, |
| "eval_mse_loss": 1.4091586799763922, |
| "eval_rec_loss": 0.059627406716124334, |
| "eval_var_loss": 0.029311500787576123, |
| "flow/cos_sim": 0.4872535904333281, |
| "flow/improvement_ratio": 0.8548329780096693, |
| "flow/mag_ratio_mean": 0.49533584078491877, |
| "flow/mag_ratio_std": 0.2655049035988891, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.8024029342579875, |
| "eval_cos_loss": 0.5127464083593283, |
| "eval_dec_loss": 0.0013179335473900858, |
| "eval_loss": 1.4091586799763922, |
| "eval_mse2_loss": 0.17562630394501472, |
| "eval_mse_loss": 1.4091586799763922, |
| "eval_rec_loss": 0.059627406716124334, |
| "eval_runtime": 103.4418, |
| "eval_samples_per_second": 290.018, |
| "eval_steps_per_second": 4.534, |
| "eval_var_loss": 0.029311500787576123, |
| "flow/cos_sim": 0.4872535904333281, |
| "flow/improvement_ratio": 0.8548329780096693, |
| "flow/mag_ratio_mean": 0.49533584078491877, |
| "flow/mag_ratio_std": 0.2655049035988891, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8053957377119831, |
| "grad_norm": 0.727080762386322, |
| "learning_rate": 9.462255732982089e-05, |
| "loss": 1.406097650527954, |
| "step": 18688 |
| }, |
| { |
| "epoch": 0.8164285560368048, |
| "grad_norm": 0.6209878921508789, |
| "learning_rate": 8.450571160576348e-05, |
| "loss": 1.4059816598892212, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.8274613743616265, |
| "grad_norm": 0.659706175327301, |
| "learning_rate": 7.491072553698764e-05, |
| "loss": 1.410292148590088, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "grad_norm": 0.5520651340484619, |
| "learning_rate": 6.584965039895586e-05, |
| "loss": 1.402584195137024, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.7435766156577157, |
| "eval_cos_loss": 0.5144387822923884, |
| "eval_dec_loss": 0.0013450082680801236, |
| "eval_loss": 1.4127296161041585, |
| "eval_mse2_loss": 0.17700788906134013, |
| "eval_mse_loss": 1.4127296161041585, |
| "eval_rec_loss": 0.058054142113306374, |
| "eval_var_loss": 0.0291894421593022, |
| "flow/cos_sim": 0.4855612163731793, |
| "flow/improvement_ratio": 0.8498689588199038, |
| "flow/mag_ratio_mean": 0.4951269815344292, |
| "flow/mag_ratio_std": 0.26389562489508567, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.7435766156577157, |
| "eval_cos_loss": 0.5144387822923884, |
| "eval_dec_loss": 0.0013450082680801236, |
| "eval_loss": 1.4127296161041585, |
| "eval_mse2_loss": 0.17700788906134013, |
| "eval_mse_loss": 1.4127296161041585, |
| "eval_rec_loss": 0.058054142113306374, |
| "eval_runtime": 103.8996, |
| "eval_samples_per_second": 288.74, |
| "eval_steps_per_second": 4.514, |
| "eval_var_loss": 0.0291894421593022, |
| "flow/cos_sim": 0.4855612163731793, |
| "flow/improvement_ratio": 0.8498689588199038, |
| "flow/mag_ratio_mean": 0.4951269815344292, |
| "flow/mag_ratio_std": 0.26389562489508567, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8495270110112698, |
| "grad_norm": 0.7207916378974915, |
| "learning_rate": 5.73338668765051e-05, |
| "loss": 1.408148169517517, |
| "step": 19712 |
| }, |
| { |
| "epoch": 0.8605598293360915, |
| "grad_norm": 0.6444937586784363, |
| "learning_rate": 4.9374070769740984e-05, |
| "loss": 1.4169082641601562, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.8715926476609133, |
| "grad_norm": 0.6508966088294983, |
| "learning_rate": 4.198025956014095e-05, |
| "loss": 1.412489891052246, |
| "step": 20224 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "grad_norm": 0.8207064270973206, |
| "learning_rate": 3.516171985374755e-05, |
| "loss": 1.4014993906021118, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.7371724072330055, |
| "eval_cos_loss": 0.5137777864805925, |
| "eval_dec_loss": 0.0013706799051735545, |
| "eval_loss": 1.409610672546094, |
| "eval_mse2_loss": 0.17626210351361396, |
| "eval_mse_loss": 1.409610672546094, |
| "eval_rec_loss": 0.054663843655986574, |
| "eval_var_loss": 0.029133995291965604, |
| "flow/cos_sim": 0.48622221402776267, |
| "flow/improvement_ratio": 0.8532393833975802, |
| "flow/mag_ratio_mean": 0.4940188680249237, |
| "flow/mag_ratio_std": 0.2655889735674299, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.7371724072330055, |
| "eval_cos_loss": 0.5137777864805925, |
| "eval_dec_loss": 0.0013706799051735545, |
| "eval_loss": 1.409610672546094, |
| "eval_mse2_loss": 0.17626210351361396, |
| "eval_mse_loss": 1.409610672546094, |
| "eval_rec_loss": 0.054663843655986574, |
| "eval_runtime": 104.0379, |
| "eval_samples_per_second": 288.356, |
| "eval_steps_per_second": 4.508, |
| "eval_var_loss": 0.029133995291965604, |
| "flow/cos_sim": 0.48622221402776267, |
| "flow/improvement_ratio": 0.8532393833975802, |
| "flow/mag_ratio_mean": 0.4940188680249237, |
| "flow/mag_ratio_std": 0.2655889735674299, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8936582843105566, |
| "grad_norm": 0.5996214151382446, |
| "learning_rate": 2.8927015717215733e-05, |
| "loss": 1.4027345180511475, |
| "step": 20736 |
| }, |
| { |
| "epoch": 0.9046911026353783, |
| "grad_norm": 0.6789088845252991, |
| "learning_rate": 2.3283977921370547e-05, |
| "loss": 1.4052367210388184, |
| "step": 20992 |
| }, |
| { |
| "epoch": 0.9157239209601999, |
| "grad_norm": 0.6676909327507019, |
| "learning_rate": 1.8239694105780413e-05, |
| "loss": 1.406872034072876, |
| "step": 21248 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "grad_norm": 0.5955349802970886, |
| "learning_rate": 1.3800499876701955e-05, |
| "loss": 1.4064586162567139, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "eval_bleu": 0.7426715244464189, |
| "eval_cos_loss": 0.5071186275878695, |
| "eval_dec_loss": 0.001350255208637894, |
| "eval_loss": 1.3937010752366805, |
| "eval_mse2_loss": 0.17541809607225695, |
| "eval_mse_loss": 1.3937010752366805, |
| "eval_rec_loss": 0.05103444970691445, |
| "eval_var_loss": 0.02931836185091213, |
| "flow/cos_sim": 0.49288137139542015, |
| "flow/improvement_ratio": 0.8528173863252343, |
| "flow/mag_ratio_mean": 0.4987420951252553, |
| "flow/mag_ratio_std": 0.26658764935886936, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "eval_bleu": 0.7426715244464189, |
| "eval_cos_loss": 0.5071186275878695, |
| "eval_dec_loss": 0.001350255208637894, |
| "eval_loss": 1.3937010752366805, |
| "eval_mse2_loss": 0.17541809607225695, |
| "eval_mse_loss": 1.3937010752366805, |
| "eval_rec_loss": 0.05103444970691445, |
| "eval_runtime": 104.142, |
| "eval_samples_per_second": 288.068, |
| "eval_steps_per_second": 4.503, |
| "eval_var_loss": 0.02931836185091213, |
| "flow/cos_sim": 0.49288137139542015, |
| "flow/improvement_ratio": 0.8528173863252343, |
| "flow/mag_ratio_mean": 0.4987420951252553, |
| "flow/mag_ratio_std": 0.26658764935886936, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9377895576098434, |
| "grad_norm": 0.8665277361869812, |
| "learning_rate": 9.971970849576406e-06, |
| "loss": 1.4001104831695557, |
| "step": 21760 |
| }, |
| { |
| "epoch": 0.948822375934665, |
| "grad_norm": 0.6160731315612793, |
| "learning_rate": 6.758915646072339e-06, |
| "loss": 1.4023921489715576, |
| "step": 22016 |
| }, |
| { |
| "epoch": 0.9598551942594867, |
| "grad_norm": 0.6823092103004456, |
| "learning_rate": 4.1653698544703575e-06, |
| "loss": 1.4057680368423462, |
| "step": 22272 |
| }, |
| { |
| "epoch": 0.9708880125843083, |
| "grad_norm": 0.7474303841590881, |
| "learning_rate": 2.1945909609756286e-06, |
| "loss": 1.402069330215454, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.9708880125843083, |
| "eval_bleu": 0.7359243412878435, |
| "eval_cos_loss": 0.5119307249593836, |
| "eval_dec_loss": 0.0013969406839550735, |
| "eval_loss": 1.4065255351158092, |
| "eval_mse2_loss": 0.17711426552806073, |
| "eval_mse_loss": 1.4065255351158092, |
| "eval_rec_loss": 0.056425910651572604, |
| "eval_var_loss": 0.02955100304091663, |
| "flow/cos_sim": 0.4880692758031491, |
| "flow/improvement_ratio": 0.8548107675906184, |
| "flow/mag_ratio_mean": 0.49543472253945847, |
| "flow/mag_ratio_std": 0.263321697140045, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.9708880125843083, |
| "eval_bleu": 0.7359243412878435, |
| "eval_cos_loss": 0.5119307249593836, |
| "eval_dec_loss": 0.0013969406839550735, |
| "eval_loss": 1.4065255351158092, |
| "eval_mse2_loss": 0.17711426552806073, |
| "eval_mse_loss": 1.4065255351158092, |
| "eval_rec_loss": 0.056425910651572604, |
| "eval_runtime": 103.1789, |
| "eval_samples_per_second": 290.757, |
| "eval_steps_per_second": 4.546, |
| "eval_var_loss": 0.02955100304091663, |
| "flow/cos_sim": 0.4880692758031491, |
| "flow/improvement_ratio": 0.8548107675906184, |
| "flow/mag_ratio_mean": 0.49543472253945847, |
| "flow/mag_ratio_std": 0.263321697140045, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.9819208309091301, |
| "grad_norm": 0.6597904562950134, |
| "learning_rate": 8.490542583243222e-07, |
| "loss": 1.4066376686096191, |
| "step": 22784 |
| }, |
| { |
| "epoch": 0.9929536492339518, |
| "grad_norm": 0.7082860469818115, |
| "learning_rate": 1.3044973682302396e-07, |
| "loss": 1.4058468341827393, |
| "step": 23040 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|