| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.4854440062921542, |
| "eval_steps": 1024, |
| "global_step": 11264, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011032818324821687, |
| "grad_norm": 0.10309942811727524, |
| "learning_rate": 0.000498046875, |
| "loss": 1.9074174165725708, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.022065636649643373, |
| "grad_norm": 0.2910110056400299, |
| "learning_rate": 0.000998046875, |
| "loss": 1.5273144245147705, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03309845497446506, |
| "grad_norm": 0.3859289586544037, |
| "learning_rate": 0.000999688448778502, |
| "loss": 1.3800736665725708, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 0.5722110867500305, |
| "learning_rate": 0.0009987492950653055, |
| "loss": 1.342606544494629, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9366650964401493, |
| "eval_cos_loss": 0.4710617309440174, |
| "eval_dec_loss": 0.11786629736169314, |
| "eval_loss": 1.3323029561845987, |
| "eval_mse2_loss": 0.1665979178824913, |
| "eval_mse_loss": 1.3323029561845987, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5289382661329404, |
| "flow/improvement_ratio": 0.8936813888010948, |
| "flow/mag_ratio_mean": 0.5435932263382462, |
| "flow/mag_ratio_std": 0.2489985737210906, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9366650964401493, |
| "eval_cos_loss": 0.4710617309440174, |
| "eval_dec_loss": 0.11786629736169314, |
| "eval_loss": 1.3323029561845987, |
| "eval_mse2_loss": 0.1665979178824913, |
| "eval_mse_loss": 1.3323029561845987, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 157.3375, |
| "eval_samples_per_second": 190.673, |
| "eval_steps_per_second": 2.981, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5289382661329404, |
| "flow/improvement_ratio": 0.8936813888010948, |
| "flow/mag_ratio_mean": 0.5435932263382462, |
| "flow/mag_ratio_std": 0.2489985737210906, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05516409162410843, |
| "grad_norm": 0.6506242752075195, |
| "learning_rate": 0.0009971837136430763, |
| "loss": 1.3261979818344116, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.06619690994893011, |
| "grad_norm": 0.6324401497840881, |
| "learning_rate": 0.0009949936708776692, |
| "loss": 1.3123514652252197, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.07722972827375181, |
| "grad_norm": 1.1031574010849, |
| "learning_rate": 0.0009921819174566252, |
| "loss": 1.3050185441970825, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 0.762417733669281, |
| "learning_rate": 0.000988751984934317, |
| "loss": 1.3001574277877808, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.938925796606621, |
| "eval_cos_loss": 0.4579503086330032, |
| "eval_dec_loss": 0.10506504188690867, |
| "eval_loss": 1.2970875999820766, |
| "eval_mse2_loss": 0.15707123614768229, |
| "eval_mse_loss": 1.2970875999820766, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5420496905409197, |
| "flow/improvement_ratio": 0.8918823948038667, |
| "flow/mag_ratio_mean": 0.5503126610316702, |
| "flow/mag_ratio_std": 0.25175602854823254, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.938925796606621, |
| "eval_cos_loss": 0.4579503086330032, |
| "eval_dec_loss": 0.10506504188690867, |
| "eval_loss": 1.2970875999820766, |
| "eval_mse2_loss": 0.15707123614768229, |
| "eval_mse_loss": 1.2970875999820766, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.9416, |
| "eval_samples_per_second": 197.444, |
| "eval_steps_per_second": 3.087, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5420496905409197, |
| "flow/improvement_ratio": 0.8918823948038667, |
| "flow/mag_ratio_mean": 0.5503126610316702, |
| "flow/mag_ratio_std": 0.25175602854823254, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09929536492339518, |
| "grad_norm": 0.39165085554122925, |
| "learning_rate": 0.0009847081812963268, |
| "loss": 1.2909460067749023, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.11032818324821686, |
| "grad_norm": 0.6050369739532471, |
| "learning_rate": 0.0009800555855486275, |
| "loss": 1.291382908821106, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.12136100157303854, |
| "grad_norm": 0.6340572237968445, |
| "learning_rate": 0.0009748000413383664, |
| "loss": 1.2860350608825684, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 0.8046131134033203, |
| "learning_rate": 0.0009689481496142604, |
| "loss": 1.2806360721588135, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9365596012238808, |
| "eval_cos_loss": 0.4510079253075728, |
| "eval_dec_loss": 0.1170106883853801, |
| "eval_loss": 1.2785198518208094, |
| "eval_mse2_loss": 0.15482012001372603, |
| "eval_mse_loss": 1.2785198518208094, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5489920710703966, |
| "flow/improvement_ratio": 0.895310169598187, |
| "flow/mag_ratio_mean": 0.5600611698398712, |
| "flow/mag_ratio_std": 0.2589119763326035, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9365596012238808, |
| "eval_cos_loss": 0.4510079253075728, |
| "eval_dec_loss": 0.1170106883853801, |
| "eval_loss": 1.2785198518208094, |
| "eval_mse2_loss": 0.15482012001372603, |
| "eval_mse_loss": 1.2785198518208094, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.2303, |
| "eval_samples_per_second": 199.693, |
| "eval_steps_per_second": 3.122, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5489920710703966, |
| "flow/improvement_ratio": 0.895310169598187, |
| "flow/mag_ratio_mean": 0.5600611698398712, |
| "flow/mag_ratio_std": 0.2589119763326035, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.14342663822268192, |
| "grad_norm": 0.7344346046447754, |
| "learning_rate": 0.0009625072603358231, |
| "loss": 1.277908444404602, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.15445945654750362, |
| "grad_norm": 0.7456739544868469, |
| "learning_rate": 0.0009554854632418371, |
| "loss": 1.274967074394226, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.1654922748723253, |
| "grad_norm": 0.528167724609375, |
| "learning_rate": 0.000947891577689663, |
| "loss": 1.2722811698913574, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 0.7374073266983032, |
| "learning_rate": 0.0009397351415781539, |
| "loss": 1.2716022729873657, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.9383145863088955, |
| "eval_cos_loss": 0.44795799712890755, |
| "eval_dec_loss": 0.11301154795406597, |
| "eval_loss": 1.2707049117159488, |
| "eval_mse2_loss": 0.15204078735890927, |
| "eval_mse_loss": 1.2707049117159488, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.552042003760714, |
| "flow/improvement_ratio": 0.8948889724227157, |
| "flow/mag_ratio_mean": 0.5576132778674047, |
| "flow/mag_ratio_std": 0.25525683488672984, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.9383145863088955, |
| "eval_cos_loss": 0.44795799712890755, |
| "eval_dec_loss": 0.11301154795406597, |
| "eval_loss": 1.2707049117159488, |
| "eval_mse2_loss": 0.15204078735890927, |
| "eval_mse_loss": 1.2707049117159488, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 149.5476, |
| "eval_samples_per_second": 200.605, |
| "eval_steps_per_second": 3.136, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.552042003760714, |
| "flow/improvement_ratio": 0.8948889724227157, |
| "flow/mag_ratio_mean": 0.5576132778674047, |
| "flow/mag_ratio_std": 0.25525683488672984, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18755791152196866, |
| "grad_norm": 1.123129963874817, |
| "learning_rate": 0.000931026399368079, |
| "loss": 1.2691912651062012, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.19859072984679035, |
| "grad_norm": 0.49173882603645325, |
| "learning_rate": 0.0009217762892151117, |
| "loss": 1.26752769947052, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.20962354817161205, |
| "grad_norm": 0.5665431618690491, |
| "learning_rate": 0.0009119964292315354, |
| "loss": 1.2669333219528198, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 0.4946308732032776, |
| "learning_rate": 0.0009016991028939279, |
| "loss": 1.2646225690841675, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.9396675860722136, |
| "eval_cos_loss": 0.44516199083724767, |
| "eval_dec_loss": 0.10893038547893705, |
| "eval_loss": 1.264682760879175, |
| "eval_mse2_loss": 0.1498125367073108, |
| "eval_mse_loss": 1.264682760879175, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5548380070657872, |
| "flow/improvement_ratio": 0.8946977740665997, |
| "flow/mag_ratio_mean": 0.5694006043456511, |
| "flow/mag_ratio_std": 0.2655116878211625, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.9396675860722136, |
| "eval_cos_loss": 0.44516199083724767, |
| "eval_dec_loss": 0.10893038547893705, |
| "eval_loss": 1.264682760879175, |
| "eval_mse2_loss": 0.1498125367073108, |
| "eval_mse_loss": 1.264682760879175, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.8799, |
| "eval_samples_per_second": 197.524, |
| "eval_steps_per_second": 3.088, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5548380070657872, |
| "flow/improvement_ratio": 0.8946977740665997, |
| "flow/mag_ratio_mean": 0.5694006043456511, |
| "flow/mag_ratio_std": 0.2655116878211625, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.23168918482125542, |
| "grad_norm": 0.5147830843925476, |
| "learning_rate": 0.0008908972436151494, |
| "loss": 1.261371374130249, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2427220031460771, |
| "grad_norm": 0.7221893668174744, |
| "learning_rate": 0.0008796044185000127, |
| "loss": 1.259010672569275, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.2537548214708988, |
| "grad_norm": 0.6270182132720947, |
| "learning_rate": 0.0008678348113050368, |
| "loss": 1.2565613985061646, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 0.3954711854457855, |
| "learning_rate": 0.0008556032046236897, |
| "loss": 1.258548378944397, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.9381239377332383, |
| "eval_cos_loss": 0.4434889930524806, |
| "eval_dec_loss": 0.11391587999226378, |
| "eval_loss": 1.2588644528439812, |
| "eval_mse2_loss": 0.15056055846181252, |
| "eval_mse_loss": 1.2588644528439812, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5565110067568863, |
| "flow/improvement_ratio": 0.8946461940625074, |
| "flow/mag_ratio_mean": 0.5628604918146438, |
| "flow/mag_ratio_std": 0.2606462057528974, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.9381239377332383, |
| "eval_cos_loss": 0.4434889930524806, |
| "eval_dec_loss": 0.11391587999226378, |
| "eval_loss": 1.2588644528439812, |
| "eval_mse2_loss": 0.15056055846181252, |
| "eval_mse_loss": 1.2588644528439812, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 153.8457, |
| "eval_samples_per_second": 195.001, |
| "eval_steps_per_second": 3.049, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5565110067568863, |
| "flow/improvement_ratio": 0.8946461940625074, |
| "flow/mag_ratio_mean": 0.5628604918146438, |
| "flow/mag_ratio_std": 0.2606462057528974, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2758204581205422, |
| "grad_norm": 0.8126729130744934, |
| "learning_rate": 0.000842924961319492, |
| "loss": 1.2565950155258179, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.28685327644536385, |
| "grad_norm": 0.84797203540802, |
| "learning_rate": 0.0008298160052303045, |
| "loss": 1.2548315525054932, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.2978860947701855, |
| "grad_norm": 0.561568021774292, |
| "learning_rate": 0.0008162928011680314, |
| "loss": 1.2526129484176636, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 0.45474377274513245, |
| "learning_rate": 0.000802372334238864, |
| "loss": 1.2513761520385742, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.9385536520845816, |
| "eval_cos_loss": 0.4402598062557961, |
| "eval_dec_loss": 0.11249503215500858, |
| "eval_loss": 1.2510530173397267, |
| "eval_mse2_loss": 0.1480516226116274, |
| "eval_mse_loss": 1.2510530173397267, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5597401952692694, |
| "flow/improvement_ratio": 0.895444744939743, |
| "flow/mag_ratio_mean": 0.5710282248220464, |
| "flow/mag_ratio_std": 0.26387540328858505, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.9385536520845816, |
| "eval_cos_loss": 0.4402598062557961, |
| "eval_dec_loss": 0.11249503215500858, |
| "eval_loss": 1.2510530173397267, |
| "eval_mse2_loss": 0.1480516226116274, |
| "eval_mse_loss": 1.2510530173397267, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.7181, |
| "eval_samples_per_second": 196.44, |
| "eval_steps_per_second": 3.071, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5597401952692694, |
| "flow/improvement_ratio": 0.895444744939743, |
| "flow/mag_ratio_mean": 0.5710282248220464, |
| "flow/mag_ratio_std": 0.26387540328858505, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3199517314198289, |
| "grad_norm": 1.3543585538864136, |
| "learning_rate": 0.0007880720885100349, |
| "loss": 1.2521653175354004, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3309845497446506, |
| "grad_norm": 0.4370076358318329, |
| "learning_rate": 0.0007734100250498788, |
| "loss": 1.249273419380188, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3420173680694723, |
| "grad_norm": 1.0196475982666016, |
| "learning_rate": 0.000758404559368781, |
| "loss": 1.2500712871551514, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "grad_norm": 0.733001708984375, |
| "learning_rate": 0.0007430745382893488, |
| "loss": 1.245364785194397, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9376793187397806, |
| "eval_cos_loss": 0.4385024095013706, |
| "eval_dec_loss": 0.11364057421017049, |
| "eval_loss": 1.2459661925016945, |
| "eval_mse2_loss": 0.148339767350571, |
| "eval_mse_loss": 1.2459661925016945, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5614975882745755, |
| "flow/improvement_ratio": 0.8961417695352518, |
| "flow/mag_ratio_mean": 0.5688313084370547, |
| "flow/mag_ratio_std": 0.26494109700483554, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9376793187397806, |
| "eval_cos_loss": 0.4385024095013706, |
| "eval_dec_loss": 0.11364057421017049, |
| "eval_loss": 1.2459661925016945, |
| "eval_mse2_loss": 0.148339767350571, |
| "eval_mse_loss": 1.2459661925016945, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.8054, |
| "eval_samples_per_second": 196.328, |
| "eval_steps_per_second": 3.069, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5614975882745755, |
| "flow/improvement_ratio": 0.8961417695352518, |
| "flow/mag_ratio_mean": 0.5688313084370547, |
| "flow/mag_ratio_std": 0.26494109700483554, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.36408300471911564, |
| "grad_norm": 0.676328718662262, |
| "learning_rate": 0.0007274392162748551, |
| "loss": 1.2448910474777222, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.3751158230439373, |
| "grad_norm": 0.6379961967468262, |
| "learning_rate": 0.000711518231245687, |
| "loss": 1.2442706823349, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.38614864136875904, |
| "grad_norm": 0.5386805534362793, |
| "learning_rate": 0.0006953315799141723, |
| "loss": 1.2446835041046143, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "grad_norm": 0.8263258934020996, |
| "learning_rate": 0.0006788995926687669, |
| "loss": 1.2411766052246094, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9372486918854673, |
| "eval_cos_loss": 0.43675092898452206, |
| "eval_dec_loss": 0.11516488874867273, |
| "eval_loss": 1.241364901762273, |
| "eval_mse2_loss": 0.1478570194196091, |
| "eval_mse_loss": 1.241364901762273, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5632490722863659, |
| "flow/improvement_ratio": 0.8974738620491679, |
| "flow/mag_ratio_mean": 0.5655419154207844, |
| "flow/mag_ratio_std": 0.2603240320041998, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9372486918854673, |
| "eval_cos_loss": 0.43675092898452206, |
| "eval_dec_loss": 0.11516488874867273, |
| "eval_loss": 1.241364901762273, |
| "eval_mse2_loss": 0.1478570194196091, |
| "eval_mse_loss": 1.241364901762273, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.8433, |
| "eval_samples_per_second": 196.28, |
| "eval_steps_per_second": 3.069, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5632490722863659, |
| "flow/improvement_ratio": 0.8974738620491679, |
| "flow/mag_ratio_mean": 0.5655419154207844, |
| "flow/mag_ratio_std": 0.2603240320041998, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4082142780184024, |
| "grad_norm": 0.7855456471443176, |
| "learning_rate": 0.0006622429080391422, |
| "loss": 1.2460049390792847, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.4192470963432241, |
| "grad_norm": 0.4608207941055298, |
| "learning_rate": 0.0006453824467742515, |
| "loss": 1.2414920330047607, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.43027991466804577, |
| "grad_norm": 0.5247617959976196, |
| "learning_rate": 0.0006283393855659275, |
| "loss": 1.2424880266189575, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "grad_norm": 0.8765453100204468, |
| "learning_rate": 0.0006111351304510173, |
| "loss": 1.237776517868042, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.937646836000478, |
| "eval_cos_loss": 0.4353823194752878, |
| "eval_dec_loss": 0.11402556833737632, |
| "eval_loss": 1.2377641976260936, |
| "eval_mse2_loss": 0.1474350707204357, |
| "eval_mse_loss": 1.2377641976260936, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.564617679063191, |
| "flow/improvement_ratio": 0.899760089830549, |
| "flow/mag_ratio_mean": 0.5730336795229394, |
| "flow/mag_ratio_std": 0.26344449729172154, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.937646836000478, |
| "eval_cos_loss": 0.4353823194752878, |
| "eval_dec_loss": 0.11402556833737632, |
| "eval_loss": 1.2377641976260936, |
| "eval_mse2_loss": 0.1474350707204357, |
| "eval_mse_loss": 1.2377641976260936, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.9737, |
| "eval_samples_per_second": 197.403, |
| "eval_steps_per_second": 3.086, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.564617679063191, |
| "flow/improvement_ratio": 0.899760089830549, |
| "flow/mag_ratio_mean": 0.5730336795229394, |
| "flow/mag_ratio_std": 0.26344449729172154, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.45234555131768917, |
| "grad_norm": 0.6895334124565125, |
| "learning_rate": 0.0005937912899254605, |
| "loss": 1.2384426593780518, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.46337836964251083, |
| "grad_norm": 0.6421330571174622, |
| "learning_rate": 0.0005763296478040787, |
| "loss": 1.240878939628601, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.4744111879673325, |
| "grad_norm": 0.7770284414291382, |
| "learning_rate": 0.0005587721358601663, |
| "loss": 1.2393468618392944, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "grad_norm": 1.0520166158676147, |
| "learning_rate": 0.0005411408062792448, |
| "loss": 1.237922191619873, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.93652744913201, |
| "eval_cos_loss": 0.4366011674851497, |
| "eval_dec_loss": 0.11468809016390459, |
| "eval_loss": 1.2409222840246108, |
| "eval_mse2_loss": 0.14564816977804912, |
| "eval_mse_loss": 1.2409222840246108, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5633988297824413, |
| "flow/improvement_ratio": 0.897065937773251, |
| "flow/mag_ratio_mean": 0.5639294942558956, |
| "flow/mag_ratio_std": 0.25510865748564066, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.93652744913201, |
| "eval_cos_loss": 0.4366011674851497, |
| "eval_dec_loss": 0.11468809016390459, |
| "eval_loss": 1.2409222840246108, |
| "eval_mse2_loss": 0.14564816977804912, |
| "eval_mse_loss": 1.2409222840246108, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.4483, |
| "eval_samples_per_second": 196.788, |
| "eval_steps_per_second": 3.076, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5633988297824413, |
| "flow/improvement_ratio": 0.897065937773251, |
| "flow/mag_ratio_mean": 0.5639294942558956, |
| "flow/mag_ratio_std": 0.25510865748564066, |
| "step": 11264 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|