| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9267567392850217, |
| "eval_steps": 1024, |
| "global_step": 21504, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011032818324821687, |
| "grad_norm": 11.799630165100098, |
| "learning_rate": 0.000498046875, |
| "loss": 104.40645599365234, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.022065636649643373, |
| "grad_norm": 24.033342361450195, |
| "learning_rate": 0.000998046875, |
| "loss": 2.2942047119140625, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03309845497446506, |
| "grad_norm": 21.89491081237793, |
| "learning_rate": 0.000999688448778502, |
| "loss": 2.301217555999756, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 24.77373695373535, |
| "learning_rate": 0.0009987492950653055, |
| "loss": 2.2358601093292236, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9163079254973637, |
| "eval_cos_loss": 0.8463371174930255, |
| "eval_dec_loss": 0.15604417777455437, |
| "eval_loss": 2.1683921397113597, |
| "eval_mse2_loss": 0.2577589998430789, |
| "eval_mse_loss": 1.9106331385020763, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.1536628818556444, |
| "flow/improvement_ratio": 0.5908637553898256, |
| "flow/mag_ratio_mean": 0.2672089617262517, |
| "flow/mag_ratio_std": 0.1647362987052149, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9163079254973637, |
| "eval_cos_loss": 0.8463371174930255, |
| "eval_dec_loss": 0.15604417777455437, |
| "eval_loss": 2.1683921397113597, |
| "eval_mse2_loss": 0.2577589998430789, |
| "eval_mse_loss": 1.9106331385020763, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.0019, |
| "eval_samples_per_second": 198.673, |
| "eval_steps_per_second": 3.106, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.1536628818556444, |
| "flow/improvement_ratio": 0.5908637553898256, |
| "flow/mag_ratio_mean": 0.2672089617262517, |
| "flow/mag_ratio_std": 0.1647362987052149, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05516409162410843, |
| "grad_norm": 20.877023696899414, |
| "learning_rate": 0.0009971837136430763, |
| "loss": 2.162022352218628, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.06619690994893011, |
| "grad_norm": 16.984447479248047, |
| "learning_rate": 0.0009949936708776692, |
| "loss": 1.9889535903930664, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.07722972827375181, |
| "grad_norm": 18.61001205444336, |
| "learning_rate": 0.0009921819174566252, |
| "loss": 1.8391456604003906, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 9.276031494140625, |
| "learning_rate": 0.000988751984934317, |
| "loss": 1.736743688583374, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.9323255434649543, |
| "eval_cos_loss": 0.5421419106821007, |
| "eval_dec_loss": 0.12311239358681098, |
| "eval_loss": 1.6836593265472444, |
| "eval_mse2_loss": 0.19289209935138982, |
| "eval_mse_loss": 1.4907672278154125, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.4578580862677682, |
| "flow/improvement_ratio": 0.8665720767049647, |
| "flow/mag_ratio_mean": 0.49134117085288076, |
| "flow/mag_ratio_std": 0.21147727505611713, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.9323255434649543, |
| "eval_cos_loss": 0.5421419106821007, |
| "eval_dec_loss": 0.12311239358681098, |
| "eval_loss": 1.6836593265472444, |
| "eval_mse2_loss": 0.19289209935138982, |
| "eval_mse_loss": 1.4907672278154125, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 148.8988, |
| "eval_samples_per_second": 201.479, |
| "eval_steps_per_second": 3.15, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.4578580862677682, |
| "flow/improvement_ratio": 0.8665720767049647, |
| "flow/mag_ratio_mean": 0.49134117085288076, |
| "flow/mag_ratio_std": 0.21147727505611713, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09929536492339518, |
| "grad_norm": 7.179141044616699, |
| "learning_rate": 0.0009847081812963268, |
| "loss": 1.6638906002044678, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.11032818324821686, |
| "grad_norm": 5.170185089111328, |
| "learning_rate": 0.0009800555855486275, |
| "loss": 1.6207516193389893, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.12136100157303854, |
| "grad_norm": 5.038082122802734, |
| "learning_rate": 0.0009748000413383664, |
| "loss": 1.5859862565994263, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 3.0588953495025635, |
| "learning_rate": 0.0009689481496142604, |
| "loss": 1.5566623210906982, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9326258807918764, |
| "eval_cos_loss": 0.48918826811349214, |
| "eval_dec_loss": 0.12837675790082037, |
| "eval_loss": 1.5384274639808801, |
| "eval_mse2_loss": 0.17169118979211045, |
| "eval_mse_loss": 1.3667362735215536, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5108117323313186, |
| "flow/improvement_ratio": 0.8871580227605824, |
| "flow/mag_ratio_mean": 0.5184402796886624, |
| "flow/mag_ratio_std": 0.24528262540221468, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9326258807918764, |
| "eval_cos_loss": 0.48918826811349214, |
| "eval_dec_loss": 0.12837675790082037, |
| "eval_loss": 1.5384274639808801, |
| "eval_mse2_loss": 0.17169118979211045, |
| "eval_mse_loss": 1.3667362735215536, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 149.1668, |
| "eval_samples_per_second": 201.117, |
| "eval_steps_per_second": 3.144, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5108117323313186, |
| "flow/improvement_ratio": 0.8871580227605824, |
| "flow/mag_ratio_mean": 0.5184402796886624, |
| "flow/mag_ratio_std": 0.24528262540221468, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.14342663822268192, |
| "grad_norm": 2.7404696941375732, |
| "learning_rate": 0.0009625072603358231, |
| "loss": 1.5345162153244019, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.15445945654750362, |
| "grad_norm": 2.8666281700134277, |
| "learning_rate": 0.0009554854632418371, |
| "loss": 1.518531084060669, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.1654922748723253, |
| "grad_norm": 1.7333565950393677, |
| "learning_rate": 0.000947891577689663, |
| "loss": 1.5072828531265259, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 2.1371614933013916, |
| "learning_rate": 0.0009397351415781539, |
| "loss": 1.4991329908370972, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.9359739466567191, |
| "eval_cos_loss": 0.47476763149568524, |
| "eval_dec_loss": 0.11913440864024831, |
| "eval_loss": 1.4943630281033546, |
| "eval_mse2_loss": 0.16356163808722485, |
| "eval_mse_loss": 1.3308013893647996, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5252323670427936, |
| "flow/improvement_ratio": 0.8878522331018184, |
| "flow/mag_ratio_mean": 0.5341137046498785, |
| "flow/mag_ratio_std": 0.25290533919324243, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.9359739466567191, |
| "eval_cos_loss": 0.47476763149568524, |
| "eval_dec_loss": 0.11913440864024831, |
| "eval_loss": 1.4943630281033546, |
| "eval_mse2_loss": 0.16356163808722485, |
| "eval_mse_loss": 1.3308013893647996, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 149.7517, |
| "eval_samples_per_second": 200.332, |
| "eval_steps_per_second": 3.132, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5252323670427936, |
| "flow/improvement_ratio": 0.8878522331018184, |
| "flow/mag_ratio_mean": 0.5341137046498785, |
| "flow/mag_ratio_std": 0.25290533919324243, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18755791152196866, |
| "grad_norm": 1.6938560009002686, |
| "learning_rate": 0.000931026399368079, |
| "loss": 1.4890222549438477, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.19859072984679035, |
| "grad_norm": 1.565247893333435, |
| "learning_rate": 0.0009217762892151117, |
| "loss": 1.4860106706619263, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.20962354817161205, |
| "grad_norm": 1.3487786054611206, |
| "learning_rate": 0.0009119964292315354, |
| "loss": 1.4800715446472168, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 1.212808609008789, |
| "learning_rate": 0.0009016991028939279, |
| "loss": 1.4720630645751953, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.9371513693794589, |
| "eval_cos_loss": 0.46655392564181836, |
| "eval_dec_loss": 0.11585041659592248, |
| "eval_loss": 1.4702420382103178, |
| "eval_mse2_loss": 0.15908411601141317, |
| "eval_mse_loss": 1.3111579230090957, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5334460733414713, |
| "flow/improvement_ratio": 0.8902226530158444, |
| "flow/mag_ratio_mean": 0.5489529184441069, |
| "flow/mag_ratio_std": 0.2660856873178279, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.9371513693794589, |
| "eval_cos_loss": 0.46655392564181836, |
| "eval_dec_loss": 0.11585041659592248, |
| "eval_loss": 1.4702420382103178, |
| "eval_mse2_loss": 0.15908411601141317, |
| "eval_mse_loss": 1.3111579230090957, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 153.0228, |
| "eval_samples_per_second": 196.049, |
| "eval_steps_per_second": 3.065, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5334460733414713, |
| "flow/improvement_ratio": 0.8902226530158444, |
| "flow/mag_ratio_mean": 0.5489529184441069, |
| "flow/mag_ratio_std": 0.2660856873178279, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.23168918482125542, |
| "grad_norm": 1.2820218801498413, |
| "learning_rate": 0.0008908972436151494, |
| "loss": 1.4665789604187012, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2427220031460771, |
| "grad_norm": 1.428142786026001, |
| "learning_rate": 0.0008796044185000127, |
| "loss": 1.4617829322814941, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.2537548214708988, |
| "grad_norm": 1.7342983484268188, |
| "learning_rate": 0.0008678348113050368, |
| "loss": 1.454346776008606, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 1.012923240661621, |
| "learning_rate": 0.0008556032046236897, |
| "loss": 1.4543688297271729, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.9360335622230687, |
| "eval_cos_loss": 0.4603669429892924, |
| "eval_dec_loss": 0.11954856799669199, |
| "eval_loss": 1.452896745982709, |
| "eval_mse2_loss": 0.15859206668984915, |
| "eval_mse_loss": 1.2943046784350105, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5396330592347615, |
| "flow/improvement_ratio": 0.8903370405565193, |
| "flow/mag_ratio_mean": 0.5503829778003286, |
| "flow/mag_ratio_std": 0.26988969053795087, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.9360335622230687, |
| "eval_cos_loss": 0.4603669429892924, |
| "eval_dec_loss": 0.11954856799669199, |
| "eval_loss": 1.452896745982709, |
| "eval_mse2_loss": 0.15859206668984915, |
| "eval_mse_loss": 1.2943046784350105, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.3001, |
| "eval_samples_per_second": 196.979, |
| "eval_steps_per_second": 3.079, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5396330592347615, |
| "flow/improvement_ratio": 0.8903370405565193, |
| "flow/mag_ratio_mean": 0.5503829778003286, |
| "flow/mag_ratio_std": 0.26988969053795087, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2758204581205422, |
| "grad_norm": 1.2126809358596802, |
| "learning_rate": 0.000842924961319492, |
| "loss": 1.4489810466766357, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.28685327644536385, |
| "grad_norm": 1.6154913902282715, |
| "learning_rate": 0.0008298160052303045, |
| "loss": 1.4455806016921997, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.2978860947701855, |
| "grad_norm": 1.1049504280090332, |
| "learning_rate": 0.0008162928011680314, |
| "loss": 1.4431121349334717, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 1.161877989768982, |
| "learning_rate": 0.000802372334238864, |
| "loss": 1.440521478652954, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.9368360477187275, |
| "eval_cos_loss": 0.4548988695592006, |
| "eval_dec_loss": 0.11761775160673013, |
| "eval_loss": 1.4366158300371312, |
| "eval_mse2_loss": 0.1551807067636996, |
| "eval_mse_loss": 1.2814351221137463, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.545101128788645, |
| "flow/improvement_ratio": 0.8925937072300454, |
| "flow/mag_ratio_mean": 0.5560947322387939, |
| "flow/mag_ratio_std": 0.26709621954065904, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.9368360477187275, |
| "eval_cos_loss": 0.4548988695592006, |
| "eval_dec_loss": 0.11761775160673013, |
| "eval_loss": 1.4366158300371312, |
| "eval_mse2_loss": 0.1551807067636996, |
| "eval_mse_loss": 1.2814351221137463, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.401, |
| "eval_samples_per_second": 199.467, |
| "eval_steps_per_second": 3.118, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.545101128788645, |
| "flow/improvement_ratio": 0.8925937072300454, |
| "flow/mag_ratio_mean": 0.5560947322387939, |
| "flow/mag_ratio_std": 0.26709621954065904, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3199517314198289, |
| "grad_norm": 1.3750261068344116, |
| "learning_rate": 0.0007880720885100349, |
| "loss": 1.4386309385299683, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3309845497446506, |
| "grad_norm": 0.9077029228210449, |
| "learning_rate": 0.0007734100250498788, |
| "loss": 1.4343377351760864, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3420173680694723, |
| "grad_norm": 1.2454605102539062, |
| "learning_rate": 0.000758404559368781, |
| "loss": 1.4340428113937378, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "grad_norm": 1.4264285564422607, |
| "learning_rate": 0.0007430745382893488, |
| "loss": 1.4275590181350708, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9352995295168319, |
| "eval_cos_loss": 0.4525942064043301, |
| "eval_dec_loss": 0.12027457510150953, |
| "eval_loss": 1.4305989170379476, |
| "eval_mse2_loss": 0.15546214358130497, |
| "eval_mse_loss": 1.2751367725034766, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.547405791689338, |
| "flow/improvement_ratio": 0.8944777580720784, |
| "flow/mag_ratio_mean": 0.5549619147009941, |
| "flow/mag_ratio_std": 0.2656229132973055, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9352995295168319, |
| "eval_cos_loss": 0.4525942064043301, |
| "eval_dec_loss": 0.12027457510150953, |
| "eval_loss": 1.4305989170379476, |
| "eval_mse2_loss": 0.15546214358130497, |
| "eval_mse_loss": 1.2751367725034766, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 153.0138, |
| "eval_samples_per_second": 196.061, |
| "eval_steps_per_second": 3.065, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.547405791689338, |
| "flow/improvement_ratio": 0.8944777580720784, |
| "flow/mag_ratio_mean": 0.5549619147009941, |
| "flow/mag_ratio_std": 0.2656229132973055, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.36408300471911564, |
| "grad_norm": 1.2861735820770264, |
| "learning_rate": 0.0007274392162748551, |
| "loss": 1.4297358989715576, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.3751158230439373, |
| "grad_norm": 0.9941537380218506, |
| "learning_rate": 0.000711518231245687, |
| "loss": 1.4256483316421509, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.38614864136875904, |
| "grad_norm": 1.144028663635254, |
| "learning_rate": 0.0006953315799141723, |
| "loss": 1.4252721071243286, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "grad_norm": 1.3483773469924927, |
| "learning_rate": 0.0006788995926687669, |
| "loss": 1.4217591285705566, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9351012282354647, |
| "eval_cos_loss": 0.44966854437836196, |
| "eval_dec_loss": 0.12245929105155694, |
| "eval_loss": 1.4224536047815515, |
| "eval_mse2_loss": 0.15468242016237682, |
| "eval_mse_loss": 1.2677711858424043, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5503314502203642, |
| "flow/improvement_ratio": 0.892241062640127, |
| "flow/mag_ratio_mean": 0.5635909434320576, |
| "flow/mag_ratio_std": 0.27180960243825975, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9351012282354647, |
| "eval_cos_loss": 0.44966854437836196, |
| "eval_dec_loss": 0.12245929105155694, |
| "eval_loss": 1.4224536047815515, |
| "eval_mse2_loss": 0.15468242016237682, |
| "eval_mse_loss": 1.2677711858424043, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 149.98, |
| "eval_samples_per_second": 200.027, |
| "eval_steps_per_second": 3.127, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5503314502203642, |
| "flow/improvement_ratio": 0.892241062640127, |
| "flow/mag_ratio_mean": 0.5635909434320576, |
| "flow/mag_ratio_std": 0.27180960243825975, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4082142780184024, |
| "grad_norm": 1.1067237854003906, |
| "learning_rate": 0.0006622429080391422, |
| "loss": 1.4252803325653076, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.4192470963432241, |
| "grad_norm": 0.9428814649581909, |
| "learning_rate": 0.0006453824467742515, |
| "loss": 1.4204916954040527, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.43027991466804577, |
| "grad_norm": 0.9498366713523865, |
| "learning_rate": 0.0006283393855659275, |
| "loss": 1.421370506286621, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "grad_norm": 1.0918892621994019, |
| "learning_rate": 0.0006111351304510173, |
| "loss": 1.4164854288101196, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.9359500949598805, |
| "eval_cos_loss": 0.4479471450802614, |
| "eval_dec_loss": 0.11968307011623754, |
| "eval_loss": 1.4188302267334862, |
| "eval_mse2_loss": 0.1544050017653752, |
| "eval_mse_loss": 1.2644252238243119, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5520528566354374, |
| "flow/improvement_ratio": 0.8973728770386181, |
| "flow/mag_ratio_mean": 0.5616697732573633, |
| "flow/mag_ratio_std": 0.26565590021071406, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.9359500949598805, |
| "eval_cos_loss": 0.4479471450802614, |
| "eval_dec_loss": 0.11968307011623754, |
| "eval_loss": 1.4188302267334862, |
| "eval_mse2_loss": 0.1544050017653752, |
| "eval_mse_loss": 1.2644252238243119, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 154.1693, |
| "eval_samples_per_second": 194.591, |
| "eval_steps_per_second": 3.042, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5520528566354374, |
| "flow/improvement_ratio": 0.8973728770386181, |
| "flow/mag_ratio_mean": 0.5616697732573633, |
| "flow/mag_ratio_std": 0.26565590021071406, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.45234555131768917, |
| "grad_norm": 1.4444184303283691, |
| "learning_rate": 0.0005937912899254605, |
| "loss": 1.4161497354507446, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.46337836964251083, |
| "grad_norm": 1.1204206943511963, |
| "learning_rate": 0.0005763296478040787, |
| "loss": 1.4160338640213013, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.4744111879673325, |
| "grad_norm": 1.2254257202148438, |
| "learning_rate": 0.0005587721358601663, |
| "loss": 1.4160186052322388, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "grad_norm": 1.4265046119689941, |
| "learning_rate": 0.0005411408062792448, |
| "loss": 1.4131176471710205, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.9355559828150384, |
| "eval_cos_loss": 0.4478943133150845, |
| "eval_dec_loss": 0.1184240544337962, |
| "eval_loss": 1.416160212396813, |
| "eval_mse2_loss": 0.15195046149210126, |
| "eval_mse_loss": 1.2642097521438274, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5521056849056724, |
| "flow/improvement_ratio": 0.8935815333557535, |
| "flow/mag_ratio_mean": 0.5588731608156965, |
| "flow/mag_ratio_std": 0.2649187071364063, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.9355559828150384, |
| "eval_cos_loss": 0.4478943133150845, |
| "eval_dec_loss": 0.1184240544337962, |
| "eval_loss": 1.416160212396813, |
| "eval_mse2_loss": 0.15195046149210126, |
| "eval_mse_loss": 1.2642097521438274, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 153.2221, |
| "eval_samples_per_second": 195.794, |
| "eval_steps_per_second": 3.061, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5521056849056724, |
| "flow/improvement_ratio": 0.8935815333557535, |
| "flow/mag_ratio_mean": 0.5588731608156965, |
| "flow/mag_ratio_std": 0.2649187071364063, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4964768246169759, |
| "grad_norm": 1.052992582321167, |
| "learning_rate": 0.0005234578039615789, |
| "loss": 1.4113584756851196, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5075096429417976, |
| "grad_norm": 1.5359535217285156, |
| "learning_rate": 0.0005057453387082458, |
| "loss": 1.410513997077942, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5185424612666193, |
| "grad_norm": 1.2274341583251953, |
| "learning_rate": 0.0004880256573256866, |
| "loss": 1.4113372564315796, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "grad_norm": 0.7585653066635132, |
| "learning_rate": 0.0004703210156837805, |
| "loss": 1.4105942249298096, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.9365001737473854, |
| "eval_cos_loss": 0.4468586768295719, |
| "eval_dec_loss": 0.11578587821519959, |
| "eval_loss": 1.4128359007174527, |
| "eval_mse2_loss": 0.1518495731802383, |
| "eval_mse_loss": 1.2609863276166449, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5531413205651078, |
| "flow/improvement_ratio": 0.8957801499346426, |
| "flow/mag_ratio_mean": 0.558759307556315, |
| "flow/mag_ratio_std": 0.2658895003770206, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.9365001737473854, |
| "eval_cos_loss": 0.4468586768295719, |
| "eval_dec_loss": 0.11578587821519959, |
| "eval_loss": 1.4128359007174527, |
| "eval_mse2_loss": 0.1518495731802383, |
| "eval_mse_loss": 1.2609863276166449, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.5586, |
| "eval_samples_per_second": 196.646, |
| "eval_steps_per_second": 3.074, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5531413205651078, |
| "flow/improvement_ratio": 0.8957801499346426, |
| "flow/mag_ratio_mean": 0.558759307556315, |
| "flow/mag_ratio_std": 0.2658895003770206, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5406080979162626, |
| "grad_norm": 0.7903220653533936, |
| "learning_rate": 0.0004526536507625343, |
| "loss": 1.4081207513809204, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5516409162410844, |
| "grad_norm": 1.299485445022583, |
| "learning_rate": 0.00043504575272249973, |
| "loss": 1.4094181060791016, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.562673734565906, |
| "grad_norm": 1.1680424213409424, |
| "learning_rate": 0.0004175194370339921, |
| "loss": 1.4080712795257568, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "grad_norm": 1.1761500835418701, |
| "learning_rate": 0.0004000967167001243, |
| "loss": 1.406380534172058, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.9375331490996615, |
| "eval_cos_loss": 0.4439614661721024, |
| "eval_dec_loss": 0.11194793060858811, |
| "eval_loss": 1.4055488673862873, |
| "eval_mse2_loss": 0.1498908800547565, |
| "eval_mse_loss": 1.2556579890790016, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5560385347175192, |
| "flow/improvement_ratio": 0.8952096234252458, |
| "flow/mag_ratio_mean": 0.5663808779930001, |
| "flow/mag_ratio_std": 0.2659331502627208, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.9375331490996615, |
| "eval_cos_loss": 0.4439614661721024, |
| "eval_dec_loss": 0.11194793060858811, |
| "eval_loss": 1.4055488673862873, |
| "eval_mse2_loss": 0.1498908800547565, |
| "eval_mse_loss": 1.2556579890790016, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.6081, |
| "eval_samples_per_second": 197.879, |
| "eval_steps_per_second": 3.094, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5560385347175192, |
| "flow/improvement_ratio": 0.8952096234252458, |
| "flow/mag_ratio_mean": 0.5663808779930001, |
| "flow/mag_ratio_std": 0.2659331502627208, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5847393712155494, |
| "grad_norm": 0.7058362364768982, |
| "learning_rate": 0.00038279947460853446, |
| "loss": 1.4039076566696167, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.595772189540371, |
| "grad_norm": 1.867743730545044, |
| "learning_rate": 0.00036564943604654345, |
| "loss": 1.4034653902053833, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6068050078651928, |
| "grad_norm": 0.9271458983421326, |
| "learning_rate": 0.00034866814141425254, |
| "loss": 1.4046720266342163, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "grad_norm": 0.8186226487159729, |
| "learning_rate": 0.0003318769191698637, |
| "loss": 1.4014854431152344, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.9378049095613451, |
| "eval_cos_loss": 0.44325206020493496, |
| "eval_dec_loss": 0.11280616647454658, |
| "eval_loss": 1.402791700637671, |
| "eval_mse2_loss": 0.14996937808515168, |
| "eval_mse_loss": 1.2528223221235946, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5567479390325323, |
| "flow/improvement_ratio": 0.8963780609021055, |
| "flow/mag_ratio_mean": 0.5675655131909385, |
| "flow/mag_ratio_std": 0.2700714123274472, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.9378049095613451, |
| "eval_cos_loss": 0.44325206020493496, |
| "eval_dec_loss": 0.11280616647454658, |
| "eval_loss": 1.402791700637671, |
| "eval_mse2_loss": 0.14996937808515168, |
| "eval_mse_loss": 1.2528223221235946, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.2222, |
| "eval_samples_per_second": 197.08, |
| "eval_steps_per_second": 3.081, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5567479390325323, |
| "flow/improvement_ratio": 0.8963780609021055, |
| "flow/mag_ratio_mean": 0.5675655131909385, |
| "flow/mag_ratio_std": 0.2700714123274472, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6288706445148361, |
| "grad_norm": 1.4290482997894287, |
| "learning_rate": 0.00031529685904119485, |
| "loss": 1.4003877639770508, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6399034628396578, |
| "grad_norm": 0.92575603723526, |
| "learning_rate": 0.0002989487855370421, |
| "loss": 1.4029277563095093, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6509362811644795, |
| "grad_norm": 0.9941800236701965, |
| "learning_rate": 0.00028285323179165424, |
| "loss": 1.401476263999939, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "grad_norm": 0.5996092557907104, |
| "learning_rate": 0.0002670304137751759, |
| "loss": 1.40423583984375, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.9359784532013938, |
| "eval_cos_loss": 0.44205584766259837, |
| "eval_dec_loss": 0.12092216288460407, |
| "eval_loss": 1.4025819934507422, |
| "eval_mse2_loss": 0.15172467312451873, |
| "eval_mse_loss": 1.2508573216924281, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5579441513206913, |
| "flow/improvement_ratio": 0.893738461964166, |
| "flow/mag_ratio_mean": 0.5637592339058166, |
| "flow/mag_ratio_std": 0.2644218521903573, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.9359784532013938, |
| "eval_cos_loss": 0.44205584766259837, |
| "eval_dec_loss": 0.12092216288460407, |
| "eval_loss": 1.4025819934507422, |
| "eval_mse2_loss": 0.15172467312451873, |
| "eval_mse_loss": 1.2508573216924281, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.8251, |
| "eval_samples_per_second": 198.906, |
| "eval_steps_per_second": 3.11, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5579441513206913, |
| "flow/improvement_ratio": 0.893738461964166, |
| "flow/mag_ratio_mean": 0.5637592339058166, |
| "flow/mag_ratio_std": 0.2644218521903573, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6730019178141229, |
| "grad_norm": 1.2950412034988403, |
| "learning_rate": 0.0002515002049024435, |
| "loss": 1.4007450342178345, |
| "step": 15616 |
| }, |
| { |
| "epoch": 0.6840347361389446, |
| "grad_norm": 0.9485428333282471, |
| "learning_rate": 0.00023628211107203429, |
| "loss": 1.4006445407867432, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.6950675544637662, |
| "grad_norm": 0.7694559693336487, |
| "learning_rate": 0.00022139524616691188, |
| "loss": 1.3995271921157837, |
| "step": 16128 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "grad_norm": 1.0373378992080688, |
| "learning_rate": 0.000206858308047443, |
| "loss": 1.396484136581421, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.9382976118785468, |
| "eval_cos_loss": 0.4408198703072473, |
| "eval_dec_loss": 0.11163298516814261, |
| "eval_loss": 1.3957897457740962, |
| "eval_mse2_loss": 0.14787080404219596, |
| "eval_mse_loss": 1.247918940556329, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5591801271509769, |
| "flow/improvement_ratio": 0.8951915981672974, |
| "flow/mag_ratio_mean": 0.5686852338471646, |
| "flow/mag_ratio_std": 0.26723379195371927, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.9382976118785468, |
| "eval_cos_loss": 0.4408198703072473, |
| "eval_dec_loss": 0.11163298516814261, |
| "eval_loss": 1.3957897457740962, |
| "eval_mse2_loss": 0.14787080404219596, |
| "eval_mse_loss": 1.247918940556329, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 149.6268, |
| "eval_samples_per_second": 200.499, |
| "eval_steps_per_second": 3.134, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5591801271509769, |
| "flow/improvement_ratio": 0.8951915981672974, |
| "flow/mag_ratio_mean": 0.5686852338471646, |
| "flow/mag_ratio_std": 0.26723379195371927, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7171331911134096, |
| "grad_norm": 0.7638829946517944, |
| "learning_rate": 0.00019268955506693798, |
| "loss": 1.3981057405471802, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.7281660094382313, |
| "grad_norm": 1.2486227750778198, |
| "learning_rate": 0.00017890678313921, |
| "loss": 1.396310806274414, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.739198827763053, |
| "grad_norm": 1.3649706840515137, |
| "learning_rate": 0.00016552730338695792, |
| "loss": 1.395443320274353, |
| "step": 17152 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "grad_norm": 0.7208181023597717, |
| "learning_rate": 0.00015256792039904465, |
| "loss": 1.3944549560546875, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.9394802954217433, |
| "eval_cos_loss": 0.43953821437953633, |
| "eval_dec_loss": 0.11125741161501357, |
| "eval_loss": 1.390636357925594, |
| "eval_mse2_loss": 0.14740569860950462, |
| "eval_mse_loss": 1.243230658299379, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5604617825703326, |
| "flow/improvement_ratio": 0.8963484140093139, |
| "flow/mag_ratio_mean": 0.5701523471488628, |
| "flow/mag_ratio_std": 0.2696803689701979, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.9394802954217433, |
| "eval_cos_loss": 0.43953821437953633, |
| "eval_dec_loss": 0.11125741161501357, |
| "eval_loss": 1.390636357925594, |
| "eval_mse2_loss": 0.14740569860950462, |
| "eval_mse_loss": 1.243230658299379, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.0541, |
| "eval_samples_per_second": 199.928, |
| "eval_steps_per_second": 3.126, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5604617825703326, |
| "flow/improvement_ratio": 0.8963484140093139, |
| "flow/mag_ratio_mean": 0.5701523471488628, |
| "flow/mag_ratio_std": 0.2696803689701979, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7612644644126964, |
| "grad_norm": 0.7441651821136475, |
| "learning_rate": 0.00014004491112398103, |
| "loss": 1.3953999280929565, |
| "step": 17664 |
| }, |
| { |
| "epoch": 0.7722972827375181, |
| "grad_norm": 0.6111723780632019, |
| "learning_rate": 0.00012797400442612433, |
| "loss": 1.3906549215316772, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.7833301010623397, |
| "grad_norm": 0.7702776789665222, |
| "learning_rate": 0.00011637036133026895, |
| "loss": 1.393618106842041, |
| "step": 18176 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "grad_norm": 1.200044870376587, |
| "learning_rate": 0.00010524855597944216, |
| "loss": 1.3919320106506348, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.9383138215986374, |
| "eval_cos_loss": 0.44080494201259574, |
| "eval_dec_loss": 0.11339724150253956, |
| "eval_loss": 1.3953827009526396, |
| "eval_mse2_loss": 0.14798823779008027, |
| "eval_mse_loss": 1.247394463400851, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5591950585593038, |
| "flow/improvement_ratio": 0.8969450570118707, |
| "flow/mag_ratio_mean": 0.5669063877449361, |
| "flow/mag_ratio_std": 0.26610757334273, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.9383138215986374, |
| "eval_cos_loss": 0.44080494201259574, |
| "eval_dec_loss": 0.11339724150253956, |
| "eval_loss": 1.3953827009526396, |
| "eval_mse2_loss": 0.14798823779008027, |
| "eval_mse_loss": 1.247394463400851, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 148.9298, |
| "eval_samples_per_second": 201.437, |
| "eval_steps_per_second": 3.149, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5591950585593038, |
| "flow/improvement_ratio": 0.8969450570118707, |
| "flow/mag_ratio_mean": 0.5669063877449361, |
| "flow/mag_ratio_std": 0.26610757334273, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8053957377119831, |
| "grad_norm": 1.2096973657608032, |
| "learning_rate": 9.462255732982089e-05, |
| "loss": 1.3907443284988403, |
| "step": 18688 |
| }, |
| { |
| "epoch": 0.8164285560368048, |
| "grad_norm": 0.7669377326965332, |
| "learning_rate": 8.450571160576348e-05, |
| "loss": 1.3931423425674438, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.8274613743616265, |
| "grad_norm": 0.6018663644790649, |
| "learning_rate": 7.491072553698764e-05, |
| "loss": 1.3906762599945068, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "grad_norm": 0.5031753182411194, |
| "learning_rate": 6.584965039895586e-05, |
| "loss": 1.387755274772644, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.9374315797458206, |
| "eval_cos_loss": 0.4402599502474006, |
| "eval_dec_loss": 0.11256631666909593, |
| "eval_loss": 1.3931237858241554, |
| "eval_mse2_loss": 0.14820685115323137, |
| "eval_mse_loss": 1.2449169349568738, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5597400505151322, |
| "flow/improvement_ratio": 0.8949816590433182, |
| "flow/mag_ratio_mean": 0.5695700204448659, |
| "flow/mag_ratio_std": 0.2675396429895084, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.9374315797458206, |
| "eval_cos_loss": 0.4402599502474006, |
| "eval_dec_loss": 0.11256631666909593, |
| "eval_loss": 1.3931237858241554, |
| "eval_mse2_loss": 0.14820685115323137, |
| "eval_mse_loss": 1.2449169349568738, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.1209, |
| "eval_samples_per_second": 198.517, |
| "eval_steps_per_second": 3.103, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5597400505151322, |
| "flow/improvement_ratio": 0.8949816590433182, |
| "flow/mag_ratio_mean": 0.5695700204448659, |
| "flow/mag_ratio_std": 0.2675396429895084, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8495270110112698, |
| "grad_norm": 0.5668984055519104, |
| "learning_rate": 5.73338668765051e-05, |
| "loss": 1.3939695358276367, |
| "step": 19712 |
| }, |
| { |
| "epoch": 0.8605598293360915, |
| "grad_norm": 0.5667815208435059, |
| "learning_rate": 4.9374070769740984e-05, |
| "loss": 1.392187237739563, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.8715926476609133, |
| "grad_norm": 0.8259353637695312, |
| "learning_rate": 4.198025956014095e-05, |
| "loss": 1.3923636674880981, |
| "step": 20224 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "grad_norm": 1.0611343383789062, |
| "learning_rate": 3.516171985374755e-05, |
| "loss": 1.3925312757492065, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.9374427943864152, |
| "eval_cos_loss": 0.44028231228338377, |
| "eval_dec_loss": 0.11508091816158374, |
| "eval_loss": 1.3936327850894887, |
| "eval_mse2_loss": 0.14858852614408363, |
| "eval_mse_loss": 1.2450442581034418, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5597176894958593, |
| "flow/improvement_ratio": 0.8979350520349515, |
| "flow/mag_ratio_mean": 0.5660728585999658, |
| "flow/mag_ratio_std": 0.2672899484253133, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.9374427943864152, |
| "eval_cos_loss": 0.44028231228338377, |
| "eval_dec_loss": 0.11508091816158374, |
| "eval_loss": 1.3936327850894887, |
| "eval_mse2_loss": 0.14858852614408363, |
| "eval_mse_loss": 1.2450442581034418, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 153.7462, |
| "eval_samples_per_second": 195.127, |
| "eval_steps_per_second": 3.05, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5597176894958593, |
| "flow/improvement_ratio": 0.8979350520349515, |
| "flow/mag_ratio_mean": 0.5660728585999658, |
| "flow/mag_ratio_std": 0.2672899484253133, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8936582843105566, |
| "grad_norm": 0.3865830898284912, |
| "learning_rate": 2.8927015717215733e-05, |
| "loss": 1.3925455808639526, |
| "step": 20736 |
| }, |
| { |
| "epoch": 0.9046911026353783, |
| "grad_norm": 0.5185984373092651, |
| "learning_rate": 2.3283977921370547e-05, |
| "loss": 1.391736388206482, |
| "step": 20992 |
| }, |
| { |
| "epoch": 0.9157239209601999, |
| "grad_norm": 0.5432521104812622, |
| "learning_rate": 1.8239694105780413e-05, |
| "loss": 1.3918673992156982, |
| "step": 21248 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "grad_norm": 0.3220946788787842, |
| "learning_rate": 1.3800499876701955e-05, |
| "loss": 1.3899786472320557, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "eval_bleu": 0.9376202589977451, |
| "eval_cos_loss": 0.4390181550211998, |
| "eval_dec_loss": 0.11783787643691815, |
| "eval_loss": 1.3900849003273288, |
| "eval_mse2_loss": 0.14892718683618472, |
| "eval_mse_loss": 1.2411577106793044, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5609818455506997, |
| "flow/improvement_ratio": 0.8963389490713188, |
| "flow/mag_ratio_mean": 0.5705793747769744, |
| "flow/mag_ratio_std": 0.27008461628133046, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "eval_bleu": 0.9376202589977451, |
| "eval_cos_loss": 0.4390181550211998, |
| "eval_dec_loss": 0.11783787643691815, |
| "eval_loss": 1.3900849003273288, |
| "eval_mse2_loss": 0.14892718683618472, |
| "eval_mse_loss": 1.2411577106793044, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.1598, |
| "eval_samples_per_second": 199.787, |
| "eval_steps_per_second": 3.123, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5609818455506997, |
| "flow/improvement_ratio": 0.8963389490713188, |
| "flow/mag_ratio_mean": 0.5705793747769744, |
| "flow/mag_ratio_std": 0.27008461628133046, |
| "step": 21504 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|