| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8826254659857349, |
| "eval_steps": 1024, |
| "global_step": 20480, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011032818324821687, |
| "grad_norm": 10.797319412231445, |
| "learning_rate": 0.000498046875, |
| "loss": 105.79659271240234, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.022065636649643373, |
| "grad_norm": 25.741575241088867, |
| "learning_rate": 0.000998046875, |
| "loss": 2.3060808181762695, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03309845497446506, |
| "grad_norm": 14.934273719787598, |
| "learning_rate": 0.000999688448778502, |
| "loss": 2.2731690406799316, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 28.308002471923828, |
| "learning_rate": 0.0009987492950653055, |
| "loss": 2.175210475921631, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9214307463191996, |
| "eval_cos_loss": 0.7069334270857545, |
| "eval_dec_loss": 0.14960542684599662, |
| "eval_loss": 2.0735362056475966, |
| "eval_mse2_loss": 0.2511829924164042, |
| "eval_mse_loss": 1.8223532110389107, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.29306657075373604, |
| "flow/improvement_ratio": 0.7837164053784759, |
| "flow/mag_ratio_mean": 0.434395147856873, |
| "flow/mag_ratio_std": 0.1415387201728597, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9214307463191996, |
| "eval_cos_loss": 0.7069334270857545, |
| "eval_dec_loss": 0.14960542684599662, |
| "eval_loss": 2.0735362056475966, |
| "eval_mse2_loss": 0.2511829924164042, |
| "eval_mse_loss": 1.8223532110389107, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 171.0537, |
| "eval_samples_per_second": 175.384, |
| "eval_steps_per_second": 2.742, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.29306657075373604, |
| "flow/improvement_ratio": 0.7837164053784759, |
| "flow/mag_ratio_mean": 0.434395147856873, |
| "flow/mag_ratio_std": 0.1415387201728597, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05516409162410843, |
| "grad_norm": 20.717588424682617, |
| "learning_rate": 0.0009971837136430763, |
| "loss": 1.9890590906143188, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.06619690994893011, |
| "grad_norm": 16.1776123046875, |
| "learning_rate": 0.0009949936708776692, |
| "loss": 1.8445225954055786, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.07722972827375181, |
| "grad_norm": 15.698042869567871, |
| "learning_rate": 0.0009921819174566252, |
| "loss": 1.7741947174072266, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 12.729333877563477, |
| "learning_rate": 0.000988751984934317, |
| "loss": 1.714084267616272, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.9356201248298646, |
| "eval_cos_loss": 0.5489801322854658, |
| "eval_dec_loss": 0.11870824870889757, |
| "eval_loss": 1.7085340877077473, |
| "eval_mse2_loss": 0.1958959426865903, |
| "eval_mse_loss": 1.5126381456724871, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.4510198644102255, |
| "flow/improvement_ratio": 0.8669772742907884, |
| "flow/mag_ratio_mean": 0.5195024066261137, |
| "flow/mag_ratio_std": 0.21416059253947822, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.9356201248298646, |
| "eval_cos_loss": 0.5489801322854658, |
| "eval_dec_loss": 0.11870824870889757, |
| "eval_loss": 1.7085340877077473, |
| "eval_mse2_loss": 0.1958959426865903, |
| "eval_mse_loss": 1.5126381456724871, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 172.0919, |
| "eval_samples_per_second": 174.325, |
| "eval_steps_per_second": 2.725, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.4510198644102255, |
| "flow/improvement_ratio": 0.8669772742907884, |
| "flow/mag_ratio_mean": 0.5195024066261137, |
| "flow/mag_ratio_std": 0.21416059253947822, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09929536492339518, |
| "grad_norm": 9.853597640991211, |
| "learning_rate": 0.0009847081812963268, |
| "loss": 1.6746653318405151, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.11032818324821686, |
| "grad_norm": 7.131998062133789, |
| "learning_rate": 0.0009800555855486275, |
| "loss": 1.646998405456543, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.12136100157303854, |
| "grad_norm": 7.610518932342529, |
| "learning_rate": 0.0009748000413383664, |
| "loss": 1.62706458568573, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 5.741150856018066, |
| "learning_rate": 0.0009689481496142604, |
| "loss": 1.6093950271606445, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9338383707066459, |
| "eval_cos_loss": 0.5054820228868456, |
| "eval_dec_loss": 0.12964063795851366, |
| "eval_loss": 1.5915681512625233, |
| "eval_mse2_loss": 0.1819949251438763, |
| "eval_mse_loss": 1.4095732276119404, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.4945179769860656, |
| "flow/improvement_ratio": 0.8849164608444995, |
| "flow/mag_ratio_mean": 0.5184873009541395, |
| "flow/mag_ratio_std": 0.229987337486322, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9338383707066459, |
| "eval_cos_loss": 0.5054820228868456, |
| "eval_dec_loss": 0.12964063795851366, |
| "eval_loss": 1.5915681512625233, |
| "eval_mse2_loss": 0.1819949251438763, |
| "eval_mse_loss": 1.4095732276119404, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 176.4575, |
| "eval_samples_per_second": 170.013, |
| "eval_steps_per_second": 2.658, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.4945179769860656, |
| "flow/improvement_ratio": 0.8849164608444995, |
| "flow/mag_ratio_mean": 0.5184873009541395, |
| "flow/mag_ratio_std": 0.229987337486322, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.14342663822268192, |
| "grad_norm": 5.7491865158081055, |
| "learning_rate": 0.0009625072603358231, |
| "loss": 1.5970690250396729, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.15445945654750362, |
| "grad_norm": 6.157455921173096, |
| "learning_rate": 0.0009554854632418371, |
| "loss": 1.5825296640396118, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.1654922748723253, |
| "grad_norm": 4.907317638397217, |
| "learning_rate": 0.000947891577689663, |
| "loss": 1.5728044509887695, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 3.5090701580047607, |
| "learning_rate": 0.0009397351415781539, |
| "loss": 1.565865159034729, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.9365961616358232, |
| "eval_cos_loss": 0.49659409953841266, |
| "eval_dec_loss": 0.12140450885753705, |
| "eval_loss": 1.564894216655414, |
| "eval_mse2_loss": 0.17669188814249628, |
| "eval_mse_loss": 1.3882023289259562, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5034058995719658, |
| "flow/improvement_ratio": 0.8819394332767804, |
| "flow/mag_ratio_mean": 0.5346324385356293, |
| "flow/mag_ratio_std": 0.23142226367616958, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.9365961616358232, |
| "eval_cos_loss": 0.49659409953841266, |
| "eval_dec_loss": 0.12140450885753705, |
| "eval_loss": 1.564894216655414, |
| "eval_mse2_loss": 0.17669188814249628, |
| "eval_mse_loss": 1.3882023289259562, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 160.4591, |
| "eval_samples_per_second": 186.964, |
| "eval_steps_per_second": 2.923, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5034058995719658, |
| "flow/improvement_ratio": 0.8819394332767804, |
| "flow/mag_ratio_mean": 0.5346324385356293, |
| "flow/mag_ratio_std": 0.23142226367616958, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18755791152196866, |
| "grad_norm": 4.376228332519531, |
| "learning_rate": 0.000931026399368079, |
| "loss": 1.5588833093643188, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.19859072984679035, |
| "grad_norm": 4.248619556427002, |
| "learning_rate": 0.0009217762892151117, |
| "loss": 1.5539987087249756, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.20962354817161205, |
| "grad_norm": 4.112342357635498, |
| "learning_rate": 0.0009119964292315354, |
| "loss": 1.547034502029419, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 4.957840919494629, |
| "learning_rate": 0.0009016991028939279, |
| "loss": 1.539028525352478, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.937221428849788, |
| "eval_cos_loss": 0.48556288504905537, |
| "eval_dec_loss": 0.11899168453395748, |
| "eval_loss": 1.5318274358188166, |
| "eval_mse2_loss": 0.17039804826222504, |
| "eval_mse_loss": 1.3614293873183, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5144371166666434, |
| "flow/improvement_ratio": 0.8873141655789764, |
| "flow/mag_ratio_mean": 0.5353354320470204, |
| "flow/mag_ratio_std": 0.2365191124204888, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.937221428849788, |
| "eval_cos_loss": 0.48556288504905537, |
| "eval_dec_loss": 0.11899168453395748, |
| "eval_loss": 1.5318274358188166, |
| "eval_mse2_loss": 0.17039804826222504, |
| "eval_mse_loss": 1.3614293873183, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 169.2874, |
| "eval_samples_per_second": 177.213, |
| "eval_steps_per_second": 2.77, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5144371166666434, |
| "flow/improvement_ratio": 0.8873141655789764, |
| "flow/mag_ratio_mean": 0.5353354320470204, |
| "flow/mag_ratio_std": 0.2365191124204888, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.23168918482125542, |
| "grad_norm": 3.4134156703948975, |
| "learning_rate": 0.0008908972436151494, |
| "loss": 1.532220721244812, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2427220031460771, |
| "grad_norm": 3.1423563957214355, |
| "learning_rate": 0.0008796044185000127, |
| "loss": 1.525950312614441, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.2537548214708988, |
| "grad_norm": 2.3490381240844727, |
| "learning_rate": 0.0008678348113050368, |
| "loss": 1.5172430276870728, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 6.032445430755615, |
| "learning_rate": 0.0008556032046236897, |
| "loss": 1.5190675258636475, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.9354061397916119, |
| "eval_cos_loss": 0.4837510161308337, |
| "eval_dec_loss": 0.1238921330338205, |
| "eval_loss": 1.5238950331328012, |
| "eval_mse2_loss": 0.17081616409043512, |
| "eval_mse_loss": 1.353078869360088, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5162489819628343, |
| "flow/improvement_ratio": 0.8831684786373618, |
| "flow/mag_ratio_mean": 0.5513551437905603, |
| "flow/mag_ratio_std": 0.24864270314097658, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.9354061397916119, |
| "eval_cos_loss": 0.4837510161308337, |
| "eval_dec_loss": 0.1238921330338205, |
| "eval_loss": 1.5238950331328012, |
| "eval_mse2_loss": 0.17081616409043512, |
| "eval_mse_loss": 1.353078869360088, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 172.5467, |
| "eval_samples_per_second": 173.866, |
| "eval_steps_per_second": 2.718, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5162489819628343, |
| "flow/improvement_ratio": 0.8831684786373618, |
| "flow/mag_ratio_mean": 0.5513551437905603, |
| "flow/mag_ratio_std": 0.24864270314097658, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2758204581205422, |
| "grad_norm": 4.0988078117370605, |
| "learning_rate": 0.000842924961319492, |
| "loss": 1.5116130113601685, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.28685327644536385, |
| "grad_norm": 3.4181602001190186, |
| "learning_rate": 0.0008298160052303045, |
| "loss": 1.506291389465332, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.2978860947701855, |
| "grad_norm": 3.047140598297119, |
| "learning_rate": 0.0008162928011680314, |
| "loss": 1.5031940937042236, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 3.206284761428833, |
| "learning_rate": 0.000802372334238864, |
| "loss": 1.5017902851104736, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.9368851499593083, |
| "eval_cos_loss": 0.47378275395710584, |
| "eval_dec_loss": 0.11915739709888694, |
| "eval_loss": 1.4968281847073326, |
| "eval_mse2_loss": 0.1652413869876343, |
| "eval_mse_loss": 1.3315867993877386, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5262172466783381, |
| "flow/improvement_ratio": 0.8891915549347396, |
| "flow/mag_ratio_mean": 0.5413357251361489, |
| "flow/mag_ratio_std": 0.24727411558633167, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.9368851499593083, |
| "eval_cos_loss": 0.47378275395710584, |
| "eval_dec_loss": 0.11915739709888694, |
| "eval_loss": 1.4968281847073326, |
| "eval_mse2_loss": 0.1652413869876343, |
| "eval_mse_loss": 1.3315867993877386, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 176.7103, |
| "eval_samples_per_second": 169.769, |
| "eval_steps_per_second": 2.654, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5262172466783381, |
| "flow/improvement_ratio": 0.8891915549347396, |
| "flow/mag_ratio_mean": 0.5413357251361489, |
| "flow/mag_ratio_std": 0.24727411558633167, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3199517314198289, |
| "grad_norm": 3.409893274307251, |
| "learning_rate": 0.0007880720885100349, |
| "loss": 1.4971449375152588, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3309845497446506, |
| "grad_norm": 3.073834180831909, |
| "learning_rate": 0.0007734100250498788, |
| "loss": 1.49185311794281, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3420173680694723, |
| "grad_norm": 4.364046096801758, |
| "learning_rate": 0.000758404559368781, |
| "loss": 1.4903632402420044, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "grad_norm": 2.848015785217285, |
| "learning_rate": 0.0007430745382893488, |
| "loss": 1.4838106632232666, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9353698110567827, |
| "eval_cos_loss": 0.4692605264913807, |
| "eval_dec_loss": 0.12152907604983113, |
| "eval_loss": 1.4847185436342316, |
| "eval_mse2_loss": 0.16447527768578865, |
| "eval_mse_loss": 1.3202432660914178, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5307394738898856, |
| "flow/improvement_ratio": 0.8924712222268079, |
| "flow/mag_ratio_mean": 0.5445197708825312, |
| "flow/mag_ratio_std": 0.24787098121668485, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9353698110567827, |
| "eval_cos_loss": 0.4692605264913807, |
| "eval_dec_loss": 0.12152907604983113, |
| "eval_loss": 1.4847185436342316, |
| "eval_mse2_loss": 0.16447527768578865, |
| "eval_mse_loss": 1.3202432660914178, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 173.5643, |
| "eval_samples_per_second": 172.847, |
| "eval_steps_per_second": 2.702, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5307394738898856, |
| "flow/improvement_ratio": 0.8924712222268079, |
| "flow/mag_ratio_mean": 0.5445197708825312, |
| "flow/mag_ratio_std": 0.24787098121668485, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.36408300471911564, |
| "grad_norm": 2.5942811965942383, |
| "learning_rate": 0.0007274392162748551, |
| "loss": 1.4843038320541382, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.3751158230439373, |
| "grad_norm": 3.0958163738250732, |
| "learning_rate": 0.000711518231245687, |
| "loss": 1.4775118827819824, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.38614864136875904, |
| "grad_norm": 3.219830274581909, |
| "learning_rate": 0.0006953315799141723, |
| "loss": 1.4766325950622559, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "grad_norm": 2.296929359436035, |
| "learning_rate": 0.0006788995926687669, |
| "loss": 1.4715560674667358, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9356008220185699, |
| "eval_cos_loss": 0.4650544371050812, |
| "eval_dec_loss": 0.1218992033397465, |
| "eval_loss": 1.4720673169662704, |
| "eval_mse2_loss": 0.1628535425167348, |
| "eval_mse_loss": 1.3092137732739642, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.534945561814664, |
| "flow/improvement_ratio": 0.8920619619934798, |
| "flow/mag_ratio_mean": 0.5498689174143745, |
| "flow/mag_ratio_std": 0.2565161931489322, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9356008220185699, |
| "eval_cos_loss": 0.4650544371050812, |
| "eval_dec_loss": 0.1218992033397465, |
| "eval_loss": 1.4720673169662704, |
| "eval_mse2_loss": 0.1628535425167348, |
| "eval_mse_loss": 1.3092137732739642, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 165.3408, |
| "eval_samples_per_second": 181.443, |
| "eval_steps_per_second": 2.837, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.534945561814664, |
| "flow/improvement_ratio": 0.8920619619934798, |
| "flow/mag_ratio_mean": 0.5498689174143745, |
| "flow/mag_ratio_std": 0.2565161931489322, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4082142780184024, |
| "grad_norm": 2.3320987224578857, |
| "learning_rate": 0.0006622429080391422, |
| "loss": 1.4737770557403564, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.4192470963432241, |
| "grad_norm": 2.76210355758667, |
| "learning_rate": 0.0006453824467742515, |
| "loss": 1.467403531074524, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.43027991466804577, |
| "grad_norm": 2.2834606170654297, |
| "learning_rate": 0.0006283393855659275, |
| "loss": 1.466551661491394, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "grad_norm": 2.1904284954071045, |
| "learning_rate": 0.0006111351304510173, |
| "loss": 1.46049165725708, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.9362346289783122, |
| "eval_cos_loss": 0.46082611143716107, |
| "eval_dec_loss": 0.1203397757717287, |
| "eval_loss": 1.4601926084266288, |
| "eval_mse2_loss": 0.1609408200613217, |
| "eval_mse_loss": 1.2992517866813806, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5391738893253717, |
| "flow/improvement_ratio": 0.8964146429033422, |
| "flow/mag_ratio_mean": 0.5516184565863376, |
| "flow/mag_ratio_std": 0.25244328309732206, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.9362346289783122, |
| "eval_cos_loss": 0.46082611143716107, |
| "eval_dec_loss": 0.1203397757717287, |
| "eval_loss": 1.4601926084266288, |
| "eval_mse2_loss": 0.1609408200613217, |
| "eval_mse_loss": 1.2992517866813806, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 179.4357, |
| "eval_samples_per_second": 167.191, |
| "eval_steps_per_second": 2.614, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5391738893253717, |
| "flow/improvement_ratio": 0.8964146429033422, |
| "flow/mag_ratio_mean": 0.5516184565863376, |
| "flow/mag_ratio_std": 0.25244328309732206, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.45234555131768917, |
| "grad_norm": 2.3216679096221924, |
| "learning_rate": 0.0005937912899254605, |
| "loss": 1.4583410024642944, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.46337836964251083, |
| "grad_norm": 2.843108892440796, |
| "learning_rate": 0.0005763296478040787, |
| "loss": 1.4574695825576782, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.4744111879673325, |
| "grad_norm": 2.320223808288574, |
| "learning_rate": 0.0005587721358601663, |
| "loss": 1.4572159051895142, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "grad_norm": 2.517941474914551, |
| "learning_rate": 0.0005411408062792448, |
| "loss": 1.4533445835113525, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.9349888888103645, |
| "eval_cos_loss": 0.4606767813407028, |
| "eval_dec_loss": 0.12063868982848455, |
| "eval_loss": 1.4557419222301002, |
| "eval_mse2_loss": 0.15818865597248077, |
| "eval_mse_loss": 1.2975532645101486, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5393232155456218, |
| "flow/improvement_ratio": 0.892127673127758, |
| "flow/mag_ratio_mean": 0.550250798717999, |
| "flow/mag_ratio_std": 0.2556351939879501, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.9349888888103645, |
| "eval_cos_loss": 0.4606767813407028, |
| "eval_dec_loss": 0.12063868982848455, |
| "eval_loss": 1.4557419222301002, |
| "eval_mse2_loss": 0.15818865597248077, |
| "eval_mse_loss": 1.2975532645101486, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 169.0185, |
| "eval_samples_per_second": 177.495, |
| "eval_steps_per_second": 2.775, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5393232155456218, |
| "flow/improvement_ratio": 0.892127673127758, |
| "flow/mag_ratio_mean": 0.550250798717999, |
| "flow/mag_ratio_std": 0.2556351939879501, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4964768246169759, |
| "grad_norm": 2.285221815109253, |
| "learning_rate": 0.0005234578039615789, |
| "loss": 1.4499876499176025, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5075096429417976, |
| "grad_norm": 2.029639482498169, |
| "learning_rate": 0.0005057453387082458, |
| "loss": 1.4481278657913208, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5185424612666193, |
| "grad_norm": 2.0235965251922607, |
| "learning_rate": 0.0004880256573256866, |
| "loss": 1.4484679698944092, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "grad_norm": 1.6718403100967407, |
| "learning_rate": 0.0004703210156837805, |
| "loss": 1.447327733039856, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.9364176777903095, |
| "eval_cos_loss": 0.4588198344082212, |
| "eval_dec_loss": 0.11679136855746193, |
| "eval_loss": 1.4490999984842883, |
| "eval_mse2_loss": 0.15755183694522773, |
| "eval_mse_loss": 1.2915481588225375, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5411801628593697, |
| "flow/improvement_ratio": 0.8935188584998727, |
| "flow/mag_ratio_mean": 0.5548275625273618, |
| "flow/mag_ratio_std": 0.25878340491989277, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.9364176777903095, |
| "eval_cos_loss": 0.4588198344082212, |
| "eval_dec_loss": 0.11679136855746193, |
| "eval_loss": 1.4490999984842883, |
| "eval_mse2_loss": 0.15755183694522773, |
| "eval_mse_loss": 1.2915481588225375, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 167.4569, |
| "eval_samples_per_second": 179.151, |
| "eval_steps_per_second": 2.801, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5411801628593697, |
| "flow/improvement_ratio": 0.8935188584998727, |
| "flow/mag_ratio_mean": 0.5548275625273618, |
| "flow/mag_ratio_std": 0.25878340491989277, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5406080979162626, |
| "grad_norm": 1.9634923934936523, |
| "learning_rate": 0.0004526536507625343, |
| "loss": 1.443847894668579, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5516409162410844, |
| "grad_norm": 2.0251376628875732, |
| "learning_rate": 0.00043504575272249973, |
| "loss": 1.4448539018630981, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.562673734565906, |
| "grad_norm": 1.487449049949646, |
| "learning_rate": 0.0004175194370339921, |
| "loss": 1.44254732131958, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "grad_norm": 2.1053073406219482, |
| "learning_rate": 0.0004000967167001243, |
| "loss": 1.4406143426895142, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.9372209976548388, |
| "eval_cos_loss": 0.45495398104317913, |
| "eval_dec_loss": 0.11334308082345071, |
| "eval_loss": 1.438905306970641, |
| "eval_mse2_loss": 0.15505272262830977, |
| "eval_mse_loss": 1.2838525835639123, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5450460188932764, |
| "flow/improvement_ratio": 0.8936982345479384, |
| "flow/mag_ratio_mean": 0.5579607989996481, |
| "flow/mag_ratio_std": 0.2575365855876825, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.9372209976548388, |
| "eval_cos_loss": 0.45495398104317913, |
| "eval_dec_loss": 0.11334308082345071, |
| "eval_loss": 1.438905306970641, |
| "eval_mse2_loss": 0.15505272262830977, |
| "eval_mse_loss": 1.2838525835639123, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 177.5771, |
| "eval_samples_per_second": 168.941, |
| "eval_steps_per_second": 2.641, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5450460188932764, |
| "flow/improvement_ratio": 0.8936982345479384, |
| "flow/mag_ratio_mean": 0.5579607989996481, |
| "flow/mag_ratio_std": 0.2575365855876825, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5847393712155494, |
| "grad_norm": 2.0249550342559814, |
| "learning_rate": 0.00038279947460853446, |
| "loss": 1.4377583265304565, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.595772189540371, |
| "grad_norm": 1.6271026134490967, |
| "learning_rate": 0.00036564943604654345, |
| "loss": 1.4369451999664307, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6068050078651928, |
| "grad_norm": 1.2092267274856567, |
| "learning_rate": 0.00034866814141425254, |
| "loss": 1.437900424003601, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "grad_norm": 1.7238409519195557, |
| "learning_rate": 0.0003318769191698637, |
| "loss": 1.4345003366470337, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.9375344922384511, |
| "eval_cos_loss": 0.45383678259117516, |
| "eval_dec_loss": 0.11429015738961062, |
| "eval_loss": 1.4351127869538916, |
| "eval_mse2_loss": 0.15503559347345378, |
| "eval_mse_loss": 1.280077194608351, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5461632174723692, |
| "flow/improvement_ratio": 0.8951520059408664, |
| "flow/mag_ratio_mean": 0.5569842827599695, |
| "flow/mag_ratio_std": 0.26188866851299303, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.9375344922384511, |
| "eval_cos_loss": 0.45383678259117516, |
| "eval_dec_loss": 0.11429015738961062, |
| "eval_loss": 1.4351127869538916, |
| "eval_mse2_loss": 0.15503559347345378, |
| "eval_mse_loss": 1.280077194608351, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 176.4023, |
| "eval_samples_per_second": 170.066, |
| "eval_steps_per_second": 2.659, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5461632174723692, |
| "flow/improvement_ratio": 0.8951520059408664, |
| "flow/mag_ratio_mean": 0.5569842827599695, |
| "flow/mag_ratio_std": 0.26188866851299303, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6288706445148361, |
| "grad_norm": 1.5894383192062378, |
| "learning_rate": 0.00031529685904119485, |
| "loss": 1.4325922727584839, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6399034628396578, |
| "grad_norm": 1.9905190467834473, |
| "learning_rate": 0.0002989487855370421, |
| "loss": 1.4348605871200562, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6509362811644795, |
| "grad_norm": 1.5708714723587036, |
| "learning_rate": 0.00028285323179165424, |
| "loss": 1.4332606792449951, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "grad_norm": 1.5457080602645874, |
| "learning_rate": 0.0002670304137751759, |
| "loss": 1.435610294342041, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.9355922222002754, |
| "eval_cos_loss": 0.4520330929171556, |
| "eval_dec_loss": 0.12299507638332305, |
| "eval_loss": 1.4329845295277739, |
| "eval_mse2_loss": 0.15654502583465088, |
| "eval_mse_loss": 1.2764395058536326, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5479669079724659, |
| "flow/improvement_ratio": 0.8930472631190123, |
| "flow/mag_ratio_mean": 0.5571114101897933, |
| "flow/mag_ratio_std": 0.256343261551247, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.9355922222002754, |
| "eval_cos_loss": 0.4520330929171556, |
| "eval_dec_loss": 0.12299507638332305, |
| "eval_loss": 1.4329845295277739, |
| "eval_mse2_loss": 0.15654502583465088, |
| "eval_mse_loss": 1.2764395058536326, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 174.468, |
| "eval_samples_per_second": 171.951, |
| "eval_steps_per_second": 2.688, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5479669079724659, |
| "flow/improvement_ratio": 0.8930472631190123, |
| "flow/mag_ratio_mean": 0.5571114101897933, |
| "flow/mag_ratio_std": 0.256343261551247, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6730019178141229, |
| "grad_norm": 1.6477590799331665, |
| "learning_rate": 0.0002515002049024435, |
| "loss": 1.4318206310272217, |
| "step": 15616 |
| }, |
| { |
| "epoch": 0.6840347361389446, |
| "grad_norm": 1.5333731174468994, |
| "learning_rate": 0.00023628211107203429, |
| "loss": 1.4313552379608154, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.6950675544637662, |
| "grad_norm": 1.147505283355713, |
| "learning_rate": 0.00022139524616691188, |
| "loss": 1.4301337003707886, |
| "step": 16128 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "grad_norm": 1.1893175840377808, |
| "learning_rate": 0.000206858308047443, |
| "loss": 1.427048921585083, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.9378969893045919, |
| "eval_cos_loss": 0.45077633311245235, |
| "eval_dec_loss": 0.11347989061736126, |
| "eval_loss": 1.426147088567331, |
| "eval_mse2_loss": 0.1526812995071096, |
| "eval_mse_loss": 1.2734657897115516, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5492236670146364, |
| "flow/improvement_ratio": 0.8943654500853533, |
| "flow/mag_ratio_mean": 0.560582883195328, |
| "flow/mag_ratio_std": 0.25867489536306754, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.9378969893045919, |
| "eval_cos_loss": 0.45077633311245235, |
| "eval_dec_loss": 0.11347989061736126, |
| "eval_loss": 1.426147088567331, |
| "eval_mse2_loss": 0.1526812995071096, |
| "eval_mse_loss": 1.2734657897115516, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 174.3978, |
| "eval_samples_per_second": 172.021, |
| "eval_steps_per_second": 2.689, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5492236670146364, |
| "flow/improvement_ratio": 0.8943654500853533, |
| "flow/mag_ratio_mean": 0.560582883195328, |
| "flow/mag_ratio_std": 0.25867489536306754, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7171331911134096, |
| "grad_norm": 0.9107947945594788, |
| "learning_rate": 0.00019268955506693798, |
| "loss": 1.42864990234375, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.7281660094382313, |
| "grad_norm": 1.4668571949005127, |
| "learning_rate": 0.00017890678313921, |
| "loss": 1.426837682723999, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.739198827763053, |
| "grad_norm": 1.6071405410766602, |
| "learning_rate": 0.00016552730338695792, |
| "loss": 1.4257097244262695, |
| "step": 17152 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "grad_norm": 1.0349161624908447, |
| "learning_rate": 0.00015256792039904465, |
| "loss": 1.424682855606079, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.9392870027928614, |
| "eval_cos_loss": 0.4492604330277392, |
| "eval_dec_loss": 0.11262787379888392, |
| "eval_loss": 1.4202917224562752, |
| "eval_mse2_loss": 0.15203414787488706, |
| "eval_mse_loss": 1.2682575782987355, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5507395660190948, |
| "flow/improvement_ratio": 0.8961705508262618, |
| "flow/mag_ratio_mean": 0.5607809571823331, |
| "flow/mag_ratio_std": 0.2602766063739496, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.9392870027928614, |
| "eval_cos_loss": 0.4492604330277392, |
| "eval_dec_loss": 0.11262787379888392, |
| "eval_loss": 1.4202917224562752, |
| "eval_mse2_loss": 0.15203414787488706, |
| "eval_mse_loss": 1.2682575782987355, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 188.3193, |
| "eval_samples_per_second": 159.304, |
| "eval_steps_per_second": 2.49, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5507395660190948, |
| "flow/improvement_ratio": 0.8961705508262618, |
| "flow/mag_ratio_mean": 0.5607809571823331, |
| "flow/mag_ratio_std": 0.2602766063739496, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7612644644126964, |
| "grad_norm": 1.1615740060806274, |
| "learning_rate": 0.00014004491112398103, |
| "loss": 1.4255716800689697, |
| "step": 17664 |
| }, |
| { |
| "epoch": 0.7722972827375181, |
| "grad_norm": 0.9076006412506104, |
| "learning_rate": 0.00012797400442612433, |
| "loss": 1.4207247495651245, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.7833301010623397, |
| "grad_norm": 1.1816908121109009, |
| "learning_rate": 0.00011637036133026895, |
| "loss": 1.4235727787017822, |
| "step": 18176 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "grad_norm": 1.0465947389602661, |
| "learning_rate": 0.00010524855597944216, |
| "loss": 1.421402096748352, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.9381946887096906, |
| "eval_cos_loss": 0.4503502440986349, |
| "eval_dec_loss": 0.11471369324811995, |
| "eval_loss": 1.4244044193072614, |
| "eval_mse2_loss": 0.1525979548184348, |
| "eval_mse_loss": 1.2718064660456643, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5496497546940216, |
| "flow/improvement_ratio": 0.8959581610490518, |
| "flow/mag_ratio_mean": 0.5600753023680339, |
| "flow/mag_ratio_std": 0.2604317919277687, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.9381946887096906, |
| "eval_cos_loss": 0.4503502440986349, |
| "eval_dec_loss": 0.11471369324811995, |
| "eval_loss": 1.4244044193072614, |
| "eval_mse2_loss": 0.1525979548184348, |
| "eval_mse_loss": 1.2718064660456643, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 180.9403, |
| "eval_samples_per_second": 165.801, |
| "eval_steps_per_second": 2.592, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5496497546940216, |
| "flow/improvement_ratio": 0.8959581610490518, |
| "flow/mag_ratio_mean": 0.5600753023680339, |
| "flow/mag_ratio_std": 0.2604317919277687, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8053957377119831, |
| "grad_norm": 1.0948517322540283, |
| "learning_rate": 9.462255732982089e-05, |
| "loss": 1.4208451509475708, |
| "step": 18688 |
| }, |
| { |
| "epoch": 0.8164285560368048, |
| "grad_norm": 0.8038628101348877, |
| "learning_rate": 8.450571160576348e-05, |
| "loss": 1.4227826595306396, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.8274613743616265, |
| "grad_norm": 0.7290875315666199, |
| "learning_rate": 7.491072553698764e-05, |
| "loss": 1.4202244281768799, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "grad_norm": 0.7819137573242188, |
| "learning_rate": 6.584965039895586e-05, |
| "loss": 1.4174209833145142, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.9372464164495187, |
| "eval_cos_loss": 0.4498716953720874, |
| "eval_dec_loss": 0.11378647393183604, |
| "eval_loss": 1.422363788588469, |
| "eval_mse2_loss": 0.15286639037289854, |
| "eval_mse_loss": 1.2694973953243003, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5501283043101907, |
| "flow/improvement_ratio": 0.8944210361824361, |
| "flow/mag_ratio_mean": 0.5602786459648279, |
| "flow/mag_ratio_std": 0.26204406249243567, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.9372464164495187, |
| "eval_cos_loss": 0.4498716953720874, |
| "eval_dec_loss": 0.11378647393183604, |
| "eval_loss": 1.422363788588469, |
| "eval_mse2_loss": 0.15286639037289854, |
| "eval_mse_loss": 1.2694973953243003, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 175.4483, |
| "eval_samples_per_second": 170.991, |
| "eval_steps_per_second": 2.673, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5501283043101907, |
| "flow/improvement_ratio": 0.8944210361824361, |
| "flow/mag_ratio_mean": 0.5602786459648279, |
| "flow/mag_ratio_std": 0.26204406249243567, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8495270110112698, |
| "grad_norm": 0.7724987268447876, |
| "learning_rate": 5.73338668765051e-05, |
| "loss": 1.4236266613006592, |
| "step": 19712 |
| }, |
| { |
| "epoch": 0.8605598293360915, |
| "grad_norm": 0.6551377177238464, |
| "learning_rate": 4.9374070769740984e-05, |
| "loss": 1.4212990999221802, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.8715926476609133, |
| "grad_norm": 0.8115680813789368, |
| "learning_rate": 4.198025956014095e-05, |
| "loss": 1.4218370914459229, |
| "step": 20224 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "grad_norm": 1.0555589199066162, |
| "learning_rate": 3.516171985374755e-05, |
| "loss": 1.4221084117889404, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.937209637932583, |
| "eval_cos_loss": 0.4497852080158079, |
| "eval_dec_loss": 0.11631931441583868, |
| "eval_loss": 1.4226631736958713, |
| "eval_mse2_loss": 0.15323443922089108, |
| "eval_mse_loss": 1.2694287335694725, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5502147936363464, |
| "flow/improvement_ratio": 0.8968729432711977, |
| "flow/mag_ratio_mean": 0.5599655887719664, |
| "flow/mag_ratio_std": 0.2619784128055898, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.937209637932583, |
| "eval_cos_loss": 0.4497852080158079, |
| "eval_dec_loss": 0.11631931441583868, |
| "eval_loss": 1.4226631736958713, |
| "eval_mse2_loss": 0.15323443922089108, |
| "eval_mse_loss": 1.2694287335694725, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 172.9963, |
| "eval_samples_per_second": 173.414, |
| "eval_steps_per_second": 2.711, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5502147936363464, |
| "flow/improvement_ratio": 0.8968729432711977, |
| "flow/mag_ratio_mean": 0.5599655887719664, |
| "flow/mag_ratio_std": 0.2619784128055898, |
| "step": 20480 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|