| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8826254659857349, |
| "eval_steps": 1024, |
| "global_step": 20480, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011032818324821687, |
| "grad_norm": 0.12727995216846466, |
| "learning_rate": 0.000498046875, |
| "loss": 2.2427074909210205, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.022065636649643373, |
| "grad_norm": 0.37040260434150696, |
| "learning_rate": 0.000998046875, |
| "loss": 1.787421464920044, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03309845497446506, |
| "grad_norm": 0.42376086115837097, |
| "learning_rate": 0.000999688448778502, |
| "loss": 1.6120189428329468, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 0.728442370891571, |
| "learning_rate": 0.0009987492950653055, |
| "loss": 1.5639891624450684, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9365129221049671, |
| "eval_cos_loss": 0.4736882319836728, |
| "eval_dec_loss": 0.11874023166252796, |
| "eval_loss": 1.5516616545760555, |
| "eval_mse2_loss": 0.1660867908647828, |
| "eval_mse3_loss": 0.04509278692241544, |
| "eval_mse_loss": 1.340482075839663, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5263117668089836, |
| "flow/improvement_ratio": 0.8939692384398568, |
| "flow/mag_ratio_mean": 0.5442470624121521, |
| "flow/mag_ratio_std": 0.24557339487426572, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.9365129221049671, |
| "eval_cos_loss": 0.4736882319836728, |
| "eval_dec_loss": 0.11874023166252796, |
| "eval_loss": 1.5516616545760555, |
| "eval_mse2_loss": 0.1660867908647828, |
| "eval_mse3_loss": 0.04509278692241544, |
| "eval_mse_loss": 1.340482075839663, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.4374, |
| "eval_samples_per_second": 196.802, |
| "eval_steps_per_second": 3.077, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5263117668089836, |
| "flow/improvement_ratio": 0.8939692384398568, |
| "flow/mag_ratio_mean": 0.5442470624121521, |
| "flow/mag_ratio_std": 0.24557339487426572, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05516409162410843, |
| "grad_norm": 0.5939557552337646, |
| "learning_rate": 0.0009971837136430763, |
| "loss": 1.5434116125106812, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.06619690994893011, |
| "grad_norm": 0.5294345021247864, |
| "learning_rate": 0.0009949936708776692, |
| "loss": 1.522328495979309, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.07722972827375181, |
| "grad_norm": 1.3655198812484741, |
| "learning_rate": 0.0009921819174566252, |
| "loss": 1.514147162437439, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 0.7738495469093323, |
| "learning_rate": 0.000988751984934317, |
| "loss": 1.5043810606002808, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.9391092887836987, |
| "eval_cos_loss": 0.45917267693893743, |
| "eval_dec_loss": 0.1044372236793007, |
| "eval_loss": 1.5007227758354724, |
| "eval_mse2_loss": 0.15699366443574048, |
| "eval_mse3_loss": 0.0420123795543843, |
| "eval_mse_loss": 1.3017167356222676, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5408273230610626, |
| "flow/improvement_ratio": 0.8913466692733358, |
| "flow/mag_ratio_mean": 0.5480725960945015, |
| "flow/mag_ratio_std": 0.25143888014466015, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.9391092887836987, |
| "eval_cos_loss": 0.45917267693893743, |
| "eval_dec_loss": 0.1044372236793007, |
| "eval_loss": 1.5007227758354724, |
| "eval_mse2_loss": 0.15699366443574048, |
| "eval_mse3_loss": 0.0420123795543843, |
| "eval_mse_loss": 1.3017167356222676, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 149.7255, |
| "eval_samples_per_second": 200.367, |
| "eval_steps_per_second": 3.132, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5408273230610626, |
| "flow/improvement_ratio": 0.8913466692733358, |
| "flow/mag_ratio_mean": 0.5480725960945015, |
| "flow/mag_ratio_std": 0.25143888014466015, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09929536492339518, |
| "grad_norm": 0.723221480846405, |
| "learning_rate": 0.0009847081812963268, |
| "loss": 1.496282696723938, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.11032818324821686, |
| "grad_norm": 0.8804053068161011, |
| "learning_rate": 0.0009800555855486275, |
| "loss": 1.4938578605651855, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.12136100157303854, |
| "grad_norm": 0.9812144637107849, |
| "learning_rate": 0.0009748000413383664, |
| "loss": 1.4887880086898804, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 1.3136183023452759, |
| "learning_rate": 0.0009689481496142604, |
| "loss": 1.4831873178482056, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9371003601305814, |
| "eval_cos_loss": 0.4514941948690394, |
| "eval_dec_loss": 0.11555046006354061, |
| "eval_loss": 1.478005530483433, |
| "eval_mse2_loss": 0.15441496636885316, |
| "eval_mse3_loss": 0.04251180985557245, |
| "eval_mse_loss": 1.2810787536950508, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5485058038600726, |
| "flow/improvement_ratio": 0.8955106252292072, |
| "flow/mag_ratio_mean": 0.5588331800788197, |
| "flow/mag_ratio_std": 0.2562640742071148, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.9371003601305814, |
| "eval_cos_loss": 0.4514941948690394, |
| "eval_dec_loss": 0.11555046006354061, |
| "eval_loss": 1.478005530483433, |
| "eval_mse2_loss": 0.15441496636885316, |
| "eval_mse3_loss": 0.04251180985557245, |
| "eval_mse_loss": 1.2810787536950508, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 149.3327, |
| "eval_samples_per_second": 200.894, |
| "eval_steps_per_second": 3.141, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5485058038600726, |
| "flow/improvement_ratio": 0.8955106252292072, |
| "flow/mag_ratio_mean": 0.5588331800788197, |
| "flow/mag_ratio_std": 0.2562640742071148, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.14342663822268192, |
| "grad_norm": 0.7567078471183777, |
| "learning_rate": 0.0009625072603358231, |
| "loss": 1.4780577421188354, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.15445945654750362, |
| "grad_norm": 0.9316087365150452, |
| "learning_rate": 0.0009554854632418371, |
| "loss": 1.4720772504806519, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.1654922748723253, |
| "grad_norm": 0.8111740946769714, |
| "learning_rate": 0.000947891577689663, |
| "loss": 1.469363808631897, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 1.054283618927002, |
| "learning_rate": 0.0009397351415781539, |
| "loss": 1.46699059009552, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.9394680773832225, |
| "eval_cos_loss": 0.44803570569959533, |
| "eval_dec_loss": 0.10979667206459653, |
| "eval_loss": 1.4644885746909102, |
| "eval_mse2_loss": 0.15139158122511562, |
| "eval_mse3_loss": 0.04131953247876437, |
| "eval_mse_loss": 1.271777458790777, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5519642917586288, |
| "flow/improvement_ratio": 0.8933102408451821, |
| "flow/mag_ratio_mean": 0.5608082282771942, |
| "flow/mag_ratio_std": 0.25901126921939444, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.9394680773832225, |
| "eval_cos_loss": 0.44803570569959533, |
| "eval_dec_loss": 0.10979667206459653, |
| "eval_loss": 1.4644885746909102, |
| "eval_mse2_loss": 0.15139158122511562, |
| "eval_mse3_loss": 0.04131953247876437, |
| "eval_mse_loss": 1.271777458790777, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.2566, |
| "eval_samples_per_second": 199.658, |
| "eval_steps_per_second": 3.121, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5519642917586288, |
| "flow/improvement_ratio": 0.8933102408451821, |
| "flow/mag_ratio_mean": 0.5608082282771942, |
| "flow/mag_ratio_std": 0.25901126921939444, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18755791152196866, |
| "grad_norm": 1.008694052696228, |
| "learning_rate": 0.000931026399368079, |
| "loss": 1.4621409177780151, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.19859072984679035, |
| "grad_norm": 1.2265310287475586, |
| "learning_rate": 0.0009217762892151117, |
| "loss": 1.4644461870193481, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.20962354817161205, |
| "grad_norm": 0.8491294384002686, |
| "learning_rate": 0.0009119964292315354, |
| "loss": 1.4609150886535645, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 0.8653346300125122, |
| "learning_rate": 0.0009016991028939279, |
| "loss": 1.4556578397750854, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.9395664278045301, |
| "eval_cos_loss": 0.4446420454775601, |
| "eval_dec_loss": 0.10941354105713716, |
| "eval_loss": 1.4521360926028253, |
| "eval_mse2_loss": 0.14856901723565832, |
| "eval_mse3_loss": 0.04055595990103572, |
| "eval_mse_loss": 1.26301111900476, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.555357955666239, |
| "flow/improvement_ratio": 0.896109628270684, |
| "flow/mag_ratio_mean": 0.5672629550575956, |
| "flow/mag_ratio_std": 0.2583374333089349, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.9395664278045301, |
| "eval_cos_loss": 0.4446420454775601, |
| "eval_dec_loss": 0.10941354105713716, |
| "eval_loss": 1.4521360926028253, |
| "eval_mse2_loss": 0.14856901723565832, |
| "eval_mse3_loss": 0.04055595990103572, |
| "eval_mse_loss": 1.26301111900476, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.4389, |
| "eval_samples_per_second": 199.416, |
| "eval_steps_per_second": 3.118, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.555357955666239, |
| "flow/improvement_ratio": 0.896109628270684, |
| "flow/mag_ratio_mean": 0.5672629550575956, |
| "flow/mag_ratio_std": 0.2583374333089349, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.23168918482125542, |
| "grad_norm": 0.7437342405319214, |
| "learning_rate": 0.0008908972436151494, |
| "loss": 1.4530967473983765, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2427220031460771, |
| "grad_norm": 1.2461498975753784, |
| "learning_rate": 0.0008796044185000127, |
| "loss": 1.4508432149887085, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.2537548214708988, |
| "grad_norm": 0.9174250364303589, |
| "learning_rate": 0.0008678348113050368, |
| "loss": 1.445737600326538, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 0.7107803821563721, |
| "learning_rate": 0.0008556032046236897, |
| "loss": 1.448463797569275, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.9383079878806191, |
| "eval_cos_loss": 0.44315467447614365, |
| "eval_dec_loss": 0.11260034935846766, |
| "eval_loss": 1.4483204581844273, |
| "eval_mse2_loss": 0.14934769469791892, |
| "eval_mse3_loss": 0.04125582999281728, |
| "eval_mse_loss": 1.257716933293129, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5568453247613235, |
| "flow/improvement_ratio": 0.8934365713011736, |
| "flow/mag_ratio_mean": 0.5627565476685953, |
| "flow/mag_ratio_std": 0.26418959474894027, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.9383079878806191, |
| "eval_cos_loss": 0.44315467447614365, |
| "eval_dec_loss": 0.11260034935846766, |
| "eval_loss": 1.4483204581844273, |
| "eval_mse2_loss": 0.14934769469791892, |
| "eval_mse3_loss": 0.04125582999281728, |
| "eval_mse_loss": 1.257716933293129, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.7824, |
| "eval_samples_per_second": 197.651, |
| "eval_steps_per_second": 3.09, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5568453247613235, |
| "flow/improvement_ratio": 0.8934365713011736, |
| "flow/mag_ratio_mean": 0.5627565476685953, |
| "flow/mag_ratio_std": 0.26418959474894027, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2758204581205422, |
| "grad_norm": 0.9640243649482727, |
| "learning_rate": 0.000842924961319492, |
| "loss": 1.4444574117660522, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.28685327644536385, |
| "grad_norm": 0.897240161895752, |
| "learning_rate": 0.0008298160052303045, |
| "loss": 1.4424360990524292, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.2978860947701855, |
| "grad_norm": 0.8083540201187134, |
| "learning_rate": 0.0008162928011680314, |
| "loss": 1.4411871433258057, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 1.0060639381408691, |
| "learning_rate": 0.000802372334238864, |
| "loss": 1.439369559288025, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.9390257063353732, |
| "eval_cos_loss": 0.4395336633933378, |
| "eval_dec_loss": 0.11084323670905727, |
| "eval_loss": 1.4360539821673557, |
| "eval_mse2_loss": 0.14675505667416525, |
| "eval_mse3_loss": 0.04034361798070006, |
| "eval_mse_loss": 1.2489553087555778, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5604663359712182, |
| "flow/improvement_ratio": 0.8947566227872235, |
| "flow/mag_ratio_mean": 0.5714088110273072, |
| "flow/mag_ratio_std": 0.2653434300092238, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.9390257063353732, |
| "eval_cos_loss": 0.4395336633933378, |
| "eval_dec_loss": 0.11084323670905727, |
| "eval_loss": 1.4360539821673557, |
| "eval_mse2_loss": 0.14675505667416525, |
| "eval_mse3_loss": 0.04034361798070006, |
| "eval_mse_loss": 1.2489553087555778, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.3295, |
| "eval_samples_per_second": 199.562, |
| "eval_steps_per_second": 3.12, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5604663359712182, |
| "flow/improvement_ratio": 0.8947566227872235, |
| "flow/mag_ratio_mean": 0.5714088110273072, |
| "flow/mag_ratio_std": 0.2653434300092238, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3199517314198289, |
| "grad_norm": 1.4121514558792114, |
| "learning_rate": 0.0007880720885100349, |
| "loss": 1.4375568628311157, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3309845497446506, |
| "grad_norm": 0.9962936639785767, |
| "learning_rate": 0.0007734100250498788, |
| "loss": 1.435603141784668, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3420173680694723, |
| "grad_norm": 1.7380033731460571, |
| "learning_rate": 0.000758404559368781, |
| "loss": 1.436031460762024, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "grad_norm": 0.9169597625732422, |
| "learning_rate": 0.0007430745382893488, |
| "loss": 1.430087685585022, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9373010926402804, |
| "eval_cos_loss": 0.4382981645908437, |
| "eval_dec_loss": 0.11403963093492966, |
| "eval_loss": 1.4327775341615494, |
| "eval_mse2_loss": 0.14718564421828115, |
| "eval_mse3_loss": 0.040787868026985544, |
| "eval_mse_loss": 1.244804022917107, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5617018342018127, |
| "flow/improvement_ratio": 0.8961344680298112, |
| "flow/mag_ratio_mean": 0.5686201650196555, |
| "flow/mag_ratio_std": 0.26075691680537105, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.9373010926402804, |
| "eval_cos_loss": 0.4382981645908437, |
| "eval_dec_loss": 0.11403963093492966, |
| "eval_loss": 1.4327775341615494, |
| "eval_mse2_loss": 0.14718564421828115, |
| "eval_mse3_loss": 0.040787868026985544, |
| "eval_mse_loss": 1.244804022917107, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.6067, |
| "eval_samples_per_second": 197.88, |
| "eval_steps_per_second": 3.094, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5617018342018127, |
| "flow/improvement_ratio": 0.8961344680298112, |
| "flow/mag_ratio_mean": 0.5686201650196555, |
| "flow/mag_ratio_std": 0.26075691680537105, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.36408300471911564, |
| "grad_norm": 1.2372357845306396, |
| "learning_rate": 0.0007274392162748551, |
| "loss": 1.433260440826416, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.3751158230439373, |
| "grad_norm": 0.7132707834243774, |
| "learning_rate": 0.000711518231245687, |
| "loss": 1.4292922019958496, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.38614864136875904, |
| "grad_norm": 0.933404266834259, |
| "learning_rate": 0.0006953315799141723, |
| "loss": 1.428688645362854, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "grad_norm": 1.2554126977920532, |
| "learning_rate": 0.0006788995926687669, |
| "loss": 1.425850510597229, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9372418767062352, |
| "eval_cos_loss": 0.43612730547563355, |
| "eval_dec_loss": 0.11531878670633856, |
| "eval_loss": 1.4266213120173799, |
| "eval_mse2_loss": 0.1465762988813142, |
| "eval_mse3_loss": 0.040784004980376536, |
| "eval_mse_loss": 1.2392610072581245, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5638726928086677, |
| "flow/improvement_ratio": 0.8970644686267828, |
| "flow/mag_ratio_mean": 0.5689206580871712, |
| "flow/mag_ratio_std": 0.25856476135726675, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.9372418767062352, |
| "eval_cos_loss": 0.43612730547563355, |
| "eval_dec_loss": 0.11531878670633856, |
| "eval_loss": 1.4266213120173799, |
| "eval_mse2_loss": 0.1465762988813142, |
| "eval_mse3_loss": 0.040784004980376536, |
| "eval_mse_loss": 1.2392610072581245, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.6594, |
| "eval_samples_per_second": 199.125, |
| "eval_steps_per_second": 3.113, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5638726928086677, |
| "flow/improvement_ratio": 0.8970644686267828, |
| "flow/mag_ratio_mean": 0.5689206580871712, |
| "flow/mag_ratio_std": 0.25856476135726675, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4082142780184024, |
| "grad_norm": 1.1525744199752808, |
| "learning_rate": 0.0006622429080391422, |
| "loss": 1.429610252380371, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.4192470963432241, |
| "grad_norm": 0.7822180986404419, |
| "learning_rate": 0.0006453824467742515, |
| "loss": 1.424994945526123, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.43027991466804577, |
| "grad_norm": 0.5769438743591309, |
| "learning_rate": 0.0006283393855659275, |
| "loss": 1.4268593788146973, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "grad_norm": 1.1103806495666504, |
| "learning_rate": 0.0006111351304510173, |
| "loss": 1.4214195013046265, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.9379655406873967, |
| "eval_cos_loss": 0.4348486333386476, |
| "eval_dec_loss": 0.11333615507986118, |
| "eval_loss": 1.423404996329025, |
| "eval_mse2_loss": 0.14630243560271478, |
| "eval_mse3_loss": 0.0406117777207068, |
| "eval_mse_loss": 1.2364907806107739, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5651513632934994, |
| "flow/improvement_ratio": 0.8989848929173403, |
| "flow/mag_ratio_mean": 0.5767871759085259, |
| "flow/mag_ratio_std": 0.2627801252390022, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.9379655406873967, |
| "eval_cos_loss": 0.4348486333386476, |
| "eval_dec_loss": 0.11333615507986118, |
| "eval_loss": 1.423404996329025, |
| "eval_mse2_loss": 0.14630243560271478, |
| "eval_mse3_loss": 0.0406117777207068, |
| "eval_mse_loss": 1.2364907806107739, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 150.5926, |
| "eval_samples_per_second": 199.213, |
| "eval_steps_per_second": 3.114, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5651513632934994, |
| "flow/improvement_ratio": 0.8989848929173403, |
| "flow/mag_ratio_mean": 0.5767871759085259, |
| "flow/mag_ratio_std": 0.2627801252390022, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.45234555131768917, |
| "grad_norm": 0.9379155039787292, |
| "learning_rate": 0.0005937912899254605, |
| "loss": 1.4213385581970215, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.46337836964251083, |
| "grad_norm": 0.7825599312782288, |
| "learning_rate": 0.0005763296478040787, |
| "loss": 1.4202309846878052, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.4744111879673325, |
| "grad_norm": 0.9089685082435608, |
| "learning_rate": 0.0005587721358601663, |
| "loss": 1.4216351509094238, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "grad_norm": 0.8983961939811707, |
| "learning_rate": 0.0005411408062792448, |
| "loss": 1.4181705713272095, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.936860898611364, |
| "eval_cos_loss": 0.43602795717812803, |
| "eval_dec_loss": 0.11359805543261614, |
| "eval_loss": 1.4229239845580892, |
| "eval_mse2_loss": 0.1442256863596343, |
| "eval_mse3_loss": 0.04007743425897634, |
| "eval_mse_loss": 1.2386208620152748, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5639720439656711, |
| "flow/improvement_ratio": 0.8950573275846713, |
| "flow/mag_ratio_mean": 0.5711015071441878, |
| "flow/mag_ratio_std": 0.2601570950896501, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.936860898611364, |
| "eval_cos_loss": 0.43602795717812803, |
| "eval_dec_loss": 0.11359805543261614, |
| "eval_loss": 1.4229239845580892, |
| "eval_mse2_loss": 0.1442256863596343, |
| "eval_mse3_loss": 0.04007743425897634, |
| "eval_mse_loss": 1.2386208620152748, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 149.9228, |
| "eval_samples_per_second": 200.103, |
| "eval_steps_per_second": 3.128, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5639720439656711, |
| "flow/improvement_ratio": 0.8950573275846713, |
| "flow/mag_ratio_mean": 0.5711015071441878, |
| "flow/mag_ratio_std": 0.2601570950896501, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4964768246169759, |
| "grad_norm": 0.8639885783195496, |
| "learning_rate": 0.0005234578039615789, |
| "loss": 1.4164997339248657, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5075096429417976, |
| "grad_norm": 1.0776760578155518, |
| "learning_rate": 0.0005057453387082458, |
| "loss": 1.41534423828125, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5185424612666193, |
| "grad_norm": 1.164801001548767, |
| "learning_rate": 0.0004880256573256866, |
| "loss": 1.417317509651184, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "grad_norm": 0.853115439414978, |
| "learning_rate": 0.0004703210156837805, |
| "loss": 1.4166315793991089, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.9380693394475813, |
| "eval_cos_loss": 0.43475201853048573, |
| "eval_dec_loss": 0.11086099378979092, |
| "eval_loss": 1.4183173507515556, |
| "eval_mse2_loss": 0.14407628963687527, |
| "eval_mse3_loss": 0.03990168983081002, |
| "eval_mse_loss": 1.2343393711647246, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5652479861082553, |
| "flow/improvement_ratio": 0.896212916130196, |
| "flow/mag_ratio_mean": 0.5760958854323511, |
| "flow/mag_ratio_std": 0.2636355218539106, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.9380693394475813, |
| "eval_cos_loss": 0.43475201853048573, |
| "eval_dec_loss": 0.11086099378979092, |
| "eval_loss": 1.4183173507515556, |
| "eval_mse2_loss": 0.14407628963687527, |
| "eval_mse3_loss": 0.03990168983081002, |
| "eval_mse_loss": 1.2343393711647246, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.7991, |
| "eval_samples_per_second": 197.63, |
| "eval_steps_per_second": 3.09, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5652479861082553, |
| "flow/improvement_ratio": 0.896212916130196, |
| "flow/mag_ratio_mean": 0.5760958854323511, |
| "flow/mag_ratio_std": 0.2636355218539106, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5406080979162626, |
| "grad_norm": 0.5429331064224243, |
| "learning_rate": 0.0004526536507625343, |
| "loss": 1.4145379066467285, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5516409162410844, |
| "grad_norm": 1.1079273223876953, |
| "learning_rate": 0.00043504575272249973, |
| "loss": 1.4163099527359009, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.562673734565906, |
| "grad_norm": 1.1678777933120728, |
| "learning_rate": 0.0004175194370339921, |
| "loss": 1.4152926206588745, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "grad_norm": 0.950139045715332, |
| "learning_rate": 0.0004000967167001243, |
| "loss": 1.4137598276138306, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.9388806472418573, |
| "eval_cos_loss": 0.4328056685070493, |
| "eval_dec_loss": 0.10686975376589149, |
| "eval_loss": 1.4127923059565173, |
| "eval_mse2_loss": 0.14246540646880929, |
| "eval_mse3_loss": 0.03924089211867308, |
| "eval_mse_loss": 1.231086006296723, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5671943300314295, |
| "flow/improvement_ratio": 0.8961777138049161, |
| "flow/mag_ratio_mean": 0.5744174228294063, |
| "flow/mag_ratio_std": 0.2622520730121812, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.9388806472418573, |
| "eval_cos_loss": 0.4328056685070493, |
| "eval_dec_loss": 0.10686975376589149, |
| "eval_loss": 1.4127923059565173, |
| "eval_mse2_loss": 0.14246540646880929, |
| "eval_mse3_loss": 0.03924089211867308, |
| "eval_mse_loss": 1.231086006296723, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.3818, |
| "eval_samples_per_second": 198.174, |
| "eval_steps_per_second": 3.098, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5671943300314295, |
| "flow/improvement_ratio": 0.8961777138049161, |
| "flow/mag_ratio_mean": 0.5744174228294063, |
| "flow/mag_ratio_std": 0.2622520730121812, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5847393712155494, |
| "grad_norm": 0.7284323573112488, |
| "learning_rate": 0.00038279947460853446, |
| "loss": 1.4107180833816528, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.595772189540371, |
| "grad_norm": 1.2418670654296875, |
| "learning_rate": 0.00036564943604654345, |
| "loss": 1.410542368888855, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6068050078651928, |
| "grad_norm": 1.182166576385498, |
| "learning_rate": 0.00034866814141425254, |
| "loss": 1.4119616746902466, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "grad_norm": 0.7055562138557434, |
| "learning_rate": 0.0003318769191698637, |
| "loss": 1.4102239608764648, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.9391033205296514, |
| "eval_cos_loss": 0.4321882768607597, |
| "eval_dec_loss": 0.10784589936202174, |
| "eval_loss": 1.4102330451835192, |
| "eval_mse2_loss": 0.14264666664003056, |
| "eval_mse3_loss": 0.03934626972306766, |
| "eval_mse_loss": 1.2282401104725755, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5678117201526536, |
| "flow/improvement_ratio": 0.8967333109394066, |
| "flow/mag_ratio_mean": 0.5778467524280426, |
| "flow/mag_ratio_std": 0.2675150448897246, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.9391033205296514, |
| "eval_cos_loss": 0.4321882768607597, |
| "eval_dec_loss": 0.10784589936202174, |
| "eval_loss": 1.4102330451835192, |
| "eval_mse2_loss": 0.14264666664003056, |
| "eval_mse3_loss": 0.03934626972306766, |
| "eval_mse_loss": 1.2282401104725755, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.545, |
| "eval_samples_per_second": 196.663, |
| "eval_steps_per_second": 3.075, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5678117201526536, |
| "flow/improvement_ratio": 0.8967333109394066, |
| "flow/mag_ratio_mean": 0.5778467524280426, |
| "flow/mag_ratio_std": 0.2675150448897246, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6288706445148361, |
| "grad_norm": 1.5477943420410156, |
| "learning_rate": 0.00031529685904119485, |
| "loss": 1.4078161716461182, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6399034628396578, |
| "grad_norm": 1.0092837810516357, |
| "learning_rate": 0.0002989487855370421, |
| "loss": 1.4115574359893799, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6509362811644795, |
| "grad_norm": 0.9162316918373108, |
| "learning_rate": 0.00028285323179165424, |
| "loss": 1.4091846942901611, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "grad_norm": 0.5473514199256897, |
| "learning_rate": 0.0002670304137751759, |
| "loss": 1.4136096239089966, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.9371399350692442, |
| "eval_cos_loss": 0.43145928612904255, |
| "eval_dec_loss": 0.11678189866201107, |
| "eval_loss": 1.412680120102124, |
| "eval_mse2_loss": 0.14467608757110548, |
| "eval_mse3_loss": 0.040516993604791066, |
| "eval_mse_loss": 1.227487043785388, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5685407153324786, |
| "flow/improvement_ratio": 0.8939294283832314, |
| "flow/mag_ratio_mean": 0.5773617385038688, |
| "flow/mag_ratio_std": 0.2658700548064734, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.9371399350692442, |
| "eval_cos_loss": 0.43145928612904255, |
| "eval_dec_loss": 0.11678189866201107, |
| "eval_loss": 1.412680120102124, |
| "eval_mse2_loss": 0.14467608757110548, |
| "eval_mse3_loss": 0.040516993604791066, |
| "eval_mse_loss": 1.227487043785388, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 153.1626, |
| "eval_samples_per_second": 195.87, |
| "eval_steps_per_second": 3.062, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5685407153324786, |
| "flow/improvement_ratio": 0.8939294283832314, |
| "flow/mag_ratio_mean": 0.5773617385038688, |
| "flow/mag_ratio_std": 0.2658700548064734, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6730019178141229, |
| "grad_norm": 1.117191195487976, |
| "learning_rate": 0.0002515002049024435, |
| "loss": 1.409903645515442, |
| "step": 15616 |
| }, |
| { |
| "epoch": 0.6840347361389446, |
| "grad_norm": 0.9312120079994202, |
| "learning_rate": 0.00023628211107203429, |
| "loss": 1.4103206396102905, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.6950675544637662, |
| "grad_norm": 0.6241945028305054, |
| "learning_rate": 0.00022139524616691188, |
| "loss": 1.4096852540969849, |
| "step": 16128 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "grad_norm": 1.1047911643981934, |
| "learning_rate": 0.000206858308047443, |
| "loss": 1.4056518077850342, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.939369810755522, |
| "eval_cos_loss": 0.4307268185656208, |
| "eval_dec_loss": 0.10748835562515868, |
| "eval_loss": 1.4051913678773176, |
| "eval_mse2_loss": 0.14105384695186798, |
| "eval_mse3_loss": 0.03896740539225815, |
| "eval_mse_loss": 1.2251701146554845, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5692731790196921, |
| "flow/improvement_ratio": 0.8951106891194894, |
| "flow/mag_ratio_mean": 0.576439301342344, |
| "flow/mag_ratio_std": 0.26492403600134573, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.939369810755522, |
| "eval_cos_loss": 0.4307268185656208, |
| "eval_dec_loss": 0.10748835562515868, |
| "eval_loss": 1.4051913678773176, |
| "eval_mse2_loss": 0.14105384695186798, |
| "eval_mse3_loss": 0.03896740539225815, |
| "eval_mse_loss": 1.2251701146554845, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.3979, |
| "eval_samples_per_second": 198.153, |
| "eval_steps_per_second": 3.098, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5692731790196921, |
| "flow/improvement_ratio": 0.8951106891194894, |
| "flow/mag_ratio_mean": 0.576439301342344, |
| "flow/mag_ratio_std": 0.26492403600134573, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7171331911134096, |
| "grad_norm": 0.6667467355728149, |
| "learning_rate": 0.00019268955506693798, |
| "loss": 1.4079476594924927, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.7281660094382313, |
| "grad_norm": 1.067337989807129, |
| "learning_rate": 0.00017890678313921, |
| "loss": 1.4071507453918457, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.739198827763053, |
| "grad_norm": 1.381058692932129, |
| "learning_rate": 0.00016552730338695792, |
| "loss": 1.4063572883605957, |
| "step": 17152 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "grad_norm": 1.0033129453659058, |
| "learning_rate": 0.00015256792039904465, |
| "loss": 1.404827356338501, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.9408599959594597, |
| "eval_cos_loss": 0.4294143016023168, |
| "eval_dec_loss": 0.10632320484722346, |
| "eval_loss": 1.4002188098456052, |
| "eval_mse2_loss": 0.14073645838224558, |
| "eval_mse3_loss": 0.038728228359143614, |
| "eval_mse_loss": 1.2207541257333654, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5705856997321155, |
| "flow/improvement_ratio": 0.8967551362794092, |
| "flow/mag_ratio_mean": 0.579864633871294, |
| "flow/mag_ratio_std": 0.2687839522544763, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.9408599959594597, |
| "eval_cos_loss": 0.4294143016023168, |
| "eval_dec_loss": 0.10632320484722346, |
| "eval_loss": 1.4002188098456052, |
| "eval_mse2_loss": 0.14073645838224558, |
| "eval_mse3_loss": 0.038728228359143614, |
| "eval_mse_loss": 1.2207541257333654, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.5248, |
| "eval_samples_per_second": 197.987, |
| "eval_steps_per_second": 3.095, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5705856997321155, |
| "flow/improvement_ratio": 0.8967551362794092, |
| "flow/mag_ratio_mean": 0.579864633871294, |
| "flow/mag_ratio_std": 0.2687839522544763, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7612644644126964, |
| "grad_norm": 0.7275887727737427, |
| "learning_rate": 0.00014004491112398103, |
| "loss": 1.4066880941390991, |
| "step": 17664 |
| }, |
| { |
| "epoch": 0.7722972827375181, |
| "grad_norm": 0.4016956686973572, |
| "learning_rate": 0.00012797400442612433, |
| "loss": 1.4000524282455444, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.7833301010623397, |
| "grad_norm": 0.6152352690696716, |
| "learning_rate": 0.00011637036133026895, |
| "loss": 1.4042223691940308, |
| "step": 18176 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "grad_norm": 1.1916025876998901, |
| "learning_rate": 0.00010524855597944216, |
| "loss": 1.4021508693695068, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.9395503562023391, |
| "eval_cos_loss": 0.43083440838083786, |
| "eval_dec_loss": 0.10896516631621478, |
| "eval_loss": 1.4056353792707041, |
| "eval_mse2_loss": 0.14135168713610818, |
| "eval_mse3_loss": 0.03911572252350575, |
| "eval_mse_loss": 1.225167971175871, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5691655881877647, |
| "flow/improvement_ratio": 0.8966002826497499, |
| "flow/mag_ratio_mean": 0.5784899838951859, |
| "flow/mag_ratio_std": 0.26509309523522473, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.9395503562023391, |
| "eval_cos_loss": 0.43083440838083786, |
| "eval_dec_loss": 0.10896516631621478, |
| "eval_loss": 1.4056353792707041, |
| "eval_mse2_loss": 0.14135168713610818, |
| "eval_mse3_loss": 0.03911572252350575, |
| "eval_mse_loss": 1.225167971175871, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 153.6467, |
| "eval_samples_per_second": 195.253, |
| "eval_steps_per_second": 3.052, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5691655881877647, |
| "flow/improvement_ratio": 0.8966002826497499, |
| "flow/mag_ratio_mean": 0.5784899838951859, |
| "flow/mag_ratio_std": 0.26509309523522473, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8053957377119831, |
| "grad_norm": 0.8944941759109497, |
| "learning_rate": 9.462255732982089e-05, |
| "loss": 1.4011187553405762, |
| "step": 18688 |
| }, |
| { |
| "epoch": 0.8164285560368048, |
| "grad_norm": 0.6294699311256409, |
| "learning_rate": 8.450571160576348e-05, |
| "loss": 1.4046047925949097, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.8274613743616265, |
| "grad_norm": 0.4316425323486328, |
| "learning_rate": 7.491072553698764e-05, |
| "loss": 1.4013915061950684, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "grad_norm": 0.42900189757347107, |
| "learning_rate": 6.584965039895586e-05, |
| "loss": 1.398647427558899, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.9387487761199217, |
| "eval_cos_loss": 0.4305588583320951, |
| "eval_dec_loss": 0.10771015434186341, |
| "eval_loss": 1.4043200140568748, |
| "eval_mse2_loss": 0.14159503931811115, |
| "eval_mse3_loss": 0.03919749533030779, |
| "eval_mse_loss": 1.2235274815610222, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5694411402063837, |
| "flow/improvement_ratio": 0.8948449469578545, |
| "flow/mag_ratio_mean": 0.5805903251237198, |
| "flow/mag_ratio_std": 0.267388239534679, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.9387487761199217, |
| "eval_cos_loss": 0.4305588583320951, |
| "eval_dec_loss": 0.10771015434186341, |
| "eval_loss": 1.4043200140568748, |
| "eval_mse2_loss": 0.14159503931811115, |
| "eval_mse3_loss": 0.03919749533030779, |
| "eval_mse_loss": 1.2235274815610222, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 152.1206, |
| "eval_samples_per_second": 197.212, |
| "eval_steps_per_second": 3.083, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5694411402063837, |
| "flow/improvement_ratio": 0.8948449469578545, |
| "flow/mag_ratio_mean": 0.5805903251237198, |
| "flow/mag_ratio_std": 0.267388239534679, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8495270110112698, |
| "grad_norm": 0.797498881816864, |
| "learning_rate": 5.73338668765051e-05, |
| "loss": 1.4058088064193726, |
| "step": 19712 |
| }, |
| { |
| "epoch": 0.8605598293360915, |
| "grad_norm": 0.463348925113678, |
| "learning_rate": 4.9374070769740984e-05, |
| "loss": 1.4032570123672485, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.8715926476609133, |
| "grad_norm": 0.8888425827026367, |
| "learning_rate": 4.198025956014095e-05, |
| "loss": 1.403628945350647, |
| "step": 20224 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "grad_norm": 0.9965147376060486, |
| "learning_rate": 3.516171985374755e-05, |
| "loss": 1.404217004776001, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.9386200539251414, |
| "eval_cos_loss": 0.43061384094803573, |
| "eval_dec_loss": 0.1105148816930015, |
| "eval_loss": 1.4051892691329597, |
| "eval_mse2_loss": 0.1420527106758628, |
| "eval_mse3_loss": 0.03952200293366207, |
| "eval_mse_loss": 1.2236145594989314, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5693861599415858, |
| "flow/improvement_ratio": 0.8978289964356656, |
| "flow/mag_ratio_mean": 0.5780887319080865, |
| "flow/mag_ratio_std": 0.26583226638307955, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.9386200539251414, |
| "eval_cos_loss": 0.43061384094803573, |
| "eval_dec_loss": 0.1105148816930015, |
| "eval_loss": 1.4051892691329597, |
| "eval_mse2_loss": 0.1420527106758628, |
| "eval_mse3_loss": 0.03952200293366207, |
| "eval_mse_loss": 1.2236145594989314, |
| "eval_rec_loss": 0.047009017791098624, |
| "eval_runtime": 151.5082, |
| "eval_samples_per_second": 198.009, |
| "eval_steps_per_second": 3.096, |
| "eval_var_loss": 0.01723895594080501, |
| "flow/cos_sim": 0.5693861599415858, |
| "flow/improvement_ratio": 0.8978289964356656, |
| "flow/mag_ratio_mean": 0.5780887319080865, |
| "flow/mag_ratio_std": 0.26583226638307955, |
| "step": 20480 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|