| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1024, |
| "global_step": 23204, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011032818324821687, |
| "grad_norm": 0.4159204065799713, |
| "learning_rate": 0.000498046875, |
| "loss": 1.9663532972335815, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.022065636649643373, |
| "grad_norm": 0.5120339393615723, |
| "learning_rate": 0.000998046875, |
| "loss": 1.8649803400039673, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03309845497446506, |
| "grad_norm": 0.5888333320617676, |
| "learning_rate": 0.000999688448778502, |
| "loss": 1.8195451498031616, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 0.5426309704780579, |
| "learning_rate": 0.0009987492950653055, |
| "loss": 1.7870738506317139, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.7367960024565896, |
| "eval_cos_loss": 0.733703343853005, |
| "eval_dec_loss": 0.0009557238793241849, |
| "eval_loss": 1.7733750612750998, |
| "eval_mse2_loss": 0.24898268771705342, |
| "eval_mse_loss": 1.7733750612750998, |
| "eval_rec_loss": 0.07178680001815627, |
| "eval_var_loss": 0.03918575903357092, |
| "flow/cos_sim": 0.2662966569413, |
| "flow/improvement_ratio": 0.7138748223593494, |
| "flow/mag_ratio_mean": 0.2806141933144283, |
| "flow/mag_ratio_std": 0.19538807297057942, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.7367960024565896, |
| "eval_cos_loss": 0.733703343853005, |
| "eval_dec_loss": 0.0009557238793241849, |
| "eval_loss": 1.7733750612750998, |
| "eval_mse2_loss": 0.24898268771705342, |
| "eval_mse_loss": 1.7733750612750998, |
| "eval_rec_loss": 0.07178680001815627, |
| "eval_runtime": 102.2497, |
| "eval_samples_per_second": 293.399, |
| "eval_steps_per_second": 4.587, |
| "eval_var_loss": 0.03918575903357092, |
| "flow/cos_sim": 0.2662966569413, |
| "flow/improvement_ratio": 0.7138748223593494, |
| "flow/mag_ratio_mean": 0.2806141933144283, |
| "flow/mag_ratio_std": 0.19538807297057942, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05516409162410843, |
| "grad_norm": 0.559907853603363, |
| "learning_rate": 0.0009971837136430763, |
| "loss": 1.7695353031158447, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.06619690994893011, |
| "grad_norm": 0.5250961780548096, |
| "learning_rate": 0.0009949936708776692, |
| "loss": 1.7464680671691895, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.07722972827375181, |
| "grad_norm": 0.5413498282432556, |
| "learning_rate": 0.0009921819174566252, |
| "loss": 1.7358378171920776, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 0.5907743573188782, |
| "learning_rate": 0.000988751984934317, |
| "loss": 1.7120215892791748, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6918347015309689, |
| "eval_dec_loss": 0.0008496561978685745, |
| "eval_loss": 1.7068908603460804, |
| "eval_mse2_loss": 0.2311722892624483, |
| "eval_mse_loss": 1.7068908603460804, |
| "eval_rec_loss": 0.07192788415673826, |
| "eval_var_loss": 0.039484830477884585, |
| "flow/cos_sim": 0.308165298945614, |
| "flow/improvement_ratio": 0.7413157426726336, |
| "flow/mag_ratio_mean": 0.32819121402463935, |
| "flow/mag_ratio_std": 0.22936584363614065, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6918347015309689, |
| "eval_dec_loss": 0.0008496561978685745, |
| "eval_loss": 1.7068908603460804, |
| "eval_mse2_loss": 0.2311722892624483, |
| "eval_mse_loss": 1.7068908603460804, |
| "eval_rec_loss": 0.07192788415673826, |
| "eval_runtime": 115.5959, |
| "eval_samples_per_second": 259.525, |
| "eval_steps_per_second": 4.057, |
| "eval_var_loss": 0.039484830477884585, |
| "flow/cos_sim": 0.308165298945614, |
| "flow/improvement_ratio": 0.7413157426726336, |
| "flow/mag_ratio_mean": 0.32819121402463935, |
| "flow/mag_ratio_std": 0.22936584363614065, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09929536492339518, |
| "grad_norm": 0.5151912569999695, |
| "learning_rate": 0.0009847081812963268, |
| "loss": 1.6995676755905151, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.11032818324821686, |
| "grad_norm": 0.5708624124526978, |
| "learning_rate": 0.0009800555855486275, |
| "loss": 1.691954493522644, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.12136100157303854, |
| "grad_norm": 0.5266889333724976, |
| "learning_rate": 0.0009748000413383664, |
| "loss": 1.6838405132293701, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 0.5887771844863892, |
| "learning_rate": 0.0009689481496142604, |
| "loss": 1.6767997741699219, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6758898748263621, |
| "eval_dec_loss": 0.0008988727467537456, |
| "eval_loss": 1.6722511675820422, |
| "eval_mse2_loss": 0.22564460429301395, |
| "eval_mse_loss": 1.6722511675820422, |
| "eval_rec_loss": 0.07889290152093781, |
| "eval_var_loss": 0.03933484109241698, |
| "flow/cos_sim": 0.3241101224412288, |
| "flow/improvement_ratio": 0.7517435145276442, |
| "flow/mag_ratio_mean": 0.345418343729556, |
| "flow/mag_ratio_std": 0.2452551066112925, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6758898748263621, |
| "eval_dec_loss": 0.0008988727467537456, |
| "eval_loss": 1.6722511675820422, |
| "eval_mse2_loss": 0.22564460429301395, |
| "eval_mse_loss": 1.6722511675820422, |
| "eval_rec_loss": 0.07889290152093781, |
| "eval_runtime": 104.0696, |
| "eval_samples_per_second": 288.269, |
| "eval_steps_per_second": 4.507, |
| "eval_var_loss": 0.03933484109241698, |
| "flow/cos_sim": 0.3241101224412288, |
| "flow/improvement_ratio": 0.7517435145276442, |
| "flow/mag_ratio_mean": 0.345418343729556, |
| "flow/mag_ratio_std": 0.2452551066112925, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.14342663822268192, |
| "grad_norm": 0.541419506072998, |
| "learning_rate": 0.0009625072603358231, |
| "loss": 1.673985242843628, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.15445945654750362, |
| "grad_norm": 0.5490341186523438, |
| "learning_rate": 0.0009554854632418371, |
| "loss": 1.6683088541030884, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.1654922748723253, |
| "grad_norm": 0.5031152963638306, |
| "learning_rate": 0.000947891577689663, |
| "loss": 1.6581937074661255, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 0.5746980905532837, |
| "learning_rate": 0.0009397351415781539, |
| "loss": 1.6514787673950195, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.8306582531325222, |
| "eval_cos_loss": 0.6603395223363376, |
| "eval_dec_loss": 0.0008816038105594141, |
| "eval_loss": 1.655877443518974, |
| "eval_mse2_loss": 0.2203613625152279, |
| "eval_mse_loss": 1.655877443518974, |
| "eval_rec_loss": 0.07825224432371446, |
| "eval_var_loss": 0.03895786802953621, |
| "flow/cos_sim": 0.3396604795699943, |
| "flow/improvement_ratio": 0.7647032693250856, |
| "flow/mag_ratio_mean": 0.3492967891159342, |
| "flow/mag_ratio_std": 0.24545467633809617, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.8306582531325222, |
| "eval_cos_loss": 0.6603395223363376, |
| "eval_dec_loss": 0.0008816038105594141, |
| "eval_loss": 1.655877443518974, |
| "eval_mse2_loss": 0.2203613625152279, |
| "eval_mse_loss": 1.655877443518974, |
| "eval_rec_loss": 0.07825224432371446, |
| "eval_runtime": 104.9703, |
| "eval_samples_per_second": 285.795, |
| "eval_steps_per_second": 4.468, |
| "eval_var_loss": 0.03895786802953621, |
| "flow/cos_sim": 0.3396604795699943, |
| "flow/improvement_ratio": 0.7647032693250856, |
| "flow/mag_ratio_mean": 0.3492967891159342, |
| "flow/mag_ratio_std": 0.24545467633809617, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18755791152196866, |
| "grad_norm": 0.5913429260253906, |
| "learning_rate": 0.000931026399368079, |
| "loss": 1.6492750644683838, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.19859072984679035, |
| "grad_norm": 0.5509883761405945, |
| "learning_rate": 0.0009217762892151117, |
| "loss": 1.6520144939422607, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.20962354817161205, |
| "grad_norm": 0.5679228901863098, |
| "learning_rate": 0.0009119964292315354, |
| "loss": 1.6389240026474, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 0.5711391568183899, |
| "learning_rate": 0.0009016991028939279, |
| "loss": 1.6339716911315918, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.650760640467662, |
| "eval_dec_loss": 0.0008636942622270425, |
| "eval_loss": 1.6368520514034768, |
| "eval_mse2_loss": 0.2165674694629112, |
| "eval_mse_loss": 1.6368520514034768, |
| "eval_rec_loss": 0.07919453000272515, |
| "eval_var_loss": 0.038470808579437514, |
| "flow/cos_sim": 0.3492393595005658, |
| "flow/improvement_ratio": 0.7687677682589874, |
| "flow/mag_ratio_mean": 0.3705801928856734, |
| "flow/mag_ratio_std": 0.24794671269876362, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.650760640467662, |
| "eval_dec_loss": 0.0008636942622270425, |
| "eval_loss": 1.6368520514034768, |
| "eval_mse2_loss": 0.2165674694629112, |
| "eval_mse_loss": 1.6368520514034768, |
| "eval_rec_loss": 0.07919453000272515, |
| "eval_runtime": 106.3849, |
| "eval_samples_per_second": 281.995, |
| "eval_steps_per_second": 4.409, |
| "eval_var_loss": 0.038470808579437514, |
| "flow/cos_sim": 0.3492393595005658, |
| "flow/improvement_ratio": 0.7687677682589874, |
| "flow/mag_ratio_mean": 0.3705801928856734, |
| "flow/mag_ratio_std": 0.24794671269876362, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.23168918482125542, |
| "grad_norm": 0.5657662153244019, |
| "learning_rate": 0.0008908972436151494, |
| "loss": 1.6298149824142456, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2427220031460771, |
| "grad_norm": 0.5444105267524719, |
| "learning_rate": 0.0008796044185000127, |
| "loss": 1.6245518922805786, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.2537548214708988, |
| "grad_norm": 0.5386772751808167, |
| "learning_rate": 0.0008678348113050368, |
| "loss": 1.6245043277740479, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 0.593573808670044, |
| "learning_rate": 0.0008556032046236897, |
| "loss": 1.6186625957489014, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.6616874634914218, |
| "eval_cos_loss": 0.6420060966823147, |
| "eval_dec_loss": 0.0008773751602825105, |
| "eval_loss": 1.6212111037931463, |
| "eval_mse2_loss": 0.21424534742130655, |
| "eval_mse_loss": 1.6212111037931463, |
| "eval_rec_loss": 0.07690000988797212, |
| "eval_var_loss": 0.03987920811292586, |
| "flow/cos_sim": 0.35799390230097494, |
| "flow/improvement_ratio": 0.7726656893677295, |
| "flow/mag_ratio_mean": 0.378862192890029, |
| "flow/mag_ratio_std": 0.25638675289367563, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.6616874634914218, |
| "eval_cos_loss": 0.6420060966823147, |
| "eval_dec_loss": 0.0008773751602825105, |
| "eval_loss": 1.6212111037931463, |
| "eval_mse2_loss": 0.21424534742130655, |
| "eval_mse_loss": 1.6212111037931463, |
| "eval_rec_loss": 0.07690000988797212, |
| "eval_runtime": 107.8088, |
| "eval_samples_per_second": 278.27, |
| "eval_steps_per_second": 4.35, |
| "eval_var_loss": 0.03987920811292586, |
| "flow/cos_sim": 0.35799390230097494, |
| "flow/improvement_ratio": 0.7726656893677295, |
| "flow/mag_ratio_mean": 0.378862192890029, |
| "flow/mag_ratio_std": 0.25638675289367563, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2758204581205422, |
| "grad_norm": 0.6531504988670349, |
| "learning_rate": 0.000842924961319492, |
| "loss": 1.6246501207351685, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.28685327644536385, |
| "grad_norm": 0.623314619064331, |
| "learning_rate": 0.0008298160052303045, |
| "loss": 1.6117119789123535, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.2978860947701855, |
| "grad_norm": 0.6569280028343201, |
| "learning_rate": 0.0008162928011680314, |
| "loss": 1.6110011339187622, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 0.5780096054077148, |
| "learning_rate": 0.000802372334238864, |
| "loss": 1.6017733812332153, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6337974287553637, |
| "eval_dec_loss": 0.000868526577018946, |
| "eval_loss": 1.610691403529283, |
| "eval_mse2_loss": 0.20906051554913713, |
| "eval_mse_loss": 1.610691403529283, |
| "eval_rec_loss": 0.07160911561329482, |
| "eval_var_loss": 0.038991474600107684, |
| "flow/cos_sim": 0.36620256860754385, |
| "flow/improvement_ratio": 0.7784959132483265, |
| "flow/mag_ratio_mean": 0.3764179727034782, |
| "flow/mag_ratio_std": 0.2555681874693584, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6337974287553637, |
| "eval_dec_loss": 0.000868526577018946, |
| "eval_loss": 1.610691403529283, |
| "eval_mse2_loss": 0.20906051554913713, |
| "eval_mse_loss": 1.610691403529283, |
| "eval_rec_loss": 0.07160911561329482, |
| "eval_runtime": 109.8071, |
| "eval_samples_per_second": 273.206, |
| "eval_steps_per_second": 4.271, |
| "eval_var_loss": 0.038991474600107684, |
| "flow/cos_sim": 0.36620256860754385, |
| "flow/improvement_ratio": 0.7784959132483265, |
| "flow/mag_ratio_mean": 0.3764179727034782, |
| "flow/mag_ratio_std": 0.2555681874693584, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3199517314198289, |
| "grad_norm": 0.69647616147995, |
| "learning_rate": 0.0007880720885100349, |
| "loss": 1.6023547649383545, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3309845497446506, |
| "grad_norm": 0.5961915850639343, |
| "learning_rate": 0.0007734100250498788, |
| "loss": 1.6072485446929932, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3420173680694723, |
| "grad_norm": 0.5459280610084534, |
| "learning_rate": 0.000758404559368781, |
| "loss": 1.5960863828659058, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "grad_norm": 0.6102398037910461, |
| "learning_rate": 0.0007430745382893488, |
| "loss": 1.5963565111160278, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.7197940696165602, |
| "eval_cos_loss": 0.6262508921785903, |
| "eval_dec_loss": 0.0008588121543493944, |
| "eval_loss": 1.5951941470855844, |
| "eval_mse2_loss": 0.2093085884920824, |
| "eval_mse_loss": 1.5951941470855844, |
| "eval_rec_loss": 0.07648367836261227, |
| "eval_var_loss": 0.038927744613336854, |
| "flow/cos_sim": 0.37374910820267604, |
| "flow/improvement_ratio": 0.7826825692963753, |
| "flow/mag_ratio_mean": 0.3904182631959285, |
| "flow/mag_ratio_std": 0.2646757621310159, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.7197940696165602, |
| "eval_cos_loss": 0.6262508921785903, |
| "eval_dec_loss": 0.0008588121543493944, |
| "eval_loss": 1.5951941470855844, |
| "eval_mse2_loss": 0.2093085884920824, |
| "eval_mse_loss": 1.5951941470855844, |
| "eval_rec_loss": 0.07648367836261227, |
| "eval_runtime": 109.4311, |
| "eval_samples_per_second": 274.145, |
| "eval_steps_per_second": 4.286, |
| "eval_var_loss": 0.038927744613336854, |
| "flow/cos_sim": 0.37374910820267604, |
| "flow/improvement_ratio": 0.7826825692963753, |
| "flow/mag_ratio_mean": 0.3904182631959285, |
| "flow/mag_ratio_std": 0.2646757621310159, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.36408300471911564, |
| "grad_norm": 0.5960484147071838, |
| "learning_rate": 0.0007274392162748551, |
| "loss": 1.596308946609497, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.3751158230439373, |
| "grad_norm": 0.6219334006309509, |
| "learning_rate": 0.000711518231245687, |
| "loss": 1.596260666847229, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.38614864136875904, |
| "grad_norm": 0.6005035638809204, |
| "learning_rate": 0.0006953315799141723, |
| "loss": 1.5805628299713135, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "grad_norm": 0.6499058604240417, |
| "learning_rate": 0.0006788995926687669, |
| "loss": 1.5909146070480347, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.7005635922151724, |
| "eval_cos_loss": 0.620984929012083, |
| "eval_dec_loss": 0.0008564891244093927, |
| "eval_loss": 1.5817514581720966, |
| "eval_mse2_loss": 0.2068242670884773, |
| "eval_mse_loss": 1.5817514581720966, |
| "eval_rec_loss": 0.07730723383711345, |
| "eval_var_loss": 0.039546219771032905, |
| "flow/cos_sim": 0.3790150721952605, |
| "flow/improvement_ratio": 0.7841706645768335, |
| "flow/mag_ratio_mean": 0.39196032393715785, |
| "flow/mag_ratio_std": 0.2648706884145228, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.7005635922151724, |
| "eval_cos_loss": 0.620984929012083, |
| "eval_dec_loss": 0.0008564891244093927, |
| "eval_loss": 1.5817514581720966, |
| "eval_mse2_loss": 0.2068242670884773, |
| "eval_mse_loss": 1.5817514581720966, |
| "eval_rec_loss": 0.07730723383711345, |
| "eval_runtime": 108.7623, |
| "eval_samples_per_second": 275.831, |
| "eval_steps_per_second": 4.312, |
| "eval_var_loss": 0.039546219771032905, |
| "flow/cos_sim": 0.3790150721952605, |
| "flow/improvement_ratio": 0.7841706645768335, |
| "flow/mag_ratio_mean": 0.39196032393715785, |
| "flow/mag_ratio_std": 0.2648706884145228, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4082142780184024, |
| "grad_norm": 0.6517037153244019, |
| "learning_rate": 0.0006622429080391422, |
| "loss": 1.5825490951538086, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.4192470963432241, |
| "grad_norm": 0.5423082709312439, |
| "learning_rate": 0.0006453824467742515, |
| "loss": 1.5777390003204346, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.43027991466804577, |
| "grad_norm": 0.5632091164588928, |
| "learning_rate": 0.0006283393855659275, |
| "loss": 1.5704185962677002, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "grad_norm": 0.6414577960968018, |
| "learning_rate": 0.0006111351304510173, |
| "loss": 1.576694369316101, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6192940467202079, |
| "eval_dec_loss": 0.0008655648372353684, |
| "eval_loss": 1.5782830768556737, |
| "eval_mse2_loss": 0.20706720681968274, |
| "eval_mse_loss": 1.5782830768556737, |
| "eval_rec_loss": 0.08069103678215796, |
| "eval_var_loss": 0.03956426539098912, |
| "flow/cos_sim": 0.38070595251725936, |
| "flow/improvement_ratio": 0.7898565209242326, |
| "flow/mag_ratio_mean": 0.3968030217486912, |
| "flow/mag_ratio_std": 0.26485012424017573, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6192940467202079, |
| "eval_dec_loss": 0.0008655648372353684, |
| "eval_loss": 1.5782830768556737, |
| "eval_mse2_loss": 0.20706720681968274, |
| "eval_mse_loss": 1.5782830768556737, |
| "eval_rec_loss": 0.08069103678215796, |
| "eval_runtime": 109.5337, |
| "eval_samples_per_second": 273.888, |
| "eval_steps_per_second": 4.282, |
| "eval_var_loss": 0.03956426539098912, |
| "flow/cos_sim": 0.38070595251725936, |
| "flow/improvement_ratio": 0.7898565209242326, |
| "flow/mag_ratio_mean": 0.3968030217486912, |
| "flow/mag_ratio_std": 0.26485012424017573, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.45234555131768917, |
| "grad_norm": 0.6964972019195557, |
| "learning_rate": 0.0005937912899254605, |
| "loss": 1.5744432210922241, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.46337836964251083, |
| "grad_norm": 0.6759219765663147, |
| "learning_rate": 0.0005763296478040787, |
| "loss": 1.5734292268753052, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.4744111879673325, |
| "grad_norm": 0.6557888984680176, |
| "learning_rate": 0.0005587721358601663, |
| "loss": 1.5690912008285522, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "grad_norm": 0.6908146739006042, |
| "learning_rate": 0.0005411408062792448, |
| "loss": 1.5694100856781006, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.6358310640509648, |
| "eval_cos_loss": 0.6153862965005293, |
| "eval_dec_loss": 0.0008486716838240542, |
| "eval_loss": 1.5730945626809907, |
| "eval_mse2_loss": 0.20262245840228188, |
| "eval_mse_loss": 1.5730945626809907, |
| "eval_rec_loss": 0.08374404142013014, |
| "eval_var_loss": 0.03941917608478176, |
| "flow/cos_sim": 0.3846137027369379, |
| "flow/improvement_ratio": 0.7866138059701493, |
| "flow/mag_ratio_mean": 0.4038491099119695, |
| "flow/mag_ratio_std": 0.2661667937980786, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.6358310640509648, |
| "eval_cos_loss": 0.6153862965005293, |
| "eval_dec_loss": 0.0008486716838240542, |
| "eval_loss": 1.5730945626809907, |
| "eval_mse2_loss": 0.20262245840228188, |
| "eval_mse_loss": 1.5730945626809907, |
| "eval_rec_loss": 0.08374404142013014, |
| "eval_runtime": 106.1904, |
| "eval_samples_per_second": 282.511, |
| "eval_steps_per_second": 4.417, |
| "eval_var_loss": 0.03941917608478176, |
| "flow/cos_sim": 0.3846137027369379, |
| "flow/improvement_ratio": 0.7866138059701493, |
| "flow/mag_ratio_mean": 0.4038491099119695, |
| "flow/mag_ratio_std": 0.2661667937980786, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4964768246169759, |
| "grad_norm": 0.7366820573806763, |
| "learning_rate": 0.0005234578039615789, |
| "loss": 1.5634552240371704, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5075096429417976, |
| "grad_norm": 0.6397421956062317, |
| "learning_rate": 0.0005057453387082458, |
| "loss": 1.5681736469268799, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5185424612666193, |
| "grad_norm": 0.6078880429267883, |
| "learning_rate": 0.0004880256573256866, |
| "loss": 1.5718961954116821, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "grad_norm": 0.603773832321167, |
| "learning_rate": 0.0004703210156837805, |
| "loss": 1.5669455528259277, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.5597890627146911, |
| "eval_cos_loss": 0.6101163517691688, |
| "eval_dec_loss": 0.000845343751481549, |
| "eval_loss": 1.561377385277738, |
| "eval_mse2_loss": 0.20208943659054446, |
| "eval_mse_loss": 1.561377385277738, |
| "eval_rec_loss": 0.08231798076787705, |
| "eval_var_loss": 0.03889469863541091, |
| "flow/cos_sim": 0.38988364791310925, |
| "flow/improvement_ratio": 0.7917666134041256, |
| "flow/mag_ratio_mean": 0.4048269701791979, |
| "flow/mag_ratio_std": 0.27302380535267057, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.5597890627146911, |
| "eval_cos_loss": 0.6101163517691688, |
| "eval_dec_loss": 0.000845343751481549, |
| "eval_loss": 1.561377385277738, |
| "eval_mse2_loss": 0.20208943659054446, |
| "eval_mse_loss": 1.561377385277738, |
| "eval_rec_loss": 0.08231798076787705, |
| "eval_runtime": 110.3087, |
| "eval_samples_per_second": 271.964, |
| "eval_steps_per_second": 4.252, |
| "eval_var_loss": 0.03889469863541091, |
| "flow/cos_sim": 0.38988364791310925, |
| "flow/improvement_ratio": 0.7917666134041256, |
| "flow/mag_ratio_mean": 0.4048269701791979, |
| "flow/mag_ratio_std": 0.27302380535267057, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5406080979162626, |
| "grad_norm": 0.5982694625854492, |
| "learning_rate": 0.0004526536507625343, |
| "loss": 1.5554094314575195, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5516409162410844, |
| "grad_norm": 0.6010884046554565, |
| "learning_rate": 0.00043504575272249973, |
| "loss": 1.555287480354309, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.562673734565906, |
| "grad_norm": 0.6438276171684265, |
| "learning_rate": 0.0004175194370339921, |
| "loss": 1.55382239818573, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "grad_norm": 0.7003888487815857, |
| "learning_rate": 0.0004000967167001243, |
| "loss": 1.556836485862732, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6069312471189479, |
| "eval_dec_loss": 0.0008443268671437456, |
| "eval_loss": 1.5585744848637693, |
| "eval_mse2_loss": 0.20021586301230165, |
| "eval_mse_loss": 1.5585744848637693, |
| "eval_rec_loss": 0.07187017253530559, |
| "eval_var_loss": 0.0385018095318506, |
| "flow/cos_sim": 0.3930687547238397, |
| "flow/improvement_ratio": 0.7943652274766202, |
| "flow/mag_ratio_mean": 0.40750807034435554, |
| "flow/mag_ratio_std": 0.26731591651053316, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6069312471189479, |
| "eval_dec_loss": 0.0008443268671437456, |
| "eval_loss": 1.5585744848637693, |
| "eval_mse2_loss": 0.20021586301230165, |
| "eval_mse_loss": 1.5585744848637693, |
| "eval_rec_loss": 0.07187017253530559, |
| "eval_runtime": 104.9396, |
| "eval_samples_per_second": 285.879, |
| "eval_steps_per_second": 4.469, |
| "eval_var_loss": 0.0385018095318506, |
| "flow/cos_sim": 0.3930687547238397, |
| "flow/improvement_ratio": 0.7943652274766202, |
| "flow/mag_ratio_mean": 0.40750807034435554, |
| "flow/mag_ratio_std": 0.26731591651053316, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5847393712155494, |
| "grad_norm": 0.7015474438667297, |
| "learning_rate": 0.00038279947460853446, |
| "loss": 1.555707335472107, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.595772189540371, |
| "grad_norm": 0.6728999018669128, |
| "learning_rate": 0.00036564943604654345, |
| "loss": 1.5453166961669922, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6068050078651928, |
| "grad_norm": 0.7017489075660706, |
| "learning_rate": 0.00034866814141425254, |
| "loss": 1.5511311292648315, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "grad_norm": 0.696759045124054, |
| "learning_rate": 0.0003318769191698637, |
| "loss": 1.5481939315795898, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6003155639685039, |
| "eval_dec_loss": 0.0008366067935938516, |
| "eval_loss": 1.553359952816831, |
| "eval_mse2_loss": 0.19910312760105012, |
| "eval_mse_loss": 1.553359952816831, |
| "eval_rec_loss": 0.08619301475838685, |
| "eval_var_loss": 0.03906391611866859, |
| "flow/cos_sim": 0.3996844376836504, |
| "flow/improvement_ratio": 0.7983297797154262, |
| "flow/mag_ratio_mean": 0.41047868202490084, |
| "flow/mag_ratio_std": 0.269344186827318, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.6003155639685039, |
| "eval_dec_loss": 0.0008366067935938516, |
| "eval_loss": 1.553359952816831, |
| "eval_mse2_loss": 0.19910312760105012, |
| "eval_mse_loss": 1.553359952816831, |
| "eval_rec_loss": 0.08619301475838685, |
| "eval_runtime": 103.8441, |
| "eval_samples_per_second": 288.895, |
| "eval_steps_per_second": 4.516, |
| "eval_var_loss": 0.03906391611866859, |
| "flow/cos_sim": 0.3996844376836504, |
| "flow/improvement_ratio": 0.7983297797154262, |
| "flow/mag_ratio_mean": 0.41047868202490084, |
| "flow/mag_ratio_std": 0.269344186827318, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6288706445148361, |
| "grad_norm": 1.059499740600586, |
| "learning_rate": 0.00031529685904119485, |
| "loss": 1.5369458198547363, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6399034628396578, |
| "grad_norm": 0.6795445680618286, |
| "learning_rate": 0.0002989487855370421, |
| "loss": 1.546617865562439, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6509362811644795, |
| "grad_norm": 0.6012146472930908, |
| "learning_rate": 0.00028285323179165424, |
| "loss": 1.5389968156814575, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "grad_norm": 0.66009920835495, |
| "learning_rate": 0.0002670304137751759, |
| "loss": 1.5420887470245361, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.602212758778509, |
| "eval_dec_loss": 0.0008536874761753805, |
| "eval_loss": 1.5440079855766378, |
| "eval_mse2_loss": 0.19924425627631165, |
| "eval_mse_loss": 1.5440079855766378, |
| "eval_rec_loss": 0.07816322648456928, |
| "eval_var_loss": 0.03964522071897602, |
| "flow/cos_sim": 0.3977872424923789, |
| "flow/improvement_ratio": 0.7939765458422174, |
| "flow/mag_ratio_mean": 0.41111982891808696, |
| "flow/mag_ratio_std": 0.2661404178213717, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.602212758778509, |
| "eval_dec_loss": 0.0008536874761753805, |
| "eval_loss": 1.5440079855766378, |
| "eval_mse2_loss": 0.19924425627631165, |
| "eval_mse_loss": 1.5440079855766378, |
| "eval_rec_loss": 0.07816322648456928, |
| "eval_runtime": 104.4092, |
| "eval_samples_per_second": 287.331, |
| "eval_steps_per_second": 4.492, |
| "eval_var_loss": 0.03964522071897602, |
| "flow/cos_sim": 0.3977872424923789, |
| "flow/improvement_ratio": 0.7939765458422174, |
| "flow/mag_ratio_mean": 0.41111982891808696, |
| "flow/mag_ratio_std": 0.2661404178213717, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6730019178141229, |
| "grad_norm": 0.7557597160339355, |
| "learning_rate": 0.0002515002049024435, |
| "loss": 1.5416761636734009, |
| "step": 15616 |
| }, |
| { |
| "epoch": 0.6840347361389446, |
| "grad_norm": 0.7103043794631958, |
| "learning_rate": 0.00023628211107203429, |
| "loss": 1.5380470752716064, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.6950675544637662, |
| "grad_norm": 0.6369643807411194, |
| "learning_rate": 0.00022139524616691188, |
| "loss": 1.5429226160049438, |
| "step": 16128 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "grad_norm": 0.7409862875938416, |
| "learning_rate": 0.000206858308047443, |
| "loss": 1.5365649461746216, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.7575044618944372, |
| "eval_cos_loss": 0.6018435463213971, |
| "eval_dec_loss": 0.0008337082014663201, |
| "eval_loss": 1.5481067753549833, |
| "eval_mse2_loss": 0.19446404248094762, |
| "eval_mse_loss": 1.5481067753549833, |
| "eval_rec_loss": 0.076209959254336, |
| "eval_var_loss": 0.03873575751635947, |
| "flow/cos_sim": 0.3981564519629041, |
| "flow/improvement_ratio": 0.7944651741717161, |
| "flow/mag_ratio_mean": 0.4150417370201428, |
| "flow/mag_ratio_std": 0.2710818174297113, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.7575044618944372, |
| "eval_cos_loss": 0.6018435463213971, |
| "eval_dec_loss": 0.0008337082014663201, |
| "eval_loss": 1.5481067753549833, |
| "eval_mse2_loss": 0.19446404248094762, |
| "eval_mse_loss": 1.5481067753549833, |
| "eval_rec_loss": 0.076209959254336, |
| "eval_runtime": 104.7354, |
| "eval_samples_per_second": 286.436, |
| "eval_steps_per_second": 4.478, |
| "eval_var_loss": 0.03873575751635947, |
| "flow/cos_sim": 0.3981564519629041, |
| "flow/improvement_ratio": 0.7944651741717161, |
| "flow/mag_ratio_mean": 0.4150417370201428, |
| "flow/mag_ratio_std": 0.2710818174297113, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7171331911134096, |
| "grad_norm": 0.710279643535614, |
| "learning_rate": 0.00019268955506693798, |
| "loss": 1.5333166122436523, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.7281660094382313, |
| "grad_norm": 0.7324961423873901, |
| "learning_rate": 0.00017890678313921, |
| "loss": 1.539929747581482, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.739198827763053, |
| "grad_norm": 0.7484199404716492, |
| "learning_rate": 0.00016552730338695792, |
| "loss": 1.531807780265808, |
| "step": 17152 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "grad_norm": 0.7395180463790894, |
| "learning_rate": 0.00015256792039904465, |
| "loss": 1.531101942062378, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.5935310315984145, |
| "eval_dec_loss": 0.0008232986817089494, |
| "eval_loss": 1.5361263070787703, |
| "eval_mse2_loss": 0.19419101290484228, |
| "eval_mse_loss": 1.5361263070787703, |
| "eval_rec_loss": 0.07587623949259964, |
| "eval_var_loss": 0.03908565207553317, |
| "flow/cos_sim": 0.40646896878285194, |
| "flow/improvement_ratio": 0.8020500177259384, |
| "flow/mag_ratio_mean": 0.41979390077753614, |
| "flow/mag_ratio_std": 0.27610172201067146, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.5935310315984145, |
| "eval_dec_loss": 0.0008232986817089494, |
| "eval_loss": 1.5361263070787703, |
| "eval_mse2_loss": 0.19419101290484228, |
| "eval_mse_loss": 1.5361263070787703, |
| "eval_rec_loss": 0.07587623949259964, |
| "eval_runtime": 104.2066, |
| "eval_samples_per_second": 287.89, |
| "eval_steps_per_second": 4.501, |
| "eval_var_loss": 0.03908565207553317, |
| "flow/cos_sim": 0.40646896878285194, |
| "flow/improvement_ratio": 0.8020500177259384, |
| "flow/mag_ratio_mean": 0.41979390077753614, |
| "flow/mag_ratio_std": 0.27610172201067146, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7612644644126964, |
| "grad_norm": 0.6441617608070374, |
| "learning_rate": 0.00014004491112398103, |
| "loss": 1.53444242477417, |
| "step": 17664 |
| }, |
| { |
| "epoch": 0.7722972827375181, |
| "grad_norm": 0.6133081316947937, |
| "learning_rate": 0.00012797400442612433, |
| "loss": 1.5353076457977295, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.7833301010623397, |
| "grad_norm": 0.6650050282478333, |
| "learning_rate": 0.00011637036133026895, |
| "loss": 1.5271316766738892, |
| "step": 18176 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "grad_norm": 0.7909660339355469, |
| "learning_rate": 0.00010524855597944216, |
| "loss": 1.5325292348861694, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.48791535301561595, |
| "eval_cos_loss": 0.5922040477998729, |
| "eval_dec_loss": 0.0008058249080707051, |
| "eval_loss": 1.529377325003081, |
| "eval_mse2_loss": 0.19290883859782332, |
| "eval_mse_loss": 1.529377325003081, |
| "eval_rec_loss": 0.07509699613173633, |
| "eval_var_loss": 0.03962860657557496, |
| "flow/cos_sim": 0.4077959513740499, |
| "flow/improvement_ratio": 0.8030939055149997, |
| "flow/mag_ratio_mean": 0.4195916212316769, |
| "flow/mag_ratio_std": 0.27258260306646065, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.48791535301561595, |
| "eval_cos_loss": 0.5922040477998729, |
| "eval_dec_loss": 0.0008058249080707051, |
| "eval_loss": 1.529377325003081, |
| "eval_mse2_loss": 0.19290883859782332, |
| "eval_mse_loss": 1.529377325003081, |
| "eval_rec_loss": 0.07509699613173633, |
| "eval_runtime": 104.6596, |
| "eval_samples_per_second": 286.644, |
| "eval_steps_per_second": 4.481, |
| "eval_var_loss": 0.03962860657557496, |
| "flow/cos_sim": 0.4077959513740499, |
| "flow/improvement_ratio": 0.8030939055149997, |
| "flow/mag_ratio_mean": 0.4195916212316769, |
| "flow/mag_ratio_std": 0.27258260306646065, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8053957377119831, |
| "grad_norm": 0.828404426574707, |
| "learning_rate": 9.462255732982089e-05, |
| "loss": 1.5252734422683716, |
| "step": 18688 |
| }, |
| { |
| "epoch": 0.8164285560368048, |
| "grad_norm": 0.6670092940330505, |
| "learning_rate": 8.450571160576348e-05, |
| "loss": 1.5244230031967163, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.8274613743616265, |
| "grad_norm": 0.7132073640823364, |
| "learning_rate": 7.491072553698764e-05, |
| "loss": 1.526404619216919, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "grad_norm": 0.6539149284362793, |
| "learning_rate": 6.584965039895586e-05, |
| "loss": 1.525586724281311, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.5134466095771528, |
| "eval_cos_loss": 0.5857723312718528, |
| "eval_dec_loss": 0.000789512290270347, |
| "eval_loss": 1.519604679363877, |
| "eval_mse2_loss": 0.19140840205810725, |
| "eval_mse_loss": 1.519604679363877, |
| "eval_rec_loss": 0.06644379393036749, |
| "eval_var_loss": 0.03968206735085577, |
| "flow/cos_sim": 0.4142276692365024, |
| "flow/improvement_ratio": 0.8054704157782516, |
| "flow/mag_ratio_mean": 0.4234118685285166, |
| "flow/mag_ratio_std": 0.2757805520728199, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.5134466095771528, |
| "eval_cos_loss": 0.5857723312718528, |
| "eval_dec_loss": 0.000789512290270347, |
| "eval_loss": 1.519604679363877, |
| "eval_mse2_loss": 0.19140840205810725, |
| "eval_mse_loss": 1.519604679363877, |
| "eval_rec_loss": 0.06644379393036749, |
| "eval_runtime": 105.2533, |
| "eval_samples_per_second": 285.027, |
| "eval_steps_per_second": 4.456, |
| "eval_var_loss": 0.03968206735085577, |
| "flow/cos_sim": 0.4142276692365024, |
| "flow/improvement_ratio": 0.8054704157782516, |
| "flow/mag_ratio_mean": 0.4234118685285166, |
| "flow/mag_ratio_std": 0.2757805520728199, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8495270110112698, |
| "grad_norm": 0.6579424142837524, |
| "learning_rate": 5.73338668765051e-05, |
| "loss": 1.5306607484817505, |
| "step": 19712 |
| }, |
| { |
| "epoch": 0.8605598293360915, |
| "grad_norm": 0.7089149355888367, |
| "learning_rate": 4.9374070769740984e-05, |
| "loss": 1.533019781112671, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.8715926476609133, |
| "grad_norm": 0.7739511728286743, |
| "learning_rate": 4.198025956014095e-05, |
| "loss": 1.523693323135376, |
| "step": 20224 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "grad_norm": 0.7731000185012817, |
| "learning_rate": 3.516171985374755e-05, |
| "loss": 1.5237990617752075, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.5918379776132132, |
| "eval_dec_loss": 0.0008275368400102383, |
| "eval_loss": 1.533869057576031, |
| "eval_mse2_loss": 0.1938457629923373, |
| "eval_mse_loss": 1.533869057576031, |
| "eval_rec_loss": 0.07740985165669848, |
| "eval_var_loss": 0.03880171602500527, |
| "flow/cos_sim": 0.40816202270450874, |
| "flow/improvement_ratio": 0.8036269545555115, |
| "flow/mag_ratio_mean": 0.4203644880989213, |
| "flow/mag_ratio_std": 0.27537077541417404, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.5918379776132132, |
| "eval_dec_loss": 0.0008275368400102383, |
| "eval_loss": 1.533869057576031, |
| "eval_mse2_loss": 0.1938457629923373, |
| "eval_mse_loss": 1.533869057576031, |
| "eval_rec_loss": 0.07740985165669848, |
| "eval_runtime": 105.2342, |
| "eval_samples_per_second": 285.078, |
| "eval_steps_per_second": 4.457, |
| "eval_var_loss": 0.03880171602500527, |
| "flow/cos_sim": 0.40816202270450874, |
| "flow/improvement_ratio": 0.8036269545555115, |
| "flow/mag_ratio_mean": 0.4203644880989213, |
| "flow/mag_ratio_std": 0.27537077541417404, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8936582843105566, |
| "grad_norm": 0.6202792525291443, |
| "learning_rate": 2.8927015717215733e-05, |
| "loss": 1.5295757055282593, |
| "step": 20736 |
| }, |
| { |
| "epoch": 0.9046911026353783, |
| "grad_norm": 0.7841973900794983, |
| "learning_rate": 2.3283977921370547e-05, |
| "loss": 1.523586630821228, |
| "step": 20992 |
| }, |
| { |
| "epoch": 0.9157239209601999, |
| "grad_norm": 0.6595759391784668, |
| "learning_rate": 1.8239694105780413e-05, |
| "loss": 1.5318045616149902, |
| "step": 21248 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "grad_norm": 0.609197735786438, |
| "learning_rate": 1.3800499876701955e-05, |
| "loss": 1.5228415727615356, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.5857761577883763, |
| "eval_dec_loss": 0.000830967118222041, |
| "eval_loss": 1.5174758200452272, |
| "eval_mse2_loss": 0.19284123948006743, |
| "eval_mse_loss": 1.5174758200452272, |
| "eval_rec_loss": 0.0729556814678061, |
| "eval_var_loss": 0.03933353780874057, |
| "flow/cos_sim": 0.4142238413855465, |
| "flow/improvement_ratio": 0.8069585110587097, |
| "flow/mag_ratio_mean": 0.42255140481981385, |
| "flow/mag_ratio_std": 0.27569390684048506, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "eval_bleu": 0.0, |
| "eval_cos_loss": 0.5857761577883763, |
| "eval_dec_loss": 0.000830967118222041, |
| "eval_loss": 1.5174758200452272, |
| "eval_mse2_loss": 0.19284123948006743, |
| "eval_mse_loss": 1.5174758200452272, |
| "eval_rec_loss": 0.0729556814678061, |
| "eval_runtime": 104.1539, |
| "eval_samples_per_second": 288.035, |
| "eval_steps_per_second": 4.503, |
| "eval_var_loss": 0.03933353780874057, |
| "flow/cos_sim": 0.4142238413855465, |
| "flow/improvement_ratio": 0.8069585110587097, |
| "flow/mag_ratio_mean": 0.42255140481981385, |
| "flow/mag_ratio_std": 0.27569390684048506, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9377895576098434, |
| "grad_norm": 0.9727445244789124, |
| "learning_rate": 9.971970849576406e-06, |
| "loss": 1.5182249546051025, |
| "step": 21760 |
| }, |
| { |
| "epoch": 0.948822375934665, |
| "grad_norm": 0.7612808346748352, |
| "learning_rate": 6.758915646072339e-06, |
| "loss": 1.5259754657745361, |
| "step": 22016 |
| }, |
| { |
| "epoch": 0.9598551942594867, |
| "grad_norm": 0.713859498500824, |
| "learning_rate": 4.1653698544703575e-06, |
| "loss": 1.5199190378189087, |
| "step": 22272 |
| }, |
| { |
| "epoch": 0.9708880125843083, |
| "grad_norm": 0.7317867875099182, |
| "learning_rate": 2.1945909609756286e-06, |
| "loss": 1.519974946975708, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.9708880125843083, |
| "eval_bleu": 0.6275773435896929, |
| "eval_cos_loss": 0.589492563372736, |
| "eval_dec_loss": 0.0008282276563101144, |
| "eval_loss": 1.5297406591586213, |
| "eval_mse2_loss": 0.1940601731160048, |
| "eval_mse_loss": 1.5297406591586213, |
| "eval_rec_loss": 0.0773585034049809, |
| "eval_var_loss": 0.039062532038290874, |
| "flow/cos_sim": 0.41050743770751874, |
| "flow/improvement_ratio": 0.8078469260415034, |
| "flow/mag_ratio_mean": 0.4194970330449818, |
| "flow/mag_ratio_std": 0.271954351873286, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.9708880125843083, |
| "eval_bleu": 0.6275773435896929, |
| "eval_cos_loss": 0.589492563372736, |
| "eval_dec_loss": 0.0008282276563101144, |
| "eval_loss": 1.5297406591586213, |
| "eval_mse2_loss": 0.1940601731160048, |
| "eval_mse_loss": 1.5297406591586213, |
| "eval_rec_loss": 0.0773585034049809, |
| "eval_runtime": 104.1334, |
| "eval_samples_per_second": 288.092, |
| "eval_steps_per_second": 4.504, |
| "eval_var_loss": 0.039062532038290874, |
| "flow/cos_sim": 0.41050743770751874, |
| "flow/improvement_ratio": 0.8078469260415034, |
| "flow/mag_ratio_mean": 0.4194970330449818, |
| "flow/mag_ratio_std": 0.271954351873286, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.9819208309091301, |
| "grad_norm": 0.6444236636161804, |
| "learning_rate": 8.490542583243222e-07, |
| "loss": 1.5288830995559692, |
| "step": 22784 |
| }, |
| { |
| "epoch": 0.9929536492339518, |
| "grad_norm": 1.0045595169067383, |
| "learning_rate": 1.3044973682302396e-07, |
| "loss": 1.5305051803588867, |
| "step": 23040 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|