| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1024, |
| "global_step": 23204, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011032818324821687, |
| "grad_norm": 0.11718960851430893, |
| "learning_rate": 0.000498046875, |
| "loss": 8.126412391662598, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.022065636649643373, |
| "grad_norm": 0.5918834209442139, |
| "learning_rate": 0.000998046875, |
| "loss": 7.521966934204102, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03309845497446506, |
| "grad_norm": 0.6679062247276306, |
| "learning_rate": 0.000999688448778502, |
| "loss": 7.341925144195557, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 1.1320008039474487, |
| "learning_rate": 0.0009987492950653055, |
| "loss": 7.3012261390686035, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.4137140409513573, |
| "eval_loss": 7.362073738691904, |
| "eval_mse_loss": 1.3527455637449903, |
| "eval_recon_loss": 6.009328172150959, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.4137140409513573, |
| "eval_loss": 7.362073738691904, |
| "eval_mse_loss": 1.3527455637449903, |
| "eval_recon_loss": 6.009328172150959, |
| "eval_runtime": 122.6264, |
| "eval_samples_per_second": 244.645, |
| "eval_steps_per_second": 3.825, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05516409162410843, |
| "grad_norm": 0.583996593952179, |
| "learning_rate": 0.0009971837136430763, |
| "loss": 7.304327011108398, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.06619690994893011, |
| "grad_norm": 0.4174424707889557, |
| "learning_rate": 0.0009949936708776692, |
| "loss": 7.320664882659912, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.07722972827375181, |
| "grad_norm": 0.6613262295722961, |
| "learning_rate": 0.0009921819174566252, |
| "loss": 7.289535999298096, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 0.8233232498168945, |
| "learning_rate": 0.000988751984934317, |
| "loss": 7.30844259262085, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.4137402227214601, |
| "eval_loss": 7.285478673255774, |
| "eval_mse_loss": 1.312072853543865, |
| "eval_recon_loss": 5.973405825812171, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.4137402227214601, |
| "eval_loss": 7.285478673255774, |
| "eval_mse_loss": 1.312072853543865, |
| "eval_recon_loss": 5.973405825812171, |
| "eval_runtime": 120.626, |
| "eval_samples_per_second": 248.703, |
| "eval_steps_per_second": 3.888, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09929536492339518, |
| "grad_norm": 0.6929961442947388, |
| "learning_rate": 0.0009847081812963268, |
| "loss": 7.224529266357422, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.11032818324821686, |
| "grad_norm": 0.4629116356372833, |
| "learning_rate": 0.0009800555855486275, |
| "loss": 7.270728588104248, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.12136100157303854, |
| "grad_norm": 0.8039212226867676, |
| "learning_rate": 0.0009748000413383664, |
| "loss": 7.244544982910156, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 0.7267772555351257, |
| "learning_rate": 0.0009689481496142604, |
| "loss": 7.242873191833496, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.41751728447473074, |
| "eval_loss": 7.207233533676245, |
| "eval_mse_loss": 1.2975257886752392, |
| "eval_recon_loss": 5.909707753643044, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.41751728447473074, |
| "eval_loss": 7.207233533676245, |
| "eval_mse_loss": 1.2975257886752392, |
| "eval_recon_loss": 5.909707753643044, |
| "eval_runtime": 120.6478, |
| "eval_samples_per_second": 248.658, |
| "eval_steps_per_second": 3.887, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.14342663822268192, |
| "grad_norm": 0.6957461833953857, |
| "learning_rate": 0.0009625072603358231, |
| "loss": 7.226602077484131, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.15445945654750362, |
| "grad_norm": 0.7835198044776917, |
| "learning_rate": 0.0009554854632418371, |
| "loss": 7.225058555603027, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.1654922748723253, |
| "grad_norm": 0.698896586894989, |
| "learning_rate": 0.000947891577689663, |
| "loss": 7.229842662811279, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 0.5856732130050659, |
| "learning_rate": 0.0009397351415781539, |
| "loss": 7.255417346954346, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.4222208504795976, |
| "eval_loss": 7.213959849465376, |
| "eval_mse_loss": 1.284232896782442, |
| "eval_recon_loss": 5.929726949378626, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.4222208504795976, |
| "eval_loss": 7.213959849465376, |
| "eval_mse_loss": 1.284232896782442, |
| "eval_recon_loss": 5.929726949378626, |
| "eval_runtime": 120.9546, |
| "eval_samples_per_second": 248.027, |
| "eval_steps_per_second": 3.877, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18755791152196866, |
| "grad_norm": 0.8607048392295837, |
| "learning_rate": 0.000931026399368079, |
| "loss": 7.224533557891846, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.19859072984679035, |
| "grad_norm": 0.6015220284461975, |
| "learning_rate": 0.0009217762892151117, |
| "loss": 7.171858310699463, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.20962354817161205, |
| "grad_norm": 0.5955944061279297, |
| "learning_rate": 0.0009119964292315354, |
| "loss": 7.180020809173584, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 0.9265109896659851, |
| "learning_rate": 0.0009016991028939279, |
| "loss": 7.237715244293213, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.42211295717074476, |
| "eval_loss": 7.192519418720497, |
| "eval_mse_loss": 1.2717160906618847, |
| "eval_recon_loss": 5.920803332379632, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.42211295717074476, |
| "eval_loss": 7.192519418720497, |
| "eval_mse_loss": 1.2717160906618847, |
| "eval_recon_loss": 5.920803332379632, |
| "eval_runtime": 123.6227, |
| "eval_samples_per_second": 242.674, |
| "eval_steps_per_second": 3.794, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.23168918482125542, |
| "grad_norm": 1.0009490251541138, |
| "learning_rate": 0.0008908972436151494, |
| "loss": 7.181396484375, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2427220031460771, |
| "grad_norm": 0.5662420988082886, |
| "learning_rate": 0.0008796044185000127, |
| "loss": 7.214040756225586, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.2537548214708988, |
| "grad_norm": 0.5989573001861572, |
| "learning_rate": 0.0008678348113050368, |
| "loss": 7.233224391937256, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 0.6971510648727417, |
| "learning_rate": 0.0008556032046236897, |
| "loss": 7.238555431365967, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.42175834100767645, |
| "eval_loss": 7.158817366496332, |
| "eval_mse_loss": 1.2668565204148607, |
| "eval_recon_loss": 5.891960855231865, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.42175834100767645, |
| "eval_loss": 7.158817366496332, |
| "eval_mse_loss": 1.2668565204148607, |
| "eval_recon_loss": 5.891960855231865, |
| "eval_runtime": 122.7695, |
| "eval_samples_per_second": 244.36, |
| "eval_steps_per_second": 3.82, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2758204581205422, |
| "grad_norm": 0.5552608966827393, |
| "learning_rate": 0.000842924961319492, |
| "loss": 7.156804084777832, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.28685327644536385, |
| "grad_norm": 0.5489348769187927, |
| "learning_rate": 0.0008298160052303045, |
| "loss": 7.185822486877441, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.2978860947701855, |
| "grad_norm": 0.6533068418502808, |
| "learning_rate": 0.0008162928011680314, |
| "loss": 7.207901477813721, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 0.6510070562362671, |
| "learning_rate": 0.000802372334238864, |
| "loss": 7.252415180206299, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.42033993073908893, |
| "eval_loss": 7.211322486527693, |
| "eval_mse_loss": 1.262001540106751, |
| "eval_recon_loss": 5.949320952267026, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.42033993073908893, |
| "eval_loss": 7.211322486527693, |
| "eval_mse_loss": 1.262001540106751, |
| "eval_recon_loss": 5.949320952267026, |
| "eval_runtime": 122.2468, |
| "eval_samples_per_second": 245.405, |
| "eval_steps_per_second": 3.837, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3199517314198289, |
| "grad_norm": 0.629555881023407, |
| "learning_rate": 0.0007880720885100349, |
| "loss": 7.190337181091309, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3309845497446506, |
| "grad_norm": 0.7832825183868408, |
| "learning_rate": 0.0007734100250498788, |
| "loss": 7.169421672821045, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3420173680694723, |
| "grad_norm": 1.125791072845459, |
| "learning_rate": 0.000758404559368781, |
| "loss": 7.217397212982178, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "grad_norm": 1.4169881343841553, |
| "learning_rate": 0.0007430745382893488, |
| "loss": 7.163691520690918, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.4273075759012065, |
| "eval_loss": 7.0710403741295655, |
| "eval_mse_loss": 1.2539655610696594, |
| "eval_recon_loss": 5.817074826022964, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.4273075759012065, |
| "eval_loss": 7.0710403741295655, |
| "eval_mse_loss": 1.2539655610696594, |
| "eval_recon_loss": 5.817074826022964, |
| "eval_runtime": 119.9191, |
| "eval_samples_per_second": 250.169, |
| "eval_steps_per_second": 3.911, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.36408300471911564, |
| "grad_norm": 0.6753491759300232, |
| "learning_rate": 0.0007274392162748551, |
| "loss": 7.200345516204834, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.3751158230439373, |
| "grad_norm": 0.5481099486351013, |
| "learning_rate": 0.000711518231245687, |
| "loss": 7.261969566345215, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.38614864136875904, |
| "grad_norm": 0.9790687561035156, |
| "learning_rate": 0.0006953315799141723, |
| "loss": 7.222372055053711, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "grad_norm": 0.6765880584716797, |
| "learning_rate": 0.0006788995926687669, |
| "loss": 7.16965913772583, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.4247464090003691, |
| "eval_loss": 7.120412382235659, |
| "eval_mse_loss": 1.2528218912925801, |
| "eval_recon_loss": 5.867590499076762, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.4247464090003691, |
| "eval_loss": 7.120412382235659, |
| "eval_mse_loss": 1.2528218912925801, |
| "eval_recon_loss": 5.867590499076762, |
| "eval_runtime": 122.0364, |
| "eval_samples_per_second": 245.828, |
| "eval_steps_per_second": 3.843, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4082142780184024, |
| "grad_norm": 0.65935879945755, |
| "learning_rate": 0.0006622429080391422, |
| "loss": 7.197686195373535, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.4192470963432241, |
| "grad_norm": 0.6421726942062378, |
| "learning_rate": 0.0006453824467742515, |
| "loss": 7.234926700592041, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.43027991466804577, |
| "grad_norm": 0.5790821313858032, |
| "learning_rate": 0.0006283393855659275, |
| "loss": 7.189432621002197, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "grad_norm": 1.3850535154342651, |
| "learning_rate": 0.0006111351304510173, |
| "loss": 7.234661102294922, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.418702569877124, |
| "eval_loss": 7.200763400429602, |
| "eval_mse_loss": 1.2514713542547815, |
| "eval_recon_loss": 5.949292033465941, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.44131273299286744, |
| "eval_bleu": 0.418702569877124, |
| "eval_loss": 7.200763400429602, |
| "eval_mse_loss": 1.2514713542547815, |
| "eval_recon_loss": 5.949292033465941, |
| "eval_runtime": 120.1297, |
| "eval_samples_per_second": 249.73, |
| "eval_steps_per_second": 3.904, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.45234555131768917, |
| "grad_norm": 0.7837287187576294, |
| "learning_rate": 0.0005937912899254605, |
| "loss": 7.249295711517334, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.46337836964251083, |
| "grad_norm": 0.6780616641044617, |
| "learning_rate": 0.0005763296478040787, |
| "loss": 7.172219276428223, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.4744111879673325, |
| "grad_norm": 0.7398590445518494, |
| "learning_rate": 0.0005587721358601663, |
| "loss": 7.186757564544678, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "grad_norm": 0.9151716232299805, |
| "learning_rate": 0.0005411408062792448, |
| "loss": 7.159968852996826, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.4226532958201888, |
| "eval_loss": 7.1653254210059325, |
| "eval_mse_loss": 1.2460996743712598, |
| "eval_recon_loss": 5.9192257395177, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4854440062921542, |
| "eval_bleu": 0.4226532958201888, |
| "eval_loss": 7.1653254210059325, |
| "eval_mse_loss": 1.2460996743712598, |
| "eval_recon_loss": 5.9192257395177, |
| "eval_runtime": 121.8437, |
| "eval_samples_per_second": 246.217, |
| "eval_steps_per_second": 3.849, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.4964768246169759, |
| "grad_norm": 0.8292572498321533, |
| "learning_rate": 0.0005234578039615789, |
| "loss": 7.178791046142578, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5075096429417976, |
| "grad_norm": 0.6656509637832642, |
| "learning_rate": 0.0005057453387082458, |
| "loss": 7.218838691711426, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5185424612666193, |
| "grad_norm": 0.5986668467521667, |
| "learning_rate": 0.0004880256573256866, |
| "loss": 7.1541748046875, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "grad_norm": 0.9848335385322571, |
| "learning_rate": 0.0004703210156837805, |
| "loss": 7.192195415496826, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.4265346559703667, |
| "eval_loss": 7.1317149693015285, |
| "eval_mse_loss": 1.2411813456366565, |
| "eval_recon_loss": 5.890533620106386, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5295752795914409, |
| "eval_bleu": 0.4265346559703667, |
| "eval_loss": 7.1317149693015285, |
| "eval_mse_loss": 1.2411813456366565, |
| "eval_recon_loss": 5.890533620106386, |
| "eval_runtime": 121.6345, |
| "eval_samples_per_second": 246.641, |
| "eval_steps_per_second": 3.856, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5406080979162626, |
| "grad_norm": 0.4458930194377899, |
| "learning_rate": 0.0004526536507625343, |
| "loss": 7.163888931274414, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5516409162410844, |
| "grad_norm": 0.6304380893707275, |
| "learning_rate": 0.00043504575272249973, |
| "loss": 7.172398567199707, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.562673734565906, |
| "grad_norm": 0.4299321174621582, |
| "learning_rate": 0.0004175194370339921, |
| "loss": 7.1996941566467285, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "grad_norm": 0.7110609412193298, |
| "learning_rate": 0.0004000967167001243, |
| "loss": 7.176135540008545, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.4210106981552223, |
| "eval_loss": 7.17737479606417, |
| "eval_mse_loss": 1.24177454910807, |
| "eval_recon_loss": 5.9356002522938285, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5737065528907277, |
| "eval_bleu": 0.4210106981552223, |
| "eval_loss": 7.17737479606417, |
| "eval_mse_loss": 1.24177454910807, |
| "eval_recon_loss": 5.9356002522938285, |
| "eval_runtime": 121.837, |
| "eval_samples_per_second": 246.231, |
| "eval_steps_per_second": 3.849, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.5847393712155494, |
| "grad_norm": 0.6122337579727173, |
| "learning_rate": 0.00038279947460853446, |
| "loss": 7.161331653594971, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.595772189540371, |
| "grad_norm": 0.3825603723526001, |
| "learning_rate": 0.00036564943604654345, |
| "loss": 7.182938575744629, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6068050078651928, |
| "grad_norm": 0.789337158203125, |
| "learning_rate": 0.00034866814141425254, |
| "loss": 7.209775447845459, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "grad_norm": 0.5527840256690979, |
| "learning_rate": 0.0003318769191698637, |
| "loss": 7.182364463806152, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.4225600590459926, |
| "eval_loss": 7.143745667390478, |
| "eval_mse_loss": 1.2410022706619457, |
| "eval_recon_loss": 5.902743396474354, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6178378261900145, |
| "eval_bleu": 0.4225600590459926, |
| "eval_loss": 7.143745667390478, |
| "eval_mse_loss": 1.2410022706619457, |
| "eval_recon_loss": 5.902743396474354, |
| "eval_runtime": 120.9077, |
| "eval_samples_per_second": 248.123, |
| "eval_steps_per_second": 3.879, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6288706445148361, |
| "grad_norm": 1.0367250442504883, |
| "learning_rate": 0.00031529685904119485, |
| "loss": 7.168930530548096, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6399034628396578, |
| "grad_norm": 0.6979912519454956, |
| "learning_rate": 0.0002989487855370421, |
| "loss": 7.185887813568115, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6509362811644795, |
| "grad_norm": 0.9760459661483765, |
| "learning_rate": 0.00028285323179165424, |
| "loss": 7.213273048400879, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "grad_norm": 0.5782520771026611, |
| "learning_rate": 0.0002670304137751759, |
| "loss": 7.2389631271362305, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.4214572010245335, |
| "eval_loss": 7.171980977820944, |
| "eval_mse_loss": 1.238288759422709, |
| "eval_recon_loss": 5.933692220431655, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "eval_bleu": 0.4214572010245335, |
| "eval_loss": 7.171980977820944, |
| "eval_mse_loss": 1.238288759422709, |
| "eval_recon_loss": 5.933692220431655, |
| "eval_runtime": 122.1262, |
| "eval_samples_per_second": 245.648, |
| "eval_steps_per_second": 3.84, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6730019178141229, |
| "grad_norm": 0.458218514919281, |
| "learning_rate": 0.0002515002049024435, |
| "loss": 7.234853744506836, |
| "step": 15616 |
| }, |
| { |
| "epoch": 0.6840347361389446, |
| "grad_norm": 0.6497246026992798, |
| "learning_rate": 0.00023628211107203429, |
| "loss": 7.127511978149414, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.6950675544637662, |
| "grad_norm": 0.47331640124320984, |
| "learning_rate": 0.00022139524616691188, |
| "loss": 7.157367706298828, |
| "step": 16128 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "grad_norm": 0.5251054167747498, |
| "learning_rate": 0.000206858308047443, |
| "loss": 7.214804649353027, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.4238607122458957, |
| "eval_loss": 7.129481719246805, |
| "eval_mse_loss": 1.2356282551405526, |
| "eval_recon_loss": 5.893853451397373, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.706100372788588, |
| "eval_bleu": 0.4238607122458957, |
| "eval_loss": 7.129481719246805, |
| "eval_mse_loss": 1.2356282551405526, |
| "eval_recon_loss": 5.893853451397373, |
| "eval_runtime": 122.2268, |
| "eval_samples_per_second": 245.445, |
| "eval_steps_per_second": 3.837, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7171331911134096, |
| "grad_norm": 1.383571982383728, |
| "learning_rate": 0.00019268955506693798, |
| "loss": 7.159656524658203, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.7281660094382313, |
| "grad_norm": 0.8240185976028442, |
| "learning_rate": 0.00017890678313921, |
| "loss": 7.187053203582764, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.739198827763053, |
| "grad_norm": 0.9478136301040649, |
| "learning_rate": 0.00016552730338695792, |
| "loss": 7.169220447540283, |
| "step": 17152 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "grad_norm": 1.9408297538757324, |
| "learning_rate": 0.00015256792039904465, |
| "loss": 7.1799798011779785, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.4214146319298905, |
| "eval_loss": 7.153863178895735, |
| "eval_mse_loss": 1.2358840449786643, |
| "eval_recon_loss": 5.917979137221379, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7502316460878746, |
| "eval_bleu": 0.4214146319298905, |
| "eval_loss": 7.153863178895735, |
| "eval_mse_loss": 1.2358840449786643, |
| "eval_recon_loss": 5.917979137221379, |
| "eval_runtime": 121.0512, |
| "eval_samples_per_second": 247.829, |
| "eval_steps_per_second": 3.874, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.7612644644126964, |
| "grad_norm": 0.42684435844421387, |
| "learning_rate": 0.00014004491112398103, |
| "loss": 7.144793510437012, |
| "step": 17664 |
| }, |
| { |
| "epoch": 0.7722972827375181, |
| "grad_norm": 1.1356747150421143, |
| "learning_rate": 0.00012797400442612433, |
| "loss": 7.137683868408203, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.7833301010623397, |
| "grad_norm": 0.804119884967804, |
| "learning_rate": 0.00011637036133026895, |
| "loss": 7.187657833099365, |
| "step": 18176 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "grad_norm": 0.5904266834259033, |
| "learning_rate": 0.00010524855597944216, |
| "loss": 7.176294326782227, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.42259771720418604, |
| "eval_loss": 7.143732512175148, |
| "eval_mse_loss": 1.2345045111072597, |
| "eval_recon_loss": 5.909227997509401, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.7943629193871614, |
| "eval_bleu": 0.42259771720418604, |
| "eval_loss": 7.143732512175148, |
| "eval_mse_loss": 1.2345045111072597, |
| "eval_recon_loss": 5.909227997509401, |
| "eval_runtime": 120.9093, |
| "eval_samples_per_second": 248.12, |
| "eval_steps_per_second": 3.879, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8053957377119831, |
| "grad_norm": 0.7857964634895325, |
| "learning_rate": 9.462255732982089e-05, |
| "loss": 7.177124977111816, |
| "step": 18688 |
| }, |
| { |
| "epoch": 0.8164285560368048, |
| "grad_norm": 0.5492353439331055, |
| "learning_rate": 8.450571160576348e-05, |
| "loss": 7.213504791259766, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.8274613743616265, |
| "grad_norm": 1.4939922094345093, |
| "learning_rate": 7.491072553698764e-05, |
| "loss": 7.130130767822266, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "grad_norm": 0.6733608841896057, |
| "learning_rate": 6.584965039895586e-05, |
| "loss": 7.213354587554932, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.42425975662031007, |
| "eval_loss": 7.170489285800503, |
| "eval_mse_loss": 1.2338256403835597, |
| "eval_recon_loss": 5.936663639825036, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8384941926864482, |
| "eval_bleu": 0.42425975662031007, |
| "eval_loss": 7.170489285800503, |
| "eval_mse_loss": 1.2338256403835597, |
| "eval_recon_loss": 5.936663639825036, |
| "eval_runtime": 120.4436, |
| "eval_samples_per_second": 249.079, |
| "eval_steps_per_second": 3.894, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8495270110112698, |
| "grad_norm": 0.6659030318260193, |
| "learning_rate": 5.73338668765051e-05, |
| "loss": 7.2163591384887695, |
| "step": 19712 |
| }, |
| { |
| "epoch": 0.8605598293360915, |
| "grad_norm": 0.5635488033294678, |
| "learning_rate": 4.9374070769740984e-05, |
| "loss": 7.225249290466309, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.8715926476609133, |
| "grad_norm": 1.0609259605407715, |
| "learning_rate": 4.198025956014095e-05, |
| "loss": 7.194465637207031, |
| "step": 20224 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "grad_norm": 1.3461576700210571, |
| "learning_rate": 3.516171985374755e-05, |
| "loss": 7.195147514343262, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.42110197936728205, |
| "eval_loss": 7.151369848231009, |
| "eval_mse_loss": 1.233785225892626, |
| "eval_recon_loss": 5.917584622084205, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8826254659857349, |
| "eval_bleu": 0.42110197936728205, |
| "eval_loss": 7.151369848231009, |
| "eval_mse_loss": 1.233785225892626, |
| "eval_recon_loss": 5.917584622084205, |
| "eval_runtime": 120.8036, |
| "eval_samples_per_second": 248.337, |
| "eval_steps_per_second": 3.882, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.8936582843105566, |
| "grad_norm": 0.49557796120643616, |
| "learning_rate": 2.8927015717215733e-05, |
| "loss": 7.159825801849365, |
| "step": 20736 |
| }, |
| { |
| "epoch": 0.9046911026353783, |
| "grad_norm": 0.4444299638271332, |
| "learning_rate": 2.3283977921370547e-05, |
| "loss": 7.15433931350708, |
| "step": 20992 |
| }, |
| { |
| "epoch": 0.9157239209601999, |
| "grad_norm": 0.49471017718315125, |
| "learning_rate": 1.8239694105780413e-05, |
| "loss": 7.151764392852783, |
| "step": 21248 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "grad_norm": 0.27193328738212585, |
| "learning_rate": 1.3800499876701955e-05, |
| "loss": 7.129524230957031, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "eval_bleu": 0.421951847647465, |
| "eval_loss": 7.1731349867798375, |
| "eval_mse_loss": 1.2322176178889488, |
| "eval_recon_loss": 5.940917385158254, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9267567392850217, |
| "eval_bleu": 0.421951847647465, |
| "eval_loss": 7.1731349867798375, |
| "eval_mse_loss": 1.2322176178889488, |
| "eval_recon_loss": 5.940917385158254, |
| "eval_runtime": 119.5557, |
| "eval_samples_per_second": 250.929, |
| "eval_steps_per_second": 3.923, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9377895576098434, |
| "grad_norm": 0.38439345359802246, |
| "learning_rate": 9.971970849576406e-06, |
| "loss": 7.190091609954834, |
| "step": 21760 |
| }, |
| { |
| "epoch": 0.948822375934665, |
| "grad_norm": 0.5374737977981567, |
| "learning_rate": 6.758915646072339e-06, |
| "loss": 7.214999675750732, |
| "step": 22016 |
| }, |
| { |
| "epoch": 0.9598551942594867, |
| "grad_norm": 0.55973219871521, |
| "learning_rate": 4.1653698544703575e-06, |
| "loss": 7.165055274963379, |
| "step": 22272 |
| }, |
| { |
| "epoch": 0.9708880125843083, |
| "grad_norm": 0.2763802111148834, |
| "learning_rate": 2.1945909609756286e-06, |
| "loss": 7.138909816741943, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.9708880125843083, |
| "eval_bleu": 0.42247179102439836, |
| "eval_loss": 7.163274863635554, |
| "eval_mse_loss": 1.2323216156664687, |
| "eval_recon_loss": 5.930953238310336, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.9708880125843083, |
| "eval_bleu": 0.42247179102439836, |
| "eval_loss": 7.163274863635554, |
| "eval_mse_loss": 1.2323216156664687, |
| "eval_recon_loss": 5.930953238310336, |
| "eval_runtime": 119.657, |
| "eval_samples_per_second": 250.717, |
| "eval_steps_per_second": 3.92, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.9819208309091301, |
| "grad_norm": 0.19876758754253387, |
| "learning_rate": 8.490542583243222e-07, |
| "loss": 7.1233296394348145, |
| "step": 22784 |
| }, |
| { |
| "epoch": 0.9929536492339518, |
| "grad_norm": 0.17880426347255707, |
| "learning_rate": 1.3044973682302396e-07, |
| "loss": 7.214967250823975, |
| "step": 23040 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|