Attila1011's picture
Upload folder using huggingface_hub
8d1be2a verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 1024,
"global_step": 23204,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011032818324821687,
"grad_norm": 0.3007344603538513,
"learning_rate": 0.000498046875,
"loss": 1.9607043266296387,
"step": 256
},
{
"epoch": 0.022065636649643373,
"grad_norm": 0.38754719495773315,
"learning_rate": 0.000998046875,
"loss": 1.8510947227478027,
"step": 512
},
{
"epoch": 0.03309845497446506,
"grad_norm": 0.41651925444602966,
"learning_rate": 0.000999688448778502,
"loss": 1.7883503437042236,
"step": 768
},
{
"epoch": 0.04413127329928675,
"grad_norm": 0.4576423764228821,
"learning_rate": 0.0009987492950653055,
"loss": 1.7437095642089844,
"step": 1024
},
{
"epoch": 0.04413127329928675,
"eval_bleu": 0.7290582309773348,
"eval_cos_loss": 0.6748774711257105,
"eval_dec_loss": 0.0016125108924325007,
"eval_loss": 1.7118930867485909,
"eval_mse2_loss": 0.23725909220257294,
"eval_mse_loss": 1.7118930867485909,
"eval_rec_loss": 0.05790480172861296,
"eval_var_loss": 0.029386979561529435,
"flow/cos_sim": 0.32512253071707703,
"flow/improvement_ratio": 0.773648498536173,
"flow/mag_ratio_mean": 0.3315794987083752,
"flow/mag_ratio_std": 0.19750540018844198,
"step": 1024
},
{
"epoch": 0.04413127329928675,
"eval_bleu": 0.7290582309773348,
"eval_cos_loss": 0.6748774711257105,
"eval_dec_loss": 0.0016125108924325007,
"eval_loss": 1.7118930867485909,
"eval_mse2_loss": 0.23725909220257294,
"eval_mse_loss": 1.7118930867485909,
"eval_rec_loss": 0.05790480172861296,
"eval_runtime": 103.0234,
"eval_samples_per_second": 291.196,
"eval_steps_per_second": 4.552,
"eval_var_loss": 0.029386979561529435,
"flow/cos_sim": 0.32512253071707703,
"flow/improvement_ratio": 0.773648498536173,
"flow/mag_ratio_mean": 0.3315794987083752,
"flow/mag_ratio_std": 0.19750540018844198,
"step": 1024
},
{
"epoch": 0.05516409162410843,
"grad_norm": 0.46116578578948975,
"learning_rate": 0.0009971837136430763,
"loss": 1.6916401386260986,
"step": 1280
},
{
"epoch": 0.06619690994893011,
"grad_norm": 0.4674736559391022,
"learning_rate": 0.0009949936708776692,
"loss": 1.6614705324172974,
"step": 1536
},
{
"epoch": 0.07722972827375181,
"grad_norm": 0.4964284598827362,
"learning_rate": 0.0009921819174566252,
"loss": 1.6426620483398438,
"step": 1792
},
{
"epoch": 0.0882625465985735,
"grad_norm": 0.5635536313056946,
"learning_rate": 0.000988751984934317,
"loss": 1.6190364360809326,
"step": 2048
},
{
"epoch": 0.0882625465985735,
"eval_bleu": 0.7361472993029746,
"eval_cos_loss": 0.6193856069528217,
"eval_dec_loss": 0.0013400374704601193,
"eval_loss": 1.6149477191062878,
"eval_mse2_loss": 0.2126978265959571,
"eval_mse_loss": 1.6149477191062878,
"eval_rec_loss": 0.055882355892288085,
"eval_var_loss": 0.02890209875492526,
"flow/cos_sim": 0.3806143929836338,
"flow/improvement_ratio": 0.8031272210800318,
"flow/mag_ratio_mean": 0.38922329186630655,
"flow/mag_ratio_std": 0.2309291490168968,
"step": 2048
},
{
"epoch": 0.0882625465985735,
"eval_bleu": 0.7361472993029746,
"eval_cos_loss": 0.6193856069528217,
"eval_dec_loss": 0.0013400374704601193,
"eval_loss": 1.6149477191062878,
"eval_mse2_loss": 0.2126978265959571,
"eval_mse_loss": 1.6149477191062878,
"eval_rec_loss": 0.055882355892288085,
"eval_runtime": 102.671,
"eval_samples_per_second": 292.196,
"eval_steps_per_second": 4.568,
"eval_var_loss": 0.02890209875492526,
"flow/cos_sim": 0.3806143929836338,
"flow/improvement_ratio": 0.8031272210800318,
"flow/mag_ratio_mean": 0.38922329186630655,
"flow/mag_ratio_std": 0.2309291490168968,
"step": 2048
},
{
"epoch": 0.09929536492339518,
"grad_norm": 0.5123931169509888,
"learning_rate": 0.0009847081812963268,
"loss": 1.6022895574569702,
"step": 2304
},
{
"epoch": 0.11032818324821686,
"grad_norm": 0.5382006764411926,
"learning_rate": 0.0009800555855486275,
"loss": 1.5898725986480713,
"step": 2560
},
{
"epoch": 0.12136100157303854,
"grad_norm": 0.5514854192733765,
"learning_rate": 0.0009748000413383664,
"loss": 1.5826457738876343,
"step": 2816
},
{
"epoch": 0.13239381989786023,
"grad_norm": 0.5678655505180359,
"learning_rate": 0.0009689481496142604,
"loss": 1.5685003995895386,
"step": 3072
},
{
"epoch": 0.13239381989786023,
"eval_bleu": 0.7259279887313845,
"eval_cos_loss": 0.5904010080579501,
"eval_dec_loss": 0.001531593777309569,
"eval_loss": 1.5695596038659752,
"eval_mse2_loss": 0.20936787379448857,
"eval_mse_loss": 1.5695596038659752,
"eval_rec_loss": 0.060287337766082555,
"eval_var_loss": 0.02963222060868862,
"flow/cos_sim": 0.409598992768127,
"flow/improvement_ratio": 0.8170475746268657,
"flow/mag_ratio_mean": 0.4326363442294887,
"flow/mag_ratio_std": 0.22964929263474845,
"step": 3072
},
{
"epoch": 0.13239381989786023,
"eval_bleu": 0.7259279887313845,
"eval_cos_loss": 0.5904010080579501,
"eval_dec_loss": 0.001531593777309569,
"eval_loss": 1.5695596038659752,
"eval_mse2_loss": 0.20936787379448857,
"eval_mse_loss": 1.5695596038659752,
"eval_rec_loss": 0.060287337766082555,
"eval_runtime": 103.2177,
"eval_samples_per_second": 290.648,
"eval_steps_per_second": 4.544,
"eval_var_loss": 0.02963222060868862,
"flow/cos_sim": 0.409598992768127,
"flow/improvement_ratio": 0.8170475746268657,
"flow/mag_ratio_mean": 0.4326363442294887,
"flow/mag_ratio_std": 0.22964929263474845,
"step": 3072
},
{
"epoch": 0.14342663822268192,
"grad_norm": 0.5561569333076477,
"learning_rate": 0.0009625072603358231,
"loss": 1.5565699338912964,
"step": 3328
},
{
"epoch": 0.15445945654750362,
"grad_norm": 0.5337810516357422,
"learning_rate": 0.0009554854632418371,
"loss": 1.556112289428711,
"step": 3584
},
{
"epoch": 0.1654922748723253,
"grad_norm": 0.6065189242362976,
"learning_rate": 0.000947891577689663,
"loss": 1.5425442457199097,
"step": 3840
},
{
"epoch": 0.176525093197147,
"grad_norm": 0.6436013579368591,
"learning_rate": 0.0009397351415781539,
"loss": 1.5381078720092773,
"step": 4096
},
{
"epoch": 0.176525093197147,
"eval_bleu": 0.7609616675139879,
"eval_cos_loss": 0.5725635654255271,
"eval_dec_loss": 0.0013841146930163827,
"eval_loss": 1.5326353372541317,
"eval_mse2_loss": 0.20037362373459822,
"eval_mse_loss": 1.5326353372541317,
"eval_rec_loss": 0.05205997703934529,
"eval_var_loss": 0.029761007865831288,
"flow/cos_sim": 0.42743643495573924,
"flow/improvement_ratio": 0.8228500355789656,
"flow/mag_ratio_mean": 0.44373360606653095,
"flow/mag_ratio_std": 0.2451275099060937,
"step": 4096
},
{
"epoch": 0.176525093197147,
"eval_bleu": 0.7609616675139879,
"eval_cos_loss": 0.5725635654255271,
"eval_dec_loss": 0.0013841146930163827,
"eval_loss": 1.5326353372541317,
"eval_mse2_loss": 0.20037362373459822,
"eval_mse_loss": 1.5326353372541317,
"eval_rec_loss": 0.05205997703934529,
"eval_runtime": 103.0729,
"eval_samples_per_second": 291.056,
"eval_steps_per_second": 4.55,
"eval_var_loss": 0.029761007865831288,
"flow/cos_sim": 0.42743643495573924,
"flow/improvement_ratio": 0.8228500355789656,
"flow/mag_ratio_mean": 0.44373360606653095,
"flow/mag_ratio_std": 0.2451275099060937,
"step": 4096
},
{
"epoch": 0.18755791152196866,
"grad_norm": 0.5533596873283386,
"learning_rate": 0.000931026399368079,
"loss": 1.5355464220046997,
"step": 4352
},
{
"epoch": 0.19859072984679035,
"grad_norm": 0.6129039525985718,
"learning_rate": 0.0009217762892151117,
"loss": 1.526825189590454,
"step": 4608
},
{
"epoch": 0.20962354817161205,
"grad_norm": 0.6128653287887573,
"learning_rate": 0.0009119964292315354,
"loss": 1.5186046361923218,
"step": 4864
},
{
"epoch": 0.22065636649643372,
"grad_norm": 0.6013854146003723,
"learning_rate": 0.0009016991028939279,
"loss": 1.5184156894683838,
"step": 5120
},
{
"epoch": 0.22065636649643372,
"eval_bleu": 0.7313483153096204,
"eval_cos_loss": 0.565770648880554,
"eval_dec_loss": 0.0013971831941510986,
"eval_loss": 1.5208095035064957,
"eval_mse2_loss": 0.19698964767872906,
"eval_mse_loss": 1.5208095035064957,
"eval_rec_loss": 0.058415787606271724,
"eval_var_loss": 0.029480641187508223,
"flow/cos_sim": 0.4342293481328594,
"flow/improvement_ratio": 0.8310012437387316,
"flow/mag_ratio_mean": 0.4485036200170578,
"flow/mag_ratio_std": 0.24114183547781476,
"step": 5120
},
{
"epoch": 0.22065636649643372,
"eval_bleu": 0.7313483153096204,
"eval_cos_loss": 0.565770648880554,
"eval_dec_loss": 0.0013971831941510986,
"eval_loss": 1.5208095035064957,
"eval_mse2_loss": 0.19698964767872906,
"eval_mse_loss": 1.5208095035064957,
"eval_rec_loss": 0.058415787606271724,
"eval_runtime": 104.1181,
"eval_samples_per_second": 288.134,
"eval_steps_per_second": 4.505,
"eval_var_loss": 0.029480641187508223,
"flow/cos_sim": 0.4342293481328594,
"flow/improvement_ratio": 0.8310012437387316,
"flow/mag_ratio_mean": 0.4485036200170578,
"flow/mag_ratio_std": 0.24114183547781476,
"step": 5120
},
{
"epoch": 0.23168918482125542,
"grad_norm": 0.5818307995796204,
"learning_rate": 0.0008908972436151494,
"loss": 1.5146307945251465,
"step": 5376
},
{
"epoch": 0.2427220031460771,
"grad_norm": 0.5968588590621948,
"learning_rate": 0.0008796044185000127,
"loss": 1.5090495347976685,
"step": 5632
},
{
"epoch": 0.2537548214708988,
"grad_norm": 0.645140528678894,
"learning_rate": 0.0008678348113050368,
"loss": 1.5024750232696533,
"step": 5888
},
{
"epoch": 0.26478763979572045,
"grad_norm": 0.6232675313949585,
"learning_rate": 0.0008556032046236897,
"loss": 1.499906301498413,
"step": 6144
},
{
"epoch": 0.26478763979572045,
"eval_bleu": 0.7912708006589123,
"eval_cos_loss": 0.5567008724598996,
"eval_dec_loss": 0.0014503563844457592,
"eval_loss": 1.5032868258226146,
"eval_mse2_loss": 0.19748503063469808,
"eval_mse_loss": 1.5032868258226146,
"eval_rec_loss": 0.05662109937145512,
"eval_var_loss": 0.029432428198487265,
"flow/cos_sim": 0.44329912652339,
"flow/improvement_ratio": 0.8293576759061834,
"flow/mag_ratio_mean": 0.4766448940803756,
"flow/mag_ratio_std": 0.2504093461771255,
"step": 6144
},
{
"epoch": 0.26478763979572045,
"eval_bleu": 0.7912708006589123,
"eval_cos_loss": 0.5567008724598996,
"eval_dec_loss": 0.0014503563844457592,
"eval_loss": 1.5032868258226146,
"eval_mse2_loss": 0.19748503063469808,
"eval_mse_loss": 1.5032868258226146,
"eval_rec_loss": 0.05662109937145512,
"eval_runtime": 102.8737,
"eval_samples_per_second": 291.62,
"eval_steps_per_second": 4.559,
"eval_var_loss": 0.029432428198487265,
"flow/cos_sim": 0.44329912652339,
"flow/improvement_ratio": 0.8293576759061834,
"flow/mag_ratio_mean": 0.4766448940803756,
"flow/mag_ratio_std": 0.2504093461771255,
"step": 6144
},
{
"epoch": 0.2758204581205422,
"grad_norm": 0.6002918481826782,
"learning_rate": 0.000842924961319492,
"loss": 1.5013189315795898,
"step": 6400
},
{
"epoch": 0.28685327644536385,
"grad_norm": 0.6131093502044678,
"learning_rate": 0.0008298160052303045,
"loss": 1.491563320159912,
"step": 6656
},
{
"epoch": 0.2978860947701855,
"grad_norm": 0.6153339743614197,
"learning_rate": 0.0008162928011680314,
"loss": 1.4890822172164917,
"step": 6912
},
{
"epoch": 0.30891891309500724,
"grad_norm": 0.5415698885917664,
"learning_rate": 0.000802372334238864,
"loss": 1.4869613647460938,
"step": 7168
},
{
"epoch": 0.30891891309500724,
"eval_bleu": 0.6991471025766374,
"eval_cos_loss": 0.5479137339571646,
"eval_dec_loss": 0.0014181479605397324,
"eval_loss": 1.4831991663365476,
"eval_mse2_loss": 0.19134751513505033,
"eval_mse_loss": 1.4831991663365476,
"eval_rec_loss": 0.059703294130197086,
"eval_var_loss": 0.029254676190330023,
"flow/cos_sim": 0.45208626534384705,
"flow/improvement_ratio": 0.8373922797154262,
"flow/mag_ratio_mean": 0.46522473710686413,
"flow/mag_ratio_std": 0.24233753331053232,
"step": 7168
},
{
"epoch": 0.30891891309500724,
"eval_bleu": 0.6991471025766374,
"eval_cos_loss": 0.5479137339571646,
"eval_dec_loss": 0.0014181479605397324,
"eval_loss": 1.4831991663365476,
"eval_mse2_loss": 0.19134751513505033,
"eval_mse_loss": 1.4831991663365476,
"eval_rec_loss": 0.059703294130197086,
"eval_runtime": 102.4636,
"eval_samples_per_second": 292.787,
"eval_steps_per_second": 4.577,
"eval_var_loss": 0.029254676190330023,
"flow/cos_sim": 0.45208626534384705,
"flow/improvement_ratio": 0.8373922797154262,
"flow/mag_ratio_mean": 0.46522473710686413,
"flow/mag_ratio_std": 0.24233753331053232,
"step": 7168
},
{
"epoch": 0.3199517314198289,
"grad_norm": 0.6683939695358276,
"learning_rate": 0.0007880720885100349,
"loss": 1.476445198059082,
"step": 7424
},
{
"epoch": 0.3309845497446506,
"grad_norm": 0.5962712168693542,
"learning_rate": 0.0007734100250498788,
"loss": 1.4769901037216187,
"step": 7680
},
{
"epoch": 0.3420173680694723,
"grad_norm": 0.5617682933807373,
"learning_rate": 0.000758404559368781,
"loss": 1.4828119277954102,
"step": 7936
},
{
"epoch": 0.353050186394294,
"grad_norm": 0.7243582606315613,
"learning_rate": 0.0007430745382893488,
"loss": 1.4768471717834473,
"step": 8192
},
{
"epoch": 0.353050186394294,
"eval_bleu": 0.8304965060986523,
"eval_cos_loss": 0.540344935744556,
"eval_dec_loss": 0.001444703027248449,
"eval_loss": 1.4680257085035604,
"eval_mse2_loss": 0.1896642409979916,
"eval_mse_loss": 1.4680257085035604,
"eval_rec_loss": 0.056618061303885886,
"eval_var_loss": 0.02941279357144319,
"flow/cos_sim": 0.45965506501797676,
"flow/improvement_ratio": 0.8406627575980067,
"flow/mag_ratio_mean": 0.478700284002178,
"flow/mag_ratio_std": 0.25183968741629426,
"step": 8192
},
{
"epoch": 0.353050186394294,
"eval_bleu": 0.8304965060986523,
"eval_cos_loss": 0.540344935744556,
"eval_dec_loss": 0.001444703027248449,
"eval_loss": 1.4680257085035604,
"eval_mse2_loss": 0.1896642409979916,
"eval_mse_loss": 1.4680257085035604,
"eval_rec_loss": 0.056618061303885886,
"eval_runtime": 102.5949,
"eval_samples_per_second": 292.412,
"eval_steps_per_second": 4.571,
"eval_var_loss": 0.02941279357144319,
"flow/cos_sim": 0.45965506501797676,
"flow/improvement_ratio": 0.8406627575980067,
"flow/mag_ratio_mean": 0.478700284002178,
"flow/mag_ratio_std": 0.25183968741629426,
"step": 8192
},
{
"epoch": 0.36408300471911564,
"grad_norm": 0.6018216013908386,
"learning_rate": 0.0007274392162748551,
"loss": 1.4694677591323853,
"step": 8448
},
{
"epoch": 0.3751158230439373,
"grad_norm": 0.5599421262741089,
"learning_rate": 0.000711518231245687,
"loss": 1.4721711874008179,
"step": 8704
},
{
"epoch": 0.38614864136875904,
"grad_norm": 0.6241788864135742,
"learning_rate": 0.0006953315799141723,
"loss": 1.459176778793335,
"step": 8960
},
{
"epoch": 0.3971814596935807,
"grad_norm": 0.6998386383056641,
"learning_rate": 0.0006788995926687669,
"loss": 1.4632288217544556,
"step": 9216
},
{
"epoch": 0.3971814596935807,
"eval_bleu": 0.7369729060948697,
"eval_cos_loss": 0.5354188728942546,
"eval_dec_loss": 0.0013972995771112035,
"eval_loss": 1.456240051336634,
"eval_mse2_loss": 0.18758021689045912,
"eval_mse_loss": 1.456240051336634,
"eval_rec_loss": 0.05933690067730161,
"eval_var_loss": 0.029272472025203045,
"flow/cos_sim": 0.4645811278047338,
"flow/improvement_ratio": 0.841912091286706,
"flow/mag_ratio_mean": 0.4744996659791292,
"flow/mag_ratio_std": 0.25510632248321324,
"step": 9216
},
{
"epoch": 0.3971814596935807,
"eval_bleu": 0.7369729060948697,
"eval_cos_loss": 0.5354188728942546,
"eval_dec_loss": 0.0013972995771112035,
"eval_loss": 1.456240051336634,
"eval_mse2_loss": 0.18758021689045912,
"eval_mse_loss": 1.456240051336634,
"eval_rec_loss": 0.05933690067730161,
"eval_runtime": 102.7869,
"eval_samples_per_second": 291.866,
"eval_steps_per_second": 4.563,
"eval_var_loss": 0.029272472025203045,
"flow/cos_sim": 0.4645811278047338,
"flow/improvement_ratio": 0.841912091286706,
"flow/mag_ratio_mean": 0.4744996659791292,
"flow/mag_ratio_std": 0.25510632248321324,
"step": 9216
},
{
"epoch": 0.4082142780184024,
"grad_norm": 0.5962811708450317,
"learning_rate": 0.0006622429080391422,
"loss": 1.4640510082244873,
"step": 9472
},
{
"epoch": 0.4192470963432241,
"grad_norm": 0.588157594203949,
"learning_rate": 0.0006453824467742515,
"loss": 1.4573228359222412,
"step": 9728
},
{
"epoch": 0.43027991466804577,
"grad_norm": 0.5932533740997314,
"learning_rate": 0.0006283393855659275,
"loss": 1.45904541015625,
"step": 9984
},
{
"epoch": 0.44131273299286744,
"grad_norm": 0.6125295162200928,
"learning_rate": 0.0006111351304510173,
"loss": 1.455463171005249,
"step": 10240
},
{
"epoch": 0.44131273299286744,
"eval_bleu": 0.782062866367082,
"eval_cos_loss": 0.5326331170128861,
"eval_dec_loss": 0.0014520329028074289,
"eval_loss": 1.453022389777942,
"eval_mse2_loss": 0.18840382176675777,
"eval_mse_loss": 1.453022389777942,
"eval_rec_loss": 0.05694365586195864,
"eval_var_loss": 0.030047652452612227,
"flow/cos_sim": 0.4673668822881255,
"flow/improvement_ratio": 0.8451325959488273,
"flow/mag_ratio_mean": 0.4745017219581075,
"flow/mag_ratio_std": 0.2538460113092272,
"step": 10240
},
{
"epoch": 0.44131273299286744,
"eval_bleu": 0.782062866367082,
"eval_cos_loss": 0.5326331170128861,
"eval_dec_loss": 0.0014520329028074289,
"eval_loss": 1.453022389777942,
"eval_mse2_loss": 0.18840382176675777,
"eval_mse_loss": 1.453022389777942,
"eval_rec_loss": 0.05694365586195864,
"eval_runtime": 102.6396,
"eval_samples_per_second": 292.285,
"eval_steps_per_second": 4.569,
"eval_var_loss": 0.030047652452612227,
"flow/cos_sim": 0.4673668822881255,
"flow/improvement_ratio": 0.8451325959488273,
"flow/mag_ratio_mean": 0.4745017219581075,
"flow/mag_ratio_std": 0.2538460113092272,
"step": 10240
},
{
"epoch": 0.45234555131768917,
"grad_norm": 0.6636393666267395,
"learning_rate": 0.0005937912899254605,
"loss": 1.449182152748108,
"step": 10496
},
{
"epoch": 0.46337836964251083,
"grad_norm": 0.5821182727813721,
"learning_rate": 0.0005763296478040787,
"loss": 1.4548357725143433,
"step": 10752
},
{
"epoch": 0.4744111879673325,
"grad_norm": 0.6481524109840393,
"learning_rate": 0.0005587721358601663,
"loss": 1.4508562088012695,
"step": 11008
},
{
"epoch": 0.4854440062921542,
"grad_norm": 0.653151273727417,
"learning_rate": 0.0005411408062792448,
"loss": 1.4442917108535767,
"step": 11264
},
{
"epoch": 0.4854440062921542,
"eval_bleu": 0.7210051310934674,
"eval_cos_loss": 0.5317811018495417,
"eval_dec_loss": 0.001381080663882877,
"eval_loss": 1.4509358108679116,
"eval_mse2_loss": 0.18439998461811274,
"eval_mse_loss": 1.4509358108679116,
"eval_rec_loss": 0.06008440565301983,
"eval_var_loss": 0.02928201055952481,
"flow/cos_sim": 0.46821889872235783,
"flow/improvement_ratio": 0.8448605187920365,
"flow/mag_ratio_mean": 0.4716693379604486,
"flow/mag_ratio_std": 0.25824843223161026,
"step": 11264
},
{
"epoch": 0.4854440062921542,
"eval_bleu": 0.7210051310934674,
"eval_cos_loss": 0.5317811018495417,
"eval_dec_loss": 0.001381080663882877,
"eval_loss": 1.4509358108679116,
"eval_mse2_loss": 0.18439998461811274,
"eval_mse_loss": 1.4509358108679116,
"eval_rec_loss": 0.06008440565301983,
"eval_runtime": 102.2168,
"eval_samples_per_second": 293.494,
"eval_steps_per_second": 4.588,
"eval_var_loss": 0.02928201055952481,
"flow/cos_sim": 0.46821889872235783,
"flow/improvement_ratio": 0.8448605187920365,
"flow/mag_ratio_mean": 0.4716693379604486,
"flow/mag_ratio_std": 0.25824843223161026,
"step": 11264
},
{
"epoch": 0.4964768246169759,
"grad_norm": 0.6343415379524231,
"learning_rate": 0.0005234578039615789,
"loss": 1.439915418624878,
"step": 11520
},
{
"epoch": 0.5075096429417976,
"grad_norm": 0.7004493474960327,
"learning_rate": 0.0005057453387082458,
"loss": 1.4451959133148193,
"step": 11776
},
{
"epoch": 0.5185424612666193,
"grad_norm": 0.7312789559364319,
"learning_rate": 0.0004880256573256866,
"loss": 1.4458304643630981,
"step": 12032
},
{
"epoch": 0.5295752795914409,
"grad_norm": 0.6173807382583618,
"learning_rate": 0.0004703210156837805,
"loss": 1.4372222423553467,
"step": 12288
},
{
"epoch": 0.5295752795914409,
"eval_bleu": 0.7895861883551821,
"eval_cos_loss": 0.5291873634751163,
"eval_dec_loss": 0.0014323489154225587,
"eval_loss": 1.4454485322875001,
"eval_mse2_loss": 0.18589616244408622,
"eval_mse_loss": 1.4454485322875001,
"eval_rec_loss": 0.05557121256036736,
"eval_var_loss": 0.02942733407052341,
"flow/cos_sim": 0.4708126370967832,
"flow/improvement_ratio": 0.8446106520542966,
"flow/mag_ratio_mean": 0.4888702236385996,
"flow/mag_ratio_std": 0.2535232830403456,
"step": 12288
},
{
"epoch": 0.5295752795914409,
"eval_bleu": 0.7895861883551821,
"eval_cos_loss": 0.5291873634751163,
"eval_dec_loss": 0.0014323489154225587,
"eval_loss": 1.4454485322875001,
"eval_mse2_loss": 0.18589616244408622,
"eval_mse_loss": 1.4454485322875001,
"eval_rec_loss": 0.05557121256036736,
"eval_runtime": 103.43,
"eval_samples_per_second": 290.051,
"eval_steps_per_second": 4.534,
"eval_var_loss": 0.02942733407052341,
"flow/cos_sim": 0.4708126370967832,
"flow/improvement_ratio": 0.8446106520542966,
"flow/mag_ratio_mean": 0.4888702236385996,
"flow/mag_ratio_std": 0.2535232830403456,
"step": 12288
},
{
"epoch": 0.5406080979162626,
"grad_norm": 0.6357247233390808,
"learning_rate": 0.0004526536507625343,
"loss": 1.4381682872772217,
"step": 12544
},
{
"epoch": 0.5516409162410844,
"grad_norm": 0.6554076671600342,
"learning_rate": 0.00043504575272249973,
"loss": 1.433600664138794,
"step": 12800
},
{
"epoch": 0.562673734565906,
"grad_norm": 0.6298866271972656,
"learning_rate": 0.0004175194370339921,
"loss": 1.4380649328231812,
"step": 13056
},
{
"epoch": 0.5737065528907277,
"grad_norm": 0.6736286282539368,
"learning_rate": 0.0004000967167001243,
"loss": 1.4344258308410645,
"step": 13312
},
{
"epoch": 0.5737065528907277,
"eval_bleu": 0.754198615923969,
"eval_cos_loss": 0.5237328007912585,
"eval_dec_loss": 0.0013661543356946239,
"eval_loss": 1.4330016496593256,
"eval_mse2_loss": 0.18149238913806517,
"eval_mse_loss": 1.4330016496593256,
"eval_rec_loss": 0.05589268211104564,
"eval_var_loss": 0.029215975571225194,
"flow/cos_sim": 0.47626719946291907,
"flow/improvement_ratio": 0.8467817164179104,
"flow/mag_ratio_mean": 0.48472079412261054,
"flow/mag_ratio_std": 0.25520913404569445,
"step": 13312
},
{
"epoch": 0.5737065528907277,
"eval_bleu": 0.754198615923969,
"eval_cos_loss": 0.5237328007912585,
"eval_dec_loss": 0.0013661543356946239,
"eval_loss": 1.4330016496593256,
"eval_mse2_loss": 0.18149238913806517,
"eval_mse_loss": 1.4330016496593256,
"eval_rec_loss": 0.05589268211104564,
"eval_runtime": 104.5844,
"eval_samples_per_second": 286.85,
"eval_steps_per_second": 4.484,
"eval_var_loss": 0.029215975571225194,
"flow/cos_sim": 0.47626719946291907,
"flow/improvement_ratio": 0.8467817164179104,
"flow/mag_ratio_mean": 0.48472079412261054,
"flow/mag_ratio_std": 0.25520913404569445,
"step": 13312
},
{
"epoch": 0.5847393712155494,
"grad_norm": 0.6239475011825562,
"learning_rate": 0.00038279947460853446,
"loss": 1.4331660270690918,
"step": 13568
},
{
"epoch": 0.595772189540371,
"grad_norm": 0.6627410054206848,
"learning_rate": 0.00036564943604654345,
"loss": 1.4354665279388428,
"step": 13824
},
{
"epoch": 0.6068050078651928,
"grad_norm": 0.6042789816856384,
"learning_rate": 0.00034866814141425254,
"loss": 1.4358711242675781,
"step": 14080
},
{
"epoch": 0.6178378261900145,
"grad_norm": 0.6111028790473938,
"learning_rate": 0.0003318769191698637,
"loss": 1.4299204349517822,
"step": 14336
},
{
"epoch": 0.6178378261900145,
"eval_bleu": 0.7007889817819709,
"eval_cos_loss": 0.5228769168543663,
"eval_dec_loss": 0.001358627397164917,
"eval_loss": 1.4326896403135776,
"eval_mse2_loss": 0.18112752599312043,
"eval_mse_loss": 1.4326896403135776,
"eval_rec_loss": 0.05488209239939954,
"eval_var_loss": 0.02930486012401103,
"flow/cos_sim": 0.4771230810486686,
"flow/improvement_ratio": 0.8480699183081767,
"flow/mag_ratio_mean": 0.48936520539112943,
"flow/mag_ratio_std": 0.2627385834386862,
"step": 14336
},
{
"epoch": 0.6178378261900145,
"eval_bleu": 0.7007889817819709,
"eval_cos_loss": 0.5228769168543663,
"eval_dec_loss": 0.001358627397164917,
"eval_loss": 1.4326896403135776,
"eval_mse2_loss": 0.18112752599312043,
"eval_mse_loss": 1.4326896403135776,
"eval_rec_loss": 0.05488209239939954,
"eval_runtime": 103.0204,
"eval_samples_per_second": 291.205,
"eval_steps_per_second": 4.552,
"eval_var_loss": 0.02930486012401103,
"flow/cos_sim": 0.4771230810486686,
"flow/improvement_ratio": 0.8480699183081767,
"flow/mag_ratio_mean": 0.48936520539112943,
"flow/mag_ratio_std": 0.2627385834386862,
"step": 14336
},
{
"epoch": 0.6288706445148361,
"grad_norm": 0.6931398510932922,
"learning_rate": 0.00031529685904119485,
"loss": 1.4271036386489868,
"step": 14592
},
{
"epoch": 0.6399034628396578,
"grad_norm": 0.616621196269989,
"learning_rate": 0.0002989487855370421,
"loss": 1.4223978519439697,
"step": 14848
},
{
"epoch": 0.6509362811644795,
"grad_norm": 0.7069717645645142,
"learning_rate": 0.00028285323179165424,
"loss": 1.4210408926010132,
"step": 15104
},
{
"epoch": 0.6619690994893012,
"grad_norm": 0.5767509937286377,
"learning_rate": 0.0002670304137751759,
"loss": 1.4249491691589355,
"step": 15360
},
{
"epoch": 0.6619690994893012,
"eval_bleu": 0.7712555700160785,
"eval_cos_loss": 0.520311662700893,
"eval_dec_loss": 0.0013948907095809597,
"eval_loss": 1.4238692244995377,
"eval_mse2_loss": 0.1801110237900382,
"eval_mse_loss": 1.4238692244995377,
"eval_rec_loss": 0.05672604351370002,
"eval_var_loss": 0.029106232196664507,
"flow/cos_sim": 0.4796883367907518,
"flow/improvement_ratio": 0.8454879620181981,
"flow/mag_ratio_mean": 0.4905342829507043,
"flow/mag_ratio_std": 0.2601209406786636,
"step": 15360
},
{
"epoch": 0.6619690994893012,
"eval_bleu": 0.7712555700160785,
"eval_cos_loss": 0.520311662700893,
"eval_dec_loss": 0.0013948907095809597,
"eval_loss": 1.4238692244995377,
"eval_mse2_loss": 0.1801110237900382,
"eval_mse_loss": 1.4238692244995377,
"eval_rec_loss": 0.05672604351370002,
"eval_runtime": 102.9224,
"eval_samples_per_second": 291.482,
"eval_steps_per_second": 4.557,
"eval_var_loss": 0.029106232196664507,
"flow/cos_sim": 0.4796883367907518,
"flow/improvement_ratio": 0.8454879620181981,
"flow/mag_ratio_mean": 0.4905342829507043,
"flow/mag_ratio_std": 0.2601209406786636,
"step": 15360
},
{
"epoch": 0.6730019178141229,
"grad_norm": 0.7135971784591675,
"learning_rate": 0.0002515002049024435,
"loss": 1.4220284223556519,
"step": 15616
},
{
"epoch": 0.6840347361389446,
"grad_norm": 0.6657771468162537,
"learning_rate": 0.00023628211107203429,
"loss": 1.421180248260498,
"step": 15872
},
{
"epoch": 0.6950675544637662,
"grad_norm": 0.6840319037437439,
"learning_rate": 0.00022139524616691188,
"loss": 1.4254897832870483,
"step": 16128
},
{
"epoch": 0.706100372788588,
"grad_norm": 0.6978499889373779,
"learning_rate": 0.000206858308047443,
"loss": 1.4185926914215088,
"step": 16384
},
{
"epoch": 0.706100372788588,
"eval_bleu": 0.7788876579155211,
"eval_cos_loss": 0.5166550292643403,
"eval_dec_loss": 0.0013616397724124983,
"eval_loss": 1.4177445305435896,
"eval_mse2_loss": 0.17684134553426872,
"eval_mse_loss": 1.4177445305435896,
"eval_rec_loss": 0.05370217473951103,
"eval_var_loss": 0.02986719635233823,
"flow/cos_sim": 0.48334496971894936,
"flow/improvement_ratio": 0.851445895522388,
"flow/mag_ratio_mean": 0.49312538899846675,
"flow/mag_ratio_std": 0.2614598782586136,
"step": 16384
},
{
"epoch": 0.706100372788588,
"eval_bleu": 0.7788876579155211,
"eval_cos_loss": 0.5166550292643403,
"eval_dec_loss": 0.0013616397724124983,
"eval_loss": 1.4177445305435896,
"eval_mse2_loss": 0.17684134553426872,
"eval_mse_loss": 1.4177445305435896,
"eval_rec_loss": 0.05370217473951103,
"eval_runtime": 103.4098,
"eval_samples_per_second": 290.108,
"eval_steps_per_second": 4.535,
"eval_var_loss": 0.02986719635233823,
"flow/cos_sim": 0.48334496971894936,
"flow/improvement_ratio": 0.851445895522388,
"flow/mag_ratio_mean": 0.49312538899846675,
"flow/mag_ratio_std": 0.2614598782586136,
"step": 16384
},
{
"epoch": 0.7171331911134096,
"grad_norm": 0.7240028977394104,
"learning_rate": 0.00019268955506693798,
"loss": 1.4189178943634033,
"step": 16640
},
{
"epoch": 0.7281660094382313,
"grad_norm": 0.6644338369369507,
"learning_rate": 0.00017890678313921,
"loss": 1.4202007055282593,
"step": 16896
},
{
"epoch": 0.739198827763053,
"grad_norm": 0.8413478136062622,
"learning_rate": 0.00016552730338695792,
"loss": 1.419106364250183,
"step": 17152
},
{
"epoch": 0.7502316460878746,
"grad_norm": 0.741065263748169,
"learning_rate": 0.00015256792039904465,
"loss": 1.415405511856079,
"step": 17408
},
{
"epoch": 0.7502316460878746,
"eval_bleu": 0.7637354358631164,
"eval_cos_loss": 0.5136227607727051,
"eval_dec_loss": 0.0013235103740173923,
"eval_loss": 1.4103716327183282,
"eval_mse2_loss": 0.17683548507278662,
"eval_mse_loss": 1.4103716327183282,
"eval_rec_loss": 0.05761792201366125,
"eval_var_loss": 0.03023185586926144,
"flow/cos_sim": 0.4863772399898277,
"flow/improvement_ratio": 0.8529339908028463,
"flow/mag_ratio_mean": 0.4943711748128253,
"flow/mag_ratio_std": 0.2643810258046396,
"step": 17408
},
{
"epoch": 0.7502316460878746,
"eval_bleu": 0.7637354358631164,
"eval_cos_loss": 0.5136227607727051,
"eval_dec_loss": 0.0013235103740173923,
"eval_loss": 1.4103716327183282,
"eval_mse2_loss": 0.17683548507278662,
"eval_mse_loss": 1.4103716327183282,
"eval_rec_loss": 0.05761792201366125,
"eval_runtime": 103.2171,
"eval_samples_per_second": 290.65,
"eval_steps_per_second": 4.544,
"eval_var_loss": 0.03023185586926144,
"flow/cos_sim": 0.4863772399898277,
"flow/improvement_ratio": 0.8529339908028463,
"flow/mag_ratio_mean": 0.4943711748128253,
"flow/mag_ratio_std": 0.2643810258046396,
"step": 17408
},
{
"epoch": 0.7612644644126964,
"grad_norm": 0.5421018600463867,
"learning_rate": 0.00014004491112398103,
"loss": 1.4142208099365234,
"step": 17664
},
{
"epoch": 0.7722972827375181,
"grad_norm": 0.665582537651062,
"learning_rate": 0.00012797400442612433,
"loss": 1.411756992340088,
"step": 17920
},
{
"epoch": 0.7833301010623397,
"grad_norm": 0.6837579607963562,
"learning_rate": 0.00011637036133026895,
"loss": 1.4075802564620972,
"step": 18176
},
{
"epoch": 0.7943629193871614,
"grad_norm": 0.7160040736198425,
"learning_rate": 0.00010524855597944216,
"loss": 1.4070231914520264,
"step": 18432
},
{
"epoch": 0.7943629193871614,
"eval_bleu": 0.8024029342579875,
"eval_cos_loss": 0.5127464083593283,
"eval_dec_loss": 0.0013179335473900858,
"eval_loss": 1.4091586799763922,
"eval_mse2_loss": 0.17562630394501472,
"eval_mse_loss": 1.4091586799763922,
"eval_rec_loss": 0.059627406716124334,
"eval_var_loss": 0.029311500787576123,
"flow/cos_sim": 0.4872535904333281,
"flow/improvement_ratio": 0.8548329780096693,
"flow/mag_ratio_mean": 0.49533584078491877,
"flow/mag_ratio_std": 0.2655049035988891,
"step": 18432
},
{
"epoch": 0.7943629193871614,
"eval_bleu": 0.8024029342579875,
"eval_cos_loss": 0.5127464083593283,
"eval_dec_loss": 0.0013179335473900858,
"eval_loss": 1.4091586799763922,
"eval_mse2_loss": 0.17562630394501472,
"eval_mse_loss": 1.4091586799763922,
"eval_rec_loss": 0.059627406716124334,
"eval_runtime": 103.4418,
"eval_samples_per_second": 290.018,
"eval_steps_per_second": 4.534,
"eval_var_loss": 0.029311500787576123,
"flow/cos_sim": 0.4872535904333281,
"flow/improvement_ratio": 0.8548329780096693,
"flow/mag_ratio_mean": 0.49533584078491877,
"flow/mag_ratio_std": 0.2655049035988891,
"step": 18432
},
{
"epoch": 0.8053957377119831,
"grad_norm": 0.727080762386322,
"learning_rate": 9.462255732982089e-05,
"loss": 1.406097650527954,
"step": 18688
},
{
"epoch": 0.8164285560368048,
"grad_norm": 0.6209878921508789,
"learning_rate": 8.450571160576348e-05,
"loss": 1.4059816598892212,
"step": 18944
},
{
"epoch": 0.8274613743616265,
"grad_norm": 0.659706175327301,
"learning_rate": 7.491072553698764e-05,
"loss": 1.410292148590088,
"step": 19200
},
{
"epoch": 0.8384941926864482,
"grad_norm": 0.5520651340484619,
"learning_rate": 6.584965039895586e-05,
"loss": 1.402584195137024,
"step": 19456
},
{
"epoch": 0.8384941926864482,
"eval_bleu": 0.7435766156577157,
"eval_cos_loss": 0.5144387822923884,
"eval_dec_loss": 0.0013450082680801236,
"eval_loss": 1.4127296161041585,
"eval_mse2_loss": 0.17700788906134013,
"eval_mse_loss": 1.4127296161041585,
"eval_rec_loss": 0.058054142113306374,
"eval_var_loss": 0.0291894421593022,
"flow/cos_sim": 0.4855612163731793,
"flow/improvement_ratio": 0.8498689588199038,
"flow/mag_ratio_mean": 0.4951269815344292,
"flow/mag_ratio_std": 0.26389562489508567,
"step": 19456
},
{
"epoch": 0.8384941926864482,
"eval_bleu": 0.7435766156577157,
"eval_cos_loss": 0.5144387822923884,
"eval_dec_loss": 0.0013450082680801236,
"eval_loss": 1.4127296161041585,
"eval_mse2_loss": 0.17700788906134013,
"eval_mse_loss": 1.4127296161041585,
"eval_rec_loss": 0.058054142113306374,
"eval_runtime": 103.8996,
"eval_samples_per_second": 288.74,
"eval_steps_per_second": 4.514,
"eval_var_loss": 0.0291894421593022,
"flow/cos_sim": 0.4855612163731793,
"flow/improvement_ratio": 0.8498689588199038,
"flow/mag_ratio_mean": 0.4951269815344292,
"flow/mag_ratio_std": 0.26389562489508567,
"step": 19456
},
{
"epoch": 0.8495270110112698,
"grad_norm": 0.7207916378974915,
"learning_rate": 5.73338668765051e-05,
"loss": 1.408148169517517,
"step": 19712
},
{
"epoch": 0.8605598293360915,
"grad_norm": 0.6444937586784363,
"learning_rate": 4.9374070769740984e-05,
"loss": 1.4169082641601562,
"step": 19968
},
{
"epoch": 0.8715926476609133,
"grad_norm": 0.6508966088294983,
"learning_rate": 4.198025956014095e-05,
"loss": 1.412489891052246,
"step": 20224
},
{
"epoch": 0.8826254659857349,
"grad_norm": 0.8207064270973206,
"learning_rate": 3.516171985374755e-05,
"loss": 1.4014993906021118,
"step": 20480
},
{
"epoch": 0.8826254659857349,
"eval_bleu": 0.7371724072330055,
"eval_cos_loss": 0.5137777864805925,
"eval_dec_loss": 0.0013706799051735545,
"eval_loss": 1.409610672546094,
"eval_mse2_loss": 0.17626210351361396,
"eval_mse_loss": 1.409610672546094,
"eval_rec_loss": 0.054663843655986574,
"eval_var_loss": 0.029133995291965604,
"flow/cos_sim": 0.48622221402776267,
"flow/improvement_ratio": 0.8532393833975802,
"flow/mag_ratio_mean": 0.4940188680249237,
"flow/mag_ratio_std": 0.2655889735674299,
"step": 20480
},
{
"epoch": 0.8826254659857349,
"eval_bleu": 0.7371724072330055,
"eval_cos_loss": 0.5137777864805925,
"eval_dec_loss": 0.0013706799051735545,
"eval_loss": 1.409610672546094,
"eval_mse2_loss": 0.17626210351361396,
"eval_mse_loss": 1.409610672546094,
"eval_rec_loss": 0.054663843655986574,
"eval_runtime": 104.0379,
"eval_samples_per_second": 288.356,
"eval_steps_per_second": 4.508,
"eval_var_loss": 0.029133995291965604,
"flow/cos_sim": 0.48622221402776267,
"flow/improvement_ratio": 0.8532393833975802,
"flow/mag_ratio_mean": 0.4940188680249237,
"flow/mag_ratio_std": 0.2655889735674299,
"step": 20480
},
{
"epoch": 0.8936582843105566,
"grad_norm": 0.5996214151382446,
"learning_rate": 2.8927015717215733e-05,
"loss": 1.4027345180511475,
"step": 20736
},
{
"epoch": 0.9046911026353783,
"grad_norm": 0.6789088845252991,
"learning_rate": 2.3283977921370547e-05,
"loss": 1.4052367210388184,
"step": 20992
},
{
"epoch": 0.9157239209601999,
"grad_norm": 0.6676909327507019,
"learning_rate": 1.8239694105780413e-05,
"loss": 1.406872034072876,
"step": 21248
},
{
"epoch": 0.9267567392850217,
"grad_norm": 0.5955349802970886,
"learning_rate": 1.3800499876701955e-05,
"loss": 1.4064586162567139,
"step": 21504
},
{
"epoch": 0.9267567392850217,
"eval_bleu": 0.7426715244464189,
"eval_cos_loss": 0.5071186275878695,
"eval_dec_loss": 0.001350255208637894,
"eval_loss": 1.3937010752366805,
"eval_mse2_loss": 0.17541809607225695,
"eval_mse_loss": 1.3937010752366805,
"eval_rec_loss": 0.05103444970691445,
"eval_var_loss": 0.02931836185091213,
"flow/cos_sim": 0.49288137139542015,
"flow/improvement_ratio": 0.8528173863252343,
"flow/mag_ratio_mean": 0.4987420951252553,
"flow/mag_ratio_std": 0.26658764935886936,
"step": 21504
},
{
"epoch": 0.9267567392850217,
"eval_bleu": 0.7426715244464189,
"eval_cos_loss": 0.5071186275878695,
"eval_dec_loss": 0.001350255208637894,
"eval_loss": 1.3937010752366805,
"eval_mse2_loss": 0.17541809607225695,
"eval_mse_loss": 1.3937010752366805,
"eval_rec_loss": 0.05103444970691445,
"eval_runtime": 104.142,
"eval_samples_per_second": 288.068,
"eval_steps_per_second": 4.503,
"eval_var_loss": 0.02931836185091213,
"flow/cos_sim": 0.49288137139542015,
"flow/improvement_ratio": 0.8528173863252343,
"flow/mag_ratio_mean": 0.4987420951252553,
"flow/mag_ratio_std": 0.26658764935886936,
"step": 21504
},
{
"epoch": 0.9377895576098434,
"grad_norm": 0.8665277361869812,
"learning_rate": 9.971970849576406e-06,
"loss": 1.4001104831695557,
"step": 21760
},
{
"epoch": 0.948822375934665,
"grad_norm": 0.6160731315612793,
"learning_rate": 6.758915646072339e-06,
"loss": 1.4023921489715576,
"step": 22016
},
{
"epoch": 0.9598551942594867,
"grad_norm": 0.6823092103004456,
"learning_rate": 4.1653698544703575e-06,
"loss": 1.4057680368423462,
"step": 22272
},
{
"epoch": 0.9708880125843083,
"grad_norm": 0.7474303841590881,
"learning_rate": 2.1945909609756286e-06,
"loss": 1.402069330215454,
"step": 22528
},
{
"epoch": 0.9708880125843083,
"eval_bleu": 0.7359243412878435,
"eval_cos_loss": 0.5119307249593836,
"eval_dec_loss": 0.0013969406839550735,
"eval_loss": 1.4065255351158092,
"eval_mse2_loss": 0.17711426552806073,
"eval_mse_loss": 1.4065255351158092,
"eval_rec_loss": 0.056425910651572604,
"eval_var_loss": 0.02955100304091663,
"flow/cos_sim": 0.4880692758031491,
"flow/improvement_ratio": 0.8548107675906184,
"flow/mag_ratio_mean": 0.49543472253945847,
"flow/mag_ratio_std": 0.263321697140045,
"step": 22528
},
{
"epoch": 0.9708880125843083,
"eval_bleu": 0.7359243412878435,
"eval_cos_loss": 0.5119307249593836,
"eval_dec_loss": 0.0013969406839550735,
"eval_loss": 1.4065255351158092,
"eval_mse2_loss": 0.17711426552806073,
"eval_mse_loss": 1.4065255351158092,
"eval_rec_loss": 0.056425910651572604,
"eval_runtime": 103.1789,
"eval_samples_per_second": 290.757,
"eval_steps_per_second": 4.546,
"eval_var_loss": 0.02955100304091663,
"flow/cos_sim": 0.4880692758031491,
"flow/improvement_ratio": 0.8548107675906184,
"flow/mag_ratio_mean": 0.49543472253945847,
"flow/mag_ratio_std": 0.263321697140045,
"step": 22528
},
{
"epoch": 0.9819208309091301,
"grad_norm": 0.6597904562950134,
"learning_rate": 8.490542583243222e-07,
"loss": 1.4066376686096191,
"step": 22784
},
{
"epoch": 0.9929536492339518,
"grad_norm": 0.7082860469818115,
"learning_rate": 1.3044973682302396e-07,
"loss": 1.4058468341827393,
"step": 23040
}
],
"logging_steps": 256,
"max_steps": 23204,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1024,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}