| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.709436053761951, |
| "eval_steps": 1024, |
| "global_step": 15360, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011823934229365849, |
| "grad_norm": 1.1487267017364502, |
| "learning_rate": 0.000498046875, |
| "loss": 11.798027992248535, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.023647868458731697, |
| "grad_norm": 0.8386015295982361, |
| "learning_rate": 0.000998046875, |
| "loss": 1.7853779792785645, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03547180268809755, |
| "grad_norm": 0.7344363331794739, |
| "learning_rate": 0.000999640996023194, |
| "loss": 1.103014588356018, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "grad_norm": 1.1188315153121948, |
| "learning_rate": 0.0009985588674043958, |
| "loss": 0.9580796360969543, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.9192375404443994, |
| "eval_ce_loss": 0.26167990953648745, |
| "eval_cos_loss": 0.26406568816127296, |
| "eval_loss": 0.9037111119864738, |
| "eval_mse_loss": 0.6016385473617135, |
| "eval_rec_loss": 0.013986090569199833, |
| "flow/cos_sim": 0.7359343250048215, |
| "flow/improvement_ratio": 0.9760946458605326, |
| "flow/mag_ratio_mean": 0.7269674200717717, |
| "flow/mag_ratio_std": 0.1390784539316343, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.9192375404443994, |
| "eval_ce_loss": 0.26167990953648745, |
| "eval_cos_loss": 0.26406568816127296, |
| "eval_loss": 0.9037111119864738, |
| "eval_mse_loss": 0.6016385473617135, |
| "eval_rec_loss": 0.013986090569199833, |
| "eval_runtime": 144.0156, |
| "eval_samples_per_second": 194.375, |
| "eval_steps_per_second": 3.041, |
| "flow/cos_sim": 0.7359343250048215, |
| "flow/improvement_ratio": 0.9760946458605326, |
| "flow/mag_ratio_mean": 0.7269674200717717, |
| "flow/mag_ratio_std": 0.1390784539316343, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05911967114682925, |
| "grad_norm": 1.0113043785095215, |
| "learning_rate": 0.0009967551747861387, |
| "loss": 0.8836896419525146, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.0709436053761951, |
| "grad_norm": 0.9680636525154114, |
| "learning_rate": 0.000994232528651847, |
| "loss": 0.8432819247245789, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.08276753960556095, |
| "grad_norm": 1.166627049446106, |
| "learning_rate": 0.0009909945800260092, |
| "loss": 0.7870283126831055, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "grad_norm": 0.7747617363929749, |
| "learning_rate": 0.0009870460151900522, |
| "loss": 0.7735522389411926, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.9264483676285591, |
| "eval_ce_loss": 0.22021640589690372, |
| "eval_cos_loss": 0.15934634184864557, |
| "eval_loss": 0.751849727815689, |
| "eval_mse_loss": 0.51190055103879, |
| "eval_rec_loss": 0.0037981332031229603, |
| "flow/cos_sim": 0.8406536742432477, |
| "flow/improvement_ratio": 0.9754726998337871, |
| "flow/mag_ratio_mean": 0.8395581131112085, |
| "flow/mag_ratio_std": 0.09706963221096013, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.9264483676285591, |
| "eval_ce_loss": 0.22021640589690372, |
| "eval_cos_loss": 0.15934634184864557, |
| "eval_loss": 0.751849727815689, |
| "eval_mse_loss": 0.51190055103879, |
| "eval_rec_loss": 0.0037981332031229603, |
| "eval_runtime": 139.3758, |
| "eval_samples_per_second": 200.845, |
| "eval_steps_per_second": 3.143, |
| "flow/cos_sim": 0.8406536742432477, |
| "flow/improvement_ratio": 0.9754726998337871, |
| "flow/mag_ratio_mean": 0.8395581131112085, |
| "flow/mag_ratio_std": 0.09706963221096013, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.10641540806429264, |
| "grad_norm": 1.3507742881774902, |
| "learning_rate": 0.0009823925488998885, |
| "loss": 0.7531520128250122, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.1182393422936585, |
| "grad_norm": 1.090326189994812, |
| "learning_rate": 0.0009770409161149525, |
| "loss": 0.7384664416313171, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.13006327652302435, |
| "grad_norm": 1.6648627519607544, |
| "learning_rate": 0.0009709988622506973, |
| "loss": 0.7159472107887268, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "grad_norm": 0.720405638217926, |
| "learning_rate": 0.000964275131968659, |
| "loss": 0.7134207487106323, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.9362046037171927, |
| "eval_ce_loss": 0.18669388993094638, |
| "eval_cos_loss": 0.09599785676829892, |
| "eval_loss": 0.7009708360177741, |
| "eval_mse_loss": 0.5027340843797274, |
| "eval_rec_loss": 0.0019430727923829022, |
| "flow/cos_sim": 0.9040021625555814, |
| "flow/improvement_ratio": 0.9753212502830104, |
| "flow/mag_ratio_mean": 0.906250716208323, |
| "flow/mag_ratio_std": 0.07307879087519428, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.9362046037171927, |
| "eval_ce_loss": 0.18669388993094638, |
| "eval_cos_loss": 0.09599785676829892, |
| "eval_loss": 0.7009708360177741, |
| "eval_mse_loss": 0.5027340843797274, |
| "eval_rec_loss": 0.0019430727923829022, |
| "eval_runtime": 139.2617, |
| "eval_samples_per_second": 201.01, |
| "eval_steps_per_second": 3.145, |
| "flow/cos_sim": 0.9040021625555814, |
| "flow/improvement_ratio": 0.9753212502830104, |
| "flow/mag_ratio_mean": 0.906250716208323, |
| "flow/mag_ratio_std": 0.07307879087519428, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.15371114498175603, |
| "grad_norm": 1.171225905418396, |
| "learning_rate": 0.0009568794565203123, |
| "loss": 0.6967981457710266, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.1655350792111219, |
| "grad_norm": 0.9184499979019165, |
| "learning_rate": 0.0009488225396630347, |
| "loss": 0.6859588623046875, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.17735901344048774, |
| "grad_norm": 1.0972322225570679, |
| "learning_rate": 0.0009401160421685646, |
| "loss": 0.68483966588974, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "grad_norm": 1.2944236993789673, |
| "learning_rate": 0.0009307725649463714, |
| "loss": 0.6722217202186584, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.9367529454304925, |
| "eval_ce_loss": 0.17851220300531687, |
| "eval_cos_loss": 0.0685597131088308, |
| "eval_loss": 0.6638682314522191, |
| "eval_mse_loss": 0.4771829223660029, |
| "eval_rec_loss": 0.0013171346048419428, |
| "flow/cos_sim": 0.9314403127045392, |
| "flow/improvement_ratio": 0.9747336862021929, |
| "flow/mag_ratio_mean": 0.9314602082722807, |
| "flow/mag_ratio_std": 0.05983651272068013, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.9367529454304925, |
| "eval_ce_loss": 0.17851220300531687, |
| "eval_cos_loss": 0.0685597131088308, |
| "eval_loss": 0.6638682314522191, |
| "eval_mse_loss": 0.4771829223660029, |
| "eval_rec_loss": 0.0013171346048419428, |
| "eval_runtime": 143.247, |
| "eval_samples_per_second": 195.418, |
| "eval_steps_per_second": 3.058, |
| "flow/cos_sim": 0.9314403127045392, |
| "flow/improvement_ratio": 0.9747336862021929, |
| "flow/mag_ratio_mean": 0.9314602082722807, |
| "flow/mag_ratio_std": 0.05983651272068013, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.20100688189921945, |
| "grad_norm": 1.0950664281845093, |
| "learning_rate": 0.0009208056308063659, |
| "loss": 0.6635431051254272, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.2128308161285853, |
| "grad_norm": 1.0510311126708984, |
| "learning_rate": 0.0009102296648873445, |
| "loss": 0.652130126953125, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.22465475035795113, |
| "grad_norm": 0.7107524275779724, |
| "learning_rate": 0.0008990599737794927, |
| "loss": 0.6548014283180237, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "grad_norm": 1.119279146194458, |
| "learning_rate": 0.0008873127233711644, |
| "loss": 0.644295871257782, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.9375087809294717, |
| "eval_ce_loss": 0.17866700632629498, |
| "eval_cos_loss": 0.05568918508379699, |
| "eval_loss": 0.6384022356304404, |
| "eval_mse_loss": 0.4531491862856634, |
| "eval_rec_loss": 0.0010171248973892112, |
| "flow/cos_sim": 0.9443108406785417, |
| "flow/improvement_ratio": 0.9754358607612245, |
| "flow/mag_ratio_mean": 0.9432625220790846, |
| "flow/mag_ratio_std": 0.0532344374550532, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.9375087809294717, |
| "eval_ce_loss": 0.17866700632629498, |
| "eval_cos_loss": 0.05568918508379699, |
| "eval_loss": 0.6384022356304404, |
| "eval_mse_loss": 0.4531491862856634, |
| "eval_rec_loss": 0.0010171248973892112, |
| "eval_runtime": 140.7376, |
| "eval_samples_per_second": 198.902, |
| "eval_steps_per_second": 3.112, |
| "flow/cos_sim": 0.9443108406785417, |
| "flow/improvement_ratio": 0.9754358607612245, |
| "flow/mag_ratio_mean": 0.9432625220790846, |
| "flow/mag_ratio_std": 0.0532344374550532, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.24830261881668284, |
| "grad_norm": 1.223329782485962, |
| "learning_rate": 0.0008750049154520011, |
| "loss": 0.6385497450828552, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2601265530460487, |
| "grad_norm": 0.7951129078865051, |
| "learning_rate": 0.0008621543631062487, |
| "loss": 0.6350575089454651, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.27195048727541454, |
| "grad_norm": 0.8830247521400452, |
| "learning_rate": 0.0008487796649318904, |
| "loss": 0.6269800066947937, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "grad_norm": 1.0399079322814941, |
| "learning_rate": 0.0008349001781229053, |
| "loss": 0.6236906051635742, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.9406288888554537, |
| "eval_ce_loss": 0.16271350685439018, |
| "eval_cos_loss": 0.04946032597696128, |
| "eval_loss": 0.6158577807962078, |
| "eval_mse_loss": 0.4472781540868489, |
| "eval_rec_loss": 0.0009200886164281037, |
| "flow/cos_sim": 0.9505396935765602, |
| "flow/improvement_ratio": 0.9752184091365501, |
| "flow/mag_ratio_mean": 0.9574754819205907, |
| "flow/mag_ratio_std": 0.04891788842131014, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.9406288888554537, |
| "eval_ce_loss": 0.16271350685439018, |
| "eval_cos_loss": 0.04946032597696128, |
| "eval_loss": 0.6158577807962078, |
| "eval_mse_loss": 0.4472781540868489, |
| "eval_rec_loss": 0.0009200886164281037, |
| "eval_runtime": 138.7881, |
| "eval_samples_per_second": 201.696, |
| "eval_steps_per_second": 3.156, |
| "flow/cos_sim": 0.9505396935765602, |
| "flow/improvement_ratio": 0.9752184091365501, |
| "flow/mag_ratio_mean": 0.9574754819205907, |
| "flow/mag_ratio_std": 0.04891788842131014, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2955983557341462, |
| "grad_norm": 1.2804330587387085, |
| "learning_rate": 0.0008205359904536107, |
| "loss": 0.6104704737663269, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.30742228996351206, |
| "grad_norm": 1.038807988166809, |
| "learning_rate": 0.0008057078912056363, |
| "loss": 0.6035579442977905, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.3192462241928779, |
| "grad_norm": 1.1162539720535278, |
| "learning_rate": 0.0007904373410796086, |
| "loss": 0.6099694967269897, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "grad_norm": 0.8053554892539978, |
| "learning_rate": 0.0007747464411350876, |
| "loss": 0.6010444760322571, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.9403609007223014, |
| "eval_ce_loss": 0.16489822024130793, |
| "eval_cos_loss": 0.042615741474307293, |
| "eval_loss": 0.5965314044799979, |
| "eval_mse_loss": 0.4266453656839998, |
| "eval_rec_loss": 0.0007262465627901213, |
| "flow/cos_sim": 0.9573842790573155, |
| "flow/improvement_ratio": 0.9754182146564466, |
| "flow/mag_ratio_mean": 0.9583017167435389, |
| "flow/mag_ratio_std": 0.044361623012584096, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.9403609007223014, |
| "eval_ce_loss": 0.16489822024130793, |
| "eval_cos_loss": 0.042615741474307293, |
| "eval_loss": 0.5965314044799979, |
| "eval_mse_loss": 0.4266453656839998, |
| "eval_rec_loss": 0.0007262465627901213, |
| "eval_runtime": 141.0373, |
| "eval_samples_per_second": 198.479, |
| "eval_steps_per_second": 3.106, |
| "flow/cos_sim": 0.9573842790573155, |
| "flow/improvement_ratio": 0.9754182146564466, |
| "flow/mag_ratio_mean": 0.9583017167435389, |
| "flow/mag_ratio_std": 0.044361623012584096, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.34289409265160964, |
| "grad_norm": 0.926774799823761, |
| "learning_rate": 0.000758657900803716, |
| "loss": 0.6045265793800354, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3547180268809755, |
| "grad_norm": 0.6091651320457458, |
| "learning_rate": 0.000742195005021869, |
| "loss": 0.6046400666236877, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3665419611103413, |
| "grad_norm": 0.9995866417884827, |
| "learning_rate": 0.0007253815805303786, |
| "loss": 0.5923656225204468, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "grad_norm": 0.8947123885154724, |
| "learning_rate": 0.0007082419613901028, |
| "loss": 0.5886037349700928, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.9428308469149022, |
| "eval_ce_loss": 0.15756705103943883, |
| "eval_cos_loss": 0.03951380795641849, |
| "eval_loss": 0.5839528071281572, |
| "eval_mse_loss": 0.42179430878325685, |
| "eval_rec_loss": 0.0006400666907252946, |
| "flow/cos_sim": 0.9604862126857723, |
| "flow/improvement_ratio": 0.9746591260988419, |
| "flow/mag_ratio_mean": 0.9592139723638421, |
| "flow/mag_ratio_std": 0.04008094550505893, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.9428308469149022, |
| "eval_ce_loss": 0.15756705103943883, |
| "eval_cos_loss": 0.03951380795641849, |
| "eval_loss": 0.5839528071281572, |
| "eval_mse_loss": 0.42179430878325685, |
| "eval_rec_loss": 0.0006400666907252946, |
| "eval_runtime": 142.8618, |
| "eval_samples_per_second": 195.945, |
| "eval_steps_per_second": 3.066, |
| "flow/cos_sim": 0.9604862126857723, |
| "flow/improvement_ratio": 0.9746591260988419, |
| "flow/mag_ratio_mean": 0.9592139723638421, |
| "flow/mag_ratio_std": 0.04008094550505893, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.390189829569073, |
| "grad_norm": 0.9762691259384155, |
| "learning_rate": 0.0006908009537632514, |
| "loss": 0.591374397277832, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.4020137637984389, |
| "grad_norm": 0.9119466543197632, |
| "learning_rate": 0.0006730838000114403, |
| "loss": 0.5883693099021912, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.41383769802780473, |
| "grad_norm": 0.7088457942008972, |
| "learning_rate": 0.0006551161421624341, |
| "loss": 0.591505229473114, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "grad_norm": 0.962149441242218, |
| "learning_rate": 0.0006369239847984517, |
| "loss": 0.5791484117507935, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.9397317595032229, |
| "eval_ce_loss": 0.1654567693755643, |
| "eval_cos_loss": 0.036672262636493876, |
| "eval_loss": 0.5844078893394775, |
| "eval_mse_loss": 0.41475171422305174, |
| "eval_rec_loss": 0.0005321793435803258, |
| "flow/cos_sim": 0.9633277582523485, |
| "flow/improvement_ratio": 0.9749675198504913, |
| "flow/mag_ratio_mean": 0.9673667468436776, |
| "flow/mag_ratio_std": 0.03610456871829893, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.9397317595032229, |
| "eval_ce_loss": 0.1654567693755643, |
| "eval_cos_loss": 0.036672262636493876, |
| "eval_loss": 0.5844078893394775, |
| "eval_mse_loss": 0.41475171422305174, |
| "eval_rec_loss": 0.0005321793435803258, |
| "eval_runtime": 140.1978, |
| "eval_samples_per_second": 199.668, |
| "eval_steps_per_second": 3.124, |
| "flow/cos_sim": 0.9633277582523485, |
| "flow/improvement_ratio": 0.9749675198504913, |
| "flow/mag_ratio_mean": 0.9673667468436776, |
| "flow/mag_ratio_std": 0.03610456871829893, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4374855664865364, |
| "grad_norm": 0.853702187538147, |
| "learning_rate": 0.0006185336574197479, |
| "loss": 0.5742002725601196, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.44930950071590225, |
| "grad_norm": 0.9982457160949707, |
| "learning_rate": 0.0005999717763379407, |
| "loss": 0.5812740921974182, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.4611334349452681, |
| "grad_norm": 1.16475510597229, |
| "learning_rate": 0.0005812652061542363, |
| "loss": 0.5733819603919983, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "grad_norm": 1.0706837177276611, |
| "learning_rate": 0.0005624410208783071, |
| "loss": 0.5738887190818787, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.9459802582441368, |
| "eval_ce_loss": 0.14516126815459296, |
| "eval_cos_loss": 0.033954130285780995, |
| "eval_loss": 0.5643789048336413, |
| "eval_mse_loss": 0.4153396790702593, |
| "eval_rec_loss": 0.0004825431066599768, |
| "flow/cos_sim": 0.9660458944431723, |
| "flow/improvement_ratio": 0.9768404536051293, |
| "flow/mag_ratio_mean": 0.964692687198996, |
| "flow/mag_ratio_std": 0.033606582049059266, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.9459802582441368, |
| "eval_ce_loss": 0.14516126815459296, |
| "eval_cos_loss": 0.033954130285780995, |
| "eval_loss": 0.5643789048336413, |
| "eval_mse_loss": 0.4153396790702593, |
| "eval_rec_loss": 0.0004825431066599768, |
| "eval_runtime": 138.7003, |
| "eval_samples_per_second": 201.824, |
| "eval_steps_per_second": 3.158, |
| "flow/cos_sim": 0.9660458944431723, |
| "flow/improvement_ratio": 0.9768404536051293, |
| "flow/mag_ratio_mean": 0.964692687198996, |
| "flow/mag_ratio_std": 0.033606582049059266, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.48478130340399983, |
| "grad_norm": 1.2897191047668457, |
| "learning_rate": 0.0005435264647440881, |
| "loss": 0.5754253268241882, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.49660523763336567, |
| "grad_norm": 0.8889420628547668, |
| "learning_rate": 0.000524548912779213, |
| "loss": 0.5645499229431152, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.5084291718627315, |
| "grad_norm": 1.0012623071670532, |
| "learning_rate": 0.0005055358311851499, |
| "loss": 0.570586085319519, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "grad_norm": 0.7268548011779785, |
| "learning_rate": 0.0004865147375853812, |
| "loss": 0.5649828910827637, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.9438391043268173, |
| "eval_ce_loss": 0.15168385392003883, |
| "eval_cos_loss": 0.031372046842320596, |
| "eval_loss": 0.5559442507349737, |
| "eval_mse_loss": 0.40068320514948946, |
| "eval_rec_loss": 0.0004399877304908156, |
| "flow/cos_sim": 0.9686279725538541, |
| "flow/improvement_ratio": 0.9761811211773249, |
| "flow/mag_ratio_mean": 0.9699762897676529, |
| "flow/mag_ratio_std": 0.031497096553546926, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.9438391043268173, |
| "eval_ce_loss": 0.15168385392003883, |
| "eval_cos_loss": 0.031372046842320596, |
| "eval_loss": 0.5559442507349737, |
| "eval_mse_loss": 0.40068320514948946, |
| "eval_rec_loss": 0.0004399877304908156, |
| "eval_runtime": 142.7289, |
| "eval_samples_per_second": 196.127, |
| "eval_steps_per_second": 3.069, |
| "flow/cos_sim": 0.9686279725538541, |
| "flow/improvement_ratio": 0.9761811211773249, |
| "flow/mag_ratio_mean": 0.9699762897676529, |
| "flow/mag_ratio_std": 0.031497096553546926, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5320770403214632, |
| "grad_norm": 1.215234637260437, |
| "learning_rate": 0.0004675131611991607, |
| "loss": 0.5652971863746643, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5439009745508291, |
| "grad_norm": 0.622643768787384, |
| "learning_rate": 0.0004485586029984899, |
| "loss": 0.5565246343612671, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5557249087801949, |
| "grad_norm": 1.0961378812789917, |
| "learning_rate": 0.00042967849590597266, |
| "loss": 0.5489715933799744, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "grad_norm": 0.9782881140708923, |
| "learning_rate": 0.0004109001650911621, |
| "loss": 0.5525087118148804, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.9412862694661508, |
| "eval_ce_loss": 0.1635204538911343, |
| "eval_cos_loss": 0.030069323277874895, |
| "eval_loss": 0.5582960859689539, |
| "eval_mse_loss": 0.39135666776737665, |
| "eval_rec_loss": 0.00041203244906421176, |
| "flow/cos_sim": 0.969930695344324, |
| "flow/improvement_ratio": 0.9753037164472553, |
| "flow/mag_ratio_mean": 0.9802452540833112, |
| "flow/mag_ratio_std": 0.030909983262623827, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.9412862694661508, |
| "eval_ce_loss": 0.1635204538911343, |
| "eval_cos_loss": 0.030069323277874895, |
| "eval_loss": 0.5582960859689539, |
| "eval_mse_loss": 0.39135666776737665, |
| "eval_rec_loss": 0.00041203244906421176, |
| "eval_runtime": 141.5788, |
| "eval_samples_per_second": 197.72, |
| "eval_steps_per_second": 3.094, |
| "flow/cos_sim": 0.969930695344324, |
| "flow/improvement_ratio": 0.9753037164472553, |
| "flow/mag_ratio_mean": 0.9802452540833112, |
| "flow/mag_ratio_std": 0.030909983262623827, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5793727772389267, |
| "grad_norm": 1.1210603713989258, |
| "learning_rate": 0.0003922507884228551, |
| "loss": 0.5510575175285339, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5911967114682924, |
| "grad_norm": 0.7037401795387268, |
| "learning_rate": 0.00037375735713457723, |
| "loss": 0.5481619238853455, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.6030206456976583, |
| "grad_norm": 0.7474592924118042, |
| "learning_rate": 0.00035544663676018276, |
| "loss": 0.5506067872047424, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "grad_norm": 0.7188865542411804, |
| "learning_rate": 0.00033734512839611255, |
| "loss": 0.5451632738113403, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_bleu": 0.9428834105382213, |
| "eval_ce_loss": 0.1552433374967239, |
| "eval_cos_loss": 0.02893113123771807, |
| "eval_loss": 0.5456167366951024, |
| "eval_mse_loss": 0.3870963177316265, |
| "eval_rec_loss": 0.0003839662346667844, |
| "flow/cos_sim": 0.9710688916243375, |
| "flow/improvement_ratio": 0.9738836331998921, |
| "flow/mag_ratio_mean": 0.9706440245451993, |
| "flow/mag_ratio_std": 0.029164127312328446, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_bleu": 0.9428834105382213, |
| "eval_ce_loss": 0.1552433374967239, |
| "eval_cos_loss": 0.02893113123771807, |
| "eval_loss": 0.5456167366951024, |
| "eval_mse_loss": 0.3870963177316265, |
| "eval_rec_loss": 0.0003839662346667844, |
| "eval_runtime": 141.1326, |
| "eval_samples_per_second": 198.345, |
| "eval_steps_per_second": 3.103, |
| "flow/cos_sim": 0.9710688916243375, |
| "flow/improvement_ratio": 0.9738836331998921, |
| "flow/mag_ratio_mean": 0.9706440245451993, |
| "flow/mag_ratio_std": 0.029164127312328446, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.62666851415639, |
| "grad_norm": 0.7716706991195679, |
| "learning_rate": 0.0003194790303463687, |
| "loss": 0.537281334400177, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.6384924483857558, |
| "grad_norm": 1.332189917564392, |
| "learning_rate": 0.00030187420020572406, |
| "loss": 0.5493588447570801, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6503163826151217, |
| "grad_norm": 0.7042582035064697, |
| "learning_rate": 0.00028455611743603626, |
| "loss": 0.5357389450073242, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "grad_norm": 0.9820289611816406, |
| "learning_rate": 0.0002675498464898373, |
| "loss": 0.5378322601318359, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "eval_bleu": 0.9466625254289972, |
| "eval_ce_loss": 0.14409655002562421, |
| "eval_cos_loss": 0.028247294284097137, |
| "eval_loss": 0.5304203147757544, |
| "eval_mse_loss": 0.38314900074375274, |
| "eval_rec_loss": 0.0003500353127845417, |
| "flow/cos_sim": 0.9717527268684074, |
| "flow/improvement_ratio": 0.9754393091212669, |
| "flow/mag_ratio_mean": 0.973318548915593, |
| "flow/mag_ratio_std": 0.028512526988200674, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "eval_bleu": 0.9466625254289972, |
| "eval_ce_loss": 0.14409655002562421, |
| "eval_cos_loss": 0.028247294284097137, |
| "eval_loss": 0.5304203147757544, |
| "eval_mse_loss": 0.38314900074375274, |
| "eval_rec_loss": 0.0003500353127845417, |
| "eval_runtime": 142.684, |
| "eval_samples_per_second": 196.189, |
| "eval_steps_per_second": 3.07, |
| "flow/cos_sim": 0.9717527268684074, |
| "flow/improvement_ratio": 0.9754393091212669, |
| "flow/mag_ratio_mean": 0.973318548915593, |
| "flow/mag_ratio_std": 0.028512526988200674, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6739642510738534, |
| "grad_norm": 0.9396564364433289, |
| "learning_rate": 0.0002508800005345623, |
| "loss": 0.5384619832038879, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6857881853032193, |
| "grad_norm": 1.3034203052520752, |
| "learning_rate": 0.00023457070582992562, |
| "loss": 0.5381016135215759, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6976121195325851, |
| "grad_norm": 0.7913278341293335, |
| "learning_rate": 0.00021864556680999692, |
| "loss": 0.5290021896362305, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "grad_norm": 0.9535462856292725, |
| "learning_rate": 0.0002031276319205152, |
| "loss": 0.534302830696106, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "eval_bleu": 0.9467887438059129, |
| "eval_ce_loss": 0.14362057716241233, |
| "eval_cos_loss": 0.02781058812607505, |
| "eval_loss": 0.5298489093372266, |
| "eval_mse_loss": 0.38311818289702343, |
| "eval_rec_loss": 0.00032909089600148805, |
| "flow/cos_sim": 0.9721894303685454, |
| "flow/improvement_ratio": 0.974593520844908, |
| "flow/mag_ratio_mean": 0.9698141316572825, |
| "flow/mag_ratio_std": 0.02832854554465372, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "eval_bleu": 0.9467887438059129, |
| "eval_ce_loss": 0.14362057716241233, |
| "eval_cos_loss": 0.02781058812607505, |
| "eval_loss": 0.5298489093372266, |
| "eval_mse_loss": 0.38311818289702343, |
| "eval_rec_loss": 0.00032909089600148805, |
| "eval_runtime": 141.9779, |
| "eval_samples_per_second": 197.164, |
| "eval_steps_per_second": 3.085, |
| "flow/cos_sim": 0.9721894303685454, |
| "flow/improvement_ratio": 0.974593520844908, |
| "flow/mag_ratio_mean": 0.9698141316572825, |
| "flow/mag_ratio_std": 0.02832854554465372, |
| "step": 15360 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 21651, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|