{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3971814596935807, "eval_steps": 1024, "global_step": 9216, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04413127329928675, "grad_norm": 2.52653431892395, "learning_rate": 9.990234375e-05, "loss": 12.195906639099121, "step": 1024 }, { "epoch": 0.04413127329928675, "eval_bleu": 0.20492372710719675, "eval_ce_clean_loss": 1.7283075054062964, "eval_ce_pred_loss": 4.658400398327598, "eval_flow_consistency_loss": 0.1969631168443257, "eval_flow_mse_loss": 0.8933940772562902, "eval_loss": 5.981063390337328, "flow/cos_sim": 0.40864291520261053, "flow/improvement_ratio": 0.974421706789338, "flow/mag_ratio_mean": 0.3482704608679326, "flow/mag_ratio_std": 0.11673642702893153, "step": 1024 }, { "epoch": 0.04413127329928675, "eval_bleu": 0.20492372710719675, "eval_ce_clean_loss": 1.7283075054062964, "eval_ce_pred_loss": 4.658400398327598, "eval_flow_consistency_loss": 0.1969631168443257, "eval_flow_mse_loss": 0.8933940772562902, "eval_loss": 5.981063390337328, "eval_runtime": 220.5442, "eval_samples_per_second": 136.027, "eval_steps_per_second": 2.127, "flow/cos_sim": 0.40864291520261053, "flow/improvement_ratio": 0.974421706789338, "flow/mag_ratio_mean": 0.3482704608679326, "flow/mag_ratio_std": 0.11673642702893153, "step": 1024 }, { "epoch": 0.0882625465985735, "grad_norm": 0.8477205038070679, "learning_rate": 9.9476028157316e-05, "loss": 4.296904563903809, "step": 2048 }, { "epoch": 0.0882625465985735, "eval_bleu": 0.4203954756104412, "eval_ce_clean_loss": 0.31376014100208977, "eval_ce_pred_loss": 2.8760192099410586, "eval_flow_consistency_loss": 0.2066244624698086, "eval_flow_mse_loss": 0.9511578829049556, "eval_loss": 3.3814436672592976, "flow/cos_sim": 0.5975300241380866, "flow/improvement_ratio": 0.9937155933014111, "flow/mag_ratio_mean": 0.5349513870566639, "flow/mag_ratio_std": 0.10326376852831606, "step": 2048 }, { "epoch": 0.0882625465985735, "eval_bleu": 0.4203954756104412, "eval_ce_clean_loss": 0.31376014100208977, "eval_ce_pred_loss": 2.8760192099410586, "eval_flow_consistency_loss": 0.2066244624698086, "eval_flow_mse_loss": 0.9511578829049556, "eval_loss": 3.3814436672592976, "eval_runtime": 220.3483, "eval_samples_per_second": 136.148, "eval_steps_per_second": 2.128, "flow/cos_sim": 0.5975300241380866, "flow/improvement_ratio": 0.9937155933014111, "flow/mag_ratio_mean": 0.5349513870566639, "flow/mag_ratio_std": 0.10326376852831606, "step": 2048 }, { "epoch": 0.13239381989786023, "grad_norm": 0.5322298407554626, "learning_rate": 9.791307026072513e-05, "loss": 3.1445906162261963, "step": 3072 }, { "epoch": 0.13239381989786023, "eval_bleu": 0.48539258855261896, "eval_ce_clean_loss": 0.13052247220010899, "eval_ce_pred_loss": 2.364091571205969, "eval_flow_consistency_loss": 0.22353638632338185, "eval_flow_mse_loss": 0.9629143555281259, "eval_loss": 2.860069102569938, "flow/cos_sim": 0.6811817029137601, "flow/improvement_ratio": 0.9940002780479155, "flow/mag_ratio_mean": 0.6364521633333234, "flow/mag_ratio_std": 0.09635336341252967, "step": 3072 }, { "epoch": 0.13239381989786023, "eval_bleu": 0.48539258855261896, "eval_ce_clean_loss": 0.13052247220010899, "eval_ce_pred_loss": 2.364091571205969, "eval_flow_consistency_loss": 0.22353638632338185, "eval_flow_mse_loss": 0.9629143555281259, "eval_loss": 2.860069102569938, "eval_runtime": 219.3797, "eval_samples_per_second": 136.749, "eval_steps_per_second": 2.138, "flow/cos_sim": 0.6811817029137601, "flow/improvement_ratio": 0.9940002780479155, "flow/mag_ratio_mean": 0.6364521633333234, "flow/mag_ratio_std": 0.09635336341252967, "step": 3072 }, { "epoch": 0.176525093197147, "grad_norm": 0.4867124855518341, "learning_rate": 9.53439476074686e-05, "loss": 2.76550555229187, "step": 4096 }, { "epoch": 0.176525093197147, "eval_bleu": 0.5224707415420085, "eval_ce_clean_loss": 0.07004804110952786, "eval_ce_pred_loss": 2.1067297717909823, "eval_flow_consistency_loss": 0.22862433567484305, "eval_flow_mse_loss": 0.9485902956553868, "eval_loss": 2.6076613288444244, "flow/cos_sim": 0.7283694244651143, "flow/improvement_ratio": 0.9943000164621675, "flow/mag_ratio_mean": 0.693297458991313, "flow/mag_ratio_std": 0.10988020541062996, "step": 4096 }, { "epoch": 0.176525093197147, "eval_bleu": 0.5224707415420085, "eval_ce_clean_loss": 0.07004804110952786, "eval_ce_pred_loss": 2.1067297717909823, "eval_flow_consistency_loss": 0.22862433567484305, "eval_flow_mse_loss": 0.9485902956553868, "eval_loss": 2.6076613288444244, "eval_runtime": 220.6021, "eval_samples_per_second": 135.991, "eval_steps_per_second": 2.126, "flow/cos_sim": 0.7283694244651143, "flow/improvement_ratio": 0.9943000164621675, "flow/mag_ratio_mean": 0.693297458991313, "flow/mag_ratio_std": 0.10988020541062996, "step": 4096 }, { "epoch": 0.22065636649643372, "grad_norm": 1.0894174575805664, "learning_rate": 9.182261125213742e-05, "loss": 2.59098219871521, "step": 5120 }, { "epoch": 0.22065636649643372, "eval_bleu": 0.5393664611968214, "eval_ce_clean_loss": 0.042873796897688145, "eval_ce_pred_loss": 1.9514871731495806, "eval_flow_consistency_loss": 0.21636765890284135, "eval_flow_mse_loss": 0.9199989291904832, "eval_loss": 2.43709755960558, "flow/cos_sim": 0.7597128864544541, "flow/improvement_ratio": 0.9947783349673631, "flow/mag_ratio_mean": 0.7298253399731, "flow/mag_ratio_std": 0.11818532291442346, "step": 5120 }, { "epoch": 0.22065636649643372, "eval_bleu": 0.5393664611968214, "eval_ce_clean_loss": 0.042873796897688145, "eval_ce_pred_loss": 1.9514871731495806, "eval_flow_consistency_loss": 0.21636765890284135, "eval_flow_mse_loss": 0.9199989291904832, "eval_loss": 2.43709755960558, "eval_runtime": 221.6485, "eval_samples_per_second": 135.349, "eval_steps_per_second": 2.116, "flow/cos_sim": 0.7597128864544541, "flow/improvement_ratio": 0.9947783349673631, "flow/mag_ratio_mean": 0.7298253399731, "flow/mag_ratio_std": 0.11818532291442346, "step": 5120 }, { "epoch": 0.26478763979572045, "grad_norm": 0.6129837036132812, "learning_rate": 8.742300854391668e-05, "loss": 2.4653327465057373, "step": 6144 }, { "epoch": 0.26478763979572045, "eval_bleu": 0.5560727460810808, "eval_ce_clean_loss": 0.028736061462238907, "eval_ce_pred_loss": 1.8302468360105812, "eval_flow_consistency_loss": 0.19070348343742427, "eval_flow_mse_loss": 0.9170380932435807, "eval_loss": 2.3222986602071507, "flow/cos_sim": 0.785038922641323, "flow/improvement_ratio": 0.9949500379023521, "flow/mag_ratio_mean": 0.7534581313509423, "flow/mag_ratio_std": 0.1224009687267641, "step": 6144 }, { "epoch": 0.26478763979572045, "eval_bleu": 0.5560727460810808, "eval_ce_clean_loss": 0.028736061462238907, "eval_ce_pred_loss": 1.8302468360105812, "eval_flow_consistency_loss": 0.19070348343742427, "eval_flow_mse_loss": 0.9170380932435807, "eval_loss": 2.3222986602071507, "eval_runtime": 223.1234, "eval_samples_per_second": 134.455, "eval_steps_per_second": 2.102, "flow/cos_sim": 0.785038922641323, "flow/improvement_ratio": 0.9949500379023521, "flow/mag_ratio_mean": 0.7534581313509423, "flow/mag_ratio_std": 0.1224009687267641, "step": 6144 }, { "epoch": 0.30891891309500724, "grad_norm": 0.62188321352005, "learning_rate": 8.223753024725232e-05, "loss": 2.393148899078369, "step": 7168 }, { "epoch": 0.30891891309500724, "eval_bleu": 0.564904310555124, "eval_ce_clean_loss": 0.020537127653307626, "eval_ce_pred_loss": 1.7842765613151257, "eval_flow_consistency_loss": 0.17003079746831964, "eval_flow_mse_loss": 0.9064064752826813, "eval_loss": 2.2609525786788223, "flow/cos_sim": 0.7994557322977957, "flow/improvement_ratio": 0.9958853139551972, "flow/mag_ratio_mean": 0.7681991188510903, "flow/mag_ratio_std": 0.12335055292860023, "step": 7168 }, { "epoch": 0.30891891309500724, "eval_bleu": 0.564904310555124, "eval_ce_clean_loss": 0.020537127653307626, "eval_ce_pred_loss": 1.7842765613151257, "eval_flow_consistency_loss": 0.17003079746831964, "eval_flow_mse_loss": 0.9064064752826813, "eval_loss": 2.2609525786788223, "eval_runtime": 222.6102, "eval_samples_per_second": 134.765, "eval_steps_per_second": 2.107, "flow/cos_sim": 0.7994557322977957, "flow/improvement_ratio": 0.9958853139551972, "flow/mag_ratio_mean": 0.7681991188510903, "flow/mag_ratio_std": 0.12335055292860023, "step": 7168 }, { "epoch": 0.353050186394294, "grad_norm": 0.5962383151054382, "learning_rate": 7.638108666591397e-05, "loss": 2.3131587505340576, "step": 8192 }, { "epoch": 0.353050186394294, "eval_bleu": 0.5698637066625777, "eval_ce_clean_loss": 0.015508384108225673, "eval_ce_pred_loss": 1.7501515601235411, "eval_flow_consistency_loss": 0.15371561949568263, "eval_flow_mse_loss": 0.9227519265370074, "eval_loss": 2.240224189595627, "flow/cos_sim": 0.809604713911695, "flow/improvement_ratio": 0.9950154384316158, "flow/mag_ratio_mean": 0.7853530231061012, "flow/mag_ratio_std": 0.1324605667419525, "step": 8192 }, { "epoch": 0.353050186394294, "eval_bleu": 0.5698637066625777, "eval_ce_clean_loss": 0.015508384108225673, "eval_ce_pred_loss": 1.7501515601235411, "eval_flow_consistency_loss": 0.15371561949568263, "eval_flow_mse_loss": 0.9227519265370074, "eval_loss": 2.240224189595627, "eval_runtime": 221.2743, "eval_samples_per_second": 135.578, "eval_steps_per_second": 2.12, "flow/cos_sim": 0.809604713911695, "flow/improvement_ratio": 0.9950154384316158, "flow/mag_ratio_mean": 0.7853530231061012, "flow/mag_ratio_std": 0.1324605667419525, "step": 8192 }, { "epoch": 0.3971814596935807, "grad_norm": 0.9759780764579773, "learning_rate": 6.997172522088177e-05, "loss": 2.277475118637085, "step": 9216 }, { "epoch": 0.3971814596935807, "eval_bleu": 0.5782382780348954, "eval_ce_clean_loss": 0.012276997226776916, "eval_ce_pred_loss": 1.6780265332030844, "eval_flow_consistency_loss": 0.14418257795163056, "eval_flow_mse_loss": 0.89588076715022, "eval_loss": 2.1548676056140015, "flow/cos_sim": 0.8200861430371494, "flow/improvement_ratio": 0.9951881179169043, "flow/mag_ratio_mean": 0.7937601523867039, "flow/mag_ratio_std": 0.1297300179629946, "step": 9216 }, { "epoch": 0.3971814596935807, "eval_bleu": 0.5782382780348954, "eval_ce_clean_loss": 0.012276997226776916, "eval_ce_pred_loss": 1.6780265332030844, "eval_flow_consistency_loss": 0.14418257795163056, "eval_flow_mse_loss": 0.89588076715022, "eval_loss": 2.1548676056140015, "eval_runtime": 222.088, "eval_samples_per_second": 135.082, "eval_steps_per_second": 2.112, "flow/cos_sim": 0.8200861430371494, "flow/improvement_ratio": 0.9951881179169043, "flow/mag_ratio_mean": 0.7937601523867039, "flow/mag_ratio_std": 0.1297300179629946, "step": 9216 } ], "logging_steps": 1024, "max_steps": 23204, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1024, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }