| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.3971814596935807, |
| "eval_steps": 1024, |
| "global_step": 9216, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 2.52653431892395, |
| "learning_rate": 9.990234375e-05, |
| "loss": 12.195906639099121, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.20492372710719675, |
| "eval_ce_clean_loss": 1.7283075054062964, |
| "eval_ce_pred_loss": 4.658400398327598, |
| "eval_flow_consistency_loss": 0.1969631168443257, |
| "eval_flow_mse_loss": 0.8933940772562902, |
| "eval_loss": 5.981063390337328, |
| "flow/cos_sim": 0.40864291520261053, |
| "flow/improvement_ratio": 0.974421706789338, |
| "flow/mag_ratio_mean": 0.3482704608679326, |
| "flow/mag_ratio_std": 0.11673642702893153, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.20492372710719675, |
| "eval_ce_clean_loss": 1.7283075054062964, |
| "eval_ce_pred_loss": 4.658400398327598, |
| "eval_flow_consistency_loss": 0.1969631168443257, |
| "eval_flow_mse_loss": 0.8933940772562902, |
| "eval_loss": 5.981063390337328, |
| "eval_runtime": 220.5442, |
| "eval_samples_per_second": 136.027, |
| "eval_steps_per_second": 2.127, |
| "flow/cos_sim": 0.40864291520261053, |
| "flow/improvement_ratio": 0.974421706789338, |
| "flow/mag_ratio_mean": 0.3482704608679326, |
| "flow/mag_ratio_std": 0.11673642702893153, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 0.8477205038070679, |
| "learning_rate": 9.9476028157316e-05, |
| "loss": 4.296904563903809, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.4203954756104412, |
| "eval_ce_clean_loss": 0.31376014100208977, |
| "eval_ce_pred_loss": 2.8760192099410586, |
| "eval_flow_consistency_loss": 0.2066244624698086, |
| "eval_flow_mse_loss": 0.9511578829049556, |
| "eval_loss": 3.3814436672592976, |
| "flow/cos_sim": 0.5975300241380866, |
| "flow/improvement_ratio": 0.9937155933014111, |
| "flow/mag_ratio_mean": 0.5349513870566639, |
| "flow/mag_ratio_std": 0.10326376852831606, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.4203954756104412, |
| "eval_ce_clean_loss": 0.31376014100208977, |
| "eval_ce_pred_loss": 2.8760192099410586, |
| "eval_flow_consistency_loss": 0.2066244624698086, |
| "eval_flow_mse_loss": 0.9511578829049556, |
| "eval_loss": 3.3814436672592976, |
| "eval_runtime": 220.3483, |
| "eval_samples_per_second": 136.148, |
| "eval_steps_per_second": 2.128, |
| "flow/cos_sim": 0.5975300241380866, |
| "flow/improvement_ratio": 0.9937155933014111, |
| "flow/mag_ratio_mean": 0.5349513870566639, |
| "flow/mag_ratio_std": 0.10326376852831606, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 0.5322298407554626, |
| "learning_rate": 9.791307026072513e-05, |
| "loss": 3.1445906162261963, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.48539258855261896, |
| "eval_ce_clean_loss": 0.13052247220010899, |
| "eval_ce_pred_loss": 2.364091571205969, |
| "eval_flow_consistency_loss": 0.22353638632338185, |
| "eval_flow_mse_loss": 0.9629143555281259, |
| "eval_loss": 2.860069102569938, |
| "flow/cos_sim": 0.6811817029137601, |
| "flow/improvement_ratio": 0.9940002780479155, |
| "flow/mag_ratio_mean": 0.6364521633333234, |
| "flow/mag_ratio_std": 0.09635336341252967, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.48539258855261896, |
| "eval_ce_clean_loss": 0.13052247220010899, |
| "eval_ce_pred_loss": 2.364091571205969, |
| "eval_flow_consistency_loss": 0.22353638632338185, |
| "eval_flow_mse_loss": 0.9629143555281259, |
| "eval_loss": 2.860069102569938, |
| "eval_runtime": 219.3797, |
| "eval_samples_per_second": 136.749, |
| "eval_steps_per_second": 2.138, |
| "flow/cos_sim": 0.6811817029137601, |
| "flow/improvement_ratio": 0.9940002780479155, |
| "flow/mag_ratio_mean": 0.6364521633333234, |
| "flow/mag_ratio_std": 0.09635336341252967, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 0.4867124855518341, |
| "learning_rate": 9.53439476074686e-05, |
| "loss": 2.76550555229187, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.5224707415420085, |
| "eval_ce_clean_loss": 0.07004804110952786, |
| "eval_ce_pred_loss": 2.1067297717909823, |
| "eval_flow_consistency_loss": 0.22862433567484305, |
| "eval_flow_mse_loss": 0.9485902956553868, |
| "eval_loss": 2.6076613288444244, |
| "flow/cos_sim": 0.7283694244651143, |
| "flow/improvement_ratio": 0.9943000164621675, |
| "flow/mag_ratio_mean": 0.693297458991313, |
| "flow/mag_ratio_std": 0.10988020541062996, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.5224707415420085, |
| "eval_ce_clean_loss": 0.07004804110952786, |
| "eval_ce_pred_loss": 2.1067297717909823, |
| "eval_flow_consistency_loss": 0.22862433567484305, |
| "eval_flow_mse_loss": 0.9485902956553868, |
| "eval_loss": 2.6076613288444244, |
| "eval_runtime": 220.6021, |
| "eval_samples_per_second": 135.991, |
| "eval_steps_per_second": 2.126, |
| "flow/cos_sim": 0.7283694244651143, |
| "flow/improvement_ratio": 0.9943000164621675, |
| "flow/mag_ratio_mean": 0.693297458991313, |
| "flow/mag_ratio_std": 0.10988020541062996, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 1.0894174575805664, |
| "learning_rate": 9.182261125213742e-05, |
| "loss": 2.59098219871521, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.5393664611968214, |
| "eval_ce_clean_loss": 0.042873796897688145, |
| "eval_ce_pred_loss": 1.9514871731495806, |
| "eval_flow_consistency_loss": 0.21636765890284135, |
| "eval_flow_mse_loss": 0.9199989291904832, |
| "eval_loss": 2.43709755960558, |
| "flow/cos_sim": 0.7597128864544541, |
| "flow/improvement_ratio": 0.9947783349673631, |
| "flow/mag_ratio_mean": 0.7298253399731, |
| "flow/mag_ratio_std": 0.11818532291442346, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.5393664611968214, |
| "eval_ce_clean_loss": 0.042873796897688145, |
| "eval_ce_pred_loss": 1.9514871731495806, |
| "eval_flow_consistency_loss": 0.21636765890284135, |
| "eval_flow_mse_loss": 0.9199989291904832, |
| "eval_loss": 2.43709755960558, |
| "eval_runtime": 221.6485, |
| "eval_samples_per_second": 135.349, |
| "eval_steps_per_second": 2.116, |
| "flow/cos_sim": 0.7597128864544541, |
| "flow/improvement_ratio": 0.9947783349673631, |
| "flow/mag_ratio_mean": 0.7298253399731, |
| "flow/mag_ratio_std": 0.11818532291442346, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 0.6129837036132812, |
| "learning_rate": 8.742300854391668e-05, |
| "loss": 2.4653327465057373, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.5560727460810808, |
| "eval_ce_clean_loss": 0.028736061462238907, |
| "eval_ce_pred_loss": 1.8302468360105812, |
| "eval_flow_consistency_loss": 0.19070348343742427, |
| "eval_flow_mse_loss": 0.9170380932435807, |
| "eval_loss": 2.3222986602071507, |
| "flow/cos_sim": 0.785038922641323, |
| "flow/improvement_ratio": 0.9949500379023521, |
| "flow/mag_ratio_mean": 0.7534581313509423, |
| "flow/mag_ratio_std": 0.1224009687267641, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.5560727460810808, |
| "eval_ce_clean_loss": 0.028736061462238907, |
| "eval_ce_pred_loss": 1.8302468360105812, |
| "eval_flow_consistency_loss": 0.19070348343742427, |
| "eval_flow_mse_loss": 0.9170380932435807, |
| "eval_loss": 2.3222986602071507, |
| "eval_runtime": 223.1234, |
| "eval_samples_per_second": 134.455, |
| "eval_steps_per_second": 2.102, |
| "flow/cos_sim": 0.785038922641323, |
| "flow/improvement_ratio": 0.9949500379023521, |
| "flow/mag_ratio_mean": 0.7534581313509423, |
| "flow/mag_ratio_std": 0.1224009687267641, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 0.62188321352005, |
| "learning_rate": 8.223753024725232e-05, |
| "loss": 2.393148899078369, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.564904310555124, |
| "eval_ce_clean_loss": 0.020537127653307626, |
| "eval_ce_pred_loss": 1.7842765613151257, |
| "eval_flow_consistency_loss": 0.17003079746831964, |
| "eval_flow_mse_loss": 0.9064064752826813, |
| "eval_loss": 2.2609525786788223, |
| "flow/cos_sim": 0.7994557322977957, |
| "flow/improvement_ratio": 0.9958853139551972, |
| "flow/mag_ratio_mean": 0.7681991188510903, |
| "flow/mag_ratio_std": 0.12335055292860023, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.564904310555124, |
| "eval_ce_clean_loss": 0.020537127653307626, |
| "eval_ce_pred_loss": 1.7842765613151257, |
| "eval_flow_consistency_loss": 0.17003079746831964, |
| "eval_flow_mse_loss": 0.9064064752826813, |
| "eval_loss": 2.2609525786788223, |
| "eval_runtime": 222.6102, |
| "eval_samples_per_second": 134.765, |
| "eval_steps_per_second": 2.107, |
| "flow/cos_sim": 0.7994557322977957, |
| "flow/improvement_ratio": 0.9958853139551972, |
| "flow/mag_ratio_mean": 0.7681991188510903, |
| "flow/mag_ratio_std": 0.12335055292860023, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "grad_norm": 0.5962383151054382, |
| "learning_rate": 7.638108666591397e-05, |
| "loss": 2.3131587505340576, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.5698637066625777, |
| "eval_ce_clean_loss": 0.015508384108225673, |
| "eval_ce_pred_loss": 1.7501515601235411, |
| "eval_flow_consistency_loss": 0.15371561949568263, |
| "eval_flow_mse_loss": 0.9227519265370074, |
| "eval_loss": 2.240224189595627, |
| "flow/cos_sim": 0.809604713911695, |
| "flow/improvement_ratio": 0.9950154384316158, |
| "flow/mag_ratio_mean": 0.7853530231061012, |
| "flow/mag_ratio_std": 0.1324605667419525, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.353050186394294, |
| "eval_bleu": 0.5698637066625777, |
| "eval_ce_clean_loss": 0.015508384108225673, |
| "eval_ce_pred_loss": 1.7501515601235411, |
| "eval_flow_consistency_loss": 0.15371561949568263, |
| "eval_flow_mse_loss": 0.9227519265370074, |
| "eval_loss": 2.240224189595627, |
| "eval_runtime": 221.2743, |
| "eval_samples_per_second": 135.578, |
| "eval_steps_per_second": 2.12, |
| "flow/cos_sim": 0.809604713911695, |
| "flow/improvement_ratio": 0.9950154384316158, |
| "flow/mag_ratio_mean": 0.7853530231061012, |
| "flow/mag_ratio_std": 0.1324605667419525, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "grad_norm": 0.9759780764579773, |
| "learning_rate": 6.997172522088177e-05, |
| "loss": 2.277475118637085, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.5782382780348954, |
| "eval_ce_clean_loss": 0.012276997226776916, |
| "eval_ce_pred_loss": 1.6780265332030844, |
| "eval_flow_consistency_loss": 0.14418257795163056, |
| "eval_flow_mse_loss": 0.89588076715022, |
| "eval_loss": 2.1548676056140015, |
| "flow/cos_sim": 0.8200861430371494, |
| "flow/improvement_ratio": 0.9951881179169043, |
| "flow/mag_ratio_mean": 0.7937601523867039, |
| "flow/mag_ratio_std": 0.1297300179629946, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.3971814596935807, |
| "eval_bleu": 0.5782382780348954, |
| "eval_ce_clean_loss": 0.012276997226776916, |
| "eval_ce_pred_loss": 1.6780265332030844, |
| "eval_flow_consistency_loss": 0.14418257795163056, |
| "eval_flow_mse_loss": 0.89588076715022, |
| "eval_loss": 2.1548676056140015, |
| "eval_runtime": 222.088, |
| "eval_samples_per_second": 135.082, |
| "eval_steps_per_second": 2.112, |
| "flow/cos_sim": 0.8200861430371494, |
| "flow/improvement_ratio": 0.9951881179169043, |
| "flow/mag_ratio_mean": 0.7937601523867039, |
| "flow/mag_ratio_std": 0.1297300179629946, |
| "step": 9216 |
| } |
| ], |
| "logging_steps": 1024, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|