{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.30891891309500724, "eval_steps": 1024, "global_step": 7168, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04413127329928675, "grad_norm": 0.7938550114631653, "learning_rate": 9.990234375e-05, "loss": 12.336601257324219, "step": 1024 }, { "epoch": 0.04413127329928675, "eval_bleu": 0.16407143517604253, "eval_ce_clean_loss": 1.724545789679993, "eval_ce_pred_loss": 5.058502770690267, "eval_flow_mse_loss": 0.929569506823127, "eval_loss": 6.195067207442164, "flow/cos_sim": 0.055166260543853235, "flow/improvement_ratio": 0.596057767425773, "flow/mag_ratio_mean": 0.4713213621680416, "flow/mag_ratio_std": 0.4101032533371118, "step": 1024 }, { "epoch": 0.04413127329928675, "eval_bleu": 0.16407143517604253, "eval_ce_clean_loss": 1.724545789679993, "eval_ce_pred_loss": 5.058502770690267, "eval_flow_mse_loss": 0.929569506823127, "eval_loss": 6.195067207442164, "eval_runtime": 212.5631, "eval_samples_per_second": 141.135, "eval_steps_per_second": 2.206, "flow/cos_sim": 0.055166260543853235, "flow/improvement_ratio": 0.596057767425773, "flow/mag_ratio_mean": 0.4713213621680416, "flow/mag_ratio_std": 0.4101032533371118, "step": 1024 }, { "epoch": 0.0882625465985735, "grad_norm": 0.450653612613678, "learning_rate": 9.9476028157316e-05, "loss": 4.612443923950195, "step": 2048 }, { "epoch": 0.0882625465985735, "eval_bleu": 0.3321661463154056, "eval_ce_clean_loss": 0.2987517699885216, "eval_ce_pred_loss": 3.625808963389285, "eval_flow_mse_loss": 1.015768651387839, "eval_loss": 3.8525866531868225, "flow/cos_sim": 0.1616891078921015, "flow/improvement_ratio": 0.7423238582702588, "flow/mag_ratio_mean": 0.6468003783017587, "flow/mag_ratio_std": 0.5354506893834071, "step": 2048 }, { "epoch": 0.0882625465985735, "eval_bleu": 0.3321661463154056, "eval_ce_clean_loss": 0.2987517699885216, "eval_ce_pred_loss": 3.625808963389285, "eval_flow_mse_loss": 1.015768651387839, "eval_loss": 3.8525866531868225, "eval_runtime": 208.1773, "eval_samples_per_second": 144.108, "eval_steps_per_second": 2.253, "flow/cos_sim": 0.1616891078921015, "flow/improvement_ratio": 0.7423238582702588, "flow/mag_ratio_mean": 0.6468003783017587, "flow/mag_ratio_std": 0.5354506893834071, "step": 2048 }, { "epoch": 0.13239381989786023, "grad_norm": 0.37855076789855957, "learning_rate": 9.791307026072513e-05, "loss": 3.617213010787964, "step": 3072 }, { "epoch": 0.13239381989786023, "eval_bleu": 0.3836053070830755, "eval_ce_clean_loss": 0.12278842976860908, "eval_ce_pred_loss": 3.1099086214484437, "eval_flow_mse_loss": 1.0722369169121357, "eval_loss": 3.3719613491090885, "flow/cos_sim": 0.22163581698815196, "flow/improvement_ratio": 0.8109787233602772, "flow/mag_ratio_mean": 0.6704668670829171, "flow/mag_ratio_std": 0.4992133626805694, "step": 3072 }, { "epoch": 0.13239381989786023, "eval_bleu": 0.3836053070830755, "eval_ce_clean_loss": 0.12278842976860908, "eval_ce_pred_loss": 3.1099086214484437, "eval_flow_mse_loss": 1.0722369169121357, "eval_loss": 3.3719613491090885, "eval_runtime": 209.1152, "eval_samples_per_second": 143.462, "eval_steps_per_second": 2.243, "flow/cos_sim": 0.22163581698815196, "flow/improvement_ratio": 0.8109787233602772, "flow/mag_ratio_mean": 0.6704668670829171, "flow/mag_ratio_std": 0.4992133626805694, "step": 3072 }, { "epoch": 0.176525093197147, "grad_norm": 0.4389702081680298, "learning_rate": 9.53439476074686e-05, "loss": 3.2931454181671143, "step": 4096 }, { "epoch": 0.176525093197147, "eval_bleu": 0.40933125434293716, "eval_ce_clean_loss": 0.06711880822997612, "eval_ce_pred_loss": 2.8558058159183592, "eval_flow_mse_loss": 1.081392692604553, "eval_loss": 3.147575543125047, "flow/cos_sim": 0.25172679529769587, "flow/improvement_ratio": 0.8514864339248966, "flow/mag_ratio_mean": 0.6699917584594125, "flow/mag_ratio_std": 0.44682612055654464, "step": 4096 }, { "epoch": 0.176525093197147, "eval_bleu": 0.40933125434293716, "eval_ce_clean_loss": 0.06711880822997612, "eval_ce_pred_loss": 2.8558058159183592, "eval_flow_mse_loss": 1.081392692604553, "eval_loss": 3.147575543125047, "eval_runtime": 210.459, "eval_samples_per_second": 142.546, "eval_steps_per_second": 2.228, "flow/cos_sim": 0.25172679529769587, "flow/improvement_ratio": 0.8514864339248966, "flow/mag_ratio_mean": 0.6699917584594125, "flow/mag_ratio_std": 0.44682612055654464, "step": 4096 }, { "epoch": 0.22065636649643372, "grad_norm": 0.44552379846572876, "learning_rate": 9.182261125213742e-05, "loss": 3.127476692199707, "step": 5120 }, { "epoch": 0.22065636649643372, "eval_bleu": 0.42390360625279244, "eval_ce_clean_loss": 0.04179192618377554, "eval_ce_pred_loss": 2.713604238495898, "eval_flow_mse_loss": 1.0599637749606867, "eval_loss": 3.00127863375617, "flow/cos_sim": 0.25444072618413327, "flow/improvement_ratio": 0.8746775842424649, "flow/mag_ratio_mean": 0.671740317395501, "flow/mag_ratio_std": 0.40047251164659, "step": 5120 }, { "epoch": 0.22065636649643372, "eval_bleu": 0.42390360625279244, "eval_ce_clean_loss": 0.04179192618377554, "eval_ce_pred_loss": 2.713604238495898, "eval_flow_mse_loss": 1.0599637749606867, "eval_loss": 3.00127863375617, "eval_runtime": 211.2198, "eval_samples_per_second": 142.032, "eval_steps_per_second": 2.22, "flow/cos_sim": 0.25444072618413327, "flow/improvement_ratio": 0.8746775842424649, "flow/mag_ratio_mean": 0.671740317395501, "flow/mag_ratio_std": 0.40047251164659, "step": 5120 }, { "epoch": 0.26478763979572045, "grad_norm": 0.4731499254703522, "learning_rate": 8.742300854391668e-05, "loss": 3.012479782104492, "step": 6144 }, { "epoch": 0.26478763979572045, "eval_bleu": 0.4346563578061454, "eval_ce_clean_loss": 0.028318230050808586, "eval_ce_pred_loss": 2.6327081789085858, "eval_flow_mse_loss": 1.056245713854141, "eval_loss": 2.927459636985112, "flow/cos_sim": 0.2519478304808074, "flow/improvement_ratio": 0.8869821624969368, "flow/mag_ratio_mean": 0.6784741002867725, "flow/mag_ratio_std": 0.37580890851869764, "step": 6144 }, { "epoch": 0.26478763979572045, "eval_bleu": 0.4346563578061454, "eval_ce_clean_loss": 0.028318230050808586, "eval_ce_pred_loss": 2.6327081789085858, "eval_flow_mse_loss": 1.056245713854141, "eval_loss": 2.927459636985112, "eval_runtime": 211.7504, "eval_samples_per_second": 141.676, "eval_steps_per_second": 2.215, "flow/cos_sim": 0.2519478304808074, "flow/improvement_ratio": 0.8869821624969368, "flow/mag_ratio_mean": 0.6784741002867725, "flow/mag_ratio_std": 0.37580890851869764, "step": 6144 }, { "epoch": 0.30891891309500724, "grad_norm": 0.6240633726119995, "learning_rate": 8.223753024725232e-05, "loss": 2.9197511672973633, "step": 7168 }, { "epoch": 0.30891891309500724, "eval_bleu": 0.44790457353944046, "eval_ce_clean_loss": 0.020565879328656934, "eval_ce_pred_loss": 2.529554044768246, "eval_flow_mse_loss": 1.036346342009522, "eval_loss": 2.8276000312650638, "flow/cos_sim": 0.24807281547517918, "flow/improvement_ratio": 0.8986854375298343, "flow/mag_ratio_mean": 0.6740282399059613, "flow/mag_ratio_std": 0.3316722640287139, "step": 7168 }, { "epoch": 0.30891891309500724, "eval_bleu": 0.44790457353944046, "eval_ce_clean_loss": 0.020565879328656934, "eval_ce_pred_loss": 2.529554044768246, "eval_flow_mse_loss": 1.036346342009522, "eval_loss": 2.8276000312650638, "eval_runtime": 211.5611, "eval_samples_per_second": 141.803, "eval_steps_per_second": 2.217, "flow/cos_sim": 0.24807281547517918, "flow/improvement_ratio": 0.8986854375298343, "flow/mag_ratio_mean": 0.6740282399059613, "flow/mag_ratio_std": 0.3316722640287139, "step": 7168 } ], "logging_steps": 1024, "max_steps": 23204, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1024, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }