| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.30891891309500724, |
| "eval_steps": 1024, |
| "global_step": 7168, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04413127329928675, |
| "grad_norm": 0.7938550114631653, |
| "learning_rate": 9.990234375e-05, |
| "loss": 12.336601257324219, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.16407143517604253, |
| "eval_ce_clean_loss": 1.724545789679993, |
| "eval_ce_pred_loss": 5.058502770690267, |
| "eval_flow_mse_loss": 0.929569506823127, |
| "eval_loss": 6.195067207442164, |
| "flow/cos_sim": 0.055166260543853235, |
| "flow/improvement_ratio": 0.596057767425773, |
| "flow/mag_ratio_mean": 0.4713213621680416, |
| "flow/mag_ratio_std": 0.4101032533371118, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.04413127329928675, |
| "eval_bleu": 0.16407143517604253, |
| "eval_ce_clean_loss": 1.724545789679993, |
| "eval_ce_pred_loss": 5.058502770690267, |
| "eval_flow_mse_loss": 0.929569506823127, |
| "eval_loss": 6.195067207442164, |
| "eval_runtime": 212.5631, |
| "eval_samples_per_second": 141.135, |
| "eval_steps_per_second": 2.206, |
| "flow/cos_sim": 0.055166260543853235, |
| "flow/improvement_ratio": 0.596057767425773, |
| "flow/mag_ratio_mean": 0.4713213621680416, |
| "flow/mag_ratio_std": 0.4101032533371118, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "grad_norm": 0.450653612613678, |
| "learning_rate": 9.9476028157316e-05, |
| "loss": 4.612443923950195, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.3321661463154056, |
| "eval_ce_clean_loss": 0.2987517699885216, |
| "eval_ce_pred_loss": 3.625808963389285, |
| "eval_flow_mse_loss": 1.015768651387839, |
| "eval_loss": 3.8525866531868225, |
| "flow/cos_sim": 0.1616891078921015, |
| "flow/improvement_ratio": 0.7423238582702588, |
| "flow/mag_ratio_mean": 0.6468003783017587, |
| "flow/mag_ratio_std": 0.5354506893834071, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0882625465985735, |
| "eval_bleu": 0.3321661463154056, |
| "eval_ce_clean_loss": 0.2987517699885216, |
| "eval_ce_pred_loss": 3.625808963389285, |
| "eval_flow_mse_loss": 1.015768651387839, |
| "eval_loss": 3.8525866531868225, |
| "eval_runtime": 208.1773, |
| "eval_samples_per_second": 144.108, |
| "eval_steps_per_second": 2.253, |
| "flow/cos_sim": 0.1616891078921015, |
| "flow/improvement_ratio": 0.7423238582702588, |
| "flow/mag_ratio_mean": 0.6468003783017587, |
| "flow/mag_ratio_std": 0.5354506893834071, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "grad_norm": 0.37855076789855957, |
| "learning_rate": 9.791307026072513e-05, |
| "loss": 3.617213010787964, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.3836053070830755, |
| "eval_ce_clean_loss": 0.12278842976860908, |
| "eval_ce_pred_loss": 3.1099086214484437, |
| "eval_flow_mse_loss": 1.0722369169121357, |
| "eval_loss": 3.3719613491090885, |
| "flow/cos_sim": 0.22163581698815196, |
| "flow/improvement_ratio": 0.8109787233602772, |
| "flow/mag_ratio_mean": 0.6704668670829171, |
| "flow/mag_ratio_std": 0.4992133626805694, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.13239381989786023, |
| "eval_bleu": 0.3836053070830755, |
| "eval_ce_clean_loss": 0.12278842976860908, |
| "eval_ce_pred_loss": 3.1099086214484437, |
| "eval_flow_mse_loss": 1.0722369169121357, |
| "eval_loss": 3.3719613491090885, |
| "eval_runtime": 209.1152, |
| "eval_samples_per_second": 143.462, |
| "eval_steps_per_second": 2.243, |
| "flow/cos_sim": 0.22163581698815196, |
| "flow/improvement_ratio": 0.8109787233602772, |
| "flow/mag_ratio_mean": 0.6704668670829171, |
| "flow/mag_ratio_std": 0.4992133626805694, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "grad_norm": 0.4389702081680298, |
| "learning_rate": 9.53439476074686e-05, |
| "loss": 3.2931454181671143, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.40933125434293716, |
| "eval_ce_clean_loss": 0.06711880822997612, |
| "eval_ce_pred_loss": 2.8558058159183592, |
| "eval_flow_mse_loss": 1.081392692604553, |
| "eval_loss": 3.147575543125047, |
| "flow/cos_sim": 0.25172679529769587, |
| "flow/improvement_ratio": 0.8514864339248966, |
| "flow/mag_ratio_mean": 0.6699917584594125, |
| "flow/mag_ratio_std": 0.44682612055654464, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.176525093197147, |
| "eval_bleu": 0.40933125434293716, |
| "eval_ce_clean_loss": 0.06711880822997612, |
| "eval_ce_pred_loss": 2.8558058159183592, |
| "eval_flow_mse_loss": 1.081392692604553, |
| "eval_loss": 3.147575543125047, |
| "eval_runtime": 210.459, |
| "eval_samples_per_second": 142.546, |
| "eval_steps_per_second": 2.228, |
| "flow/cos_sim": 0.25172679529769587, |
| "flow/improvement_ratio": 0.8514864339248966, |
| "flow/mag_ratio_mean": 0.6699917584594125, |
| "flow/mag_ratio_std": 0.44682612055654464, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "grad_norm": 0.44552379846572876, |
| "learning_rate": 9.182261125213742e-05, |
| "loss": 3.127476692199707, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.42390360625279244, |
| "eval_ce_clean_loss": 0.04179192618377554, |
| "eval_ce_pred_loss": 2.713604238495898, |
| "eval_flow_mse_loss": 1.0599637749606867, |
| "eval_loss": 3.00127863375617, |
| "flow/cos_sim": 0.25444072618413327, |
| "flow/improvement_ratio": 0.8746775842424649, |
| "flow/mag_ratio_mean": 0.671740317395501, |
| "flow/mag_ratio_std": 0.40047251164659, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.22065636649643372, |
| "eval_bleu": 0.42390360625279244, |
| "eval_ce_clean_loss": 0.04179192618377554, |
| "eval_ce_pred_loss": 2.713604238495898, |
| "eval_flow_mse_loss": 1.0599637749606867, |
| "eval_loss": 3.00127863375617, |
| "eval_runtime": 211.2198, |
| "eval_samples_per_second": 142.032, |
| "eval_steps_per_second": 2.22, |
| "flow/cos_sim": 0.25444072618413327, |
| "flow/improvement_ratio": 0.8746775842424649, |
| "flow/mag_ratio_mean": 0.671740317395501, |
| "flow/mag_ratio_std": 0.40047251164659, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "grad_norm": 0.4731499254703522, |
| "learning_rate": 8.742300854391668e-05, |
| "loss": 3.012479782104492, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.4346563578061454, |
| "eval_ce_clean_loss": 0.028318230050808586, |
| "eval_ce_pred_loss": 2.6327081789085858, |
| "eval_flow_mse_loss": 1.056245713854141, |
| "eval_loss": 2.927459636985112, |
| "flow/cos_sim": 0.2519478304808074, |
| "flow/improvement_ratio": 0.8869821624969368, |
| "flow/mag_ratio_mean": 0.6784741002867725, |
| "flow/mag_ratio_std": 0.37580890851869764, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.26478763979572045, |
| "eval_bleu": 0.4346563578061454, |
| "eval_ce_clean_loss": 0.028318230050808586, |
| "eval_ce_pred_loss": 2.6327081789085858, |
| "eval_flow_mse_loss": 1.056245713854141, |
| "eval_loss": 2.927459636985112, |
| "eval_runtime": 211.7504, |
| "eval_samples_per_second": 141.676, |
| "eval_steps_per_second": 2.215, |
| "flow/cos_sim": 0.2519478304808074, |
| "flow/improvement_ratio": 0.8869821624969368, |
| "flow/mag_ratio_mean": 0.6784741002867725, |
| "flow/mag_ratio_std": 0.37580890851869764, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "grad_norm": 0.6240633726119995, |
| "learning_rate": 8.223753024725232e-05, |
| "loss": 2.9197511672973633, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.44790457353944046, |
| "eval_ce_clean_loss": 0.020565879328656934, |
| "eval_ce_pred_loss": 2.529554044768246, |
| "eval_flow_mse_loss": 1.036346342009522, |
| "eval_loss": 2.8276000312650638, |
| "flow/cos_sim": 0.24807281547517918, |
| "flow/improvement_ratio": 0.8986854375298343, |
| "flow/mag_ratio_mean": 0.6740282399059613, |
| "flow/mag_ratio_std": 0.3316722640287139, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.30891891309500724, |
| "eval_bleu": 0.44790457353944046, |
| "eval_ce_clean_loss": 0.020565879328656934, |
| "eval_ce_pred_loss": 2.529554044768246, |
| "eval_flow_mse_loss": 1.036346342009522, |
| "eval_loss": 2.8276000312650638, |
| "eval_runtime": 211.5611, |
| "eval_samples_per_second": 141.803, |
| "eval_steps_per_second": 2.217, |
| "flow/cos_sim": 0.24807281547517918, |
| "flow/improvement_ratio": 0.8986854375298343, |
| "flow/mag_ratio_mean": 0.6740282399059613, |
| "flow/mag_ratio_std": 0.3316722640287139, |
| "step": 7168 |
| } |
| ], |
| "logging_steps": 1024, |
| "max_steps": 23204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|