{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2482223658694247, "eval_steps": 256, "global_step": 768, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04137039431157078, "grad_norm": 95.44776916503906, "learning_rate": 9.773140025240866e-09, "loss": 16.2614, "step": 128 }, { "epoch": 0.08274078862314156, "grad_norm": 81.94233703613281, "learning_rate": 1.9623233908948195e-08, "loss": 16.1329, "step": 256 }, { "epoch": 0.08274078862314156, "eval_bleu": 0.0, "eval_cap_loss": 9.05574489775158, "eval_con_loss": 3.4572216215587797, "eval_loss": 15.97018808031839, "step": 256 }, { "epoch": 0.08274078862314156, "eval_bleu": 0.0, "eval_cap_loss": 9.05574489775158, "eval_con_loss": 3.4572216215587797, "eval_loss": 15.97018808031839, "eval_runtime": 170.2059, "eval_samples_per_second": 5.875, "eval_steps_per_second": 0.37, "step": 256 }, { "epoch": 0.12411118293471235, "grad_norm": 92.045166015625, "learning_rate": 2.9473327792655523e-08, "loss": 15.8603, "step": 384 }, { "epoch": 0.16548157724628312, "grad_norm": 67.55699920654297, "learning_rate": 3.9323421676362855e-08, "loss": 15.4976, "step": 512 }, { "epoch": 0.16548157724628312, "eval_bleu": 0.0, "eval_cap_loss": 8.317013150169736, "eval_con_loss": 3.4565826900421626, "eval_loss": 15.230178530254062, "step": 512 }, { "epoch": 0.16548157724628312, "eval_bleu": 0.0, "eval_cap_loss": 8.317013150169736, "eval_con_loss": 3.4565826900421626, "eval_loss": 15.230178530254062, "eval_runtime": 169.6131, "eval_samples_per_second": 5.896, "eval_steps_per_second": 0.371, "step": 512 }, { "epoch": 0.2068519715578539, "grad_norm": 68.02971649169922, "learning_rate": 4.917351556007019e-08, "loss": 15.0425, "step": 640 }, { "epoch": 0.2482223658694247, "grad_norm": 63.56317138671875, "learning_rate": 5.902360944377751e-08, "loss": 14.5238, "step": 768 }, { "epoch": 0.2482223658694247, "eval_bleu": 0.0009372288320443345, "eval_cap_loss": 7.270466600145612, "eval_con_loss": 3.45575193374876, "eval_loss": 14.181970460074288, "step": 768 }, { "epoch": 0.2482223658694247, "eval_bleu": 0.0009372288320443345, "eval_cap_loss": 7.270466600145612, "eval_con_loss": 3.45575193374876, "eval_loss": 14.181970460074288, "eval_runtime": 168.362, "eval_samples_per_second": 5.94, "eval_steps_per_second": 0.374, "step": 768 } ], "logging_steps": 128, "max_steps": 4331600, "num_input_tokens_seen": 0, "num_train_epochs": 1400, "save_steps": 256, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }