{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 512, "global_step": 2025, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12641975308641976, "grad_norm": 0.015813976526260376, "learning_rate": 0.000498046875, "loss": 0.44495490193367004, "step": 256 }, { "epoch": 0.2528395061728395, "grad_norm": 0.006338431965559721, "learning_rate": 0.000998046875, "loss": 0.016131538897752762, "step": 512 }, { "epoch": 0.2528395061728395, "eval_bleu": 0.9709026962307936, "eval_loss": 0.014414189542393858, "eval_mse_loss": 0.014414189542393858, "step": 512 }, { "epoch": 0.2528395061728395, "eval_bleu": 0.9709026962307936, "eval_loss": 0.014414189542393858, "eval_mse_loss": 0.014414189542393858, "eval_runtime": 8.5639, "eval_samples_per_second": 305.702, "eval_steps_per_second": 4.788, "step": 512 }, { "epoch": 0.37925925925925924, "grad_norm": 0.005291212350130081, "learning_rate": 0.0009315344337660421, "loss": 0.013332298956811428, "step": 768 }, { "epoch": 0.505679012345679, "grad_norm": 0.004620287101715803, "learning_rate": 0.0007439821899385376, "loss": 0.012249683029949665, "step": 1024 }, { "epoch": 0.505679012345679, "eval_bleu": 0.9708651851802215, "eval_loss": 0.012245714698532006, "eval_mse_loss": 0.012245714698532006, "step": 1024 }, { "epoch": 0.505679012345679, "eval_bleu": 0.9708651851802215, "eval_loss": 0.012245714698532006, "eval_mse_loss": 0.012245714698532006, "eval_runtime": 7.9385, "eval_samples_per_second": 329.784, "eval_steps_per_second": 5.165, "step": 1024 }, { "epoch": 0.6320987654320988, "grad_norm": 0.004163551609963179, "learning_rate": 0.0004890997654891032, "loss": 0.011324185878038406, "step": 1280 }, { "epoch": 0.7585185185185185, "grad_norm": 0.004867972806096077, "learning_rate": 0.00023722540797531234, "loss": 0.01125150453299284, "step": 1536 }, { "epoch": 0.7585185185185185, "eval_bleu": 0.9709484876437786, "eval_loss": 0.010418103053802398, "eval_mse_loss": 0.010418103053802398, "step": 1536 }, { "epoch": 0.7585185185185185, "eval_bleu": 0.9709484876437786, "eval_loss": 0.010418103053802398, "eval_mse_loss": 0.010418103053802398, "eval_runtime": 7.0369, "eval_samples_per_second": 372.037, "eval_steps_per_second": 5.826, "step": 1536 }, { "epoch": 0.8849382716049383, "grad_norm": 0.005268606822937727, "learning_rate": 5.786724825584927e-05, "loss": 0.011080899275839329, "step": 1792 } ], "logging_steps": 256, "max_steps": 2025, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 512, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }