| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 512, |
| "global_step": 2025, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12641975308641976, |
| "grad_norm": 0.015813976526260376, |
| "learning_rate": 0.000498046875, |
| "loss": 0.44495490193367004, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.2528395061728395, |
| "grad_norm": 0.006338431965559721, |
| "learning_rate": 0.000998046875, |
| "loss": 0.016131538897752762, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2528395061728395, |
| "eval_bleu": 0.9709026962307936, |
| "eval_loss": 0.014414189542393858, |
| "eval_mse_loss": 0.014414189542393858, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2528395061728395, |
| "eval_bleu": 0.9709026962307936, |
| "eval_loss": 0.014414189542393858, |
| "eval_mse_loss": 0.014414189542393858, |
| "eval_runtime": 8.5639, |
| "eval_samples_per_second": 305.702, |
| "eval_steps_per_second": 4.788, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.37925925925925924, |
| "grad_norm": 0.005291212350130081, |
| "learning_rate": 0.0009315344337660421, |
| "loss": 0.013332298956811428, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.505679012345679, |
| "grad_norm": 0.004620287101715803, |
| "learning_rate": 0.0007439821899385376, |
| "loss": 0.012249683029949665, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.505679012345679, |
| "eval_bleu": 0.9708651851802215, |
| "eval_loss": 0.012245714698532006, |
| "eval_mse_loss": 0.012245714698532006, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.505679012345679, |
| "eval_bleu": 0.9708651851802215, |
| "eval_loss": 0.012245714698532006, |
| "eval_mse_loss": 0.012245714698532006, |
| "eval_runtime": 7.9385, |
| "eval_samples_per_second": 329.784, |
| "eval_steps_per_second": 5.165, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6320987654320988, |
| "grad_norm": 0.004163551609963179, |
| "learning_rate": 0.0004890997654891032, |
| "loss": 0.011324185878038406, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7585185185185185, |
| "grad_norm": 0.004867972806096077, |
| "learning_rate": 0.00023722540797531234, |
| "loss": 0.01125150453299284, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.7585185185185185, |
| "eval_bleu": 0.9709484876437786, |
| "eval_loss": 0.010418103053802398, |
| "eval_mse_loss": 0.010418103053802398, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.7585185185185185, |
| "eval_bleu": 0.9709484876437786, |
| "eval_loss": 0.010418103053802398, |
| "eval_mse_loss": 0.010418103053802398, |
| "eval_runtime": 7.0369, |
| "eval_samples_per_second": 372.037, |
| "eval_steps_per_second": 5.826, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.8849382716049383, |
| "grad_norm": 0.005268606822937727, |
| "learning_rate": 5.786724825584927e-05, |
| "loss": 0.011080899275839329, |
| "step": 1792 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 2025, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 512, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|