| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 9054, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.723878948531036e-05, | |
| "loss": 2.7103, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.447757897062072e-05, | |
| "loss": 1.9564, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.171636845593108e-05, | |
| "loss": 1.8423, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.895515794124144e-05, | |
| "loss": 1.778, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.61939474265518e-05, | |
| "loss": 1.7212, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.343273691186216e-05, | |
| "loss": 1.6849, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.0671526397172526e-05, | |
| "loss": 1.6508, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.791031588248288e-05, | |
| "loss": 1.6336, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5149105367793242e-05, | |
| "loss": 1.6204, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.2387894853103602e-05, | |
| "loss": 1.6058, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.962668433841396e-05, | |
| "loss": 1.5936, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.686547382372432e-05, | |
| "loss": 1.5766, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4104263309034682e-05, | |
| "loss": 1.5578, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1343052794345042e-05, | |
| "loss": 1.548, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.581842279655401e-06, | |
| "loss": 1.5482, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.820631764965762e-06, | |
| "loss": 1.5412, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.059421250276121e-06, | |
| "loss": 1.5365, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.982107355864811e-07, | |
| "loss": 1.535, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 9054, | |
| "total_flos": 7.624223298839347e+16, | |
| "train_loss": 1.70131384965624, | |
| "train_runtime": 4587.4305, | |
| "train_samples_per_second": 63.145, | |
| "train_steps_per_second": 1.974 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 9054, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 7.624223298839347e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |