| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9178082191780823, | |
| "global_step": 10500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9086757990867582e-05, | |
| "loss": 1.0336, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8173515981735163e-05, | |
| "loss": 0.8372, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.726027397260274e-05, | |
| "loss": 0.8389, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.634703196347032e-05, | |
| "loss": 0.813, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.54337899543379e-05, | |
| "loss": 0.7993, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.4520547945205482e-05, | |
| "loss": 0.783, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.360730593607306e-05, | |
| "loss": 0.7708, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.2694063926940641e-05, | |
| "loss": 0.7512, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.178082191780822e-05, | |
| "loss": 0.7617, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.08675799086758e-05, | |
| "loss": 0.7586, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.8747021555900574, | |
| "eval_runtime": 251.8476, | |
| "eval_samples_per_second": 41.97, | |
| "eval_steps_per_second": 2.625, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 9.95433789954338e-06, | |
| "loss": 0.737, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.04109589041096e-06, | |
| "loss": 0.5544, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.127853881278539e-06, | |
| "loss": 0.5876, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 7.214611872146119e-06, | |
| "loss": 0.5841, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 6.301369863013699e-06, | |
| "loss": 0.5772, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 5.388127853881279e-06, | |
| "loss": 0.5743, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 4.4748858447488585e-06, | |
| "loss": 0.5722, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.5616438356164386e-06, | |
| "loss": 0.5506, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.6484018264840183e-06, | |
| "loss": 0.5588, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.7351598173515982e-06, | |
| "loss": 0.5428, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 8.219178082191781e-07, | |
| "loss": 0.563, | |
| "step": 10500 | |
| } | |
| ], | |
| "max_steps": 10950, | |
| "num_train_epochs": 2, | |
| "total_flos": 3.292319537616845e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |