| { | |
| "best_metric": 0.2633977234363556, | |
| "best_model_checkpoint": "saved_model/checkpoint-14890", | |
| "epoch": 0.9999664215439374, | |
| "eval_steps": 500, | |
| "global_step": 14890, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.986568166554735e-05, | |
| "loss": 0.3718, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9731363331094694e-05, | |
| "loss": 0.3007, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.959704499664204e-05, | |
| "loss": 0.2808, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.946272666218939e-05, | |
| "loss": 0.2593, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.9328408327736737e-05, | |
| "loss": 0.2444, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.9194089993284085e-05, | |
| "loss": 0.2381, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.9059771658831433e-05, | |
| "loss": 0.2374, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.892545332437878e-05, | |
| "loss": 0.2278, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.8791134989926128e-05, | |
| "loss": 0.2193, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.8656816655473473e-05, | |
| "loss": 0.219, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.852249832102082e-05, | |
| "loss": 0.2241, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.8388179986568168e-05, | |
| "loss": 0.2268, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.8253861652115516e-05, | |
| "loss": 0.2135, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.8119543317662864e-05, | |
| "loss": 0.2164, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.798522498321021e-05, | |
| "loss": 0.214, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.785090664875756e-05, | |
| "loss": 0.2042, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.7716588314304904e-05, | |
| "loss": 0.2031, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.758226997985225e-05, | |
| "loss": 0.2045, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.74479516453996e-05, | |
| "loss": 0.1993, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.7313633310946947e-05, | |
| "loss": 0.2012, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.717931497649429e-05, | |
| "loss": 0.1971, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.704499664204164e-05, | |
| "loss": 0.1918, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.6910678307588987e-05, | |
| "loss": 0.1972, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.6776359973136335e-05, | |
| "loss": 0.1855, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.6642041638683682e-05, | |
| "loss": 0.1873, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.650772330423103e-05, | |
| "loss": 0.1807, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.6373404969778378e-05, | |
| "loss": 0.1842, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.6239086635325722e-05, | |
| "loss": 0.1968, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.610476830087307e-05, | |
| "loss": 0.1786, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9212894560107455, | |
| "eval_loss": 0.2633977234363556, | |
| "eval_macro_f1": 0.9043313763923932, | |
| "eval_runtime": 742.0543, | |
| "eval_samples_per_second": 80.264, | |
| "eval_steps_per_second": 20.066, | |
| "step": 14890 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 74450, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 6.268568271740928e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |