| { | |
| "best_metric": 0.647897481918335, | |
| "best_model_checkpoint": "./results/run-c08284a0/checkpoint-2891", | |
| "epoch": 4.0, | |
| "global_step": 11564, | |
| "is_hyper_param_search": true, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.8353956812177094e-08, | |
| "loss": 2.7583, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.3536297892436678e-05, | |
| "loss": 1.4307, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.225493686513294e-05, | |
| "loss": 0.7673, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.5625945861094284, | |
| "eval_loss": 0.647897481918335, | |
| "eval_runtime": 49.2048, | |
| "eval_samples_per_second": 156.753, | |
| "eval_steps_per_second": 19.612, | |
| "step": 2891 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.0973575837829205e-05, | |
| "loss": 0.7003, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 9.692214810525468e-06, | |
| "loss": 0.5472, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 8.41085378322173e-06, | |
| "loss": 0.5301, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.5762638185336738, | |
| "eval_loss": 0.6582794785499573, | |
| "eval_runtime": 49.218, | |
| "eval_samples_per_second": 156.711, | |
| "eval_steps_per_second": 19.607, | |
| "step": 5782 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 7.1294927559179925e-06, | |
| "loss": 0.4915, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 5.848131728614256e-06, | |
| "loss": 0.2902, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.5667707013105184e-06, | |
| "loss": 0.3177, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.5971968060546914, | |
| "eval_loss": 0.797668993473053, | |
| "eval_runtime": 49.1971, | |
| "eval_samples_per_second": 156.778, | |
| "eval_steps_per_second": 19.615, | |
| "step": 8673 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 3.285409674006782e-06, | |
| "loss": 0.2443, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 2.004048646703045e-06, | |
| "loss": 0.1242, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 7.226876193993077e-07, | |
| "loss": 0.1309, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.6079231608164175, | |
| "eval_loss": 0.9303044676780701, | |
| "eval_runtime": 49.2903, | |
| "eval_samples_per_second": 156.481, | |
| "eval_steps_per_second": 19.578, | |
| "step": 11564 | |
| } | |
| ], | |
| "max_steps": 11564, | |
| "num_train_epochs": 4, | |
| "total_flos": 2.0623921072128e+16, | |
| "trial_name": null, | |
| "trial_params": { | |
| "learning_rate": 1.4176978406088547e-05, | |
| "num_train_epochs": 4, | |
| "per_device_train_batch_size": 8, | |
| "warmup_steps": 500, | |
| "weight_decay": 0.06913192321078417 | |
| } | |
| } | |