File size: 2,695 Bytes
0f92b28 f00b7cc 57f1ae4 f00b7cc 0f92b28 694a3e4 f00b7cc 57f1ae4 694a3e4 f00b7cc 181a7d9 f00b7cc 181a7d9 f00b7cc 181a7d9 f00b7cc 181a7d9 f00b7cc 181a7d9 f00b7cc 181a7d9 f00b7cc 181a7d9 f00b7cc 181a7d9 f00b7cc 181a7d9 f00b7cc 0f92b28 f00b7cc 0f92b28 181a7d9 0f92b28 f00b7cc 0f92b28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 28,
"global_step": 67,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"grad_norm": 9.4375,
"learning_rate": 1.8153846153846155e-05,
"loss": 0.4282,
"step": 8
},
{
"epoch": 0.24,
"grad_norm": 10.5,
"learning_rate": 1.5692307692307693e-05,
"loss": 0.4335,
"step": 16
},
{
"epoch": 0.36,
"grad_norm": 5.03125,
"learning_rate": 1.3230769230769231e-05,
"loss": 0.3984,
"step": 24
},
{
"epoch": 0.42,
"eval_accuracy": 0.8166666666666667,
"eval_f1_score": 0.8080459770114942,
"eval_gmean": 0.7501820727030913,
"eval_loss": 0.5816406011581421,
"eval_precision": 0.8133022774327122,
"eval_recall": 0.8166666666666667,
"eval_runtime": 175.4354,
"eval_samples_per_second": 0.342,
"eval_steps_per_second": 0.046,
"step": 28
},
{
"epoch": 0.48,
"grad_norm": 6.625,
"learning_rate": 1.076923076923077e-05,
"loss": 0.3979,
"step": 32
},
{
"epoch": 0.6,
"grad_norm": 7.875,
"learning_rate": 8.307692307692309e-06,
"loss": 0.3657,
"step": 40
},
{
"epoch": 0.72,
"grad_norm": 10.0,
"learning_rate": 5.846153846153847e-06,
"loss": 0.38,
"step": 48
},
{
"epoch": 0.84,
"grad_norm": 9.6875,
"learning_rate": 3.384615384615385e-06,
"loss": 0.4054,
"step": 56
},
{
"epoch": 0.84,
"eval_accuracy": 0.8166666666666667,
"eval_f1_score": 0.8080459770114942,
"eval_gmean": 0.7501820727030913,
"eval_loss": 0.5850911736488342,
"eval_precision": 0.8133022774327122,
"eval_recall": 0.8166666666666667,
"eval_runtime": 173.49,
"eval_samples_per_second": 0.346,
"eval_steps_per_second": 0.046,
"step": 56
},
{
"epoch": 0.96,
"grad_norm": 8.8125,
"learning_rate": 9.230769230769232e-07,
"loss": 0.3864,
"step": 64
},
{
"epoch": 1.0,
"step": 67,
"total_flos": 8.435821078904832e+16,
"train_loss": 0.405215135261194,
"train_runtime": 12957.5785,
"train_samples_per_second": 0.33,
"train_steps_per_second": 0.005
}
],
"logging_steps": 8,
"max_steps": 67,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 8.435821078904832e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|