File size: 2,573 Bytes
0d82ec1 0a52e9f 15ca0d9 0a52e9f 0d82ec1 943b793 15ca0d9 0d82ec1 943b793 15ca0d9 943b793 15ca0d9 0d82ec1 943b793 15ca0d9 943b793 15ca0d9 0d82ec1 15ca0d9 7e59b56 0a52e9f 0d82ec1 943b793 0d82ec1 943b793 15ca0d9 0d82ec1 0a52e9f 0d82ec1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5555555555555556,
"eval_steps": 20,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009259259259259259,
"eval_loss": 2.1662161350250244,
"eval_runtime": 31.2323,
"eval_samples_per_second": 48.059,
"eval_steps_per_second": 6.019,
"step": 1
},
{
"epoch": 0.09259259259259259,
"grad_norm": 2.140625,
"learning_rate": 6.666666666666667e-05,
"loss": 1.8573,
"step": 10
},
{
"epoch": 0.18518518518518517,
"grad_norm": 1.5390625,
"learning_rate": 0.00013333333333333334,
"loss": 1.8314,
"step": 20
},
{
"epoch": 0.18518518518518517,
"eval_loss": 1.7036446332931519,
"eval_runtime": 27.1456,
"eval_samples_per_second": 55.294,
"eval_steps_per_second": 6.926,
"step": 20
},
{
"epoch": 0.2777777777777778,
"grad_norm": 1.765625,
"learning_rate": 0.0002,
"loss": 1.8651,
"step": 30
},
{
"epoch": 0.37037037037037035,
"grad_norm": 1.5390625,
"learning_rate": 0.00019984815164333163,
"loss": 1.9748,
"step": 40
},
{
"epoch": 0.37037037037037035,
"eval_loss": 1.8448469638824463,
"eval_runtime": 42.3761,
"eval_samples_per_second": 35.421,
"eval_steps_per_second": 4.436,
"step": 40
},
{
"epoch": 0.46296296296296297,
"grad_norm": 1.25,
"learning_rate": 0.00019939306773179497,
"loss": 2.0039,
"step": 50
},
{
"epoch": 0.5555555555555556,
"grad_norm": 1.25,
"learning_rate": 0.00019863613034027224,
"loss": 2.0413,
"step": 60
},
{
"epoch": 0.5555555555555556,
"eval_loss": 1.89992094039917,
"eval_runtime": 42.6851,
"eval_samples_per_second": 35.164,
"eval_steps_per_second": 4.404,
"step": 60
}
],
"logging_steps": 10,
"max_steps": 600,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.897208656730522e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|