File size: 2,612 Bytes
9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb 00fcad7 9003bbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.4271844660194173,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.24271844660194175,
"grad_norm": 2.8362834453582764,
"learning_rate": 4.960355987055016e-05,
"loss": 6.0742,
"step": 50
},
{
"epoch": 0.4854368932038835,
"grad_norm": 2.9484496116638184,
"learning_rate": 4.9199029126213595e-05,
"loss": 5.7218,
"step": 100
},
{
"epoch": 0.7281553398058253,
"grad_norm": 3.542572259902954,
"learning_rate": 4.879449838187702e-05,
"loss": 5.4227,
"step": 150
},
{
"epoch": 0.970873786407767,
"grad_norm": 4.634098052978516,
"learning_rate": 4.8389967637540455e-05,
"loss": 5.1655,
"step": 200
},
{
"epoch": 1.2135922330097086,
"grad_norm": 5.699330806732178,
"learning_rate": 4.798543689320388e-05,
"loss": 4.8009,
"step": 250
},
{
"epoch": 1.4563106796116505,
"grad_norm": 5.711933135986328,
"learning_rate": 4.7580906148867315e-05,
"loss": 4.607,
"step": 300
},
{
"epoch": 1.6990291262135924,
"grad_norm": 5.113691806793213,
"learning_rate": 4.717637540453075e-05,
"loss": 4.462,
"step": 350
},
{
"epoch": 1.941747572815534,
"grad_norm": 5.632521152496338,
"learning_rate": 4.6771844660194174e-05,
"loss": 4.3695,
"step": 400
},
{
"epoch": 2.1844660194174756,
"grad_norm": 5.428906440734863,
"learning_rate": 4.636731391585761e-05,
"loss": 4.1549,
"step": 450
},
{
"epoch": 2.4271844660194173,
"grad_norm": 5.037013530731201,
"learning_rate": 4.596278317152104e-05,
"loss": 4.0921,
"step": 500
}
],
"logging_steps": 50,
"max_steps": 6180,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 232176746496000.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|