File size: 2,373 Bytes
e5f0f5a 2231611 e5f0f5a 2231611 a1a87bd 2231611 a1a87bd e5f0f5a 2231611 a1a87bd 2231611 a1a87bd e5f0f5a 2231611 a1a87bd 2231611 a1a87bd e5f0f5a 2231611 a1a87bd 2231611 a1a87bd e5f0f5a 2231611 a1a87bd 2231611 a1a87bd 2231611 a1a87bd 2231611 a1a87bd 2231611 a1a87bd 2231611 a1a87bd 2231611 a1a87bd 2231611 a1a87bd 2231611 e5f0f5a 2231611 a1a87bd 2231611 a1a87bd e5f0f5a 2231611 e5f0f5a a1a87bd 2231611 e5f0f5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 81,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12345679012345678,
"grad_norm": 18.884857177734375,
"learning_rate": 8.765432098765433e-05,
"loss": 11.7134,
"step": 10
},
{
"epoch": 0.24691358024691357,
"grad_norm": 1.8587737083435059,
"learning_rate": 7.530864197530865e-05,
"loss": 1.5567,
"step": 20
},
{
"epoch": 0.37037037037037035,
"grad_norm": 0.13458853960037231,
"learning_rate": 6.296296296296296e-05,
"loss": 0.0378,
"step": 30
},
{
"epoch": 0.49382716049382713,
"grad_norm": 0.05857311934232712,
"learning_rate": 5.061728395061729e-05,
"loss": 0.0212,
"step": 40
},
{
"epoch": 0.6172839506172839,
"grad_norm": 0.009554409421980381,
"learning_rate": 3.82716049382716e-05,
"loss": 0.0069,
"step": 50
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.0058960807509720325,
"learning_rate": 2.5925925925925925e-05,
"loss": 0.0027,
"step": 60
},
{
"epoch": 0.8641975308641975,
"grad_norm": 0.0033985786139965057,
"learning_rate": 1.3580246913580247e-05,
"loss": 0.0026,
"step": 70
},
{
"epoch": 0.9876543209876543,
"grad_norm": 0.002752589527517557,
"learning_rate": 1.234567901234568e-06,
"loss": 0.0026,
"step": 80
},
{
"epoch": 1.0,
"step": 81,
"total_flos": 1.6678153976020992e+16,
"train_loss": 0.0,
"train_runtime": 0.009,
"train_samples_per_second": 18069.762,
"train_steps_per_second": 9034.881
}
],
"logging_steps": 10,
"max_steps": 81,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6678153976020992e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|