File size: 1,812 Bytes
b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9896907216494846,
"eval_steps": 500,
"global_step": 72,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13745704467353953,
"grad_norm": 2.5390625,
"learning_rate": 0.0002,
"loss": 2.5496,
"step": 10
},
{
"epoch": 0.27491408934707906,
"grad_norm": 1.5771484375,
"learning_rate": 0.0002,
"loss": 1.1222,
"step": 20
},
{
"epoch": 0.41237113402061853,
"grad_norm": 0.66748046875,
"learning_rate": 0.0002,
"loss": 0.5563,
"step": 30
},
{
"epoch": 0.5498281786941581,
"grad_norm": 0.6455078125,
"learning_rate": 0.0002,
"loss": 0.5064,
"step": 40
},
{
"epoch": 0.6872852233676976,
"grad_norm": 0.62451171875,
"learning_rate": 0.0002,
"loss": 0.4592,
"step": 50
},
{
"epoch": 0.8247422680412371,
"grad_norm": 0.62939453125,
"learning_rate": 0.0002,
"loss": 0.4184,
"step": 60
},
{
"epoch": 0.9621993127147767,
"grad_norm": 0.576171875,
"learning_rate": 0.0002,
"loss": 0.4028,
"step": 70
}
],
"logging_steps": 10,
"max_steps": 72,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7155741247733760.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|