File size: 1,813 Bytes
b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 74f9d8f b489fe6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9621993127147767,
"eval_steps": 500,
"global_step": 70,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13745704467353953,
"grad_norm": 2.5390625,
"learning_rate": 0.0002,
"loss": 2.5496,
"step": 10
},
{
"epoch": 0.27491408934707906,
"grad_norm": 1.5771484375,
"learning_rate": 0.0002,
"loss": 1.1222,
"step": 20
},
{
"epoch": 0.41237113402061853,
"grad_norm": 0.66748046875,
"learning_rate": 0.0002,
"loss": 0.5563,
"step": 30
},
{
"epoch": 0.5498281786941581,
"grad_norm": 0.6455078125,
"learning_rate": 0.0002,
"loss": 0.5064,
"step": 40
},
{
"epoch": 0.6872852233676976,
"grad_norm": 0.62451171875,
"learning_rate": 0.0002,
"loss": 0.4592,
"step": 50
},
{
"epoch": 0.8247422680412371,
"grad_norm": 0.62939453125,
"learning_rate": 0.0002,
"loss": 0.4184,
"step": 60
},
{
"epoch": 0.9621993127147767,
"grad_norm": 0.576171875,
"learning_rate": 0.0002,
"loss": 0.4028,
"step": 70
}
],
"logging_steps": 10,
"max_steps": 72,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6958061868515328.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|