File size: 1,994 Bytes
d9fc591 90d59b0 d9fc591 90d59b0 d9fc591 90d59b0 d9fc591 90d59b0 d9fc591 90d59b0 d9fc591 90d59b0 d9fc591 90d59b0 c0e6a1a 90d59b0 d9fc591 90d59b0 c0e6a1a 90d59b0 d9fc591 90d59b0 c0e6a1a 90d59b0 d9fc591 90d59b0 c0e6a1a 90d59b0 d9fc591 90d59b0 d9fc591 90d59b0 d9fc591 90d59b0 d9fc591 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.5555555555555554,
"eval_steps": 500,
"global_step": 12,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5925925925925926,
"grad_norm": 1.2005751132965088,
"learning_rate": 0.0001666666666666667,
"loss": 9.4962,
"step": 2
},
{
"epoch": 1.1851851851851851,
"grad_norm": 1.47324538230896,
"learning_rate": 0.00013333333333333334,
"loss": 9.1456,
"step": 4
},
{
"epoch": 1.7777777777777777,
"grad_norm": 2.0616180896759033,
"learning_rate": 0.0001,
"loss": 8.731,
"step": 6
},
{
"epoch": 2.3703703703703702,
"grad_norm": 2.349423885345459,
"learning_rate": 6.666666666666667e-05,
"loss": 8.2574,
"step": 8
},
{
"epoch": 2.962962962962963,
"grad_norm": 1.723225474357605,
"learning_rate": 3.3333333333333335e-05,
"loss": 7.9055,
"step": 10
},
{
"epoch": 3.5555555555555554,
"grad_norm": 1.465832233428955,
"learning_rate": 0.0,
"loss": 7.7976,
"step": 12
},
{
"epoch": 3.5555555555555554,
"step": 12,
"total_flos": 69844190099040.0,
"train_loss": 8.555556774139404,
"train_runtime": 79.8982,
"train_samples_per_second": 2.703,
"train_steps_per_second": 0.15
}
],
"logging_steps": 2,
"max_steps": 12,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 69844190099040.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|