File size: 2,496 Bytes
a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 fd8ed3d a0321b2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | {
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 134,
"global_step": 367,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0027247956403269754,
"eval_loss": 3.203904390335083,
"eval_runtime": 4.2695,
"eval_samples_per_second": 72.375,
"eval_steps_per_second": 18.269,
"step": 1
},
{
"epoch": 0.1362397820163488,
"grad_norm": 0.7537987232208252,
"learning_rate": 0.0004,
"loss": 1.3941,
"step": 50
},
{
"epoch": 0.2724795640326976,
"grad_norm": 0.5926509499549866,
"learning_rate": 0.0004,
"loss": 0.8495,
"step": 100
},
{
"epoch": 0.3651226158038147,
"eval_loss": 0.6168258190155029,
"eval_runtime": 4.2281,
"eval_samples_per_second": 73.082,
"eval_steps_per_second": 18.448,
"step": 134
},
{
"epoch": 0.4087193460490463,
"grad_norm": 0.7156445384025574,
"learning_rate": 0.0004,
"loss": 0.6471,
"step": 150
},
{
"epoch": 0.5449591280653951,
"grad_norm": 0.9673421382904053,
"learning_rate": 0.0004,
"loss": 0.5422,
"step": 200
},
{
"epoch": 0.6811989100817438,
"grad_norm": 0.6035718321800232,
"learning_rate": 0.0004,
"loss": 0.4256,
"step": 250
},
{
"epoch": 0.7302452316076294,
"eval_loss": 0.4029657542705536,
"eval_runtime": 4.2417,
"eval_samples_per_second": 72.848,
"eval_steps_per_second": 18.389,
"step": 268
},
{
"epoch": 0.8174386920980926,
"grad_norm": 0.5255013704299927,
"learning_rate": 0.0004,
"loss": 0.3641,
"step": 300
},
{
"epoch": 0.9536784741144414,
"grad_norm": 0.41406139731407166,
"learning_rate": 0.0004,
"loss": 0.2957,
"step": 350
}
],
"logging_steps": 50,
"max_steps": 400,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.189318078660608e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|