{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.177545691906005,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4177545691906005,
"grad_norm": 0.28227752447128296,
"learning_rate": 2.9999999999999997e-05,
"loss": 4.1508,
"step": 10
},
{
"epoch": 0.835509138381201,
"grad_norm": 0.31433430314064026,
"learning_rate": 5.9999999999999995e-05,
"loss": 4.1593,
"step": 20
},
{
"epoch": 1.2532637075718016,
"grad_norm": 0.3350953161716461,
"learning_rate": 8.999999999999999e-05,
"loss": 4.0414,
"step": 30
},
{
"epoch": 1.671018276762402,
"grad_norm": 0.2885706126689911,
"learning_rate": 0.00011999999999999999,
"loss": 3.8411,
"step": 40
},
{
"epoch": 2.0887728459530024,
"grad_norm": 0.23711609840393066,
"learning_rate": 0.00015,
"loss": 3.6434,
"step": 50
},
{
"epoch": 2.506527415143603,
"grad_norm": 0.21583135426044464,
"learning_rate": 0.00017999999999999998,
"loss": 3.4636,
"step": 60
},
{
"epoch": 2.9242819843342036,
"grad_norm": 0.18754692375659943,
"learning_rate": 0.00020999999999999998,
"loss": 3.3154,
"step": 70
},
{
"epoch": 3.342036553524804,
"grad_norm": 0.15951760113239288,
"learning_rate": 0.00023999999999999998,
"loss": 3.2195,
"step": 80
},
{
"epoch": 3.759791122715405,
"grad_norm": 0.14639759063720703,
"learning_rate": 0.00027,
"loss": 3.122,
"step": 90
},
{
"epoch": 4.177545691906005,
"grad_norm": 0.1860765665769577,
"learning_rate": 0.0003,
"loss": 3.0677,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 300,
"num_input_tokens_seen": 0,
"num_train_epochs": 14,
"save_steps": 100,
"total_flos": 1.6201284405755904e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}