File size: 2,545 Bytes
248896c 025b305 89543b3 025b305 248896c 89543b3 248896c 89543b3 248896c 89543b3 248896c 89543b3 248896c 89543b3 248896c 66075b0 89543b3 66075b0 89543b3 66075b0 89543b3 66075b0 89543b3 66075b0 d0009ed 89543b3 d0009ed 89543b3 d0009ed 89543b3 d0009ed 89543b3 d0009ed 29f0568 89543b3 29f0568 89543b3 29f0568 89543b3 29f0568 89543b3 29f0568 025b305 89543b3 025b305 89543b3 025b305 89543b3 025b305 89543b3 025b305 248896c 89543b3 248896c 025b305 248896c 025b305 248896c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.285714285714286,
"eval_steps": 200,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14285714285714285,
"eval_loss": 3.078927516937256,
"eval_runtime": 4.841,
"eval_samples_per_second": 310.059,
"eval_steps_per_second": 3.305,
"step": 1
},
{
"epoch": 1.4285714285714286,
"grad_norm": 9.25,
"learning_rate": 0.00019863613034027224,
"loss": 6.1788,
"step": 10
},
{
"epoch": 2.857142857142857,
"grad_norm": 5.25,
"learning_rate": 0.0001879473751206489,
"loss": 5.2684,
"step": 20
},
{
"epoch": 4.285714285714286,
"grad_norm": 5.28125,
"learning_rate": 0.00016772815716257412,
"loss": 4.6887,
"step": 30
},
{
"epoch": 5.714285714285714,
"grad_norm": 2.3125,
"learning_rate": 0.00014016954246529696,
"loss": 4.3802,
"step": 40
},
{
"epoch": 7.142857142857143,
"grad_norm": 3.484375,
"learning_rate": 0.00010825793454723325,
"loss": 4.1083,
"step": 50
},
{
"epoch": 8.571428571428571,
"grad_norm": 2.5625,
"learning_rate": 7.54514512859201e-05,
"loss": 3.8961,
"step": 60
},
{
"epoch": 10.0,
"grad_norm": 2.96875,
"learning_rate": 4.530518418775733e-05,
"loss": 3.8097,
"step": 70
},
{
"epoch": 11.428571428571429,
"grad_norm": 1.546875,
"learning_rate": 2.1085949060360654e-05,
"loss": 3.7435,
"step": 80
},
{
"epoch": 12.857142857142858,
"grad_norm": 2.328125,
"learning_rate": 5.418275829936537e-06,
"loss": 3.721,
"step": 90
},
{
"epoch": 14.285714285714286,
"grad_norm": 1.453125,
"learning_rate": 0.0,
"loss": 3.7058,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.57855601360896e+16,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}
|