File size: 3,184 Bytes
6cf2b79 3ae3d68 9c66697 3ae3d68 9c66697 6cf2b79 3ae3d68 124b0f7 5eaf652 124b0f7 6cf2b79 3ae3d68 124b0f7 6cf2b79 3ae3d68 124b0f7 5eaf652 124b0f7 0ab62a9 6cf2b79 3ae3d68 124b0f7 5eaf652 124b0f7 558eca1 6cf2b79 3ae3d68 124b0f7 3ae3d68 124b0f7 5eaf652 124b0f7 074543a 804daff 3ae3d68 124b0f7 804daff 124b0f7 074543a 9c66697 6cf2b79 3ae3d68 6cf2b79 3ae3d68 6cf2b79 9c66697 6cf2b79 9c66697 3ae3d68 6cf2b79 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | {
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.042105263157894736,
"eval_steps": 3,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004210526315789474,
"grad_norm": 0.42628446221351624,
"learning_rate": 2e-05,
"loss": 0.1517,
"step": 1
},
{
"epoch": 0.004210526315789474,
"eval_loss": 0.24416916072368622,
"eval_runtime": 29.4222,
"eval_samples_per_second": 3.399,
"eval_steps_per_second": 1.699,
"step": 1
},
{
"epoch": 0.008421052631578947,
"grad_norm": 0.433437317609787,
"learning_rate": 4e-05,
"loss": 0.1501,
"step": 2
},
{
"epoch": 0.01263157894736842,
"grad_norm": 0.3588545322418213,
"learning_rate": 6e-05,
"loss": 0.1181,
"step": 3
},
{
"epoch": 0.01263157894736842,
"eval_loss": 0.2361646592617035,
"eval_runtime": 29.4356,
"eval_samples_per_second": 3.397,
"eval_steps_per_second": 1.699,
"step": 3
},
{
"epoch": 0.016842105263157894,
"grad_norm": 1.395027756690979,
"learning_rate": 8e-05,
"loss": 0.4504,
"step": 4
},
{
"epoch": 0.021052631578947368,
"grad_norm": 1.5977227687835693,
"learning_rate": 0.0001,
"loss": 0.4201,
"step": 5
},
{
"epoch": 0.02526315789473684,
"grad_norm": 1.294385552406311,
"learning_rate": 0.00012,
"loss": 0.3502,
"step": 6
},
{
"epoch": 0.02526315789473684,
"eval_loss": 0.14960011839866638,
"eval_runtime": 29.4235,
"eval_samples_per_second": 3.399,
"eval_steps_per_second": 1.699,
"step": 6
},
{
"epoch": 0.029473684210526315,
"grad_norm": 0.3078320622444153,
"learning_rate": 0.00014,
"loss": 0.0808,
"step": 7
},
{
"epoch": 0.03368421052631579,
"grad_norm": 0.6034978032112122,
"learning_rate": 0.00016,
"loss": 0.0923,
"step": 8
},
{
"epoch": 0.037894736842105266,
"grad_norm": 0.5751827955245972,
"learning_rate": 0.00018,
"loss": 0.0495,
"step": 9
},
{
"epoch": 0.037894736842105266,
"eval_loss": 0.04392697289586067,
"eval_runtime": 29.5398,
"eval_samples_per_second": 3.385,
"eval_steps_per_second": 1.693,
"step": 9
},
{
"epoch": 0.042105263157894736,
"grad_norm": 0.9490816593170166,
"learning_rate": 0.0002,
"loss": 0.0434,
"step": 10
}
],
"logging_steps": 1,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5589685858467840.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|