Training in progress, step 615, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +35 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 735396724
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ff836c102370effe0873d714d18fb4deb97c990377413beb28af0209b7117fd
|
| 3 |
size 735396724
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1470915147
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23121584ddfa42bf24d51afe9540f4af170e58a7d92879f2ef5afc774afcf860
|
| 3 |
size 1470915147
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d09f12437570e9924f51bc4a821db9b068e306d232df0cca1a764d5a6c61a79f
|
| 3 |
size 16389
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b886aa3d5bf3a5412d189e2e63abc0e0362d43ead0c1373f953b7ddf9847afb
|
| 3 |
size 16389
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be74bf3f842bae7677c9d0f21bf8c2bf8bdef7bc5e4729c1e101ce625035e9bd
|
| 3 |
size 16389
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb1124a4af024eed9c934cf3ba9d0d996a738e061f5691271c14c7e5100af77b
|
| 3 |
size 16389
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33fc7fdc06cdeb8bb2807eccde88ebb8242a3864a3857fb765782ad7bd05e4c1
|
| 3 |
size 16389
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5836b27184f990ca621738efc9434979e8a956e3ee20e9620b60dc22e0b28dc
|
| 3 |
size 16389
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40658d030b33d76a0abb569872fbae573ee257ce1a678d173300ad883007eb86
|
| 3 |
size 16389
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e84ac527cf9cfef8619fd0f3a76b8211c6167dcc7c225bfa4dafe4104b5fd35
|
| 3 |
size 16389
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff8cacffbc62e563c7b7ee69a67f0fcd9357f9269af92f22b2b3802c932e9df3
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 492,
|
| 3 |
"best_metric": 0.7237721816258966,
|
| 4 |
"best_model_checkpoint": "model/checkpoint-492",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 123,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -115,6 +115,38 @@
|
|
| 115 |
"eval_samples_per_second": 1980.667,
|
| 116 |
"eval_steps_per_second": 30.951,
|
| 117 |
"step": 492
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
}
|
| 119 |
],
|
| 120 |
"logging_steps": 50,
|
|
@@ -134,7 +166,7 @@
|
|
| 134 |
"attributes": {}
|
| 135 |
}
|
| 136 |
},
|
| 137 |
-
"total_flos":
|
| 138 |
"train_batch_size": 256,
|
| 139 |
"trial_name": null,
|
| 140 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 492,
|
| 3 |
"best_metric": 0.7237721816258966,
|
| 4 |
"best_model_checkpoint": "model/checkpoint-492",
|
| 5 |
+
"epoch": 1.2576687116564418,
|
| 6 |
"eval_steps": 123,
|
| 7 |
+
"global_step": 615,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 115 |
"eval_samples_per_second": 1980.667,
|
| 116 |
"eval_steps_per_second": 30.951,
|
| 117 |
"step": 492
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"epoch": 1.0224948875255624,
|
| 121 |
+
"grad_norm": 0.2750433385372162,
|
| 122 |
+
"learning_rate": 1.8455893306060422e-05,
|
| 123 |
+
"loss": 0.1016,
|
| 124 |
+
"step": 500
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"epoch": 1.1247443762781186,
|
| 128 |
+
"grad_norm": 0.29885414242744446,
|
| 129 |
+
"learning_rate": 1.8083934841122383e-05,
|
| 130 |
+
"loss": 0.0981,
|
| 131 |
+
"step": 550
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"epoch": 1.2269938650306749,
|
| 135 |
+
"grad_norm": 0.23177891969680786,
|
| 136 |
+
"learning_rate": 1.7676508057876326e-05,
|
| 137 |
+
"loss": 0.0969,
|
| 138 |
+
"step": 600
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"epoch": 1.2576687116564418,
|
| 142 |
+
"eval_entity_f1": 0.7210088680783326,
|
| 143 |
+
"eval_entity_precision": 0.6396341157410339,
|
| 144 |
+
"eval_entity_recall": 0.8732237602792275,
|
| 145 |
+
"eval_loss": 0.09714934974908829,
|
| 146 |
+
"eval_runtime": 75.4582,
|
| 147 |
+
"eval_samples_per_second": 1987.855,
|
| 148 |
+
"eval_steps_per_second": 31.064,
|
| 149 |
+
"step": 615
|
| 150 |
}
|
| 151 |
],
|
| 152 |
"logging_steps": 50,
|
|
|
|
| 166 |
"attributes": {}
|
| 167 |
}
|
| 168 |
},
|
| 169 |
+
"total_flos": 7.797577516843008e+16,
|
| 170 |
"train_batch_size": 256,
|
| 171 |
"trial_name": null,
|
| 172 |
"trial_params": null
|