Training in progress, step 861, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +38 -6
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 735396724
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b0e470e1eacd956bee52a20508ccdb7f508b31c3c5f624b811daf5cfa3c7cf3
|
| 3 |
size 735396724
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1470915147
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:333e703f3783d34a6d44079c09bfb829d5cf3ee625752b283ca8731d7c2d4193
|
| 3 |
size 1470915147
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:386936f571a3bb73993bb57c8096f93a00e47a91407b87b14eafaf523d8d4243
|
| 3 |
size 16389
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc802edde1abd3269a9fb5bd5728b077d66511bd1040cd61d61bfd756d8edf82
|
| 3 |
size 16389
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5669e1a3fa934bf58fc578de28d959d51d71136e896799b0f88f3a63a39c20ae
|
| 3 |
size 16389
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a1e0244fee1d7a41af4c1e6bb36d9c21f1092f569f8c69a338a0d753130e2f1
|
| 3 |
size 16389
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d38a5b73c21e3fdc1d3880bc420afefb2bcaee15f4859f00a2dedf626d423293
|
| 3 |
size 16389
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6428c4cb1b09dbf4c49c45d7c925fedb46fbf7d14d24a337331ceb01c537fd7e
|
| 3 |
size 16389
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:962516c0946264663175debb272171a6aeeef3bcbf3a51bb37623d3e002784bc
|
| 3 |
size 16389
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58076390752eb289cf58b117a31dac67edb3ce8024e8cda265f0e1778fdf0150
|
| 3 |
size 16389
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79b33311c9d1ac1e2e9a207a403ad11f68a52ddbcc7079e83ceb4058a94228f8
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "model/checkpoint-
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 123,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -172,6 +172,38 @@
|
|
| 172 |
"eval_samples_per_second": 1995.539,
|
| 173 |
"eval_steps_per_second": 31.184,
|
| 174 |
"step": 738
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
}
|
| 176 |
],
|
| 177 |
"logging_steps": 50,
|
|
@@ -191,7 +223,7 @@
|
|
| 191 |
"attributes": {}
|
| 192 |
}
|
| 193 |
},
|
| 194 |
-
"total_flos":
|
| 195 |
"train_batch_size": 256,
|
| 196 |
"trial_name": null,
|
| 197 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 861,
|
| 3 |
+
"best_metric": 0.728103938603657,
|
| 4 |
+
"best_model_checkpoint": "model/checkpoint-861",
|
| 5 |
+
"epoch": 1.7607361963190185,
|
| 6 |
"eval_steps": 123,
|
| 7 |
+
"global_step": 861,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 172 |
"eval_samples_per_second": 1995.539,
|
| 173 |
"eval_steps_per_second": 31.184,
|
| 174 |
"step": 738
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"epoch": 1.5337423312883436,
|
| 178 |
+
"grad_norm": 0.23476801812648773,
|
| 179 |
+
"learning_rate": 1.6260024056459024e-05,
|
| 180 |
+
"loss": 0.094,
|
| 181 |
+
"step": 750
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"epoch": 1.6359918200408998,
|
| 185 |
+
"grad_norm": 0.2882135808467865,
|
| 186 |
+
"learning_rate": 1.573003455354235e-05,
|
| 187 |
+
"loss": 0.0935,
|
| 188 |
+
"step": 800
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"epoch": 1.738241308793456,
|
| 192 |
+
"grad_norm": 0.19589418172836304,
|
| 193 |
+
"learning_rate": 1.5174904485609352e-05,
|
| 194 |
+
"loss": 0.0954,
|
| 195 |
+
"step": 850
|
| 196 |
+
},
|
| 197 |
+
{
|
| 198 |
+
"epoch": 1.7607361963190185,
|
| 199 |
+
"eval_entity_f1": 0.728103938603657,
|
| 200 |
+
"eval_entity_precision": 0.6492946761951431,
|
| 201 |
+
"eval_entity_recall": 0.871873007639857,
|
| 202 |
+
"eval_loss": 0.09316740930080414,
|
| 203 |
+
"eval_runtime": 75.2979,
|
| 204 |
+
"eval_samples_per_second": 1992.087,
|
| 205 |
+
"eval_steps_per_second": 31.13,
|
| 206 |
+
"step": 861
|
| 207 |
}
|
| 208 |
],
|
| 209 |
"logging_steps": 50,
|
|
|
|
| 223 |
"attributes": {}
|
| 224 |
}
|
| 225 |
},
|
| 226 |
+
"total_flos": 1.091555797457961e+17,
|
| 227 |
"train_batch_size": 256,
|
| 228 |
"trial_name": null,
|
| 229 |
"trial_params": null
|