Training in progress, step 738, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +31 -6
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 735396724
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8577b4857b879d05e0f8a6cfe7c43b8c8ced9e7dfe63a4280e2e26a0e15e3c94
|
| 3 |
size 735396724
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1470915147
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94b0395fd5b76ce19e34d146a8c289af03287be06f938af8e5a3ecf61883d775
|
| 3 |
size 1470915147
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2438a2027427b3d96e56afa02342e40b518cc5a4732faf79d6925d5d55c577f0
|
| 3 |
size 16389
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b38c30466207633871e8888206bfe5061612dc77969df08738de7f38180f016e
|
| 3 |
size 16389
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:500ceadf909eafd207b60a2fffbb122c8876c30e24e54f8c5c803d79507c972c
|
| 3 |
size 16389
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67ea961a54ce2c81798cc07b051c3e0a86c346a5b46ed3c52e9ad1a579e735e6
|
| 3 |
size 16389
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b516e159fa8c859060ac9216c8c88e777ae6b3be75cd282753aa03e2d8ddd5d
|
| 3 |
size 16389
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c00c04a0504831ef1b89143e9318053ab8faf9658d240bdb98b2f4c94afaf578
|
| 3 |
size 16389
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d58dcb0bbaa1efa8f03be4baf87c9eab892f94695cc44466d3b7dcde1769437c
|
| 3 |
size 16389
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05560e77f104933fad67bf5dcd7fe3899a199c567c3ae3bd1089287dcba01cbe
|
| 3 |
size 16389
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1095ec08fae81706719696411543a90b06fc787d5310b31ba6bec1d086872596
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "model/checkpoint-
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 123,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -147,6 +147,31 @@
|
|
| 147 |
"eval_samples_per_second": 1987.855,
|
| 148 |
"eval_steps_per_second": 31.064,
|
| 149 |
"step": 615
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
}
|
| 151 |
],
|
| 152 |
"logging_steps": 50,
|
|
@@ -166,7 +191,7 @@
|
|
| 166 |
"attributes": {}
|
| 167 |
}
|
| 168 |
},
|
| 169 |
-
"total_flos":
|
| 170 |
"train_batch_size": 256,
|
| 171 |
"trial_name": null,
|
| 172 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 738,
|
| 3 |
+
"best_metric": 0.7250351529639901,
|
| 4 |
+
"best_model_checkpoint": "model/checkpoint-738",
|
| 5 |
+
"epoch": 1.50920245398773,
|
| 6 |
"eval_steps": 123,
|
| 7 |
+
"global_step": 738,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 147 |
"eval_samples_per_second": 1987.855,
|
| 148 |
"eval_steps_per_second": 31.064,
|
| 149 |
"step": 615
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.329243353783231,
|
| 153 |
+
"grad_norm": 0.28684553503990173,
|
| 154 |
+
"learning_rate": 1.723540054406503e-05,
|
| 155 |
+
"loss": 0.0963,
|
| 156 |
+
"step": 650
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.4314928425357873,
|
| 160 |
+
"grad_norm": 0.2544998824596405,
|
| 161 |
+
"learning_rate": 1.6762547661862417e-05,
|
| 162 |
+
"loss": 0.0946,
|
| 163 |
+
"step": 700
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.50920245398773,
|
| 167 |
+
"eval_entity_f1": 0.7250351529639901,
|
| 168 |
+
"eval_entity_precision": 0.6421383532568621,
|
| 169 |
+
"eval_entity_recall": 0.8727158772868242,
|
| 170 |
+
"eval_loss": 0.09457383304834366,
|
| 171 |
+
"eval_runtime": 75.1677,
|
| 172 |
+
"eval_samples_per_second": 1995.539,
|
| 173 |
+
"eval_steps_per_second": 31.184,
|
| 174 |
+
"step": 738
|
| 175 |
}
|
| 176 |
],
|
| 177 |
"logging_steps": 50,
|
|
|
|
| 191 |
"attributes": {}
|
| 192 |
}
|
| 193 |
},
|
| 194 |
+
"total_flos": 9.35469922690007e+16,
|
| 195 |
"train_batch_size": 256,
|
| 196 |
"trial_name": null,
|
| 197 |
"trial_params": null
|