Training in progress, step 369, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +38 -6
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 735396724
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00617d127e52142db152f51035e6d793d67b79698553c806fb0c755de24a4944
|
| 3 |
size 735396724
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1470915147
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0aec0adc0e2b4b305e7a1da493a55e418ee0ee5d560c8ff61b106568d5b0bac5
|
| 3 |
size 1470915147
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5daafd01e11a8e378bee423bfc78dc889b9595f52486fe44309d6cfdb1e8f39e
|
| 3 |
size 16389
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd946779b63f36a745f76b3dea5b82fdda34f69f71b29d30bc33c6469ec6efc7
|
| 3 |
size 16389
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dfb52d3d9719c3a23780cb0726fd105f13eb3795bef452c37de45847245f48c4
|
| 3 |
size 16389
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3ea23a163b521be4be62f809b70e9314364da460e9093ec7e246193690284e2
|
| 3 |
size 16389
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3aa0ea2b718d2e995605d91b46b0095c0770bb817cc0b7bb955f992dbfd3445
|
| 3 |
size 16389
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:274b05ab7fd4442bba200371ded2027858747aed543ac8bcea81c8efc8330216
|
| 3 |
size 16389
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f70ecf73265273c79012c58e718f348331144177d9b2caba269cbcb34dabd37a
|
| 3 |
size 16389
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49c8994ebdec0308f7fb2f8c656db7abc240ad44611736b9d1866dfbb4e908d2
|
| 3 |
size 16389
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4b81c99c7d092f3e4d54713b045255676277930acb48938b544aa14bde54b79
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "model/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 123,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -58,6 +58,38 @@
|
|
| 58 |
"eval_samples_per_second": 1993.05,
|
| 59 |
"eval_steps_per_second": 31.145,
|
| 60 |
"step": 246
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
}
|
| 62 |
],
|
| 63 |
"logging_steps": 50,
|
|
@@ -77,7 +109,7 @@
|
|
| 77 |
"attributes": {}
|
| 78 |
}
|
| 79 |
},
|
| 80 |
-
"total_flos":
|
| 81 |
"train_batch_size": 256,
|
| 82 |
"trial_name": null,
|
| 83 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 369,
|
| 3 |
+
"best_metric": 0.7108936942946532,
|
| 4 |
+
"best_model_checkpoint": "model/checkpoint-369",
|
| 5 |
+
"epoch": 0.754601226993865,
|
| 6 |
"eval_steps": 123,
|
| 7 |
+
"global_step": 369,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 58 |
"eval_samples_per_second": 1993.05,
|
| 59 |
"eval_steps_per_second": 31.145,
|
| 60 |
"step": 246
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"epoch": 0.5112474437627812,
|
| 64 |
+
"grad_norm": 0.3463546633720398,
|
| 65 |
+
"learning_rate": 1.9732369264463184e-05,
|
| 66 |
+
"loss": 0.1163,
|
| 67 |
+
"step": 250
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"epoch": 0.6134969325153374,
|
| 71 |
+
"grad_norm": 0.31447625160217285,
|
| 72 |
+
"learning_rate": 1.9558884178116507e-05,
|
| 73 |
+
"loss": 0.1118,
|
| 74 |
+
"step": 300
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"epoch": 0.7157464212678937,
|
| 78 |
+
"grad_norm": 0.32359057664871216,
|
| 79 |
+
"learning_rate": 1.934345942335807e-05,
|
| 80 |
+
"loss": 0.1105,
|
| 81 |
+
"step": 350
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"epoch": 0.754601226993865,
|
| 85 |
+
"eval_entity_f1": 0.7108936942946532,
|
| 86 |
+
"eval_entity_precision": 0.6230731442494926,
|
| 87 |
+
"eval_entity_recall": 0.8653623799180904,
|
| 88 |
+
"eval_loss": 0.10435672849416733,
|
| 89 |
+
"eval_runtime": 75.8105,
|
| 90 |
+
"eval_samples_per_second": 1978.617,
|
| 91 |
+
"eval_steps_per_second": 30.919,
|
| 92 |
+
"step": 369
|
| 93 |
}
|
| 94 |
],
|
| 95 |
"logging_steps": 50,
|
|
|
|
| 109 |
"attributes": {}
|
| 110 |
}
|
| 111 |
},
|
| 112 |
+
"total_flos": 4.673116000432947e+16,
|
| 113 |
"train_batch_size": 256,
|
| 114 |
"trial_name": null,
|
| 115 |
"trial_params": null
|