Training in progress, step 240, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +31 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 136062744
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57aa943ba31b7fd0f2d2258b3638435908884af1e83b7e3da9763ba67b95fa40
|
| 3 |
size 136062744
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 272133748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78f609766fd9499e6af357bcd74eef24836222dee0149f07897fd8895e50aade
|
| 3 |
size 272133748
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93e80a2275824ab49f6bc0b217bb315cd0a85d3c25b43a245828495794a78d4d
|
| 3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:381eb7d8287a93e17a40cc15be93d534da9dbf37378fcc74868d5615daf19b34
|
| 3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d4d987ee650d278db90b1b49f5d5e57d81bba91b4e110659d4027a225f63078
|
| 3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bfd04834fa55090f0aa6f19062eb69d5e7e7d567f3b51b2a09c93679da782f7
|
| 3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e15a8fd81fd90d6fe35aa6feb35c5e13dd4fe18af2950ff7fcf4c6b68016d32
|
| 3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4d603165c2d1acc537a09a3e1f8f3831fbd36a555d1b4282034bf9a666af8e7
|
| 3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3b4cdeeb7d7c2d37111aeb034296baee0b0b647a48bc49f1ac03a01bf25b677
|
| 3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:887f0c6920abc07c1199dc922f55301b3e567adfbbf72707fdf5afb2c202b331
|
| 3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:941d9e9f4cfb6894bf574771af69c852f299b452dcf03e677dae3dadf692a003
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -163,6 +163,34 @@
|
|
| 163 |
"eval_samples_per_second": 1454.261,
|
| 164 |
"eval_steps_per_second": 182.146,
|
| 165 |
"step": 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
}
|
| 167 |
],
|
| 168 |
"logging_steps": 10,
|
|
@@ -182,7 +210,7 @@
|
|
| 182 |
"attributes": {}
|
| 183 |
}
|
| 184 |
},
|
| 185 |
-
"total_flos":
|
| 186 |
"train_batch_size": 1,
|
| 187 |
"trial_name": null,
|
| 188 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.530805687203792,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 240,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 163 |
"eval_samples_per_second": 1454.261,
|
| 164 |
"eval_steps_per_second": 182.146,
|
| 165 |
"step": 200
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"epoch": 3.966824644549763,
|
| 169 |
+
"grad_norm": 1.78125,
|
| 170 |
+
"learning_rate": 0.00019847451999183694,
|
| 171 |
+
"loss": 2.1714,
|
| 172 |
+
"step": 210
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"epoch": 4.151658767772512,
|
| 176 |
+
"grad_norm": 3.453125,
|
| 177 |
+
"learning_rate": 0.00019824084210910925,
|
| 178 |
+
"loss": 2.0489,
|
| 179 |
+
"step": 220
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"epoch": 4.341232227488152,
|
| 183 |
+
"grad_norm": 2.296875,
|
| 184 |
+
"learning_rate": 0.00019799067644341844,
|
| 185 |
+
"loss": 1.7888,
|
| 186 |
+
"step": 230
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"epoch": 4.530805687203792,
|
| 190 |
+
"grad_norm": 1.9375,
|
| 191 |
+
"learning_rate": 0.0001977240649801253,
|
| 192 |
+
"loss": 2.055,
|
| 193 |
+
"step": 240
|
| 194 |
}
|
| 195 |
],
|
| 196 |
"logging_steps": 10,
|
|
|
|
| 210 |
"attributes": {}
|
| 211 |
}
|
| 212 |
},
|
| 213 |
+
"total_flos": 3.267013366723379e+16,
|
| 214 |
"train_batch_size": 1,
|
| 215 |
"trial_name": null,
|
| 216 |
"trial_params": null
|