Training in progress, step 160, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +25 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3671315016
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c00f4770dc8e7b019dc6a6debaa362bab68c6cfd35eca15267ad109c23a42fe
|
| 3 |
size 3671315016
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3730396474
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3346e3093582358d266962c8edb157180d5be4fa3300662d51e548543c8eb4e4
|
| 3 |
size 3730396474
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2355912033cdf8a210471f65d6956ea7b204dc1f992bb7ff160a01398842e73d
|
| 3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f1df9b508fa40b405b039f0683e8579bc073a168693465fb82192e1ad1fd78d
|
| 3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5502dbebdf9f40b2dd0bbf6873e0329b5b773ddb393ab99bfe392b8af956e80
|
| 3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:deb9b135706ccb81d17045fbd75611752ff5ba7ec0435cca72d7997d0abd01ac
|
| 3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66f97637bcae20735f25e4b45c4e868550cd21597f26c3bd0dc18e1466020699
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 6.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -169,6 +169,28 @@
|
|
| 169 |
"eval_samples_per_second": 94.253,
|
| 170 |
"eval_steps_per_second": 2.951,
|
| 171 |
"step": 140
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
}
|
| 173 |
],
|
| 174 |
"logging_steps": 10,
|
|
@@ -188,7 +210,7 @@
|
|
| 188 |
"attributes": {}
|
| 189 |
}
|
| 190 |
},
|
| 191 |
-
"total_flos":
|
| 192 |
"train_batch_size": 8,
|
| 193 |
"trial_name": null,
|
| 194 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 6.956521739130435,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 160,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 169 |
"eval_samples_per_second": 94.253,
|
| 170 |
"eval_steps_per_second": 2.951,
|
| 171 |
"step": 140
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 6.521739130434782,
|
| 175 |
+
"grad_norm": 1.0625,
|
| 176 |
+
"learning_rate": 0.00016772815716257412,
|
| 177 |
+
"loss": 0.1172,
|
| 178 |
+
"step": 150
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 6.956521739130435,
|
| 182 |
+
"grad_norm": 0.9375,
|
| 183 |
+
"learning_rate": 0.0001627176358473537,
|
| 184 |
+
"loss": 0.1326,
|
| 185 |
+
"step": 160
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 6.956521739130435,
|
| 189 |
+
"eval_loss": 3.5411362648010254,
|
| 190 |
+
"eval_runtime": 15.9272,
|
| 191 |
+
"eval_samples_per_second": 94.241,
|
| 192 |
+
"eval_steps_per_second": 2.951,
|
| 193 |
+
"step": 160
|
| 194 |
}
|
| 195 |
],
|
| 196 |
"logging_steps": 10,
|
|
|
|
| 210 |
"attributes": {}
|
| 211 |
}
|
| 212 |
},
|
| 213 |
+
"total_flos": 9.594909694623744e+16,
|
| 214 |
"train_batch_size": 8,
|
| 215 |
"trial_name": null,
|
| 216 |
"trial_params": null
|