Training in progress, step 160, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +25 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2471645608
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:057b49db93b4bb11e49863668fa73539c65d76cca34601fff825c1691bb64851
|
| 3 |
size 2471645608
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2510806010
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d304751a2ea1c94cc1b9b0de681233ea14fc47a301587cfdabe4ca3c3d0e44bc
|
| 3 |
size 2510806010
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56b323eece943c8be03268f2f2cd0f5781052da6f4d6e81974e7dc0391e6f9b0
|
| 3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8a5396b6ce82a1fa517dfb220ab4ae7fd088b5d9659632b81104386db9d3bac
|
| 3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:507e713485c774580f6a9da657c78542ed7cbfe40136e2096ef127927ec8b96e
|
| 3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac3c203718bd461046b02f6c1bbd4eca077b667406a41352c7b69bf3764ca88c
|
| 3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91ab173e3ec81f1fcc90a5bec767634b3731350aacc0be314a1243781b9ad361
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -169,6 +169,28 @@
|
|
| 169 |
"eval_samples_per_second": 105.726,
|
| 170 |
"eval_steps_per_second": 2.465,
|
| 171 |
"step": 140
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
}
|
| 173 |
],
|
| 174 |
"logging_steps": 10,
|
|
@@ -188,7 +210,7 @@
|
|
| 188 |
"attributes": {}
|
| 189 |
}
|
| 190 |
},
|
| 191 |
-
"total_flos":
|
| 192 |
"train_batch_size": 11,
|
| 193 |
"trial_name": null,
|
| 194 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.411764705882353,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 160,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 169 |
"eval_samples_per_second": 105.726,
|
| 170 |
"eval_steps_per_second": 2.465,
|
| 171 |
"step": 140
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 8.823529411764707,
|
| 175 |
+
"grad_norm": 0.76171875,
|
| 176 |
+
"learning_rate": 0.00014759473930370736,
|
| 177 |
+
"loss": 0.0647,
|
| 178 |
+
"step": 150
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 9.411764705882353,
|
| 182 |
+
"grad_norm": 0.67578125,
|
| 183 |
+
"learning_rate": 0.00014016954246529696,
|
| 184 |
+
"loss": 0.0456,
|
| 185 |
+
"step": 160
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 9.411764705882353,
|
| 189 |
+
"eval_loss": 3.7919914722442627,
|
| 190 |
+
"eval_runtime": 15.5596,
|
| 191 |
+
"eval_samples_per_second": 96.468,
|
| 192 |
+
"eval_steps_per_second": 2.249,
|
| 193 |
+
"step": 160
|
| 194 |
}
|
| 195 |
],
|
| 196 |
"logging_steps": 10,
|
|
|
|
| 210 |
"attributes": {}
|
| 211 |
}
|
| 212 |
},
|
| 213 |
+
"total_flos": 8.344306217648128e+16,
|
| 214 |
"train_batch_size": 11,
|
| 215 |
"trial_name": null,
|
| 216 |
"trial_params": null
|