Training in progress, step 15600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1335,6 +1335,10 @@ You can finetune this model on your own dataset.
|
|
| 1335 |
| 0.2704 | 15300 | 0.3625 |
|
| 1336 |
| 0.2712 | 15350 | 0.4088 |
|
| 1337 |
| 0.2721 | 15400 | 0.4126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1338 |
|
| 1339 |
</details>
|
| 1340 |
|
|
|
|
| 1335 |
| 0.2704 | 15300 | 0.3625 |
|
| 1336 |
| 0.2712 | 15350 | 0.4088 |
|
| 1337 |
| 0.2721 | 15400 | 0.4126 |
|
| 1338 |
+
| 0.2730 | 15450 | 0.4662 |
|
| 1339 |
+
| 0.2739 | 15500 | 0.3889 |
|
| 1340 |
+
| 0.2748 | 15550 | 0.3618 |
|
| 1341 |
+
| 0.2757 | 15600 | 0.4126 |
|
| 1342 |
|
| 1343 |
</details>
|
| 1344 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36f741855bfd9ab9855541a5740f7ebf89eb5ff81a18f782d3409b6a6441f247
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f354c3d85cd72229183ba1225666edf02dbe0f02e60066712ba24c7167587d87
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ece4d2ab35a1b635eb36bee8c77e304be4a94349bd773a556811f31851337605
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab0c87524374314917bf4b3aa26c95868e23e32a6021e3f7a331e9d46fadb1d1
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7630ebf9456fffd04ad85b9111cf3e42b6fca916321363c383a4caf1724f287
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2164,6 +2164,34 @@
|
|
| 2164 |
"learning_rate": 4.0441970509120186e-05,
|
| 2165 |
"loss": 0.4126,
|
| 2166 |
"step": 15400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2167 |
}
|
| 2168 |
],
|
| 2169 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.27566220777155376,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2164 |
"learning_rate": 4.0441970509120186e-05,
|
| 2165 |
"loss": 0.4126,
|
| 2166 |
"step": 15400
|
| 2167 |
+
},
|
| 2168 |
+
{
|
| 2169 |
+
"epoch": 0.2730116096199042,
|
| 2170 |
+
"grad_norm": 1.7254865169525146,
|
| 2171 |
+
"learning_rate": 4.0392884490781644e-05,
|
| 2172 |
+
"loss": 0.4662,
|
| 2173 |
+
"step": 15450
|
| 2174 |
+
},
|
| 2175 |
+
{
|
| 2176 |
+
"epoch": 0.27389514233712076,
|
| 2177 |
+
"grad_norm": 4.502954483032227,
|
| 2178 |
+
"learning_rate": 4.034379847244311e-05,
|
| 2179 |
+
"loss": 0.3889,
|
| 2180 |
+
"step": 15500
|
| 2181 |
+
},
|
| 2182 |
+
{
|
| 2183 |
+
"epoch": 0.2747786750543373,
|
| 2184 |
+
"grad_norm": 2.4406206607818604,
|
| 2185 |
+
"learning_rate": 4.029471245410458e-05,
|
| 2186 |
+
"loss": 0.3618,
|
| 2187 |
+
"step": 15550
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"epoch": 0.27566220777155376,
|
| 2191 |
+
"grad_norm": 1.6272777318954468,
|
| 2192 |
+
"learning_rate": 4.024562643576604e-05,
|
| 2193 |
+
"loss": 0.4126,
|
| 2194 |
+
"step": 15600
|
| 2195 |
}
|
| 2196 |
],
|
| 2197 |
"logging_steps": 50,
|