Training in progress, step 13000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1281,6 +1281,10 @@ You can finetune this model on your own dataset.
|
|
| 1281 |
| 0.2244 | 12700 | 0.4847 |
|
| 1282 |
| 0.2253 | 12750 | 0.4 |
|
| 1283 |
| 0.2262 | 12800 | 0.4755 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1284 |
|
| 1285 |
|
| 1286 |
### Framework Versions
|
|
|
|
| 1281 |
| 0.2244 | 12700 | 0.4847 |
|
| 1282 |
| 0.2253 | 12750 | 0.4 |
|
| 1283 |
| 0.2262 | 12800 | 0.4755 |
|
| 1284 |
+
| 0.2271 | 12850 | 0.3399 |
|
| 1285 |
+
| 0.2280 | 12900 | 0.3297 |
|
| 1286 |
+
| 0.2288 | 12950 | 0.4071 |
|
| 1287 |
+
| 0.2297 | 13000 | 0.4069 |
|
| 1288 |
|
| 1289 |
|
| 1290 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a99dbe886ae3b48dcbc17c9add8a4bd329b9679468fb6c162134aa8463356d5
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:172018835a66c5e0120e97d48b6c921f1ef2ffa9a8cbf21cd65db32f82f39fdc
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0a97494fc3a06e5f40c026a0f41674c967ae21ee669dae8157fc95b5874a10f
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c530f7e876503d8e13ff43d14d04017a4f2689d24e141257ebdc42956b0bd010
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8a946928e63f8835465228f3f0df594649164b69e587f90a1cdfe3fc4f57565
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1800,6 +1800,34 @@
|
|
| 1800 |
"learning_rate": 4.299248002199054e-05,
|
| 1801 |
"loss": 0.4755,
|
| 1802 |
"step": 12800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1803 |
}
|
| 1804 |
],
|
| 1805 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.22971850647629483,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 13000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1800 |
"learning_rate": 4.299248002199054e-05,
|
| 1801 |
"loss": 0.4755,
|
| 1802 |
"step": 12800
|
| 1803 |
+
},
|
| 1804 |
+
{
|
| 1805 |
+
"epoch": 0.22706790832464527,
|
| 1806 |
+
"grad_norm": 1.7713844776153564,
|
| 1807 |
+
"learning_rate": 4.2943394003652e-05,
|
| 1808 |
+
"loss": 0.3399,
|
| 1809 |
+
"step": 12850
|
| 1810 |
+
},
|
| 1811 |
+
{
|
| 1812 |
+
"epoch": 0.22795144104186177,
|
| 1813 |
+
"grad_norm": 1.2936394214630127,
|
| 1814 |
+
"learning_rate": 4.289528970568024e-05,
|
| 1815 |
+
"loss": 0.3297,
|
| 1816 |
+
"step": 12900
|
| 1817 |
+
},
|
| 1818 |
+
{
|
| 1819 |
+
"epoch": 0.2288349737590783,
|
| 1820 |
+
"grad_norm": 1.6622658967971802,
|
| 1821 |
+
"learning_rate": 4.28462036873417e-05,
|
| 1822 |
+
"loss": 0.4071,
|
| 1823 |
+
"step": 12950
|
| 1824 |
+
},
|
| 1825 |
+
{
|
| 1826 |
+
"epoch": 0.22971850647629483,
|
| 1827 |
+
"grad_norm": 1.3949196338653564,
|
| 1828 |
+
"learning_rate": 4.279711766900316e-05,
|
| 1829 |
+
"loss": 0.4069,
|
| 1830 |
+
"step": 13000
|
| 1831 |
}
|
| 1832 |
],
|
| 1833 |
"logging_steps": 50,
|