Training in progress, step 2240, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 100697728
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0632b909fd1c9aca69067d0bed149621d32fc888cd885edfe8a9c9167000cd0
|
| 3 |
size 100697728
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 201541754
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12617d04abf8d5849ea18ccd1caeaec1b356dd520401e75c5bd06ee3fc8e8aa6
|
| 3 |
size 201541754
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b4460282b72eb6f8903bafdf89fcbbe259019be8c29ef42adde2e2a7ce8a185
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f05e22fba1c2918a10ec5938a0705454f8a1b28f2198ed555d6f8a3e293559a
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1783,14 +1783,30 @@
|
|
| 1783 |
"loss": 0.3809,
|
| 1784 |
"num_input_tokens_seen": 1503138,
|
| 1785 |
"step": 2220
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1786 |
}
|
| 1787 |
],
|
| 1788 |
"logging_steps": 10,
|
| 1789 |
"max_steps": 2795,
|
| 1790 |
-
"num_input_tokens_seen":
|
| 1791 |
"num_train_epochs": 1,
|
| 1792 |
"save_steps": 20,
|
| 1793 |
-
"total_flos": 3.
|
| 1794 |
"train_batch_size": 1,
|
| 1795 |
"trial_name": null,
|
| 1796 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.8014311270125224,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 2240,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1783 |
"loss": 0.3809,
|
| 1784 |
"num_input_tokens_seen": 1503138,
|
| 1785 |
"step": 2220
|
| 1786 |
+
},
|
| 1787 |
+
{
|
| 1788 |
+
"epoch": 0.7978533094812165,
|
| 1789 |
+
"grad_norm": 0.39187124371528625,
|
| 1790 |
+
"learning_rate": 4.042933810375671e-05,
|
| 1791 |
+
"loss": 0.3779,
|
| 1792 |
+
"num_input_tokens_seen": 1509279,
|
| 1793 |
+
"step": 2230
|
| 1794 |
+
},
|
| 1795 |
+
{
|
| 1796 |
+
"epoch": 0.8014311270125224,
|
| 1797 |
+
"grad_norm": 0.3644355833530426,
|
| 1798 |
+
"learning_rate": 3.971377459749553e-05,
|
| 1799 |
+
"loss": 0.3883,
|
| 1800 |
+
"num_input_tokens_seen": 1515223,
|
| 1801 |
+
"step": 2240
|
| 1802 |
}
|
| 1803 |
],
|
| 1804 |
"logging_steps": 10,
|
| 1805 |
"max_steps": 2795,
|
| 1806 |
+
"num_input_tokens_seen": 1515223,
|
| 1807 |
"num_train_epochs": 1,
|
| 1808 |
"save_steps": 20,
|
| 1809 |
+
"total_flos": 3.4072014423619584e+16,
|
| 1810 |
"train_batch_size": 1,
|
| 1811 |
"trial_name": null,
|
| 1812 |
"trial_params": null
|