Training in progress, step 2220, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 100697728
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe4cb03d3561140a6dc960340bfc20239d79b812171f8665033b7d5950060c16
|
| 3 |
size 100697728
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 201541754
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02194e64ab0e46b57c09b90a41edf3c57c35eb6a761d021610ca25fe7722be96
|
| 3 |
size 201541754
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2417621c0c251f9779f9e28a0066e04b9dea3b724de678a9d2863d36bd10fb07
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcda9d1b5e596e9a16f8101054c6161f73e1500ea7e6c07e8c2aa6c3a698c4fe
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1767,14 +1767,30 @@
|
|
| 1767 |
"loss": 0.4195,
|
| 1768 |
"num_input_tokens_seen": 1488275,
|
| 1769 |
"step": 2200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1770 |
}
|
| 1771 |
],
|
| 1772 |
"logging_steps": 10,
|
| 1773 |
"max_steps": 2795,
|
| 1774 |
-
"num_input_tokens_seen":
|
| 1775 |
"num_train_epochs": 1,
|
| 1776 |
"save_steps": 20,
|
| 1777 |
-
"total_flos": 3.
|
| 1778 |
"train_batch_size": 1,
|
| 1779 |
"trial_name": null,
|
| 1780 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.7942754919499105,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 2220,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1767 |
"loss": 0.4195,
|
| 1768 |
"num_input_tokens_seen": 1488275,
|
| 1769 |
"step": 2200
|
| 1770 |
+
},
|
| 1771 |
+
{
|
| 1772 |
+
"epoch": 0.7906976744186046,
|
| 1773 |
+
"grad_norm": 0.31744202971458435,
|
| 1774 |
+
"learning_rate": 4.186046511627907e-05,
|
| 1775 |
+
"loss": 0.3672,
|
| 1776 |
+
"num_input_tokens_seen": 1496031,
|
| 1777 |
+
"step": 2210
|
| 1778 |
+
},
|
| 1779 |
+
{
|
| 1780 |
+
"epoch": 0.7942754919499105,
|
| 1781 |
+
"grad_norm": 0.3008958697319031,
|
| 1782 |
+
"learning_rate": 4.114490161001789e-05,
|
| 1783 |
+
"loss": 0.3809,
|
| 1784 |
+
"num_input_tokens_seen": 1503138,
|
| 1785 |
+
"step": 2220
|
| 1786 |
}
|
| 1787 |
],
|
| 1788 |
"logging_steps": 10,
|
| 1789 |
"max_steps": 2795,
|
| 1790 |
+
"num_input_tokens_seen": 1503138,
|
| 1791 |
"num_train_epochs": 1,
|
| 1792 |
"save_steps": 20,
|
| 1793 |
+
"total_flos": 3.3800265450491904e+16,
|
| 1794 |
"train_batch_size": 1,
|
| 1795 |
"trial_name": null,
|
| 1796 |
"trial_params": null
|