Training in progress, step 43600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1387,6 +1387,10 @@ You can finetune this model on your own dataset.
|
|
| 1387 |
| 0.7651 | 43300 | 0.3379 |
|
| 1388 |
| 0.7660 | 43350 | 0.3574 |
|
| 1389 |
| 0.7669 | 43400 | 0.3664 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1390 |
|
| 1391 |
</details>
|
| 1392 |
|
|
|
|
| 1387 |
| 0.7651 | 43300 | 0.3379 |
|
| 1388 |
| 0.7660 | 43350 | 0.3574 |
|
| 1389 |
| 0.7669 | 43400 | 0.3664 |
|
| 1390 |
+
| 0.7678 | 43450 | 0.3274 |
|
| 1391 |
+
| 0.7687 | 43500 | 0.2443 |
|
| 1392 |
+
| 0.7696 | 43550 | 0.2998 |
|
| 1393 |
+
| 0.7704 | 43600 | 0.3619 |
|
| 1394 |
|
| 1395 |
</details>
|
| 1396 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:582d7207fbae78615ca0e36023c8110cb512dfcbe185f219cdbef0f8c88f6707
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38bd1bddb06afe7bdefce01320f5544bdf1fc670f6d4527df71f8729233aad04
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ec0767005b2ab07f761617d9b9ec8dd40511cb9cacfed18b38312ca707add4f
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1a993fdfb900c15fe63d9947d4534a020649b78ba75ceb8b6036bf3ab2fc94f
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6d4bed09415fa87d626fafe2a8875f4460b02e4d1ca2a4d5c36913f61694143
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6084,6 +6084,34 @@
|
|
| 6084 |
"learning_rate": 1.2966562604307789e-05,
|
| 6085 |
"loss": 0.3664,
|
| 6086 |
"step": 43400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6087 |
}
|
| 6088 |
],
|
| 6089 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.7704405294128042,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 43600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6084 |
"learning_rate": 1.2966562604307789e-05,
|
| 6085 |
"loss": 0.3664,
|
| 6086 |
"step": 43400
|
| 6087 |
+
},
|
| 6088 |
+
{
|
| 6089 |
+
"epoch": 0.7677899312611546,
|
| 6090 |
+
"grad_norm": 2.931711196899414,
|
| 6091 |
+
"learning_rate": 1.2917476585969254e-05,
|
| 6092 |
+
"loss": 0.3274,
|
| 6093 |
+
"step": 43450
|
| 6094 |
+
},
|
| 6095 |
+
{
|
| 6096 |
+
"epoch": 0.7686734639783711,
|
| 6097 |
+
"grad_norm": 1.3495726585388184,
|
| 6098 |
+
"learning_rate": 1.2868390567630716e-05,
|
| 6099 |
+
"loss": 0.2443,
|
| 6100 |
+
"step": 43500
|
| 6101 |
+
},
|
| 6102 |
+
{
|
| 6103 |
+
"epoch": 0.7695569966955876,
|
| 6104 |
+
"grad_norm": 1.4437354803085327,
|
| 6105 |
+
"learning_rate": 1.281930454929218e-05,
|
| 6106 |
+
"loss": 0.2998,
|
| 6107 |
+
"step": 43550
|
| 6108 |
+
},
|
| 6109 |
+
{
|
| 6110 |
+
"epoch": 0.7704405294128042,
|
| 6111 |
+
"grad_norm": 4.394979000091553,
|
| 6112 |
+
"learning_rate": 1.2770218530953642e-05,
|
| 6113 |
+
"loss": 0.3619,
|
| 6114 |
+
"step": 43600
|
| 6115 |
}
|
| 6116 |
],
|
| 6117 |
"logging_steps": 50,
|