Training in progress, step 23200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1487,6 +1487,10 @@ You can finetune this model on your own dataset.
|
|
| 1487 |
| 0.4047 | 22900 | 0.3044 |
|
| 1488 |
| 0.4055 | 22950 | 0.357 |
|
| 1489 |
| 0.4064 | 23000 | 0.3616 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1490 |
|
| 1491 |
</details>
|
| 1492 |
|
|
|
|
| 1487 |
| 0.4047 | 22900 | 0.3044 |
|
| 1488 |
| 0.4055 | 22950 | 0.357 |
|
| 1489 |
| 0.4064 | 23000 | 0.3616 |
|
| 1490 |
+
| 0.4073 | 23050 | 0.3139 |
|
| 1491 |
+
| 0.4082 | 23100 | 0.3474 |
|
| 1492 |
+
| 0.4091 | 23150 | 0.3208 |
|
| 1493 |
+
| 0.4100 | 23200 | 0.3798 |
|
| 1494 |
|
| 1495 |
</details>
|
| 1496 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cfe587849d1306f54e0ca75ee4b8dc42ffa4c0050923c00408ab072955907d3
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c016c7f7476d35ba0914e4807cd567e2323f8abd2649d40533bb1edf8afea2d
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7301644e101f87025474e0abd0c4e21251cc4c43a5173ce57ba0318fade3400
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8bb4812f9196d1a220df2036c293c5cb5d81dc224d96c15decb25fee077dd8a
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55ce77f59b929ccf856f258ba2d8bdee259c33a00d44ab9b7d2ff7d9ff4f481c
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3228,6 +3228,34 @@
|
|
| 3228 |
"learning_rate": 3.2983840882762956e-05,
|
| 3229 |
"loss": 0.3616,
|
| 3230 |
"step": 23000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3231 |
}
|
| 3232 |
],
|
| 3233 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.4099591807884646,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 23200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3228 |
"learning_rate": 3.2983840882762956e-05,
|
| 3229 |
"loss": 0.3616,
|
| 3230 |
"step": 23000
|
| 3231 |
+
},
|
| 3232 |
+
{
|
| 3233 |
+
"epoch": 0.407308582636815,
|
| 3234 |
+
"grad_norm": 2.8382747173309326,
|
| 3235 |
+
"learning_rate": 3.2934754864424413e-05,
|
| 3236 |
+
"loss": 0.3139,
|
| 3237 |
+
"step": 23050
|
| 3238 |
+
},
|
| 3239 |
+
{
|
| 3240 |
+
"epoch": 0.40819211535403155,
|
| 3241 |
+
"grad_norm": 3.052281618118286,
|
| 3242 |
+
"learning_rate": 3.2885668846085885e-05,
|
| 3243 |
+
"loss": 0.3474,
|
| 3244 |
+
"step": 23100
|
| 3245 |
+
},
|
| 3246 |
+
{
|
| 3247 |
+
"epoch": 0.4090756480712481,
|
| 3248 |
+
"grad_norm": 1.373552680015564,
|
| 3249 |
+
"learning_rate": 3.283756454811412e-05,
|
| 3250 |
+
"loss": 0.3208,
|
| 3251 |
+
"step": 23150
|
| 3252 |
+
},
|
| 3253 |
+
{
|
| 3254 |
+
"epoch": 0.4099591807884646,
|
| 3255 |
+
"grad_norm": 1.6797386407852173,
|
| 3256 |
+
"learning_rate": 3.278847852977558e-05,
|
| 3257 |
+
"loss": 0.3798,
|
| 3258 |
+
"step": 23200
|
| 3259 |
}
|
| 3260 |
],
|
| 3261 |
"logging_steps": 50,
|