Training in progress, step 21600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1451,6 +1451,14 @@ You can finetune this model on your own dataset.
|
|
| 1451 |
| 0.3729 | 21100 | 0.2912 |
|
| 1452 |
| 0.3737 | 21150 | 0.3402 |
|
| 1453 |
| 0.3746 | 21200 | 0.3462 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1454 |
|
| 1455 |
</details>
|
| 1456 |
|
|
|
|
| 1451 |
| 0.3729 | 21100 | 0.2912 |
|
| 1452 |
| 0.3737 | 21150 | 0.3402 |
|
| 1453 |
| 0.3746 | 21200 | 0.3462 |
|
| 1454 |
+
| 0.3755 | 21250 | 0.2775 |
|
| 1455 |
+
| 0.3764 | 21300 | 0.3825 |
|
| 1456 |
+
| 0.3773 | 21350 | 0.3931 |
|
| 1457 |
+
| 0.3782 | 21400 | 0.4059 |
|
| 1458 |
+
| 0.3790 | 21450 | 0.3264 |
|
| 1459 |
+
| 0.3799 | 21500 | 0.2605 |
|
| 1460 |
+
| 0.3808 | 21550 | 0.3658 |
|
| 1461 |
+
| 0.3817 | 21600 | 0.3274 |
|
| 1462 |
|
| 1463 |
</details>
|
| 1464 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c445c6520c6e5f38ea82be358a6aa5ed9b820190229b0b058633fcc33d08a87d
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b19db173fc766529b02b99cfb2249ee2f6c33d285fe8b221a72a08efcd3e58d8
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b87c442444260c01d6f6ddc3b986a278c1c2c567a0e9dfe7a512d3828e95a3e
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c7f4b6f777ecf36ed08c5fe9d684383c44178ad62db77048c5678684b40b9a1
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00d6dd904f2193bc75011c53daf8f9374a62437ce21a5da2951c5f768935bcd3
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2976,6 +2976,62 @@
|
|
| 2976 |
"learning_rate": 3.475093754295027e-05,
|
| 2977 |
"loss": 0.3462,
|
| 2978 |
"step": 21200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2979 |
}
|
| 2980 |
],
|
| 2981 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.381686133837536,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 21600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2976 |
"learning_rate": 3.475093754295027e-05,
|
| 2977 |
"loss": 0.3462,
|
| 2978 |
"step": 21200
|
| 2979 |
+
},
|
| 2980 |
+
{
|
| 2981 |
+
"epoch": 0.3755014048170204,
|
| 2982 |
+
"grad_norm": 1.593705415725708,
|
| 2983 |
+
"learning_rate": 3.470185152461173e-05,
|
| 2984 |
+
"loss": 0.2775,
|
| 2985 |
+
"step": 21250
|
| 2986 |
+
},
|
| 2987 |
+
{
|
| 2988 |
+
"epoch": 0.3763849375342369,
|
| 2989 |
+
"grad_norm": 2.1807069778442383,
|
| 2990 |
+
"learning_rate": 3.465276550627319e-05,
|
| 2991 |
+
"loss": 0.3825,
|
| 2992 |
+
"step": 21300
|
| 2993 |
+
},
|
| 2994 |
+
{
|
| 2995 |
+
"epoch": 0.37726847025145344,
|
| 2996 |
+
"grad_norm": 1.6359409093856812,
|
| 2997 |
+
"learning_rate": 3.460367948793466e-05,
|
| 2998 |
+
"loss": 0.3931,
|
| 2999 |
+
"step": 21350
|
| 3000 |
+
},
|
| 3001 |
+
{
|
| 3002 |
+
"epoch": 0.3781520029686699,
|
| 3003 |
+
"grad_norm": 1.5960018634796143,
|
| 3004 |
+
"learning_rate": 3.455459346959612e-05,
|
| 3005 |
+
"loss": 0.4059,
|
| 3006 |
+
"step": 21400
|
| 3007 |
+
},
|
| 3008 |
+
{
|
| 3009 |
+
"epoch": 0.37903553568588644,
|
| 3010 |
+
"grad_norm": 3.367835283279419,
|
| 3011 |
+
"learning_rate": 3.4505507451257585e-05,
|
| 3012 |
+
"loss": 0.3264,
|
| 3013 |
+
"step": 21450
|
| 3014 |
+
},
|
| 3015 |
+
{
|
| 3016 |
+
"epoch": 0.37991906840310297,
|
| 3017 |
+
"grad_norm": 1.5965161323547363,
|
| 3018 |
+
"learning_rate": 3.445642143291905e-05,
|
| 3019 |
+
"loss": 0.2605,
|
| 3020 |
+
"step": 21500
|
| 3021 |
+
},
|
| 3022 |
+
{
|
| 3023 |
+
"epoch": 0.3808026011203195,
|
| 3024 |
+
"grad_norm": 1.5011396408081055,
|
| 3025 |
+
"learning_rate": 3.440733541458051e-05,
|
| 3026 |
+
"loss": 0.3658,
|
| 3027 |
+
"step": 21550
|
| 3028 |
+
},
|
| 3029 |
+
{
|
| 3030 |
+
"epoch": 0.381686133837536,
|
| 3031 |
+
"grad_norm": 1.5021259784698486,
|
| 3032 |
+
"learning_rate": 3.435824939624198e-05,
|
| 3033 |
+
"loss": 0.3274,
|
| 3034 |
+
"step": 21600
|
| 3035 |
}
|
| 3036 |
],
|
| 3037 |
"logging_steps": 50,
|