Training in progress, step 14200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1307,6 +1307,10 @@ You can finetune this model on your own dataset.
|
|
| 1307 |
| 0.2456 | 13900 | 0.4071 |
|
| 1308 |
| 0.2465 | 13950 | 0.3434 |
|
| 1309 |
| 0.2474 | 14000 | 0.3885 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1310 |
|
| 1311 |
</details>
|
| 1312 |
|
|
|
|
| 1307 |
| 0.2456 | 13900 | 0.4071 |
|
| 1308 |
| 0.2465 | 13950 | 0.3434 |
|
| 1309 |
| 0.2474 | 14000 | 0.3885 |
|
| 1310 |
+
| 0.2483 | 14050 | 0.4296 |
|
| 1311 |
+
| 0.2492 | 14100 | 0.3853 |
|
| 1312 |
+
| 0.2500 | 14150 | 0.4068 |
|
| 1313 |
+
| 0.2509 | 14200 | 0.4071 |
|
| 1314 |
|
| 1315 |
</details>
|
| 1316 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4503e7e25e7a5b0e3fa0b1c8a23c410d156686b9c3f0b8c96dc82d39a0e1b1c
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d5d943bf4dc121c9308255043fd16448b3eb77f764283d83e12427c3022fba6
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:215f5b0f373f4e3ad43a7c01ffdbf3925bbe7ae7185a457cfe9021eb94481c4b
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2ca0c424595d60ecac702728b12c84553636fe40229728f0d7faf3360276035
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:639b76dd84e3ff9b63a4b686b5478e8730dc63d794913f7a823faab8ec6c2e15
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1968,6 +1968,34 @@
|
|
| 1968 |
"learning_rate": 4.181539730223243e-05,
|
| 1969 |
"loss": 0.3885,
|
| 1970 |
"step": 14000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1971 |
}
|
| 1972 |
],
|
| 1973 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.25092329168949123,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 14200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1968 |
"learning_rate": 4.181539730223243e-05,
|
| 1969 |
"loss": 0.3885,
|
| 1970 |
"step": 14000
|
| 1971 |
+
},
|
| 1972 |
+
{
|
| 1973 |
+
"epoch": 0.2482726935378417,
|
| 1974 |
+
"grad_norm": 2.5290989875793457,
|
| 1975 |
+
"learning_rate": 4.17663112838939e-05,
|
| 1976 |
+
"loss": 0.4296,
|
| 1977 |
+
"step": 14050
|
| 1978 |
+
},
|
| 1979 |
+
{
|
| 1980 |
+
"epoch": 0.24915622625505823,
|
| 1981 |
+
"grad_norm": 1.9654839038848877,
|
| 1982 |
+
"learning_rate": 4.171722526555536e-05,
|
| 1983 |
+
"loss": 0.3853,
|
| 1984 |
+
"step": 14100
|
| 1985 |
+
},
|
| 1986 |
+
{
|
| 1987 |
+
"epoch": 0.25003975897227476,
|
| 1988 |
+
"grad_norm": 1.68603515625,
|
| 1989 |
+
"learning_rate": 4.166813924721683e-05,
|
| 1990 |
+
"loss": 0.4068,
|
| 1991 |
+
"step": 14150
|
| 1992 |
+
},
|
| 1993 |
+
{
|
| 1994 |
+
"epoch": 0.25092329168949123,
|
| 1995 |
+
"grad_norm": 1.9062405824661255,
|
| 1996 |
+
"learning_rate": 4.161905322887829e-05,
|
| 1997 |
+
"loss": 0.4071,
|
| 1998 |
+
"step": 14200
|
| 1999 |
}
|
| 2000 |
],
|
| 2001 |
"logging_steps": 50,
|