Training in progress, step 21200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1447,6 +1447,10 @@ You can finetune this model on your own dataset.
|
|
| 1447 |
| 0.3693 | 20900 | 0.4061 |
|
| 1448 |
| 0.3702 | 20950 | 0.3412 |
|
| 1449 |
| 0.3711 | 21000 | 0.3619 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1450 |
|
| 1451 |
</details>
|
| 1452 |
|
|
|
|
| 1447 |
| 0.3693 | 20900 | 0.4061 |
|
| 1448 |
| 0.3702 | 20950 | 0.3412 |
|
| 1449 |
| 0.3711 | 21000 | 0.3619 |
|
| 1450 |
+
| 0.3720 | 21050 | 0.314 |
|
| 1451 |
+
| 0.3729 | 21100 | 0.2912 |
|
| 1452 |
+
| 0.3737 | 21150 | 0.3402 |
|
| 1453 |
+
| 0.3746 | 21200 | 0.3462 |
|
| 1454 |
|
| 1455 |
</details>
|
| 1456 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4eb970c7628ca165b5a062a28ad2708c90ea109132db4b038f4972c1098f4cd3
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07ae8bcd2a7dafcb1ceb8e1ef8877981025e7fe727faad70bf73cbc035f46114
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:741bdb1704d229c73eec48b6812d828a33d2944f4e04e42ed9af5864cb8f4858
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb6b2b89ac841094a59187934ba361ecf73eba3e326ef63976f096359b2648c3
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bb71367724378f66d77db9556efcf84bf028b8330a0ff63dbe88a7a3d8a40e3
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2948,6 +2948,34 @@
|
|
| 2948 |
"learning_rate": 3.494728161630441e-05,
|
| 2949 |
"loss": 0.3619,
|
| 2950 |
"step": 21000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2951 |
}
|
| 2952 |
],
|
| 2953 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.37461787209980385,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 21200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2948 |
"learning_rate": 3.494728161630441e-05,
|
| 2949 |
"loss": 0.3619,
|
| 2950 |
"step": 21000
|
| 2951 |
+
},
|
| 2952 |
+
{
|
| 2953 |
+
"epoch": 0.3719672739481543,
|
| 2954 |
+
"grad_norm": 1.3728086948394775,
|
| 2955 |
+
"learning_rate": 3.4898195597965874e-05,
|
| 2956 |
+
"loss": 0.314,
|
| 2957 |
+
"step": 21050
|
| 2958 |
+
},
|
| 2959 |
+
{
|
| 2960 |
+
"epoch": 0.3728508066653708,
|
| 2961 |
+
"grad_norm": 1.6389710903167725,
|
| 2962 |
+
"learning_rate": 3.4849109579627346e-05,
|
| 2963 |
+
"loss": 0.2912,
|
| 2964 |
+
"step": 21100
|
| 2965 |
+
},
|
| 2966 |
+
{
|
| 2967 |
+
"epoch": 0.3737343393825873,
|
| 2968 |
+
"grad_norm": 3.552582025527954,
|
| 2969 |
+
"learning_rate": 3.4800023561288803e-05,
|
| 2970 |
+
"loss": 0.3402,
|
| 2971 |
+
"step": 21150
|
| 2972 |
+
},
|
| 2973 |
+
{
|
| 2974 |
+
"epoch": 0.37461787209980385,
|
| 2975 |
+
"grad_norm": 1.6479156017303467,
|
| 2976 |
+
"learning_rate": 3.475093754295027e-05,
|
| 2977 |
+
"loss": 0.3462,
|
| 2978 |
+
"step": 21200
|
| 2979 |
}
|
| 2980 |
],
|
| 2981 |
"logging_steps": 50,
|