Training in progress, step 8600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1193,6 +1193,10 @@ You can finetune this model on your own dataset.
|
|
| 1193 |
| 0.1467 | 8300 | 0.3967 |
|
| 1194 |
| 0.1475 | 8350 | 0.4911 |
|
| 1195 |
| 0.1484 | 8400 | 0.4076 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1196 |
|
| 1197 |
|
| 1198 |
### Framework Versions
|
|
|
|
| 1193 |
| 0.1467 | 8300 | 0.3967 |
|
| 1194 |
| 0.1475 | 8350 | 0.4911 |
|
| 1195 |
| 0.1484 | 8400 | 0.4076 |
|
| 1196 |
+
| 0.1493 | 8450 | 0.398 |
|
| 1197 |
+
| 0.1502 | 8500 | 0.4203 |
|
| 1198 |
+
| 0.1511 | 8550 | 0.414 |
|
| 1199 |
+
| 0.1520 | 8600 | 0.3436 |
|
| 1200 |
|
| 1201 |
|
| 1202 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03eb7a5af20bd02ca3c82b2b9c70071b68c323ac87a7799ea87cd5139a94eb91
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40262b6becfc7b5455f773e7a9a3bb594892eb51a5604b32e91c90609a89bdfb
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66f5a47fc1fb0afabfab2902b2aa5af7ab84c4d2097ab2d93b83ccc74c03acb1
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b161abd57fc77c436995db62a6c89b28c6220f59213ab301dd9a979ea4c14232
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6c145ff6cd9415e52757f69b1d04fadde604f73651764ac12eda7fa02fc8ae0
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1184,6 +1184,34 @@
|
|
| 1184 |
"learning_rate": 4.731106791541497e-05,
|
| 1185 |
"loss": 0.4076,
|
| 1186 |
"step": 8400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1187 |
}
|
| 1188 |
],
|
| 1189 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.1519676273612412,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 8600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1184 |
"learning_rate": 4.731106791541497e-05,
|
| 1185 |
"loss": 0.4076,
|
| 1186 |
"step": 8400
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 0.14931702920959164,
|
| 1190 |
+
"grad_norm": 2.1234054565429688,
|
| 1191 |
+
"learning_rate": 4.7261981897076444e-05,
|
| 1192 |
+
"loss": 0.398,
|
| 1193 |
+
"step": 8450
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 0.15020056192680814,
|
| 1197 |
+
"grad_norm": 2.1532113552093506,
|
| 1198 |
+
"learning_rate": 4.72128958787379e-05,
|
| 1199 |
+
"loss": 0.4203,
|
| 1200 |
+
"step": 8500
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 0.15108409464402467,
|
| 1204 |
+
"grad_norm": 1.8909550905227661,
|
| 1205 |
+
"learning_rate": 4.7163809860399366e-05,
|
| 1206 |
+
"loss": 0.414,
|
| 1207 |
+
"step": 8550
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 0.1519676273612412,
|
| 1211 |
+
"grad_norm": 1.9415462017059326,
|
| 1212 |
+
"learning_rate": 4.711472384206083e-05,
|
| 1213 |
+
"loss": 0.3436,
|
| 1214 |
+
"step": 8600
|
| 1215 |
}
|
| 1216 |
],
|
| 1217 |
"logging_steps": 50,
|