Training in progress, step 4800, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8137792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d38acfef07e8b9c25b0f953b52d66883913113feb31b61889fc3ec1af5ed15a0
|
| 3 |
size 8137792
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16386426
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:051fd82246e33283a777d0c7f3efc8b17be972592e2fcbb972183e1c283ec081
|
| 3 |
size 16386426
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15006
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cfb8b1ca968b0128dd6a5e5fd90cc093f91f1d135c8f1707272ea159a70d310
|
| 3 |
size 15006
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48003e3a01e3b2d0428d595eea3b48211cbf545a4cb199328e02370517381aad
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -408,6 +408,13 @@
|
|
| 408 |
"learning_rate": 6.866666666666666e-05,
|
| 409 |
"loss": 1.2603,
|
| 410 |
"step": 4700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
}
|
| 412 |
],
|
| 413 |
"logging_steps": 100,
|
|
@@ -427,7 +434,7 @@
|
|
| 427 |
"attributes": {}
|
| 428 |
}
|
| 429 |
},
|
| 430 |
-
"total_flos": 7.
|
| 431 |
"train_batch_size": 8,
|
| 432 |
"trial_name": null,
|
| 433 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.598500234338385,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 4800,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 408 |
"learning_rate": 6.866666666666666e-05,
|
| 409 |
"loss": 1.2603,
|
| 410 |
"step": 4700
|
| 411 |
+
},
|
| 412 |
+
{
|
| 413 |
+
"epoch": 9.598500234338385,
|
| 414 |
+
"grad_norm": 0.18435333669185638,
|
| 415 |
+
"learning_rate": 6.800000000000001e-05,
|
| 416 |
+
"loss": 1.2519,
|
| 417 |
+
"step": 4800
|
| 418 |
}
|
| 419 |
],
|
| 420 |
"logging_steps": 100,
|
|
|
|
| 434 |
"attributes": {}
|
| 435 |
}
|
| 436 |
},
|
| 437 |
+
"total_flos": 7.491516205199524e+18,
|
| 438 |
"train_batch_size": 8,
|
| 439 |
"trial_name": null,
|
| 440 |
"trial_params": null
|