Training in progress, step 12300, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8137792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29956a42858cc1a71f6dee08eba9d6e063dd6c0c7c16732af2ead7faaa6180fd
|
| 3 |
size 8137792
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16386426
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b071d214a59097a7835acd1070421e3cc4395f35771d49f259461dfbfd7da5c
|
| 3 |
size 16386426
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15006
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2dfb58711cfeb2781bd70eb8fee55449848e381fcf989517924a67b3347d62ca
|
| 3 |
size 15006
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:307cb858f017d4acb2a340daefe90cd63c5a85459a0131a558c685415867466b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 24.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1053,6 +1053,13 @@
|
|
| 1053 |
"learning_rate": 1.866666666666667e-05,
|
| 1054 |
"loss": 1.2256,
|
| 1055 |
"step": 12200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1056 |
}
|
| 1057 |
],
|
| 1058 |
"logging_steps": 100,
|
|
@@ -1072,7 +1079,7 @@
|
|
| 1072 |
"attributes": {}
|
| 1073 |
}
|
| 1074 |
},
|
| 1075 |
-
"total_flos": 1.
|
| 1076 |
"train_batch_size": 8,
|
| 1077 |
"trial_name": null,
|
| 1078 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 24.59615685049211,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 12300,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1053 |
"learning_rate": 1.866666666666667e-05,
|
| 1054 |
"loss": 1.2256,
|
| 1055 |
"step": 12200
|
| 1056 |
+
},
|
| 1057 |
+
{
|
| 1058 |
+
"epoch": 24.59615685049211,
|
| 1059 |
+
"grad_norm": 0.24531888961791992,
|
| 1060 |
+
"learning_rate": 1.8e-05,
|
| 1061 |
+
"loss": 1.2227,
|
| 1062 |
+
"step": 12300
|
| 1063 |
}
|
| 1064 |
],
|
| 1065 |
"logging_steps": 100,
|
|
|
|
| 1079 |
"attributes": {}
|
| 1080 |
}
|
| 1081 |
},
|
| 1082 |
+
"total_flos": 1.9197001691257897e+19,
|
| 1083 |
"train_batch_size": 8,
|
| 1084 |
"trial_name": null,
|
| 1085 |
"trial_params": null
|