Training in progress, step 8000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14e55e875213d92682e84555aa6b33ea2bd487aa3e64808e8e018ff13e39def4
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f06185fe3645dcc7fe9ce829eede891f7480be5faf7d32fbc087ca425886173
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:089eaed71453cd0e3401835315e75796803c6c4fdbddff74a2269b34ba454a8b
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c911fb82d73d273c8bd13fe16df7396949ad9b406bf6a976c6d2d8dce418f3d4
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./fine-tuned/checkpoint-
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1177,6 +1177,84 @@
|
|
| 1177 |
"eval_samples_per_second": 22.765,
|
| 1178 |
"eval_steps_per_second": 5.691,
|
| 1179 |
"step": 7500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1180 |
}
|
| 1181 |
],
|
| 1182 |
"logging_steps": 50,
|
|
@@ -1196,7 +1274,7 @@
|
|
| 1196 |
"attributes": {}
|
| 1197 |
}
|
| 1198 |
},
|
| 1199 |
-
"total_flos": 1.
|
| 1200 |
"train_batch_size": 4,
|
| 1201 |
"trial_name": null,
|
| 1202 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.08401757478713989,
|
| 3 |
+
"best_model_checkpoint": "./fine-tuned/checkpoint-8000",
|
| 4 |
+
"epoch": 0.64,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 8000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1177 |
"eval_samples_per_second": 22.765,
|
| 1178 |
"eval_steps_per_second": 5.691,
|
| 1179 |
"step": 7500
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 0.604,
|
| 1183 |
+
"grad_norm": 0.14278633892536163,
|
| 1184 |
+
"learning_rate": 2.09424e-05,
|
| 1185 |
+
"loss": 0.0665,
|
| 1186 |
+
"step": 7550
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 0.608,
|
| 1190 |
+
"grad_norm": 0.19127364456653595,
|
| 1191 |
+
"learning_rate": 2.0882400000000002e-05,
|
| 1192 |
+
"loss": 0.0652,
|
| 1193 |
+
"step": 7600
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 0.612,
|
| 1197 |
+
"grad_norm": 0.18026478588581085,
|
| 1198 |
+
"learning_rate": 2.0822400000000002e-05,
|
| 1199 |
+
"loss": 0.0673,
|
| 1200 |
+
"step": 7650
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 0.616,
|
| 1204 |
+
"grad_norm": 0.1525663435459137,
|
| 1205 |
+
"learning_rate": 2.0762400000000003e-05,
|
| 1206 |
+
"loss": 0.0596,
|
| 1207 |
+
"step": 7700
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 0.62,
|
| 1211 |
+
"grad_norm": 0.1647537797689438,
|
| 1212 |
+
"learning_rate": 2.07024e-05,
|
| 1213 |
+
"loss": 0.0638,
|
| 1214 |
+
"step": 7750
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 0.624,
|
| 1218 |
+
"grad_norm": 0.14861586689949036,
|
| 1219 |
+
"learning_rate": 2.06424e-05,
|
| 1220 |
+
"loss": 0.0616,
|
| 1221 |
+
"step": 7800
|
| 1222 |
+
},
|
| 1223 |
+
{
|
| 1224 |
+
"epoch": 0.628,
|
| 1225 |
+
"grad_norm": 0.13468614220619202,
|
| 1226 |
+
"learning_rate": 2.05824e-05,
|
| 1227 |
+
"loss": 0.0645,
|
| 1228 |
+
"step": 7850
|
| 1229 |
+
},
|
| 1230 |
+
{
|
| 1231 |
+
"epoch": 0.632,
|
| 1232 |
+
"grad_norm": 0.22103475034236908,
|
| 1233 |
+
"learning_rate": 2.0522400000000002e-05,
|
| 1234 |
+
"loss": 0.0618,
|
| 1235 |
+
"step": 7900
|
| 1236 |
+
},
|
| 1237 |
+
{
|
| 1238 |
+
"epoch": 0.636,
|
| 1239 |
+
"grad_norm": 0.21467621624469757,
|
| 1240 |
+
"learning_rate": 2.0462400000000002e-05,
|
| 1241 |
+
"loss": 0.0609,
|
| 1242 |
+
"step": 7950
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"epoch": 0.64,
|
| 1246 |
+
"grad_norm": 0.09938893467187881,
|
| 1247 |
+
"learning_rate": 2.04024e-05,
|
| 1248 |
+
"loss": 0.0671,
|
| 1249 |
+
"step": 8000
|
| 1250 |
+
},
|
| 1251 |
+
{
|
| 1252 |
+
"epoch": 0.64,
|
| 1253 |
+
"eval_loss": 0.08401757478713989,
|
| 1254 |
+
"eval_runtime": 88.0793,
|
| 1255 |
+
"eval_samples_per_second": 22.707,
|
| 1256 |
+
"eval_steps_per_second": 5.677,
|
| 1257 |
+
"step": 8000
|
| 1258 |
}
|
| 1259 |
],
|
| 1260 |
"logging_steps": 50,
|
|
|
|
| 1274 |
"attributes": {}
|
| 1275 |
}
|
| 1276 |
},
|
| 1277 |
+
"total_flos": 1.948665249792e+16,
|
| 1278 |
"train_batch_size": 4,
|
| 1279 |
"trial_name": null,
|
| 1280 |
"trial_params": null
|