Training in progress, step 65000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +103 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893438545
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:988099a377e3ae6ef89fd2f1f761be64fa6a19032354dab4bc5333d2740798f7
|
| 3 |
size 893438545
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65f03d0e43e34a2e0bd81bc161f8b22fc6c59b8c9b7c6cda78db789b3b576cc9
|
| 3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15523
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d496ee2298f65cd4dad9053d5676d64850869fdb37f1b20e4f79d1c4026aca1
|
| 3 |
size 15523
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9941c77ea6765c024840da9e5a9b406fef84b8e5ef3a55221f4dca4db4e76a10
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de3c7b468f562a170fe98313b8778b6d4fc7ff5fb03b102a017eca4a43908ee2
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 5.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1206,11 +1206,111 @@
|
|
| 1206 |
"eval_samples_per_second": 25.283,
|
| 1207 |
"eval_steps_per_second": 0.809,
|
| 1208 |
"step": 60000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1209 |
}
|
| 1210 |
],
|
| 1211 |
"max_steps": 1000000,
|
| 1212 |
"num_train_epochs": 86,
|
| 1213 |
-
"total_flos": 2.
|
| 1214 |
"trial_name": null,
|
| 1215 |
"trial_params": null
|
| 1216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 5.529091527730521,
|
| 5 |
+
"global_step": 65000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1206 |
"eval_samples_per_second": 25.283,
|
| 1207 |
"eval_steps_per_second": 0.809,
|
| 1208 |
"step": 60000
|
| 1209 |
+
},
|
| 1210 |
+
{
|
| 1211 |
+
"epoch": 5.15,
|
| 1212 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1213 |
+
"loss": 0.4108,
|
| 1214 |
+
"step": 60500
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 5.19,
|
| 1218 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1219 |
+
"loss": 0.4094,
|
| 1220 |
+
"step": 61000
|
| 1221 |
+
},
|
| 1222 |
+
{
|
| 1223 |
+
"epoch": 5.19,
|
| 1224 |
+
"eval_loss": 0.3796501159667969,
|
| 1225 |
+
"eval_runtime": 18.1293,
|
| 1226 |
+
"eval_samples_per_second": 27.58,
|
| 1227 |
+
"eval_steps_per_second": 0.883,
|
| 1228 |
+
"step": 61000
|
| 1229 |
+
},
|
| 1230 |
+
{
|
| 1231 |
+
"epoch": 5.23,
|
| 1232 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1233 |
+
"loss": 0.4092,
|
| 1234 |
+
"step": 61500
|
| 1235 |
+
},
|
| 1236 |
+
{
|
| 1237 |
+
"epoch": 5.27,
|
| 1238 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1239 |
+
"loss": 0.4091,
|
| 1240 |
+
"step": 62000
|
| 1241 |
+
},
|
| 1242 |
+
{
|
| 1243 |
+
"epoch": 5.27,
|
| 1244 |
+
"eval_loss": 0.3790924549102783,
|
| 1245 |
+
"eval_runtime": 20.9048,
|
| 1246 |
+
"eval_samples_per_second": 23.918,
|
| 1247 |
+
"eval_steps_per_second": 0.765,
|
| 1248 |
+
"step": 62000
|
| 1249 |
+
},
|
| 1250 |
+
{
|
| 1251 |
+
"epoch": 5.32,
|
| 1252 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1253 |
+
"loss": 0.408,
|
| 1254 |
+
"step": 62500
|
| 1255 |
+
},
|
| 1256 |
+
{
|
| 1257 |
+
"epoch": 5.36,
|
| 1258 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1259 |
+
"loss": 0.4102,
|
| 1260 |
+
"step": 63000
|
| 1261 |
+
},
|
| 1262 |
+
{
|
| 1263 |
+
"epoch": 5.36,
|
| 1264 |
+
"eval_loss": 0.3805426061153412,
|
| 1265 |
+
"eval_runtime": 27.4404,
|
| 1266 |
+
"eval_samples_per_second": 18.221,
|
| 1267 |
+
"eval_steps_per_second": 0.583,
|
| 1268 |
+
"step": 63000
|
| 1269 |
+
},
|
| 1270 |
+
{
|
| 1271 |
+
"epoch": 5.4,
|
| 1272 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1273 |
+
"loss": 0.4086,
|
| 1274 |
+
"step": 63500
|
| 1275 |
+
},
|
| 1276 |
+
{
|
| 1277 |
+
"epoch": 5.44,
|
| 1278 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1279 |
+
"loss": 0.4087,
|
| 1280 |
+
"step": 64000
|
| 1281 |
+
},
|
| 1282 |
+
{
|
| 1283 |
+
"epoch": 5.44,
|
| 1284 |
+
"eval_loss": 0.37830984592437744,
|
| 1285 |
+
"eval_runtime": 14.8851,
|
| 1286 |
+
"eval_samples_per_second": 33.591,
|
| 1287 |
+
"eval_steps_per_second": 1.075,
|
| 1288 |
+
"step": 64000
|
| 1289 |
+
},
|
| 1290 |
+
{
|
| 1291 |
+
"epoch": 5.49,
|
| 1292 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1293 |
+
"loss": 0.4081,
|
| 1294 |
+
"step": 64500
|
| 1295 |
+
},
|
| 1296 |
+
{
|
| 1297 |
+
"epoch": 5.53,
|
| 1298 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1299 |
+
"loss": 0.4083,
|
| 1300 |
+
"step": 65000
|
| 1301 |
+
},
|
| 1302 |
+
{
|
| 1303 |
+
"epoch": 5.53,
|
| 1304 |
+
"eval_loss": 0.3796636164188385,
|
| 1305 |
+
"eval_runtime": 17.3567,
|
| 1306 |
+
"eval_samples_per_second": 28.807,
|
| 1307 |
+
"eval_steps_per_second": 0.922,
|
| 1308 |
+
"step": 65000
|
| 1309 |
}
|
| 1310 |
],
|
| 1311 |
"max_steps": 1000000,
|
| 1312 |
"num_train_epochs": 86,
|
| 1313 |
+
"total_flos": 2.990133517637586e+21,
|
| 1314 |
"trial_name": null,
|
| 1315 |
"trial_params": null
|
| 1316 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65f03d0e43e34a2e0bd81bc161f8b22fc6c59b8c9b7c6cda78db789b3b576cc9
|
| 3 |
size 449471589
|