Training in progress, step 190000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a79698e8ce41155df8f2ac96cd99d16ef6b016cb4ff277c5445301a3db1fa101
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16799df5e196079b65225695fe6105fc300cf5157671e1537af93401d1b8d74a
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d372c7ec90718e7e982db780f8d7b62ea26c5068bb213efc91ce4e4dbff2188
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:200bd99b77ad19fc80e27fc902aca5e36eebe6570a18efbc020b994d75bc6fba
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0f78dbe211932d0c37e15c11f906dcb55aae5aedb42250507005f499cf4150e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb7cd6974be7a03bcf0342d3707956fc7f1e60ef89c3b2bd3445ff8c5a29e4db
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4347385b2c0f1e2d9aeca7fb88e1c942d1dae99e52563bdfa6c48a7a4a2bf97f
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 2.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1338,11 +1338,85 @@
|
|
| 1338 |
"eval_samples_per_second": 983.42,
|
| 1339 |
"eval_steps_per_second": 15.735,
|
| 1340 |
"step": 180000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1341 |
}
|
| 1342 |
],
|
| 1343 |
"max_steps": 1000000,
|
| 1344 |
"num_train_epochs": 16,
|
| 1345 |
-
"total_flos": 1.
|
| 1346 |
"trial_name": null,
|
| 1347 |
"trial_params": null
|
| 1348 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.9013391970925526,
|
| 5 |
+
"global_step": 190000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1338 |
"eval_samples_per_second": 983.42,
|
| 1339 |
"eval_steps_per_second": 15.735,
|
| 1340 |
"step": 180000
|
| 1341 |
+
},
|
| 1342 |
+
{
|
| 1343 |
+
"epoch": 2.76,
|
| 1344 |
+
"learning_rate": 0.00014353363063128005,
|
| 1345 |
+
"loss": 0.3183,
|
| 1346 |
+
"step": 181000
|
| 1347 |
+
},
|
| 1348 |
+
{
|
| 1349 |
+
"epoch": 2.78,
|
| 1350 |
+
"learning_rate": 0.0001434361090155131,
|
| 1351 |
+
"loss": 0.3177,
|
| 1352 |
+
"step": 182000
|
| 1353 |
+
},
|
| 1354 |
+
{
|
| 1355 |
+
"epoch": 2.79,
|
| 1356 |
+
"learning_rate": 0.00014333789367262136,
|
| 1357 |
+
"loss": 0.3178,
|
| 1358 |
+
"step": 183000
|
| 1359 |
+
},
|
| 1360 |
+
{
|
| 1361 |
+
"epoch": 2.81,
|
| 1362 |
+
"learning_rate": 0.00014323898567667202,
|
| 1363 |
+
"loss": 0.3177,
|
| 1364 |
+
"step": 184000
|
| 1365 |
+
},
|
| 1366 |
+
{
|
| 1367 |
+
"epoch": 2.82,
|
| 1368 |
+
"learning_rate": 0.00014313938610930712,
|
| 1369 |
+
"loss": 0.3171,
|
| 1370 |
+
"step": 185000
|
| 1371 |
+
},
|
| 1372 |
+
{
|
| 1373 |
+
"epoch": 2.82,
|
| 1374 |
+
"eval_runtime": 1.0441,
|
| 1375 |
+
"eval_samples_per_second": 957.721,
|
| 1376 |
+
"eval_steps_per_second": 15.324,
|
| 1377 |
+
"step": 185000
|
| 1378 |
+
},
|
| 1379 |
+
{
|
| 1380 |
+
"epoch": 2.84,
|
| 1381 |
+
"learning_rate": 0.00014303909605973154,
|
| 1382 |
+
"loss": 0.3167,
|
| 1383 |
+
"step": 186000
|
| 1384 |
+
},
|
| 1385 |
+
{
|
| 1386 |
+
"epoch": 2.86,
|
| 1387 |
+
"learning_rate": 0.0001429381166247012,
|
| 1388 |
+
"loss": 0.3168,
|
| 1389 |
+
"step": 187000
|
| 1390 |
+
},
|
| 1391 |
+
{
|
| 1392 |
+
"epoch": 2.87,
|
| 1393 |
+
"learning_rate": 0.00014283644890851103,
|
| 1394 |
+
"loss": 0.3164,
|
| 1395 |
+
"step": 188000
|
| 1396 |
+
},
|
| 1397 |
+
{
|
| 1398 |
+
"epoch": 2.89,
|
| 1399 |
+
"learning_rate": 0.00014273409402298291,
|
| 1400 |
+
"loss": 0.3161,
|
| 1401 |
+
"step": 189000
|
| 1402 |
+
},
|
| 1403 |
+
{
|
| 1404 |
+
"epoch": 2.9,
|
| 1405 |
+
"learning_rate": 0.00014263105308745343,
|
| 1406 |
+
"loss": 0.3155,
|
| 1407 |
+
"step": 190000
|
| 1408 |
+
},
|
| 1409 |
+
{
|
| 1410 |
+
"epoch": 2.9,
|
| 1411 |
+
"eval_runtime": 1.0119,
|
| 1412 |
+
"eval_samples_per_second": 988.212,
|
| 1413 |
+
"eval_steps_per_second": 15.811,
|
| 1414 |
+
"step": 190000
|
| 1415 |
}
|
| 1416 |
],
|
| 1417 |
"max_steps": 1000000,
|
| 1418 |
"num_train_epochs": 16,
|
| 1419 |
+
"total_flos": 1.3319036454402982e+22,
|
| 1420 |
"trial_name": null,
|
| 1421 |
"trial_params": null
|
| 1422 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16799df5e196079b65225695fe6105fc300cf5157671e1537af93401d1b8d74a
|
| 3 |
size 449471589
|