Training in progress, step 580000
Browse files- config.json +1 -1
- last-checkpoint/config.json +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-
|
| 3 |
"architectures": [
|
| 4 |
"PIXELForPreTraining"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-570000",
|
| 3 |
"architectures": [
|
| 4 |
"PIXELForPreTraining"
|
| 5 |
],
|
last-checkpoint/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-
|
| 3 |
"architectures": [
|
| 4 |
"PIXELForPreTraining"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-570000",
|
| 3 |
"architectures": [
|
| 4 |
"PIXELForPreTraining"
|
| 5 |
],
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:111490892a3e244f3ca9b9b0f1e04552db811c5c11d50e5dae81456c571735a9
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5c26410b4d902c6d077c2953f9eace1e47991bc5dcd2f62ceb38a14b3e3ee84
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe2528712b13528834db613d1453dd7fb04ea3b7a1940fa3521b7d0be4a96c0a
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 6.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11406,11 +11406,211 @@
|
|
| 11406 |
"eval_samples_per_second": 887.536,
|
| 11407 |
"eval_steps_per_second": 13.91,
|
| 11408 |
"step": 570000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11409 |
}
|
| 11410 |
],
|
| 11411 |
"max_steps": 1000000,
|
| 11412 |
"num_train_epochs": 12,
|
| 11413 |
-
"total_flos":
|
| 11414 |
"trial_name": null,
|
| 11415 |
"trial_params": null
|
| 11416 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 6.111528724222923,
|
| 5 |
+
"global_step": 580000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11406 |
"eval_samples_per_second": 887.536,
|
| 11407 |
"eval_steps_per_second": 13.91,
|
| 11408 |
"step": 570000
|
| 11409 |
+
},
|
| 11410 |
+
{
|
| 11411 |
+
"epoch": 6.01,
|
| 11412 |
+
"learning_rate": 6.950709521760712e-05,
|
| 11413 |
+
"loss": 0.2048,
|
| 11414 |
+
"step": 570500
|
| 11415 |
+
},
|
| 11416 |
+
{
|
| 11417 |
+
"epoch": 6.01,
|
| 11418 |
+
"learning_rate": 6.939267446224418e-05,
|
| 11419 |
+
"loss": 0.2046,
|
| 11420 |
+
"step": 571000
|
| 11421 |
+
},
|
| 11422 |
+
{
|
| 11423 |
+
"epoch": 6.01,
|
| 11424 |
+
"eval_loss": 0.19425606727600098,
|
| 11425 |
+
"eval_runtime": 2.5589,
|
| 11426 |
+
"eval_samples_per_second": 897.645,
|
| 11427 |
+
"eval_steps_per_second": 14.068,
|
| 11428 |
+
"step": 571000
|
| 11429 |
+
},
|
| 11430 |
+
{
|
| 11431 |
+
"epoch": 6.02,
|
| 11432 |
+
"learning_rate": 6.927828270690422e-05,
|
| 11433 |
+
"loss": 0.2047,
|
| 11434 |
+
"step": 571500
|
| 11435 |
+
},
|
| 11436 |
+
{
|
| 11437 |
+
"epoch": 6.02,
|
| 11438 |
+
"learning_rate": 6.91639202643299e-05,
|
| 11439 |
+
"loss": 0.2042,
|
| 11440 |
+
"step": 572000
|
| 11441 |
+
},
|
| 11442 |
+
{
|
| 11443 |
+
"epoch": 6.02,
|
| 11444 |
+
"eval_loss": 0.19538278877735138,
|
| 11445 |
+
"eval_runtime": 2.5656,
|
| 11446 |
+
"eval_samples_per_second": 895.308,
|
| 11447 |
+
"eval_steps_per_second": 14.032,
|
| 11448 |
+
"step": 572000
|
| 11449 |
+
},
|
| 11450 |
+
{
|
| 11451 |
+
"epoch": 6.03,
|
| 11452 |
+
"learning_rate": 6.904958744718383e-05,
|
| 11453 |
+
"loss": 0.2046,
|
| 11454 |
+
"step": 572500
|
| 11455 |
+
},
|
| 11456 |
+
{
|
| 11457 |
+
"epoch": 6.03,
|
| 11458 |
+
"learning_rate": 6.893528456804756e-05,
|
| 11459 |
+
"loss": 0.2042,
|
| 11460 |
+
"step": 573000
|
| 11461 |
+
},
|
| 11462 |
+
{
|
| 11463 |
+
"epoch": 6.03,
|
| 11464 |
+
"eval_loss": 0.19409753382205963,
|
| 11465 |
+
"eval_runtime": 2.5486,
|
| 11466 |
+
"eval_samples_per_second": 901.27,
|
| 11467 |
+
"eval_steps_per_second": 14.125,
|
| 11468 |
+
"step": 573000
|
| 11469 |
+
},
|
| 11470 |
+
{
|
| 11471 |
+
"epoch": 6.04,
|
| 11472 |
+
"learning_rate": 6.882101193942075e-05,
|
| 11473 |
+
"loss": 0.2042,
|
| 11474 |
+
"step": 573500
|
| 11475 |
+
},
|
| 11476 |
+
{
|
| 11477 |
+
"epoch": 6.04,
|
| 11478 |
+
"learning_rate": 6.870676987372044e-05,
|
| 11479 |
+
"loss": 0.2041,
|
| 11480 |
+
"step": 574000
|
| 11481 |
+
},
|
| 11482 |
+
{
|
| 11483 |
+
"epoch": 6.04,
|
| 11484 |
+
"eval_loss": 0.1939525604248047,
|
| 11485 |
+
"eval_runtime": 2.5437,
|
| 11486 |
+
"eval_samples_per_second": 903.03,
|
| 11487 |
+
"eval_steps_per_second": 14.153,
|
| 11488 |
+
"step": 574000
|
| 11489 |
+
},
|
| 11490 |
+
{
|
| 11491 |
+
"epoch": 6.05,
|
| 11492 |
+
"learning_rate": 6.859255868328003e-05,
|
| 11493 |
+
"loss": 0.2039,
|
| 11494 |
+
"step": 574500
|
| 11495 |
+
},
|
| 11496 |
+
{
|
| 11497 |
+
"epoch": 6.06,
|
| 11498 |
+
"learning_rate": 6.847837868034861e-05,
|
| 11499 |
+
"loss": 0.2042,
|
| 11500 |
+
"step": 575000
|
| 11501 |
+
},
|
| 11502 |
+
{
|
| 11503 |
+
"epoch": 6.06,
|
| 11504 |
+
"eval_loss": 0.1951504349708557,
|
| 11505 |
+
"eval_runtime": 2.5798,
|
| 11506 |
+
"eval_samples_per_second": 890.377,
|
| 11507 |
+
"eval_steps_per_second": 13.955,
|
| 11508 |
+
"step": 575000
|
| 11509 |
+
},
|
| 11510 |
+
{
|
| 11511 |
+
"epoch": 6.06,
|
| 11512 |
+
"learning_rate": 6.836423017708996e-05,
|
| 11513 |
+
"loss": 0.2038,
|
| 11514 |
+
"step": 575500
|
| 11515 |
+
},
|
| 11516 |
+
{
|
| 11517 |
+
"epoch": 6.07,
|
| 11518 |
+
"learning_rate": 6.825011348558167e-05,
|
| 11519 |
+
"loss": 0.204,
|
| 11520 |
+
"step": 576000
|
| 11521 |
+
},
|
| 11522 |
+
{
|
| 11523 |
+
"epoch": 6.07,
|
| 11524 |
+
"eval_loss": 0.19505272805690765,
|
| 11525 |
+
"eval_runtime": 2.5475,
|
| 11526 |
+
"eval_samples_per_second": 901.67,
|
| 11527 |
+
"eval_steps_per_second": 14.132,
|
| 11528 |
+
"step": 576000
|
| 11529 |
+
},
|
| 11530 |
+
{
|
| 11531 |
+
"epoch": 6.07,
|
| 11532 |
+
"learning_rate": 6.813602891781443e-05,
|
| 11533 |
+
"loss": 0.2039,
|
| 11534 |
+
"step": 576500
|
| 11535 |
+
},
|
| 11536 |
+
{
|
| 11537 |
+
"epoch": 6.08,
|
| 11538 |
+
"learning_rate": 6.802197678569109e-05,
|
| 11539 |
+
"loss": 0.2038,
|
| 11540 |
+
"step": 577000
|
| 11541 |
+
},
|
| 11542 |
+
{
|
| 11543 |
+
"epoch": 6.08,
|
| 11544 |
+
"eval_loss": 0.19440634548664093,
|
| 11545 |
+
"eval_runtime": 2.537,
|
| 11546 |
+
"eval_samples_per_second": 905.395,
|
| 11547 |
+
"eval_steps_per_second": 14.19,
|
| 11548 |
+
"step": 577000
|
| 11549 |
+
},
|
| 11550 |
+
{
|
| 11551 |
+
"epoch": 6.08,
|
| 11552 |
+
"learning_rate": 6.790795740102589e-05,
|
| 11553 |
+
"loss": 0.2038,
|
| 11554 |
+
"step": 577500
|
| 11555 |
+
},
|
| 11556 |
+
{
|
| 11557 |
+
"epoch": 6.09,
|
| 11558 |
+
"learning_rate": 6.779397107554339e-05,
|
| 11559 |
+
"loss": 0.2038,
|
| 11560 |
+
"step": 578000
|
| 11561 |
+
},
|
| 11562 |
+
{
|
| 11563 |
+
"epoch": 6.09,
|
| 11564 |
+
"eval_loss": 0.19268804788589478,
|
| 11565 |
+
"eval_runtime": 2.5143,
|
| 11566 |
+
"eval_samples_per_second": 913.559,
|
| 11567 |
+
"eval_steps_per_second": 14.318,
|
| 11568 |
+
"step": 578000
|
| 11569 |
+
},
|
| 11570 |
+
{
|
| 11571 |
+
"epoch": 6.09,
|
| 11572 |
+
"learning_rate": 6.768001812087789e-05,
|
| 11573 |
+
"loss": 0.2038,
|
| 11574 |
+
"step": 578500
|
| 11575 |
+
},
|
| 11576 |
+
{
|
| 11577 |
+
"epoch": 6.1,
|
| 11578 |
+
"learning_rate": 6.756609884857239e-05,
|
| 11579 |
+
"loss": 0.2037,
|
| 11580 |
+
"step": 579000
|
| 11581 |
+
},
|
| 11582 |
+
{
|
| 11583 |
+
"epoch": 6.1,
|
| 11584 |
+
"eval_loss": 0.19323283433914185,
|
| 11585 |
+
"eval_runtime": 2.5075,
|
| 11586 |
+
"eval_samples_per_second": 916.04,
|
| 11587 |
+
"eval_steps_per_second": 14.357,
|
| 11588 |
+
"step": 579000
|
| 11589 |
+
},
|
| 11590 |
+
{
|
| 11591 |
+
"epoch": 6.11,
|
| 11592 |
+
"learning_rate": 6.745221357007786e-05,
|
| 11593 |
+
"loss": 0.2037,
|
| 11594 |
+
"step": 579500
|
| 11595 |
+
},
|
| 11596 |
+
{
|
| 11597 |
+
"epoch": 6.11,
|
| 11598 |
+
"learning_rate": 6.733836259675233e-05,
|
| 11599 |
+
"loss": 0.2036,
|
| 11600 |
+
"step": 580000
|
| 11601 |
+
},
|
| 11602 |
+
{
|
| 11603 |
+
"epoch": 6.11,
|
| 11604 |
+
"eval_loss": 0.19253070652484894,
|
| 11605 |
+
"eval_runtime": 2.4507,
|
| 11606 |
+
"eval_samples_per_second": 937.277,
|
| 11607 |
+
"eval_steps_per_second": 14.69,
|
| 11608 |
+
"step": 580000
|
| 11609 |
}
|
| 11610 |
],
|
| 11611 |
"max_steps": 1000000,
|
| 11612 |
"num_train_epochs": 12,
|
| 11613 |
+
"total_flos": 4.065782281075041e+22,
|
| 11614 |
"trial_name": null,
|
| 11615 |
"trial_params": null
|
| 11616 |
}
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3311
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a6ae323073c615770ba7c18bfa53b7b7169d86c9cda5252cdcc294ff053a147
|
| 3 |
size 3311
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5c26410b4d902c6d077c2953f9eace1e47991bc5dcd2f62ceb38a14b3e3ee84
|
| 3 |
size 449471589
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3311
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a6ae323073c615770ba7c18bfa53b7b7169d86c9cda5252cdcc294ff053a147
|
| 3 |
size 3311
|