Training in progress, step 280000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:685baa1115c9996a6d3f73f4d770cb44301323ea5ee9d7eb0866dbaffb748894
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:866ffe9cab4eb4a4754c64cb5d7ed4f31ed0ad83e08ecaffbb7f5035db70963f
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69c627f38132dd40cef0053712aa1270dae63d12ac97e7fc6aaa3050230ece5f
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ff5c53b911e025186f3d4cefd2ae203cd3c867a78b103c8c95c51e8d20b5b99
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba38210b26270fe6dc17e8687ed18c17cb3c73130918a06af7f287737355cbb7
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:088007f9c38eed502aa04c5f433e4da63387bd973584b94625ca26c1cb52eacf
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14439
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2014d323a6a226b752e2eff97f028102e8f8d7d7ca11f19c3f47a58b4e4654ab
|
| 3 |
size 14439
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:539ae7fa3d9912913294dc2719a2dd73be6f83eb9b37e29816e50a7b5d35e566
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75cd206de91cc2704e6a99c57b9e488ddcdbbc4b410b702c69de32467cde6e75
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c28314a75be0e567739244bb3713c7239bcefe683fa1af29178e1fe22d80967d
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80db35d9a6af0da10cd7cebe3bf01f3932293e48fc6a8607555efe3b5591e844
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 8.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5406,11 +5406,211 @@
|
|
| 5406 |
"eval_samples_per_second": 1913.356,
|
| 5407 |
"eval_steps_per_second": 30.614,
|
| 5408 |
"step": 270000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5409 |
}
|
| 5410 |
],
|
| 5411 |
"max_steps": 500000,
|
| 5412 |
"num_train_epochs": 16,
|
| 5413 |
-
"total_flos": 8.
|
| 5414 |
"trial_name": null,
|
| 5415 |
"trial_params": null
|
| 5416 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.578694200189958,
|
| 5 |
+
"global_step": 280000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5406 |
"eval_samples_per_second": 1913.356,
|
| 5407 |
"eval_steps_per_second": 30.614,
|
| 5408 |
"step": 270000
|
| 5409 |
+
},
|
| 5410 |
+
{
|
| 5411 |
+
"epoch": 8.29,
|
| 5412 |
+
"learning_rate": 0.00014733147931802578,
|
| 5413 |
+
"loss": 0.7368,
|
| 5414 |
+
"step": 270500
|
| 5415 |
+
},
|
| 5416 |
+
{
|
| 5417 |
+
"epoch": 8.3,
|
| 5418 |
+
"learning_rate": 0.00014685268692538238,
|
| 5419 |
+
"loss": 0.7368,
|
| 5420 |
+
"step": 271000
|
| 5421 |
+
},
|
| 5422 |
+
{
|
| 5423 |
+
"epoch": 8.3,
|
| 5424 |
+
"eval_loss": 0.9023635387420654,
|
| 5425 |
+
"eval_runtime": 0.5188,
|
| 5426 |
+
"eval_samples_per_second": 1927.356,
|
| 5427 |
+
"eval_steps_per_second": 30.838,
|
| 5428 |
+
"step": 271000
|
| 5429 |
+
},
|
| 5430 |
+
{
|
| 5431 |
+
"epoch": 8.32,
|
| 5432 |
+
"learning_rate": 0.00014637398363044946,
|
| 5433 |
+
"loss": 0.7143,
|
| 5434 |
+
"step": 271500
|
| 5435 |
+
},
|
| 5436 |
+
{
|
| 5437 |
+
"epoch": 8.33,
|
| 5438 |
+
"learning_rate": 0.00014589537466824955,
|
| 5439 |
+
"loss": 0.3428,
|
| 5440 |
+
"step": 272000
|
| 5441 |
+
},
|
| 5442 |
+
{
|
| 5443 |
+
"epoch": 8.33,
|
| 5444 |
+
"eval_loss": 0.7834916710853577,
|
| 5445 |
+
"eval_runtime": 0.5348,
|
| 5446 |
+
"eval_samples_per_second": 1869.839,
|
| 5447 |
+
"eval_steps_per_second": 29.917,
|
| 5448 |
+
"step": 272000
|
| 5449 |
+
},
|
| 5450 |
+
{
|
| 5451 |
+
"epoch": 8.35,
|
| 5452 |
+
"learning_rate": 0.00014541686527277356,
|
| 5453 |
+
"loss": 0.3388,
|
| 5454 |
+
"step": 272500
|
| 5455 |
+
},
|
| 5456 |
+
{
|
| 5457 |
+
"epoch": 8.36,
|
| 5458 |
+
"learning_rate": 0.00014493846067692358,
|
| 5459 |
+
"loss": 0.3376,
|
| 5460 |
+
"step": 273000
|
| 5461 |
+
},
|
| 5462 |
+
{
|
| 5463 |
+
"epoch": 8.36,
|
| 5464 |
+
"eval_loss": 0.7843596935272217,
|
| 5465 |
+
"eval_runtime": 0.5178,
|
| 5466 |
+
"eval_samples_per_second": 1931.2,
|
| 5467 |
+
"eval_steps_per_second": 30.899,
|
| 5468 |
+
"step": 273000
|
| 5469 |
+
},
|
| 5470 |
+
{
|
| 5471 |
+
"epoch": 8.38,
|
| 5472 |
+
"learning_rate": 0.00014446016611245567,
|
| 5473 |
+
"loss": 0.3362,
|
| 5474 |
+
"step": 273500
|
| 5475 |
+
},
|
| 5476 |
+
{
|
| 5477 |
+
"epoch": 8.39,
|
| 5478 |
+
"learning_rate": 0.00014398198680992252,
|
| 5479 |
+
"loss": 0.3369,
|
| 5480 |
+
"step": 274000
|
| 5481 |
+
},
|
| 5482 |
+
{
|
| 5483 |
+
"epoch": 8.39,
|
| 5484 |
+
"eval_loss": 0.7844694256782532,
|
| 5485 |
+
"eval_runtime": 0.5316,
|
| 5486 |
+
"eval_samples_per_second": 1881.272,
|
| 5487 |
+
"eval_steps_per_second": 30.1,
|
| 5488 |
+
"step": 274000
|
| 5489 |
+
},
|
| 5490 |
+
{
|
| 5491 |
+
"epoch": 8.41,
|
| 5492 |
+
"learning_rate": 0.00014350392799861636,
|
| 5493 |
+
"loss": 0.336,
|
| 5494 |
+
"step": 274500
|
| 5495 |
+
},
|
| 5496 |
+
{
|
| 5497 |
+
"epoch": 8.43,
|
| 5498 |
+
"learning_rate": 0.0001430259949065118,
|
| 5499 |
+
"loss": 0.3356,
|
| 5500 |
+
"step": 275000
|
| 5501 |
+
},
|
| 5502 |
+
{
|
| 5503 |
+
"epoch": 8.43,
|
| 5504 |
+
"eval_loss": 0.7838680148124695,
|
| 5505 |
+
"eval_runtime": 0.52,
|
| 5506 |
+
"eval_samples_per_second": 1923.025,
|
| 5507 |
+
"eval_steps_per_second": 30.768,
|
| 5508 |
+
"step": 275000
|
| 5509 |
+
},
|
| 5510 |
+
{
|
| 5511 |
+
"epoch": 8.44,
|
| 5512 |
+
"learning_rate": 0.0001425481927602085,
|
| 5513 |
+
"loss": 0.3348,
|
| 5514 |
+
"step": 275500
|
| 5515 |
+
},
|
| 5516 |
+
{
|
| 5517 |
+
"epoch": 8.46,
|
| 5518 |
+
"learning_rate": 0.0001420705267848743,
|
| 5519 |
+
"loss": 0.3352,
|
| 5520 |
+
"step": 276000
|
| 5521 |
+
},
|
| 5522 |
+
{
|
| 5523 |
+
"epoch": 8.46,
|
| 5524 |
+
"eval_loss": 0.7744572162628174,
|
| 5525 |
+
"eval_runtime": 0.5156,
|
| 5526 |
+
"eval_samples_per_second": 1939.351,
|
| 5527 |
+
"eval_steps_per_second": 31.03,
|
| 5528 |
+
"step": 276000
|
| 5529 |
+
},
|
| 5530 |
+
{
|
| 5531 |
+
"epoch": 8.47,
|
| 5532 |
+
"learning_rate": 0.00014159300220418757,
|
| 5533 |
+
"loss": 0.3342,
|
| 5534 |
+
"step": 276500
|
| 5535 |
+
},
|
| 5536 |
+
{
|
| 5537 |
+
"epoch": 8.49,
|
| 5538 |
+
"learning_rate": 0.0001411156242402808,
|
| 5539 |
+
"loss": 0.3341,
|
| 5540 |
+
"step": 277000
|
| 5541 |
+
},
|
| 5542 |
+
{
|
| 5543 |
+
"epoch": 8.49,
|
| 5544 |
+
"eval_loss": 0.7838852405548096,
|
| 5545 |
+
"eval_runtime": 0.5192,
|
| 5546 |
+
"eval_samples_per_second": 1925.877,
|
| 5547 |
+
"eval_steps_per_second": 30.814,
|
| 5548 |
+
"step": 277000
|
| 5549 |
+
},
|
| 5550 |
+
{
|
| 5551 |
+
"epoch": 8.5,
|
| 5552 |
+
"learning_rate": 0.0001406383981136829,
|
| 5553 |
+
"loss": 0.3339,
|
| 5554 |
+
"step": 277500
|
| 5555 |
+
},
|
| 5556 |
+
{
|
| 5557 |
+
"epoch": 8.52,
|
| 5558 |
+
"learning_rate": 0.00014016132904326226,
|
| 5559 |
+
"loss": 0.334,
|
| 5560 |
+
"step": 278000
|
| 5561 |
+
},
|
| 5562 |
+
{
|
| 5563 |
+
"epoch": 8.52,
|
| 5564 |
+
"eval_loss": 0.7889499664306641,
|
| 5565 |
+
"eval_runtime": 0.5333,
|
| 5566 |
+
"eval_samples_per_second": 1875.279,
|
| 5567 |
+
"eval_steps_per_second": 30.004,
|
| 5568 |
+
"step": 278000
|
| 5569 |
+
},
|
| 5570 |
+
{
|
| 5571 |
+
"epoch": 8.53,
|
| 5572 |
+
"learning_rate": 0.00013968442224616989,
|
| 5573 |
+
"loss": 0.3338,
|
| 5574 |
+
"step": 278500
|
| 5575 |
+
},
|
| 5576 |
+
{
|
| 5577 |
+
"epoch": 8.55,
|
| 5578 |
+
"learning_rate": 0.00013920768293778195,
|
| 5579 |
+
"loss": 0.3337,
|
| 5580 |
+
"step": 279000
|
| 5581 |
+
},
|
| 5582 |
+
{
|
| 5583 |
+
"epoch": 8.55,
|
| 5584 |
+
"eval_loss": 0.7802003026008606,
|
| 5585 |
+
"eval_runtime": 0.5176,
|
| 5586 |
+
"eval_samples_per_second": 1931.905,
|
| 5587 |
+
"eval_steps_per_second": 30.91,
|
| 5588 |
+
"step": 279000
|
| 5589 |
+
},
|
| 5590 |
+
{
|
| 5591 |
+
"epoch": 8.56,
|
| 5592 |
+
"learning_rate": 0.00013873111633164336,
|
| 5593 |
+
"loss": 0.3336,
|
| 5594 |
+
"step": 279500
|
| 5595 |
+
},
|
| 5596 |
+
{
|
| 5597 |
+
"epoch": 8.58,
|
| 5598 |
+
"learning_rate": 0.00013825472763941,
|
| 5599 |
+
"loss": 0.3338,
|
| 5600 |
+
"step": 280000
|
| 5601 |
+
},
|
| 5602 |
+
{
|
| 5603 |
+
"epoch": 8.58,
|
| 5604 |
+
"eval_loss": 0.7855395674705505,
|
| 5605 |
+
"eval_runtime": 0.525,
|
| 5606 |
+
"eval_samples_per_second": 1904.885,
|
| 5607 |
+
"eval_steps_per_second": 30.478,
|
| 5608 |
+
"step": 280000
|
| 5609 |
}
|
| 5610 |
],
|
| 5611 |
"max_steps": 500000,
|
| 5612 |
"num_train_epochs": 16,
|
| 5613 |
+
"total_flos": 8.945575982070193e+21,
|
| 5614 |
"trial_name": null,
|
| 5615 |
"trial_params": null
|
| 5616 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:866ffe9cab4eb4a4754c64cb5d7ed4f31ed0ad83e08ecaffbb7f5035db70963f
|
| 3 |
size 102501541
|