Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step5550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5550/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67f14b217337322ca15f39ff83faddad85eaa2b699e839156e4b5a8f9a547f46
|
| 3 |
size 1037269336
|
last-checkpoint/global_step5550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c87cdd188b7d8338c5735456cd0b5d5f76da1ae45c65ae64203c43bb2cc5cbb
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step5550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bcd4e113b5fb8155b35a8a0c4e6fc10ed855790f08ce29e733f8d66fda5af32d
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae567e275bbee69ba582162f258fe56cae2d7f50794f0cc5549cee9c1092a4b0
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd59427a0336d2178aac48cec15bb26146284b95ac46e9b37d8119c87145dc57
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5550/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67e00afbb6ddc69255a0727f2ff587593f7d4cec424b3a448da9f5de9671f71e
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step5550
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c363c9d6409891b8a0c216d60fd16b304f275f82249966d3bc42689f8ffeca4
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12da4effb035daeb8df40f07293059c0cd2a4fed6029443b6a20828e64db1c2a
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e949608ee1f454e74a01438a4b477930ebee2355ab0167ac452e85c9078851f2
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a78f5e77cc25088c5130959b2bf3c1ee05d44c7e4aef6524adc45fb65662182
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a52d6724c4ae1e5cf5a59cbc21dfbb5e7c37003b46ec99ab94837465b1b4c4d
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -8510,6 +8510,162 @@
|
|
| 8510 |
"eval_samples_per_second": 172.204,
|
| 8511 |
"eval_steps_per_second": 10.799,
|
| 8512 |
"step": 5450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8513 |
}
|
| 8514 |
],
|
| 8515 |
"logging_steps": 5,
|
|
@@ -8538,7 +8694,7 @@
|
|
| 8538 |
"attributes": {}
|
| 8539 |
}
|
| 8540 |
},
|
| 8541 |
-
"total_flos": 1.
|
| 8542 |
"train_batch_size": 4,
|
| 8543 |
"trial_name": null,
|
| 8544 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.7976654767990112,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.8068033144352377,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 5550,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 8510 |
"eval_samples_per_second": 172.204,
|
| 8511 |
"eval_steps_per_second": 10.799,
|
| 8512 |
"step": 5450
|
| 8513 |
+
},
|
| 8514 |
+
{
|
| 8515 |
+
"epoch": 0.7929931676115715,
|
| 8516 |
+
"grad_norm": 2.3272275924682617,
|
| 8517 |
+
"learning_rate": 4.369321767175934e-05,
|
| 8518 |
+
"loss": 1.8659,
|
| 8519 |
+
"step": 5455
|
| 8520 |
+
},
|
| 8521 |
+
{
|
| 8522 |
+
"epoch": 0.793720017444396,
|
| 8523 |
+
"grad_norm": 2.3955276012420654,
|
| 8524 |
+
"learning_rate": 4.3614388885420454e-05,
|
| 8525 |
+
"loss": 1.9935,
|
| 8526 |
+
"step": 5460
|
| 8527 |
+
},
|
| 8528 |
+
{
|
| 8529 |
+
"epoch": 0.7944468672772206,
|
| 8530 |
+
"grad_norm": 2.2696611881256104,
|
| 8531 |
+
"learning_rate": 4.353557642249021e-05,
|
| 8532 |
+
"loss": 2.107,
|
| 8533 |
+
"step": 5465
|
| 8534 |
+
},
|
| 8535 |
+
{
|
| 8536 |
+
"epoch": 0.795173717110045,
|
| 8537 |
+
"grad_norm": 2.432871103286743,
|
| 8538 |
+
"learning_rate": 4.345678048137896e-05,
|
| 8539 |
+
"loss": 2.1197,
|
| 8540 |
+
"step": 5470
|
| 8541 |
+
},
|
| 8542 |
+
{
|
| 8543 |
+
"epoch": 0.7959005669428696,
|
| 8544 |
+
"grad_norm": 2.0708091259002686,
|
| 8545 |
+
"learning_rate": 4.337800126045534e-05,
|
| 8546 |
+
"loss": 2.096,
|
| 8547 |
+
"step": 5475
|
| 8548 |
+
},
|
| 8549 |
+
{
|
| 8550 |
+
"epoch": 0.7966274167756942,
|
| 8551 |
+
"grad_norm": 2.519308567047119,
|
| 8552 |
+
"learning_rate": 4.3299238958045964e-05,
|
| 8553 |
+
"loss": 1.914,
|
| 8554 |
+
"step": 5480
|
| 8555 |
+
},
|
| 8556 |
+
{
|
| 8557 |
+
"epoch": 0.7973542666085187,
|
| 8558 |
+
"grad_norm": 2.087770938873291,
|
| 8559 |
+
"learning_rate": 4.3220493772434835e-05,
|
| 8560 |
+
"loss": 1.9734,
|
| 8561 |
+
"step": 5485
|
| 8562 |
+
},
|
| 8563 |
+
{
|
| 8564 |
+
"epoch": 0.7980811164413432,
|
| 8565 |
+
"grad_norm": 2.3013477325439453,
|
| 8566 |
+
"learning_rate": 4.314176590186285e-05,
|
| 8567 |
+
"loss": 2.0952,
|
| 8568 |
+
"step": 5490
|
| 8569 |
+
},
|
| 8570 |
+
{
|
| 8571 |
+
"epoch": 0.7988079662741677,
|
| 8572 |
+
"grad_norm": 2.3956501483917236,
|
| 8573 |
+
"learning_rate": 4.306305554452735e-05,
|
| 8574 |
+
"loss": 2.1661,
|
| 8575 |
+
"step": 5495
|
| 8576 |
+
},
|
| 8577 |
+
{
|
| 8578 |
+
"epoch": 0.7995348161069923,
|
| 8579 |
+
"grad_norm": 2.290743827819824,
|
| 8580 |
+
"learning_rate": 4.298436289858153e-05,
|
| 8581 |
+
"loss": 1.9764,
|
| 8582 |
+
"step": 5500
|
| 8583 |
+
},
|
| 8584 |
+
{
|
| 8585 |
+
"epoch": 0.7995348161069923,
|
| 8586 |
+
"eval_loss": 1.8048888444900513,
|
| 8587 |
+
"eval_runtime": 20.7967,
|
| 8588 |
+
"eval_samples_per_second": 158.727,
|
| 8589 |
+
"eval_steps_per_second": 9.953,
|
| 8590 |
+
"step": 5500
|
| 8591 |
+
},
|
| 8592 |
+
{
|
| 8593 |
+
"epoch": 0.8002616659398168,
|
| 8594 |
+
"grad_norm": 2.141601324081421,
|
| 8595 |
+
"learning_rate": 4.2905688162134085e-05,
|
| 8596 |
+
"loss": 1.8667,
|
| 8597 |
+
"step": 5505
|
| 8598 |
+
},
|
| 8599 |
+
{
|
| 8600 |
+
"epoch": 0.8009885157726414,
|
| 8601 |
+
"grad_norm": 2.3627877235412598,
|
| 8602 |
+
"learning_rate": 4.2827031533248535e-05,
|
| 8603 |
+
"loss": 2.1887,
|
| 8604 |
+
"step": 5510
|
| 8605 |
+
},
|
| 8606 |
+
{
|
| 8607 |
+
"epoch": 0.8017153656054659,
|
| 8608 |
+
"grad_norm": 2.5023484230041504,
|
| 8609 |
+
"learning_rate": 4.2748393209942855e-05,
|
| 8610 |
+
"loss": 2.13,
|
| 8611 |
+
"step": 5515
|
| 8612 |
+
},
|
| 8613 |
+
{
|
| 8614 |
+
"epoch": 0.8024422154382904,
|
| 8615 |
+
"grad_norm": 2.540010690689087,
|
| 8616 |
+
"learning_rate": 4.266977339018893e-05,
|
| 8617 |
+
"loss": 2.1042,
|
| 8618 |
+
"step": 5520
|
| 8619 |
+
},
|
| 8620 |
+
{
|
| 8621 |
+
"epoch": 0.803169065271115,
|
| 8622 |
+
"grad_norm": 2.3447046279907227,
|
| 8623 |
+
"learning_rate": 4.259117227191208e-05,
|
| 8624 |
+
"loss": 2.1636,
|
| 8625 |
+
"step": 5525
|
| 8626 |
+
},
|
| 8627 |
+
{
|
| 8628 |
+
"epoch": 0.8038959151039395,
|
| 8629 |
+
"grad_norm": 2.090090751647949,
|
| 8630 |
+
"learning_rate": 4.251259005299049e-05,
|
| 8631 |
+
"loss": 1.8241,
|
| 8632 |
+
"step": 5530
|
| 8633 |
+
},
|
| 8634 |
+
{
|
| 8635 |
+
"epoch": 0.8046227649367641,
|
| 8636 |
+
"grad_norm": 2.4929826259613037,
|
| 8637 |
+
"learning_rate": 4.243402693125484e-05,
|
| 8638 |
+
"loss": 2.0696,
|
| 8639 |
+
"step": 5535
|
| 8640 |
+
},
|
| 8641 |
+
{
|
| 8642 |
+
"epoch": 0.8053496147695886,
|
| 8643 |
+
"grad_norm": 1.9764723777770996,
|
| 8644 |
+
"learning_rate": 4.235548310448767e-05,
|
| 8645 |
+
"loss": 1.9418,
|
| 8646 |
+
"step": 5540
|
| 8647 |
+
},
|
| 8648 |
+
{
|
| 8649 |
+
"epoch": 0.8060764646024131,
|
| 8650 |
+
"grad_norm": 2.151935338973999,
|
| 8651 |
+
"learning_rate": 4.2276958770423e-05,
|
| 8652 |
+
"loss": 1.9833,
|
| 8653 |
+
"step": 5545
|
| 8654 |
+
},
|
| 8655 |
+
{
|
| 8656 |
+
"epoch": 0.8068033144352377,
|
| 8657 |
+
"grad_norm": 2.3030054569244385,
|
| 8658 |
+
"learning_rate": 4.2198454126745694e-05,
|
| 8659 |
+
"loss": 2.0953,
|
| 8660 |
+
"step": 5550
|
| 8661 |
+
},
|
| 8662 |
+
{
|
| 8663 |
+
"epoch": 0.8068033144352377,
|
| 8664 |
+
"eval_loss": 1.7976654767990112,
|
| 8665 |
+
"eval_runtime": 19.0973,
|
| 8666 |
+
"eval_samples_per_second": 172.852,
|
| 8667 |
+
"eval_steps_per_second": 10.839,
|
| 8668 |
+
"step": 5550
|
| 8669 |
}
|
| 8670 |
],
|
| 8671 |
"logging_steps": 5,
|
|
|
|
| 8694 |
"attributes": {}
|
| 8695 |
}
|
| 8696 |
},
|
| 8697 |
+
"total_flos": 1.4465642591598674e+18,
|
| 8698 |
"train_batch_size": 4,
|
| 8699 |
"trial_name": null,
|
| 8700 |
"trial_params": null
|