Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step4400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4400/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0217b4b0c3c7f1987944f70686eb3cc84294e0febf0ed767a56782cb9017db42
|
| 3 |
size 1037269336
|
last-checkpoint/global_step4400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:208f3f993987f330acb84602114113a61e43ebe3d5eb09c047705e04b4dea90b
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step4400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:909cd42d1608b5fe7874afb79b6fbdaf4ca93180010eca90c2478a5b0460e210
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80ac4ee0863674394e18bf040c39367c44210bf27eb718840c9e014a8198505b
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5edea25fbe4b088f32fadb2bae52dbcec4468d967cb0d3a0fb41d535943734f6
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4400/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:051d7c061bd63c18a72c7f60192548c38149641e49dddb376a120d33da3567ef
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step4400
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ec6429d51b78e62a781ea28a18634f451844f66fee400b9be20b2072a6fac5e
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e3e04848cc38a3a002981db4be3e84294dc9e5c12327b6e3c23b02534523094
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a5a84f5b27ded1de3f5ceb77963092ac6c45b3bb6acfbc406627cbc633009a1
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8b169b571920e7c4ea3cfadebde4b2c5412429683ec1e5c89095379be2aeec0
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f6239c54a9e14ade75dd1dbb72d423d68c7c1273e9d5fb21d6effe590197848
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6716,6 +6716,162 @@
|
|
| 6716 |
"eval_samples_per_second": 175.036,
|
| 6717 |
"eval_steps_per_second": 10.976,
|
| 6718 |
"step": 4300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6719 |
}
|
| 6720 |
],
|
| 6721 |
"logging_steps": 5,
|
|
@@ -6744,7 +6900,7 @@
|
|
| 6744 |
"attributes": {}
|
| 6745 |
}
|
| 6746 |
},
|
| 6747 |
-
"total_flos": 1.
|
| 6748 |
"train_batch_size": 4,
|
| 6749 |
"trial_name": null,
|
| 6750 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.9043115377426147,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.6396278528855939,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 4400,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6716 |
"eval_samples_per_second": 175.036,
|
| 6717 |
"eval_steps_per_second": 10.976,
|
| 6718 |
"step": 4300
|
| 6719 |
+
},
|
| 6720 |
+
{
|
| 6721 |
+
"epoch": 0.6258177060619277,
|
| 6722 |
+
"grad_norm": 2.382782220840454,
|
| 6723 |
+
"learning_rate": 6.184769707241625e-05,
|
| 6724 |
+
"loss": 1.9673,
|
| 6725 |
+
"step": 4305
|
| 6726 |
+
},
|
| 6727 |
+
{
|
| 6728 |
+
"epoch": 0.6265445558947521,
|
| 6729 |
+
"grad_norm": 2.4369523525238037,
|
| 6730 |
+
"learning_rate": 6.177041004251455e-05,
|
| 6731 |
+
"loss": 2.2144,
|
| 6732 |
+
"step": 4310
|
| 6733 |
+
},
|
| 6734 |
+
{
|
| 6735 |
+
"epoch": 0.6272714057275767,
|
| 6736 |
+
"grad_norm": 2.43398380279541,
|
| 6737 |
+
"learning_rate": 6.16930936282599e-05,
|
| 6738 |
+
"loss": 2.0025,
|
| 6739 |
+
"step": 4315
|
| 6740 |
+
},
|
| 6741 |
+
{
|
| 6742 |
+
"epoch": 0.6279982555604012,
|
| 6743 |
+
"grad_norm": 2.472754955291748,
|
| 6744 |
+
"learning_rate": 6.161574802429627e-05,
|
| 6745 |
+
"loss": 2.1328,
|
| 6746 |
+
"step": 4320
|
| 6747 |
+
},
|
| 6748 |
+
{
|
| 6749 |
+
"epoch": 0.6287251053932258,
|
| 6750 |
+
"grad_norm": 2.6764614582061768,
|
| 6751 |
+
"learning_rate": 6.153837342534111e-05,
|
| 6752 |
+
"loss": 2.1554,
|
| 6753 |
+
"step": 4325
|
| 6754 |
+
},
|
| 6755 |
+
{
|
| 6756 |
+
"epoch": 0.6294519552260504,
|
| 6757 |
+
"grad_norm": 2.3212342262268066,
|
| 6758 |
+
"learning_rate": 6.146097002618492e-05,
|
| 6759 |
+
"loss": 2.1615,
|
| 6760 |
+
"step": 4330
|
| 6761 |
+
},
|
| 6762 |
+
{
|
| 6763 |
+
"epoch": 0.6301788050588748,
|
| 6764 |
+
"grad_norm": 2.824336290359497,
|
| 6765 |
+
"learning_rate": 6.138353802169061e-05,
|
| 6766 |
+
"loss": 2.0653,
|
| 6767 |
+
"step": 4335
|
| 6768 |
+
},
|
| 6769 |
+
{
|
| 6770 |
+
"epoch": 0.6309056548916994,
|
| 6771 |
+
"grad_norm": 2.4014430046081543,
|
| 6772 |
+
"learning_rate": 6.130607760679321e-05,
|
| 6773 |
+
"loss": 2.0374,
|
| 6774 |
+
"step": 4340
|
| 6775 |
+
},
|
| 6776 |
+
{
|
| 6777 |
+
"epoch": 0.6316325047245239,
|
| 6778 |
+
"grad_norm": 2.458951950073242,
|
| 6779 |
+
"learning_rate": 6.122858897649921e-05,
|
| 6780 |
+
"loss": 2.1722,
|
| 6781 |
+
"step": 4345
|
| 6782 |
+
},
|
| 6783 |
+
{
|
| 6784 |
+
"epoch": 0.6323593545573485,
|
| 6785 |
+
"grad_norm": 2.567749500274658,
|
| 6786 |
+
"learning_rate": 6.115107232588612e-05,
|
| 6787 |
+
"loss": 2.1671,
|
| 6788 |
+
"step": 4350
|
| 6789 |
+
},
|
| 6790 |
+
{
|
| 6791 |
+
"epoch": 0.6323593545573485,
|
| 6792 |
+
"eval_loss": 1.9125865697860718,
|
| 6793 |
+
"eval_runtime": 22.1706,
|
| 6794 |
+
"eval_samples_per_second": 148.891,
|
| 6795 |
+
"eval_steps_per_second": 9.337,
|
| 6796 |
+
"step": 4350
|
| 6797 |
+
},
|
| 6798 |
+
{
|
| 6799 |
+
"epoch": 0.6330862043901729,
|
| 6800 |
+
"grad_norm": 2.322906255722046,
|
| 6801 |
+
"learning_rate": 6.107352785010202e-05,
|
| 6802 |
+
"loss": 2.1378,
|
| 6803 |
+
"step": 4355
|
| 6804 |
+
},
|
| 6805 |
+
{
|
| 6806 |
+
"epoch": 0.6338130542229975,
|
| 6807 |
+
"grad_norm": 2.1527748107910156,
|
| 6808 |
+
"learning_rate": 6.0995955744365073e-05,
|
| 6809 |
+
"loss": 2.0096,
|
| 6810 |
+
"step": 4360
|
| 6811 |
+
},
|
| 6812 |
+
{
|
| 6813 |
+
"epoch": 0.6345399040558221,
|
| 6814 |
+
"grad_norm": 2.6586174964904785,
|
| 6815 |
+
"learning_rate": 6.0918356203962934e-05,
|
| 6816 |
+
"loss": 2.2011,
|
| 6817 |
+
"step": 4365
|
| 6818 |
+
},
|
| 6819 |
+
{
|
| 6820 |
+
"epoch": 0.6352667538886466,
|
| 6821 |
+
"grad_norm": 2.559743642807007,
|
| 6822 |
+
"learning_rate": 6.084072942425234e-05,
|
| 6823 |
+
"loss": 2.0937,
|
| 6824 |
+
"step": 4370
|
| 6825 |
+
},
|
| 6826 |
+
{
|
| 6827 |
+
"epoch": 0.6359936037214712,
|
| 6828 |
+
"grad_norm": 2.8032941818237305,
|
| 6829 |
+
"learning_rate": 6.076307560065865e-05,
|
| 6830 |
+
"loss": 1.971,
|
| 6831 |
+
"step": 4375
|
| 6832 |
+
},
|
| 6833 |
+
{
|
| 6834 |
+
"epoch": 0.6367204535542956,
|
| 6835 |
+
"grad_norm": 2.3299427032470703,
|
| 6836 |
+
"learning_rate": 6.068539492867526e-05,
|
| 6837 |
+
"loss": 2.0369,
|
| 6838 |
+
"step": 4380
|
| 6839 |
+
},
|
| 6840 |
+
{
|
| 6841 |
+
"epoch": 0.6374473033871202,
|
| 6842 |
+
"grad_norm": 2.167146682739258,
|
| 6843 |
+
"learning_rate": 6.0607687603863155e-05,
|
| 6844 |
+
"loss": 1.9857,
|
| 6845 |
+
"step": 4385
|
| 6846 |
+
},
|
| 6847 |
+
{
|
| 6848 |
+
"epoch": 0.6381741532199448,
|
| 6849 |
+
"grad_norm": 2.151320219039917,
|
| 6850 |
+
"learning_rate": 6.052995382185044e-05,
|
| 6851 |
+
"loss": 2.1305,
|
| 6852 |
+
"step": 4390
|
| 6853 |
+
},
|
| 6854 |
+
{
|
| 6855 |
+
"epoch": 0.6389010030527693,
|
| 6856 |
+
"grad_norm": 2.5785205364227295,
|
| 6857 |
+
"learning_rate": 6.045219377833183e-05,
|
| 6858 |
+
"loss": 1.8801,
|
| 6859 |
+
"step": 4395
|
| 6860 |
+
},
|
| 6861 |
+
{
|
| 6862 |
+
"epoch": 0.6396278528855939,
|
| 6863 |
+
"grad_norm": 2.6063733100891113,
|
| 6864 |
+
"learning_rate": 6.037440766906813e-05,
|
| 6865 |
+
"loss": 1.8297,
|
| 6866 |
+
"step": 4400
|
| 6867 |
+
},
|
| 6868 |
+
{
|
| 6869 |
+
"epoch": 0.6396278528855939,
|
| 6870 |
+
"eval_loss": 1.9043115377426147,
|
| 6871 |
+
"eval_runtime": 18.9211,
|
| 6872 |
+
"eval_samples_per_second": 174.461,
|
| 6873 |
+
"eval_steps_per_second": 10.94,
|
| 6874 |
+
"step": 4400
|
| 6875 |
}
|
| 6876 |
],
|
| 6877 |
"logging_steps": 5,
|
|
|
|
| 6900 |
"attributes": {}
|
| 6901 |
}
|
| 6902 |
},
|
| 6903 |
+
"total_flos": 1.1471040004625531e+18,
|
| 6904 |
"train_batch_size": 4,
|
| 6905 |
"trial_name": null,
|
| 6906 |
"trial_params": null
|