Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2550/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c06c7c3813fddb5ac98100afaa4381d3baef788a23ae2c7f74869fe4672a6cf
|
| 3 |
size 1037269336
|
last-checkpoint/global_step2550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:331533aa0273d7ed844034657286226b3238718c45c184054676c41ff94315dd
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step2550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4a023f96ecf4f996521f9cfb06b8f8627eb5336444c379a4bce76a9ef3dcade
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step2550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6200ae39dfd5a06d648527ccaf311a71c7b1d147fe5d7fc58fbb95ac931b0cad
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step2550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:943ace536a9d78bb2de6e5316a56efdfd48708f6d372b403c76fc312c6969138
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step2550/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dc68d7703a7eef8d86cc41de4437cc78da7c72c2af5952b0b228df8dee727a7
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step2550
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91b0ee28e509aba9c3f4fd9aa443f72e6d6a57f938b3ceddd2bc7bbaf5cf585f
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fc20aa2b364f08ed1de312499fe9555a2f7695c5ffc4d37b3434fdc9e8e70c8
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:638e7b950d339cd9bb18ebffa7e43a1c200644eb1f4d72e3469233185fb09e21
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:166aa68437e50207c09f7d061179b22a36411d587c77f77e4583f632d4d5ebe2
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:726509c8d205ff37449ab18ba69ba9d65a16125d4029696d3a738278dbe2b999
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 2.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3830,6 +3830,162 @@
|
|
| 3830 |
"eval_samples_per_second": 174.376,
|
| 3831 |
"eval_steps_per_second": 10.935,
|
| 3832 |
"step": 2450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3833 |
}
|
| 3834 |
],
|
| 3835 |
"logging_steps": 5,
|
|
@@ -3858,7 +4014,7 @@
|
|
| 3858 |
"attributes": {}
|
| 3859 |
}
|
| 3860 |
},
|
| 3861 |
-
"total_flos": 6.
|
| 3862 |
"train_batch_size": 4,
|
| 3863 |
"trial_name": null,
|
| 3864 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 2.0672757625579834,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.3706934147405146,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 2550,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3830 |
"eval_samples_per_second": 174.376,
|
| 3831 |
"eval_steps_per_second": 10.935,
|
| 3832 |
"step": 2450
|
| 3833 |
+
},
|
| 3834 |
+
{
|
| 3835 |
+
"epoch": 0.3568832679168484,
|
| 3836 |
+
"grad_norm": 2.373387098312378,
|
| 3837 |
+
"learning_rate": 8.68570360607412e-05,
|
| 3838 |
+
"loss": 2.087,
|
| 3839 |
+
"step": 2455
|
| 3840 |
+
},
|
| 3841 |
+
{
|
| 3842 |
+
"epoch": 0.35761011774967294,
|
| 3843 |
+
"grad_norm": 2.527256488800049,
|
| 3844 |
+
"learning_rate": 8.680298155139663e-05,
|
| 3845 |
+
"loss": 2.1579,
|
| 3846 |
+
"step": 2460
|
| 3847 |
+
},
|
| 3848 |
+
{
|
| 3849 |
+
"epoch": 0.35833696758249745,
|
| 3850 |
+
"grad_norm": 2.339224100112915,
|
| 3851 |
+
"learning_rate": 8.674883463823014e-05,
|
| 3852 |
+
"loss": 2.3154,
|
| 3853 |
+
"step": 2465
|
| 3854 |
+
},
|
| 3855 |
+
{
|
| 3856 |
+
"epoch": 0.359063817415322,
|
| 3857 |
+
"grad_norm": 2.3437659740448,
|
| 3858 |
+
"learning_rate": 8.669459545755653e-05,
|
| 3859 |
+
"loss": 2.1505,
|
| 3860 |
+
"step": 2470
|
| 3861 |
+
},
|
| 3862 |
+
{
|
| 3863 |
+
"epoch": 0.3597906672481465,
|
| 3864 |
+
"grad_norm": 2.31026291847229,
|
| 3865 |
+
"learning_rate": 8.664026414592286e-05,
|
| 3866 |
+
"loss": 2.4049,
|
| 3867 |
+
"step": 2475
|
| 3868 |
+
},
|
| 3869 |
+
{
|
| 3870 |
+
"epoch": 0.3605175170809711,
|
| 3871 |
+
"grad_norm": 2.880200147628784,
|
| 3872 |
+
"learning_rate": 8.658584084010815e-05,
|
| 3873 |
+
"loss": 2.3197,
|
| 3874 |
+
"step": 2480
|
| 3875 |
+
},
|
| 3876 |
+
{
|
| 3877 |
+
"epoch": 0.36124436691379563,
|
| 3878 |
+
"grad_norm": 2.4933598041534424,
|
| 3879 |
+
"learning_rate": 8.653132567712298e-05,
|
| 3880 |
+
"loss": 2.2212,
|
| 3881 |
+
"step": 2485
|
| 3882 |
+
},
|
| 3883 |
+
{
|
| 3884 |
+
"epoch": 0.36197121674662014,
|
| 3885 |
+
"grad_norm": 2.8316283226013184,
|
| 3886 |
+
"learning_rate": 8.647671879420927e-05,
|
| 3887 |
+
"loss": 2.302,
|
| 3888 |
+
"step": 2490
|
| 3889 |
+
},
|
| 3890 |
+
{
|
| 3891 |
+
"epoch": 0.3626980665794447,
|
| 3892 |
+
"grad_norm": 2.6056923866271973,
|
| 3893 |
+
"learning_rate": 8.64220203288397e-05,
|
| 3894 |
+
"loss": 2.3031,
|
| 3895 |
+
"step": 2495
|
| 3896 |
+
},
|
| 3897 |
+
{
|
| 3898 |
+
"epoch": 0.3634249164122692,
|
| 3899 |
+
"grad_norm": 2.3827829360961914,
|
| 3900 |
+
"learning_rate": 8.636723041871766e-05,
|
| 3901 |
+
"loss": 2.3328,
|
| 3902 |
+
"step": 2500
|
| 3903 |
+
},
|
| 3904 |
+
{
|
| 3905 |
+
"epoch": 0.3634249164122692,
|
| 3906 |
+
"eval_loss": 2.0754590034484863,
|
| 3907 |
+
"eval_runtime": 25.4986,
|
| 3908 |
+
"eval_samples_per_second": 129.458,
|
| 3909 |
+
"eval_steps_per_second": 8.118,
|
| 3910 |
+
"step": 2500
|
| 3911 |
+
},
|
| 3912 |
+
{
|
| 3913 |
+
"epoch": 0.36415176624509377,
|
| 3914 |
+
"grad_norm": 2.934666872024536,
|
| 3915 |
+
"learning_rate": 8.631234920177665e-05,
|
| 3916 |
+
"loss": 2.1663,
|
| 3917 |
+
"step": 2505
|
| 3918 |
+
},
|
| 3919 |
+
{
|
| 3920 |
+
"epoch": 0.3648786160779183,
|
| 3921 |
+
"grad_norm": 2.3056254386901855,
|
| 3922 |
+
"learning_rate": 8.625737681618008e-05,
|
| 3923 |
+
"loss": 2.1278,
|
| 3924 |
+
"step": 2510
|
| 3925 |
+
},
|
| 3926 |
+
{
|
| 3927 |
+
"epoch": 0.36560546591074283,
|
| 3928 |
+
"grad_norm": 2.4940974712371826,
|
| 3929 |
+
"learning_rate": 8.620231340032087e-05,
|
| 3930 |
+
"loss": 2.0522,
|
| 3931 |
+
"step": 2515
|
| 3932 |
+
},
|
| 3933 |
+
{
|
| 3934 |
+
"epoch": 0.3663323157435674,
|
| 3935 |
+
"grad_norm": 2.724717855453491,
|
| 3936 |
+
"learning_rate": 8.614715909282107e-05,
|
| 3937 |
+
"loss": 2.1553,
|
| 3938 |
+
"step": 2520
|
| 3939 |
+
},
|
| 3940 |
+
{
|
| 3941 |
+
"epoch": 0.3670591655763919,
|
| 3942 |
+
"grad_norm": 2.628826379776001,
|
| 3943 |
+
"learning_rate": 8.609191403253163e-05,
|
| 3944 |
+
"loss": 1.991,
|
| 3945 |
+
"step": 2525
|
| 3946 |
+
},
|
| 3947 |
+
{
|
| 3948 |
+
"epoch": 0.36778601540921646,
|
| 3949 |
+
"grad_norm": 2.2899041175842285,
|
| 3950 |
+
"learning_rate": 8.603657835853188e-05,
|
| 3951 |
+
"loss": 1.974,
|
| 3952 |
+
"step": 2530
|
| 3953 |
+
},
|
| 3954 |
+
{
|
| 3955 |
+
"epoch": 0.368512865242041,
|
| 3956 |
+
"grad_norm": 2.5030078887939453,
|
| 3957 |
+
"learning_rate": 8.598115221012935e-05,
|
| 3958 |
+
"loss": 2.3256,
|
| 3959 |
+
"step": 2535
|
| 3960 |
+
},
|
| 3961 |
+
{
|
| 3962 |
+
"epoch": 0.3692397150748655,
|
| 3963 |
+
"grad_norm": 2.282642364501953,
|
| 3964 |
+
"learning_rate": 8.592563572685929e-05,
|
| 3965 |
+
"loss": 2.1428,
|
| 3966 |
+
"step": 2540
|
| 3967 |
+
},
|
| 3968 |
+
{
|
| 3969 |
+
"epoch": 0.3699665649076901,
|
| 3970 |
+
"grad_norm": 2.9469528198242188,
|
| 3971 |
+
"learning_rate": 8.587002904848438e-05,
|
| 3972 |
+
"loss": 2.1632,
|
| 3973 |
+
"step": 2545
|
| 3974 |
+
},
|
| 3975 |
+
{
|
| 3976 |
+
"epoch": 0.3706934147405146,
|
| 3977 |
+
"grad_norm": 2.4431910514831543,
|
| 3978 |
+
"learning_rate": 8.581433231499436e-05,
|
| 3979 |
+
"loss": 2.2365,
|
| 3980 |
+
"step": 2550
|
| 3981 |
+
},
|
| 3982 |
+
{
|
| 3983 |
+
"epoch": 0.3706934147405146,
|
| 3984 |
+
"eval_loss": 2.0672757625579834,
|
| 3985 |
+
"eval_runtime": 19.0363,
|
| 3986 |
+
"eval_samples_per_second": 173.406,
|
| 3987 |
+
"eval_steps_per_second": 10.874,
|
| 3988 |
+
"step": 2550
|
| 3989 |
}
|
| 3990 |
],
|
| 3991 |
"logging_steps": 5,
|
|
|
|
| 4014 |
"attributes": {}
|
| 4015 |
}
|
| 4016 |
},
|
| 4017 |
+
"total_flos": 6.639732802893906e+17,
|
| 4018 |
"train_batch_size": 4,
|
| 4019 |
"trial_name": null,
|
| 4020 |
"trial_params": null
|