Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step4650/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4650/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4650/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4650/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4650/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +238 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83240b591b305b77a7f0e03a1614297d5289c0ea99896646801a0c1dbd574862
|
| 3 |
size 1037269336
|
last-checkpoint/global_step4650/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e51d545c208c91904449605d111c48b0b9cd7cffe8820bff9335d42b333c838
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step4650/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:042c2f523987a0cc723abcd2f7298c860835da787e4bbd1db139579226329fb2
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4650/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87022fc476e96c495276b8a125e2cd268dfd3b042f21127f9100c1570e463907
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4650/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:557b588731c35b0d0eafa43689a8fb120627a74098c020a845f3338f31555e7c
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4650/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:986e5ce5926507822e2b6e2503b40a1f334a287ff6957abefa9b133fbadf4b81
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step4650
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a1d5dc1450e1f7d92df3b8367376288a592dc32fb455c0cd4248d71d3a7f2b5
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fb2912e97dbc350f2bdb8248e072bd5fc3be1df66f8fc3c1a669133cca92882
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e97f3af51e8b6ba933c0395cf8132efd073aae835daafe97b9b1543a75390d4e
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29aafcf8ce3f67acef842d3fa0b0a4c6e670568793675e69ea643de91260101d
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a589015430e6f0d6c31bfd6d790e8fd16af3732cfc9fd2552a05ca53c4825d5
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -7028,6 +7028,240 @@
|
|
| 7028 |
"eval_samples_per_second": 170.525,
|
| 7029 |
"eval_steps_per_second": 10.693,
|
| 7030 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7031 |
}
|
| 7032 |
],
|
| 7033 |
"logging_steps": 5,
|
|
@@ -7056,7 +7290,7 @@
|
|
| 7056 |
"attributes": {}
|
| 7057 |
}
|
| 7058 |
},
|
| 7059 |
-
"total_flos": 1.
|
| 7060 |
"train_batch_size": 4,
|
| 7061 |
"trial_name": null,
|
| 7062 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.8841668367385864,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.6759703445268208,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 4650,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 7028 |
"eval_samples_per_second": 170.525,
|
| 7029 |
"eval_steps_per_second": 10.693,
|
| 7030 |
"step": 4500
|
| 7031 |
+
},
|
| 7032 |
+
{
|
| 7033 |
+
"epoch": 0.6548916993749091,
|
| 7034 |
+
"grad_norm": 2.508161783218384,
|
| 7035 |
+
"learning_rate": 5.873522555612962e-05,
|
| 7036 |
+
"loss": 2.2274,
|
| 7037 |
+
"step": 4505
|
| 7038 |
+
},
|
| 7039 |
+
{
|
| 7040 |
+
"epoch": 0.6556185492077337,
|
| 7041 |
+
"grad_norm": 2.5135767459869385,
|
| 7042 |
+
"learning_rate": 5.8656915655531224e-05,
|
| 7043 |
+
"loss": 2.0161,
|
| 7044 |
+
"step": 4510
|
| 7045 |
+
},
|
| 7046 |
+
{
|
| 7047 |
+
"epoch": 0.6563453990405582,
|
| 7048 |
+
"grad_norm": 2.4507250785827637,
|
| 7049 |
+
"learning_rate": 5.8578584208798255e-05,
|
| 7050 |
+
"loss": 1.9389,
|
| 7051 |
+
"step": 4515
|
| 7052 |
+
},
|
| 7053 |
+
{
|
| 7054 |
+
"epoch": 0.6570722488733828,
|
| 7055 |
+
"grad_norm": 2.4963390827178955,
|
| 7056 |
+
"learning_rate": 5.850023141313007e-05,
|
| 7057 |
+
"loss": 2.2685,
|
| 7058 |
+
"step": 4520
|
| 7059 |
+
},
|
| 7060 |
+
{
|
| 7061 |
+
"epoch": 0.6577990987062073,
|
| 7062 |
+
"grad_norm": 2.3876497745513916,
|
| 7063 |
+
"learning_rate": 5.842185746577973e-05,
|
| 7064 |
+
"loss": 2.1684,
|
| 7065 |
+
"step": 4525
|
| 7066 |
+
},
|
| 7067 |
+
{
|
| 7068 |
+
"epoch": 0.6585259485390318,
|
| 7069 |
+
"grad_norm": 2.5659289360046387,
|
| 7070 |
+
"learning_rate": 5.834346256405354e-05,
|
| 7071 |
+
"loss": 2.0895,
|
| 7072 |
+
"step": 4530
|
| 7073 |
+
},
|
| 7074 |
+
{
|
| 7075 |
+
"epoch": 0.6592527983718564,
|
| 7076 |
+
"grad_norm": 2.480208396911621,
|
| 7077 |
+
"learning_rate": 5.826504690531059e-05,
|
| 7078 |
+
"loss": 2.086,
|
| 7079 |
+
"step": 4535
|
| 7080 |
+
},
|
| 7081 |
+
{
|
| 7082 |
+
"epoch": 0.6599796482046809,
|
| 7083 |
+
"grad_norm": 2.4734959602355957,
|
| 7084 |
+
"learning_rate": 5.818661068696221e-05,
|
| 7085 |
+
"loss": 2.1213,
|
| 7086 |
+
"step": 4540
|
| 7087 |
+
},
|
| 7088 |
+
{
|
| 7089 |
+
"epoch": 0.6607064980375055,
|
| 7090 |
+
"grad_norm": 2.8239712715148926,
|
| 7091 |
+
"learning_rate": 5.810815410647147e-05,
|
| 7092 |
+
"loss": 2.0349,
|
| 7093 |
+
"step": 4545
|
| 7094 |
+
},
|
| 7095 |
+
{
|
| 7096 |
+
"epoch": 0.66143334787033,
|
| 7097 |
+
"grad_norm": 2.229339122772217,
|
| 7098 |
+
"learning_rate": 5.8029677361352714e-05,
|
| 7099 |
+
"loss": 1.9909,
|
| 7100 |
+
"step": 4550
|
| 7101 |
+
},
|
| 7102 |
+
{
|
| 7103 |
+
"epoch": 0.66143334787033,
|
| 7104 |
+
"eval_loss": 1.8908016681671143,
|
| 7105 |
+
"eval_runtime": 22.1348,
|
| 7106 |
+
"eval_samples_per_second": 149.132,
|
| 7107 |
+
"eval_steps_per_second": 9.352,
|
| 7108 |
+
"step": 4550
|
| 7109 |
+
},
|
| 7110 |
+
{
|
| 7111 |
+
"epoch": 0.6621601977031545,
|
| 7112 |
+
"grad_norm": 2.306365966796875,
|
| 7113 |
+
"learning_rate": 5.795118064917109e-05,
|
| 7114 |
+
"loss": 1.9745,
|
| 7115 |
+
"step": 4555
|
| 7116 |
+
},
|
| 7117 |
+
{
|
| 7118 |
+
"epoch": 0.662887047535979,
|
| 7119 |
+
"grad_norm": 2.618732213973999,
|
| 7120 |
+
"learning_rate": 5.787266416754193e-05,
|
| 7121 |
+
"loss": 2.1639,
|
| 7122 |
+
"step": 4560
|
| 7123 |
+
},
|
| 7124 |
+
{
|
| 7125 |
+
"epoch": 0.6636138973688036,
|
| 7126 |
+
"grad_norm": 2.4831111431121826,
|
| 7127 |
+
"learning_rate": 5.779412811413042e-05,
|
| 7128 |
+
"loss": 1.8808,
|
| 7129 |
+
"step": 4565
|
| 7130 |
+
},
|
| 7131 |
+
{
|
| 7132 |
+
"epoch": 0.6643407472016282,
|
| 7133 |
+
"grad_norm": 2.3205296993255615,
|
| 7134 |
+
"learning_rate": 5.771557268665096e-05,
|
| 7135 |
+
"loss": 1.9686,
|
| 7136 |
+
"step": 4570
|
| 7137 |
+
},
|
| 7138 |
+
{
|
| 7139 |
+
"epoch": 0.6650675970344527,
|
| 7140 |
+
"grad_norm": 2.1423285007476807,
|
| 7141 |
+
"learning_rate": 5.763699808286676e-05,
|
| 7142 |
+
"loss": 1.9517,
|
| 7143 |
+
"step": 4575
|
| 7144 |
+
},
|
| 7145 |
+
{
|
| 7146 |
+
"epoch": 0.6657944468672772,
|
| 7147 |
+
"grad_norm": 2.134899854660034,
|
| 7148 |
+
"learning_rate": 5.755840450058927e-05,
|
| 7149 |
+
"loss": 2.0311,
|
| 7150 |
+
"step": 4580
|
| 7151 |
+
},
|
| 7152 |
+
{
|
| 7153 |
+
"epoch": 0.6665212967001017,
|
| 7154 |
+
"grad_norm": 2.3795955181121826,
|
| 7155 |
+
"learning_rate": 5.747979213767777e-05,
|
| 7156 |
+
"loss": 1.9214,
|
| 7157 |
+
"step": 4585
|
| 7158 |
+
},
|
| 7159 |
+
{
|
| 7160 |
+
"epoch": 0.6672481465329263,
|
| 7161 |
+
"grad_norm": 2.3388452529907227,
|
| 7162 |
+
"learning_rate": 5.740116119203877e-05,
|
| 7163 |
+
"loss": 2.1742,
|
| 7164 |
+
"step": 4590
|
| 7165 |
+
},
|
| 7166 |
+
{
|
| 7167 |
+
"epoch": 0.6679749963657509,
|
| 7168 |
+
"grad_norm": 2.438502073287964,
|
| 7169 |
+
"learning_rate": 5.732251186162558e-05,
|
| 7170 |
+
"loss": 1.9072,
|
| 7171 |
+
"step": 4595
|
| 7172 |
+
},
|
| 7173 |
+
{
|
| 7174 |
+
"epoch": 0.6687018461985754,
|
| 7175 |
+
"grad_norm": 2.352613925933838,
|
| 7176 |
+
"learning_rate": 5.7243844344437806e-05,
|
| 7177 |
+
"loss": 2.162,
|
| 7178 |
+
"step": 4600
|
| 7179 |
+
},
|
| 7180 |
+
{
|
| 7181 |
+
"epoch": 0.6687018461985754,
|
| 7182 |
+
"eval_loss": 1.893505334854126,
|
| 7183 |
+
"eval_runtime": 19.1578,
|
| 7184 |
+
"eval_samples_per_second": 172.305,
|
| 7185 |
+
"eval_steps_per_second": 10.805,
|
| 7186 |
+
"step": 4600
|
| 7187 |
+
},
|
| 7188 |
+
{
|
| 7189 |
+
"epoch": 0.6694286960313999,
|
| 7190 |
+
"grad_norm": 2.3778982162475586,
|
| 7191 |
+
"learning_rate": 5.716515883852082e-05,
|
| 7192 |
+
"loss": 2.0784,
|
| 7193 |
+
"step": 4605
|
| 7194 |
+
},
|
| 7195 |
+
{
|
| 7196 |
+
"epoch": 0.6701555458642244,
|
| 7197 |
+
"grad_norm": 2.6638474464416504,
|
| 7198 |
+
"learning_rate": 5.708645554196528e-05,
|
| 7199 |
+
"loss": 2.0468,
|
| 7200 |
+
"step": 4610
|
| 7201 |
+
},
|
| 7202 |
+
{
|
| 7203 |
+
"epoch": 0.670882395697049,
|
| 7204 |
+
"grad_norm": 2.4324584007263184,
|
| 7205 |
+
"learning_rate": 5.700773465290667e-05,
|
| 7206 |
+
"loss": 2.0943,
|
| 7207 |
+
"step": 4615
|
| 7208 |
+
},
|
| 7209 |
+
{
|
| 7210 |
+
"epoch": 0.6716092455298736,
|
| 7211 |
+
"grad_norm": 2.2958381175994873,
|
| 7212 |
+
"learning_rate": 5.692899636952473e-05,
|
| 7213 |
+
"loss": 2.0988,
|
| 7214 |
+
"step": 4620
|
| 7215 |
+
},
|
| 7216 |
+
{
|
| 7217 |
+
"epoch": 0.6723360953626981,
|
| 7218 |
+
"grad_norm": 2.202683448791504,
|
| 7219 |
+
"learning_rate": 5.6850240890042966e-05,
|
| 7220 |
+
"loss": 2.1533,
|
| 7221 |
+
"step": 4625
|
| 7222 |
+
},
|
| 7223 |
+
{
|
| 7224 |
+
"epoch": 0.6730629451955226,
|
| 7225 |
+
"grad_norm": 1.9483098983764648,
|
| 7226 |
+
"learning_rate": 5.677146841272821e-05,
|
| 7227 |
+
"loss": 1.9827,
|
| 7228 |
+
"step": 4630
|
| 7229 |
+
},
|
| 7230 |
+
{
|
| 7231 |
+
"epoch": 0.6737897950283471,
|
| 7232 |
+
"grad_norm": 2.550309658050537,
|
| 7233 |
+
"learning_rate": 5.669267913589012e-05,
|
| 7234 |
+
"loss": 1.9718,
|
| 7235 |
+
"step": 4635
|
| 7236 |
+
},
|
| 7237 |
+
{
|
| 7238 |
+
"epoch": 0.6745166448611717,
|
| 7239 |
+
"grad_norm": 2.50044846534729,
|
| 7240 |
+
"learning_rate": 5.661387325788056e-05,
|
| 7241 |
+
"loss": 2.0441,
|
| 7242 |
+
"step": 4640
|
| 7243 |
+
},
|
| 7244 |
+
{
|
| 7245 |
+
"epoch": 0.6752434946939962,
|
| 7246 |
+
"grad_norm": 2.406494140625,
|
| 7247 |
+
"learning_rate": 5.653505097709326e-05,
|
| 7248 |
+
"loss": 1.9735,
|
| 7249 |
+
"step": 4645
|
| 7250 |
+
},
|
| 7251 |
+
{
|
| 7252 |
+
"epoch": 0.6759703445268208,
|
| 7253 |
+
"grad_norm": 2.304180383682251,
|
| 7254 |
+
"learning_rate": 5.645621249196321e-05,
|
| 7255 |
+
"loss": 1.9182,
|
| 7256 |
+
"step": 4650
|
| 7257 |
+
},
|
| 7258 |
+
{
|
| 7259 |
+
"epoch": 0.6759703445268208,
|
| 7260 |
+
"eval_loss": 1.8841668367385864,
|
| 7261 |
+
"eval_runtime": 18.863,
|
| 7262 |
+
"eval_samples_per_second": 174.999,
|
| 7263 |
+
"eval_steps_per_second": 10.974,
|
| 7264 |
+
"step": 4650
|
| 7265 |
}
|
| 7266 |
],
|
| 7267 |
"logging_steps": 5,
|
|
|
|
| 7290 |
"attributes": {}
|
| 7291 |
}
|
| 7292 |
},
|
| 7293 |
+
"total_flos": 1.211591505395843e+18,
|
| 7294 |
"train_batch_size": 4,
|
| 7295 |
"trial_name": null,
|
| 7296 |
"trial_params": null
|