Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2700/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e89cd6eb023a8692705065f92f0d5371ce91e0df62544eb097bf03d18a65d84
|
| 3 |
size 98088784
|
last-checkpoint/global_step2700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02b7fa48200a5a042da0464ddaf341e8d30efd19d0e7b003acd0e140fc773067
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:772bd63266afa3802bdd244de18d28b742842cc53dec6c297b70d6eaac8e06b5
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:974d834923240a6423062503234126d5259de91b219571e9a4e038fcc8cf1b1f
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3537ad743b04b039461b68e9164c6804f256bfe23970460856f048a4790810c
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2700/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c8843c00c3514b22c11b1cbcc95febc90df3000d919ac4c3992920b30483c41
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step2700
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ede0b1fc36335323175e2edd53261702567c37590048e71947e3c426e85b5358
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c225af08c27371f3ad7414b25e230715ecb3f8cb70b10c4259ad6c0fda5ccd6
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e88a43fa2cddcb58d4826c017c03a1d3e5471c2689be34ccc5b5029741e5921b
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0df0f25af541c09f9f007e452faad4d72697d4a9c57c257b511f80692e8b902e
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4eaf8009039a6f74bcd21b41a171fcf3e5ca003f7176f1eba8b4e8308150dea7
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4142,6 +4142,84 @@
|
|
| 4142 |
"eval_samples_per_second": 125.519,
|
| 4143 |
"eval_steps_per_second": 15.698,
|
| 4144 |
"step": 2650
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4145 |
}
|
| 4146 |
],
|
| 4147 |
"logging_steps": 5,
|
|
@@ -4170,7 +4248,7 @@
|
|
| 4170 |
"attributes": {}
|
| 4171 |
}
|
| 4172 |
},
|
| 4173 |
-
"total_flos": 1.
|
| 4174 |
"train_batch_size": 2,
|
| 4175 |
"trial_name": null,
|
| 4176 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6300840377807617,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.6466625842008575,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 2700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4142 |
"eval_samples_per_second": 125.519,
|
| 4143 |
"eval_steps_per_second": 15.698,
|
| 4144 |
"step": 2650
|
| 4145 |
+
},
|
| 4146 |
+
{
|
| 4147 |
+
"epoch": 2.60257195345989,
|
| 4148 |
+
"grad_norm": 0.2412237972021103,
|
| 4149 |
+
"learning_rate": 5.170746606940992e-05,
|
| 4150 |
+
"loss": 0.671,
|
| 4151 |
+
"step": 2655
|
| 4152 |
+
},
|
| 4153 |
+
{
|
| 4154 |
+
"epoch": 2.6074709124311086,
|
| 4155 |
+
"grad_norm": 0.22563929855823517,
|
| 4156 |
+
"learning_rate": 5.153028819149837e-05,
|
| 4157 |
+
"loss": 0.6472,
|
| 4158 |
+
"step": 2660
|
| 4159 |
+
},
|
| 4160 |
+
{
|
| 4161 |
+
"epoch": 2.612369871402327,
|
| 4162 |
+
"grad_norm": 0.21387755870819092,
|
| 4163 |
+
"learning_rate": 5.135315006687393e-05,
|
| 4164 |
+
"loss": 0.6689,
|
| 4165 |
+
"step": 2665
|
| 4166 |
+
},
|
| 4167 |
+
{
|
| 4168 |
+
"epoch": 2.6172688303735456,
|
| 4169 |
+
"grad_norm": 0.21877914667129517,
|
| 4170 |
+
"learning_rate": 5.117605351590485e-05,
|
| 4171 |
+
"loss": 0.6697,
|
| 4172 |
+
"step": 2670
|
| 4173 |
+
},
|
| 4174 |
+
{
|
| 4175 |
+
"epoch": 2.6221677893447644,
|
| 4176 |
+
"grad_norm": 0.236678346991539,
|
| 4177 |
+
"learning_rate": 5.099900035853222e-05,
|
| 4178 |
+
"loss": 0.6506,
|
| 4179 |
+
"step": 2675
|
| 4180 |
+
},
|
| 4181 |
+
{
|
| 4182 |
+
"epoch": 2.6270667483159826,
|
| 4183 |
+
"grad_norm": 0.2781834602355957,
|
| 4184 |
+
"learning_rate": 5.082199241425118e-05,
|
| 4185 |
+
"loss": 0.6589,
|
| 4186 |
+
"step": 2680
|
| 4187 |
+
},
|
| 4188 |
+
{
|
| 4189 |
+
"epoch": 2.6319657072872014,
|
| 4190 |
+
"grad_norm": 0.24861498177051544,
|
| 4191 |
+
"learning_rate": 5.064503150209222e-05,
|
| 4192 |
+
"loss": 0.6571,
|
| 4193 |
+
"step": 2685
|
| 4194 |
+
},
|
| 4195 |
+
{
|
| 4196 |
+
"epoch": 2.63686466625842,
|
| 4197 |
+
"grad_norm": 0.2387225478887558,
|
| 4198 |
+
"learning_rate": 5.046811944060252e-05,
|
| 4199 |
+
"loss": 0.6723,
|
| 4200 |
+
"step": 2690
|
| 4201 |
+
},
|
| 4202 |
+
{
|
| 4203 |
+
"epoch": 2.641763625229639,
|
| 4204 |
+
"grad_norm": 0.20970548689365387,
|
| 4205 |
+
"learning_rate": 5.029125804782722e-05,
|
| 4206 |
+
"loss": 0.6585,
|
| 4207 |
+
"step": 2695
|
| 4208 |
+
},
|
| 4209 |
+
{
|
| 4210 |
+
"epoch": 2.6466625842008575,
|
| 4211 |
+
"grad_norm": 0.2293289601802826,
|
| 4212 |
+
"learning_rate": 5.01144491412908e-05,
|
| 4213 |
+
"loss": 0.6658,
|
| 4214 |
+
"step": 2700
|
| 4215 |
+
},
|
| 4216 |
+
{
|
| 4217 |
+
"epoch": 2.6466625842008575,
|
| 4218 |
+
"eval_loss": 0.6300840377807617,
|
| 4219 |
+
"eval_runtime": 15.5653,
|
| 4220 |
+
"eval_samples_per_second": 125.857,
|
| 4221 |
+
"eval_steps_per_second": 15.74,
|
| 4222 |
+
"step": 2700
|
| 4223 |
}
|
| 4224 |
],
|
| 4225 |
"logging_steps": 5,
|
|
|
|
| 4248 |
"attributes": {}
|
| 4249 |
}
|
| 4250 |
},
|
| 4251 |
+
"total_flos": 1.3915592955642511e+18,
|
| 4252 |
"train_batch_size": 2,
|
| 4253 |
"trial_name": null,
|
| 4254 |
"trial_params": null
|