Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2150/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4350d345d97b56ca7b59dbc011925ec1904d8a564b4e38969c9579b074ff804
|
| 3 |
size 98088784
|
last-checkpoint/global_step2150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79a44a13ae97270c3c416f31cb0b1f80f12d2017b4f0d8cf1c6268d7d80a8a9e
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ad29f1a01bca6c52caaba5da3ca1b1b7fed1b150ff1a8a793f96d09e741d822
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c041692ece9873bd47a7b865469cc75af6eb0d6bfb4e97d45a24abb258e8a50c
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b262ba0035994932655c5a03030ca63c65a5e37551cbdff4a83db6e4eba9ca2
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2150/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ef2a5d14ad0f0ccc005eeb930f4717339e689bc4df3778a18b1154417ee6b78
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step2150
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:065a50993eba0212276345f76d8268040ff81f0b0379c2af74bb3ae5b3dc8bf1
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42410dea9cc1279875fd58cdbd387af92a3e6d12f7e52ff497b959ac4ac2873f
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84327cb94481f4aa330fcca0f3bb1121ad581d5beb94e2475a38bda5aca36760
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15a861189dd19e0aceec678d8a75cad82c965ce69f5363194a8cdd37ec6a9f5d
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e94beadc98ff451c6f6d2bc6f85a9bd66bfdf5599b86c4c582259ad51b083b3
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3284,6 +3284,84 @@
|
|
| 3284 |
"eval_samples_per_second": 126.157,
|
| 3285 |
"eval_steps_per_second": 15.778,
|
| 3286 |
"step": 2100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3287 |
}
|
| 3288 |
],
|
| 3289 |
"logging_steps": 5,
|
|
@@ -3312,7 +3390,7 @@
|
|
| 3312 |
"attributes": {}
|
| 3313 |
}
|
| 3314 |
},
|
| 3315 |
-
"total_flos": 1.
|
| 3316 |
"train_batch_size": 2,
|
| 3317 |
"trial_name": null,
|
| 3318 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6448646187782288,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.1077770973668097,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 2150,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3284 |
"eval_samples_per_second": 126.157,
|
| 3285 |
"eval_steps_per_second": 15.778,
|
| 3286 |
"step": 2100
|
| 3287 |
+
},
|
| 3288 |
+
{
|
| 3289 |
+
"epoch": 2.063686466625842,
|
| 3290 |
+
"grad_norm": 0.23175491392612457,
|
| 3291 |
+
"learning_rate": 7.102482680582014e-05,
|
| 3292 |
+
"loss": 0.6646,
|
| 3293 |
+
"step": 2105
|
| 3294 |
+
},
|
| 3295 |
+
{
|
| 3296 |
+
"epoch": 2.068585425597061,
|
| 3297 |
+
"grad_norm": 0.23201850056648254,
|
| 3298 |
+
"learning_rate": 7.085436623686342e-05,
|
| 3299 |
+
"loss": 0.663,
|
| 3300 |
+
"step": 2110
|
| 3301 |
+
},
|
| 3302 |
+
{
|
| 3303 |
+
"epoch": 2.073484384568279,
|
| 3304 |
+
"grad_norm": 0.23181083798408508,
|
| 3305 |
+
"learning_rate": 7.068374683639328e-05,
|
| 3306 |
+
"loss": 0.6732,
|
| 3307 |
+
"step": 2115
|
| 3308 |
+
},
|
| 3309 |
+
{
|
| 3310 |
+
"epoch": 2.078383343539498,
|
| 3311 |
+
"grad_norm": 0.21368514001369476,
|
| 3312 |
+
"learning_rate": 7.051297035778806e-05,
|
| 3313 |
+
"loss": 0.6602,
|
| 3314 |
+
"step": 2120
|
| 3315 |
+
},
|
| 3316 |
+
{
|
| 3317 |
+
"epoch": 2.0832823025107166,
|
| 3318 |
+
"grad_norm": 0.25182291865348816,
|
| 3319 |
+
"learning_rate": 7.034203855604029e-05,
|
| 3320 |
+
"loss": 0.6544,
|
| 3321 |
+
"step": 2125
|
| 3322 |
+
},
|
| 3323 |
+
{
|
| 3324 |
+
"epoch": 2.0881812614819353,
|
| 3325 |
+
"grad_norm": 0.2292199581861496,
|
| 3326 |
+
"learning_rate": 7.017095318773873e-05,
|
| 3327 |
+
"loss": 0.6522,
|
| 3328 |
+
"step": 2130
|
| 3329 |
+
},
|
| 3330 |
+
{
|
| 3331 |
+
"epoch": 2.0930802204531536,
|
| 3332 |
+
"grad_norm": 0.20745912194252014,
|
| 3333 |
+
"learning_rate": 6.999971601105022e-05,
|
| 3334 |
+
"loss": 0.6527,
|
| 3335 |
+
"step": 2135
|
| 3336 |
+
},
|
| 3337 |
+
{
|
| 3338 |
+
"epoch": 2.0979791794243723,
|
| 3339 |
+
"grad_norm": 0.22758464515209198,
|
| 3340 |
+
"learning_rate": 6.982832878570172e-05,
|
| 3341 |
+
"loss": 0.6578,
|
| 3342 |
+
"step": 2140
|
| 3343 |
+
},
|
| 3344 |
+
{
|
| 3345 |
+
"epoch": 2.102878138395591,
|
| 3346 |
+
"grad_norm": 0.20944316685199738,
|
| 3347 |
+
"learning_rate": 6.965679327296211e-05,
|
| 3348 |
+
"loss": 0.6584,
|
| 3349 |
+
"step": 2145
|
| 3350 |
+
},
|
| 3351 |
+
{
|
| 3352 |
+
"epoch": 2.1077770973668097,
|
| 3353 |
+
"grad_norm": 0.21847444772720337,
|
| 3354 |
+
"learning_rate": 6.94851112356242e-05,
|
| 3355 |
+
"loss": 0.6733,
|
| 3356 |
+
"step": 2150
|
| 3357 |
+
},
|
| 3358 |
+
{
|
| 3359 |
+
"epoch": 2.1077770973668097,
|
| 3360 |
+
"eval_loss": 0.6448646187782288,
|
| 3361 |
+
"eval_runtime": 15.5401,
|
| 3362 |
+
"eval_samples_per_second": 126.061,
|
| 3363 |
+
"eval_steps_per_second": 15.766,
|
| 3364 |
+
"step": 2150
|
| 3365 |
}
|
| 3366 |
],
|
| 3367 |
"logging_steps": 5,
|
|
|
|
| 3390 |
"attributes": {}
|
| 3391 |
}
|
| 3392 |
},
|
| 3393 |
+
"total_flos": 1.1081578526120346e+18,
|
| 3394 |
"train_batch_size": 2,
|
| 3395 |
"trial_name": null,
|
| 3396 |
"trial_params": null
|