Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2200/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +238 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19b1fa32ac470abea9aaed5df0314fdf8255e2f5d17488e027920e596ac1b454
|
| 3 |
size 1037269336
|
last-checkpoint/global_step2200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17f06d8019305880e63ff917b8eaeade273a1f4efe92b142374a9f453b975ba1
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step2200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d88ca866ed4caf5fe9bac9eb2dbf7945afd594a1ced48078e756b2ec4f52391
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step2200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e2c549dac5e475a7e15a608b79addfb82769ab62a4fb5a74e1660ba75c8c15f
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step2200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:577761a19602e32eca30c81aefb3456cd3a26b9218d94e1ea056cc48e18c2862
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step2200/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3476292ffcb6799b5340968f39c80013d42c347ec1a8f3b423d91fa1ade313f7
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step2200
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9aa753095cc0a44fced50afca6bff1b99146c481ddc3dc764d689ff5546d5fd
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90c2966bfb4a402e04ec2751d9f8452dc016d605399a989dce9bed4000125da0
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68fa3782d3dbab732db659905737cfd4c32e0162423b6b3bf8864f2d1fee1b91
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e218ffc86ec50875e9f6816271fc0465b75694055819b0e37bcd282c94f6dbe5
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0282a728c75d0bb9e123936361d8d60683d939f3df4b2863405d14fc34b553e7
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 2.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3206,6 +3206,240 @@
|
|
| 3206 |
"eval_samples_per_second": 175.617,
|
| 3207 |
"eval_steps_per_second": 11.013,
|
| 3208 |
"step": 2050
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3209 |
}
|
| 3210 |
],
|
| 3211 |
"logging_steps": 5,
|
|
@@ -3234,7 +3468,7 @@
|
|
| 3234 |
"attributes": {}
|
| 3235 |
}
|
| 3236 |
},
|
| 3237 |
-
"total_flos": 5.
|
| 3238 |
"train_batch_size": 4,
|
| 3239 |
"trial_name": null,
|
| 3240 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 2.08577561378479,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.31981392644279694,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 2200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3206 |
"eval_samples_per_second": 175.617,
|
| 3207 |
"eval_steps_per_second": 11.013,
|
| 3208 |
"step": 2050
|
| 3209 |
+
},
|
| 3210 |
+
{
|
| 3211 |
+
"epoch": 0.2987352812908853,
|
| 3212 |
+
"grad_norm": 2.2958388328552246,
|
| 3213 |
+
"learning_rate": 9.087037358857628e-05,
|
| 3214 |
+
"loss": 2.1674,
|
| 3215 |
+
"step": 2055
|
| 3216 |
+
},
|
| 3217 |
+
{
|
| 3218 |
+
"epoch": 0.29946213112370984,
|
| 3219 |
+
"grad_norm": 2.6375505924224854,
|
| 3220 |
+
"learning_rate": 9.082413186059305e-05,
|
| 3221 |
+
"loss": 2.0371,
|
| 3222 |
+
"step": 2060
|
| 3223 |
+
},
|
| 3224 |
+
{
|
| 3225 |
+
"epoch": 0.3001889809565344,
|
| 3226 |
+
"grad_norm": 2.563561201095581,
|
| 3227 |
+
"learning_rate": 9.077778760554678e-05,
|
| 3228 |
+
"loss": 2.2449,
|
| 3229 |
+
"step": 2065
|
| 3230 |
+
},
|
| 3231 |
+
{
|
| 3232 |
+
"epoch": 0.3009158307893589,
|
| 3233 |
+
"grad_norm": 2.3851094245910645,
|
| 3234 |
+
"learning_rate": 9.07313409401091e-05,
|
| 3235 |
+
"loss": 2.2764,
|
| 3236 |
+
"step": 2070
|
| 3237 |
+
},
|
| 3238 |
+
{
|
| 3239 |
+
"epoch": 0.30164268062218347,
|
| 3240 |
+
"grad_norm": 2.5274460315704346,
|
| 3241 |
+
"learning_rate": 9.068479198120939e-05,
|
| 3242 |
+
"loss": 1.9841,
|
| 3243 |
+
"step": 2075
|
| 3244 |
+
},
|
| 3245 |
+
{
|
| 3246 |
+
"epoch": 0.302369530455008,
|
| 3247 |
+
"grad_norm": 2.51540470123291,
|
| 3248 |
+
"learning_rate": 9.063814084603465e-05,
|
| 3249 |
+
"loss": 2.0978,
|
| 3250 |
+
"step": 2080
|
| 3251 |
+
},
|
| 3252 |
+
{
|
| 3253 |
+
"epoch": 0.30309638028783253,
|
| 3254 |
+
"grad_norm": 2.290086507797241,
|
| 3255 |
+
"learning_rate": 9.059138765202903e-05,
|
| 3256 |
+
"loss": 2.0059,
|
| 3257 |
+
"step": 2085
|
| 3258 |
+
},
|
| 3259 |
+
{
|
| 3260 |
+
"epoch": 0.3038232301206571,
|
| 3261 |
+
"grad_norm": 2.4995152950286865,
|
| 3262 |
+
"learning_rate": 9.054453251689364e-05,
|
| 3263 |
+
"loss": 2.2743,
|
| 3264 |
+
"step": 2090
|
| 3265 |
+
},
|
| 3266 |
+
{
|
| 3267 |
+
"epoch": 0.3045500799534816,
|
| 3268 |
+
"grad_norm": 2.180800199508667,
|
| 3269 |
+
"learning_rate": 9.049757555858624e-05,
|
| 3270 |
+
"loss": 2.1006,
|
| 3271 |
+
"step": 2095
|
| 3272 |
+
},
|
| 3273 |
+
{
|
| 3274 |
+
"epoch": 0.30527692978630616,
|
| 3275 |
+
"grad_norm": 2.5430526733398438,
|
| 3276 |
+
"learning_rate": 9.04505168953209e-05,
|
| 3277 |
+
"loss": 2.2312,
|
| 3278 |
+
"step": 2100
|
| 3279 |
+
},
|
| 3280 |
+
{
|
| 3281 |
+
"epoch": 0.30527692978630616,
|
| 3282 |
+
"eval_loss": 2.094419240951538,
|
| 3283 |
+
"eval_runtime": 21.9822,
|
| 3284 |
+
"eval_samples_per_second": 150.167,
|
| 3285 |
+
"eval_steps_per_second": 9.417,
|
| 3286 |
+
"step": 2100
|
| 3287 |
+
},
|
| 3288 |
+
{
|
| 3289 |
+
"epoch": 0.30600377961913067,
|
| 3290 |
+
"grad_norm": 2.994030237197876,
|
| 3291 |
+
"learning_rate": 9.040335664556774e-05,
|
| 3292 |
+
"loss": 2.1454,
|
| 3293 |
+
"step": 2105
|
| 3294 |
+
},
|
| 3295 |
+
{
|
| 3296 |
+
"epoch": 0.30673062945195523,
|
| 3297 |
+
"grad_norm": 2.507899761199951,
|
| 3298 |
+
"learning_rate": 9.035609492805267e-05,
|
| 3299 |
+
"loss": 2.3506,
|
| 3300 |
+
"step": 2110
|
| 3301 |
+
},
|
| 3302 |
+
{
|
| 3303 |
+
"epoch": 0.3074574792847798,
|
| 3304 |
+
"grad_norm": 2.3985066413879395,
|
| 3305 |
+
"learning_rate": 9.030873186175699e-05,
|
| 3306 |
+
"loss": 2.1076,
|
| 3307 |
+
"step": 2115
|
| 3308 |
+
},
|
| 3309 |
+
{
|
| 3310 |
+
"epoch": 0.3081843291176043,
|
| 3311 |
+
"grad_norm": 2.315556764602661,
|
| 3312 |
+
"learning_rate": 9.026126756591716e-05,
|
| 3313 |
+
"loss": 1.9807,
|
| 3314 |
+
"step": 2120
|
| 3315 |
+
},
|
| 3316 |
+
{
|
| 3317 |
+
"epoch": 0.30891117895042886,
|
| 3318 |
+
"grad_norm": 2.4136197566986084,
|
| 3319 |
+
"learning_rate": 9.021370216002447e-05,
|
| 3320 |
+
"loss": 2.2067,
|
| 3321 |
+
"step": 2125
|
| 3322 |
+
},
|
| 3323 |
+
{
|
| 3324 |
+
"epoch": 0.30963802878325336,
|
| 3325 |
+
"grad_norm": 2.6457340717315674,
|
| 3326 |
+
"learning_rate": 9.016603576382481e-05,
|
| 3327 |
+
"loss": 2.3536,
|
| 3328 |
+
"step": 2130
|
| 3329 |
+
},
|
| 3330 |
+
{
|
| 3331 |
+
"epoch": 0.3103648786160779,
|
| 3332 |
+
"grad_norm": 2.527038097381592,
|
| 3333 |
+
"learning_rate": 9.011826849731824e-05,
|
| 3334 |
+
"loss": 2.1984,
|
| 3335 |
+
"step": 2135
|
| 3336 |
+
},
|
| 3337 |
+
{
|
| 3338 |
+
"epoch": 0.3110917284489025,
|
| 3339 |
+
"grad_norm": 2.422018527984619,
|
| 3340 |
+
"learning_rate": 9.007040048075882e-05,
|
| 3341 |
+
"loss": 2.3617,
|
| 3342 |
+
"step": 2140
|
| 3343 |
+
},
|
| 3344 |
+
{
|
| 3345 |
+
"epoch": 0.311818578281727,
|
| 3346 |
+
"grad_norm": 2.45200777053833,
|
| 3347 |
+
"learning_rate": 9.002243183465422e-05,
|
| 3348 |
+
"loss": 2.2631,
|
| 3349 |
+
"step": 2145
|
| 3350 |
+
},
|
| 3351 |
+
{
|
| 3352 |
+
"epoch": 0.31254542811455155,
|
| 3353 |
+
"grad_norm": 2.2823517322540283,
|
| 3354 |
+
"learning_rate": 8.997436267976544e-05,
|
| 3355 |
+
"loss": 1.9974,
|
| 3356 |
+
"step": 2150
|
| 3357 |
+
},
|
| 3358 |
+
{
|
| 3359 |
+
"epoch": 0.31254542811455155,
|
| 3360 |
+
"eval_loss": 2.101454019546509,
|
| 3361 |
+
"eval_runtime": 18.9568,
|
| 3362 |
+
"eval_samples_per_second": 174.133,
|
| 3363 |
+
"eval_steps_per_second": 10.92,
|
| 3364 |
+
"step": 2150
|
| 3365 |
+
},
|
| 3366 |
+
{
|
| 3367 |
+
"epoch": 0.31327227794737605,
|
| 3368 |
+
"grad_norm": 2.5826852321624756,
|
| 3369 |
+
"learning_rate": 8.992619313710653e-05,
|
| 3370 |
+
"loss": 2.2736,
|
| 3371 |
+
"step": 2155
|
| 3372 |
+
},
|
| 3373 |
+
{
|
| 3374 |
+
"epoch": 0.3139991277802006,
|
| 3375 |
+
"grad_norm": 2.4211437702178955,
|
| 3376 |
+
"learning_rate": 8.987792332794426e-05,
|
| 3377 |
+
"loss": 2.2469,
|
| 3378 |
+
"step": 2160
|
| 3379 |
+
},
|
| 3380 |
+
{
|
| 3381 |
+
"epoch": 0.3147259776130252,
|
| 3382 |
+
"grad_norm": 3.2002980709075928,
|
| 3383 |
+
"learning_rate": 8.98295533737978e-05,
|
| 3384 |
+
"loss": 2.2387,
|
| 3385 |
+
"step": 2165
|
| 3386 |
+
},
|
| 3387 |
+
{
|
| 3388 |
+
"epoch": 0.3154528274458497,
|
| 3389 |
+
"grad_norm": 2.8662610054016113,
|
| 3390 |
+
"learning_rate": 8.978108339643846e-05,
|
| 3391 |
+
"loss": 2.2728,
|
| 3392 |
+
"step": 2170
|
| 3393 |
+
},
|
| 3394 |
+
{
|
| 3395 |
+
"epoch": 0.31617967727867424,
|
| 3396 |
+
"grad_norm": 2.5767691135406494,
|
| 3397 |
+
"learning_rate": 8.973251351788936e-05,
|
| 3398 |
+
"loss": 2.0728,
|
| 3399 |
+
"step": 2175
|
| 3400 |
+
},
|
| 3401 |
+
{
|
| 3402 |
+
"epoch": 0.31690652711149875,
|
| 3403 |
+
"grad_norm": 2.2617924213409424,
|
| 3404 |
+
"learning_rate": 8.968384386042512e-05,
|
| 3405 |
+
"loss": 2.0235,
|
| 3406 |
+
"step": 2180
|
| 3407 |
+
},
|
| 3408 |
+
{
|
| 3409 |
+
"epoch": 0.3176333769443233,
|
| 3410 |
+
"grad_norm": 2.60357928276062,
|
| 3411 |
+
"learning_rate": 8.96350745465715e-05,
|
| 3412 |
+
"loss": 2.0803,
|
| 3413 |
+
"step": 2185
|
| 3414 |
+
},
|
| 3415 |
+
{
|
| 3416 |
+
"epoch": 0.3183602267771478,
|
| 3417 |
+
"grad_norm": 2.360905408859253,
|
| 3418 |
+
"learning_rate": 8.958620569910522e-05,
|
| 3419 |
+
"loss": 2.1212,
|
| 3420 |
+
"step": 2190
|
| 3421 |
+
},
|
| 3422 |
+
{
|
| 3423 |
+
"epoch": 0.3190870766099724,
|
| 3424 |
+
"grad_norm": 2.760329246520996,
|
| 3425 |
+
"learning_rate": 8.953723744105356e-05,
|
| 3426 |
+
"loss": 2.2397,
|
| 3427 |
+
"step": 2195
|
| 3428 |
+
},
|
| 3429 |
+
{
|
| 3430 |
+
"epoch": 0.31981392644279694,
|
| 3431 |
+
"grad_norm": 2.653019428253174,
|
| 3432 |
+
"learning_rate": 8.948816989569402e-05,
|
| 3433 |
+
"loss": 2.1049,
|
| 3434 |
+
"step": 2200
|
| 3435 |
+
},
|
| 3436 |
+
{
|
| 3437 |
+
"epoch": 0.31981392644279694,
|
| 3438 |
+
"eval_loss": 2.08577561378479,
|
| 3439 |
+
"eval_runtime": 18.7698,
|
| 3440 |
+
"eval_samples_per_second": 175.867,
|
| 3441 |
+
"eval_steps_per_second": 11.028,
|
| 3442 |
+
"step": 2200
|
| 3443 |
}
|
| 3444 |
],
|
| 3445 |
"logging_steps": 5,
|
|
|
|
| 3468 |
"attributes": {}
|
| 3469 |
}
|
| 3470 |
},
|
| 3471 |
+
"total_flos": 5.7344243893744435e+17,
|
| 3472 |
"train_batch_size": 4,
|
| 3473 |
"trial_name": null,
|
| 3474 |
"trial_params": null
|