Training in progress, step 3600, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a7db4ae93951b0eb394bb0a363f73cd5df34f9278223503ea607797313cdef9
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:550ddc0253077b9ade8068188ab7383f87735a416196347e817b58cdd6eecfa7
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79ae35034e3077f87418b20f4a24e69590c4f56a313fa0284d685c7f3a1b03d8
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92a0ba1807c4ff64f4d8fc6d84a7a517689523073c7ea31a60948b80a14d9e61
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.717534065246582,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3302,6 +3302,151 @@
|
|
| 3302 |
"EMA_steps_per_second": 25.002,
|
| 3303 |
"epoch": 150.0,
|
| 3304 |
"step": 3450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3305 |
}
|
| 3306 |
],
|
| 3307 |
"logging_steps": 10,
|
|
@@ -3321,7 +3466,7 @@
|
|
| 3321 |
"attributes": {}
|
| 3322 |
}
|
| 3323 |
},
|
| 3324 |
-
"total_flos":
|
| 3325 |
"train_batch_size": 4,
|
| 3326 |
"trial_name": null,
|
| 3327 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.717534065246582,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 156.52173913043478,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 3600,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3302 |
"EMA_steps_per_second": 25.002,
|
| 3303 |
"epoch": 150.0,
|
| 3304 |
"step": 3450
|
| 3305 |
+
},
|
| 3306 |
+
{
|
| 3307 |
+
"epoch": 150.43478260869566,
|
| 3308 |
+
"grad_norm": 1.7541120052337646,
|
| 3309 |
+
"learning_rate": 2.4672082280509036e-07,
|
| 3310 |
+
"loss": 0.214,
|
| 3311 |
+
"step": 3460
|
| 3312 |
+
},
|
| 3313 |
+
{
|
| 3314 |
+
"epoch": 150.8695652173913,
|
| 3315 |
+
"grad_norm": 2.0008656978607178,
|
| 3316 |
+
"learning_rate": 4.934416456101807e-07,
|
| 3317 |
+
"loss": 0.2627,
|
| 3318 |
+
"step": 3470
|
| 3319 |
+
},
|
| 3320 |
+
{
|
| 3321 |
+
"epoch": 151.30434782608697,
|
| 3322 |
+
"grad_norm": 1.6539170742034912,
|
| 3323 |
+
"learning_rate": 7.40162468415271e-07,
|
| 3324 |
+
"loss": 0.2,
|
| 3325 |
+
"step": 3480
|
| 3326 |
+
},
|
| 3327 |
+
{
|
| 3328 |
+
"epoch": 151.7391304347826,
|
| 3329 |
+
"grad_norm": 2.369926691055298,
|
| 3330 |
+
"learning_rate": 9.868832912203614e-07,
|
| 3331 |
+
"loss": 0.2478,
|
| 3332 |
+
"step": 3490
|
| 3333 |
+
},
|
| 3334 |
+
{
|
| 3335 |
+
"epoch": 152.17391304347825,
|
| 3336 |
+
"grad_norm": 2.07112979888916,
|
| 3337 |
+
"learning_rate": 1.2336041140254517e-06,
|
| 3338 |
+
"loss": 0.2427,
|
| 3339 |
+
"step": 3500
|
| 3340 |
+
},
|
| 3341 |
+
{
|
| 3342 |
+
"epoch": 152.6086956521739,
|
| 3343 |
+
"grad_norm": 1.6030749082565308,
|
| 3344 |
+
"learning_rate": 1.480324936830542e-06,
|
| 3345 |
+
"loss": 0.2402,
|
| 3346 |
+
"step": 3510
|
| 3347 |
+
},
|
| 3348 |
+
{
|
| 3349 |
+
"epoch": 153.04347826086956,
|
| 3350 |
+
"grad_norm": 1.5949645042419434,
|
| 3351 |
+
"learning_rate": 1.7270457596356322e-06,
|
| 3352 |
+
"loss": 0.2072,
|
| 3353 |
+
"step": 3520
|
| 3354 |
+
},
|
| 3355 |
+
{
|
| 3356 |
+
"epoch": 153.47826086956522,
|
| 3357 |
+
"grad_norm": 2.338641881942749,
|
| 3358 |
+
"learning_rate": 1.973766582440723e-06,
|
| 3359 |
+
"loss": 0.2506,
|
| 3360 |
+
"step": 3530
|
| 3361 |
+
},
|
| 3362 |
+
{
|
| 3363 |
+
"epoch": 153.91304347826087,
|
| 3364 |
+
"grad_norm": 2.719093084335327,
|
| 3365 |
+
"learning_rate": 2.220487405245813e-06,
|
| 3366 |
+
"loss": 0.2321,
|
| 3367 |
+
"step": 3540
|
| 3368 |
+
},
|
| 3369 |
+
{
|
| 3370 |
+
"epoch": 154.34782608695653,
|
| 3371 |
+
"grad_norm": 2.292358636856079,
|
| 3372 |
+
"learning_rate": 2.4672082280509034e-06,
|
| 3373 |
+
"loss": 0.2404,
|
| 3374 |
+
"step": 3550
|
| 3375 |
+
},
|
| 3376 |
+
{
|
| 3377 |
+
"epoch": 154.7826086956522,
|
| 3378 |
+
"grad_norm": 2.0019381046295166,
|
| 3379 |
+
"learning_rate": 2.4672079835702752e-06,
|
| 3380 |
+
"loss": 0.2343,
|
| 3381 |
+
"step": 3560
|
| 3382 |
+
},
|
| 3383 |
+
{
|
| 3384 |
+
"epoch": 155.2173913043478,
|
| 3385 |
+
"grad_norm": 1.6779125928878784,
|
| 3386 |
+
"learning_rate": 2.4672072501284865e-06,
|
| 3387 |
+
"loss": 0.1963,
|
| 3388 |
+
"step": 3570
|
| 3389 |
+
},
|
| 3390 |
+
{
|
| 3391 |
+
"epoch": 155.65217391304347,
|
| 3392 |
+
"grad_norm": 2.0632243156433105,
|
| 3393 |
+
"learning_rate": 2.467206027725829e-06,
|
| 3394 |
+
"loss": 0.267,
|
| 3395 |
+
"step": 3580
|
| 3396 |
+
},
|
| 3397 |
+
{
|
| 3398 |
+
"epoch": 156.08695652173913,
|
| 3399 |
+
"grad_norm": 1.6089539527893066,
|
| 3400 |
+
"learning_rate": 2.467204316362787e-06,
|
| 3401 |
+
"loss": 0.2034,
|
| 3402 |
+
"step": 3590
|
| 3403 |
+
},
|
| 3404 |
+
{
|
| 3405 |
+
"epoch": 156.52173913043478,
|
| 3406 |
+
"grad_norm": 2.475633382797241,
|
| 3407 |
+
"learning_rate": 2.4672021160400387e-06,
|
| 3408 |
+
"loss": 0.2685,
|
| 3409 |
+
"step": 3600
|
| 3410 |
+
},
|
| 3411 |
+
{
|
| 3412 |
+
"epoch": 156.52173913043478,
|
| 3413 |
+
"eval_loss": 0.9592596292495728,
|
| 3414 |
+
"eval_runtime": 0.4813,
|
| 3415 |
+
"eval_samples_per_second": 20.778,
|
| 3416 |
+
"eval_steps_per_second": 20.778,
|
| 3417 |
+
"step": 3600
|
| 3418 |
+
},
|
| 3419 |
+
{
|
| 3420 |
+
"Start_State_loss": 0.7309322357177734,
|
| 3421 |
+
"Start_State_runtime": 0.4223,
|
| 3422 |
+
"Start_State_samples_per_second": 23.679,
|
| 3423 |
+
"Start_State_steps_per_second": 23.679,
|
| 3424 |
+
"epoch": 156.52173913043478,
|
| 3425 |
+
"step": 3600
|
| 3426 |
+
},
|
| 3427 |
+
{
|
| 3428 |
+
"Raw_Model_loss": 0.9592596292495728,
|
| 3429 |
+
"Raw_Model_runtime": 0.3944,
|
| 3430 |
+
"Raw_Model_samples_per_second": 25.356,
|
| 3431 |
+
"Raw_Model_steps_per_second": 25.356,
|
| 3432 |
+
"epoch": 156.52173913043478,
|
| 3433 |
+
"step": 3600
|
| 3434 |
+
},
|
| 3435 |
+
{
|
| 3436 |
+
"SWA_loss": 0.8119293451309204,
|
| 3437 |
+
"SWA_runtime": 0.3904,
|
| 3438 |
+
"SWA_samples_per_second": 25.615,
|
| 3439 |
+
"SWA_steps_per_second": 25.615,
|
| 3440 |
+
"epoch": 156.52173913043478,
|
| 3441 |
+
"step": 3600
|
| 3442 |
+
},
|
| 3443 |
+
{
|
| 3444 |
+
"EMA_loss": 0.7311049103736877,
|
| 3445 |
+
"EMA_runtime": 0.4017,
|
| 3446 |
+
"EMA_samples_per_second": 24.896,
|
| 3447 |
+
"EMA_steps_per_second": 24.896,
|
| 3448 |
+
"epoch": 156.52173913043478,
|
| 3449 |
+
"step": 3600
|
| 3450 |
}
|
| 3451 |
],
|
| 3452 |
"logging_steps": 10,
|
|
|
|
| 3466 |
"attributes": {}
|
| 3467 |
}
|
| 3468 |
},
|
| 3469 |
+
"total_flos": 9.28760054861906e+16,
|
| 3470 |
"train_batch_size": 4,
|
| 3471 |
"trial_name": null,
|
| 3472 |
"trial_params": null
|