Training in progress, step 3600, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fcde8fff671a9e3943a7206216f5aa93c2c1394ad15e27c488e3a5ce5334895
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efd66e2f145ff8ba7f09e1ad5b11fdf963e4fd8ce95a14181fab94269e9fb8ca
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44222b1bb3193020a7e558d8efc91533b7bf22b40de2edd049f9d11da894b760
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3293218b6796a13a9f95a7300ab605072092402c0dbdc9fe7b53627646555830
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3342,6 +3342,151 @@
|
|
| 3342 |
"EMA_steps_per_second": 24.892,
|
| 3343 |
"epoch": 150.0,
|
| 3344 |
"step": 3450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3345 |
}
|
| 3346 |
],
|
| 3347 |
"logging_steps": 10,
|
|
@@ -3361,7 +3506,7 @@
|
|
| 3361 |
"attributes": {}
|
| 3362 |
}
|
| 3363 |
},
|
| 3364 |
-
"total_flos":
|
| 3365 |
"train_batch_size": 4,
|
| 3366 |
"trial_name": null,
|
| 3367 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 156.52173913043478,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 3600,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3342 |
"EMA_steps_per_second": 24.892,
|
| 3343 |
"epoch": 150.0,
|
| 3344 |
"step": 3450
|
| 3345 |
+
},
|
| 3346 |
+
{
|
| 3347 |
+
"epoch": 150.43478260869566,
|
| 3348 |
+
"grad_norm": 1.7044988870620728,
|
| 3349 |
+
"learning_rate": 2.513945738151511e-07,
|
| 3350 |
+
"loss": 0.2135,
|
| 3351 |
+
"step": 3460
|
| 3352 |
+
},
|
| 3353 |
+
{
|
| 3354 |
+
"epoch": 150.8695652173913,
|
| 3355 |
+
"grad_norm": 2.001293897628784,
|
| 3356 |
+
"learning_rate": 5.027891476303022e-07,
|
| 3357 |
+
"loss": 0.2623,
|
| 3358 |
+
"step": 3470
|
| 3359 |
+
},
|
| 3360 |
+
{
|
| 3361 |
+
"epoch": 151.30434782608697,
|
| 3362 |
+
"grad_norm": 1.6400986909866333,
|
| 3363 |
+
"learning_rate": 7.541837214454532e-07,
|
| 3364 |
+
"loss": 0.1997,
|
| 3365 |
+
"step": 3480
|
| 3366 |
+
},
|
| 3367 |
+
{
|
| 3368 |
+
"epoch": 151.7391304347826,
|
| 3369 |
+
"grad_norm": 2.337966203689575,
|
| 3370 |
+
"learning_rate": 1.0055782952606044e-06,
|
| 3371 |
+
"loss": 0.2472,
|
| 3372 |
+
"step": 3490
|
| 3373 |
+
},
|
| 3374 |
+
{
|
| 3375 |
+
"epoch": 152.17391304347825,
|
| 3376 |
+
"grad_norm": 2.081322431564331,
|
| 3377 |
+
"learning_rate": 1.2569728690757554e-06,
|
| 3378 |
+
"loss": 0.2426,
|
| 3379 |
+
"step": 3500
|
| 3380 |
+
},
|
| 3381 |
+
{
|
| 3382 |
+
"epoch": 152.6086956521739,
|
| 3383 |
+
"grad_norm": 1.6173598766326904,
|
| 3384 |
+
"learning_rate": 1.5083674428909064e-06,
|
| 3385 |
+
"loss": 0.2398,
|
| 3386 |
+
"step": 3510
|
| 3387 |
+
},
|
| 3388 |
+
{
|
| 3389 |
+
"epoch": 153.04347826086956,
|
| 3390 |
+
"grad_norm": 1.571141004562378,
|
| 3391 |
+
"learning_rate": 1.7597620167060574e-06,
|
| 3392 |
+
"loss": 0.2069,
|
| 3393 |
+
"step": 3520
|
| 3394 |
+
},
|
| 3395 |
+
{
|
| 3396 |
+
"epoch": 153.47826086956522,
|
| 3397 |
+
"grad_norm": 2.327928066253662,
|
| 3398 |
+
"learning_rate": 2.011156590521209e-06,
|
| 3399 |
+
"loss": 0.2502,
|
| 3400 |
+
"step": 3530
|
| 3401 |
+
},
|
| 3402 |
+
{
|
| 3403 |
+
"epoch": 153.91304347826087,
|
| 3404 |
+
"grad_norm": 2.673839807510376,
|
| 3405 |
+
"learning_rate": 2.2625511643363598e-06,
|
| 3406 |
+
"loss": 0.232,
|
| 3407 |
+
"step": 3540
|
| 3408 |
+
},
|
| 3409 |
+
{
|
| 3410 |
+
"epoch": 154.34782608695653,
|
| 3411 |
+
"grad_norm": 2.2869648933410645,
|
| 3412 |
+
"learning_rate": 2.5139457381515108e-06,
|
| 3413 |
+
"loss": 0.2399,
|
| 3414 |
+
"step": 3550
|
| 3415 |
+
},
|
| 3416 |
+
{
|
| 3417 |
+
"epoch": 154.7826086956522,
|
| 3418 |
+
"grad_norm": 2.043811798095703,
|
| 3419 |
+
"learning_rate": 2.5139454890395686e-06,
|
| 3420 |
+
"loss": 0.2345,
|
| 3421 |
+
"step": 3560
|
| 3422 |
+
},
|
| 3423 |
+
{
|
| 3424 |
+
"epoch": 155.2173913043478,
|
| 3425 |
+
"grad_norm": 1.682305932044983,
|
| 3426 |
+
"learning_rate": 2.51394474170384e-06,
|
| 3427 |
+
"loss": 0.1958,
|
| 3428 |
+
"step": 3570
|
| 3429 |
+
},
|
| 3430 |
+
{
|
| 3431 |
+
"epoch": 155.65217391304347,
|
| 3432 |
+
"grad_norm": 2.0729916095733643,
|
| 3433 |
+
"learning_rate": 2.5139434961446224e-06,
|
| 3434 |
+
"loss": 0.2663,
|
| 3435 |
+
"step": 3580
|
| 3436 |
+
},
|
| 3437 |
+
{
|
| 3438 |
+
"epoch": 156.08695652173913,
|
| 3439 |
+
"grad_norm": 1.6533286571502686,
|
| 3440 |
+
"learning_rate": 2.513941752362408e-06,
|
| 3441 |
+
"loss": 0.2031,
|
| 3442 |
+
"step": 3590
|
| 3443 |
+
},
|
| 3444 |
+
{
|
| 3445 |
+
"epoch": 156.52173913043478,
|
| 3446 |
+
"grad_norm": 2.51108980178833,
|
| 3447 |
+
"learning_rate": 2.5139395103578894e-06,
|
| 3448 |
+
"loss": 0.2679,
|
| 3449 |
+
"step": 3600
|
| 3450 |
+
},
|
| 3451 |
+
{
|
| 3452 |
+
"epoch": 156.52173913043478,
|
| 3453 |
+
"eval_loss": 0.9608185887336731,
|
| 3454 |
+
"eval_runtime": 0.4253,
|
| 3455 |
+
"eval_samples_per_second": 23.515,
|
| 3456 |
+
"eval_steps_per_second": 23.515,
|
| 3457 |
+
"step": 3600
|
| 3458 |
+
},
|
| 3459 |
+
{
|
| 3460 |
+
"Start_State_loss": 0.861186683177948,
|
| 3461 |
+
"Start_State_runtime": 0.413,
|
| 3462 |
+
"Start_State_samples_per_second": 24.215,
|
| 3463 |
+
"Start_State_steps_per_second": 24.215,
|
| 3464 |
+
"epoch": 156.52173913043478,
|
| 3465 |
+
"step": 3600
|
| 3466 |
+
},
|
| 3467 |
+
{
|
| 3468 |
+
"Raw_Model_loss": 0.9608185887336731,
|
| 3469 |
+
"Raw_Model_runtime": 0.419,
|
| 3470 |
+
"Raw_Model_samples_per_second": 23.864,
|
| 3471 |
+
"Raw_Model_steps_per_second": 23.864,
|
| 3472 |
+
"epoch": 156.52173913043478,
|
| 3473 |
+
"step": 3600
|
| 3474 |
+
},
|
| 3475 |
+
{
|
| 3476 |
+
"SWA_loss": 0.7903212308883667,
|
| 3477 |
+
"SWA_runtime": 0.4071,
|
| 3478 |
+
"SWA_samples_per_second": 24.562,
|
| 3479 |
+
"SWA_steps_per_second": 24.562,
|
| 3480 |
+
"epoch": 156.52173913043478,
|
| 3481 |
+
"step": 3600
|
| 3482 |
+
},
|
| 3483 |
+
{
|
| 3484 |
+
"EMA_loss": 0.8596304059028625,
|
| 3485 |
+
"EMA_runtime": 0.4003,
|
| 3486 |
+
"EMA_samples_per_second": 24.982,
|
| 3487 |
+
"EMA_steps_per_second": 24.982,
|
| 3488 |
+
"epoch": 156.52173913043478,
|
| 3489 |
+
"step": 3600
|
| 3490 |
}
|
| 3491 |
],
|
| 3492 |
"logging_steps": 10,
|
|
|
|
| 3506 |
"attributes": {}
|
| 3507 |
}
|
| 3508 |
},
|
| 3509 |
+
"total_flos": 9.28760054861906e+16,
|
| 3510 |
"train_batch_size": 4,
|
| 3511 |
"trial_name": null,
|
| 3512 |
"trial_params": null
|