Training in progress, step 4800, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1482788592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ece60875a5af29008a1db947f37d0c4e41a5f40d67190f9268f23085b8ae7125
|
| 3 |
size 1482788592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2897966842
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b613bbe7e00aa50cc4953564c3dd94412444451a44d0c95d0dfbdaf287ec8a09
|
| 3 |
size 2897966842
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc93c8df3d2c508d95b256c21be191d97f1b117d9c86f242d9f503ffa40419f3
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1256
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:716d7ef0d2def98440e32b2cba336f73e613b85c0427aef8f0c8a6789d61bd46
|
| 3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.6921895742416382,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-4500",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3397,6 +3397,232 @@
|
|
| 3397 |
"eval_samples_per_second": 10.053,
|
| 3398 |
"eval_steps_per_second": 10.053,
|
| 3399 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3400 |
}
|
| 3401 |
],
|
| 3402 |
"logging_steps": 10,
|
|
@@ -3416,7 +3642,7 @@
|
|
| 3416 |
"attributes": {}
|
| 3417 |
}
|
| 3418 |
},
|
| 3419 |
-
"total_flos":
|
| 3420 |
"train_batch_size": 4,
|
| 3421 |
"trial_name": null,
|
| 3422 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.6921895742416382,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-4500",
|
| 4 |
+
"epoch": 0.15508384220219057,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 4800,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3397 |
"eval_samples_per_second": 10.053,
|
| 3398 |
"eval_steps_per_second": 10.053,
|
| 3399 |
"step": 4500
|
| 3400 |
+
},
|
| 3401 |
+
{
|
| 3402 |
+
"epoch": 0.14571419340247488,
|
| 3403 |
+
"grad_norm": 7.294159889221191,
|
| 3404 |
+
"learning_rate": 5.472047830984499e-07,
|
| 3405 |
+
"loss": 1.7577,
|
| 3406 |
+
"step": 4510
|
| 3407 |
+
},
|
| 3408 |
+
{
|
| 3409 |
+
"epoch": 0.14603728474039612,
|
| 3410 |
+
"grad_norm": 9.507523536682129,
|
| 3411 |
+
"learning_rate": 5.252725889984403e-07,
|
| 3412 |
+
"loss": 1.7748,
|
| 3413 |
+
"step": 4520
|
| 3414 |
+
},
|
| 3415 |
+
{
|
| 3416 |
+
"epoch": 0.14636037607831734,
|
| 3417 |
+
"grad_norm": 10.296547889709473,
|
| 3418 |
+
"learning_rate": 5.037783829820298e-07,
|
| 3419 |
+
"loss": 1.6676,
|
| 3420 |
+
"step": 4530
|
| 3421 |
+
},
|
| 3422 |
+
{
|
| 3423 |
+
"epoch": 0.14668346741623858,
|
| 3424 |
+
"grad_norm": 10.683934211730957,
|
| 3425 |
+
"learning_rate": 4.827230485918372e-07,
|
| 3426 |
+
"loss": 1.703,
|
| 3427 |
+
"step": 4540
|
| 3428 |
+
},
|
| 3429 |
+
{
|
| 3430 |
+
"epoch": 0.1470065587541598,
|
| 3431 |
+
"grad_norm": 13.149202346801758,
|
| 3432 |
+
"learning_rate": 4.6210745133019236e-07,
|
| 3433 |
+
"loss": 1.8596,
|
| 3434 |
+
"step": 4550
|
| 3435 |
+
},
|
| 3436 |
+
{
|
| 3437 |
+
"epoch": 0.14732965009208104,
|
| 3438 |
+
"grad_norm": 10.627421379089355,
|
| 3439 |
+
"learning_rate": 4.419324386235529e-07,
|
| 3440 |
+
"loss": 1.5863,
|
| 3441 |
+
"step": 4560
|
| 3442 |
+
},
|
| 3443 |
+
{
|
| 3444 |
+
"epoch": 0.14765274143000226,
|
| 3445 |
+
"grad_norm": 8.185441970825195,
|
| 3446 |
+
"learning_rate": 4.2219883978767386e-07,
|
| 3447 |
+
"loss": 1.7421,
|
| 3448 |
+
"step": 4570
|
| 3449 |
+
},
|
| 3450 |
+
{
|
| 3451 |
+
"epoch": 0.1479758327679235,
|
| 3452 |
+
"grad_norm": 6.5582804679870605,
|
| 3453 |
+
"learning_rate": 4.029074659935082e-07,
|
| 3454 |
+
"loss": 1.7486,
|
| 3455 |
+
"step": 4580
|
| 3456 |
+
},
|
| 3457 |
+
{
|
| 3458 |
+
"epoch": 0.14829892410584472,
|
| 3459 |
+
"grad_norm": 7.293984413146973,
|
| 3460 |
+
"learning_rate": 3.8405911023387444e-07,
|
| 3461 |
+
"loss": 1.7631,
|
| 3462 |
+
"step": 4590
|
| 3463 |
+
},
|
| 3464 |
+
{
|
| 3465 |
+
"epoch": 0.14862201544376596,
|
| 3466 |
+
"grad_norm": 10.495855331420898,
|
| 3467 |
+
"learning_rate": 3.6565454729085526e-07,
|
| 3468 |
+
"loss": 1.8289,
|
| 3469 |
+
"step": 4600
|
| 3470 |
+
},
|
| 3471 |
+
{
|
| 3472 |
+
"epoch": 0.14894510678168718,
|
| 3473 |
+
"grad_norm": 7.07685661315918,
|
| 3474 |
+
"learning_rate": 3.4769453370394753e-07,
|
| 3475 |
+
"loss": 1.6386,
|
| 3476 |
+
"step": 4610
|
| 3477 |
+
},
|
| 3478 |
+
{
|
| 3479 |
+
"epoch": 0.14926819811960843,
|
| 3480 |
+
"grad_norm": 8.069764137268066,
|
| 3481 |
+
"learning_rate": 3.301798077389637e-07,
|
| 3482 |
+
"loss": 1.585,
|
| 3483 |
+
"step": 4620
|
| 3484 |
+
},
|
| 3485 |
+
{
|
| 3486 |
+
"epoch": 0.14959128945752964,
|
| 3487 |
+
"grad_norm": 8.399779319763184,
|
| 3488 |
+
"learning_rate": 3.1311108935768926e-07,
|
| 3489 |
+
"loss": 1.5544,
|
| 3490 |
+
"step": 4630
|
| 3491 |
+
},
|
| 3492 |
+
{
|
| 3493 |
+
"epoch": 0.14991438079545089,
|
| 3494 |
+
"grad_norm": 7.10072660446167,
|
| 3495 |
+
"learning_rate": 2.964890801882817e-07,
|
| 3496 |
+
"loss": 1.7765,
|
| 3497 |
+
"step": 4640
|
| 3498 |
+
},
|
| 3499 |
+
{
|
| 3500 |
+
"epoch": 0.1502374721333721,
|
| 3501 |
+
"grad_norm": 12.693696022033691,
|
| 3502 |
+
"learning_rate": 2.8031446349643393e-07,
|
| 3503 |
+
"loss": 1.5691,
|
| 3504 |
+
"step": 4650
|
| 3505 |
+
},
|
| 3506 |
+
{
|
| 3507 |
+
"epoch": 0.1502374721333721,
|
| 3508 |
+
"eval_loss": 1.6924811601638794,
|
| 3509 |
+
"eval_runtime": 50.4888,
|
| 3510 |
+
"eval_samples_per_second": 9.923,
|
| 3511 |
+
"eval_steps_per_second": 9.923,
|
| 3512 |
+
"step": 4650
|
| 3513 |
+
},
|
| 3514 |
+
{
|
| 3515 |
+
"epoch": 0.15056056347129335,
|
| 3516 |
+
"grad_norm": 8.841912269592285,
|
| 3517 |
+
"learning_rate": 2.645879041572891e-07,
|
| 3518 |
+
"loss": 1.6589,
|
| 3519 |
+
"step": 4660
|
| 3520 |
+
},
|
| 3521 |
+
{
|
| 3522 |
+
"epoch": 0.15088365480921456,
|
| 3523 |
+
"grad_norm": 7.690126895904541,
|
| 3524 |
+
"learning_rate": 2.4931004862810295e-07,
|
| 3525 |
+
"loss": 1.7137,
|
| 3526 |
+
"step": 4670
|
| 3527 |
+
},
|
| 3528 |
+
{
|
| 3529 |
+
"epoch": 0.1512067461471358,
|
| 3530 |
+
"grad_norm": 14.600467681884766,
|
| 3531 |
+
"learning_rate": 2.3448152492167586e-07,
|
| 3532 |
+
"loss": 1.8001,
|
| 3533 |
+
"step": 4680
|
| 3534 |
+
},
|
| 3535 |
+
{
|
| 3536 |
+
"epoch": 0.15152983748505702,
|
| 3537 |
+
"grad_norm": 8.619688034057617,
|
| 3538 |
+
"learning_rate": 2.201029425805393e-07,
|
| 3539 |
+
"loss": 1.7615,
|
| 3540 |
+
"step": 4690
|
| 3541 |
+
},
|
| 3542 |
+
{
|
| 3543 |
+
"epoch": 0.15185292882297827,
|
| 3544 |
+
"grad_norm": 12.033727645874023,
|
| 3545 |
+
"learning_rate": 2.061748926518972e-07,
|
| 3546 |
+
"loss": 1.6317,
|
| 3547 |
+
"step": 4700
|
| 3548 |
+
},
|
| 3549 |
+
{
|
| 3550 |
+
"epoch": 0.15217602016089948,
|
| 3551 |
+
"grad_norm": 9.276659965515137,
|
| 3552 |
+
"learning_rate": 1.9269794766333073e-07,
|
| 3553 |
+
"loss": 1.6155,
|
| 3554 |
+
"step": 4710
|
| 3555 |
+
},
|
| 3556 |
+
{
|
| 3557 |
+
"epoch": 0.15249911149882073,
|
| 3558 |
+
"grad_norm": 8.645523071289062,
|
| 3559 |
+
"learning_rate": 1.7967266159925864e-07,
|
| 3560 |
+
"loss": 1.5958,
|
| 3561 |
+
"step": 4720
|
| 3562 |
+
},
|
| 3563 |
+
{
|
| 3564 |
+
"epoch": 0.15282220283674194,
|
| 3565 |
+
"grad_norm": 13.718961715698242,
|
| 3566 |
+
"learning_rate": 1.670995698781777e-07,
|
| 3567 |
+
"loss": 1.5768,
|
| 3568 |
+
"step": 4730
|
| 3569 |
+
},
|
| 3570 |
+
{
|
| 3571 |
+
"epoch": 0.1531452941746632,
|
| 3572 |
+
"grad_norm": 12.2525634765625,
|
| 3573 |
+
"learning_rate": 1.549791893306424e-07,
|
| 3574 |
+
"loss": 1.571,
|
| 3575 |
+
"step": 4740
|
| 3576 |
+
},
|
| 3577 |
+
{
|
| 3578 |
+
"epoch": 0.1534683855125844,
|
| 3579 |
+
"grad_norm": 7.851583003997803,
|
| 3580 |
+
"learning_rate": 1.4331201817802332e-07,
|
| 3581 |
+
"loss": 1.7923,
|
| 3582 |
+
"step": 4750
|
| 3583 |
+
},
|
| 3584 |
+
{
|
| 3585 |
+
"epoch": 0.15379147685050565,
|
| 3586 |
+
"grad_norm": 10.048659324645996,
|
| 3587 |
+
"learning_rate": 1.320985360120322e-07,
|
| 3588 |
+
"loss": 1.7102,
|
| 3589 |
+
"step": 4760
|
| 3590 |
+
},
|
| 3591 |
+
{
|
| 3592 |
+
"epoch": 0.15411456818842686,
|
| 3593 |
+
"grad_norm": 9.430795669555664,
|
| 3594 |
+
"learning_rate": 1.2133920377499848e-07,
|
| 3595 |
+
"loss": 1.6879,
|
| 3596 |
+
"step": 4770
|
| 3597 |
+
},
|
| 3598 |
+
{
|
| 3599 |
+
"epoch": 0.1544376595263481,
|
| 3600 |
+
"grad_norm": 12.329809188842773,
|
| 3601 |
+
"learning_rate": 1.1103446374092981e-07,
|
| 3602 |
+
"loss": 1.7557,
|
| 3603 |
+
"step": 4780
|
| 3604 |
+
},
|
| 3605 |
+
{
|
| 3606 |
+
"epoch": 0.15476075086426933,
|
| 3607 |
+
"grad_norm": 11.180129051208496,
|
| 3608 |
+
"learning_rate": 1.0118473949732765e-07,
|
| 3609 |
+
"loss": 1.7791,
|
| 3610 |
+
"step": 4790
|
| 3611 |
+
},
|
| 3612 |
+
{
|
| 3613 |
+
"epoch": 0.15508384220219057,
|
| 3614 |
+
"grad_norm": 8.690634727478027,
|
| 3615 |
+
"learning_rate": 9.179043592777716e-08,
|
| 3616 |
+
"loss": 1.6464,
|
| 3617 |
+
"step": 4800
|
| 3618 |
+
},
|
| 3619 |
+
{
|
| 3620 |
+
"epoch": 0.15508384220219057,
|
| 3621 |
+
"eval_loss": 1.6925097703933716,
|
| 3622 |
+
"eval_runtime": 44.8573,
|
| 3623 |
+
"eval_samples_per_second": 11.169,
|
| 3624 |
+
"eval_steps_per_second": 11.169,
|
| 3625 |
+
"step": 4800
|
| 3626 |
}
|
| 3627 |
],
|
| 3628 |
"logging_steps": 10,
|
|
|
|
| 3642 |
"attributes": {}
|
| 3643 |
}
|
| 3644 |
},
|
| 3645 |
+
"total_flos": 4.0155209275981824e+17,
|
| 3646 |
"train_batch_size": 4,
|
| 3647 |
"trial_name": null,
|
| 3648 |
"trial_params": null
|