Training in progress, step 11000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c44d6488e642b06142cd46a4094e9c0a6f469f36493fd58ad5c3d0e96ec36e4
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf4416085db9fad5f4e22c85af55c9af92326a09616b1325abe5de95aa64bef3
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a9c47849ad44860f45019fca12bd8b47e7589be1317a01ad6705b924156a6be
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d77946d2c30708215d82675369c6b0f4ea0ac50e0bfa8851a58c893e34baac40
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -7533,6 +7533,364 @@
|
|
| 7533 |
"eval_samples_per_second": 276.668,
|
| 7534 |
"eval_steps_per_second": 5.81,
|
| 7535 |
"step": 10500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7536 |
}
|
| 7537 |
],
|
| 7538 |
"logging_steps": 10,
|
|
@@ -7552,7 +7910,7 @@
|
|
| 7552 |
"attributes": {}
|
| 7553 |
}
|
| 7554 |
},
|
| 7555 |
-
"total_flos": 3.
|
| 7556 |
"train_batch_size": 48,
|
| 7557 |
"trial_name": null,
|
| 7558 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.8584220307484371,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 11000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 7533 |
"eval_samples_per_second": 276.668,
|
| 7534 |
"eval_steps_per_second": 5.81,
|
| 7535 |
"step": 10500
|
| 7536 |
+
},
|
| 7537 |
+
{
|
| 7538 |
+
"epoch": 1.7756377766514615,
|
| 7539 |
+
"grad_norm": 0.4605439007282257,
|
| 7540 |
+
"learning_rate": 1.330666277084756e-05,
|
| 7541 |
+
"loss": 4.323733139038086,
|
| 7542 |
+
"step": 10510
|
| 7543 |
+
},
|
| 7544 |
+
{
|
| 7545 |
+
"epoch": 1.777327251224869,
|
| 7546 |
+
"grad_norm": 0.45236051082611084,
|
| 7547 |
+
"learning_rate": 1.3110123947820345e-05,
|
| 7548 |
+
"loss": 4.295808410644531,
|
| 7549 |
+
"step": 10520
|
| 7550 |
+
},
|
| 7551 |
+
{
|
| 7552 |
+
"epoch": 1.7790167257982767,
|
| 7553 |
+
"grad_norm": 0.44899049401283264,
|
| 7554 |
+
"learning_rate": 1.2914981033673616e-05,
|
| 7555 |
+
"loss": 4.3012432098388675,
|
| 7556 |
+
"step": 10530
|
| 7557 |
+
},
|
| 7558 |
+
{
|
| 7559 |
+
"epoch": 1.7807062003716845,
|
| 7560 |
+
"grad_norm": 0.45084264874458313,
|
| 7561 |
+
"learning_rate": 1.2721236018340675e-05,
|
| 7562 |
+
"loss": 4.251883316040039,
|
| 7563 |
+
"step": 10540
|
| 7564 |
+
},
|
| 7565 |
+
{
|
| 7566 |
+
"epoch": 1.782395674945092,
|
| 7567 |
+
"grad_norm": 0.4470881521701813,
|
| 7568 |
+
"learning_rate": 1.2528890877500025e-05,
|
| 7569 |
+
"loss": 4.3038276672363285,
|
| 7570 |
+
"step": 10550
|
| 7571 |
+
},
|
| 7572 |
+
{
|
| 7573 |
+
"epoch": 1.7840851495184997,
|
| 7574 |
+
"grad_norm": 0.4608835279941559,
|
| 7575 |
+
"learning_rate": 1.2337947572555257e-05,
|
| 7576 |
+
"loss": 4.313830947875976,
|
| 7577 |
+
"step": 10560
|
| 7578 |
+
},
|
| 7579 |
+
{
|
| 7580 |
+
"epoch": 1.7857746240919075,
|
| 7581 |
+
"grad_norm": 0.4542098045349121,
|
| 7582 |
+
"learning_rate": 1.2148408050614961e-05,
|
| 7583 |
+
"loss": 4.274040985107422,
|
| 7584 |
+
"step": 10570
|
| 7585 |
+
},
|
| 7586 |
+
{
|
| 7587 |
+
"epoch": 1.787464098665315,
|
| 7588 |
+
"grad_norm": 0.47143012285232544,
|
| 7589 |
+
"learning_rate": 1.1960274244472928e-05,
|
| 7590 |
+
"loss": 4.278904724121094,
|
| 7591 |
+
"step": 10580
|
| 7592 |
+
},
|
| 7593 |
+
{
|
| 7594 |
+
"epoch": 1.7891535732387227,
|
| 7595 |
+
"grad_norm": 0.4651215970516205,
|
| 7596 |
+
"learning_rate": 1.1773548072588352e-05,
|
| 7597 |
+
"loss": 4.294227600097656,
|
| 7598 |
+
"step": 10590
|
| 7599 |
+
},
|
| 7600 |
+
{
|
| 7601 |
+
"epoch": 1.7908430478121304,
|
| 7602 |
+
"grad_norm": 0.46851372718811035,
|
| 7603 |
+
"learning_rate": 1.158823143906652e-05,
|
| 7604 |
+
"loss": 4.300647735595703,
|
| 7605 |
+
"step": 10600
|
| 7606 |
+
},
|
| 7607 |
+
{
|
| 7608 |
+
"epoch": 1.792532522385538,
|
| 7609 |
+
"grad_norm": 0.46132445335388184,
|
| 7610 |
+
"learning_rate": 1.1404326233639056e-05,
|
| 7611 |
+
"loss": 4.321317291259765,
|
| 7612 |
+
"step": 10610
|
| 7613 |
+
},
|
| 7614 |
+
{
|
| 7615 |
+
"epoch": 1.794221996958946,
|
| 7616 |
+
"grad_norm": 0.4654752016067505,
|
| 7617 |
+
"learning_rate": 1.1221834331644857e-05,
|
| 7618 |
+
"loss": 4.292523193359375,
|
| 7619 |
+
"step": 10620
|
| 7620 |
+
},
|
| 7621 |
+
{
|
| 7622 |
+
"epoch": 1.7959114715323534,
|
| 7623 |
+
"grad_norm": 0.4643712043762207,
|
| 7624 |
+
"learning_rate": 1.1040757594010908e-05,
|
| 7625 |
+
"loss": 4.292951583862305,
|
| 7626 |
+
"step": 10630
|
| 7627 |
+
},
|
| 7628 |
+
{
|
| 7629 |
+
"epoch": 1.7976009461057612,
|
| 7630 |
+
"grad_norm": 0.4542069435119629,
|
| 7631 |
+
"learning_rate": 1.0861097867233375e-05,
|
| 7632 |
+
"loss": 4.301964187622071,
|
| 7633 |
+
"step": 10640
|
| 7634 |
+
},
|
| 7635 |
+
{
|
| 7636 |
+
"epoch": 1.799290420679169,
|
| 7637 |
+
"grad_norm": 0.4690853953361511,
|
| 7638 |
+
"learning_rate": 1.0682856983358645e-05,
|
| 7639 |
+
"loss": 4.299973678588867,
|
| 7640 |
+
"step": 10650
|
| 7641 |
+
},
|
| 7642 |
+
{
|
| 7643 |
+
"epoch": 1.8009798952525764,
|
| 7644 |
+
"grad_norm": 0.45049384236335754,
|
| 7645 |
+
"learning_rate": 1.050603675996477e-05,
|
| 7646 |
+
"loss": 4.2946735382080075,
|
| 7647 |
+
"step": 10660
|
| 7648 |
+
},
|
| 7649 |
+
{
|
| 7650 |
+
"epoch": 1.8026693698259841,
|
| 7651 |
+
"grad_norm": 0.4500885009765625,
|
| 7652 |
+
"learning_rate": 1.0330639000142877e-05,
|
| 7653 |
+
"loss": 4.296617889404297,
|
| 7654 |
+
"step": 10670
|
| 7655 |
+
},
|
| 7656 |
+
{
|
| 7657 |
+
"epoch": 1.8043588443993919,
|
| 7658 |
+
"grad_norm": 0.45720404386520386,
|
| 7659 |
+
"learning_rate": 1.0156665492478794e-05,
|
| 7660 |
+
"loss": 4.294065856933594,
|
| 7661 |
+
"step": 10680
|
| 7662 |
+
},
|
| 7663 |
+
{
|
| 7664 |
+
"epoch": 1.8060483189727994,
|
| 7665 |
+
"grad_norm": 0.455400675535202,
|
| 7666 |
+
"learning_rate": 9.984118011034787e-06,
|
| 7667 |
+
"loss": 4.294452285766601,
|
| 7668 |
+
"step": 10690
|
| 7669 |
+
},
|
| 7670 |
+
{
|
| 7671 |
+
"epoch": 1.8077377935462071,
|
| 7672 |
+
"grad_norm": 0.4519326388835907,
|
| 7673 |
+
"learning_rate": 9.812998315331449e-06,
|
| 7674 |
+
"loss": 4.294923782348633,
|
| 7675 |
+
"step": 10700
|
| 7676 |
+
},
|
| 7677 |
+
{
|
| 7678 |
+
"epoch": 1.8094272681196149,
|
| 7679 |
+
"grad_norm": 0.4522061049938202,
|
| 7680 |
+
"learning_rate": 9.64330815032991e-06,
|
| 7681 |
+
"loss": 4.294776535034179,
|
| 7682 |
+
"step": 10710
|
| 7683 |
+
},
|
| 7684 |
+
{
|
| 7685 |
+
"epoch": 1.8111167426930224,
|
| 7686 |
+
"grad_norm": 0.44188153743743896,
|
| 7687 |
+
"learning_rate": 9.475049246413801e-06,
|
| 7688 |
+
"loss": 4.282304382324218,
|
| 7689 |
+
"step": 10720
|
| 7690 |
+
},
|
| 7691 |
+
{
|
| 7692 |
+
"epoch": 1.81280621726643,
|
| 7693 |
+
"grad_norm": 0.4490991532802582,
|
| 7694 |
+
"learning_rate": 9.308223319371789e-06,
|
| 7695 |
+
"loss": 4.309776306152344,
|
| 7696 |
+
"step": 10730
|
| 7697 |
+
},
|
| 7698 |
+
{
|
| 7699 |
+
"epoch": 1.8144956918398378,
|
| 7700 |
+
"grad_norm": 0.44931647181510925,
|
| 7701 |
+
"learning_rate": 9.142832070380051e-06,
|
| 7702 |
+
"loss": 4.275448608398437,
|
| 7703 |
+
"step": 10740
|
| 7704 |
+
},
|
| 7705 |
+
{
|
| 7706 |
+
"epoch": 1.8161851664132453,
|
| 7707 |
+
"grad_norm": 0.4667072296142578,
|
| 7708 |
+
"learning_rate": 8.978877185984895e-06,
|
| 7709 |
+
"loss": 4.278246688842773,
|
| 7710 |
+
"step": 10750
|
| 7711 |
+
},
|
| 7712 |
+
{
|
| 7713 |
+
"epoch": 1.8178746409866533,
|
| 7714 |
+
"grad_norm": 0.46589362621307373,
|
| 7715 |
+
"learning_rate": 8.816360338085537e-06,
|
| 7716 |
+
"loss": 4.319537734985351,
|
| 7717 |
+
"step": 10760
|
| 7718 |
+
},
|
| 7719 |
+
{
|
| 7720 |
+
"epoch": 1.8195641155600608,
|
| 7721 |
+
"grad_norm": 0.4552581012248993,
|
| 7722 |
+
"learning_rate": 8.655283183917094e-06,
|
| 7723 |
+
"loss": 4.305705642700195,
|
| 7724 |
+
"step": 10770
|
| 7725 |
+
},
|
| 7726 |
+
{
|
| 7727 |
+
"epoch": 1.8212535901334683,
|
| 7728 |
+
"grad_norm": 0.4621904194355011,
|
| 7729 |
+
"learning_rate": 8.495647366033708e-06,
|
| 7730 |
+
"loss": 4.302457427978515,
|
| 7731 |
+
"step": 10780
|
| 7732 |
+
},
|
| 7733 |
+
{
|
| 7734 |
+
"epoch": 1.8229430647068763,
|
| 7735 |
+
"grad_norm": 0.44105246663093567,
|
| 7736 |
+
"learning_rate": 8.33745451229173e-06,
|
| 7737 |
+
"loss": 4.288130187988282,
|
| 7738 |
+
"step": 10790
|
| 7739 |
+
},
|
| 7740 |
+
{
|
| 7741 |
+
"epoch": 1.8246325392802838,
|
| 7742 |
+
"grad_norm": 0.44687995314598083,
|
| 7743 |
+
"learning_rate": 8.180706235833162e-06,
|
| 7744 |
+
"loss": 4.27890625,
|
| 7745 |
+
"step": 10800
|
| 7746 |
+
},
|
| 7747 |
+
{
|
| 7748 |
+
"epoch": 1.8263220138536915,
|
| 7749 |
+
"grad_norm": 0.45001620054244995,
|
| 7750 |
+
"learning_rate": 8.025404135069207e-06,
|
| 7751 |
+
"loss": 4.305799102783203,
|
| 7752 |
+
"step": 10810
|
| 7753 |
+
},
|
| 7754 |
+
{
|
| 7755 |
+
"epoch": 1.8280114884270993,
|
| 7756 |
+
"grad_norm": 0.4509744942188263,
|
| 7757 |
+
"learning_rate": 7.871549793663985e-06,
|
| 7758 |
+
"loss": 4.28497314453125,
|
| 7759 |
+
"step": 10820
|
| 7760 |
+
},
|
| 7761 |
+
{
|
| 7762 |
+
"epoch": 1.8297009630005068,
|
| 7763 |
+
"grad_norm": 0.4606933295726776,
|
| 7764 |
+
"learning_rate": 7.719144780518315e-06,
|
| 7765 |
+
"loss": 4.279584121704102,
|
| 7766 |
+
"step": 10830
|
| 7767 |
+
},
|
| 7768 |
+
{
|
| 7769 |
+
"epoch": 1.8313904375739145,
|
| 7770 |
+
"grad_norm": 0.453891396522522,
|
| 7771 |
+
"learning_rate": 7.568190649753753e-06,
|
| 7772 |
+
"loss": 4.292636871337891,
|
| 7773 |
+
"step": 10840
|
| 7774 |
+
},
|
| 7775 |
+
{
|
| 7776 |
+
"epoch": 1.8330799121473222,
|
| 7777 |
+
"grad_norm": 0.4472525715827942,
|
| 7778 |
+
"learning_rate": 7.418688940696843e-06,
|
| 7779 |
+
"loss": 4.301625061035156,
|
| 7780 |
+
"step": 10850
|
| 7781 |
+
},
|
| 7782 |
+
{
|
| 7783 |
+
"epoch": 1.8347693867207298,
|
| 7784 |
+
"grad_norm": 0.45282483100891113,
|
| 7785 |
+
"learning_rate": 7.270641177863251e-06,
|
| 7786 |
+
"loss": 4.318436813354492,
|
| 7787 |
+
"step": 10860
|
| 7788 |
+
},
|
| 7789 |
+
{
|
| 7790 |
+
"epoch": 1.8364588612941375,
|
| 7791 |
+
"grad_norm": 0.4516963064670563,
|
| 7792 |
+
"learning_rate": 7.124048870942301e-06,
|
| 7793 |
+
"loss": 4.304822540283203,
|
| 7794 |
+
"step": 10870
|
| 7795 |
+
},
|
| 7796 |
+
{
|
| 7797 |
+
"epoch": 1.8381483358675452,
|
| 7798 |
+
"grad_norm": 0.4429229497909546,
|
| 7799 |
+
"learning_rate": 6.97891351478157e-06,
|
| 7800 |
+
"loss": 4.284355163574219,
|
| 7801 |
+
"step": 10880
|
| 7802 |
+
},
|
| 7803 |
+
{
|
| 7804 |
+
"epoch": 1.8398378104409527,
|
| 7805 |
+
"grad_norm": 0.4634481966495514,
|
| 7806 |
+
"learning_rate": 6.83523658937174e-06,
|
| 7807 |
+
"loss": 4.301011276245117,
|
| 7808 |
+
"step": 10890
|
| 7809 |
+
},
|
| 7810 |
+
{
|
| 7811 |
+
"epoch": 1.8415272850143607,
|
| 7812 |
+
"grad_norm": 0.44028082489967346,
|
| 7813 |
+
"learning_rate": 6.693019559831319e-06,
|
| 7814 |
+
"loss": 4.270964431762695,
|
| 7815 |
+
"step": 10900
|
| 7816 |
+
},
|
| 7817 |
+
{
|
| 7818 |
+
"epoch": 1.8432167595877682,
|
| 7819 |
+
"grad_norm": 0.44995155930519104,
|
| 7820 |
+
"learning_rate": 6.552263876391878e-06,
|
| 7821 |
+
"loss": 4.295645523071289,
|
| 7822 |
+
"step": 10910
|
| 7823 |
+
},
|
| 7824 |
+
{
|
| 7825 |
+
"epoch": 1.8449062341611757,
|
| 7826 |
+
"grad_norm": 0.45434826612472534,
|
| 7827 |
+
"learning_rate": 6.412970974383069e-06,
|
| 7828 |
+
"loss": 4.267873001098633,
|
| 7829 |
+
"step": 10920
|
| 7830 |
+
},
|
| 7831 |
+
{
|
| 7832 |
+
"epoch": 1.8465957087345837,
|
| 7833 |
+
"grad_norm": 0.4576048254966736,
|
| 7834 |
+
"learning_rate": 6.275142274218264e-06,
|
| 7835 |
+
"loss": 4.275784683227539,
|
| 7836 |
+
"step": 10930
|
| 7837 |
+
},
|
| 7838 |
+
{
|
| 7839 |
+
"epoch": 1.8482851833079912,
|
| 7840 |
+
"grad_norm": 0.447792649269104,
|
| 7841 |
+
"learning_rate": 6.138779181379777e-06,
|
| 7842 |
+
"loss": 4.311757659912109,
|
| 7843 |
+
"step": 10940
|
| 7844 |
+
},
|
| 7845 |
+
{
|
| 7846 |
+
"epoch": 1.849974657881399,
|
| 7847 |
+
"grad_norm": 0.45298346877098083,
|
| 7848 |
+
"learning_rate": 6.003883086404709e-06,
|
| 7849 |
+
"loss": 4.295004272460938,
|
| 7850 |
+
"step": 10950
|
| 7851 |
+
},
|
| 7852 |
+
{
|
| 7853 |
+
"epoch": 1.8516641324548067,
|
| 7854 |
+
"grad_norm": 0.4503316879272461,
|
| 7855 |
+
"learning_rate": 5.870455364870747e-06,
|
| 7856 |
+
"loss": 4.2887310028076175,
|
| 7857 |
+
"step": 10960
|
| 7858 |
+
},
|
| 7859 |
+
{
|
| 7860 |
+
"epoch": 1.8533536070282142,
|
| 7861 |
+
"grad_norm": 0.45251592993736267,
|
| 7862 |
+
"learning_rate": 5.738497377382117e-06,
|
| 7863 |
+
"loss": 4.286402893066406,
|
| 7864 |
+
"step": 10970
|
| 7865 |
+
},
|
| 7866 |
+
{
|
| 7867 |
+
"epoch": 1.855043081601622,
|
| 7868 |
+
"grad_norm": 0.44080156087875366,
|
| 7869 |
+
"learning_rate": 5.608010469555674e-06,
|
| 7870 |
+
"loss": 4.308802795410156,
|
| 7871 |
+
"step": 10980
|
| 7872 |
+
},
|
| 7873 |
+
{
|
| 7874 |
+
"epoch": 1.8567325561750296,
|
| 7875 |
+
"grad_norm": 0.4467971622943878,
|
| 7876 |
+
"learning_rate": 5.4789959720071995e-06,
|
| 7877 |
+
"loss": 4.299658584594726,
|
| 7878 |
+
"step": 10990
|
| 7879 |
+
},
|
| 7880 |
+
{
|
| 7881 |
+
"epoch": 1.8584220307484371,
|
| 7882 |
+
"grad_norm": 0.4504829943180084,
|
| 7883 |
+
"learning_rate": 5.3514552003379395e-06,
|
| 7884 |
+
"loss": 4.2919353485107425,
|
| 7885 |
+
"step": 11000
|
| 7886 |
+
},
|
| 7887 |
+
{
|
| 7888 |
+
"epoch": 1.8584220307484371,
|
| 7889 |
+
"eval_loss": 4.247786521911621,
|
| 7890 |
+
"eval_runtime": 3.5852,
|
| 7891 |
+
"eval_samples_per_second": 278.924,
|
| 7892 |
+
"eval_steps_per_second": 5.857,
|
| 7893 |
+
"step": 11000
|
| 7894 |
}
|
| 7895 |
],
|
| 7896 |
"logging_steps": 10,
|
|
|
|
| 7910 |
"attributes": {}
|
| 7911 |
}
|
| 7912 |
},
|
| 7913 |
+
"total_flos": 3.679004125896376e+17,
|
| 7914 |
"train_batch_size": 48,
|
| 7915 |
"trial_name": null,
|
| 7916 |
"trial_params": null
|