Training in progress, step 600, checkpoint
Browse files- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +120 -2
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4962001760
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87c243598c92aa172639ff6e40dd1b983ce2b906a69338a7f68bb077d0b49af2
|
| 3 |
size 4962001760
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915916160
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97de6841f7a06df63f15116413b63a6d8f0ed1481c73daec4d308907b9ef7477
|
| 3 |
size 4915916160
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999819336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4df52273384350aee9baa269ceb1e9e573aeaf2a7566f394660f38eefd4989e8
|
| 3 |
size 4999819336
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1623221024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bff86115c9ca1a90bf6873583c738e398f9f9947c13c68352f73a518f1bd9b44
|
| 3 |
size 1623221024
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13053963
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:554d638b6db84406c05520b337e63b464c5dd4ca01d9e36368adbd26b62126ee
|
| 3 |
size 13053963
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6954e89c1960fa7ad6ea4d0b8bbf7ab04896fa485702dacc91479549c4c5f5bd
|
| 3 |
size 16389
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39430be47ab0759ba4c6b2029bf2ac1430c442bd0153447dadb3e88ceeace01f
|
| 3 |
size 16389
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e70cc76576adfb81afd443354dc1068743c58308fe7d889099fbeedaa0a7788
|
| 3 |
size 16389
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a75d5f02ca237d8ef45529e298315fb70e4bd993cf96fd04ae998cf9895b3c4
|
| 3 |
size 16389
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c6680063dc12b212232a97593d0ad4ca30bf709a7f3fc4f37444f622689ee4b
|
| 3 |
size 16389
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d575736283b8fd594e1e1fc04ebb33a9123b2d46d9a7f2b36ee599ae2efdf4fa
|
| 3 |
size 16389
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac6501ba3ee593e553d24857a1d06a2ae89d416a291843a7f705be771a8677fb
|
| 3 |
size 16389
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee4b071aa28acd8e61610943b47f7a78b87ce3fa1a3a7004c67f45adf039ffbe
|
| 3 |
size 16389
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d09ab206d51e08db07522003a69609e8e727f47b95ea47e6b2b8aa03687721b3
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 100,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -609,6 +609,124 @@
|
|
| 609 |
"loss": 0.265,
|
| 610 |
"rewards/rejected": -1.5677401542663574,
|
| 611 |
"step": 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
}
|
| 613 |
],
|
| 614 |
"logging_steps": 10,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 8.0,
|
| 6 |
"eval_steps": 100,
|
| 7 |
+
"global_step": 600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 609 |
"loss": 0.265,
|
| 610 |
"rewards/rejected": -1.5677401542663574,
|
| 611 |
"step": 500
|
| 612 |
+
},
|
| 613 |
+
{
|
| 614 |
+
"epoch": 6.8,
|
| 615 |
+
"grad_norm": 39.75,
|
| 616 |
+
"kl": 0.0,
|
| 617 |
+
"learning_rate": 3.2133333333333335e-07,
|
| 618 |
+
"logits/rejected": 1729151795.2,
|
| 619 |
+
"logps/rejected": -1609.8298828125,
|
| 620 |
+
"loss": 0.2389,
|
| 621 |
+
"rewards/rejected": -1.7230974197387696,
|
| 622 |
+
"step": 510
|
| 623 |
+
},
|
| 624 |
+
{
|
| 625 |
+
"epoch": 6.933333333333334,
|
| 626 |
+
"grad_norm": 42.0,
|
| 627 |
+
"kl": 0.0,
|
| 628 |
+
"learning_rate": 3.08e-07,
|
| 629 |
+
"logits/rejected": 1712730828.8,
|
| 630 |
+
"logps/rejected": -1633.16318359375,
|
| 631 |
+
"loss": 0.2112,
|
| 632 |
+
"rewards/rejected": -2.023202896118164,
|
| 633 |
+
"step": 520
|
| 634 |
+
},
|
| 635 |
+
{
|
| 636 |
+
"epoch": 7.066666666666666,
|
| 637 |
+
"grad_norm": 64.5,
|
| 638 |
+
"kl": 25.04488754272461,
|
| 639 |
+
"learning_rate": 2.9466666666666666e-07,
|
| 640 |
+
"logits/chosen": 1731994419.2,
|
| 641 |
+
"logits/rejected": 1720656691.2,
|
| 642 |
+
"logps/chosen": -1637.28173828125,
|
| 643 |
+
"logps/rejected": -1377.9115234375,
|
| 644 |
+
"loss": 0.3763,
|
| 645 |
+
"rewards/chosen": 5.003516006469726,
|
| 646 |
+
"rewards/margins": 6.7519731521606445,
|
| 647 |
+
"rewards/rejected": -1.748457145690918,
|
| 648 |
+
"step": 530
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"epoch": 7.2,
|
| 652 |
+
"grad_norm": 56.0,
|
| 653 |
+
"kl": 52.43037033081055,
|
| 654 |
+
"learning_rate": 2.813333333333333e-07,
|
| 655 |
+
"logits/chosen": 1726875648.0,
|
| 656 |
+
"logps/chosen": -1521.06044921875,
|
| 657 |
+
"loss": 0.4829,
|
| 658 |
+
"rewards/chosen": 5.305931854248047,
|
| 659 |
+
"step": 540
|
| 660 |
+
},
|
| 661 |
+
{
|
| 662 |
+
"epoch": 7.333333333333333,
|
| 663 |
+
"grad_norm": 50.0,
|
| 664 |
+
"kl": 51.872047424316406,
|
| 665 |
+
"learning_rate": 2.68e-07,
|
| 666 |
+
"logits/chosen": 1734085222.4,
|
| 667 |
+
"logps/chosen": -1607.64345703125,
|
| 668 |
+
"loss": 0.4712,
|
| 669 |
+
"rewards/chosen": 5.294354629516602,
|
| 670 |
+
"step": 550
|
| 671 |
+
},
|
| 672 |
+
{
|
| 673 |
+
"epoch": 7.466666666666667,
|
| 674 |
+
"grad_norm": 51.0,
|
| 675 |
+
"kl": 55.77549362182617,
|
| 676 |
+
"learning_rate": 2.546666666666666e-07,
|
| 677 |
+
"logits/chosen": 1753063219.2,
|
| 678 |
+
"logps/chosen": -1617.415625,
|
| 679 |
+
"loss": 0.4773,
|
| 680 |
+
"rewards/chosen": 5.702725982666015,
|
| 681 |
+
"step": 560
|
| 682 |
+
},
|
| 683 |
+
{
|
| 684 |
+
"epoch": 7.6,
|
| 685 |
+
"grad_norm": 43.75,
|
| 686 |
+
"kl": 18.287708282470703,
|
| 687 |
+
"learning_rate": 2.413333333333333e-07,
|
| 688 |
+
"logits/chosen": 1731032073.5700934,
|
| 689 |
+
"logits/rejected": 1702629587.5305164,
|
| 690 |
+
"logps/chosen": -1447.9690420560748,
|
| 691 |
+
"logps/rejected": -1521.8135269953052,
|
| 692 |
+
"loss": 0.3238,
|
| 693 |
+
"rewards/chosen": 5.521567086193049,
|
| 694 |
+
"rewards/margins": 7.057752337348419,
|
| 695 |
+
"rewards/rejected": -1.5361852511553697,
|
| 696 |
+
"step": 570
|
| 697 |
+
},
|
| 698 |
+
{
|
| 699 |
+
"epoch": 7.733333333333333,
|
| 700 |
+
"grad_norm": 41.0,
|
| 701 |
+
"kl": 0.0,
|
| 702 |
+
"learning_rate": 2.28e-07,
|
| 703 |
+
"logits/rejected": 1697045913.6,
|
| 704 |
+
"logps/rejected": -1615.4837890625,
|
| 705 |
+
"loss": 0.2158,
|
| 706 |
+
"rewards/rejected": -1.9997014999389648,
|
| 707 |
+
"step": 580
|
| 708 |
+
},
|
| 709 |
+
{
|
| 710 |
+
"epoch": 7.866666666666667,
|
| 711 |
+
"grad_norm": 38.75,
|
| 712 |
+
"kl": 0.0,
|
| 713 |
+
"learning_rate": 2.1466666666666666e-07,
|
| 714 |
+
"logits/rejected": 1732887756.8,
|
| 715 |
+
"logps/rejected": -1627.6578125,
|
| 716 |
+
"loss": 0.206,
|
| 717 |
+
"rewards/rejected": -2.029564094543457,
|
| 718 |
+
"step": 590
|
| 719 |
+
},
|
| 720 |
+
{
|
| 721 |
+
"epoch": 8.0,
|
| 722 |
+
"grad_norm": 43.25,
|
| 723 |
+
"kl": 0.0,
|
| 724 |
+
"learning_rate": 2.0133333333333334e-07,
|
| 725 |
+
"logits/rejected": 1715685171.2,
|
| 726 |
+
"logps/rejected": -1501.2095703125,
|
| 727 |
+
"loss": 0.241,
|
| 728 |
+
"rewards/rejected": -1.795237922668457,
|
| 729 |
+
"step": 600
|
| 730 |
}
|
| 731 |
],
|
| 732 |
"logging_steps": 10,
|