Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step298/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step298/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step298/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step298/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step298/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +335 -3
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 447329696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46a365e9fcb2454a4c23ba115a7809178487f14e670b8347d96cdf4cff0d5360
|
| 3 |
size 447329696
|
last-checkpoint/global_step298/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7b42934795f6e589ecf158269a54cdac29447df9c133456dd455795bf3a1fd2
|
| 3 |
+
size 337225765
|
last-checkpoint/global_step298/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f8f5690a0cda8662d09a99d62e7588b9ecbecd4324ff4f6a1828f76ed1d662a
|
| 3 |
+
size 337225893
|
last-checkpoint/global_step298/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e068943495936dc63bb3b9f0b9fbf295441fdd77a28f3c70f87e1374052b7c8
|
| 3 |
+
size 337225893
|
last-checkpoint/global_step298/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02fddd60d00b4177e5f0c9c4669dfbe280e45fdba78cc217dfad10a582c218dc
|
| 3 |
+
size 337225893
|
last-checkpoint/global_step298/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4073004376166aa76edc9f00a363567c1dc2e7f7463a3a1f13d6a16a3b825edc
|
| 3 |
+
size 2282578149
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step298
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:022e5f570f01a12c1d81b42bd6eb19a0e173e7f47e424219366d42929f99e132
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3aed2436bc3ad431adb01e8d4df32815773aab4a6b13c3adb4fced6192268c3
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97dd34720bb0b20ed7301d9f60cd440d867fb1aac58f9dc15c1f7fe4ac70e520
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37832d386fc739839d36940cdc0c137876018d931548a6ee584a9d52680e1a59
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6af806fee020be865d25dafddb307ac91eb8c597cf77813f63beb93464bf2104
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -672,6 +672,338 @@
|
|
| 672 |
"eval_samples_per_second": 30.947,
|
| 673 |
"eval_steps_per_second": 1.97,
|
| 674 |
"step": 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
}
|
| 676 |
],
|
| 677 |
"logging_steps": 5,
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.02023915760219097,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.1436265709156195,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 300,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 672 |
"eval_samples_per_second": 30.947,
|
| 673 |
"eval_steps_per_second": 1.97,
|
| 674 |
"step": 200
|
| 675 |
+
},
|
| 676 |
+
{
|
| 677 |
+
"epoch": 1.466786355475763,
|
| 678 |
+
"grad_norm": 3.074913501739502,
|
| 679 |
+
"learning_rate": 0.00011372766805777725,
|
| 680 |
+
"logits/chosen": -4.8984375,
|
| 681 |
+
"logits/rejected": -8.321874618530273,
|
| 682 |
+
"logps/chosen": -133.85000610351562,
|
| 683 |
+
"logps/rejected": -351.5,
|
| 684 |
+
"loss": 0.0452,
|
| 685 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 686 |
+
"rewards/chosen": -6.512499809265137,
|
| 687 |
+
"rewards/margins": 21.049999237060547,
|
| 688 |
+
"rewards/rejected": -27.5625,
|
| 689 |
+
"step": 205
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"epoch": 1.502692998204668,
|
| 693 |
+
"grad_norm": 3.414430618286133,
|
| 694 |
+
"learning_rate": 0.00011372461861389142,
|
| 695 |
+
"logits/chosen": -5.293749809265137,
|
| 696 |
+
"logits/rejected": -7.599999904632568,
|
| 697 |
+
"logps/chosen": -180.0,
|
| 698 |
+
"logps/rejected": -397.3999938964844,
|
| 699 |
+
"loss": 0.0937,
|
| 700 |
+
"rewards/accuracies": 0.9781249761581421,
|
| 701 |
+
"rewards/chosen": -11.2421875,
|
| 702 |
+
"rewards/margins": 20.878124237060547,
|
| 703 |
+
"rewards/rejected": -32.10625076293945,
|
| 704 |
+
"step": 210
|
| 705 |
+
},
|
| 706 |
+
{
|
| 707 |
+
"epoch": 1.5385996409335727,
|
| 708 |
+
"grad_norm": 1.9072022438049316,
|
| 709 |
+
"learning_rate": 0.00011372142606203516,
|
| 710 |
+
"logits/chosen": -4.057812690734863,
|
| 711 |
+
"logits/rejected": -6.109375,
|
| 712 |
+
"logps/chosen": -156.0749969482422,
|
| 713 |
+
"logps/rejected": -324.79998779296875,
|
| 714 |
+
"loss": 0.0461,
|
| 715 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 716 |
+
"rewards/chosen": -8.564062118530273,
|
| 717 |
+
"rewards/margins": 16.028125762939453,
|
| 718 |
+
"rewards/rejected": -24.587499618530273,
|
| 719 |
+
"step": 215
|
| 720 |
+
},
|
| 721 |
+
{
|
| 722 |
+
"epoch": 1.5745062836624775,
|
| 723 |
+
"grad_norm": 1.2352949380874634,
|
| 724 |
+
"learning_rate": 0.00011371809041024573,
|
| 725 |
+
"logits/chosen": -5.315625190734863,
|
| 726 |
+
"logits/rejected": -7.754687309265137,
|
| 727 |
+
"logps/chosen": -189.64999389648438,
|
| 728 |
+
"logps/rejected": -392.0,
|
| 729 |
+
"loss": 0.0332,
|
| 730 |
+
"rewards/accuracies": 0.981249988079071,
|
| 731 |
+
"rewards/chosen": -11.946874618530273,
|
| 732 |
+
"rewards/margins": 19.446874618530273,
|
| 733 |
+
"rewards/rejected": -31.424999237060547,
|
| 734 |
+
"step": 220
|
| 735 |
+
},
|
| 736 |
+
{
|
| 737 |
+
"epoch": 1.6104129263913824,
|
| 738 |
+
"grad_norm": 0.5811780095100403,
|
| 739 |
+
"learning_rate": 0.00011371461166692062,
|
| 740 |
+
"logits/chosen": -7.451562404632568,
|
| 741 |
+
"logits/rejected": -10.225000381469727,
|
| 742 |
+
"logps/chosen": -218.4499969482422,
|
| 743 |
+
"logps/rejected": -433.79998779296875,
|
| 744 |
+
"loss": 0.0486,
|
| 745 |
+
"rewards/accuracies": 0.984375,
|
| 746 |
+
"rewards/chosen": -14.824999809265137,
|
| 747 |
+
"rewards/margins": 20.75,
|
| 748 |
+
"rewards/rejected": -35.57500076293945,
|
| 749 |
+
"step": 225
|
| 750 |
+
},
|
| 751 |
+
{
|
| 752 |
+
"epoch": 1.6463195691202872,
|
| 753 |
+
"grad_norm": 0.33781036734580994,
|
| 754 |
+
"learning_rate": 0.00011371098984081755,
|
| 755 |
+
"logits/chosen": -9.0390625,
|
| 756 |
+
"logits/rejected": -11.784375190734863,
|
| 757 |
+
"logps/chosen": -232.75,
|
| 758 |
+
"logps/rejected": -425.79998779296875,
|
| 759 |
+
"loss": 0.0465,
|
| 760 |
+
"rewards/accuracies": 0.9781249761581421,
|
| 761 |
+
"rewards/chosen": -16.509374618530273,
|
| 762 |
+
"rewards/margins": 18.971874237060547,
|
| 763 |
+
"rewards/rejected": -35.45624923706055,
|
| 764 |
+
"step": 230
|
| 765 |
+
},
|
| 766 |
+
{
|
| 767 |
+
"epoch": 1.682226211849192,
|
| 768 |
+
"grad_norm": 0.9501491189002991,
|
| 769 |
+
"learning_rate": 0.0001137072249410545,
|
| 770 |
+
"logits/chosen": -8.489062309265137,
|
| 771 |
+
"logits/rejected": -11.706250190734863,
|
| 772 |
+
"logps/chosen": -229.6999969482422,
|
| 773 |
+
"logps/rejected": -451.3999938964844,
|
| 774 |
+
"loss": 0.0196,
|
| 775 |
+
"rewards/accuracies": 0.996874988079071,
|
| 776 |
+
"rewards/chosen": -16.003124237060547,
|
| 777 |
+
"rewards/margins": 21.600000381469727,
|
| 778 |
+
"rewards/rejected": -37.599998474121094,
|
| 779 |
+
"step": 235
|
| 780 |
+
},
|
| 781 |
+
{
|
| 782 |
+
"epoch": 1.718132854578097,
|
| 783 |
+
"grad_norm": 2.2919723987579346,
|
| 784 |
+
"learning_rate": 0.00011370331697710956,
|
| 785 |
+
"logits/chosen": -8.271875381469727,
|
| 786 |
+
"logits/rejected": -12.678125381469727,
|
| 787 |
+
"logps/chosen": -205.75,
|
| 788 |
+
"logps/rejected": -511.79998779296875,
|
| 789 |
+
"loss": 0.0752,
|
| 790 |
+
"rewards/accuracies": 0.984375,
|
| 791 |
+
"rewards/chosen": -13.65625,
|
| 792 |
+
"rewards/margins": 30.087499618530273,
|
| 793 |
+
"rewards/rejected": -43.75,
|
| 794 |
+
"step": 240
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 1.754039497307002,
|
| 798 |
+
"grad_norm": 2.5256083011627197,
|
| 799 |
+
"learning_rate": 0.00011369926595882104,
|
| 800 |
+
"logits/chosen": -10.071874618530273,
|
| 801 |
+
"logits/rejected": -13.768750190734863,
|
| 802 |
+
"logps/chosen": -291.04998779296875,
|
| 803 |
+
"logps/rejected": -619.2000122070312,
|
| 804 |
+
"loss": 0.0377,
|
| 805 |
+
"rewards/accuracies": 0.9781249761581421,
|
| 806 |
+
"rewards/chosen": -22.446874618530273,
|
| 807 |
+
"rewards/margins": 32.150001525878906,
|
| 808 |
+
"rewards/rejected": -54.57500076293945,
|
| 809 |
+
"step": 245
|
| 810 |
+
},
|
| 811 |
+
{
|
| 812 |
+
"epoch": 1.7899461400359067,
|
| 813 |
+
"grad_norm": 1.3631178140640259,
|
| 814 |
+
"learning_rate": 0.00011369507189638736,
|
| 815 |
+
"logits/chosen": -3.7313232421875,
|
| 816 |
+
"logits/rejected": -7.044335842132568,
|
| 817 |
+
"logps/chosen": -175.27499389648438,
|
| 818 |
+
"logps/rejected": -371.45001220703125,
|
| 819 |
+
"loss": 0.0627,
|
| 820 |
+
"rewards/accuracies": 0.9781249761581421,
|
| 821 |
+
"rewards/chosen": -10.771875381469727,
|
| 822 |
+
"rewards/margins": 18.6015625,
|
| 823 |
+
"rewards/rejected": -29.353124618530273,
|
| 824 |
+
"step": 250
|
| 825 |
+
},
|
| 826 |
+
{
|
| 827 |
+
"epoch": 1.7899461400359067,
|
| 828 |
+
"eval_logits/chosen": -0.22195972502231598,
|
| 829 |
+
"eval_logits/rejected": -1.6530331373214722,
|
| 830 |
+
"eval_logps/chosen": -116.17646789550781,
|
| 831 |
+
"eval_logps/rejected": -208.88235473632812,
|
| 832 |
+
"eval_loss": 0.07754824310541153,
|
| 833 |
+
"eval_rewards/accuracies": 0.9485294222831726,
|
| 834 |
+
"eval_rewards/chosen": -4.249080657958984,
|
| 835 |
+
"eval_rewards/margins": 8.38786792755127,
|
| 836 |
+
"eval_rewards/rejected": -12.650734901428223,
|
| 837 |
+
"eval_runtime": 8.5185,
|
| 838 |
+
"eval_samples_per_second": 31.344,
|
| 839 |
+
"eval_steps_per_second": 1.996,
|
| 840 |
+
"step": 250
|
| 841 |
+
},
|
| 842 |
+
{
|
| 843 |
+
"epoch": 1.8258527827648114,
|
| 844 |
+
"grad_norm": 1.8161951303482056,
|
| 845 |
+
"learning_rate": 0.00011369073480036712,
|
| 846 |
+
"logits/chosen": -2.3915038108825684,
|
| 847 |
+
"logits/rejected": -3.521484375,
|
| 848 |
+
"logps/chosen": -145.4250030517578,
|
| 849 |
+
"logps/rejected": -257.20001220703125,
|
| 850 |
+
"loss": 0.0791,
|
| 851 |
+
"rewards/accuracies": 0.9624999761581421,
|
| 852 |
+
"rewards/chosen": -7.525000095367432,
|
| 853 |
+
"rewards/margins": 10.292187690734863,
|
| 854 |
+
"rewards/rejected": -17.818750381469727,
|
| 855 |
+
"step": 255
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"epoch": 1.8617594254937164,
|
| 859 |
+
"grad_norm": 0.3177375793457031,
|
| 860 |
+
"learning_rate": 0.00011368625468167889,
|
| 861 |
+
"logits/chosen": -8.206250190734863,
|
| 862 |
+
"logits/rejected": -10.065625190734863,
|
| 863 |
+
"logps/chosen": -325.3500061035156,
|
| 864 |
+
"logps/rejected": -509.1000061035156,
|
| 865 |
+
"loss": 0.0508,
|
| 866 |
+
"rewards/accuracies": 0.971875011920929,
|
| 867 |
+
"rewards/chosen": -26.078125,
|
| 868 |
+
"rewards/margins": 17.381250381469727,
|
| 869 |
+
"rewards/rejected": -43.42499923706055,
|
| 870 |
+
"step": 260
|
| 871 |
+
},
|
| 872 |
+
{
|
| 873 |
+
"epoch": 1.8976660682226212,
|
| 874 |
+
"grad_norm": 0.7164928317070007,
|
| 875 |
+
"learning_rate": 0.00011368163155160139,
|
| 876 |
+
"logits/chosen": -10.165624618530273,
|
| 877 |
+
"logits/rejected": -11.875,
|
| 878 |
+
"logps/chosen": -410.29998779296875,
|
| 879 |
+
"logps/rejected": -585.4000244140625,
|
| 880 |
+
"loss": 0.0577,
|
| 881 |
+
"rewards/accuracies": 0.981249988079071,
|
| 882 |
+
"rewards/chosen": -34.45624923706055,
|
| 883 |
+
"rewards/margins": 16.634374618530273,
|
| 884 |
+
"rewards/rejected": -51.07500076293945,
|
| 885 |
+
"step": 265
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 1.933572710951526,
|
| 889 |
+
"grad_norm": 2.070739507675171,
|
| 890 |
+
"learning_rate": 0.00011367686542177336,
|
| 891 |
+
"logits/chosen": -6.4375,
|
| 892 |
+
"logits/rejected": -7.814062595367432,
|
| 893 |
+
"logps/chosen": -340.1000061035156,
|
| 894 |
+
"logps/rejected": -469.79998779296875,
|
| 895 |
+
"loss": 0.0576,
|
| 896 |
+
"rewards/accuracies": 0.9593750238418579,
|
| 897 |
+
"rewards/chosen": -27.168750762939453,
|
| 898 |
+
"rewards/margins": 12.381250381469727,
|
| 899 |
+
"rewards/rejected": -39.54999923706055,
|
| 900 |
+
"step": 270
|
| 901 |
+
},
|
| 902 |
+
{
|
| 903 |
+
"epoch": 1.969479353680431,
|
| 904 |
+
"grad_norm": 1.2207964658737183,
|
| 905 |
+
"learning_rate": 0.0001136719563041935,
|
| 906 |
+
"logits/chosen": -4.547656059265137,
|
| 907 |
+
"logits/rejected": -6.598437309265137,
|
| 908 |
+
"logps/chosen": -263.0,
|
| 909 |
+
"logps/rejected": -423.8999938964844,
|
| 910 |
+
"loss": 0.0322,
|
| 911 |
+
"rewards/accuracies": 0.9906250238418579,
|
| 912 |
+
"rewards/chosen": -19.46875,
|
| 913 |
+
"rewards/margins": 15.509374618530273,
|
| 914 |
+
"rewards/rejected": -34.993751525878906,
|
| 915 |
+
"step": 275
|
| 916 |
+
},
|
| 917 |
+
{
|
| 918 |
+
"epoch": 2.0,
|
| 919 |
+
"grad_norm": 6.416799545288086,
|
| 920 |
+
"learning_rate": 0.0001136669042112205,
|
| 921 |
+
"logits/chosen": -5.321691036224365,
|
| 922 |
+
"logits/rejected": -8.242647171020508,
|
| 923 |
+
"logps/chosen": -274.0,
|
| 924 |
+
"logps/rejected": -515.6470336914062,
|
| 925 |
+
"loss": 0.0428,
|
| 926 |
+
"rewards/accuracies": 0.9852941036224365,
|
| 927 |
+
"rewards/chosen": -20.264705657958984,
|
| 928 |
+
"rewards/margins": 23.514705657958984,
|
| 929 |
+
"rewards/rejected": -43.80882263183594,
|
| 930 |
+
"step": 280
|
| 931 |
+
},
|
| 932 |
+
{
|
| 933 |
+
"epoch": 2.035906642728905,
|
| 934 |
+
"grad_norm": 0.0026381895877420902,
|
| 935 |
+
"learning_rate": 0.00011366170915557303,
|
| 936 |
+
"logits/chosen": -6.020312309265137,
|
| 937 |
+
"logits/rejected": -9.284375190734863,
|
| 938 |
+
"logps/chosen": -289.1499938964844,
|
| 939 |
+
"logps/rejected": -603.0,
|
| 940 |
+
"loss": 0.0676,
|
| 941 |
+
"rewards/accuracies": 0.9781249761581421,
|
| 942 |
+
"rewards/chosen": -22.331249237060547,
|
| 943 |
+
"rewards/margins": 30.418750762939453,
|
| 944 |
+
"rewards/rejected": -52.75,
|
| 945 |
+
"step": 285
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"epoch": 2.0718132854578095,
|
| 949 |
+
"grad_norm": 9.352706909179688,
|
| 950 |
+
"learning_rate": 0.00011365637115032964,
|
| 951 |
+
"logits/chosen": -5.478906154632568,
|
| 952 |
+
"logits/rejected": -9.690625190734863,
|
| 953 |
+
"logps/chosen": -263.3500061035156,
|
| 954 |
+
"logps/rejected": -613.4000244140625,
|
| 955 |
+
"loss": 0.0791,
|
| 956 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 957 |
+
"rewards/chosen": -19.450000762939453,
|
| 958 |
+
"rewards/margins": 34.474998474121094,
|
| 959 |
+
"rewards/rejected": -53.9375,
|
| 960 |
+
"step": 290
|
| 961 |
+
},
|
| 962 |
+
{
|
| 963 |
+
"epoch": 2.1077199281867145,
|
| 964 |
+
"grad_norm": 1.2689452171325684,
|
| 965 |
+
"learning_rate": 0.0001136508902089287,
|
| 966 |
+
"logits/chosen": -2.486132860183716,
|
| 967 |
+
"logits/rejected": -5.1484375,
|
| 968 |
+
"logps/chosen": -145.1999969482422,
|
| 969 |
+
"logps/rejected": -346.20001220703125,
|
| 970 |
+
"loss": 0.1107,
|
| 971 |
+
"rewards/accuracies": 0.9593750238418579,
|
| 972 |
+
"rewards/chosen": -7.432031154632568,
|
| 973 |
+
"rewards/margins": 19.28125,
|
| 974 |
+
"rewards/rejected": -26.706249237060547,
|
| 975 |
+
"step": 295
|
| 976 |
+
},
|
| 977 |
+
{
|
| 978 |
+
"epoch": 2.1436265709156195,
|
| 979 |
+
"grad_norm": 0.04833826795220375,
|
| 980 |
+
"learning_rate": 0.00011364526634516852,
|
| 981 |
+
"logits/chosen": -5.057812690734863,
|
| 982 |
+
"logits/rejected": -8.810937881469727,
|
| 983 |
+
"logps/chosen": -240.14999389648438,
|
| 984 |
+
"logps/rejected": -519.7999877929688,
|
| 985 |
+
"loss": 0.1121,
|
| 986 |
+
"rewards/accuracies": 0.9781249761581421,
|
| 987 |
+
"rewards/chosen": -17.071874618530273,
|
| 988 |
+
"rewards/margins": 27.412500381469727,
|
| 989 |
+
"rewards/rejected": -44.462501525878906,
|
| 990 |
+
"step": 300
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 2.1436265709156195,
|
| 994 |
+
"eval_logits/chosen": -4.876838207244873,
|
| 995 |
+
"eval_logits/rejected": -8.235294342041016,
|
| 996 |
+
"eval_logps/chosen": -253.05882263183594,
|
| 997 |
+
"eval_logps/rejected": -514.8235473632812,
|
| 998 |
+
"eval_loss": 0.02023915760219097,
|
| 999 |
+
"eval_rewards/accuracies": 0.9836230278015137,
|
| 1000 |
+
"eval_rewards/chosen": -18.169116973876953,
|
| 1001 |
+
"eval_rewards/margins": 25.522058486938477,
|
| 1002 |
+
"eval_rewards/rejected": -43.661766052246094,
|
| 1003 |
+
"eval_runtime": 8.6202,
|
| 1004 |
+
"eval_samples_per_second": 30.974,
|
| 1005 |
+
"eval_steps_per_second": 1.972,
|
| 1006 |
+
"step": 300
|
| 1007 |
}
|
| 1008 |
],
|
| 1009 |
"logging_steps": 5,
|