Training in progress, step 700, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e023b69876a28967f68d2e2ca3f8be837b2c4981d6e3ad2049a01c6786eaa312
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d213ee4b82282a82674cd9ce017085e7a7b3603c3dad380d9e3921fa872458b
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df73025cee4dddc3e485bae9a5251784ca346a6e5d55d7c1b83f877ae248859a
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b98b9203fd15f057816eda32716436bfe57e83707db8ff60acfc089588f9da41
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -787,6 +787,66 @@
|
|
| 787 |
"reward_std": 0.11013087928295136,
|
| 788 |
"rewards/custom_reward_logic_v2": 0.0818750023841858,
|
| 789 |
"step": 650
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 790 |
}
|
| 791 |
],
|
| 792 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.005574979492039726,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 700,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 787 |
"reward_std": 0.11013087928295136,
|
| 788 |
"rewards/custom_reward_logic_v2": 0.0818750023841858,
|
| 789 |
"step": 650
|
| 790 |
+
},
|
| 791 |
+
{
|
| 792 |
+
"completion_length": 19.23125,
|
| 793 |
+
"epoch": 0.005256409235351742,
|
| 794 |
+
"grad_norm": 1.1138290166854858,
|
| 795 |
+
"kl": 0.3609082795679569,
|
| 796 |
+
"learning_rate": 4.5211988927752026e-07,
|
| 797 |
+
"loss": 0.0144,
|
| 798 |
+
"reward": 0.13375000059604644,
|
| 799 |
+
"reward_std": 0.22290636524558066,
|
| 800 |
+
"rewards/custom_reward_logic_v2": 0.13375000059604644,
|
| 801 |
+
"step": 660
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"completion_length": 21.1375,
|
| 805 |
+
"epoch": 0.005336051799523738,
|
| 806 |
+
"grad_norm": 0.7887033820152283,
|
| 807 |
+
"kl": 0.36398947462439535,
|
| 808 |
+
"learning_rate": 3.915213854677863e-07,
|
| 809 |
+
"loss": 0.0146,
|
| 810 |
+
"reward": 0.07750000171363354,
|
| 811 |
+
"reward_std": 0.09986742436885834,
|
| 812 |
+
"rewards/custom_reward_logic_v2": 0.07750000171363354,
|
| 813 |
+
"step": 670
|
| 814 |
+
},
|
| 815 |
+
{
|
| 816 |
+
"completion_length": 20.7375,
|
| 817 |
+
"epoch": 0.005415694363695734,
|
| 818 |
+
"grad_norm": 1.2118674516677856,
|
| 819 |
+
"kl": 0.34819948896765707,
|
| 820 |
+
"learning_rate": 3.3493649053890325e-07,
|
| 821 |
+
"loss": 0.0139,
|
| 822 |
+
"reward": 0.14000000059604645,
|
| 823 |
+
"reward_std": 0.15659263283014296,
|
| 824 |
+
"rewards/custom_reward_logic_v2": 0.14000000059604645,
|
| 825 |
+
"step": 680
|
| 826 |
+
},
|
| 827 |
+
{
|
| 828 |
+
"completion_length": 25.03125,
|
| 829 |
+
"epoch": 0.00549533692786773,
|
| 830 |
+
"grad_norm": 0.6678434014320374,
|
| 831 |
+
"kl": 0.3506194405257702,
|
| 832 |
+
"learning_rate": 2.8247291705444575e-07,
|
| 833 |
+
"loss": 0.014,
|
| 834 |
+
"reward": 0.10087500289082527,
|
| 835 |
+
"reward_std": 0.19995234534144402,
|
| 836 |
+
"rewards/custom_reward_logic_v2": 0.10087500289082527,
|
| 837 |
+
"step": 690
|
| 838 |
+
},
|
| 839 |
+
{
|
| 840 |
+
"completion_length": 19.75,
|
| 841 |
+
"epoch": 0.005574979492039726,
|
| 842 |
+
"grad_norm": 1.028297781944275,
|
| 843 |
+
"kl": 0.33339232876896857,
|
| 844 |
+
"learning_rate": 2.3423053240837518e-07,
|
| 845 |
+
"loss": 0.0133,
|
| 846 |
+
"reward": 0.09312500022351741,
|
| 847 |
+
"reward_std": 0.09688087031245232,
|
| 848 |
+
"rewards/custom_reward_logic_v2": 0.09312500022351741,
|
| 849 |
+
"step": 700
|
| 850 |
}
|
| 851 |
],
|
| 852 |
"logging_steps": 10,
|