Training in progress, step 750, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df9b8df9a3bcfa7793084f2c491e074f5bd3c4876f1de283980446d052bd9416
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa436fad63bef13e1a414cbf3f838549d7499163af0e47cc9779ce02592bd406
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee7c52c324bc53ae464edbc3c1cfa99789369f21c7d8544ae8559cb215adb050
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09a89f840a88cd313584cc217e93a9306c165d4938e58831fcfab48fea824fe7
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -871,6 +871,42 @@
|
|
| 871 |
"reward_std": 0.25477964654564855,
|
| 872 |
"rewards/custom_reward_simplified_v7_dblog": 0.634375,
|
| 873 |
"step": 720
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 874 |
}
|
| 875 |
],
|
| 876 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.005973192312899706,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 750,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 871 |
"reward_std": 0.25477964654564855,
|
| 872 |
"rewards/custom_reward_simplified_v7_dblog": 0.634375,
|
| 873 |
"step": 720
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"completion_length": 607.86875,
|
| 877 |
+
"epoch": 0.005813907184555714,
|
| 878 |
+
"grad_norm": 0.20680995285511017,
|
| 879 |
+
"kl": 0.0055589195340871814,
|
| 880 |
+
"learning_rate": 4.391536957168733e-06,
|
| 881 |
+
"loss": 0.0002,
|
| 882 |
+
"reward": 0.8,
|
| 883 |
+
"reward_std": 0.32480863481760025,
|
| 884 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.8,
|
| 885 |
+
"step": 730
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"completion_length": 674.13125,
|
| 889 |
+
"epoch": 0.00589354974872771,
|
| 890 |
+
"grad_norm": 0.005594769027084112,
|
| 891 |
+
"kl": 0.005972519854549318,
|
| 892 |
+
"learning_rate": 4.367562678102491e-06,
|
| 893 |
+
"loss": 0.0002,
|
| 894 |
+
"reward": 0.665625,
|
| 895 |
+
"reward_std": 0.20820673778653145,
|
| 896 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.665625,
|
| 897 |
+
"step": 740
|
| 898 |
+
},
|
| 899 |
+
{
|
| 900 |
+
"completion_length": 639.69375,
|
| 901 |
+
"epoch": 0.005973192312899706,
|
| 902 |
+
"grad_norm": 0.11012833565473557,
|
| 903 |
+
"kl": 0.005814655229914934,
|
| 904 |
+
"learning_rate": 4.34319334202531e-06,
|
| 905 |
+
"loss": 0.0002,
|
| 906 |
+
"reward": 0.796875,
|
| 907 |
+
"reward_std": 0.34761993661522866,
|
| 908 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.796875,
|
| 909 |
+
"step": 750
|
| 910 |
}
|
| 911 |
],
|
| 912 |
"logging_steps": 10,
|