Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +672 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 83920720
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:095753d1308fca4d68bd50eee28c0d64ec3d81fcd352020de685e3e2b9697fb5
|
| 3 |
size 83920720
|
last-checkpoint/global_step700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a060593b440423607faacd1b99bd9ea5033e7c32bbbce869b135a4445b1df75
|
| 3 |
+
size 63256165
|
last-checkpoint/global_step700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e0ca9a7eb6451a86f4dabe7594dafadc5718627e49302966606a36148d751cd
|
| 3 |
+
size 63256229
|
last-checkpoint/global_step700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abd9fb04daf69ce6b1421696d3e31bbde42f3a466ffdbfa64dd2f7c0c1caca66
|
| 3 |
+
size 63256229
|
last-checkpoint/global_step700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3c79d800c97fb10117092f9ec04346c270ab37cdf4dc3eac702a8944e792013
|
| 3 |
+
size 63256229
|
last-checkpoint/global_step700/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3f68e58bd36f4b61d7415dc2f56e5d6defe6980f584b727e4e84a33a6aa1674
|
| 3 |
+
size 84096473
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step700
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:101ab08e2194627d031e7fc8097a3ac23d583896a9fe2b312020a2bed3639d65
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15365
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c70384deaa4c1845a6f2d4fb20565360de13afbc60092206cf9f88cd1e723c2
|
| 3 |
size 15365
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6b6933871a383f04bb8f7db2236c9030798486f4c60fc0af36965963882a317
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93677c1a0c522ff69f16203c7d55be7cbdea8e71a0bdc8a1344a5cb9c0605e55
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79c56dc8bc830a15efd2cc23c66236839ef75d4ec3e413a9fbb8dae549996719
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 100,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4016,11 +4016,679 @@
|
|
| 4016 |
"eval_samples_per_second": 3.956,
|
| 4017 |
"eval_steps_per_second": 0.248,
|
| 4018 |
"step": 600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4019 |
}
|
| 4020 |
],
|
| 4021 |
"logging_steps": 5,
|
| 4022 |
"max_steps": 1000,
|
| 4023 |
-
"num_input_tokens_seen":
|
| 4024 |
"num_train_epochs": 1,
|
| 4025 |
"save_steps": 100,
|
| 4026 |
"stateful_callbacks": {
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.02497861161828041,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.16587677725118483,
|
| 6 |
"eval_steps": 100,
|
| 7 |
+
"global_step": 700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4016 |
"eval_samples_per_second": 3.956,
|
| 4017 |
"eval_steps_per_second": 0.248,
|
| 4018 |
"step": 600
|
| 4019 |
+
},
|
| 4020 |
+
{
|
| 4021 |
+
"clip_ratio/high_max": 0.0,
|
| 4022 |
+
"clip_ratio/high_mean": 0.0,
|
| 4023 |
+
"clip_ratio/low_mean": 0.0,
|
| 4024 |
+
"clip_ratio/low_min": 0.0,
|
| 4025 |
+
"clip_ratio/region_mean": 0.0,
|
| 4026 |
+
"completions/clipped_ratio": 1.0,
|
| 4027 |
+
"completions/max_length": 32.0,
|
| 4028 |
+
"completions/max_terminated_length": 0.0,
|
| 4029 |
+
"completions/mean_length": 32.0,
|
| 4030 |
+
"completions/mean_terminated_length": 0.0,
|
| 4031 |
+
"completions/min_length": 32.0,
|
| 4032 |
+
"completions/min_terminated_length": 0.0,
|
| 4033 |
+
"epoch": 0.14336492890995262,
|
| 4034 |
+
"grad_norm": 0.1272319108247757,
|
| 4035 |
+
"kl": 0.7216796875,
|
| 4036 |
+
"learning_rate": 8.126186854142752e-05,
|
| 4037 |
+
"loss": 0.0284,
|
| 4038 |
+
"num_tokens": 1033597.0,
|
| 4039 |
+
"reward": 953.1578125,
|
| 4040 |
+
"reward_std": 7.4799497604370115,
|
| 4041 |
+
"rewards/concise_response_reward/mean": 9.994318199157714,
|
| 4042 |
+
"rewards/concise_response_reward/std": 0.03214122056961059,
|
| 4043 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4044 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4045 |
+
"rewards/reward_func_length/mean": 98.56343536376953,
|
| 4046 |
+
"rewards/reward_func_length/std": 1.168010425567627,
|
| 4047 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4048 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4049 |
+
"rewards/reward_short_answers/mean": 9.994318199157714,
|
| 4050 |
+
"rewards/reward_short_answers/std": 0.03214122056961059,
|
| 4051 |
+
"step": 605
|
| 4052 |
+
},
|
| 4053 |
+
{
|
| 4054 |
+
"clip_ratio/high_max": 0.0,
|
| 4055 |
+
"clip_ratio/high_mean": 0.0,
|
| 4056 |
+
"clip_ratio/low_mean": 0.0,
|
| 4057 |
+
"clip_ratio/low_min": 0.0,
|
| 4058 |
+
"clip_ratio/region_mean": 0.0,
|
| 4059 |
+
"completions/clipped_ratio": 1.0,
|
| 4060 |
+
"completions/max_length": 32.0,
|
| 4061 |
+
"completions/max_terminated_length": 0.0,
|
| 4062 |
+
"completions/mean_length": 32.0,
|
| 4063 |
+
"completions/mean_terminated_length": 0.0,
|
| 4064 |
+
"completions/min_length": 32.0,
|
| 4065 |
+
"completions/min_terminated_length": 0.0,
|
| 4066 |
+
"epoch": 0.14454976303317535,
|
| 4067 |
+
"grad_norm": 0.05021500214934349,
|
| 4068 |
+
"kl": 0.66953125,
|
| 4069 |
+
"learning_rate": 7.955039481582097e-05,
|
| 4070 |
+
"loss": 0.0268,
|
| 4071 |
+
"num_tokens": 1042439.0,
|
| 4072 |
+
"reward": 954.049658203125,
|
| 4073 |
+
"reward_std": 5.795430326461792,
|
| 4074 |
+
"rewards/concise_response_reward/mean": 10.0,
|
| 4075 |
+
"rewards/concise_response_reward/std": 0.0,
|
| 4076 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4077 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4078 |
+
"rewards/reward_func_length/mean": 98.65749816894531,
|
| 4079 |
+
"rewards/reward_func_length/std": 1.0676976799964906,
|
| 4080 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4081 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4082 |
+
"rewards/reward_short_answers/mean": 10.0,
|
| 4083 |
+
"rewards/reward_short_answers/std": 0.0,
|
| 4084 |
+
"step": 610
|
| 4085 |
+
},
|
| 4086 |
+
{
|
| 4087 |
+
"clip_ratio/high_max": 0.0,
|
| 4088 |
+
"clip_ratio/high_mean": 0.0,
|
| 4089 |
+
"clip_ratio/low_mean": 0.0,
|
| 4090 |
+
"clip_ratio/low_min": 0.0,
|
| 4091 |
+
"clip_ratio/region_mean": 0.0,
|
| 4092 |
+
"completions/clipped_ratio": 1.0,
|
| 4093 |
+
"completions/max_length": 32.0,
|
| 4094 |
+
"completions/max_terminated_length": 0.0,
|
| 4095 |
+
"completions/mean_length": 32.0,
|
| 4096 |
+
"completions/mean_terminated_length": 0.0,
|
| 4097 |
+
"completions/min_length": 32.0,
|
| 4098 |
+
"completions/min_terminated_length": 0.0,
|
| 4099 |
+
"epoch": 0.1457345971563981,
|
| 4100 |
+
"grad_norm": 0.01850374974310398,
|
| 4101 |
+
"kl": 0.690625,
|
| 4102 |
+
"learning_rate": 7.784515023805328e-05,
|
| 4103 |
+
"loss": 0.0276,
|
| 4104 |
+
"num_tokens": 1051223.0,
|
| 4105 |
+
"reward": 952.8959228515625,
|
| 4106 |
+
"reward_std": 6.792151546478271,
|
| 4107 |
+
"rewards/concise_response_reward/mean": 9.994318199157714,
|
| 4108 |
+
"rewards/concise_response_reward/std": 0.03214122056961059,
|
| 4109 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4110 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4111 |
+
"rewards/reward_func_length/mean": 98.53499755859374,
|
| 4112 |
+
"rewards/reward_func_length/std": 1.3000189661979675,
|
| 4113 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4114 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4115 |
+
"rewards/reward_short_answers/mean": 9.994318199157714,
|
| 4116 |
+
"rewards/reward_short_answers/std": 0.03214122056961059,
|
| 4117 |
+
"step": 615
|
| 4118 |
+
},
|
| 4119 |
+
{
|
| 4120 |
+
"clip_ratio/high_max": 0.0,
|
| 4121 |
+
"clip_ratio/high_mean": 0.0,
|
| 4122 |
+
"clip_ratio/low_mean": 0.0,
|
| 4123 |
+
"clip_ratio/low_min": 0.0,
|
| 4124 |
+
"clip_ratio/region_mean": 0.0,
|
| 4125 |
+
"completions/clipped_ratio": 1.0,
|
| 4126 |
+
"completions/max_length": 32.0,
|
| 4127 |
+
"completions/max_terminated_length": 0.0,
|
| 4128 |
+
"completions/mean_length": 32.0,
|
| 4129 |
+
"completions/mean_terminated_length": 0.0,
|
| 4130 |
+
"completions/min_length": 32.0,
|
| 4131 |
+
"completions/min_terminated_length": 0.0,
|
| 4132 |
+
"epoch": 0.14691943127962084,
|
| 4133 |
+
"grad_norm": 0.13635386526584625,
|
| 4134 |
+
"kl": 0.72109375,
|
| 4135 |
+
"learning_rate": 7.614665424214193e-05,
|
| 4136 |
+
"loss": 0.0288,
|
| 4137 |
+
"num_tokens": 1060561.0,
|
| 4138 |
+
"reward": 951.67314453125,
|
| 4139 |
+
"reward_std": 7.1376995086669925,
|
| 4140 |
+
"rewards/concise_response_reward/mean": 9.98863639831543,
|
| 4141 |
+
"rewards/concise_response_reward/std": 0.044715401530265805,
|
| 4142 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4143 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4144 |
+
"rewards/reward_func_length/mean": 98.40499877929688,
|
| 4145 |
+
"rewards/reward_func_length/std": 1.3004050493240356,
|
| 4146 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4147 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4148 |
+
"rewards/reward_short_answers/mean": 9.98863639831543,
|
| 4149 |
+
"rewards/reward_short_answers/std": 0.044715401530265805,
|
| 4150 |
+
"step": 620
|
| 4151 |
+
},
|
| 4152 |
+
{
|
| 4153 |
+
"clip_ratio/high_max": 0.0,
|
| 4154 |
+
"clip_ratio/high_mean": 0.0,
|
| 4155 |
+
"clip_ratio/low_mean": 0.0,
|
| 4156 |
+
"clip_ratio/low_min": 0.0,
|
| 4157 |
+
"clip_ratio/region_mean": 0.0,
|
| 4158 |
+
"completions/clipped_ratio": 1.0,
|
| 4159 |
+
"completions/max_length": 32.0,
|
| 4160 |
+
"completions/max_terminated_length": 0.0,
|
| 4161 |
+
"completions/mean_length": 32.0,
|
| 4162 |
+
"completions/mean_terminated_length": 0.0,
|
| 4163 |
+
"completions/min_length": 32.0,
|
| 4164 |
+
"completions/min_terminated_length": 0.0,
|
| 4165 |
+
"epoch": 0.1481042654028436,
|
| 4166 |
+
"grad_norm": 0.048541147261857986,
|
| 4167 |
+
"kl": 0.697265625,
|
| 4168 |
+
"learning_rate": 7.445542420642097e-05,
|
| 4169 |
+
"loss": 0.0279,
|
| 4170 |
+
"num_tokens": 1069131.0,
|
| 4171 |
+
"reward": 951.8170288085937,
|
| 4172 |
+
"reward_std": 8.448184394836426,
|
| 4173 |
+
"rewards/concise_response_reward/mean": 9.98863639831543,
|
| 4174 |
+
"rewards/concise_response_reward/std": 0.044715401530265805,
|
| 4175 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4176 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4177 |
+
"rewards/reward_func_length/mean": 98.42062377929688,
|
| 4178 |
+
"rewards/reward_func_length/std": 1.3346670150756836,
|
| 4179 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4180 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4181 |
+
"rewards/reward_short_answers/mean": 9.98863639831543,
|
| 4182 |
+
"rewards/reward_short_answers/std": 0.044715401530265805,
|
| 4183 |
+
"step": 625
|
| 4184 |
+
},
|
| 4185 |
+
{
|
| 4186 |
+
"clip_ratio/high_max": 0.0,
|
| 4187 |
+
"clip_ratio/high_mean": 0.0,
|
| 4188 |
+
"clip_ratio/low_mean": 0.0,
|
| 4189 |
+
"clip_ratio/low_min": 0.0,
|
| 4190 |
+
"clip_ratio/region_mean": 0.0,
|
| 4191 |
+
"completions/clipped_ratio": 1.0,
|
| 4192 |
+
"completions/max_length": 32.0,
|
| 4193 |
+
"completions/max_terminated_length": 0.0,
|
| 4194 |
+
"completions/mean_length": 32.0,
|
| 4195 |
+
"completions/mean_terminated_length": 0.0,
|
| 4196 |
+
"completions/min_length": 32.0,
|
| 4197 |
+
"completions/min_terminated_length": 0.0,
|
| 4198 |
+
"epoch": 0.14928909952606634,
|
| 4199 |
+
"grad_norm": 0.11737528443336487,
|
| 4200 |
+
"kl": 0.756640625,
|
| 4201 |
+
"learning_rate": 7.277197529594257e-05,
|
| 4202 |
+
"loss": 0.0303,
|
| 4203 |
+
"num_tokens": 1077703.0,
|
| 4204 |
+
"reward": 949.5781005859375,
|
| 4205 |
+
"reward_std": 10.019141483306885,
|
| 4206 |
+
"rewards/concise_response_reward/mean": 9.98863639831543,
|
| 4207 |
+
"rewards/concise_response_reward/std": 0.044715401530265805,
|
| 4208 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4209 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4210 |
+
"rewards/reward_func_length/mean": 98.17749786376953,
|
| 4211 |
+
"rewards/reward_func_length/std": 1.5676434755325317,
|
| 4212 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4213 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4214 |
+
"rewards/reward_short_answers/mean": 9.98863639831543,
|
| 4215 |
+
"rewards/reward_short_answers/std": 0.044715401530265805,
|
| 4216 |
+
"step": 630
|
| 4217 |
+
},
|
| 4218 |
+
{
|
| 4219 |
+
"clip_ratio/high_max": 0.0,
|
| 4220 |
+
"clip_ratio/high_mean": 0.0,
|
| 4221 |
+
"clip_ratio/low_mean": 0.0,
|
| 4222 |
+
"clip_ratio/low_min": 0.0,
|
| 4223 |
+
"clip_ratio/region_mean": 0.0,
|
| 4224 |
+
"completions/clipped_ratio": 1.0,
|
| 4225 |
+
"completions/max_length": 32.0,
|
| 4226 |
+
"completions/max_terminated_length": 0.0,
|
| 4227 |
+
"completions/mean_length": 32.0,
|
| 4228 |
+
"completions/mean_terminated_length": 0.0,
|
| 4229 |
+
"completions/min_length": 32.0,
|
| 4230 |
+
"completions/min_terminated_length": 0.0,
|
| 4231 |
+
"epoch": 0.1504739336492891,
|
| 4232 |
+
"grad_norm": 0.17389771342277527,
|
| 4233 |
+
"kl": 0.729296875,
|
| 4234 |
+
"learning_rate": 7.109682030555283e-05,
|
| 4235 |
+
"loss": 0.0291,
|
| 4236 |
+
"num_tokens": 1086523.0,
|
| 4237 |
+
"reward": 949.6324951171875,
|
| 4238 |
+
"reward_std": 10.87011775970459,
|
| 4239 |
+
"rewards/concise_response_reward/mean": 9.994318199157714,
|
| 4240 |
+
"rewards/concise_response_reward/std": 0.03214122056961059,
|
| 4241 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4242 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4243 |
+
"rewards/reward_func_length/mean": 98.18061981201171,
|
| 4244 |
+
"rewards/reward_func_length/std": 1.482282567024231,
|
| 4245 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4246 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4247 |
+
"rewards/reward_short_answers/mean": 9.994318199157714,
|
| 4248 |
+
"rewards/reward_short_answers/std": 0.03214122056961059,
|
| 4249 |
+
"step": 635
|
| 4250 |
+
},
|
| 4251 |
+
{
|
| 4252 |
+
"clip_ratio/high_max": 0.0,
|
| 4253 |
+
"clip_ratio/high_mean": 0.0,
|
| 4254 |
+
"clip_ratio/low_mean": 0.0,
|
| 4255 |
+
"clip_ratio/low_min": 0.0,
|
| 4256 |
+
"clip_ratio/region_mean": 0.0,
|
| 4257 |
+
"completions/clipped_ratio": 1.0,
|
| 4258 |
+
"completions/max_length": 32.0,
|
| 4259 |
+
"completions/max_terminated_length": 0.0,
|
| 4260 |
+
"completions/mean_length": 32.0,
|
| 4261 |
+
"completions/mean_terminated_length": 0.0,
|
| 4262 |
+
"completions/min_length": 32.0,
|
| 4263 |
+
"completions/min_terminated_length": 0.0,
|
| 4264 |
+
"epoch": 0.15165876777251186,
|
| 4265 |
+
"grad_norm": 0.13560406863689423,
|
| 4266 |
+
"kl": 0.76875,
|
| 4267 |
+
"learning_rate": 6.943046950368944e-05,
|
| 4268 |
+
"loss": 0.0308,
|
| 4269 |
+
"num_tokens": 1095301.0,
|
| 4270 |
+
"reward": 951.1200439453125,
|
| 4271 |
+
"reward_std": 7.643131446838379,
|
| 4272 |
+
"rewards/concise_response_reward/mean": 10.0,
|
| 4273 |
+
"rewards/concise_response_reward/std": 0.0,
|
| 4274 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4275 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4276 |
+
"rewards/reward_func_length/mean": 98.33937377929688,
|
| 4277 |
+
"rewards/reward_func_length/std": 1.4927765369415282,
|
| 4278 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4279 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4280 |
+
"rewards/reward_short_answers/mean": 10.0,
|
| 4281 |
+
"rewards/reward_short_answers/std": 0.0,
|
| 4282 |
+
"step": 640
|
| 4283 |
+
},
|
| 4284 |
+
{
|
| 4285 |
+
"clip_ratio/high_max": 0.0,
|
| 4286 |
+
"clip_ratio/high_mean": 0.0,
|
| 4287 |
+
"clip_ratio/low_mean": 0.0,
|
| 4288 |
+
"clip_ratio/low_min": 0.0,
|
| 4289 |
+
"clip_ratio/region_mean": 0.0,
|
| 4290 |
+
"completions/clipped_ratio": 1.0,
|
| 4291 |
+
"completions/max_length": 32.0,
|
| 4292 |
+
"completions/max_terminated_length": 0.0,
|
| 4293 |
+
"completions/mean_length": 32.0,
|
| 4294 |
+
"completions/mean_terminated_length": 0.0,
|
| 4295 |
+
"completions/min_length": 32.0,
|
| 4296 |
+
"completions/min_terminated_length": 0.0,
|
| 4297 |
+
"epoch": 0.1528436018957346,
|
| 4298 |
+
"grad_norm": 0.045086201280355453,
|
| 4299 |
+
"kl": 0.6765625,
|
| 4300 |
+
"learning_rate": 6.77734304769489e-05,
|
| 4301 |
+
"loss": 0.0271,
|
| 4302 |
+
"num_tokens": 1104133.0,
|
| 4303 |
+
"reward": 951.9698120117188,
|
| 4304 |
+
"reward_std": 7.085178184509277,
|
| 4305 |
+
"rewards/concise_response_reward/mean": 9.982954597473144,
|
| 4306 |
+
"rewards/concise_response_reward/std": 0.09642366170883179,
|
| 4307 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4308 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4309 |
+
"rewards/reward_func_length/mean": 98.43999938964843,
|
| 4310 |
+
"rewards/reward_func_length/std": 1.1985483288764953,
|
| 4311 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4312 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4313 |
+
"rewards/reward_short_answers/mean": 9.982954597473144,
|
| 4314 |
+
"rewards/reward_short_answers/std": 0.09642366170883179,
|
| 4315 |
+
"step": 645
|
| 4316 |
+
},
|
| 4317 |
+
{
|
| 4318 |
+
"clip_ratio/high_max": 0.0,
|
| 4319 |
+
"clip_ratio/high_mean": 0.0,
|
| 4320 |
+
"clip_ratio/low_mean": 0.0,
|
| 4321 |
+
"clip_ratio/low_min": 0.0,
|
| 4322 |
+
"clip_ratio/region_mean": 0.0,
|
| 4323 |
+
"completions/clipped_ratio": 1.0,
|
| 4324 |
+
"completions/max_length": 32.0,
|
| 4325 |
+
"completions/max_terminated_length": 0.0,
|
| 4326 |
+
"completions/mean_length": 32.0,
|
| 4327 |
+
"completions/mean_terminated_length": 0.0,
|
| 4328 |
+
"completions/min_length": 32.0,
|
| 4329 |
+
"completions/min_terminated_length": 0.0,
|
| 4330 |
+
"epoch": 0.15402843601895735,
|
| 4331 |
+
"grad_norm": 0.06662766635417938,
|
| 4332 |
+
"kl": 0.7509765625,
|
| 4333 |
+
"learning_rate": 6.612620797547087e-05,
|
| 4334 |
+
"loss": 0.03,
|
| 4335 |
+
"num_tokens": 1112649.0,
|
| 4336 |
+
"reward": 951.80498046875,
|
| 4337 |
+
"reward_std": 8.269881916046142,
|
| 4338 |
+
"rewards/concise_response_reward/mean": 10.0,
|
| 4339 |
+
"rewards/concise_response_reward/std": 0.0,
|
| 4340 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4341 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4342 |
+
"rewards/reward_func_length/mean": 98.41374816894532,
|
| 4343 |
+
"rewards/reward_func_length/std": 1.2879498958587647,
|
| 4344 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4345 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4346 |
+
"rewards/reward_short_answers/mean": 10.0,
|
| 4347 |
+
"rewards/reward_short_answers/std": 0.0,
|
| 4348 |
+
"step": 650
|
| 4349 |
+
},
|
| 4350 |
+
{
|
| 4351 |
+
"clip_ratio/high_max": 0.0,
|
| 4352 |
+
"clip_ratio/high_mean": 0.0,
|
| 4353 |
+
"clip_ratio/low_mean": 0.0,
|
| 4354 |
+
"clip_ratio/low_min": 0.0,
|
| 4355 |
+
"clip_ratio/region_mean": 0.0,
|
| 4356 |
+
"completions/clipped_ratio": 1.0,
|
| 4357 |
+
"completions/max_length": 32.0,
|
| 4358 |
+
"completions/max_terminated_length": 0.0,
|
| 4359 |
+
"completions/mean_length": 32.0,
|
| 4360 |
+
"completions/mean_terminated_length": 0.0,
|
| 4361 |
+
"completions/min_length": 32.0,
|
| 4362 |
+
"completions/min_terminated_length": 0.0,
|
| 4363 |
+
"epoch": 0.1552132701421801,
|
| 4364 |
+
"grad_norm": 0.08734223991632462,
|
| 4365 |
+
"kl": 0.721875,
|
| 4366 |
+
"learning_rate": 6.448930375918631e-05,
|
| 4367 |
+
"loss": 0.0289,
|
| 4368 |
+
"num_tokens": 1121563.0,
|
| 4369 |
+
"reward": 949.6842895507813,
|
| 4370 |
+
"reward_std": 10.617791748046875,
|
| 4371 |
+
"rewards/concise_response_reward/mean": 9.994318199157714,
|
| 4372 |
+
"rewards/concise_response_reward/std": 0.03214122056961059,
|
| 4373 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4374 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4375 |
+
"rewards/reward_func_length/mean": 98.18624725341797,
|
| 4376 |
+
"rewards/reward_func_length/std": 1.4956665992736817,
|
| 4377 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4378 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4379 |
+
"rewards/reward_short_answers/mean": 9.994318199157714,
|
| 4380 |
+
"rewards/reward_short_answers/std": 0.03214122056961059,
|
| 4381 |
+
"step": 655
|
| 4382 |
+
},
|
| 4383 |
+
{
|
| 4384 |
+
"clip_ratio/high_max": 0.0,
|
| 4385 |
+
"clip_ratio/high_mean": 0.0,
|
| 4386 |
+
"clip_ratio/low_mean": 0.0,
|
| 4387 |
+
"clip_ratio/low_min": 0.0,
|
| 4388 |
+
"clip_ratio/region_mean": 0.0,
|
| 4389 |
+
"completions/clipped_ratio": 1.0,
|
| 4390 |
+
"completions/max_length": 32.0,
|
| 4391 |
+
"completions/max_terminated_length": 0.0,
|
| 4392 |
+
"completions/mean_length": 32.0,
|
| 4393 |
+
"completions/mean_terminated_length": 0.0,
|
| 4394 |
+
"completions/min_length": 32.0,
|
| 4395 |
+
"completions/min_terminated_length": 0.0,
|
| 4396 |
+
"epoch": 0.15639810426540285,
|
| 4397 |
+
"grad_norm": 0.12375020235776901,
|
| 4398 |
+
"kl": 0.783984375,
|
| 4399 |
+
"learning_rate": 6.286321644497655e-05,
|
| 4400 |
+
"loss": 0.0313,
|
| 4401 |
+
"num_tokens": 1130465.0,
|
| 4402 |
+
"reward": 945.558056640625,
|
| 4403 |
+
"reward_std": 11.715877628326416,
|
| 4404 |
+
"rewards/concise_response_reward/mean": 9.982954597473144,
|
| 4405 |
+
"rewards/concise_response_reward/std": 0.0768566220998764,
|
| 4406 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4407 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4408 |
+
"rewards/reward_func_length/mean": 97.74375,
|
| 4409 |
+
"rewards/reward_func_length/std": 1.8698417901992799,
|
| 4410 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4411 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4412 |
+
"rewards/reward_short_answers/mean": 9.982954597473144,
|
| 4413 |
+
"rewards/reward_short_answers/std": 0.0768566220998764,
|
| 4414 |
+
"step": 660
|
| 4415 |
+
},
|
| 4416 |
+
{
|
| 4417 |
+
"clip_ratio/high_max": 0.0,
|
| 4418 |
+
"clip_ratio/high_mean": 0.0,
|
| 4419 |
+
"clip_ratio/low_mean": 0.0,
|
| 4420 |
+
"clip_ratio/low_min": 0.0,
|
| 4421 |
+
"clip_ratio/region_mean": 0.0,
|
| 4422 |
+
"completions/clipped_ratio": 1.0,
|
| 4423 |
+
"completions/max_length": 32.0,
|
| 4424 |
+
"completions/max_terminated_length": 0.0,
|
| 4425 |
+
"completions/mean_length": 32.0,
|
| 4426 |
+
"completions/mean_terminated_length": 0.0,
|
| 4427 |
+
"completions/min_length": 32.0,
|
| 4428 |
+
"completions/min_terminated_length": 0.0,
|
| 4429 |
+
"epoch": 0.15758293838862558,
|
| 4430 |
+
"grad_norm": 0.03891945630311966,
|
| 4431 |
+
"kl": 0.734765625,
|
| 4432 |
+
"learning_rate": 6.12484413547897e-05,
|
| 4433 |
+
"loss": 0.0294,
|
| 4434 |
+
"num_tokens": 1138789.0,
|
| 4435 |
+
"reward": 944.3750122070312,
|
| 4436 |
+
"reward_std": 10.409846115112305,
|
| 4437 |
+
"rewards/concise_response_reward/mean": 9.98863639831543,
|
| 4438 |
+
"rewards/concise_response_reward/std": 0.06428244113922119,
|
| 4439 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4440 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4441 |
+
"rewards/reward_func_length/mean": 97.61249694824218,
|
| 4442 |
+
"rewards/reward_func_length/std": 1.8993584871292115,
|
| 4443 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4444 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4445 |
+
"rewards/reward_short_answers/mean": 9.98863639831543,
|
| 4446 |
+
"rewards/reward_short_answers/std": 0.06428244113922119,
|
| 4447 |
+
"step": 665
|
| 4448 |
+
},
|
| 4449 |
+
{
|
| 4450 |
+
"clip_ratio/high_max": 0.0,
|
| 4451 |
+
"clip_ratio/high_mean": 0.0,
|
| 4452 |
+
"clip_ratio/low_mean": 0.0,
|
| 4453 |
+
"clip_ratio/low_min": 0.0,
|
| 4454 |
+
"clip_ratio/region_mean": 0.0,
|
| 4455 |
+
"completions/clipped_ratio": 1.0,
|
| 4456 |
+
"completions/max_length": 32.0,
|
| 4457 |
+
"completions/max_terminated_length": 0.0,
|
| 4458 |
+
"completions/mean_length": 32.0,
|
| 4459 |
+
"completions/mean_terminated_length": 0.0,
|
| 4460 |
+
"completions/min_length": 32.0,
|
| 4461 |
+
"completions/min_terminated_length": 0.0,
|
| 4462 |
+
"epoch": 0.15876777251184834,
|
| 4463 |
+
"grad_norm": 0.18572458624839783,
|
| 4464 |
+
"kl": 0.755859375,
|
| 4465 |
+
"learning_rate": 5.964547036476099e-05,
|
| 4466 |
+
"loss": 0.0302,
|
| 4467 |
+
"num_tokens": 1147299.0,
|
| 4468 |
+
"reward": 949.0652954101563,
|
| 4469 |
+
"reward_std": 10.223407363891601,
|
| 4470 |
+
"rewards/concise_response_reward/mean": 10.0,
|
| 4471 |
+
"rewards/concise_response_reward/std": 0.0,
|
| 4472 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4473 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4474 |
+
"rewards/reward_func_length/mean": 98.11624908447266,
|
| 4475 |
+
"rewards/reward_func_length/std": 1.5753605127334596,
|
| 4476 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4477 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4478 |
+
"rewards/reward_short_answers/mean": 10.0,
|
| 4479 |
+
"rewards/reward_short_answers/std": 0.0,
|
| 4480 |
+
"step": 670
|
| 4481 |
+
},
|
| 4482 |
+
{
|
| 4483 |
+
"clip_ratio/high_max": 0.0,
|
| 4484 |
+
"clip_ratio/high_mean": 0.0,
|
| 4485 |
+
"clip_ratio/low_mean": 0.0,
|
| 4486 |
+
"clip_ratio/low_min": 0.0,
|
| 4487 |
+
"clip_ratio/region_mean": 0.0,
|
| 4488 |
+
"completions/clipped_ratio": 1.0,
|
| 4489 |
+
"completions/max_length": 32.0,
|
| 4490 |
+
"completions/max_terminated_length": 0.0,
|
| 4491 |
+
"completions/mean_length": 32.0,
|
| 4492 |
+
"completions/mean_terminated_length": 0.0,
|
| 4493 |
+
"completions/min_length": 32.0,
|
| 4494 |
+
"completions/min_terminated_length": 0.0,
|
| 4495 |
+
"epoch": 0.15995260663507108,
|
| 4496 |
+
"grad_norm": 0.08468377590179443,
|
| 4497 |
+
"kl": 0.874609375,
|
| 4498 |
+
"learning_rate": 5.805479175538229e-05,
|
| 4499 |
+
"loss": 0.035,
|
| 4500 |
+
"num_tokens": 1155879.0,
|
| 4501 |
+
"reward": 946.0927978515625,
|
| 4502 |
+
"reward_std": 11.903859615325928,
|
| 4503 |
+
"rewards/concise_response_reward/mean": 9.994318199157714,
|
| 4504 |
+
"rewards/concise_response_reward/std": 0.03214122056961059,
|
| 4505 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4506 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4507 |
+
"rewards/reward_func_length/mean": 97.79624481201172,
|
| 4508 |
+
"rewards/reward_func_length/std": 1.7525642395019532,
|
| 4509 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4510 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4511 |
+
"rewards/reward_short_answers/mean": 9.994318199157714,
|
| 4512 |
+
"rewards/reward_short_answers/std": 0.03214122056961059,
|
| 4513 |
+
"step": 675
|
| 4514 |
+
},
|
| 4515 |
+
{
|
| 4516 |
+
"clip_ratio/high_max": 0.0,
|
| 4517 |
+
"clip_ratio/high_mean": 0.0,
|
| 4518 |
+
"clip_ratio/low_mean": 0.0,
|
| 4519 |
+
"clip_ratio/low_min": 0.0,
|
| 4520 |
+
"clip_ratio/region_mean": 0.0,
|
| 4521 |
+
"completions/clipped_ratio": 1.0,
|
| 4522 |
+
"completions/max_length": 32.0,
|
| 4523 |
+
"completions/max_terminated_length": 0.0,
|
| 4524 |
+
"completions/mean_length": 32.0,
|
| 4525 |
+
"completions/mean_terminated_length": 0.0,
|
| 4526 |
+
"completions/min_length": 32.0,
|
| 4527 |
+
"completions/min_terminated_length": 0.0,
|
| 4528 |
+
"epoch": 0.16113744075829384,
|
| 4529 |
+
"grad_norm": 0.1105046197772026,
|
| 4530 |
+
"kl": 0.725390625,
|
| 4531 |
+
"learning_rate": 5.647689006276726e-05,
|
| 4532 |
+
"loss": 0.029,
|
| 4533 |
+
"num_tokens": 1164413.0,
|
| 4534 |
+
"reward": 942.4379760742188,
|
| 4535 |
+
"reward_std": 12.221305656433106,
|
| 4536 |
+
"rewards/concise_response_reward/mean": 9.994318199157714,
|
| 4537 |
+
"rewards/concise_response_reward/std": 0.03214122056961059,
|
| 4538 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4539 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4540 |
+
"rewards/reward_func_length/mean": 97.39937744140624,
|
| 4541 |
+
"rewards/reward_func_length/std": 2.0443582057952883,
|
| 4542 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4543 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4544 |
+
"rewards/reward_short_answers/mean": 9.994318199157714,
|
| 4545 |
+
"rewards/reward_short_answers/std": 0.03214122056961059,
|
| 4546 |
+
"step": 680
|
| 4547 |
+
},
|
| 4548 |
+
{
|
| 4549 |
+
"clip_ratio/high_max": 0.0,
|
| 4550 |
+
"clip_ratio/high_mean": 0.0,
|
| 4551 |
+
"clip_ratio/low_mean": 0.0,
|
| 4552 |
+
"clip_ratio/low_min": 0.0,
|
| 4553 |
+
"clip_ratio/region_mean": 0.0,
|
| 4554 |
+
"completions/clipped_ratio": 1.0,
|
| 4555 |
+
"completions/max_length": 32.0,
|
| 4556 |
+
"completions/max_terminated_length": 0.0,
|
| 4557 |
+
"completions/mean_length": 32.0,
|
| 4558 |
+
"completions/mean_terminated_length": 0.0,
|
| 4559 |
+
"completions/min_length": 32.0,
|
| 4560 |
+
"completions/min_terminated_length": 0.0,
|
| 4561 |
+
"epoch": 0.1623222748815166,
|
| 4562 |
+
"grad_norm": 0.07082920521497726,
|
| 4563 |
+
"kl": 0.771875,
|
| 4564 |
+
"learning_rate": 5.491224593105695e-05,
|
| 4565 |
+
"loss": 0.0309,
|
| 4566 |
+
"num_tokens": 1173043.0,
|
| 4567 |
+
"reward": 944.4613525390625,
|
| 4568 |
+
"reward_std": 13.601359939575195,
|
| 4569 |
+
"rewards/concise_response_reward/mean": 9.98863639831543,
|
| 4570 |
+
"rewards/concise_response_reward/std": 0.06428244113922119,
|
| 4571 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4572 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4573 |
+
"rewards/reward_func_length/mean": 97.62187347412109,
|
| 4574 |
+
"rewards/reward_func_length/std": 1.89430890083313,
|
| 4575 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4576 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4577 |
+
"rewards/reward_short_answers/mean": 9.98863639831543,
|
| 4578 |
+
"rewards/reward_short_answers/std": 0.06428244113922119,
|
| 4579 |
+
"step": 685
|
| 4580 |
+
},
|
| 4581 |
+
{
|
| 4582 |
+
"clip_ratio/high_max": 0.0,
|
| 4583 |
+
"clip_ratio/high_mean": 0.0,
|
| 4584 |
+
"clip_ratio/low_mean": 0.0,
|
| 4585 |
+
"clip_ratio/low_min": 0.0,
|
| 4586 |
+
"clip_ratio/region_mean": 0.0,
|
| 4587 |
+
"completions/clipped_ratio": 1.0,
|
| 4588 |
+
"completions/max_length": 32.0,
|
| 4589 |
+
"completions/max_terminated_length": 0.0,
|
| 4590 |
+
"completions/mean_length": 32.0,
|
| 4591 |
+
"completions/mean_terminated_length": 0.0,
|
| 4592 |
+
"completions/min_length": 32.0,
|
| 4593 |
+
"completions/min_terminated_length": 0.0,
|
| 4594 |
+
"epoch": 0.16350710900473933,
|
| 4595 |
+
"grad_norm": 0.08692283183336258,
|
| 4596 |
+
"kl": 0.671484375,
|
| 4597 |
+
"learning_rate": 5.33613359660109e-05,
|
| 4598 |
+
"loss": 0.0269,
|
| 4599 |
+
"num_tokens": 1182055.0,
|
| 4600 |
+
"reward": 950.5879272460937,
|
| 4601 |
+
"reward_std": 8.786369895935058,
|
| 4602 |
+
"rewards/concise_response_reward/mean": 9.994318199157714,
|
| 4603 |
+
"rewards/concise_response_reward/std": 0.03214122056961059,
|
| 4604 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4605 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4606 |
+
"rewards/reward_func_length/mean": 98.2843734741211,
|
| 4607 |
+
"rewards/reward_func_length/std": 1.3933659553527833,
|
| 4608 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4609 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4610 |
+
"rewards/reward_short_answers/mean": 9.994318199157714,
|
| 4611 |
+
"rewards/reward_short_answers/std": 0.03214122056961059,
|
| 4612 |
+
"step": 690
|
| 4613 |
+
},
|
| 4614 |
+
{
|
| 4615 |
+
"clip_ratio/high_max": 0.0,
|
| 4616 |
+
"clip_ratio/high_mean": 0.0,
|
| 4617 |
+
"clip_ratio/low_mean": 0.0,
|
| 4618 |
+
"clip_ratio/low_min": 0.0,
|
| 4619 |
+
"clip_ratio/region_mean": 0.0,
|
| 4620 |
+
"completions/clipped_ratio": 1.0,
|
| 4621 |
+
"completions/max_length": 32.0,
|
| 4622 |
+
"completions/max_terminated_length": 0.0,
|
| 4623 |
+
"completions/mean_length": 32.0,
|
| 4624 |
+
"completions/mean_terminated_length": 0.0,
|
| 4625 |
+
"completions/min_length": 32.0,
|
| 4626 |
+
"completions/min_terminated_length": 0.0,
|
| 4627 |
+
"epoch": 0.1646919431279621,
|
| 4628 |
+
"grad_norm": 0.03239274397492409,
|
| 4629 |
+
"kl": 0.648828125,
|
| 4630 |
+
"learning_rate": 5.182463258982846e-05,
|
| 4631 |
+
"loss": 0.026,
|
| 4632 |
+
"num_tokens": 1190273.0,
|
| 4633 |
+
"reward": 952.5474487304688,
|
| 4634 |
+
"reward_std": 6.796604633331299,
|
| 4635 |
+
"rewards/concise_response_reward/mean": 10.0,
|
| 4636 |
+
"rewards/concise_response_reward/std": 0.0,
|
| 4637 |
+
"rewards/penalize_unwanted_words/mean": 1.0,
|
| 4638 |
+
"rewards/penalize_unwanted_words/std": 0.0,
|
| 4639 |
+
"rewards/reward_func_length/mean": 98.49437103271484,
|
| 4640 |
+
"rewards/reward_func_length/std": 1.1651976346969604,
|
| 4641 |
+
"rewards/reward_keyword_presence/mean": 0.0,
|
| 4642 |
+
"rewards/reward_keyword_presence/std": 0.0,
|
| 4643 |
+
"rewards/reward_short_answers/mean": 10.0,
|
| 4644 |
+
"rewards/reward_short_answers/std": 0.0,
|
| 4645 |
+
"step": 695
|
| 4646 |
+
},
|
| 4647 |
+
{
|
| 4648 |
+
"epoch": 0.16587677725118483,
|
| 4649 |
+
"grad_norm": 0.12050563842058182,
|
| 4650 |
+
"learning_rate": 5.0302603897244474e-05,
|
| 4651 |
+
"loss": 0.0256,
|
| 4652 |
+
"step": 700
|
| 4653 |
+
},
|
| 4654 |
+
{
|
| 4655 |
+
"epoch": 0.16587677725118483,
|
| 4656 |
+
"eval_clip_ratio/high_max": 0.0,
|
| 4657 |
+
"eval_clip_ratio/high_mean": 0.0,
|
| 4658 |
+
"eval_clip_ratio/low_mean": 0.0,
|
| 4659 |
+
"eval_clip_ratio/low_min": 0.0,
|
| 4660 |
+
"eval_clip_ratio/region_mean": 0.0,
|
| 4661 |
+
"eval_completions/clipped_ratio": 1.0,
|
| 4662 |
+
"eval_completions/max_length": 32.0,
|
| 4663 |
+
"eval_completions/max_terminated_length": 0.0,
|
| 4664 |
+
"eval_completions/mean_length": 32.0,
|
| 4665 |
+
"eval_completions/mean_terminated_length": 0.0,
|
| 4666 |
+
"eval_completions/min_length": 32.0,
|
| 4667 |
+
"eval_completions/min_terminated_length": 0.0,
|
| 4668 |
+
"eval_kl": 0.6245924581692913,
|
| 4669 |
+
"eval_loss": 0.02497861161828041,
|
| 4670 |
+
"eval_num_tokens": 1198575.0,
|
| 4671 |
+
"eval_reward": 955.6451142078309,
|
| 4672 |
+
"eval_reward_std": 4.155148893066748,
|
| 4673 |
+
"eval_rewards/concise_response_reward/mean": 9.998881535267266,
|
| 4674 |
+
"eval_rewards/concise_response_reward/std": 0.004473873304100488,
|
| 4675 |
+
"eval_rewards/penalize_unwanted_words/mean": 1.0,
|
| 4676 |
+
"eval_rewards/penalize_unwanted_words/std": 0.0,
|
| 4677 |
+
"eval_rewards/reward_func_length/mean": 98.83129651527705,
|
| 4678 |
+
"eval_rewards/reward_func_length/std": 0.8231426833828134,
|
| 4679 |
+
"eval_rewards/reward_keyword_presence/mean": 0.0,
|
| 4680 |
+
"eval_rewards/reward_keyword_presence/std": 0.0,
|
| 4681 |
+
"eval_rewards/reward_short_answers/mean": 9.998881535267266,
|
| 4682 |
+
"eval_rewards/reward_short_answers/std": 0.004473873304100488,
|
| 4683 |
+
"eval_runtime": 503.6669,
|
| 4684 |
+
"eval_samples_per_second": 4.021,
|
| 4685 |
+
"eval_steps_per_second": 0.252,
|
| 4686 |
+
"step": 700
|
| 4687 |
}
|
| 4688 |
],
|
| 4689 |
"logging_steps": 5,
|
| 4690 |
"max_steps": 1000,
|
| 4691 |
+
"num_input_tokens_seen": 1198575,
|
| 4692 |
"num_train_epochs": 1,
|
| 4693 |
"save_steps": 100,
|
| 4694 |
"stateful_callbacks": {
|