Upload folder using huggingface_hub
Browse files- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +783 -3
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -29,12 +29,12 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"v_proj",
|
| 33 |
-
"q_proj",
|
| 34 |
-
"k_proj",
|
| 35 |
-
"down_proj",
|
| 36 |
"gate_proj",
|
|
|
|
| 37 |
"up_proj",
|
|
|
|
|
|
|
|
|
|
| 38 |
"o_proj"
|
| 39 |
],
|
| 40 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
"gate_proj",
|
| 33 |
+
"down_proj",
|
| 34 |
"up_proj",
|
| 35 |
+
"q_proj",
|
| 36 |
+
"k_proj",
|
| 37 |
+
"v_proj",
|
| 38 |
"o_proj"
|
| 39 |
],
|
| 40 |
"task_type": "CAUSAL_LM",
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 262406656
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f89e75eace1f37a98140d93962ea46e73cba4f4b8e34e368480bf3f2b1e4cdd
|
| 3 |
size 262406656
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 122872331
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3eb3d4cf9477e021678068cf544673ad23c71724f09a6af6a000805761f348f
|
| 3 |
size 122872331
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:792b3fee8a1554be314683100df2b980f0bfc2f891874430d77a51ba9880a32f
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74de7329a01fdf8f6ecea853bf84d421d0cc36daa4e1fdfaf82ec5c4e05cf81c
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -14828,11 +14828,791 @@
|
|
| 14828 |
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 14829 |
"rewards/quality_reward_func/std": 0.0,
|
| 14830 |
"step": 5700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14831 |
}
|
| 14832 |
],
|
| 14833 |
"logging_steps": 10,
|
| 14834 |
"max_steps": 14544,
|
| 14835 |
-
"num_input_tokens_seen":
|
| 14836 |
"num_train_epochs": 1,
|
| 14837 |
"save_steps": 50,
|
| 14838 |
"stateful_callbacks": {
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.41254125412541254,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 14828 |
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 14829 |
"rewards/quality_reward_func/std": 0.0,
|
| 14830 |
"step": 5700
|
| 14831 |
+
},
|
| 14832 |
+
{
|
| 14833 |
+
"completion_length": 17.5,
|
| 14834 |
+
"completions/clipped_ratio": 0.0,
|
| 14835 |
+
"completions/max_length": 17.5,
|
| 14836 |
+
"completions/max_terminated_length": 17.5,
|
| 14837 |
+
"completions/mean_length": 16.775,
|
| 14838 |
+
"completions/mean_terminated_length": 16.775,
|
| 14839 |
+
"completions/min_length": 16.0,
|
| 14840 |
+
"completions/min_terminated_length": 16.0,
|
| 14841 |
+
"epoch": 0.3926017601760176,
|
| 14842 |
+
"frac_reward_zero_std": 1.0,
|
| 14843 |
+
"grad_norm": 0.0,
|
| 14844 |
+
"kl": 1.1857761025428772,
|
| 14845 |
+
"learning_rate": 3.8062080399291872e-06,
|
| 14846 |
+
"loss": 0.0,
|
| 14847 |
+
"num_tokens": 8192361.0,
|
| 14848 |
+
"reward": 4.099999904632568,
|
| 14849 |
+
"reward_std": 0.0,
|
| 14850 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 14851 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 14852 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 14853 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 14854 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 14855 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 14856 |
+
"step": 5710
|
| 14857 |
+
},
|
| 14858 |
+
{
|
| 14859 |
+
"completion_length": 20.1,
|
| 14860 |
+
"completions/clipped_ratio": 0.0,
|
| 14861 |
+
"completions/max_length": 20.1,
|
| 14862 |
+
"completions/max_terminated_length": 20.1,
|
| 14863 |
+
"completions/mean_length": 18.975,
|
| 14864 |
+
"completions/mean_terminated_length": 18.975,
|
| 14865 |
+
"completions/min_length": 17.4,
|
| 14866 |
+
"completions/min_terminated_length": 17.4,
|
| 14867 |
+
"epoch": 0.3932893289328933,
|
| 14868 |
+
"frac_reward_zero_std": 1.0,
|
| 14869 |
+
"grad_norm": 0.0,
|
| 14870 |
+
"kl": 1.346421904861927,
|
| 14871 |
+
"learning_rate": 3.801088006096989e-06,
|
| 14872 |
+
"loss": 0.0001,
|
| 14873 |
+
"num_tokens": 8204804.0,
|
| 14874 |
+
"reward": 4.099999904632568,
|
| 14875 |
+
"reward_std": 0.0,
|
| 14876 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 14877 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 14878 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 14879 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 14880 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 14881 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 14882 |
+
"step": 5720
|
| 14883 |
+
},
|
| 14884 |
+
{
|
| 14885 |
+
"completion_length": 16.7,
|
| 14886 |
+
"completions/clipped_ratio": 0.0,
|
| 14887 |
+
"completions/max_length": 16.7,
|
| 14888 |
+
"completions/max_terminated_length": 16.7,
|
| 14889 |
+
"completions/mean_length": 15.875,
|
| 14890 |
+
"completions/mean_terminated_length": 15.875,
|
| 14891 |
+
"completions/min_length": 15.4,
|
| 14892 |
+
"completions/min_terminated_length": 15.4,
|
| 14893 |
+
"epoch": 0.39397689768976896,
|
| 14894 |
+
"frac_reward_zero_std": 1.0,
|
| 14895 |
+
"grad_norm": 0.0,
|
| 14896 |
+
"kl": 1.1227647330611945,
|
| 14897 |
+
"learning_rate": 3.7959604768913615e-06,
|
| 14898 |
+
"loss": 0.0,
|
| 14899 |
+
"num_tokens": 8220067.0,
|
| 14900 |
+
"reward": 4.099999904632568,
|
| 14901 |
+
"reward_std": 0.0,
|
| 14902 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 14903 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 14904 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 14905 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 14906 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 14907 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 14908 |
+
"step": 5730
|
| 14909 |
+
},
|
| 14910 |
+
{
|
| 14911 |
+
"completion_length": 18.1,
|
| 14912 |
+
"completions/clipped_ratio": 0.0,
|
| 14913 |
+
"completions/max_length": 18.1,
|
| 14914 |
+
"completions/max_terminated_length": 18.1,
|
| 14915 |
+
"completions/mean_length": 17.1,
|
| 14916 |
+
"completions/mean_terminated_length": 17.1,
|
| 14917 |
+
"completions/min_length": 16.2,
|
| 14918 |
+
"completions/min_terminated_length": 16.2,
|
| 14919 |
+
"epoch": 0.39466446644664466,
|
| 14920 |
+
"frac_reward_zero_std": 1.0,
|
| 14921 |
+
"grad_norm": 0.0,
|
| 14922 |
+
"kl": 1.3379293769598006,
|
| 14923 |
+
"learning_rate": 3.7908254818512323e-06,
|
| 14924 |
+
"loss": 0.0,
|
| 14925 |
+
"num_tokens": 8235871.0,
|
| 14926 |
+
"reward": 4.099999904632568,
|
| 14927 |
+
"reward_std": 0.0,
|
| 14928 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 14929 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 14930 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 14931 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 14932 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 14933 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 14934 |
+
"step": 5740
|
| 14935 |
+
},
|
| 14936 |
+
{
|
| 14937 |
+
"completion_length": 20.6,
|
| 14938 |
+
"completions/clipped_ratio": 0.0,
|
| 14939 |
+
"completions/max_length": 20.6,
|
| 14940 |
+
"completions/max_terminated_length": 20.6,
|
| 14941 |
+
"completions/mean_length": 18.75,
|
| 14942 |
+
"completions/mean_terminated_length": 18.75,
|
| 14943 |
+
"completions/min_length": 16.7,
|
| 14944 |
+
"completions/min_terminated_length": 16.7,
|
| 14945 |
+
"epoch": 0.39535203520352036,
|
| 14946 |
+
"frac_reward_zero_std": 1.0,
|
| 14947 |
+
"grad_norm": 0.0,
|
| 14948 |
+
"kl": 1.1466250203549861,
|
| 14949 |
+
"learning_rate": 3.785683050558541e-06,
|
| 14950 |
+
"loss": 0.0,
|
| 14951 |
+
"num_tokens": 8249645.0,
|
| 14952 |
+
"reward": 4.099999904632568,
|
| 14953 |
+
"reward_std": 0.0,
|
| 14954 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 14955 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 14956 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 14957 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 14958 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 14959 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 14960 |
+
"step": 5750
|
| 14961 |
+
},
|
| 14962 |
+
{
|
| 14963 |
+
"completion_length": 15.5,
|
| 14964 |
+
"completions/clipped_ratio": 0.0,
|
| 14965 |
+
"completions/max_length": 15.5,
|
| 14966 |
+
"completions/max_terminated_length": 15.5,
|
| 14967 |
+
"completions/mean_length": 15.05,
|
| 14968 |
+
"completions/mean_terminated_length": 15.05,
|
| 14969 |
+
"completions/min_length": 14.7,
|
| 14970 |
+
"completions/min_terminated_length": 14.7,
|
| 14971 |
+
"epoch": 0.39603960396039606,
|
| 14972 |
+
"frac_reward_zero_std": 1.0,
|
| 14973 |
+
"grad_norm": 0.0,
|
| 14974 |
+
"kl": 1.3106171108782292,
|
| 14975 |
+
"learning_rate": 3.7805332126380647e-06,
|
| 14976 |
+
"loss": 0.0,
|
| 14977 |
+
"num_tokens": 8262587.0,
|
| 14978 |
+
"reward": 4.099999904632568,
|
| 14979 |
+
"reward_std": 0.0,
|
| 14980 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 14981 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 14982 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 14983 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 14984 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 14985 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 14986 |
+
"step": 5760
|
| 14987 |
+
},
|
| 14988 |
+
{
|
| 14989 |
+
"completion_length": 20.3,
|
| 14990 |
+
"completions/clipped_ratio": 0.0,
|
| 14991 |
+
"completions/max_length": 20.3,
|
| 14992 |
+
"completions/max_terminated_length": 20.3,
|
| 14993 |
+
"completions/mean_length": 17.925,
|
| 14994 |
+
"completions/mean_terminated_length": 17.925,
|
| 14995 |
+
"completions/min_length": 15.4,
|
| 14996 |
+
"completions/min_terminated_length": 15.4,
|
| 14997 |
+
"epoch": 0.3967271727172717,
|
| 14998 |
+
"frac_reward_zero_std": 1.0,
|
| 14999 |
+
"grad_norm": 0.0,
|
| 15000 |
+
"kl": 1.1861035495996475,
|
| 15001 |
+
"learning_rate": 3.775375997757249e-06,
|
| 15002 |
+
"loss": 0.0,
|
| 15003 |
+
"num_tokens": 8276160.0,
|
| 15004 |
+
"reward": 4.099999904632568,
|
| 15005 |
+
"reward_std": 0.0,
|
| 15006 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15007 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15008 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15009 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15010 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15011 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15012 |
+
"step": 5770
|
| 15013 |
+
},
|
| 15014 |
+
{
|
| 15015 |
+
"completion_length": 19.8,
|
| 15016 |
+
"completions/clipped_ratio": 0.0,
|
| 15017 |
+
"completions/max_length": 19.8,
|
| 15018 |
+
"completions/max_terminated_length": 19.8,
|
| 15019 |
+
"completions/mean_length": 17.475,
|
| 15020 |
+
"completions/mean_terminated_length": 17.475,
|
| 15021 |
+
"completions/min_length": 15.8,
|
| 15022 |
+
"completions/min_terminated_length": 15.8,
|
| 15023 |
+
"epoch": 0.3974147414741474,
|
| 15024 |
+
"frac_reward_zero_std": 1.0,
|
| 15025 |
+
"grad_norm": 0.0,
|
| 15026 |
+
"kl": 1.0805307626724243,
|
| 15027 |
+
"learning_rate": 3.7702114356260387e-06,
|
| 15028 |
+
"loss": 0.0,
|
| 15029 |
+
"num_tokens": 8290663.0,
|
| 15030 |
+
"reward": 4.099999904632568,
|
| 15031 |
+
"reward_std": 0.0,
|
| 15032 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15033 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15034 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15035 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15036 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15037 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15038 |
+
"step": 5780
|
| 15039 |
+
},
|
| 15040 |
+
{
|
| 15041 |
+
"completion_length": 17.0,
|
| 15042 |
+
"completions/clipped_ratio": 0.0,
|
| 15043 |
+
"completions/max_length": 17.0,
|
| 15044 |
+
"completions/max_terminated_length": 17.0,
|
| 15045 |
+
"completions/mean_length": 16.075,
|
| 15046 |
+
"completions/mean_terminated_length": 16.075,
|
| 15047 |
+
"completions/min_length": 15.2,
|
| 15048 |
+
"completions/min_terminated_length": 15.2,
|
| 15049 |
+
"epoch": 0.3981023102310231,
|
| 15050 |
+
"frac_reward_zero_std": 1.0,
|
| 15051 |
+
"grad_norm": 0.0,
|
| 15052 |
+
"kl": 1.2187039345502853,
|
| 15053 |
+
"learning_rate": 3.7650395559967036e-06,
|
| 15054 |
+
"loss": 0.0,
|
| 15055 |
+
"num_tokens": 8301238.0,
|
| 15056 |
+
"reward": 4.099999904632568,
|
| 15057 |
+
"reward_std": 0.0,
|
| 15058 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15059 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15060 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15061 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15062 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15063 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15064 |
+
"step": 5790
|
| 15065 |
+
},
|
| 15066 |
+
{
|
| 15067 |
+
"completion_length": 18.1,
|
| 15068 |
+
"completions/clipped_ratio": 0.0,
|
| 15069 |
+
"completions/max_length": 18.1,
|
| 15070 |
+
"completions/max_terminated_length": 18.1,
|
| 15071 |
+
"completions/mean_length": 17.05,
|
| 15072 |
+
"completions/mean_terminated_length": 17.05,
|
| 15073 |
+
"completions/min_length": 16.0,
|
| 15074 |
+
"completions/min_terminated_length": 16.0,
|
| 15075 |
+
"epoch": 0.3987898789878988,
|
| 15076 |
+
"frac_reward_zero_std": 1.0,
|
| 15077 |
+
"grad_norm": 0.0,
|
| 15078 |
+
"kl": 1.1940217852592467,
|
| 15079 |
+
"learning_rate": 3.759860388663668e-06,
|
| 15080 |
+
"loss": 0.0,
|
| 15081 |
+
"num_tokens": 8313336.0,
|
| 15082 |
+
"reward": 4.099999904632568,
|
| 15083 |
+
"reward_std": 0.0,
|
| 15084 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15085 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15086 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15087 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15088 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15089 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15090 |
+
"step": 5800
|
| 15091 |
+
},
|
| 15092 |
+
{
|
| 15093 |
+
"completion_length": 17.5,
|
| 15094 |
+
"completions/clipped_ratio": 0.0,
|
| 15095 |
+
"completions/max_length": 17.5,
|
| 15096 |
+
"completions/max_terminated_length": 17.5,
|
| 15097 |
+
"completions/mean_length": 17.025,
|
| 15098 |
+
"completions/mean_terminated_length": 17.025,
|
| 15099 |
+
"completions/min_length": 16.6,
|
| 15100 |
+
"completions/min_terminated_length": 16.6,
|
| 15101 |
+
"epoch": 0.39947744774477445,
|
| 15102 |
+
"frac_reward_zero_std": 1.0,
|
| 15103 |
+
"grad_norm": 0.0,
|
| 15104 |
+
"kl": 1.0611320044845343,
|
| 15105 |
+
"learning_rate": 3.754673963463341e-06,
|
| 15106 |
+
"loss": 0.0,
|
| 15107 |
+
"num_tokens": 8327733.0,
|
| 15108 |
+
"reward": 4.099999904632568,
|
| 15109 |
+
"reward_std": 0.0,
|
| 15110 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15111 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15112 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15113 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15114 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15115 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15116 |
+
"step": 5810
|
| 15117 |
+
},
|
| 15118 |
+
{
|
| 15119 |
+
"completion_length": 18.9,
|
| 15120 |
+
"completions/clipped_ratio": 0.0,
|
| 15121 |
+
"completions/max_length": 18.9,
|
| 15122 |
+
"completions/max_terminated_length": 18.9,
|
| 15123 |
+
"completions/mean_length": 17.125,
|
| 15124 |
+
"completions/mean_terminated_length": 17.125,
|
| 15125 |
+
"completions/min_length": 16.1,
|
| 15126 |
+
"completions/min_terminated_length": 16.1,
|
| 15127 |
+
"epoch": 0.40016501650165015,
|
| 15128 |
+
"frac_reward_zero_std": 1.0,
|
| 15129 |
+
"grad_norm": 0.0,
|
| 15130 |
+
"kl": 1.1921575225889682,
|
| 15131 |
+
"learning_rate": 3.749480310273943e-06,
|
| 15132 |
+
"loss": 0.0,
|
| 15133 |
+
"num_tokens": 8341750.0,
|
| 15134 |
+
"reward": 4.099999904632568,
|
| 15135 |
+
"reward_std": 0.0,
|
| 15136 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15137 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15138 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15139 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15140 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15141 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15142 |
+
"step": 5820
|
| 15143 |
+
},
|
| 15144 |
+
{
|
| 15145 |
+
"completion_length": 17.3,
|
| 15146 |
+
"completions/clipped_ratio": 0.0,
|
| 15147 |
+
"completions/max_length": 17.3,
|
| 15148 |
+
"completions/max_terminated_length": 17.3,
|
| 15149 |
+
"completions/mean_length": 16.35,
|
| 15150 |
+
"completions/mean_terminated_length": 16.35,
|
| 15151 |
+
"completions/min_length": 15.6,
|
| 15152 |
+
"completions/min_terminated_length": 15.6,
|
| 15153 |
+
"epoch": 0.40085258525852585,
|
| 15154 |
+
"frac_reward_zero_std": 1.0,
|
| 15155 |
+
"grad_norm": 0.0,
|
| 15156 |
+
"kl": 0.9138251326978206,
|
| 15157 |
+
"learning_rate": 3.7442794590153326e-06,
|
| 15158 |
+
"loss": 0.0,
|
| 15159 |
+
"num_tokens": 8356848.0,
|
| 15160 |
+
"reward": 4.099999904632568,
|
| 15161 |
+
"reward_std": 0.0,
|
| 15162 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15163 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15164 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15165 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15166 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15167 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15168 |
+
"step": 5830
|
| 15169 |
+
},
|
| 15170 |
+
{
|
| 15171 |
+
"completion_length": 20.0,
|
| 15172 |
+
"completions/clipped_ratio": 0.0,
|
| 15173 |
+
"completions/max_length": 20.0,
|
| 15174 |
+
"completions/max_terminated_length": 20.0,
|
| 15175 |
+
"completions/mean_length": 18.2,
|
| 15176 |
+
"completions/mean_terminated_length": 18.2,
|
| 15177 |
+
"completions/min_length": 16.3,
|
| 15178 |
+
"completions/min_terminated_length": 16.3,
|
| 15179 |
+
"epoch": 0.40154015401540155,
|
| 15180 |
+
"frac_reward_zero_std": 1.0,
|
| 15181 |
+
"grad_norm": 0.0,
|
| 15182 |
+
"kl": 1.1536221474409103,
|
| 15183 |
+
"learning_rate": 3.739071439648836e-06,
|
| 15184 |
+
"loss": 0.0,
|
| 15185 |
+
"num_tokens": 8372328.0,
|
| 15186 |
+
"reward": 4.099999904632568,
|
| 15187 |
+
"reward_std": 0.0,
|
| 15188 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15189 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15190 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15191 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15192 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15193 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15194 |
+
"step": 5840
|
| 15195 |
+
},
|
| 15196 |
+
{
|
| 15197 |
+
"completion_length": 19.8,
|
| 15198 |
+
"completions/clipped_ratio": 0.0,
|
| 15199 |
+
"completions/max_length": 19.8,
|
| 15200 |
+
"completions/max_terminated_length": 19.8,
|
| 15201 |
+
"completions/mean_length": 16.325,
|
| 15202 |
+
"completions/mean_terminated_length": 16.325,
|
| 15203 |
+
"completions/min_length": 14.6,
|
| 15204 |
+
"completions/min_terminated_length": 14.6,
|
| 15205 |
+
"epoch": 0.40222772277227725,
|
| 15206 |
+
"frac_reward_zero_std": 1.0,
|
| 15207 |
+
"grad_norm": 0.0,
|
| 15208 |
+
"kl": 1.0150370292365551,
|
| 15209 |
+
"learning_rate": 3.733856282177074e-06,
|
| 15210 |
+
"loss": 0.0,
|
| 15211 |
+
"num_tokens": 8387829.0,
|
| 15212 |
+
"reward": 4.099999904632568,
|
| 15213 |
+
"reward_std": 0.0,
|
| 15214 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15215 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15216 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15217 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15218 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15219 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15220 |
+
"step": 5850
|
| 15221 |
+
},
|
| 15222 |
+
{
|
| 15223 |
+
"completion_length": 19.6,
|
| 15224 |
+
"completions/clipped_ratio": 0.0,
|
| 15225 |
+
"completions/max_length": 19.6,
|
| 15226 |
+
"completions/max_terminated_length": 19.6,
|
| 15227 |
+
"completions/mean_length": 18.0,
|
| 15228 |
+
"completions/mean_terminated_length": 18.0,
|
| 15229 |
+
"completions/min_length": 16.1,
|
| 15230 |
+
"completions/min_terminated_length": 16.1,
|
| 15231 |
+
"epoch": 0.4029152915291529,
|
| 15232 |
+
"frac_reward_zero_std": 1.0,
|
| 15233 |
+
"grad_norm": 0.0,
|
| 15234 |
+
"kl": 1.3025204107165336,
|
| 15235 |
+
"learning_rate": 3.7286340166437907e-06,
|
| 15236 |
+
"loss": 0.0,
|
| 15237 |
+
"num_tokens": 8402069.0,
|
| 15238 |
+
"reward": 4.099999904632568,
|
| 15239 |
+
"reward_std": 0.0,
|
| 15240 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15241 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15242 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15243 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15244 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15245 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15246 |
+
"step": 5860
|
| 15247 |
+
},
|
| 15248 |
+
{
|
| 15249 |
+
"completion_length": 20.2,
|
| 15250 |
+
"completions/clipped_ratio": 0.0,
|
| 15251 |
+
"completions/max_length": 20.2,
|
| 15252 |
+
"completions/max_terminated_length": 20.2,
|
| 15253 |
+
"completions/mean_length": 17.9,
|
| 15254 |
+
"completions/mean_terminated_length": 17.9,
|
| 15255 |
+
"completions/min_length": 16.3,
|
| 15256 |
+
"completions/min_terminated_length": 16.3,
|
| 15257 |
+
"epoch": 0.4036028602860286,
|
| 15258 |
+
"frac_reward_zero_std": 1.0,
|
| 15259 |
+
"grad_norm": 0.0,
|
| 15260 |
+
"kl": 1.1302866250276566,
|
| 15261 |
+
"learning_rate": 3.723404673133674e-06,
|
| 15262 |
+
"loss": 0.0,
|
| 15263 |
+
"num_tokens": 8416929.0,
|
| 15264 |
+
"reward": 4.099999904632568,
|
| 15265 |
+
"reward_std": 0.0,
|
| 15266 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15267 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15268 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15269 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15270 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15271 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15272 |
+
"step": 5870
|
| 15273 |
+
},
|
| 15274 |
+
{
|
| 15275 |
+
"completion_length": 19.3,
|
| 15276 |
+
"completions/clipped_ratio": 0.0,
|
| 15277 |
+
"completions/max_length": 19.3,
|
| 15278 |
+
"completions/max_terminated_length": 19.3,
|
| 15279 |
+
"completions/mean_length": 17.15,
|
| 15280 |
+
"completions/mean_terminated_length": 17.15,
|
| 15281 |
+
"completions/min_length": 16.0,
|
| 15282 |
+
"completions/min_terminated_length": 16.0,
|
| 15283 |
+
"epoch": 0.4042904290429043,
|
| 15284 |
+
"frac_reward_zero_std": 1.0,
|
| 15285 |
+
"grad_norm": 0.0,
|
| 15286 |
+
"kl": 1.0556719139218331,
|
| 15287 |
+
"learning_rate": 3.7181682817721915e-06,
|
| 15288 |
+
"loss": 0.0,
|
| 15289 |
+
"num_tokens": 8433219.0,
|
| 15290 |
+
"reward": 4.099999904632568,
|
| 15291 |
+
"reward_std": 0.0,
|
| 15292 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15293 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15294 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15295 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15296 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15297 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15298 |
+
"step": 5880
|
| 15299 |
+
},
|
| 15300 |
+
{
|
| 15301 |
+
"completion_length": 20.4,
|
| 15302 |
+
"completions/clipped_ratio": 0.0,
|
| 15303 |
+
"completions/max_length": 20.4,
|
| 15304 |
+
"completions/max_terminated_length": 20.4,
|
| 15305 |
+
"completions/mean_length": 17.95,
|
| 15306 |
+
"completions/mean_terminated_length": 17.95,
|
| 15307 |
+
"completions/min_length": 15.5,
|
| 15308 |
+
"completions/min_terminated_length": 15.5,
|
| 15309 |
+
"epoch": 0.40497799779978,
|
| 15310 |
+
"frac_reward_zero_std": 1.0,
|
| 15311 |
+
"grad_norm": 0.0,
|
| 15312 |
+
"kl": 1.1209779269993305,
|
| 15313 |
+
"learning_rate": 3.712924872725411e-06,
|
| 15314 |
+
"loss": 0.0,
|
| 15315 |
+
"num_tokens": 8448301.0,
|
| 15316 |
+
"reward": 4.099999904632568,
|
| 15317 |
+
"reward_std": 0.0,
|
| 15318 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15319 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15320 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15321 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15322 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15323 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15324 |
+
"step": 5890
|
| 15325 |
+
},
|
| 15326 |
+
{
|
| 15327 |
+
"completion_length": 19.9,
|
| 15328 |
+
"completions/clipped_ratio": 0.0,
|
| 15329 |
+
"completions/max_length": 19.9,
|
| 15330 |
+
"completions/max_terminated_length": 19.9,
|
| 15331 |
+
"completions/mean_length": 17.45,
|
| 15332 |
+
"completions/mean_terminated_length": 17.45,
|
| 15333 |
+
"completions/min_length": 15.2,
|
| 15334 |
+
"completions/min_terminated_length": 15.2,
|
| 15335 |
+
"epoch": 0.40566556655665564,
|
| 15336 |
+
"frac_reward_zero_std": 1.0,
|
| 15337 |
+
"grad_norm": 0.0,
|
| 15338 |
+
"kl": 1.1055759094655513,
|
| 15339 |
+
"learning_rate": 3.7076744761998268e-06,
|
| 15340 |
+
"loss": 0.0,
|
| 15341 |
+
"num_tokens": 8461651.0,
|
| 15342 |
+
"reward": 4.099999904632568,
|
| 15343 |
+
"reward_std": 0.0,
|
| 15344 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15345 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15346 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15347 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15348 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15349 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15350 |
+
"step": 5900
|
| 15351 |
+
},
|
| 15352 |
+
{
|
| 15353 |
+
"completion_length": 18.3,
|
| 15354 |
+
"completions/clipped_ratio": 0.0,
|
| 15355 |
+
"completions/max_length": 18.3,
|
| 15356 |
+
"completions/max_terminated_length": 18.3,
|
| 15357 |
+
"completions/mean_length": 16.625,
|
| 15358 |
+
"completions/mean_terminated_length": 16.625,
|
| 15359 |
+
"completions/min_length": 15.4,
|
| 15360 |
+
"completions/min_terminated_length": 15.4,
|
| 15361 |
+
"epoch": 0.40635313531353134,
|
| 15362 |
+
"frac_reward_zero_std": 1.0,
|
| 15363 |
+
"grad_norm": 0.0,
|
| 15364 |
+
"kl": 1.0736303746700286,
|
| 15365 |
+
"learning_rate": 3.7024171224421884e-06,
|
| 15366 |
+
"loss": 0.0,
|
| 15367 |
+
"num_tokens": 8475424.0,
|
| 15368 |
+
"reward": 4.099999904632568,
|
| 15369 |
+
"reward_std": 0.0,
|
| 15370 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15371 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15372 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15373 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15374 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15375 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15376 |
+
"step": 5910
|
| 15377 |
+
},
|
| 15378 |
+
{
|
| 15379 |
+
"completion_length": 17.2,
|
| 15380 |
+
"completions/clipped_ratio": 0.0,
|
| 15381 |
+
"completions/max_length": 17.2,
|
| 15382 |
+
"completions/max_terminated_length": 17.2,
|
| 15383 |
+
"completions/mean_length": 16.425,
|
| 15384 |
+
"completions/mean_terminated_length": 16.425,
|
| 15385 |
+
"completions/min_length": 15.7,
|
| 15386 |
+
"completions/min_terminated_length": 15.7,
|
| 15387 |
+
"epoch": 0.40704070407040704,
|
| 15388 |
+
"frac_reward_zero_std": 1.0,
|
| 15389 |
+
"grad_norm": 0.0,
|
| 15390 |
+
"kl": 1.2711664289236069,
|
| 15391 |
+
"learning_rate": 3.6971528417393254e-06,
|
| 15392 |
+
"loss": 0.0,
|
| 15393 |
+
"num_tokens": 8490933.0,
|
| 15394 |
+
"reward": 4.099999904632568,
|
| 15395 |
+
"reward_std": 0.0,
|
| 15396 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15397 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15398 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15399 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15400 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15401 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15402 |
+
"step": 5920
|
| 15403 |
+
},
|
| 15404 |
+
{
|
| 15405 |
+
"completion_length": 21.2,
|
| 15406 |
+
"completions/clipped_ratio": 0.0,
|
| 15407 |
+
"completions/max_length": 21.2,
|
| 15408 |
+
"completions/max_terminated_length": 21.2,
|
| 15409 |
+
"completions/mean_length": 18.575,
|
| 15410 |
+
"completions/mean_terminated_length": 18.575,
|
| 15411 |
+
"completions/min_length": 15.8,
|
| 15412 |
+
"completions/min_terminated_length": 15.8,
|
| 15413 |
+
"epoch": 0.40772827282728275,
|
| 15414 |
+
"frac_reward_zero_std": 1.0,
|
| 15415 |
+
"grad_norm": 0.0,
|
| 15416 |
+
"kl": 0.9545292537659407,
|
| 15417 |
+
"learning_rate": 3.6918816644179707e-06,
|
| 15418 |
+
"loss": 0.0,
|
| 15419 |
+
"num_tokens": 8504496.0,
|
| 15420 |
+
"reward": 4.099999904632568,
|
| 15421 |
+
"reward_std": 0.0,
|
| 15422 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15423 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15424 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15425 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15426 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15427 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15428 |
+
"step": 5930
|
| 15429 |
+
},
|
| 15430 |
+
{
|
| 15431 |
+
"completion_length": 17.7,
|
| 15432 |
+
"completions/clipped_ratio": 0.0,
|
| 15433 |
+
"completions/max_length": 17.7,
|
| 15434 |
+
"completions/max_terminated_length": 17.7,
|
| 15435 |
+
"completions/mean_length": 16.425,
|
| 15436 |
+
"completions/mean_terminated_length": 16.425,
|
| 15437 |
+
"completions/min_length": 14.7,
|
| 15438 |
+
"completions/min_terminated_length": 14.7,
|
| 15439 |
+
"epoch": 0.4084158415841584,
|
| 15440 |
+
"frac_reward_zero_std": 1.0,
|
| 15441 |
+
"grad_norm": 0.0,
|
| 15442 |
+
"kl": 1.1604718565940857,
|
| 15443 |
+
"learning_rate": 3.686603620844589e-06,
|
| 15444 |
+
"loss": 0.0,
|
| 15445 |
+
"num_tokens": 8517765.0,
|
| 15446 |
+
"reward": 4.099999904632568,
|
| 15447 |
+
"reward_std": 0.0,
|
| 15448 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15449 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15450 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15451 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15452 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15453 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15454 |
+
"step": 5940
|
| 15455 |
+
},
|
| 15456 |
+
{
|
| 15457 |
+
"completion_length": 17.5,
|
| 15458 |
+
"completions/clipped_ratio": 0.0,
|
| 15459 |
+
"completions/max_length": 17.5,
|
| 15460 |
+
"completions/max_terminated_length": 17.5,
|
| 15461 |
+
"completions/mean_length": 16.55,
|
| 15462 |
+
"completions/mean_terminated_length": 16.55,
|
| 15463 |
+
"completions/min_length": 15.5,
|
| 15464 |
+
"completions/min_terminated_length": 15.5,
|
| 15465 |
+
"epoch": 0.4091034103410341,
|
| 15466 |
+
"frac_reward_zero_std": 1.0,
|
| 15467 |
+
"grad_norm": 0.0,
|
| 15468 |
+
"kl": 1.387231619283557,
|
| 15469 |
+
"learning_rate": 3.6813187414252e-06,
|
| 15470 |
+
"loss": 0.0,
|
| 15471 |
+
"num_tokens": 8530935.0,
|
| 15472 |
+
"reward": 4.099999904632568,
|
| 15473 |
+
"reward_std": 0.0,
|
| 15474 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15475 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15476 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15477 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15478 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15479 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15480 |
+
"step": 5950
|
| 15481 |
+
},
|
| 15482 |
+
{
|
| 15483 |
+
"completion_length": 18.0,
|
| 15484 |
+
"completions/clipped_ratio": 0.0,
|
| 15485 |
+
"completions/max_length": 18.0,
|
| 15486 |
+
"completions/max_terminated_length": 18.0,
|
| 15487 |
+
"completions/mean_length": 16.7,
|
| 15488 |
+
"completions/mean_terminated_length": 16.7,
|
| 15489 |
+
"completions/min_length": 15.3,
|
| 15490 |
+
"completions/min_terminated_length": 15.3,
|
| 15491 |
+
"epoch": 0.4097909790979098,
|
| 15492 |
+
"frac_reward_zero_std": 1.0,
|
| 15493 |
+
"grad_norm": 0.0,
|
| 15494 |
+
"kl": 1.3967902317643166,
|
| 15495 |
+
"learning_rate": 3.6760270566052037e-06,
|
| 15496 |
+
"loss": 0.0,
|
| 15497 |
+
"num_tokens": 8544803.0,
|
| 15498 |
+
"reward": 4.099999904632568,
|
| 15499 |
+
"reward_std": 0.0,
|
| 15500 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15501 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15502 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15503 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15504 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15505 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15506 |
+
"step": 5960
|
| 15507 |
+
},
|
| 15508 |
+
{
|
| 15509 |
+
"completion_length": 17.8,
|
| 15510 |
+
"completions/clipped_ratio": 0.0,
|
| 15511 |
+
"completions/max_length": 17.8,
|
| 15512 |
+
"completions/max_terminated_length": 17.8,
|
| 15513 |
+
"completions/mean_length": 16.275,
|
| 15514 |
+
"completions/mean_terminated_length": 16.275,
|
| 15515 |
+
"completions/min_length": 14.7,
|
| 15516 |
+
"completions/min_terminated_length": 14.7,
|
| 15517 |
+
"epoch": 0.4104785478547855,
|
| 15518 |
+
"frac_reward_zero_std": 1.0,
|
| 15519 |
+
"grad_norm": 0.0,
|
| 15520 |
+
"kl": 1.1969308275729418,
|
| 15521 |
+
"learning_rate": 3.670728596869205e-06,
|
| 15522 |
+
"loss": 0.0,
|
| 15523 |
+
"num_tokens": 8558642.0,
|
| 15524 |
+
"reward": 4.099999904632568,
|
| 15525 |
+
"reward_std": 0.0,
|
| 15526 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15527 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15528 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15529 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15530 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15531 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15532 |
+
"step": 5970
|
| 15533 |
+
},
|
| 15534 |
+
{
|
| 15535 |
+
"completion_length": 17.8,
|
| 15536 |
+
"completions/clipped_ratio": 0.0,
|
| 15537 |
+
"completions/max_length": 17.8,
|
| 15538 |
+
"completions/max_terminated_length": 17.8,
|
| 15539 |
+
"completions/mean_length": 15.625,
|
| 15540 |
+
"completions/mean_terminated_length": 15.625,
|
| 15541 |
+
"completions/min_length": 14.0,
|
| 15542 |
+
"completions/min_terminated_length": 14.0,
|
| 15543 |
+
"epoch": 0.4111661166116612,
|
| 15544 |
+
"frac_reward_zero_std": 1.0,
|
| 15545 |
+
"grad_norm": 0.0,
|
| 15546 |
+
"kl": 1.0570856800302864,
|
| 15547 |
+
"learning_rate": 3.6654233927408377e-06,
|
| 15548 |
+
"loss": 0.0,
|
| 15549 |
+
"num_tokens": 8572351.0,
|
| 15550 |
+
"reward": 4.099999904632568,
|
| 15551 |
+
"reward_std": 0.0,
|
| 15552 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15553 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15554 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15555 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15556 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15557 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15558 |
+
"step": 5980
|
| 15559 |
+
},
|
| 15560 |
+
{
|
| 15561 |
+
"completion_length": 19.0,
|
| 15562 |
+
"completions/clipped_ratio": 0.0,
|
| 15563 |
+
"completions/max_length": 19.0,
|
| 15564 |
+
"completions/max_terminated_length": 19.0,
|
| 15565 |
+
"completions/mean_length": 17.35,
|
| 15566 |
+
"completions/mean_terminated_length": 17.35,
|
| 15567 |
+
"completions/min_length": 16.4,
|
| 15568 |
+
"completions/min_terminated_length": 16.4,
|
| 15569 |
+
"epoch": 0.41185368536853684,
|
| 15570 |
+
"frac_reward_zero_std": 1.0,
|
| 15571 |
+
"grad_norm": 0.0,
|
| 15572 |
+
"kl": 1.3443511426448822,
|
| 15573 |
+
"learning_rate": 3.66011147478259e-06,
|
| 15574 |
+
"loss": 0.0,
|
| 15575 |
+
"num_tokens": 8588401.0,
|
| 15576 |
+
"reward": 4.099999904632568,
|
| 15577 |
+
"reward_std": 0.0,
|
| 15578 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15579 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15580 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15581 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15582 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15583 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15584 |
+
"step": 5990
|
| 15585 |
+
},
|
| 15586 |
+
{
|
| 15587 |
+
"completion_length": 17.9,
|
| 15588 |
+
"completions/clipped_ratio": 0.0,
|
| 15589 |
+
"completions/max_length": 17.9,
|
| 15590 |
+
"completions/max_terminated_length": 17.9,
|
| 15591 |
+
"completions/mean_length": 16.775,
|
| 15592 |
+
"completions/mean_terminated_length": 16.775,
|
| 15593 |
+
"completions/min_length": 16.4,
|
| 15594 |
+
"completions/min_terminated_length": 16.4,
|
| 15595 |
+
"epoch": 0.41254125412541254,
|
| 15596 |
+
"frac_reward_zero_std": 1.0,
|
| 15597 |
+
"grad_norm": 0.0,
|
| 15598 |
+
"kl": 1.286434081196785,
|
| 15599 |
+
"learning_rate": 3.654792873595627e-06,
|
| 15600 |
+
"loss": 0.0,
|
| 15601 |
+
"num_tokens": 8604144.0,
|
| 15602 |
+
"reward": 4.099999904632568,
|
| 15603 |
+
"reward_std": 0.0,
|
| 15604 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 15605 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 15606 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 15607 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 15608 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 15609 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 15610 |
+
"step": 6000
|
| 15611 |
}
|
| 15612 |
],
|
| 15613 |
"logging_steps": 10,
|
| 15614 |
"max_steps": 14544,
|
| 15615 |
+
"num_input_tokens_seen": 8604144,
|
| 15616 |
"num_train_epochs": 1,
|
| 15617 |
"save_steps": 50,
|
| 15618 |
"stateful_callbacks": {
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7057
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4f3df0c21647ebac4dcd78266f6f25b764a8202748a0b7c0402d7405dc13124
|
| 3 |
size 7057
|