Upload folder using huggingface_hub
Browse files- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +523 -3
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -29,13 +29,13 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"q_proj",
|
| 33 |
-
"o_proj",
|
| 34 |
"up_proj",
|
| 35 |
-
"v_proj",
|
| 36 |
"k_proj",
|
|
|
|
| 37 |
"gate_proj",
|
| 38 |
-
"
|
|
|
|
|
|
|
| 39 |
],
|
| 40 |
"task_type": "CAUSAL_LM",
|
| 41 |
"trainable_token_indices": null,
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
|
|
|
|
|
|
| 32 |
"up_proj",
|
|
|
|
| 33 |
"k_proj",
|
| 34 |
+
"down_proj",
|
| 35 |
"gate_proj",
|
| 36 |
+
"o_proj",
|
| 37 |
+
"v_proj",
|
| 38 |
+
"q_proj"
|
| 39 |
],
|
| 40 |
"task_type": "CAUSAL_LM",
|
| 41 |
"trainable_token_indices": null,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 262406656
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34ed7c8122b399a7429458c9bec480cf7e21349f800561dd00ce127f72784813
|
| 3 |
size 262406656
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 122872331
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43e14fe59460f9351b2c2c7e068f3daa559d773b943562f9712360bb2d6fcd69
|
| 3 |
size 122872331
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:715fa110eff443e5f62845343f8d9c47ba7633ab807289e3e2a72139c5a3dcb5
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc8093c5d8958faf7374afb5677c510aef66de9b3b9dfb56763d6e9c1ab23447
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -35888,11 +35888,531 @@
|
|
| 35888 |
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 35889 |
"rewards/quality_reward_func/std": 0.0,
|
| 35890 |
"step": 13800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35891 |
}
|
| 35892 |
],
|
| 35893 |
"logging_steps": 10,
|
| 35894 |
"max_steps": 14544,
|
| 35895 |
-
"num_input_tokens_seen":
|
| 35896 |
"num_train_epochs": 1,
|
| 35897 |
"save_steps": 50,
|
| 35898 |
"stateful_callbacks": {
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.9625962596259626,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 14000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 35888 |
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 35889 |
"rewards/quality_reward_func/std": 0.0,
|
| 35890 |
"step": 13800
|
| 35891 |
+
},
|
| 35892 |
+
{
|
| 35893 |
+
"completion_length": 18.7,
|
| 35894 |
+
"completions/clipped_ratio": 0.0,
|
| 35895 |
+
"completions/max_length": 18.7,
|
| 35896 |
+
"completions/max_terminated_length": 18.7,
|
| 35897 |
+
"completions/mean_length": 16.725,
|
| 35898 |
+
"completions/mean_terminated_length": 16.725,
|
| 35899 |
+
"completions/min_length": 15.3,
|
| 35900 |
+
"completions/min_terminated_length": 15.3,
|
| 35901 |
+
"epoch": 0.9495324532453245,
|
| 35902 |
+
"frac_reward_zero_std": 1.0,
|
| 35903 |
+
"grad_norm": 0.0,
|
| 35904 |
+
"kl": 1.0350565232336522,
|
| 35905 |
+
"learning_rate": 3.8801175392468584e-08,
|
| 35906 |
+
"loss": 0.0,
|
| 35907 |
+
"num_tokens": 19854041.0,
|
| 35908 |
+
"reward": 4.099999904632568,
|
| 35909 |
+
"reward_std": 0.0,
|
| 35910 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 35911 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 35912 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 35913 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 35914 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 35915 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 35916 |
+
"step": 13810
|
| 35917 |
+
},
|
| 35918 |
+
{
|
| 35919 |
+
"completion_length": 18.1,
|
| 35920 |
+
"completions/clipped_ratio": 0.0,
|
| 35921 |
+
"completions/max_length": 18.1,
|
| 35922 |
+
"completions/max_terminated_length": 18.1,
|
| 35923 |
+
"completions/mean_length": 16.9,
|
| 35924 |
+
"completions/mean_terminated_length": 16.9,
|
| 35925 |
+
"completions/min_length": 15.7,
|
| 35926 |
+
"completions/min_terminated_length": 15.7,
|
| 35927 |
+
"epoch": 0.9502200220022002,
|
| 35928 |
+
"frac_reward_zero_std": 1.0,
|
| 35929 |
+
"grad_norm": 0.0,
|
| 35930 |
+
"kl": 1.2483361944556237,
|
| 35931 |
+
"learning_rate": 3.775519104418812e-08,
|
| 35932 |
+
"loss": 0.0,
|
| 35933 |
+
"num_tokens": 19869149.0,
|
| 35934 |
+
"reward": 4.099999904632568,
|
| 35935 |
+
"reward_std": 0.0,
|
| 35936 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 35937 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 35938 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 35939 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 35940 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 35941 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 35942 |
+
"step": 13820
|
| 35943 |
+
},
|
| 35944 |
+
{
|
| 35945 |
+
"completion_length": 18.0,
|
| 35946 |
+
"completions/clipped_ratio": 0.0,
|
| 35947 |
+
"completions/max_length": 18.0,
|
| 35948 |
+
"completions/max_terminated_length": 18.0,
|
| 35949 |
+
"completions/mean_length": 16.775,
|
| 35950 |
+
"completions/mean_terminated_length": 16.775,
|
| 35951 |
+
"completions/min_length": 15.7,
|
| 35952 |
+
"completions/min_terminated_length": 15.7,
|
| 35953 |
+
"epoch": 0.9509075907590759,
|
| 35954 |
+
"frac_reward_zero_std": 1.0,
|
| 35955 |
+
"grad_norm": 0.0,
|
| 35956 |
+
"kl": 1.2970769941806792,
|
| 35957 |
+
"learning_rate": 3.672339132003211e-08,
|
| 35958 |
+
"loss": 0.0,
|
| 35959 |
+
"num_tokens": 19886136.0,
|
| 35960 |
+
"reward": 4.099999904632568,
|
| 35961 |
+
"reward_std": 0.0,
|
| 35962 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 35963 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 35964 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 35965 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 35966 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 35967 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 35968 |
+
"step": 13830
|
| 35969 |
+
},
|
| 35970 |
+
{
|
| 35971 |
+
"completion_length": 20.5,
|
| 35972 |
+
"completions/clipped_ratio": 0.0,
|
| 35973 |
+
"completions/max_length": 20.5,
|
| 35974 |
+
"completions/max_terminated_length": 20.5,
|
| 35975 |
+
"completions/mean_length": 18.35,
|
| 35976 |
+
"completions/mean_terminated_length": 18.35,
|
| 35977 |
+
"completions/min_length": 16.3,
|
| 35978 |
+
"completions/min_terminated_length": 16.3,
|
| 35979 |
+
"epoch": 0.9515951595159516,
|
| 35980 |
+
"frac_reward_zero_std": 1.0,
|
| 35981 |
+
"grad_norm": 0.0,
|
| 35982 |
+
"kl": 1.3883480228483678,
|
| 35983 |
+
"learning_rate": 3.5705782164044135e-08,
|
| 35984 |
+
"loss": 0.0001,
|
| 35985 |
+
"num_tokens": 19897482.0,
|
| 35986 |
+
"reward": 4.099999904632568,
|
| 35987 |
+
"reward_std": 0.0,
|
| 35988 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 35989 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 35990 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 35991 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 35992 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 35993 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 35994 |
+
"step": 13840
|
| 35995 |
+
},
|
| 35996 |
+
{
|
| 35997 |
+
"completion_length": 15.6,
|
| 35998 |
+
"completions/clipped_ratio": 0.0,
|
| 35999 |
+
"completions/max_length": 15.6,
|
| 36000 |
+
"completions/max_terminated_length": 15.6,
|
| 36001 |
+
"completions/mean_length": 15.25,
|
| 36002 |
+
"completions/mean_terminated_length": 15.25,
|
| 36003 |
+
"completions/min_length": 14.9,
|
| 36004 |
+
"completions/min_terminated_length": 14.9,
|
| 36005 |
+
"epoch": 0.9522827282728272,
|
| 36006 |
+
"frac_reward_zero_std": 1.0,
|
| 36007 |
+
"grad_norm": 0.0,
|
| 36008 |
+
"kl": 0.9344463728368282,
|
| 36009 |
+
"learning_rate": 3.470236943851929e-08,
|
| 36010 |
+
"loss": 0.0,
|
| 36011 |
+
"num_tokens": 19910592.0,
|
| 36012 |
+
"reward": 4.099999904632568,
|
| 36013 |
+
"reward_std": 0.0,
|
| 36014 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36015 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36016 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36017 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36018 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36019 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36020 |
+
"step": 13850
|
| 36021 |
+
},
|
| 36022 |
+
{
|
| 36023 |
+
"completion_length": 18.2,
|
| 36024 |
+
"completions/clipped_ratio": 0.0,
|
| 36025 |
+
"completions/max_length": 18.2,
|
| 36026 |
+
"completions/max_terminated_length": 18.2,
|
| 36027 |
+
"completions/mean_length": 16.75,
|
| 36028 |
+
"completions/mean_terminated_length": 16.75,
|
| 36029 |
+
"completions/min_length": 15.6,
|
| 36030 |
+
"completions/min_terminated_length": 15.6,
|
| 36031 |
+
"epoch": 0.9529702970297029,
|
| 36032 |
+
"frac_reward_zero_std": 1.0,
|
| 36033 |
+
"grad_norm": 0.0,
|
| 36034 |
+
"kl": 1.0786833353340626,
|
| 36035 |
+
"learning_rate": 3.371315892396698e-08,
|
| 36036 |
+
"loss": 0.0,
|
| 36037 |
+
"num_tokens": 19924222.0,
|
| 36038 |
+
"reward": 4.099999904632568,
|
| 36039 |
+
"reward_std": 0.0,
|
| 36040 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36041 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36042 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36043 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36044 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36045 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36046 |
+
"step": 13860
|
| 36047 |
+
},
|
| 36048 |
+
{
|
| 36049 |
+
"completion_length": 20.6,
|
| 36050 |
+
"completions/clipped_ratio": 0.0,
|
| 36051 |
+
"completions/max_length": 20.6,
|
| 36052 |
+
"completions/max_terminated_length": 20.6,
|
| 36053 |
+
"completions/mean_length": 18.65,
|
| 36054 |
+
"completions/mean_terminated_length": 18.65,
|
| 36055 |
+
"completions/min_length": 16.7,
|
| 36056 |
+
"completions/min_terminated_length": 16.7,
|
| 36057 |
+
"epoch": 0.9536578657865786,
|
| 36058 |
+
"frac_reward_zero_std": 1.0,
|
| 36059 |
+
"grad_norm": 0.0,
|
| 36060 |
+
"kl": 1.420877918601036,
|
| 36061 |
+
"learning_rate": 3.2738156319082336e-08,
|
| 36062 |
+
"loss": 0.0001,
|
| 36063 |
+
"num_tokens": 19939312.0,
|
| 36064 |
+
"reward": 4.099999904632568,
|
| 36065 |
+
"reward_std": 0.0,
|
| 36066 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36067 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36068 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36069 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36070 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36071 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36072 |
+
"step": 13870
|
| 36073 |
+
},
|
| 36074 |
+
{
|
| 36075 |
+
"completion_length": 18.8,
|
| 36076 |
+
"completions/clipped_ratio": 0.0,
|
| 36077 |
+
"completions/max_length": 18.8,
|
| 36078 |
+
"completions/max_terminated_length": 18.8,
|
| 36079 |
+
"completions/mean_length": 17.35,
|
| 36080 |
+
"completions/mean_terminated_length": 17.35,
|
| 36081 |
+
"completions/min_length": 16.0,
|
| 36082 |
+
"completions/min_terminated_length": 16.0,
|
| 36083 |
+
"epoch": 0.9543454345434543,
|
| 36084 |
+
"frac_reward_zero_std": 1.0,
|
| 36085 |
+
"grad_norm": 0.0,
|
| 36086 |
+
"kl": 1.182000921666622,
|
| 36087 |
+
"learning_rate": 3.1777367240708455e-08,
|
| 36088 |
+
"loss": 0.0,
|
| 36089 |
+
"num_tokens": 19953590.0,
|
| 36090 |
+
"reward": 4.099999904632568,
|
| 36091 |
+
"reward_std": 0.0,
|
| 36092 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36093 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36094 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36095 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36096 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36097 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36098 |
+
"step": 13880
|
| 36099 |
+
},
|
| 36100 |
+
{
|
| 36101 |
+
"completion_length": 18.6,
|
| 36102 |
+
"completions/clipped_ratio": 0.0,
|
| 36103 |
+
"completions/max_length": 18.6,
|
| 36104 |
+
"completions/max_terminated_length": 18.6,
|
| 36105 |
+
"completions/mean_length": 17.4,
|
| 36106 |
+
"completions/mean_terminated_length": 17.4,
|
| 36107 |
+
"completions/min_length": 16.1,
|
| 36108 |
+
"completions/min_terminated_length": 16.1,
|
| 36109 |
+
"epoch": 0.95503300330033,
|
| 36110 |
+
"frac_reward_zero_std": 1.0,
|
| 36111 |
+
"grad_norm": 0.0,
|
| 36112 |
+
"kl": 1.1275596469640732,
|
| 36113 |
+
"learning_rate": 3.0830797223808106e-08,
|
| 36114 |
+
"loss": 0.0,
|
| 36115 |
+
"num_tokens": 19969106.0,
|
| 36116 |
+
"reward": 4.099999904632568,
|
| 36117 |
+
"reward_std": 0.0,
|
| 36118 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36119 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36120 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36121 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36122 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36123 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36124 |
+
"step": 13890
|
| 36125 |
+
},
|
| 36126 |
+
{
|
| 36127 |
+
"completion_length": 20.7,
|
| 36128 |
+
"completions/clipped_ratio": 0.0,
|
| 36129 |
+
"completions/max_length": 20.7,
|
| 36130 |
+
"completions/max_terminated_length": 20.7,
|
| 36131 |
+
"completions/mean_length": 18.1,
|
| 36132 |
+
"completions/mean_terminated_length": 18.1,
|
| 36133 |
+
"completions/min_length": 16.5,
|
| 36134 |
+
"completions/min_terminated_length": 16.5,
|
| 36135 |
+
"epoch": 0.9557205720572057,
|
| 36136 |
+
"frac_reward_zero_std": 1.0,
|
| 36137 |
+
"grad_norm": 0.0,
|
| 36138 |
+
"kl": 1.4078487813472749,
|
| 36139 |
+
"learning_rate": 2.989845172142958e-08,
|
| 36140 |
+
"loss": 0.0001,
|
| 36141 |
+
"num_tokens": 19985234.0,
|
| 36142 |
+
"reward": 4.099999904632568,
|
| 36143 |
+
"reward_std": 0.0,
|
| 36144 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36145 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36146 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36147 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36148 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36149 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36150 |
+
"step": 13900
|
| 36151 |
+
},
|
| 36152 |
+
{
|
| 36153 |
+
"completion_length": 17.5,
|
| 36154 |
+
"completions/clipped_ratio": 0.0,
|
| 36155 |
+
"completions/max_length": 17.5,
|
| 36156 |
+
"completions/max_terminated_length": 17.5,
|
| 36157 |
+
"completions/mean_length": 16.625,
|
| 36158 |
+
"completions/mean_terminated_length": 16.625,
|
| 36159 |
+
"completions/min_length": 16.1,
|
| 36160 |
+
"completions/min_terminated_length": 16.1,
|
| 36161 |
+
"epoch": 0.9564081408140814,
|
| 36162 |
+
"frac_reward_zero_std": 1.0,
|
| 36163 |
+
"grad_norm": 0.0,
|
| 36164 |
+
"kl": 0.7960809737443924,
|
| 36165 |
+
"learning_rate": 2.89803361046756e-08,
|
| 36166 |
+
"loss": 0.0,
|
| 36167 |
+
"num_tokens": 20002139.0,
|
| 36168 |
+
"reward": 4.099999904632568,
|
| 36169 |
+
"reward_std": 0.0,
|
| 36170 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36171 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36172 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36173 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36174 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36175 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36176 |
+
"step": 13910
|
| 36177 |
+
},
|
| 36178 |
+
{
|
| 36179 |
+
"completion_length": 18.8,
|
| 36180 |
+
"completions/clipped_ratio": 0.0,
|
| 36181 |
+
"completions/max_length": 18.8,
|
| 36182 |
+
"completions/max_terminated_length": 18.8,
|
| 36183 |
+
"completions/mean_length": 16.975,
|
| 36184 |
+
"completions/mean_terminated_length": 16.975,
|
| 36185 |
+
"completions/min_length": 15.8,
|
| 36186 |
+
"completions/min_terminated_length": 15.8,
|
| 36187 |
+
"epoch": 0.9570957095709571,
|
| 36188 |
+
"frac_reward_zero_std": 1.0,
|
| 36189 |
+
"grad_norm": 0.0,
|
| 36190 |
+
"kl": 0.9877739049494266,
|
| 36191 |
+
"learning_rate": 2.8076455662673363e-08,
|
| 36192 |
+
"loss": 0.0,
|
| 36193 |
+
"num_tokens": 20019710.0,
|
| 36194 |
+
"reward": 4.099999904632568,
|
| 36195 |
+
"reward_std": 0.0,
|
| 36196 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36197 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36198 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36199 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36200 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36201 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36202 |
+
"step": 13920
|
| 36203 |
+
},
|
| 36204 |
+
{
|
| 36205 |
+
"completion_length": 20.1,
|
| 36206 |
+
"completions/clipped_ratio": 0.0,
|
| 36207 |
+
"completions/max_length": 20.1,
|
| 36208 |
+
"completions/max_terminated_length": 20.1,
|
| 36209 |
+
"completions/mean_length": 18.55,
|
| 36210 |
+
"completions/mean_terminated_length": 18.55,
|
| 36211 |
+
"completions/min_length": 16.9,
|
| 36212 |
+
"completions/min_terminated_length": 16.9,
|
| 36213 |
+
"epoch": 0.9577832783278328,
|
| 36214 |
+
"frac_reward_zero_std": 1.0,
|
| 36215 |
+
"grad_norm": 0.0,
|
| 36216 |
+
"kl": 1.1007904268801212,
|
| 36217 |
+
"learning_rate": 2.7186815602542606e-08,
|
| 36218 |
+
"loss": 0.0,
|
| 36219 |
+
"num_tokens": 20035352.0,
|
| 36220 |
+
"reward": 4.099999904632568,
|
| 36221 |
+
"reward_std": 0.0,
|
| 36222 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36223 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36224 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36225 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36226 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36227 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36228 |
+
"step": 13930
|
| 36229 |
+
},
|
| 36230 |
+
{
|
| 36231 |
+
"completion_length": 18.4,
|
| 36232 |
+
"completions/clipped_ratio": 0.0,
|
| 36233 |
+
"completions/max_length": 18.4,
|
| 36234 |
+
"completions/max_terminated_length": 18.4,
|
| 36235 |
+
"completions/mean_length": 15.9,
|
| 36236 |
+
"completions/mean_terminated_length": 15.9,
|
| 36237 |
+
"completions/min_length": 14.8,
|
| 36238 |
+
"completions/min_terminated_length": 14.8,
|
| 36239 |
+
"epoch": 0.9584708470847084,
|
| 36240 |
+
"frac_reward_zero_std": 1.0,
|
| 36241 |
+
"grad_norm": 0.0,
|
| 36242 |
+
"kl": 1.0643165530636907,
|
| 36243 |
+
"learning_rate": 2.6311421049366736e-08,
|
| 36244 |
+
"loss": 0.0,
|
| 36245 |
+
"num_tokens": 20046580.0,
|
| 36246 |
+
"reward": 4.099999904632568,
|
| 36247 |
+
"reward_std": 0.0,
|
| 36248 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36249 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36250 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36251 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36252 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36253 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36254 |
+
"step": 13940
|
| 36255 |
+
},
|
| 36256 |
+
{
|
| 36257 |
+
"completion_length": 19.0,
|
| 36258 |
+
"completions/clipped_ratio": 0.0,
|
| 36259 |
+
"completions/max_length": 19.0,
|
| 36260 |
+
"completions/max_terminated_length": 19.0,
|
| 36261 |
+
"completions/mean_length": 16.9,
|
| 36262 |
+
"completions/mean_terminated_length": 16.9,
|
| 36263 |
+
"completions/min_length": 15.5,
|
| 36264 |
+
"completions/min_terminated_length": 15.5,
|
| 36265 |
+
"epoch": 0.9591584158415841,
|
| 36266 |
+
"frac_reward_zero_std": 1.0,
|
| 36267 |
+
"grad_norm": 0.0,
|
| 36268 |
+
"kl": 1.126106108725071,
|
| 36269 |
+
"learning_rate": 2.5450277046162874e-08,
|
| 36270 |
+
"loss": 0.0,
|
| 36271 |
+
"num_tokens": 20061356.0,
|
| 36272 |
+
"reward": 4.099999904632568,
|
| 36273 |
+
"reward_std": 0.0,
|
| 36274 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36275 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36276 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36277 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36278 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36279 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36280 |
+
"step": 13950
|
| 36281 |
+
},
|
| 36282 |
+
{
|
| 36283 |
+
"completion_length": 19.7,
|
| 36284 |
+
"completions/clipped_ratio": 0.0,
|
| 36285 |
+
"completions/max_length": 19.7,
|
| 36286 |
+
"completions/max_terminated_length": 19.7,
|
| 36287 |
+
"completions/mean_length": 18.05,
|
| 36288 |
+
"completions/mean_terminated_length": 18.05,
|
| 36289 |
+
"completions/min_length": 17.1,
|
| 36290 |
+
"completions/min_terminated_length": 17.1,
|
| 36291 |
+
"epoch": 0.9598459845984598,
|
| 36292 |
+
"frac_reward_zero_std": 1.0,
|
| 36293 |
+
"grad_norm": 0.0,
|
| 36294 |
+
"kl": 0.9622666202485561,
|
| 36295 |
+
"learning_rate": 2.460338855385297e-08,
|
| 36296 |
+
"loss": 0.0,
|
| 36297 |
+
"num_tokens": 20075018.0,
|
| 36298 |
+
"reward": 4.099999904632568,
|
| 36299 |
+
"reward_std": 0.0,
|
| 36300 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36301 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36302 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36303 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36304 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36305 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36306 |
+
"step": 13960
|
| 36307 |
+
},
|
| 36308 |
+
{
|
| 36309 |
+
"completion_length": 17.2,
|
| 36310 |
+
"completions/clipped_ratio": 0.0,
|
| 36311 |
+
"completions/max_length": 17.2,
|
| 36312 |
+
"completions/max_terminated_length": 17.2,
|
| 36313 |
+
"completions/mean_length": 15.55,
|
| 36314 |
+
"completions/mean_terminated_length": 15.55,
|
| 36315 |
+
"completions/min_length": 14.6,
|
| 36316 |
+
"completions/min_terminated_length": 14.6,
|
| 36317 |
+
"epoch": 0.9605335533553355,
|
| 36318 |
+
"frac_reward_zero_std": 1.0,
|
| 36319 |
+
"grad_norm": 0.0,
|
| 36320 |
+
"kl": 1.032901889272034,
|
| 36321 |
+
"learning_rate": 2.3770760451234665e-08,
|
| 36322 |
+
"loss": 0.0,
|
| 36323 |
+
"num_tokens": 20089012.0,
|
| 36324 |
+
"reward": 4.099999904632568,
|
| 36325 |
+
"reward_std": 0.0,
|
| 36326 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36327 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36328 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36329 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36330 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36331 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36332 |
+
"step": 13970
|
| 36333 |
+
},
|
| 36334 |
+
{
|
| 36335 |
+
"completion_length": 20.5,
|
| 36336 |
+
"completions/clipped_ratio": 0.0,
|
| 36337 |
+
"completions/max_length": 20.5,
|
| 36338 |
+
"completions/max_terminated_length": 20.5,
|
| 36339 |
+
"completions/mean_length": 18.0,
|
| 36340 |
+
"completions/mean_terminated_length": 18.0,
|
| 36341 |
+
"completions/min_length": 16.0,
|
| 36342 |
+
"completions/min_terminated_length": 16.0,
|
| 36343 |
+
"epoch": 0.9612211221122112,
|
| 36344 |
+
"frac_reward_zero_std": 1.0,
|
| 36345 |
+
"grad_norm": 0.0,
|
| 36346 |
+
"kl": 1.3917377760633827,
|
| 36347 |
+
"learning_rate": 2.2952397534954097e-08,
|
| 36348 |
+
"loss": 0.0001,
|
| 36349 |
+
"num_tokens": 20102060.0,
|
| 36350 |
+
"reward": 4.099999904632568,
|
| 36351 |
+
"reward_std": 0.0,
|
| 36352 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36353 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36354 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36355 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36356 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36357 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36358 |
+
"step": 13980
|
| 36359 |
+
},
|
| 36360 |
+
{
|
| 36361 |
+
"completion_length": 17.1,
|
| 36362 |
+
"completions/clipped_ratio": 0.0,
|
| 36363 |
+
"completions/max_length": 17.1,
|
| 36364 |
+
"completions/max_terminated_length": 17.1,
|
| 36365 |
+
"completions/mean_length": 15.55,
|
| 36366 |
+
"completions/mean_terminated_length": 15.55,
|
| 36367 |
+
"completions/min_length": 14.3,
|
| 36368 |
+
"completions/min_terminated_length": 14.3,
|
| 36369 |
+
"epoch": 0.9619086908690869,
|
| 36370 |
+
"frac_reward_zero_std": 1.0,
|
| 36371 |
+
"grad_norm": 0.0,
|
| 36372 |
+
"kl": 1.2739990446716547,
|
| 36373 |
+
"learning_rate": 2.214830451947786e-08,
|
| 36374 |
+
"loss": 0.0,
|
| 36375 |
+
"num_tokens": 20116294.0,
|
| 36376 |
+
"reward": 4.099999904632568,
|
| 36377 |
+
"reward_std": 0.0,
|
| 36378 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36379 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36380 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36381 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36382 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36383 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36384 |
+
"step": 13990
|
| 36385 |
+
},
|
| 36386 |
+
{
|
| 36387 |
+
"completion_length": 17.7,
|
| 36388 |
+
"completions/clipped_ratio": 0.0,
|
| 36389 |
+
"completions/max_length": 17.7,
|
| 36390 |
+
"completions/max_terminated_length": 17.7,
|
| 36391 |
+
"completions/mean_length": 16.875,
|
| 36392 |
+
"completions/mean_terminated_length": 16.875,
|
| 36393 |
+
"completions/min_length": 16.1,
|
| 36394 |
+
"completions/min_terminated_length": 16.1,
|
| 36395 |
+
"epoch": 0.9625962596259626,
|
| 36396 |
+
"frac_reward_zero_std": 1.0,
|
| 36397 |
+
"grad_norm": 0.0,
|
| 36398 |
+
"kl": 1.1548074826598167,
|
| 36399 |
+
"learning_rate": 2.1358486037065253e-08,
|
| 36400 |
+
"loss": 0.0,
|
| 36401 |
+
"num_tokens": 20130169.0,
|
| 36402 |
+
"reward": 4.099999904632568,
|
| 36403 |
+
"reward_std": 0.0,
|
| 36404 |
+
"rewards/coherence_reward_func/mean": 1.2999999523162842,
|
| 36405 |
+
"rewards/coherence_reward_func/std": 0.0,
|
| 36406 |
+
"rewards/formatting_reward_func/mean": 2.0,
|
| 36407 |
+
"rewards/formatting_reward_func/std": 0.0,
|
| 36408 |
+
"rewards/quality_reward_func/mean": 0.800000011920929,
|
| 36409 |
+
"rewards/quality_reward_func/std": 0.0,
|
| 36410 |
+
"step": 14000
|
| 36411 |
}
|
| 36412 |
],
|
| 36413 |
"logging_steps": 10,
|
| 36414 |
"max_steps": 14544,
|
| 36415 |
+
"num_input_tokens_seen": 20130169,
|
| 36416 |
"num_train_epochs": 1,
|
| 36417 |
"save_steps": 50,
|
| 36418 |
"stateful_callbacks": {
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7057
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:834eb2c805e6cbc223d894fc25ac7ea495fdb7ae28416408c469f17593a544fc
|
| 3 |
size 7057
|