Upload ray_tune_logs/params.json with huggingface_hub
Browse files
ray_tune_logs/params.json
CHANGED
|
@@ -22,7 +22,7 @@
|
|
| 22 |
"grad_norm": 0.5,
|
| 23 |
"gradient_accumulation_steps": 1,
|
| 24 |
"gradient_checkpoint": false,
|
| 25 |
-
"group_relative_norm":
|
| 26 |
"inference_config": {
|
| 27 |
"do_sample": true,
|
| 28 |
"max_new_tokens": 250,
|
|
@@ -60,11 +60,11 @@
|
|
| 60 |
"push_to_hub": null,
|
| 61 |
"ref_role1_name_or_path": "gpt2",
|
| 62 |
"ref_role2_name_or_path": "gpt2",
|
| 63 |
-
"rl_algo": "
|
| 64 |
"rl_w": 1.0,
|
| 65 |
"rollout_config": {
|
| 66 |
-
"accuracy_w": 0.
|
| 67 |
-
"accuracy_w2":
|
| 68 |
"len_pen": 1.0,
|
| 69 |
"len_pen2": 1.0,
|
| 70 |
"max_ctx_len": 1000,
|
|
@@ -83,7 +83,7 @@
|
|
| 83 |
"temperature": 1.0
|
| 84 |
},
|
| 85 |
"similarity_fn": "rouge",
|
| 86 |
-
"threshold": 0.
|
| 87 |
},
|
| 88 |
"rollout_game": "baseline3v2",
|
| 89 |
"sample_config": {
|
|
|
|
| 22 |
"grad_norm": 0.5,
|
| 23 |
"gradient_accumulation_steps": 1,
|
| 24 |
"gradient_checkpoint": false,
|
| 25 |
+
"group_relative_norm": false,
|
| 26 |
"inference_config": {
|
| 27 |
"do_sample": true,
|
| 28 |
"max_new_tokens": 250,
|
|
|
|
| 60 |
"push_to_hub": null,
|
| 61 |
"ref_role1_name_or_path": "gpt2",
|
| 62 |
"ref_role2_name_or_path": "gpt2",
|
| 63 |
+
"rl_algo": "on_policy",
|
| 64 |
"rl_w": 1.0,
|
| 65 |
"rollout_config": {
|
| 66 |
+
"accuracy_w": 0.5456347144827906,
|
| 67 |
+
"accuracy_w2": 2.5739712587324886,
|
| 68 |
"len_pen": 1.0,
|
| 69 |
"len_pen2": 1.0,
|
| 70 |
"max_ctx_len": 1000,
|
|
|
|
| 83 |
"temperature": 1.0
|
| 84 |
},
|
| 85 |
"similarity_fn": "rouge",
|
| 86 |
+
"threshold": 0.0010903843281968525
|
| 87 |
},
|
| 88 |
"rollout_game": "baseline3v2",
|
| 89 |
"sample_config": {
|