Upload ray_tune_logs/params.json with huggingface_hub
Browse files
ray_tune_logs/params.json
CHANGED
|
@@ -22,7 +22,7 @@
|
|
| 22 |
"grad_norm": 0.5,
|
| 23 |
"gradient_accumulation_steps": 1,
|
| 24 |
"gradient_checkpoint": false,
|
| 25 |
-
"group_relative_norm":
|
| 26 |
"inference_config": {
|
| 27 |
"do_sample": true,
|
| 28 |
"max_new_tokens": 250,
|
|
@@ -63,14 +63,14 @@
|
|
| 63 |
"rl_algo": "off_policy",
|
| 64 |
"rl_w": 1.0,
|
| 65 |
"rollout_config": {
|
| 66 |
-
"accuracy_w": 0.
|
| 67 |
-
"accuracy_w2":
|
| 68 |
"len_pen": 1.0,
|
| 69 |
"len_pen2": 1.0,
|
| 70 |
-
"n_gen1":
|
| 71 |
-
"n_gen2":
|
| 72 |
"similarity_fn": "rouge",
|
| 73 |
-
"threshold": 0.
|
| 74 |
},
|
| 75 |
"rollout_game": "baseline3v2",
|
| 76 |
"sample_config": {
|
|
|
|
| 22 |
"grad_norm": 0.5,
|
| 23 |
"gradient_accumulation_steps": 1,
|
| 24 |
"gradient_checkpoint": false,
|
| 25 |
+
"group_relative_norm": true,
|
| 26 |
"inference_config": {
|
| 27 |
"do_sample": true,
|
| 28 |
"max_new_tokens": 250,
|
|
|
|
| 63 |
"rl_algo": "off_policy",
|
| 64 |
"rl_w": 1.0,
|
| 65 |
"rollout_config": {
|
| 66 |
+
"accuracy_w": 0.49501595537331966,
|
| 67 |
+
"accuracy_w2": 3.833332156156665,
|
| 68 |
"len_pen": 1.0,
|
| 69 |
"len_pen2": 1.0,
|
| 70 |
+
"n_gen1": 2,
|
| 71 |
+
"n_gen2": 2,
|
| 72 |
"similarity_fn": "rouge",
|
| 73 |
+
"threshold": 0.007501954443620123
|
| 74 |
},
|
| 75 |
"rollout_game": "baseline3v2",
|
| 76 |
"sample_config": {
|