Upload task output 1
Browse files- README.md +1 -1
- adapter_config.json +6 -6
- adapter_model.safetensors +1 -1
- trainer_state.json +75 -75
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
base_model:
|
| 3 |
library_name: peft
|
| 4 |
pipeline_tag: text-generation
|
| 5 |
tags:
|
|
|
|
| 1 |
---
|
| 2 |
+
base_model: openlm-research/open_llama_3b
|
| 3 |
library_name: peft
|
| 4 |
pipeline_tag: text-generation
|
| 5 |
tags:
|
adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path": "
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -29,13 +29,13 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"
|
|
|
|
| 33 |
"down_proj",
|
|
|
|
|
|
|
| 34 |
"k_proj",
|
| 35 |
-
"gate_proj"
|
| 36 |
-
"up_proj",
|
| 37 |
-
"o_proj",
|
| 38 |
-
"v_proj"
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "openlm-research/open_llama_3b",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
+
"o_proj",
|
| 33 |
+
"up_proj",
|
| 34 |
"down_proj",
|
| 35 |
+
"q_proj",
|
| 36 |
+
"v_proj",
|
| 37 |
"k_proj",
|
| 38 |
+
"gate_proj"
|
|
|
|
|
|
|
|
|
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 203456160
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71c440c80ede39ed5f8bb23f30218abc37099b8e728db58e9de3c835e3af4eeb
|
| 3 |
size 203456160
|
trainer_state.json
CHANGED
|
@@ -35,7 +35,7 @@
|
|
| 35 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 36 |
"rewards/rollout_reward_func/std": 0.0,
|
| 37 |
"step": 1,
|
| 38 |
-
"step_time": 20.
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"clip_ratio/high_max": 0.0,
|
|
@@ -63,7 +63,7 @@
|
|
| 63 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 64 |
"rewards/rollout_reward_func/std": 0.0,
|
| 65 |
"step": 2,
|
| 66 |
-
"step_time": 22.
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"clip_ratio/high_max": 0.0,
|
|
@@ -91,7 +91,7 @@
|
|
| 91 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 92 |
"rewards/rollout_reward_func/std": 0.0,
|
| 93 |
"step": 3,
|
| 94 |
-
"step_time": 19.
|
| 95 |
},
|
| 96 |
{
|
| 97 |
"clip_ratio/high_max": 0.0,
|
|
@@ -119,7 +119,7 @@
|
|
| 119 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 120 |
"rewards/rollout_reward_func/std": 0.0,
|
| 121 |
"step": 4,
|
| 122 |
-
"step_time": 20.
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"clip_ratio/high_max": 0.0,
|
|
@@ -147,7 +147,7 @@
|
|
| 147 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 148 |
"rewards/rollout_reward_func/std": 0.0,
|
| 149 |
"step": 5,
|
| 150 |
-
"step_time": 19.
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"clip_ratio/high_max": 0.0,
|
|
@@ -175,7 +175,7 @@
|
|
| 175 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 176 |
"rewards/rollout_reward_func/std": 0.0,
|
| 177 |
"step": 6,
|
| 178 |
-
"step_time": 22.
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"clip_ratio/high_max": 0.0,
|
|
@@ -203,7 +203,7 @@
|
|
| 203 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 204 |
"rewards/rollout_reward_func/std": 0.0,
|
| 205 |
"step": 7,
|
| 206 |
-
"step_time": 21.
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"clip_ratio/high_max": 0.0,
|
|
@@ -231,7 +231,7 @@
|
|
| 231 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 232 |
"rewards/rollout_reward_func/std": 0.0,
|
| 233 |
"step": 8,
|
| 234 |
-
"step_time": 19.
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"clip_ratio/high_max": 0.0,
|
|
@@ -259,7 +259,7 @@
|
|
| 259 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 260 |
"rewards/rollout_reward_func/std": 0.0,
|
| 261 |
"step": 9,
|
| 262 |
-
"step_time":
|
| 263 |
},
|
| 264 |
{
|
| 265 |
"clip_ratio/high_max": 0.0,
|
|
@@ -287,7 +287,7 @@
|
|
| 287 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 288 |
"rewards/rollout_reward_func/std": 0.0,
|
| 289 |
"step": 10,
|
| 290 |
-
"step_time":
|
| 291 |
},
|
| 292 |
{
|
| 293 |
"clip_ratio/high_max": 0.0,
|
|
@@ -315,7 +315,7 @@
|
|
| 315 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 316 |
"rewards/rollout_reward_func/std": 0.0,
|
| 317 |
"step": 11,
|
| 318 |
-
"step_time":
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"clip_ratio/high_max": 0.0,
|
|
@@ -343,7 +343,7 @@
|
|
| 343 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 344 |
"rewards/rollout_reward_func/std": 0.0,
|
| 345 |
"step": 12,
|
| 346 |
-
"step_time": 21.
|
| 347 |
},
|
| 348 |
{
|
| 349 |
"clip_ratio/high_max": 0.0,
|
|
@@ -371,7 +371,7 @@
|
|
| 371 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 372 |
"rewards/rollout_reward_func/std": 0.0,
|
| 373 |
"step": 13,
|
| 374 |
-
"step_time":
|
| 375 |
},
|
| 376 |
{
|
| 377 |
"clip_ratio/high_max": 0.0,
|
|
@@ -399,7 +399,7 @@
|
|
| 399 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 400 |
"rewards/rollout_reward_func/std": 0.0,
|
| 401 |
"step": 14,
|
| 402 |
-
"step_time": 19.
|
| 403 |
},
|
| 404 |
{
|
| 405 |
"clip_ratio/high_max": 0.0,
|
|
@@ -427,7 +427,7 @@
|
|
| 427 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 428 |
"rewards/rollout_reward_func/std": 0.0,
|
| 429 |
"step": 15,
|
| 430 |
-
"step_time": 22.
|
| 431 |
},
|
| 432 |
{
|
| 433 |
"clip_ratio/high_max": 0.0,
|
|
@@ -455,7 +455,7 @@
|
|
| 455 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 456 |
"rewards/rollout_reward_func/std": 0.0,
|
| 457 |
"step": 16,
|
| 458 |
-
"step_time":
|
| 459 |
},
|
| 460 |
{
|
| 461 |
"clip_ratio/high_max": 0.0,
|
|
@@ -483,7 +483,7 @@
|
|
| 483 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 484 |
"rewards/rollout_reward_func/std": 0.0,
|
| 485 |
"step": 17,
|
| 486 |
-
"step_time": 19.
|
| 487 |
},
|
| 488 |
{
|
| 489 |
"clip_ratio/high_max": 0.0,
|
|
@@ -511,7 +511,7 @@
|
|
| 511 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 512 |
"rewards/rollout_reward_func/std": 0.0,
|
| 513 |
"step": 18,
|
| 514 |
-
"step_time": 22.
|
| 515 |
},
|
| 516 |
{
|
| 517 |
"clip_ratio/high_max": 0.0,
|
|
@@ -539,7 +539,7 @@
|
|
| 539 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 540 |
"rewards/rollout_reward_func/std": 0.0,
|
| 541 |
"step": 19,
|
| 542 |
-
"step_time": 22.
|
| 543 |
},
|
| 544 |
{
|
| 545 |
"clip_ratio/high_max": 0.0,
|
|
@@ -567,7 +567,7 @@
|
|
| 567 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 568 |
"rewards/rollout_reward_func/std": 0.0,
|
| 569 |
"step": 20,
|
| 570 |
-
"step_time": 22.
|
| 571 |
},
|
| 572 |
{
|
| 573 |
"clip_ratio/high_max": 0.0,
|
|
@@ -595,7 +595,7 @@
|
|
| 595 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 596 |
"rewards/rollout_reward_func/std": 0.0,
|
| 597 |
"step": 21,
|
| 598 |
-
"step_time": 19.
|
| 599 |
},
|
| 600 |
{
|
| 601 |
"clip_ratio/high_max": 0.0,
|
|
@@ -623,7 +623,7 @@
|
|
| 623 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 624 |
"rewards/rollout_reward_func/std": 0.0,
|
| 625 |
"step": 22,
|
| 626 |
-
"step_time":
|
| 627 |
},
|
| 628 |
{
|
| 629 |
"clip_ratio/high_max": 0.0,
|
|
@@ -651,7 +651,7 @@
|
|
| 651 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 652 |
"rewards/rollout_reward_func/std": 0.0,
|
| 653 |
"step": 23,
|
| 654 |
-
"step_time": 22.
|
| 655 |
},
|
| 656 |
{
|
| 657 |
"clip_ratio/high_max": 0.0,
|
|
@@ -679,7 +679,7 @@
|
|
| 679 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 680 |
"rewards/rollout_reward_func/std": 0.0,
|
| 681 |
"step": 24,
|
| 682 |
-
"step_time": 19.
|
| 683 |
},
|
| 684 |
{
|
| 685 |
"clip_ratio/high_max": 0.0,
|
|
@@ -707,7 +707,7 @@
|
|
| 707 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 708 |
"rewards/rollout_reward_func/std": 0.0,
|
| 709 |
"step": 25,
|
| 710 |
-
"step_time": 22.
|
| 711 |
},
|
| 712 |
{
|
| 713 |
"clip_ratio/high_max": 0.0,
|
|
@@ -735,7 +735,7 @@
|
|
| 735 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 736 |
"rewards/rollout_reward_func/std": 0.0,
|
| 737 |
"step": 26,
|
| 738 |
-
"step_time": 21.
|
| 739 |
},
|
| 740 |
{
|
| 741 |
"clip_ratio/high_max": 0.0,
|
|
@@ -763,7 +763,7 @@
|
|
| 763 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 764 |
"rewards/rollout_reward_func/std": 0.0,
|
| 765 |
"step": 27,
|
| 766 |
-
"step_time":
|
| 767 |
},
|
| 768 |
{
|
| 769 |
"clip_ratio/high_max": 0.0,
|
|
@@ -791,7 +791,7 @@
|
|
| 791 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 792 |
"rewards/rollout_reward_func/std": 0.0,
|
| 793 |
"step": 28,
|
| 794 |
-
"step_time": 22.
|
| 795 |
},
|
| 796 |
{
|
| 797 |
"clip_ratio/high_max": 0.0,
|
|
@@ -819,7 +819,7 @@
|
|
| 819 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 820 |
"rewards/rollout_reward_func/std": 0.0,
|
| 821 |
"step": 29,
|
| 822 |
-
"step_time": 19.
|
| 823 |
},
|
| 824 |
{
|
| 825 |
"clip_ratio/high_max": 0.0,
|
|
@@ -847,7 +847,7 @@
|
|
| 847 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 848 |
"rewards/rollout_reward_func/std": 0.0,
|
| 849 |
"step": 30,
|
| 850 |
-
"step_time": 19.
|
| 851 |
},
|
| 852 |
{
|
| 853 |
"clip_ratio/high_max": 0.0,
|
|
@@ -875,7 +875,7 @@
|
|
| 875 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 876 |
"rewards/rollout_reward_func/std": 0.0,
|
| 877 |
"step": 31,
|
| 878 |
-
"step_time": 19.
|
| 879 |
},
|
| 880 |
{
|
| 881 |
"clip_ratio/high_max": 0.0,
|
|
@@ -903,7 +903,7 @@
|
|
| 903 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 904 |
"rewards/rollout_reward_func/std": 0.0,
|
| 905 |
"step": 32,
|
| 906 |
-
"step_time":
|
| 907 |
},
|
| 908 |
{
|
| 909 |
"clip_ratio/high_max": 0.0,
|
|
@@ -931,7 +931,7 @@
|
|
| 931 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 932 |
"rewards/rollout_reward_func/std": 0.0,
|
| 933 |
"step": 33,
|
| 934 |
-
"step_time": 19.
|
| 935 |
},
|
| 936 |
{
|
| 937 |
"clip_ratio/high_max": 0.0,
|
|
@@ -959,7 +959,7 @@
|
|
| 959 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 960 |
"rewards/rollout_reward_func/std": 0.0,
|
| 961 |
"step": 34,
|
| 962 |
-
"step_time":
|
| 963 |
},
|
| 964 |
{
|
| 965 |
"clip_ratio/high_max": 0.0,
|
|
@@ -987,7 +987,7 @@
|
|
| 987 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 988 |
"rewards/rollout_reward_func/std": 0.0,
|
| 989 |
"step": 35,
|
| 990 |
-
"step_time": 21.
|
| 991 |
},
|
| 992 |
{
|
| 993 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1015,7 +1015,7 @@
|
|
| 1015 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1016 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1017 |
"step": 36,
|
| 1018 |
-
"step_time": 20.
|
| 1019 |
},
|
| 1020 |
{
|
| 1021 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1043,7 +1043,7 @@
|
|
| 1043 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1044 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1045 |
"step": 37,
|
| 1046 |
-
"step_time": 19.
|
| 1047 |
},
|
| 1048 |
{
|
| 1049 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1071,7 +1071,7 @@
|
|
| 1071 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1072 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1073 |
"step": 38,
|
| 1074 |
-
"step_time": 19.
|
| 1075 |
},
|
| 1076 |
{
|
| 1077 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1099,7 +1099,7 @@
|
|
| 1099 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1100 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1101 |
"step": 39,
|
| 1102 |
-
"step_time": 19.
|
| 1103 |
},
|
| 1104 |
{
|
| 1105 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1127,7 +1127,7 @@
|
|
| 1127 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1128 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1129 |
"step": 40,
|
| 1130 |
-
"step_time": 19.
|
| 1131 |
},
|
| 1132 |
{
|
| 1133 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1155,7 +1155,7 @@
|
|
| 1155 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1156 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1157 |
"step": 41,
|
| 1158 |
-
"step_time":
|
| 1159 |
},
|
| 1160 |
{
|
| 1161 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1183,7 +1183,7 @@
|
|
| 1183 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1184 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1185 |
"step": 42,
|
| 1186 |
-
"step_time": 19.
|
| 1187 |
},
|
| 1188 |
{
|
| 1189 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1211,7 +1211,7 @@
|
|
| 1211 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1212 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1213 |
"step": 43,
|
| 1214 |
-
"step_time": 19.
|
| 1215 |
},
|
| 1216 |
{
|
| 1217 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1239,7 +1239,7 @@
|
|
| 1239 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1240 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1241 |
"step": 44,
|
| 1242 |
-
"step_time": 19.
|
| 1243 |
},
|
| 1244 |
{
|
| 1245 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1267,7 +1267,7 @@
|
|
| 1267 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1268 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1269 |
"step": 45,
|
| 1270 |
-
"step_time": 22.
|
| 1271 |
},
|
| 1272 |
{
|
| 1273 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1295,7 +1295,7 @@
|
|
| 1295 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1296 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1297 |
"step": 46,
|
| 1298 |
-
"step_time":
|
| 1299 |
},
|
| 1300 |
{
|
| 1301 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1323,7 +1323,7 @@
|
|
| 1323 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1324 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1325 |
"step": 47,
|
| 1326 |
-
"step_time": 19.
|
| 1327 |
},
|
| 1328 |
{
|
| 1329 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1351,7 +1351,7 @@
|
|
| 1351 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1352 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1353 |
"step": 48,
|
| 1354 |
-
"step_time": 22.
|
| 1355 |
},
|
| 1356 |
{
|
| 1357 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1379,7 +1379,7 @@
|
|
| 1379 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1380 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1381 |
"step": 49,
|
| 1382 |
-
"step_time": 19.
|
| 1383 |
},
|
| 1384 |
{
|
| 1385 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1407,7 +1407,7 @@
|
|
| 1407 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1408 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1409 |
"step": 50,
|
| 1410 |
-
"step_time": 22.
|
| 1411 |
},
|
| 1412 |
{
|
| 1413 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1435,7 +1435,7 @@
|
|
| 1435 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1436 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1437 |
"step": 51,
|
| 1438 |
-
"step_time": 22.
|
| 1439 |
},
|
| 1440 |
{
|
| 1441 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1463,7 +1463,7 @@
|
|
| 1463 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1464 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1465 |
"step": 52,
|
| 1466 |
-
"step_time": 19.
|
| 1467 |
},
|
| 1468 |
{
|
| 1469 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1491,7 +1491,7 @@
|
|
| 1491 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1492 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1493 |
"step": 53,
|
| 1494 |
-
"step_time": 19.
|
| 1495 |
},
|
| 1496 |
{
|
| 1497 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1519,7 +1519,7 @@
|
|
| 1519 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1520 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1521 |
"step": 54,
|
| 1522 |
-
"step_time": 19.
|
| 1523 |
},
|
| 1524 |
{
|
| 1525 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1547,7 +1547,7 @@
|
|
| 1547 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1548 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1549 |
"step": 55,
|
| 1550 |
-
"step_time": 20.
|
| 1551 |
},
|
| 1552 |
{
|
| 1553 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1575,7 +1575,7 @@
|
|
| 1575 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1576 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1577 |
"step": 56,
|
| 1578 |
-
"step_time": 22.
|
| 1579 |
},
|
| 1580 |
{
|
| 1581 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1603,7 +1603,7 @@
|
|
| 1603 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1604 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1605 |
"step": 57,
|
| 1606 |
-
"step_time": 19.
|
| 1607 |
},
|
| 1608 |
{
|
| 1609 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1631,7 +1631,7 @@
|
|
| 1631 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1632 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1633 |
"step": 58,
|
| 1634 |
-
"step_time":
|
| 1635 |
},
|
| 1636 |
{
|
| 1637 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1659,7 +1659,7 @@
|
|
| 1659 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1660 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1661 |
"step": 59,
|
| 1662 |
-
"step_time": 22.
|
| 1663 |
},
|
| 1664 |
{
|
| 1665 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1687,7 +1687,7 @@
|
|
| 1687 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1688 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1689 |
"step": 60,
|
| 1690 |
-
"step_time":
|
| 1691 |
},
|
| 1692 |
{
|
| 1693 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1715,7 +1715,7 @@
|
|
| 1715 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1716 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1717 |
"step": 61,
|
| 1718 |
-
"step_time":
|
| 1719 |
},
|
| 1720 |
{
|
| 1721 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1743,7 +1743,7 @@
|
|
| 1743 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1744 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1745 |
"step": 62,
|
| 1746 |
-
"step_time": 21.
|
| 1747 |
},
|
| 1748 |
{
|
| 1749 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1771,7 +1771,7 @@
|
|
| 1771 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1772 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1773 |
"step": 63,
|
| 1774 |
-
"step_time": 22.
|
| 1775 |
},
|
| 1776 |
{
|
| 1777 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1799,7 +1799,7 @@
|
|
| 1799 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1800 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1801 |
"step": 64,
|
| 1802 |
-
"step_time":
|
| 1803 |
},
|
| 1804 |
{
|
| 1805 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1827,7 +1827,7 @@
|
|
| 1827 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1828 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1829 |
"step": 65,
|
| 1830 |
-
"step_time": 22.
|
| 1831 |
},
|
| 1832 |
{
|
| 1833 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1855,7 +1855,7 @@
|
|
| 1855 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1856 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1857 |
"step": 66,
|
| 1858 |
-
"step_time": 19.
|
| 1859 |
},
|
| 1860 |
{
|
| 1861 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1883,7 +1883,7 @@
|
|
| 1883 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1884 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1885 |
"step": 67,
|
| 1886 |
-
"step_time": 19.
|
| 1887 |
},
|
| 1888 |
{
|
| 1889 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1911,7 +1911,7 @@
|
|
| 1911 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1912 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1913 |
"step": 68,
|
| 1914 |
-
"step_time":
|
| 1915 |
},
|
| 1916 |
{
|
| 1917 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1939,7 +1939,7 @@
|
|
| 1939 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1940 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1941 |
"step": 69,
|
| 1942 |
-
"step_time": 23.
|
| 1943 |
},
|
| 1944 |
{
|
| 1945 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1967,7 +1967,7 @@
|
|
| 1967 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1968 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1969 |
"step": 70,
|
| 1970 |
-
"step_time": 19.
|
| 1971 |
},
|
| 1972 |
{
|
| 1973 |
"clip_ratio/high_max": 0.0,
|
|
@@ -1995,7 +1995,7 @@
|
|
| 1995 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1996 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1997 |
"step": 71,
|
| 1998 |
-
"step_time": 19.
|
| 1999 |
},
|
| 2000 |
{
|
| 2001 |
"clip_ratio/high_max": 0.0,
|
|
@@ -2023,7 +2023,7 @@
|
|
| 2023 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 2024 |
"rewards/rollout_reward_func/std": 0.0,
|
| 2025 |
"step": 72,
|
| 2026 |
-
"step_time":
|
| 2027 |
},
|
| 2028 |
{
|
| 2029 |
"clip_ratio/high_max": 0.0,
|
|
@@ -2051,7 +2051,7 @@
|
|
| 2051 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 2052 |
"rewards/rollout_reward_func/std": 0.0,
|
| 2053 |
"step": 73,
|
| 2054 |
-
"step_time":
|
| 2055 |
},
|
| 2056 |
{
|
| 2057 |
"clip_ratio/high_max": 0.0,
|
|
@@ -2079,7 +2079,7 @@
|
|
| 2079 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 2080 |
"rewards/rollout_reward_func/std": 0.0,
|
| 2081 |
"step": 74,
|
| 2082 |
-
"step_time": 23.
|
| 2083 |
},
|
| 2084 |
{
|
| 2085 |
"clip_ratio/high_max": 0.0,
|
|
@@ -2107,7 +2107,7 @@
|
|
| 2107 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 2108 |
"rewards/rollout_reward_func/std": 0.0,
|
| 2109 |
"step": 75,
|
| 2110 |
-
"step_time": 19.
|
| 2111 |
}
|
| 2112 |
],
|
| 2113 |
"logging_steps": 1.0,
|
|
|
|
| 35 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 36 |
"rewards/rollout_reward_func/std": 0.0,
|
| 37 |
"step": 1,
|
| 38 |
+
"step_time": 20.038708471984137
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 63 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 64 |
"rewards/rollout_reward_func/std": 0.0,
|
| 65 |
"step": 2,
|
| 66 |
+
"step_time": 22.31387728100526
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 91 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 92 |
"rewards/rollout_reward_func/std": 0.0,
|
| 93 |
"step": 3,
|
| 94 |
+
"step_time": 19.508486614991853
|
| 95 |
},
|
| 96 |
{
|
| 97 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 119 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 120 |
"rewards/rollout_reward_func/std": 0.0,
|
| 121 |
"step": 4,
|
| 122 |
+
"step_time": 20.417726718005724
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 147 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 148 |
"rewards/rollout_reward_func/std": 0.0,
|
| 149 |
"step": 5,
|
| 150 |
+
"step_time": 19.458035143004963
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 175 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 176 |
"rewards/rollout_reward_func/std": 0.0,
|
| 177 |
"step": 6,
|
| 178 |
+
"step_time": 22.11808781498985
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 203 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 204 |
"rewards/rollout_reward_func/std": 0.0,
|
| 205 |
"step": 7,
|
| 206 |
+
"step_time": 21.699966289990698
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 231 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 232 |
"rewards/rollout_reward_func/std": 0.0,
|
| 233 |
"step": 8,
|
| 234 |
+
"step_time": 19.435475739024696
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 259 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 260 |
"rewards/rollout_reward_func/std": 0.0,
|
| 261 |
"step": 9,
|
| 262 |
+
"step_time": 23.149850735993823
|
| 263 |
},
|
| 264 |
{
|
| 265 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 287 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 288 |
"rewards/rollout_reward_func/std": 0.0,
|
| 289 |
"step": 10,
|
| 290 |
+
"step_time": 21.312995460008096
|
| 291 |
},
|
| 292 |
{
|
| 293 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 315 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 316 |
"rewards/rollout_reward_func/std": 0.0,
|
| 317 |
"step": 11,
|
| 318 |
+
"step_time": 18.84161558598862
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 343 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 344 |
"rewards/rollout_reward_func/std": 0.0,
|
| 345 |
"step": 12,
|
| 346 |
+
"step_time": 21.481500715010043
|
| 347 |
},
|
| 348 |
{
|
| 349 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 371 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 372 |
"rewards/rollout_reward_func/std": 0.0,
|
| 373 |
"step": 13,
|
| 374 |
+
"step_time": 22.648648835995118
|
| 375 |
},
|
| 376 |
{
|
| 377 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 399 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 400 |
"rewards/rollout_reward_func/std": 0.0,
|
| 401 |
"step": 14,
|
| 402 |
+
"step_time": 19.483697141011362
|
| 403 |
},
|
| 404 |
{
|
| 405 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 427 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 428 |
"rewards/rollout_reward_func/std": 0.0,
|
| 429 |
"step": 15,
|
| 430 |
+
"step_time": 22.048389301991847
|
| 431 |
},
|
| 432 |
{
|
| 433 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 455 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 456 |
"rewards/rollout_reward_func/std": 0.0,
|
| 457 |
"step": 16,
|
| 458 |
+
"step_time": 21.76690764699015
|
| 459 |
},
|
| 460 |
{
|
| 461 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 483 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 484 |
"rewards/rollout_reward_func/std": 0.0,
|
| 485 |
"step": 17,
|
| 486 |
+
"step_time": 19.46455569099635
|
| 487 |
},
|
| 488 |
{
|
| 489 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 511 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 512 |
"rewards/rollout_reward_func/std": 0.0,
|
| 513 |
"step": 18,
|
| 514 |
+
"step_time": 22.669331256991427
|
| 515 |
},
|
| 516 |
{
|
| 517 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 539 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 540 |
"rewards/rollout_reward_func/std": 0.0,
|
| 541 |
"step": 19,
|
| 542 |
+
"step_time": 22.320524194008613
|
| 543 |
},
|
| 544 |
{
|
| 545 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 567 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 568 |
"rewards/rollout_reward_func/std": 0.0,
|
| 569 |
"step": 20,
|
| 570 |
+
"step_time": 22.302162043000862
|
| 571 |
},
|
| 572 |
{
|
| 573 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 595 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 596 |
"rewards/rollout_reward_func/std": 0.0,
|
| 597 |
"step": 21,
|
| 598 |
+
"step_time": 19.36471923001227
|
| 599 |
},
|
| 600 |
{
|
| 601 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 623 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 624 |
"rewards/rollout_reward_func/std": 0.0,
|
| 625 |
"step": 22,
|
| 626 |
+
"step_time": 22.757351590000326
|
| 627 |
},
|
| 628 |
{
|
| 629 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 651 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 652 |
"rewards/rollout_reward_func/std": 0.0,
|
| 653 |
"step": 23,
|
| 654 |
+
"step_time": 22.50556095898355
|
| 655 |
},
|
| 656 |
{
|
| 657 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 679 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 680 |
"rewards/rollout_reward_func/std": 0.0,
|
| 681 |
"step": 24,
|
| 682 |
+
"step_time": 19.709908160984924
|
| 683 |
},
|
| 684 |
{
|
| 685 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 707 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 708 |
"rewards/rollout_reward_func/std": 0.0,
|
| 709 |
"step": 25,
|
| 710 |
+
"step_time": 22.27659140600008
|
| 711 |
},
|
| 712 |
{
|
| 713 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 735 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 736 |
"rewards/rollout_reward_func/std": 0.0,
|
| 737 |
"step": 26,
|
| 738 |
+
"step_time": 21.666986704993178
|
| 739 |
},
|
| 740 |
{
|
| 741 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 763 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 764 |
"rewards/rollout_reward_func/std": 0.0,
|
| 765 |
"step": 27,
|
| 766 |
+
"step_time": 19.754789013990376
|
| 767 |
},
|
| 768 |
{
|
| 769 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 791 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 792 |
"rewards/rollout_reward_func/std": 0.0,
|
| 793 |
"step": 28,
|
| 794 |
+
"step_time": 22.38940100500622
|
| 795 |
},
|
| 796 |
{
|
| 797 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 819 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 820 |
"rewards/rollout_reward_func/std": 0.0,
|
| 821 |
"step": 29,
|
| 822 |
+
"step_time": 19.54482721599925
|
| 823 |
},
|
| 824 |
{
|
| 825 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 847 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 848 |
"rewards/rollout_reward_func/std": 0.0,
|
| 849 |
"step": 30,
|
| 850 |
+
"step_time": 19.39816167599929
|
| 851 |
},
|
| 852 |
{
|
| 853 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 875 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 876 |
"rewards/rollout_reward_func/std": 0.0,
|
| 877 |
"step": 31,
|
| 878 |
+
"step_time": 19.455606768009602
|
| 879 |
},
|
| 880 |
{
|
| 881 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 903 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 904 |
"rewards/rollout_reward_func/std": 0.0,
|
| 905 |
"step": 32,
|
| 906 |
+
"step_time": 22.964359290992434
|
| 907 |
},
|
| 908 |
{
|
| 909 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 931 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 932 |
"rewards/rollout_reward_func/std": 0.0,
|
| 933 |
"step": 33,
|
| 934 |
+
"step_time": 19.29167694800708
|
| 935 |
},
|
| 936 |
{
|
| 937 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 959 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 960 |
"rewards/rollout_reward_func/std": 0.0,
|
| 961 |
"step": 34,
|
| 962 |
+
"step_time": 21.556990506993316
|
| 963 |
},
|
| 964 |
{
|
| 965 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 987 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 988 |
"rewards/rollout_reward_func/std": 0.0,
|
| 989 |
"step": 35,
|
| 990 |
+
"step_time": 21.540133035996405
|
| 991 |
},
|
| 992 |
{
|
| 993 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1015 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1016 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1017 |
"step": 36,
|
| 1018 |
+
"step_time": 20.61158860699652
|
| 1019 |
},
|
| 1020 |
{
|
| 1021 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1043 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1044 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1045 |
"step": 37,
|
| 1046 |
+
"step_time": 19.463059345995134
|
| 1047 |
},
|
| 1048 |
{
|
| 1049 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1071 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1072 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1073 |
"step": 38,
|
| 1074 |
+
"step_time": 19.32687450600497
|
| 1075 |
},
|
| 1076 |
{
|
| 1077 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1099 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1100 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1101 |
"step": 39,
|
| 1102 |
+
"step_time": 19.368901928013656
|
| 1103 |
},
|
| 1104 |
{
|
| 1105 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1127 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1128 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1129 |
"step": 40,
|
| 1130 |
+
"step_time": 19.347783612996864
|
| 1131 |
},
|
| 1132 |
{
|
| 1133 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1155 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1156 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1157 |
"step": 41,
|
| 1158 |
+
"step_time": 22.78021706399886
|
| 1159 |
},
|
| 1160 |
{
|
| 1161 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1183 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1184 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1185 |
"step": 42,
|
| 1186 |
+
"step_time": 19.530366815997695
|
| 1187 |
},
|
| 1188 |
{
|
| 1189 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1211 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1212 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1213 |
"step": 43,
|
| 1214 |
+
"step_time": 19.35092342599819
|
| 1215 |
},
|
| 1216 |
{
|
| 1217 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1239 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1240 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1241 |
"step": 44,
|
| 1242 |
+
"step_time": 19.47554490200855
|
| 1243 |
},
|
| 1244 |
{
|
| 1245 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1267 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1268 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1269 |
"step": 45,
|
| 1270 |
+
"step_time": 22.798208642001555
|
| 1271 |
},
|
| 1272 |
{
|
| 1273 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1295 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1296 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1297 |
"step": 46,
|
| 1298 |
+
"step_time": 19.88411584899586
|
| 1299 |
},
|
| 1300 |
{
|
| 1301 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1323 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1324 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1325 |
"step": 47,
|
| 1326 |
+
"step_time": 19.48703931599448
|
| 1327 |
},
|
| 1328 |
{
|
| 1329 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1351 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1352 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1353 |
"step": 48,
|
| 1354 |
+
"step_time": 22.090118679989246
|
| 1355 |
},
|
| 1356 |
{
|
| 1357 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1379 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1380 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1381 |
"step": 49,
|
| 1382 |
+
"step_time": 19.470153064998158
|
| 1383 |
},
|
| 1384 |
{
|
| 1385 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1407 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1408 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1409 |
"step": 50,
|
| 1410 |
+
"step_time": 22.60661829500168
|
| 1411 |
},
|
| 1412 |
{
|
| 1413 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1435 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1436 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1437 |
"step": 51,
|
| 1438 |
+
"step_time": 22.327632517990423
|
| 1439 |
},
|
| 1440 |
{
|
| 1441 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1463 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1464 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1465 |
"step": 52,
|
| 1466 |
+
"step_time": 19.20923549200961
|
| 1467 |
},
|
| 1468 |
{
|
| 1469 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1491 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1492 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1493 |
"step": 53,
|
| 1494 |
+
"step_time": 19.398160734999692
|
| 1495 |
},
|
| 1496 |
{
|
| 1497 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1519 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1520 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1521 |
"step": 54,
|
| 1522 |
+
"step_time": 19.338012945008813
|
| 1523 |
},
|
| 1524 |
{
|
| 1525 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1547 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1548 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1549 |
"step": 55,
|
| 1550 |
+
"step_time": 20.151991571001417
|
| 1551 |
},
|
| 1552 |
{
|
| 1553 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1575 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1576 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1577 |
"step": 56,
|
| 1578 |
+
"step_time": 22.507306526997127
|
| 1579 |
},
|
| 1580 |
{
|
| 1581 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1603 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1604 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1605 |
"step": 57,
|
| 1606 |
+
"step_time": 19.39536341799976
|
| 1607 |
},
|
| 1608 |
{
|
| 1609 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1631 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1632 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1633 |
"step": 58,
|
| 1634 |
+
"step_time": 19.44620426499023
|
| 1635 |
},
|
| 1636 |
{
|
| 1637 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1659 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1660 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1661 |
"step": 59,
|
| 1662 |
+
"step_time": 22.402232911990723
|
| 1663 |
},
|
| 1664 |
{
|
| 1665 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1687 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1688 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1689 |
"step": 60,
|
| 1690 |
+
"step_time": 19.969688828998187
|
| 1691 |
},
|
| 1692 |
{
|
| 1693 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1715 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1716 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1717 |
"step": 61,
|
| 1718 |
+
"step_time": 19.51650980200793
|
| 1719 |
},
|
| 1720 |
{
|
| 1721 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1743 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1744 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1745 |
"step": 62,
|
| 1746 |
+
"step_time": 21.935334763016726
|
| 1747 |
},
|
| 1748 |
{
|
| 1749 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1771 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1772 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1773 |
"step": 63,
|
| 1774 |
+
"step_time": 22.139962298009777
|
| 1775 |
},
|
| 1776 |
{
|
| 1777 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1799 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1800 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1801 |
"step": 64,
|
| 1802 |
+
"step_time": 21.979154282984382
|
| 1803 |
},
|
| 1804 |
{
|
| 1805 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1827 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1828 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1829 |
"step": 65,
|
| 1830 |
+
"step_time": 22.214402237004833
|
| 1831 |
},
|
| 1832 |
{
|
| 1833 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1855 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1856 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1857 |
"step": 66,
|
| 1858 |
+
"step_time": 19.304359686997486
|
| 1859 |
},
|
| 1860 |
{
|
| 1861 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1883 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1884 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1885 |
"step": 67,
|
| 1886 |
+
"step_time": 19.36013725100929
|
| 1887 |
},
|
| 1888 |
{
|
| 1889 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1911 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1912 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1913 |
"step": 68,
|
| 1914 |
+
"step_time": 21.965071903985518
|
| 1915 |
},
|
| 1916 |
{
|
| 1917 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1939 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1940 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1941 |
"step": 69,
|
| 1942 |
+
"step_time": 23.3430329200055
|
| 1943 |
},
|
| 1944 |
{
|
| 1945 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1967 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1968 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1969 |
"step": 70,
|
| 1970 |
+
"step_time": 19.697308761002205
|
| 1971 |
},
|
| 1972 |
{
|
| 1973 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 1995 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 1996 |
"rewards/rollout_reward_func/std": 0.0,
|
| 1997 |
"step": 71,
|
| 1998 |
+
"step_time": 19.443207848002203
|
| 1999 |
},
|
| 2000 |
{
|
| 2001 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 2023 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 2024 |
"rewards/rollout_reward_func/std": 0.0,
|
| 2025 |
"step": 72,
|
| 2026 |
+
"step_time": 20.01639660699584
|
| 2027 |
},
|
| 2028 |
{
|
| 2029 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 2051 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 2052 |
"rewards/rollout_reward_func/std": 0.0,
|
| 2053 |
"step": 73,
|
| 2054 |
+
"step_time": 23.112452601002587
|
| 2055 |
},
|
| 2056 |
{
|
| 2057 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 2079 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 2080 |
"rewards/rollout_reward_func/std": 0.0,
|
| 2081 |
"step": 74,
|
| 2082 |
+
"step_time": 23.19768616399233
|
| 2083 |
},
|
| 2084 |
{
|
| 2085 |
"clip_ratio/high_max": 0.0,
|
|
|
|
| 2107 |
"rewards/rollout_reward_func/mean": 0.0,
|
| 2108 |
"rewards/rollout_reward_func/std": 0.0,
|
| 2109 |
"step": 75,
|
| 2110 |
+
"step_time": 19.854602511004487
|
| 2111 |
}
|
| 2112 |
],
|
| 2113 |
"logging_steps": 1.0,
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7889
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1c9d78e6601bc52704e4c410160ac3431adbbc8e01e0bcc154083b11a67105e
|
| 3 |
size 7889
|