haihp02 commited on
Commit
9dce2c8
·
verified ·
1 Parent(s): c89020b

Upload task output 1

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: Qwen/Qwen3-4B
3
  library_name: peft
4
  pipeline_tag: text-generation
5
  tags:
 
1
  ---
2
+ base_model: openlm-research/open_llama_3b
3
  library_name: peft
4
  pipeline_tag: text-generation
5
  tags:
adapter_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
- "base_model_name_or_path": "Qwen/Qwen3-4B",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
@@ -29,13 +29,13 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "q_proj",
 
33
  "down_proj",
 
 
34
  "k_proj",
35
- "gate_proj",
36
- "up_proj",
37
- "o_proj",
38
- "v_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
 
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
+ "base_model_name_or_path": "openlm-research/open_llama_3b",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "o_proj",
33
+ "up_proj",
34
  "down_proj",
35
+ "q_proj",
36
+ "v_proj",
37
  "k_proj",
38
+ "gate_proj"
 
 
 
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a50e8b89a6399e0f116c78638dfba0019f9719dcf17ea3e4688d6fafb79ed0ec
3
  size 203456160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71c440c80ede39ed5f8bb23f30218abc37099b8e728db58e9de3c835e3af4eeb
3
  size 203456160
trainer_state.json CHANGED
@@ -35,7 +35,7 @@
35
  "rewards/rollout_reward_func/mean": 0.0,
36
  "rewards/rollout_reward_func/std": 0.0,
37
  "step": 1,
38
- "step_time": 20.163676705000398
39
  },
40
  {
41
  "clip_ratio/high_max": 0.0,
@@ -63,7 +63,7 @@
63
  "rewards/rollout_reward_func/mean": 0.0,
64
  "rewards/rollout_reward_func/std": 0.0,
65
  "step": 2,
66
- "step_time": 22.362987949985836
67
  },
68
  {
69
  "clip_ratio/high_max": 0.0,
@@ -91,7 +91,7 @@
91
  "rewards/rollout_reward_func/mean": 0.0,
92
  "rewards/rollout_reward_func/std": 0.0,
93
  "step": 3,
94
- "step_time": 19.506274071005464
95
  },
96
  {
97
  "clip_ratio/high_max": 0.0,
@@ -119,7 +119,7 @@
119
  "rewards/rollout_reward_func/mean": 0.0,
120
  "rewards/rollout_reward_func/std": 0.0,
121
  "step": 4,
122
- "step_time": 20.597769440006232
123
  },
124
  {
125
  "clip_ratio/high_max": 0.0,
@@ -147,7 +147,7 @@
147
  "rewards/rollout_reward_func/mean": 0.0,
148
  "rewards/rollout_reward_func/std": 0.0,
149
  "step": 5,
150
- "step_time": 19.509877896991384
151
  },
152
  {
153
  "clip_ratio/high_max": 0.0,
@@ -175,7 +175,7 @@
175
  "rewards/rollout_reward_func/mean": 0.0,
176
  "rewards/rollout_reward_func/std": 0.0,
177
  "step": 6,
178
- "step_time": 22.303637470999092
179
  },
180
  {
181
  "clip_ratio/high_max": 0.0,
@@ -203,7 +203,7 @@
203
  "rewards/rollout_reward_func/mean": 0.0,
204
  "rewards/rollout_reward_func/std": 0.0,
205
  "step": 7,
206
- "step_time": 21.887395144010952
207
  },
208
  {
209
  "clip_ratio/high_max": 0.0,
@@ -231,7 +231,7 @@
231
  "rewards/rollout_reward_func/mean": 0.0,
232
  "rewards/rollout_reward_func/std": 0.0,
233
  "step": 8,
234
- "step_time": 19.28527039799519
235
  },
236
  {
237
  "clip_ratio/high_max": 0.0,
@@ -259,7 +259,7 @@
259
  "rewards/rollout_reward_func/mean": 0.0,
260
  "rewards/rollout_reward_func/std": 0.0,
261
  "step": 9,
262
- "step_time": 22.71891580100055
263
  },
264
  {
265
  "clip_ratio/high_max": 0.0,
@@ -287,7 +287,7 @@
287
  "rewards/rollout_reward_func/mean": 0.0,
288
  "rewards/rollout_reward_func/std": 0.0,
289
  "step": 10,
290
- "step_time": 22.25483033200726
291
  },
292
  {
293
  "clip_ratio/high_max": 0.0,
@@ -315,7 +315,7 @@
315
  "rewards/rollout_reward_func/mean": 0.0,
316
  "rewards/rollout_reward_func/std": 0.0,
317
  "step": 11,
318
- "step_time": 19.253934067986847
319
  },
320
  {
321
  "clip_ratio/high_max": 0.0,
@@ -343,7 +343,7 @@
343
  "rewards/rollout_reward_func/mean": 0.0,
344
  "rewards/rollout_reward_func/std": 0.0,
345
  "step": 12,
346
- "step_time": 21.867263803986134
347
  },
348
  {
349
  "clip_ratio/high_max": 0.0,
@@ -371,7 +371,7 @@
371
  "rewards/rollout_reward_func/mean": 0.0,
372
  "rewards/rollout_reward_func/std": 0.0,
373
  "step": 13,
374
- "step_time": 23.14227707999089
375
  },
376
  {
377
  "clip_ratio/high_max": 0.0,
@@ -399,7 +399,7 @@
399
  "rewards/rollout_reward_func/mean": 0.0,
400
  "rewards/rollout_reward_func/std": 0.0,
401
  "step": 14,
402
- "step_time": 19.94518949700432
403
  },
404
  {
405
  "clip_ratio/high_max": 0.0,
@@ -427,7 +427,7 @@
427
  "rewards/rollout_reward_func/mean": 0.0,
428
  "rewards/rollout_reward_func/std": 0.0,
429
  "step": 15,
430
- "step_time": 22.22382894000475
431
  },
432
  {
433
  "clip_ratio/high_max": 0.0,
@@ -455,7 +455,7 @@
455
  "rewards/rollout_reward_func/mean": 0.0,
456
  "rewards/rollout_reward_func/std": 0.0,
457
  "step": 16,
458
- "step_time": 22.018562041987025
459
  },
460
  {
461
  "clip_ratio/high_max": 0.0,
@@ -483,7 +483,7 @@
483
  "rewards/rollout_reward_func/mean": 0.0,
484
  "rewards/rollout_reward_func/std": 0.0,
485
  "step": 17,
486
- "step_time": 19.51297070399596
487
  },
488
  {
489
  "clip_ratio/high_max": 0.0,
@@ -511,7 +511,7 @@
511
  "rewards/rollout_reward_func/mean": 0.0,
512
  "rewards/rollout_reward_func/std": 0.0,
513
  "step": 18,
514
- "step_time": 22.028209694988618
515
  },
516
  {
517
  "clip_ratio/high_max": 0.0,
@@ -539,7 +539,7 @@
539
  "rewards/rollout_reward_func/mean": 0.0,
540
  "rewards/rollout_reward_func/std": 0.0,
541
  "step": 19,
542
- "step_time": 22.048571847008134
543
  },
544
  {
545
  "clip_ratio/high_max": 0.0,
@@ -567,7 +567,7 @@
567
  "rewards/rollout_reward_func/mean": 0.0,
568
  "rewards/rollout_reward_func/std": 0.0,
569
  "step": 20,
570
- "step_time": 22.758304221002618
571
  },
572
  {
573
  "clip_ratio/high_max": 0.0,
@@ -595,7 +595,7 @@
595
  "rewards/rollout_reward_func/mean": 0.0,
596
  "rewards/rollout_reward_func/std": 0.0,
597
  "step": 21,
598
- "step_time": 19.671875301013642
599
  },
600
  {
601
  "clip_ratio/high_max": 0.0,
@@ -623,7 +623,7 @@
623
  "rewards/rollout_reward_func/mean": 0.0,
624
  "rewards/rollout_reward_func/std": 0.0,
625
  "step": 22,
626
- "step_time": 23.124933028993837
627
  },
628
  {
629
  "clip_ratio/high_max": 0.0,
@@ -651,7 +651,7 @@
651
  "rewards/rollout_reward_func/mean": 0.0,
652
  "rewards/rollout_reward_func/std": 0.0,
653
  "step": 23,
654
- "step_time": 22.448714583006222
655
  },
656
  {
657
  "clip_ratio/high_max": 0.0,
@@ -679,7 +679,7 @@
679
  "rewards/rollout_reward_func/mean": 0.0,
680
  "rewards/rollout_reward_func/std": 0.0,
681
  "step": 24,
682
- "step_time": 19.356710502994247
683
  },
684
  {
685
  "clip_ratio/high_max": 0.0,
@@ -707,7 +707,7 @@
707
  "rewards/rollout_reward_func/mean": 0.0,
708
  "rewards/rollout_reward_func/std": 0.0,
709
  "step": 25,
710
- "step_time": 22.57160071598628
711
  },
712
  {
713
  "clip_ratio/high_max": 0.0,
@@ -735,7 +735,7 @@
735
  "rewards/rollout_reward_func/mean": 0.0,
736
  "rewards/rollout_reward_func/std": 0.0,
737
  "step": 26,
738
- "step_time": 21.988813521005795
739
  },
740
  {
741
  "clip_ratio/high_max": 0.0,
@@ -763,7 +763,7 @@
763
  "rewards/rollout_reward_func/mean": 0.0,
764
  "rewards/rollout_reward_func/std": 0.0,
765
  "step": 27,
766
- "step_time": 20.382539455007645
767
  },
768
  {
769
  "clip_ratio/high_max": 0.0,
@@ -791,7 +791,7 @@
791
  "rewards/rollout_reward_func/mean": 0.0,
792
  "rewards/rollout_reward_func/std": 0.0,
793
  "step": 28,
794
- "step_time": 22.337577592013986
795
  },
796
  {
797
  "clip_ratio/high_max": 0.0,
@@ -819,7 +819,7 @@
819
  "rewards/rollout_reward_func/mean": 0.0,
820
  "rewards/rollout_reward_func/std": 0.0,
821
  "step": 29,
822
- "step_time": 19.32650230699801
823
  },
824
  {
825
  "clip_ratio/high_max": 0.0,
@@ -847,7 +847,7 @@
847
  "rewards/rollout_reward_func/mean": 0.0,
848
  "rewards/rollout_reward_func/std": 0.0,
849
  "step": 30,
850
- "step_time": 19.65242459801084
851
  },
852
  {
853
  "clip_ratio/high_max": 0.0,
@@ -875,7 +875,7 @@
875
  "rewards/rollout_reward_func/mean": 0.0,
876
  "rewards/rollout_reward_func/std": 0.0,
877
  "step": 31,
878
- "step_time": 19.795237326987262
879
  },
880
  {
881
  "clip_ratio/high_max": 0.0,
@@ -903,7 +903,7 @@
903
  "rewards/rollout_reward_func/mean": 0.0,
904
  "rewards/rollout_reward_func/std": 0.0,
905
  "step": 32,
906
- "step_time": 23.164430122997146
907
  },
908
  {
909
  "clip_ratio/high_max": 0.0,
@@ -931,7 +931,7 @@
931
  "rewards/rollout_reward_func/mean": 0.0,
932
  "rewards/rollout_reward_func/std": 0.0,
933
  "step": 33,
934
- "step_time": 19.669757636002032
935
  },
936
  {
937
  "clip_ratio/high_max": 0.0,
@@ -959,7 +959,7 @@
959
  "rewards/rollout_reward_func/mean": 0.0,
960
  "rewards/rollout_reward_func/std": 0.0,
961
  "step": 34,
962
- "step_time": 22.35915388200374
963
  },
964
  {
965
  "clip_ratio/high_max": 0.0,
@@ -987,7 +987,7 @@
987
  "rewards/rollout_reward_func/mean": 0.0,
988
  "rewards/rollout_reward_func/std": 0.0,
989
  "step": 35,
990
- "step_time": 21.746646188010345
991
  },
992
  {
993
  "clip_ratio/high_max": 0.0,
@@ -1015,7 +1015,7 @@
1015
  "rewards/rollout_reward_func/mean": 0.0,
1016
  "rewards/rollout_reward_func/std": 0.0,
1017
  "step": 36,
1018
- "step_time": 20.59833544599678
1019
  },
1020
  {
1021
  "clip_ratio/high_max": 0.0,
@@ -1043,7 +1043,7 @@
1043
  "rewards/rollout_reward_func/mean": 0.0,
1044
  "rewards/rollout_reward_func/std": 0.0,
1045
  "step": 37,
1046
- "step_time": 19.615884350998385
1047
  },
1048
  {
1049
  "clip_ratio/high_max": 0.0,
@@ -1071,7 +1071,7 @@
1071
  "rewards/rollout_reward_func/mean": 0.0,
1072
  "rewards/rollout_reward_func/std": 0.0,
1073
  "step": 38,
1074
- "step_time": 19.57846778199746
1075
  },
1076
  {
1077
  "clip_ratio/high_max": 0.0,
@@ -1099,7 +1099,7 @@
1099
  "rewards/rollout_reward_func/mean": 0.0,
1100
  "rewards/rollout_reward_func/std": 0.0,
1101
  "step": 39,
1102
- "step_time": 19.47087104698585
1103
  },
1104
  {
1105
  "clip_ratio/high_max": 0.0,
@@ -1127,7 +1127,7 @@
1127
  "rewards/rollout_reward_func/mean": 0.0,
1128
  "rewards/rollout_reward_func/std": 0.0,
1129
  "step": 40,
1130
- "step_time": 19.335427225996682
1131
  },
1132
  {
1133
  "clip_ratio/high_max": 0.0,
@@ -1155,7 +1155,7 @@
1155
  "rewards/rollout_reward_func/mean": 0.0,
1156
  "rewards/rollout_reward_func/std": 0.0,
1157
  "step": 41,
1158
- "step_time": 23.396767112004454
1159
  },
1160
  {
1161
  "clip_ratio/high_max": 0.0,
@@ -1183,7 +1183,7 @@
1183
  "rewards/rollout_reward_func/mean": 0.0,
1184
  "rewards/rollout_reward_func/std": 0.0,
1185
  "step": 42,
1186
- "step_time": 19.57603736199235
1187
  },
1188
  {
1189
  "clip_ratio/high_max": 0.0,
@@ -1211,7 +1211,7 @@
1211
  "rewards/rollout_reward_func/mean": 0.0,
1212
  "rewards/rollout_reward_func/std": 0.0,
1213
  "step": 43,
1214
- "step_time": 19.554181985011382
1215
  },
1216
  {
1217
  "clip_ratio/high_max": 0.0,
@@ -1239,7 +1239,7 @@
1239
  "rewards/rollout_reward_func/mean": 0.0,
1240
  "rewards/rollout_reward_func/std": 0.0,
1241
  "step": 44,
1242
- "step_time": 19.349005468007817
1243
  },
1244
  {
1245
  "clip_ratio/high_max": 0.0,
@@ -1267,7 +1267,7 @@
1267
  "rewards/rollout_reward_func/mean": 0.0,
1268
  "rewards/rollout_reward_func/std": 0.0,
1269
  "step": 45,
1270
- "step_time": 22.727461100002984
1271
  },
1272
  {
1273
  "clip_ratio/high_max": 0.0,
@@ -1295,7 +1295,7 @@
1295
  "rewards/rollout_reward_func/mean": 0.0,
1296
  "rewards/rollout_reward_func/std": 0.0,
1297
  "step": 46,
1298
- "step_time": 20.33228819798387
1299
  },
1300
  {
1301
  "clip_ratio/high_max": 0.0,
@@ -1323,7 +1323,7 @@
1323
  "rewards/rollout_reward_func/mean": 0.0,
1324
  "rewards/rollout_reward_func/std": 0.0,
1325
  "step": 47,
1326
- "step_time": 19.80965869199281
1327
  },
1328
  {
1329
  "clip_ratio/high_max": 0.0,
@@ -1351,7 +1351,7 @@
1351
  "rewards/rollout_reward_func/mean": 0.0,
1352
  "rewards/rollout_reward_func/std": 0.0,
1353
  "step": 48,
1354
- "step_time": 22.324468039005296
1355
  },
1356
  {
1357
  "clip_ratio/high_max": 0.0,
@@ -1379,7 +1379,7 @@
1379
  "rewards/rollout_reward_func/mean": 0.0,
1380
  "rewards/rollout_reward_func/std": 0.0,
1381
  "step": 49,
1382
- "step_time": 19.60882888899505
1383
  },
1384
  {
1385
  "clip_ratio/high_max": 0.0,
@@ -1407,7 +1407,7 @@
1407
  "rewards/rollout_reward_func/mean": 0.0,
1408
  "rewards/rollout_reward_func/std": 0.0,
1409
  "step": 50,
1410
- "step_time": 22.923746538996056
1411
  },
1412
  {
1413
  "clip_ratio/high_max": 0.0,
@@ -1435,7 +1435,7 @@
1435
  "rewards/rollout_reward_func/mean": 0.0,
1436
  "rewards/rollout_reward_func/std": 0.0,
1437
  "step": 51,
1438
- "step_time": 22.990994864012464
1439
  },
1440
  {
1441
  "clip_ratio/high_max": 0.0,
@@ -1463,7 +1463,7 @@
1463
  "rewards/rollout_reward_func/mean": 0.0,
1464
  "rewards/rollout_reward_func/std": 0.0,
1465
  "step": 52,
1466
- "step_time": 19.864189005005755
1467
  },
1468
  {
1469
  "clip_ratio/high_max": 0.0,
@@ -1491,7 +1491,7 @@
1491
  "rewards/rollout_reward_func/mean": 0.0,
1492
  "rewards/rollout_reward_func/std": 0.0,
1493
  "step": 53,
1494
- "step_time": 19.978898688008485
1495
  },
1496
  {
1497
  "clip_ratio/high_max": 0.0,
@@ -1519,7 +1519,7 @@
1519
  "rewards/rollout_reward_func/mean": 0.0,
1520
  "rewards/rollout_reward_func/std": 0.0,
1521
  "step": 54,
1522
- "step_time": 19.302379137989192
1523
  },
1524
  {
1525
  "clip_ratio/high_max": 0.0,
@@ -1547,7 +1547,7 @@
1547
  "rewards/rollout_reward_func/mean": 0.0,
1548
  "rewards/rollout_reward_func/std": 0.0,
1549
  "step": 55,
1550
- "step_time": 20.470470863983792
1551
  },
1552
  {
1553
  "clip_ratio/high_max": 0.0,
@@ -1575,7 +1575,7 @@
1575
  "rewards/rollout_reward_func/mean": 0.0,
1576
  "rewards/rollout_reward_func/std": 0.0,
1577
  "step": 56,
1578
- "step_time": 22.26121293801407
1579
  },
1580
  {
1581
  "clip_ratio/high_max": 0.0,
@@ -1603,7 +1603,7 @@
1603
  "rewards/rollout_reward_func/mean": 0.0,
1604
  "rewards/rollout_reward_func/std": 0.0,
1605
  "step": 57,
1606
- "step_time": 19.37005365499499
1607
  },
1608
  {
1609
  "clip_ratio/high_max": 0.0,
@@ -1631,7 +1631,7 @@
1631
  "rewards/rollout_reward_func/mean": 0.0,
1632
  "rewards/rollout_reward_func/std": 0.0,
1633
  "step": 58,
1634
- "step_time": 20.212254795012996
1635
  },
1636
  {
1637
  "clip_ratio/high_max": 0.0,
@@ -1659,7 +1659,7 @@
1659
  "rewards/rollout_reward_func/mean": 0.0,
1660
  "rewards/rollout_reward_func/std": 0.0,
1661
  "step": 59,
1662
- "step_time": 22.4186906150062
1663
  },
1664
  {
1665
  "clip_ratio/high_max": 0.0,
@@ -1687,7 +1687,7 @@
1687
  "rewards/rollout_reward_func/mean": 0.0,
1688
  "rewards/rollout_reward_func/std": 0.0,
1689
  "step": 60,
1690
- "step_time": 20.418159313005162
1691
  },
1692
  {
1693
  "clip_ratio/high_max": 0.0,
@@ -1715,7 +1715,7 @@
1715
  "rewards/rollout_reward_func/mean": 0.0,
1716
  "rewards/rollout_reward_func/std": 0.0,
1717
  "step": 61,
1718
- "step_time": 20.16101988900482
1719
  },
1720
  {
1721
  "clip_ratio/high_max": 0.0,
@@ -1743,7 +1743,7 @@
1743
  "rewards/rollout_reward_func/mean": 0.0,
1744
  "rewards/rollout_reward_func/std": 0.0,
1745
  "step": 62,
1746
- "step_time": 21.949011387994688
1747
  },
1748
  {
1749
  "clip_ratio/high_max": 0.0,
@@ -1771,7 +1771,7 @@
1771
  "rewards/rollout_reward_func/mean": 0.0,
1772
  "rewards/rollout_reward_func/std": 0.0,
1773
  "step": 63,
1774
- "step_time": 22.00667965998582
1775
  },
1776
  {
1777
  "clip_ratio/high_max": 0.0,
@@ -1799,7 +1799,7 @@
1799
  "rewards/rollout_reward_func/mean": 0.0,
1800
  "rewards/rollout_reward_func/std": 0.0,
1801
  "step": 64,
1802
- "step_time": 22.618203107005684
1803
  },
1804
  {
1805
  "clip_ratio/high_max": 0.0,
@@ -1827,7 +1827,7 @@
1827
  "rewards/rollout_reward_func/mean": 0.0,
1828
  "rewards/rollout_reward_func/std": 0.0,
1829
  "step": 65,
1830
- "step_time": 22.467545491999772
1831
  },
1832
  {
1833
  "clip_ratio/high_max": 0.0,
@@ -1855,7 +1855,7 @@
1855
  "rewards/rollout_reward_func/mean": 0.0,
1856
  "rewards/rollout_reward_func/std": 0.0,
1857
  "step": 66,
1858
- "step_time": 19.404011901009653
1859
  },
1860
  {
1861
  "clip_ratio/high_max": 0.0,
@@ -1883,7 +1883,7 @@
1883
  "rewards/rollout_reward_func/mean": 0.0,
1884
  "rewards/rollout_reward_func/std": 0.0,
1885
  "step": 67,
1886
- "step_time": 19.91166835000331
1887
  },
1888
  {
1889
  "clip_ratio/high_max": 0.0,
@@ -1911,7 +1911,7 @@
1911
  "rewards/rollout_reward_func/mean": 0.0,
1912
  "rewards/rollout_reward_func/std": 0.0,
1913
  "step": 68,
1914
- "step_time": 22.706522302993108
1915
  },
1916
  {
1917
  "clip_ratio/high_max": 0.0,
@@ -1939,7 +1939,7 @@
1939
  "rewards/rollout_reward_func/mean": 0.0,
1940
  "rewards/rollout_reward_func/std": 0.0,
1941
  "step": 69,
1942
- "step_time": 23.428568691997498
1943
  },
1944
  {
1945
  "clip_ratio/high_max": 0.0,
@@ -1967,7 +1967,7 @@
1967
  "rewards/rollout_reward_func/mean": 0.0,
1968
  "rewards/rollout_reward_func/std": 0.0,
1969
  "step": 70,
1970
- "step_time": 19.817468445995473
1971
  },
1972
  {
1973
  "clip_ratio/high_max": 0.0,
@@ -1995,7 +1995,7 @@
1995
  "rewards/rollout_reward_func/mean": 0.0,
1996
  "rewards/rollout_reward_func/std": 0.0,
1997
  "step": 71,
1998
- "step_time": 19.85283366099611
1999
  },
2000
  {
2001
  "clip_ratio/high_max": 0.0,
@@ -2023,7 +2023,7 @@
2023
  "rewards/rollout_reward_func/mean": 0.0,
2024
  "rewards/rollout_reward_func/std": 0.0,
2025
  "step": 72,
2026
- "step_time": 19.637089049007045
2027
  },
2028
  {
2029
  "clip_ratio/high_max": 0.0,
@@ -2051,7 +2051,7 @@
2051
  "rewards/rollout_reward_func/mean": 0.0,
2052
  "rewards/rollout_reward_func/std": 0.0,
2053
  "step": 73,
2054
- "step_time": 22.924901701990166
2055
  },
2056
  {
2057
  "clip_ratio/high_max": 0.0,
@@ -2079,7 +2079,7 @@
2079
  "rewards/rollout_reward_func/mean": 0.0,
2080
  "rewards/rollout_reward_func/std": 0.0,
2081
  "step": 74,
2082
- "step_time": 23.01028485299321
2083
  },
2084
  {
2085
  "clip_ratio/high_max": 0.0,
@@ -2107,7 +2107,7 @@
2107
  "rewards/rollout_reward_func/mean": 0.0,
2108
  "rewards/rollout_reward_func/std": 0.0,
2109
  "step": 75,
2110
- "step_time": 19.789993641017645
2111
  }
2112
  ],
2113
  "logging_steps": 1.0,
 
35
  "rewards/rollout_reward_func/mean": 0.0,
36
  "rewards/rollout_reward_func/std": 0.0,
37
  "step": 1,
38
+ "step_time": 20.038708471984137
39
  },
40
  {
41
  "clip_ratio/high_max": 0.0,
 
63
  "rewards/rollout_reward_func/mean": 0.0,
64
  "rewards/rollout_reward_func/std": 0.0,
65
  "step": 2,
66
+ "step_time": 22.31387728100526
67
  },
68
  {
69
  "clip_ratio/high_max": 0.0,
 
91
  "rewards/rollout_reward_func/mean": 0.0,
92
  "rewards/rollout_reward_func/std": 0.0,
93
  "step": 3,
94
+ "step_time": 19.508486614991853
95
  },
96
  {
97
  "clip_ratio/high_max": 0.0,
 
119
  "rewards/rollout_reward_func/mean": 0.0,
120
  "rewards/rollout_reward_func/std": 0.0,
121
  "step": 4,
122
+ "step_time": 20.417726718005724
123
  },
124
  {
125
  "clip_ratio/high_max": 0.0,
 
147
  "rewards/rollout_reward_func/mean": 0.0,
148
  "rewards/rollout_reward_func/std": 0.0,
149
  "step": 5,
150
+ "step_time": 19.458035143004963
151
  },
152
  {
153
  "clip_ratio/high_max": 0.0,
 
175
  "rewards/rollout_reward_func/mean": 0.0,
176
  "rewards/rollout_reward_func/std": 0.0,
177
  "step": 6,
178
+ "step_time": 22.11808781498985
179
  },
180
  {
181
  "clip_ratio/high_max": 0.0,
 
203
  "rewards/rollout_reward_func/mean": 0.0,
204
  "rewards/rollout_reward_func/std": 0.0,
205
  "step": 7,
206
+ "step_time": 21.699966289990698
207
  },
208
  {
209
  "clip_ratio/high_max": 0.0,
 
231
  "rewards/rollout_reward_func/mean": 0.0,
232
  "rewards/rollout_reward_func/std": 0.0,
233
  "step": 8,
234
+ "step_time": 19.435475739024696
235
  },
236
  {
237
  "clip_ratio/high_max": 0.0,
 
259
  "rewards/rollout_reward_func/mean": 0.0,
260
  "rewards/rollout_reward_func/std": 0.0,
261
  "step": 9,
262
+ "step_time": 23.149850735993823
263
  },
264
  {
265
  "clip_ratio/high_max": 0.0,
 
287
  "rewards/rollout_reward_func/mean": 0.0,
288
  "rewards/rollout_reward_func/std": 0.0,
289
  "step": 10,
290
+ "step_time": 21.312995460008096
291
  },
292
  {
293
  "clip_ratio/high_max": 0.0,
 
315
  "rewards/rollout_reward_func/mean": 0.0,
316
  "rewards/rollout_reward_func/std": 0.0,
317
  "step": 11,
318
+ "step_time": 18.84161558598862
319
  },
320
  {
321
  "clip_ratio/high_max": 0.0,
 
343
  "rewards/rollout_reward_func/mean": 0.0,
344
  "rewards/rollout_reward_func/std": 0.0,
345
  "step": 12,
346
+ "step_time": 21.481500715010043
347
  },
348
  {
349
  "clip_ratio/high_max": 0.0,
 
371
  "rewards/rollout_reward_func/mean": 0.0,
372
  "rewards/rollout_reward_func/std": 0.0,
373
  "step": 13,
374
+ "step_time": 22.648648835995118
375
  },
376
  {
377
  "clip_ratio/high_max": 0.0,
 
399
  "rewards/rollout_reward_func/mean": 0.0,
400
  "rewards/rollout_reward_func/std": 0.0,
401
  "step": 14,
402
+ "step_time": 19.483697141011362
403
  },
404
  {
405
  "clip_ratio/high_max": 0.0,
 
427
  "rewards/rollout_reward_func/mean": 0.0,
428
  "rewards/rollout_reward_func/std": 0.0,
429
  "step": 15,
430
+ "step_time": 22.048389301991847
431
  },
432
  {
433
  "clip_ratio/high_max": 0.0,
 
455
  "rewards/rollout_reward_func/mean": 0.0,
456
  "rewards/rollout_reward_func/std": 0.0,
457
  "step": 16,
458
+ "step_time": 21.76690764699015
459
  },
460
  {
461
  "clip_ratio/high_max": 0.0,
 
483
  "rewards/rollout_reward_func/mean": 0.0,
484
  "rewards/rollout_reward_func/std": 0.0,
485
  "step": 17,
486
+ "step_time": 19.46455569099635
487
  },
488
  {
489
  "clip_ratio/high_max": 0.0,
 
511
  "rewards/rollout_reward_func/mean": 0.0,
512
  "rewards/rollout_reward_func/std": 0.0,
513
  "step": 18,
514
+ "step_time": 22.669331256991427
515
  },
516
  {
517
  "clip_ratio/high_max": 0.0,
 
539
  "rewards/rollout_reward_func/mean": 0.0,
540
  "rewards/rollout_reward_func/std": 0.0,
541
  "step": 19,
542
+ "step_time": 22.320524194008613
543
  },
544
  {
545
  "clip_ratio/high_max": 0.0,
 
567
  "rewards/rollout_reward_func/mean": 0.0,
568
  "rewards/rollout_reward_func/std": 0.0,
569
  "step": 20,
570
+ "step_time": 22.302162043000862
571
  },
572
  {
573
  "clip_ratio/high_max": 0.0,
 
595
  "rewards/rollout_reward_func/mean": 0.0,
596
  "rewards/rollout_reward_func/std": 0.0,
597
  "step": 21,
598
+ "step_time": 19.36471923001227
599
  },
600
  {
601
  "clip_ratio/high_max": 0.0,
 
623
  "rewards/rollout_reward_func/mean": 0.0,
624
  "rewards/rollout_reward_func/std": 0.0,
625
  "step": 22,
626
+ "step_time": 22.757351590000326
627
  },
628
  {
629
  "clip_ratio/high_max": 0.0,
 
651
  "rewards/rollout_reward_func/mean": 0.0,
652
  "rewards/rollout_reward_func/std": 0.0,
653
  "step": 23,
654
+ "step_time": 22.50556095898355
655
  },
656
  {
657
  "clip_ratio/high_max": 0.0,
 
679
  "rewards/rollout_reward_func/mean": 0.0,
680
  "rewards/rollout_reward_func/std": 0.0,
681
  "step": 24,
682
+ "step_time": 19.709908160984924
683
  },
684
  {
685
  "clip_ratio/high_max": 0.0,
 
707
  "rewards/rollout_reward_func/mean": 0.0,
708
  "rewards/rollout_reward_func/std": 0.0,
709
  "step": 25,
710
+ "step_time": 22.27659140600008
711
  },
712
  {
713
  "clip_ratio/high_max": 0.0,
 
735
  "rewards/rollout_reward_func/mean": 0.0,
736
  "rewards/rollout_reward_func/std": 0.0,
737
  "step": 26,
738
+ "step_time": 21.666986704993178
739
  },
740
  {
741
  "clip_ratio/high_max": 0.0,
 
763
  "rewards/rollout_reward_func/mean": 0.0,
764
  "rewards/rollout_reward_func/std": 0.0,
765
  "step": 27,
766
+ "step_time": 19.754789013990376
767
  },
768
  {
769
  "clip_ratio/high_max": 0.0,
 
791
  "rewards/rollout_reward_func/mean": 0.0,
792
  "rewards/rollout_reward_func/std": 0.0,
793
  "step": 28,
794
+ "step_time": 22.38940100500622
795
  },
796
  {
797
  "clip_ratio/high_max": 0.0,
 
819
  "rewards/rollout_reward_func/mean": 0.0,
820
  "rewards/rollout_reward_func/std": 0.0,
821
  "step": 29,
822
+ "step_time": 19.54482721599925
823
  },
824
  {
825
  "clip_ratio/high_max": 0.0,
 
847
  "rewards/rollout_reward_func/mean": 0.0,
848
  "rewards/rollout_reward_func/std": 0.0,
849
  "step": 30,
850
+ "step_time": 19.39816167599929
851
  },
852
  {
853
  "clip_ratio/high_max": 0.0,
 
875
  "rewards/rollout_reward_func/mean": 0.0,
876
  "rewards/rollout_reward_func/std": 0.0,
877
  "step": 31,
878
+ "step_time": 19.455606768009602
879
  },
880
  {
881
  "clip_ratio/high_max": 0.0,
 
903
  "rewards/rollout_reward_func/mean": 0.0,
904
  "rewards/rollout_reward_func/std": 0.0,
905
  "step": 32,
906
+ "step_time": 22.964359290992434
907
  },
908
  {
909
  "clip_ratio/high_max": 0.0,
 
931
  "rewards/rollout_reward_func/mean": 0.0,
932
  "rewards/rollout_reward_func/std": 0.0,
933
  "step": 33,
934
+ "step_time": 19.29167694800708
935
  },
936
  {
937
  "clip_ratio/high_max": 0.0,
 
959
  "rewards/rollout_reward_func/mean": 0.0,
960
  "rewards/rollout_reward_func/std": 0.0,
961
  "step": 34,
962
+ "step_time": 21.556990506993316
963
  },
964
  {
965
  "clip_ratio/high_max": 0.0,
 
987
  "rewards/rollout_reward_func/mean": 0.0,
988
  "rewards/rollout_reward_func/std": 0.0,
989
  "step": 35,
990
+ "step_time": 21.540133035996405
991
  },
992
  {
993
  "clip_ratio/high_max": 0.0,
 
1015
  "rewards/rollout_reward_func/mean": 0.0,
1016
  "rewards/rollout_reward_func/std": 0.0,
1017
  "step": 36,
1018
+ "step_time": 20.61158860699652
1019
  },
1020
  {
1021
  "clip_ratio/high_max": 0.0,
 
1043
  "rewards/rollout_reward_func/mean": 0.0,
1044
  "rewards/rollout_reward_func/std": 0.0,
1045
  "step": 37,
1046
+ "step_time": 19.463059345995134
1047
  },
1048
  {
1049
  "clip_ratio/high_max": 0.0,
 
1071
  "rewards/rollout_reward_func/mean": 0.0,
1072
  "rewards/rollout_reward_func/std": 0.0,
1073
  "step": 38,
1074
+ "step_time": 19.32687450600497
1075
  },
1076
  {
1077
  "clip_ratio/high_max": 0.0,
 
1099
  "rewards/rollout_reward_func/mean": 0.0,
1100
  "rewards/rollout_reward_func/std": 0.0,
1101
  "step": 39,
1102
+ "step_time": 19.368901928013656
1103
  },
1104
  {
1105
  "clip_ratio/high_max": 0.0,
 
1127
  "rewards/rollout_reward_func/mean": 0.0,
1128
  "rewards/rollout_reward_func/std": 0.0,
1129
  "step": 40,
1130
+ "step_time": 19.347783612996864
1131
  },
1132
  {
1133
  "clip_ratio/high_max": 0.0,
 
1155
  "rewards/rollout_reward_func/mean": 0.0,
1156
  "rewards/rollout_reward_func/std": 0.0,
1157
  "step": 41,
1158
+ "step_time": 22.78021706399886
1159
  },
1160
  {
1161
  "clip_ratio/high_max": 0.0,
 
1183
  "rewards/rollout_reward_func/mean": 0.0,
1184
  "rewards/rollout_reward_func/std": 0.0,
1185
  "step": 42,
1186
+ "step_time": 19.530366815997695
1187
  },
1188
  {
1189
  "clip_ratio/high_max": 0.0,
 
1211
  "rewards/rollout_reward_func/mean": 0.0,
1212
  "rewards/rollout_reward_func/std": 0.0,
1213
  "step": 43,
1214
+ "step_time": 19.35092342599819
1215
  },
1216
  {
1217
  "clip_ratio/high_max": 0.0,
 
1239
  "rewards/rollout_reward_func/mean": 0.0,
1240
  "rewards/rollout_reward_func/std": 0.0,
1241
  "step": 44,
1242
+ "step_time": 19.47554490200855
1243
  },
1244
  {
1245
  "clip_ratio/high_max": 0.0,
 
1267
  "rewards/rollout_reward_func/mean": 0.0,
1268
  "rewards/rollout_reward_func/std": 0.0,
1269
  "step": 45,
1270
+ "step_time": 22.798208642001555
1271
  },
1272
  {
1273
  "clip_ratio/high_max": 0.0,
 
1295
  "rewards/rollout_reward_func/mean": 0.0,
1296
  "rewards/rollout_reward_func/std": 0.0,
1297
  "step": 46,
1298
+ "step_time": 19.88411584899586
1299
  },
1300
  {
1301
  "clip_ratio/high_max": 0.0,
 
1323
  "rewards/rollout_reward_func/mean": 0.0,
1324
  "rewards/rollout_reward_func/std": 0.0,
1325
  "step": 47,
1326
+ "step_time": 19.48703931599448
1327
  },
1328
  {
1329
  "clip_ratio/high_max": 0.0,
 
1351
  "rewards/rollout_reward_func/mean": 0.0,
1352
  "rewards/rollout_reward_func/std": 0.0,
1353
  "step": 48,
1354
+ "step_time": 22.090118679989246
1355
  },
1356
  {
1357
  "clip_ratio/high_max": 0.0,
 
1379
  "rewards/rollout_reward_func/mean": 0.0,
1380
  "rewards/rollout_reward_func/std": 0.0,
1381
  "step": 49,
1382
+ "step_time": 19.470153064998158
1383
  },
1384
  {
1385
  "clip_ratio/high_max": 0.0,
 
1407
  "rewards/rollout_reward_func/mean": 0.0,
1408
  "rewards/rollout_reward_func/std": 0.0,
1409
  "step": 50,
1410
+ "step_time": 22.60661829500168
1411
  },
1412
  {
1413
  "clip_ratio/high_max": 0.0,
 
1435
  "rewards/rollout_reward_func/mean": 0.0,
1436
  "rewards/rollout_reward_func/std": 0.0,
1437
  "step": 51,
1438
+ "step_time": 22.327632517990423
1439
  },
1440
  {
1441
  "clip_ratio/high_max": 0.0,
 
1463
  "rewards/rollout_reward_func/mean": 0.0,
1464
  "rewards/rollout_reward_func/std": 0.0,
1465
  "step": 52,
1466
+ "step_time": 19.20923549200961
1467
  },
1468
  {
1469
  "clip_ratio/high_max": 0.0,
 
1491
  "rewards/rollout_reward_func/mean": 0.0,
1492
  "rewards/rollout_reward_func/std": 0.0,
1493
  "step": 53,
1494
+ "step_time": 19.398160734999692
1495
  },
1496
  {
1497
  "clip_ratio/high_max": 0.0,
 
1519
  "rewards/rollout_reward_func/mean": 0.0,
1520
  "rewards/rollout_reward_func/std": 0.0,
1521
  "step": 54,
1522
+ "step_time": 19.338012945008813
1523
  },
1524
  {
1525
  "clip_ratio/high_max": 0.0,
 
1547
  "rewards/rollout_reward_func/mean": 0.0,
1548
  "rewards/rollout_reward_func/std": 0.0,
1549
  "step": 55,
1550
+ "step_time": 20.151991571001417
1551
  },
1552
  {
1553
  "clip_ratio/high_max": 0.0,
 
1575
  "rewards/rollout_reward_func/mean": 0.0,
1576
  "rewards/rollout_reward_func/std": 0.0,
1577
  "step": 56,
1578
+ "step_time": 22.507306526997127
1579
  },
1580
  {
1581
  "clip_ratio/high_max": 0.0,
 
1603
  "rewards/rollout_reward_func/mean": 0.0,
1604
  "rewards/rollout_reward_func/std": 0.0,
1605
  "step": 57,
1606
+ "step_time": 19.39536341799976
1607
  },
1608
  {
1609
  "clip_ratio/high_max": 0.0,
 
1631
  "rewards/rollout_reward_func/mean": 0.0,
1632
  "rewards/rollout_reward_func/std": 0.0,
1633
  "step": 58,
1634
+ "step_time": 19.44620426499023
1635
  },
1636
  {
1637
  "clip_ratio/high_max": 0.0,
 
1659
  "rewards/rollout_reward_func/mean": 0.0,
1660
  "rewards/rollout_reward_func/std": 0.0,
1661
  "step": 59,
1662
+ "step_time": 22.402232911990723
1663
  },
1664
  {
1665
  "clip_ratio/high_max": 0.0,
 
1687
  "rewards/rollout_reward_func/mean": 0.0,
1688
  "rewards/rollout_reward_func/std": 0.0,
1689
  "step": 60,
1690
+ "step_time": 19.969688828998187
1691
  },
1692
  {
1693
  "clip_ratio/high_max": 0.0,
 
1715
  "rewards/rollout_reward_func/mean": 0.0,
1716
  "rewards/rollout_reward_func/std": 0.0,
1717
  "step": 61,
1718
+ "step_time": 19.51650980200793
1719
  },
1720
  {
1721
  "clip_ratio/high_max": 0.0,
 
1743
  "rewards/rollout_reward_func/mean": 0.0,
1744
  "rewards/rollout_reward_func/std": 0.0,
1745
  "step": 62,
1746
+ "step_time": 21.935334763016726
1747
  },
1748
  {
1749
  "clip_ratio/high_max": 0.0,
 
1771
  "rewards/rollout_reward_func/mean": 0.0,
1772
  "rewards/rollout_reward_func/std": 0.0,
1773
  "step": 63,
1774
+ "step_time": 22.139962298009777
1775
  },
1776
  {
1777
  "clip_ratio/high_max": 0.0,
 
1799
  "rewards/rollout_reward_func/mean": 0.0,
1800
  "rewards/rollout_reward_func/std": 0.0,
1801
  "step": 64,
1802
+ "step_time": 21.979154282984382
1803
  },
1804
  {
1805
  "clip_ratio/high_max": 0.0,
 
1827
  "rewards/rollout_reward_func/mean": 0.0,
1828
  "rewards/rollout_reward_func/std": 0.0,
1829
  "step": 65,
1830
+ "step_time": 22.214402237004833
1831
  },
1832
  {
1833
  "clip_ratio/high_max": 0.0,
 
1855
  "rewards/rollout_reward_func/mean": 0.0,
1856
  "rewards/rollout_reward_func/std": 0.0,
1857
  "step": 66,
1858
+ "step_time": 19.304359686997486
1859
  },
1860
  {
1861
  "clip_ratio/high_max": 0.0,
 
1883
  "rewards/rollout_reward_func/mean": 0.0,
1884
  "rewards/rollout_reward_func/std": 0.0,
1885
  "step": 67,
1886
+ "step_time": 19.36013725100929
1887
  },
1888
  {
1889
  "clip_ratio/high_max": 0.0,
 
1911
  "rewards/rollout_reward_func/mean": 0.0,
1912
  "rewards/rollout_reward_func/std": 0.0,
1913
  "step": 68,
1914
+ "step_time": 21.965071903985518
1915
  },
1916
  {
1917
  "clip_ratio/high_max": 0.0,
 
1939
  "rewards/rollout_reward_func/mean": 0.0,
1940
  "rewards/rollout_reward_func/std": 0.0,
1941
  "step": 69,
1942
+ "step_time": 23.3430329200055
1943
  },
1944
  {
1945
  "clip_ratio/high_max": 0.0,
 
1967
  "rewards/rollout_reward_func/mean": 0.0,
1968
  "rewards/rollout_reward_func/std": 0.0,
1969
  "step": 70,
1970
+ "step_time": 19.697308761002205
1971
  },
1972
  {
1973
  "clip_ratio/high_max": 0.0,
 
1995
  "rewards/rollout_reward_func/mean": 0.0,
1996
  "rewards/rollout_reward_func/std": 0.0,
1997
  "step": 71,
1998
+ "step_time": 19.443207848002203
1999
  },
2000
  {
2001
  "clip_ratio/high_max": 0.0,
 
2023
  "rewards/rollout_reward_func/mean": 0.0,
2024
  "rewards/rollout_reward_func/std": 0.0,
2025
  "step": 72,
2026
+ "step_time": 20.01639660699584
2027
  },
2028
  {
2029
  "clip_ratio/high_max": 0.0,
 
2051
  "rewards/rollout_reward_func/mean": 0.0,
2052
  "rewards/rollout_reward_func/std": 0.0,
2053
  "step": 73,
2054
+ "step_time": 23.112452601002587
2055
  },
2056
  {
2057
  "clip_ratio/high_max": 0.0,
 
2079
  "rewards/rollout_reward_func/mean": 0.0,
2080
  "rewards/rollout_reward_func/std": 0.0,
2081
  "step": 74,
2082
+ "step_time": 23.19768616399233
2083
  },
2084
  {
2085
  "clip_ratio/high_max": 0.0,
 
2107
  "rewards/rollout_reward_func/mean": 0.0,
2108
  "rewards/rollout_reward_func/std": 0.0,
2109
  "step": 75,
2110
+ "step_time": 19.854602511004487
2111
  }
2112
  ],
2113
  "logging_steps": 1.0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8d5567b1de128707b3851170b5e2c2ad0a2a8c83b32d343f3af280646938660
3
  size 7889
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1c9d78e6601bc52704e4c410160ac3431adbbc8e01e0bcc154083b11a67105e
3
  size 7889