File size: 39,476 Bytes
b45d5e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.2070006035003018,
  "eval_steps": 250,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.12125,
      "completions/max_length": 255.34,
      "completions/max_terminated_length": 252.14,
      "completions/mean_length": 221.534375,
      "completions/mean_terminated_length": 216.93697082519532,
      "completions/min_length": 173.54,
      "completions/min_terminated_length": 173.54,
      "entropy": 0.10048629969358444,
      "epoch": 0.030175015087507542,
      "frac_reward_zero_std": 0.3225,
      "grad_norm": 0.46380576491355896,
      "learning_rate": 5e-05,
      "loss": 0.004,
      "num_tokens": 8142396.0,
      "reward": 7.30375,
      "reward_std": 1.5006456315517425,
      "rewards/event_reward_fn/mean": 7.30375,
      "rewards/event_reward_fn/std": 6.278198585510254,
      "step": 50,
      "step_time": 40.824848868116966
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.068125,
      "completions/max_length": 251.74,
      "completions/max_terminated_length": 248.06,
      "completions/mean_length": 215.08625,
      "completions/mean_terminated_length": 212.25316284179686,
      "completions/min_length": 171.76,
      "completions/min_terminated_length": 171.76,
      "entropy": 0.10318506792187691,
      "epoch": 0.060350030175015085,
      "frac_reward_zero_std": 0.325,
      "grad_norm": 0.21978232264518738,
      "learning_rate": 5e-05,
      "loss": -0.0025,
      "num_tokens": 16421719.0,
      "reward": 7.36875,
      "reward_std": 1.3263894939422607,
      "rewards/event_reward_fn/mean": 7.36875,
      "rewards/event_reward_fn/std": 6.119045643806458,
      "step": 100,
      "step_time": 38.99798643006128
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.4825,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 251.32,
      "completions/mean_length": 238.104375,
      "completions/mean_terminated_length": 221.8957485961914,
      "completions/min_length": 191.34,
      "completions/min_terminated_length": 191.34,
      "entropy": 0.10444845259189606,
      "epoch": 0.09052504526252263,
      "frac_reward_zero_std": 0.2925,
      "grad_norm": 0.5579063892364502,
      "learning_rate": 5e-05,
      "loss": -0.0006,
      "num_tokens": 24885844.0,
      "reward": 7.74625,
      "reward_std": 1.5345598912239076,
      "rewards/event_reward_fn/mean": 7.74625,
      "rewards/event_reward_fn/std": 6.464660973548889,
      "step": 150,
      "step_time": 41.26081488572061
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.7925,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 202.92,
      "completions/mean_length": 245.916875,
      "completions/mean_terminated_length": 184.6587713623047,
      "completions/min_length": 199.94,
      "completions/min_terminated_length": 169.22,
      "entropy": 0.10581055819988251,
      "epoch": 0.12070006035003017,
      "frac_reward_zero_std": 0.33,
      "grad_norm": 0.31808722019195557,
      "learning_rate": 5e-05,
      "loss": 0.0003,
      "num_tokens": 33226966.0,
      "reward": 7.19125,
      "reward_std": 1.4298825466632843,
      "rewards/event_reward_fn/mean": 7.19125,
      "rewards/event_reward_fn/std": 5.8599746036529545,
      "step": 200,
      "step_time": 41.91275953448203
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.825,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 181.12,
      "completions/mean_length": 245.851875,
      "completions/mean_terminated_length": 163.72261688232422,
      "completions/min_length": 198.46,
      "completions/min_terminated_length": 152.38,
      "entropy": 0.10499135926365852,
      "epoch": 0.15087507543753773,
      "frac_reward_zero_std": 0.2875,
      "grad_norm": 0.2646925449371338,
      "learning_rate": 5e-05,
      "loss": 0.0005,
      "num_tokens": 41523308.0,
      "reward": 7.9475,
      "reward_std": 1.5300491595268249,
      "rewards/event_reward_fn/mean": 7.9475,
      "rewards/event_reward_fn/std": 6.3965685844421385,
      "step": 250,
      "step_time": 41.663273623897695
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.898125,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 149.62,
      "completions/mean_length": 250.625625,
      "completions/mean_terminated_length": 144.78653198242188,
      "completions/min_length": 215.68,
      "completions/min_terminated_length": 138.88,
      "entropy": 0.10884671121835708,
      "epoch": 0.18105009052504525,
      "frac_reward_zero_std": 0.3325,
      "grad_norm": 0.5418329834938049,
      "learning_rate": 5e-05,
      "loss": -0.0002,
      "num_tokens": 49889481.0,
      "reward": 7.489375,
      "reward_std": 1.5504147619009019,
      "rewards/event_reward_fn/mean": 7.489375,
      "rewards/event_reward_fn/std": 6.099679977893829,
      "step": 300,
      "step_time": 40.817094522019616
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.9275,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 131.48,
      "completions/mean_length": 253.1625,
      "completions/mean_terminated_length": 125.53590209960937,
      "completions/min_length": 228.04,
      "completions/min_terminated_length": 120.52,
      "entropy": 0.10796756476163864,
      "epoch": 0.2112251056125528,
      "frac_reward_zero_std": 0.3175,
      "grad_norm": 0.4433981776237488,
      "learning_rate": 5e-05,
      "loss": 0.0019,
      "num_tokens": 58206892.0,
      "reward": 7.89625,
      "reward_std": 1.573977051973343,
      "rewards/event_reward_fn/mean": 7.89625,
      "rewards/event_reward_fn/std": 6.586006484031677,
      "step": 350,
      "step_time": 42.12015992245928
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.945625,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 151.42,
      "completions/mean_length": 254.76625,
      "completions/mean_terminated_length": 146.34000091552736,
      "completions/min_length": 238.42,
      "completions/min_terminated_length": 141.14,
      "entropy": 0.11530103281140328,
      "epoch": 0.24140012070006034,
      "frac_reward_zero_std": 0.29,
      "grad_norm": 0.3932775855064392,
      "learning_rate": 5e-05,
      "loss": 0.0001,
      "num_tokens": 66513664.0,
      "reward": 7.304375,
      "reward_std": 1.552179645895958,
      "rewards/event_reward_fn/mean": 7.304375,
      "rewards/event_reward_fn/std": 5.687906408309937,
      "step": 400,
      "step_time": 40.78123372233997
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.92375,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 185.1,
      "completions/mean_length": 254.35875,
      "completions/mean_terminated_length": 178.61883544921875,
      "completions/min_length": 232.56,
      "completions/min_terminated_length": 171.12,
      "entropy": 0.13443249970674515,
      "epoch": 0.27157513578756787,
      "frac_reward_zero_std": 0.315,
      "grad_norm": 0.2284364551305771,
      "learning_rate": 5e-05,
      "loss": -0.0013,
      "num_tokens": 74493599.0,
      "reward": 7.766875,
      "reward_std": 1.5890911322832109,
      "rewards/event_reward_fn/mean": 7.766875,
      "rewards/event_reward_fn/std": 6.074563751220703,
      "step": 450,
      "step_time": 40.8964025861409
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.983125,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 73.22,
      "completions/mean_length": 255.728125,
      "completions/mean_terminated_length": 72.48666687011719,
      "completions/min_length": 250.14,
      "completions/min_terminated_length": 70.94,
      "entropy": 0.1348781806230545,
      "epoch": 0.30175015087507545,
      "frac_reward_zero_std": 0.32,
      "grad_norm": 0.44683775305747986,
      "learning_rate": 5e-05,
      "loss": 0.0006,
      "num_tokens": 82766712.0,
      "reward": 7.835625,
      "reward_std": 1.6530324041843414,
      "rewards/event_reward_fn/mean": 7.835625,
      "rewards/event_reward_fn/std": 6.139980282783508,
      "step": 500,
      "step_time": 41.13054014526191
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.996875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 24.36,
      "completions/mean_length": 255.96125,
      "completions/mean_terminated_length": 24.36,
      "completions/min_length": 254.76,
      "completions/min_terminated_length": 24.36,
      "entropy": 0.13759294494986535,
      "epoch": 0.331925165962583,
      "frac_reward_zero_std": 0.265,
      "grad_norm": 0.40625813603401184,
      "learning_rate": 5e-05,
      "loss": -0.0,
      "num_tokens": 91249822.0,
      "reward": 7.55375,
      "reward_std": 1.706419097185135,
      "rewards/event_reward_fn/mean": 7.55375,
      "rewards/event_reward_fn/std": 5.799948143959045,
      "step": 550,
      "step_time": 42.41818935459829
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.9975,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 15.02,
      "completions/mean_length": 255.981875,
      "completions/mean_terminated_length": 14.98,
      "completions/min_length": 255.58,
      "completions/min_terminated_length": 14.94,
      "entropy": 0.135696639418602,
      "epoch": 0.3621001810500905,
      "frac_reward_zero_std": 0.27,
      "grad_norm": 0.34435781836509705,
      "learning_rate": 5e-05,
      "loss": 0.0,
      "num_tokens": 99526582.0,
      "reward": 8.249375,
      "reward_std": 1.7093309688568115,
      "rewards/event_reward_fn/mean": 8.249375,
      "rewards/event_reward_fn/std": 6.437053818702697,
      "step": 600,
      "step_time": 42.50718544923991
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 0.14394851058721542,
      "epoch": 0.3922751961375981,
      "frac_reward_zero_std": 0.3125,
      "grad_norm": 0.2780283987522125,
      "learning_rate": 5e-05,
      "loss": 0.0,
      "num_tokens": 107729490.0,
      "reward": 8.115625,
      "reward_std": 1.5425574934482575,
      "rewards/event_reward_fn/mean": 8.115625,
      "rewards/event_reward_fn/std": 6.014267163276672,
      "step": 650,
      "step_time": 60.873589005278774
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 0.1508351384103298,
      "epoch": 0.4224502112251056,
      "frac_reward_zero_std": 0.275,
      "grad_norm": 0.21461114287376404,
      "learning_rate": 5e-05,
      "loss": 0.0,
      "num_tokens": 116090467.0,
      "reward": 8.166875,
      "reward_std": 1.743121521472931,
      "rewards/event_reward_fn/mean": 8.166875,
      "rewards/event_reward_fn/std": 6.155384964942932,
      "step": 700,
      "step_time": 41.52520179868036
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.99875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 9.66,
      "completions/mean_length": 255.981875,
      "completions/mean_terminated_length": 9.66,
      "completions/min_length": 255.42,
      "completions/min_terminated_length": 9.66,
      "entropy": 0.16852732509374618,
      "epoch": 0.45262522631261315,
      "frac_reward_zero_std": 0.2975,
      "grad_norm": 0.30243417620658875,
      "learning_rate": 5e-05,
      "loss": 0.0,
      "num_tokens": 124432282.0,
      "reward": 7.599375,
      "reward_std": 1.4733531725406648,
      "rewards/event_reward_fn/mean": 7.599375,
      "rewards/event_reward_fn/std": 5.72325975894928,
      "step": 750,
      "step_time": 45.15769186520076
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 0.18111263811588288,
      "epoch": 0.4828002414001207,
      "frac_reward_zero_std": 0.28,
      "grad_norm": 0.3852519989013672,
      "learning_rate": 5e-05,
      "loss": 0.0,
      "num_tokens": 132857345.0,
      "reward": 8.109375,
      "reward_std": 1.542456374168396,
      "rewards/event_reward_fn/mean": 8.109375,
      "rewards/event_reward_fn/std": 5.9935719728469845,
      "step": 800,
      "step_time": 42.812528482141204
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.99875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 10.0,
      "completions/mean_length": 255.9925,
      "completions/mean_terminated_length": 10.0,
      "completions/min_length": 255.76,
      "completions/min_terminated_length": 10.0,
      "entropy": 0.21682359665632248,
      "epoch": 0.5129752564876282,
      "frac_reward_zero_std": 0.2425,
      "grad_norm": 0.34055572748184204,
      "learning_rate": 5e-05,
      "loss": -0.0001,
      "num_tokens": 141348381.0,
      "reward": 7.83125,
      "reward_std": 1.7194657081365585,
      "rewards/event_reward_fn/mean": 7.83125,
      "rewards/event_reward_fn/std": 6.176298160552978,
      "step": 850,
      "step_time": 42.62233325715526
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.960625,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 101.5,
      "completions/mean_length": 254.8675,
      "completions/mean_terminated_length": 95.82615142822266,
      "completions/min_length": 237.06,
      "completions/min_terminated_length": 88.58,
      "entropy": 1.2534486263990403,
      "epoch": 0.5431502715751357,
      "frac_reward_zero_std": 0.22,
      "grad_norm": 1.9650917053222656,
      "learning_rate": 5e-05,
      "loss": 0.0044,
      "num_tokens": 149758372.0,
      "reward": 7.47875,
      "reward_std": 1.8692259776592255,
      "rewards/event_reward_fn/mean": 7.47875,
      "rewards/event_reward_fn/std": 5.956067395210266,
      "step": 900,
      "step_time": 41.407225477900354
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.944375,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 140.22,
      "completions/mean_length": 253.330625,
      "completions/mean_terminated_length": 132.0480009460449,
      "completions/min_length": 209.16,
      "completions/min_terminated_length": 122.12,
      "entropy": 2.884652135372162,
      "epoch": 0.5733252866626434,
      "frac_reward_zero_std": 0.3375,
      "grad_norm": 2.167388916015625,
      "learning_rate": 5e-05,
      "loss": -0.0175,
      "num_tokens": 158149273.0,
      "reward": 6.6125,
      "reward_std": 1.6964978063106537,
      "rewards/event_reward_fn/mean": 6.6125,
      "rewards/event_reward_fn/std": 6.0062398338317875,
      "step": 950,
      "step_time": 41.2563528472418
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.994375,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 31.2,
      "completions/mean_length": 255.541875,
      "completions/mean_terminated_length": 31.16,
      "completions/min_length": 246.16,
      "completions/min_terminated_length": 31.12,
      "entropy": 1.2761903527379035,
      "epoch": 0.6035003017501509,
      "frac_reward_zero_std": 0.3825,
      "grad_norm": 0.4628017842769623,
      "learning_rate": 5e-05,
      "loss": -0.0033,
      "num_tokens": 166538998.0,
      "reward": 6.94875,
      "reward_std": 1.378657329082489,
      "rewards/event_reward_fn/mean": 6.94875,
      "rewards/event_reward_fn/std": 6.216048922538757,
      "step": 1000,
      "step_time": 42.322889749883906
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.984375,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 61.4,
      "completions/mean_length": 255.64625,
      "completions/mean_terminated_length": 60.46533386230469,
      "completions/min_length": 248.46,
      "completions/min_terminated_length": 59.02,
      "entropy": 0.4890740931034088,
      "epoch": 0.6336753168376584,
      "frac_reward_zero_std": 0.395,
      "grad_norm": 0.33829060196876526,
      "learning_rate": 5e-05,
      "loss": -0.0003,
      "num_tokens": 174874216.0,
      "reward": 6.793125,
      "reward_std": 1.1867496293783188,
      "rewards/event_reward_fn/mean": 6.793125,
      "rewards/event_reward_fn/std": 5.404484539031983,
      "step": 1050,
      "step_time": 42.91726479450008
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.993125,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 28.78,
      "completions/mean_length": 255.443125,
      "completions/mean_terminated_length": 28.65,
      "completions/min_length": 243.56,
      "completions/min_terminated_length": 28.52,
      "entropy": 0.7336188541352748,
      "epoch": 0.663850331925166,
      "frac_reward_zero_std": 0.3925,
      "grad_norm": 0.5986895561218262,
      "learning_rate": 5e-05,
      "loss": -0.0053,
      "num_tokens": 183293167.0,
      "reward": 6.371875,
      "reward_std": 1.27341972053051,
      "rewards/event_reward_fn/mean": 6.371875,
      "rewards/event_reward_fn/std": 5.160589256286621,
      "step": 1100,
      "step_time": 42.857495513077595
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.99875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 8.74,
      "completions/mean_length": 255.953125,
      "completions/mean_terminated_length": 8.74,
      "completions/min_length": 254.5,
      "completions/min_terminated_length": 8.74,
      "entropy": 0.41359326869249347,
      "epoch": 0.6940253470126735,
      "frac_reward_zero_std": 0.365,
      "grad_norm": 0.714463472366333,
      "learning_rate": 5e-05,
      "loss": 0.0001,
      "num_tokens": 191713508.0,
      "reward": 6.96625,
      "reward_std": 1.294020129442215,
      "rewards/event_reward_fn/mean": 6.96625,
      "rewards/event_reward_fn/std": 5.736661648750305,
      "step": 1150,
      "step_time": 41.747385050542654
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.98875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 64.84,
      "completions/mean_length": 255.530625,
      "completions/mean_terminated_length": 64.83,
      "completions/min_length": 244.02,
      "completions/min_terminated_length": 64.82,
      "entropy": 0.7842025232315063,
      "epoch": 0.724200362100181,
      "frac_reward_zero_std": 0.3825,
      "grad_norm": 2.366915225982666,
      "learning_rate": 5e-05,
      "loss": 0.0001,
      "num_tokens": 199636639.0,
      "reward": 7.296875,
      "reward_std": 1.2271459007263184,
      "rewards/event_reward_fn/mean": 7.296875,
      "rewards/event_reward_fn/std": 5.948999562263489,
      "step": 1200,
      "step_time": 41.03189678700059
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.99875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 8.64,
      "completions/mean_length": 255.95,
      "completions/mean_terminated_length": 8.64,
      "completions/min_length": 254.4,
      "completions/min_terminated_length": 8.64,
      "entropy": 1.2313472920656203,
      "epoch": 0.7543753771876885,
      "frac_reward_zero_std": 0.35,
      "grad_norm": 2.256929636001587,
      "learning_rate": 5e-05,
      "loss": 0.0001,
      "num_tokens": 207997095.0,
      "reward": 6.69375,
      "reward_std": 1.492494255900383,
      "rewards/event_reward_fn/mean": 6.69375,
      "rewards/event_reward_fn/std": 5.9429325056076046,
      "step": 1250,
      "step_time": 42.135132130276176
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.98875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 45.68,
      "completions/mean_length": 255.489375,
      "completions/mean_terminated_length": 45.05666687011719,
      "completions/min_length": 244.34,
      "completions/min_terminated_length": 44.66,
      "entropy": 1.172162665054202,
      "epoch": 0.7845503922751962,
      "frac_reward_zero_std": 0.36,
      "grad_norm": 0.8358303904533386,
      "learning_rate": 5e-05,
      "loss": -0.0024,
      "num_tokens": 216307264.0,
      "reward": 6.914375,
      "reward_std": 1.4417870903015138,
      "rewards/event_reward_fn/mean": 6.914375,
      "rewards/event_reward_fn/std": 5.760623688697815,
      "step": 1300,
      "step_time": 41.30298829050036
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.996875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 18.28,
      "completions/mean_length": 255.91,
      "completions/mean_terminated_length": 18.09,
      "completions/min_length": 253.42,
      "completions/min_terminated_length": 17.9,
      "entropy": 1.4694241133332253,
      "epoch": 0.8147254073627037,
      "frac_reward_zero_std": 0.34,
      "grad_norm": 6.394040107727051,
      "learning_rate": 5e-05,
      "loss": -0.0003,
      "num_tokens": 224510279.0,
      "reward": 6.273125,
      "reward_std": 1.626619552373886,
      "rewards/event_reward_fn/mean": 6.273125,
      "rewards/event_reward_fn/std": 5.248045358657837,
      "step": 1350,
      "step_time": 40.860976390804865
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.0628131467103956,
      "epoch": 0.8449004224502112,
      "frac_reward_zero_std": 0.3375,
      "grad_norm": 1.5476884841918945,
      "learning_rate": 5e-05,
      "loss": 0.0,
      "num_tokens": 232969228.0,
      "reward": 6.45875,
      "reward_std": 1.7393629193305968,
      "rewards/event_reward_fn/mean": 6.45875,
      "rewards/event_reward_fn/std": 5.68415337562561,
      "step": 1400,
      "step_time": 43.151147363660854
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.999375,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.04,
      "completions/mean_length": 255.84125,
      "completions/mean_terminated_length": 0.04,
      "completions/min_length": 250.92,
      "completions/min_terminated_length": 0.04,
      "entropy": 1.3854397583007811,
      "epoch": 0.8750754375377188,
      "frac_reward_zero_std": 0.3375,
      "grad_norm": 3.6241378784179688,
      "learning_rate": 5e-05,
      "loss": -0.001,
      "num_tokens": 241277888.0,
      "reward": 6.719375,
      "reward_std": 1.752496111392975,
      "rewards/event_reward_fn/mean": 6.719375,
      "rewards/event_reward_fn/std": 6.006656441688538,
      "step": 1450,
      "step_time": 41.94062339906115
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.994375,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 6.86,
      "completions/mean_length": 254.820625,
      "completions/mean_terminated_length": 6.8333333349227905,
      "completions/min_length": 237.22,
      "completions/min_terminated_length": 6.82,
      "entropy": 1.1917432191967965,
      "epoch": 0.9052504526252263,
      "frac_reward_zero_std": 0.36,
      "grad_norm": 7.937113285064697,
      "learning_rate": 5e-05,
      "loss": -0.0277,
      "num_tokens": 249570792.0,
      "reward": 6.343125,
      "reward_std": 1.5732547068595886,
      "rewards/event_reward_fn/mean": 6.343125,
      "rewards/event_reward_fn/std": 5.813223929405212,
      "step": 1500,
      "step_time": 43.07453210723819
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.926875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 131.5,
      "completions/mean_length": 253.175,
      "completions/mean_terminated_length": 125.9899688720703,
      "completions/min_length": 228.9,
      "completions/min_terminated_length": 121.38,
      "entropy": 0.5228479199111462,
      "epoch": 0.9354254677127338,
      "frac_reward_zero_std": 0.3875,
      "grad_norm": 1.1253968477249146,
      "learning_rate": 5e-05,
      "loss": 0.0002,
      "num_tokens": 257734094.0,
      "reward": 7.513125,
      "reward_std": 1.2554410457611085,
      "rewards/event_reward_fn/mean": 7.513125,
      "rewards/event_reward_fn/std": 5.942786226272583,
      "step": 1550,
      "step_time": 42.567975415034454
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.926875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 131.88,
      "completions/mean_length": 253.125625,
      "completions/mean_terminated_length": 128.5973336791992,
      "completions/min_length": 223.42,
      "completions/min_terminated_length": 126.14,
      "entropy": 0.8324106151610613,
      "epoch": 0.9656004828002414,
      "frac_reward_zero_std": 0.35,
      "grad_norm": 0.8266918659210205,
      "learning_rate": 5e-05,
      "loss": -0.0003,
      "num_tokens": 266238921.0,
      "reward": 6.545625,
      "reward_std": 1.3680988204479219,
      "rewards/event_reward_fn/mean": 6.545625,
      "rewards/event_reward_fn/std": 5.431567845344543,
      "step": 1600,
      "step_time": 42.35816644520266
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.979375,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 52.06,
      "completions/mean_length": 255.3625,
      "completions/mean_terminated_length": 50.906666870117185,
      "completions/min_length": 244.64,
      "completions/min_terminated_length": 50.08,
      "entropy": 1.4035920506715776,
      "epoch": 0.995775497887749,
      "frac_reward_zero_std": 0.3475,
      "grad_norm": 11.164112091064453,
      "learning_rate": 5e-05,
      "loss": -0.0041,
      "num_tokens": 274713769.0,
      "reward": 6.30375,
      "reward_std": 1.4735591614246368,
      "rewards/event_reward_fn/mean": 6.30375,
      "rewards/event_reward_fn/std": 5.667113132476807,
      "step": 1650,
      "step_time": 41.3417172247381
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.99875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 4.16,
      "completions/mean_length": 255.81,
      "completions/mean_terminated_length": 4.16,
      "completions/min_length": 249.92,
      "completions/min_terminated_length": 4.16,
      "entropy": 1.0942185708135366,
      "epoch": 1.0259505129752564,
      "frac_reward_zero_std": 0.4125,
      "grad_norm": 2.0238513946533203,
      "learning_rate": 5e-05,
      "loss": -0.0042,
      "num_tokens": 282976766.0,
      "reward": 7.19375,
      "reward_std": 1.4945010322332382,
      "rewards/event_reward_fn/mean": 7.19375,
      "rewards/event_reward_fn/std": 5.62495879650116,
      "step": 1700,
      "step_time": 41.58126259226352
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.995,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 27.9,
      "completions/mean_length": 255.874375,
      "completions/mean_terminated_length": 27.873333435058594,
      "completions/min_length": 253.14,
      "completions/min_terminated_length": 27.86,
      "entropy": 0.6756551740318537,
      "epoch": 1.056125528062764,
      "frac_reward_zero_std": 0.4125,
      "grad_norm": 0.9028272032737732,
      "learning_rate": 5e-05,
      "loss": 0.0002,
      "num_tokens": 291424307.0,
      "reward": 6.863125,
      "reward_std": 1.4053943872451782,
      "rewards/event_reward_fn/mean": 6.863125,
      "rewards/event_reward_fn/std": 5.494638476371765,
      "step": 1750,
      "step_time": 41.3957015178015
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.985,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 71.64,
      "completions/mean_length": 255.53375,
      "completions/mean_terminated_length": 71.1,
      "completions/min_length": 244.52,
      "completions/min_terminated_length": 70.44,
      "entropy": 0.8187148047238588,
      "epoch": 1.0863005431502715,
      "frac_reward_zero_std": 0.3525,
      "grad_norm": 4.235013484954834,
      "learning_rate": 5e-05,
      "loss": -0.0009,
      "num_tokens": 300026957.0,
      "reward": 7.29375,
      "reward_std": 1.5134036219120026,
      "rewards/event_reward_fn/mean": 7.29375,
      "rewards/event_reward_fn/std": 6.304830470085144,
      "step": 1800,
      "step_time": 42.75003189910087
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.9275,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 154.7,
      "completions/mean_length": 253.389375,
      "completions/mean_terminated_length": 145.82189025878907,
      "completions/min_length": 221.36,
      "completions/min_terminated_length": 134.32,
      "entropy": 0.8770341634750366,
      "epoch": 1.1164755582377792,
      "frac_reward_zero_std": 0.42,
      "grad_norm": 1.056412696838379,
      "learning_rate": 5e-05,
      "loss": 0.0016,
      "num_tokens": 308294562.0,
      "reward": 7.195,
      "reward_std": 1.413882914185524,
      "rewards/event_reward_fn/mean": 7.195,
      "rewards/event_reward_fn/std": 5.875013728141784,
      "step": 1850,
      "step_time": 42.22391830024426
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.9925,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 50.76,
      "completions/mean_length": 255.740625,
      "completions/mean_terminated_length": 49.8,
      "completions/min_length": 248.52,
      "completions/min_terminated_length": 48.84,
      "entropy": 0.7402717351168394,
      "epoch": 1.1466505733252867,
      "frac_reward_zero_std": 0.3725,
      "grad_norm": 1.3885732889175415,
      "learning_rate": 5e-05,
      "loss": 0.0007,
      "num_tokens": 316652055.0,
      "reward": 7.290625,
      "reward_std": 1.5660697519779205,
      "rewards/event_reward_fn/mean": 7.290625,
      "rewards/event_reward_fn/std": 6.019521760940552,
      "step": 1900,
      "step_time": 42.37986288452754
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.99375,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 35.46,
      "completions/mean_length": 255.766875,
      "completions/mean_terminated_length": 35.0,
      "completions/min_length": 249.58,
      "completions/min_terminated_length": 34.54,
      "entropy": 1.1067684018611907,
      "epoch": 1.1768255884127943,
      "frac_reward_zero_std": 0.3925,
      "grad_norm": 1.0810959339141846,
      "learning_rate": 5e-05,
      "loss": 0.0014,
      "num_tokens": 325016448.0,
      "reward": 6.665,
      "reward_std": 1.538350248336792,
      "rewards/event_reward_fn/mean": 6.665,
      "rewards/event_reward_fn/std": 6.030116739273072,
      "step": 1950,
      "step_time": 40.92805422256584
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.990625,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 63.46,
      "completions/mean_length": 255.716875,
      "completions/mean_terminated_length": 63.21,
      "completions/min_length": 247.28,
      "completions/min_terminated_length": 62.96,
      "entropy": 0.7298687703162432,
      "epoch": 1.2070006035003018,
      "frac_reward_zero_std": 0.3375,
      "grad_norm": 0.858845591545105,
      "learning_rate": 5e-05,
      "loss": 0.0002,
      "num_tokens": 333296057.0,
      "reward": 7.73125,
      "reward_std": 1.5106933176517487,
      "rewards/event_reward_fn/mean": 7.73125,
      "rewards/event_reward_fn/std": 6.448639197349548,
      "step": 2000,
      "step_time": 41.15218346009729
    }
  ],
  "logging_steps": 50,
  "max_steps": 16570,
  "num_input_tokens_seen": 333296057,
  "num_train_epochs": 10,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}