File size: 74,166 Bytes
18280ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.998109640831758,
  "eval_steps": 51,
  "global_step": 198,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15104166666666666,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 975.3333333333334,
      "completions/mean_length": 312.0329996744792,
      "completions/mean_terminated_length": 185.89303080240884,
      "completions/min_length": 28.333333333333332,
      "completions/min_terminated_length": 28.333333333333332,
      "epoch": 0.045368620037807186,
      "grad_norm": 0.14972379803657532,
      "kl": 4.560748736063639e-05,
      "learning_rate": 4e-07,
      "loss": -0.0081,
      "num_tokens": 942182.0,
      "reward": 0.37008477250734967,
      "reward_std": 0.11998833467562993,
      "rewards/get_embedding_sim/mean": 0.3440430959065755,
      "rewards/get_embedding_sim/std": 0.06710867583751678,
      "rewards/reward_num_unique_chars/mean": 0.026041666666666668,
      "rewards/reward_num_unique_chars/std": 0.14761295169591904,
      "step": 3
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.13020833333333334,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 959.0,
      "completions/mean_length": 307.0069580078125,
      "completions/mean_terminated_length": 199.09521484375,
      "completions/min_length": 10.333333333333334,
      "completions/min_terminated_length": 10.333333333333334,
      "epoch": 0.09073724007561437,
      "grad_norm": 0.12008437514305115,
      "kl": 0.0001388813058535258,
      "learning_rate": 1e-06,
      "loss": 0.035,
      "num_tokens": 1882942.0,
      "reward": 0.4796616733074188,
      "reward_std": 0.214401513338089,
      "rewards/get_embedding_sim/mean": 0.3694185713926951,
      "rewards/get_embedding_sim/std": 0.07585694640874863,
      "rewards/reward_num_unique_chars/mean": 0.1102430559694767,
      "rewards/reward_num_unique_chars/std": 0.2982482860485713,
      "step": 6
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.08072916666666667,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 845.0,
      "completions/mean_length": 234.67969258626303,
      "completions/mean_terminated_length": 166.36500040690103,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "epoch": 0.13610586011342155,
      "grad_norm": 0.08606597781181335,
      "kl": 0.00013801626240213713,
      "learning_rate": 1e-06,
      "loss": 0.0172,
      "num_tokens": 2735293.0,
      "reward": 0.39071526130040485,
      "reward_std": 0.1662569542725881,
      "rewards/get_embedding_sim/mean": 0.33168746034304303,
      "rewards/get_embedding_sim/std": 0.07500659177700679,
      "rewards/reward_num_unique_chars/mean": 0.059027779226501785,
      "rewards/reward_num_unique_chars/std": 0.22509141763051352,
      "step": 9
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.10503472222222225,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 983.3333333333334,
      "completions/mean_length": 262.79688517252606,
      "completions/mean_terminated_length": 173.54302469889322,
      "completions/min_length": 12.0,
      "completions/min_terminated_length": 12.0,
      "epoch": 0.18147448015122875,
      "grad_norm": 0.11949238181114197,
      "kl": 0.00030877192815144855,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 3627859.0,
      "reward": 0.4095470607280731,
      "reward_std": 0.18979967882235846,
      "rewards/get_embedding_sim/mean": 0.33055397868156433,
      "rewards/get_embedding_sim/std": 0.07462155818939209,
      "rewards/reward_num_unique_chars/mean": 0.07899305472771327,
      "rewards/reward_num_unique_chars/std": 0.25569593409697217,
      "step": 12
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1362847222222222,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 991.6666666666666,
      "completions/mean_length": 316.6762288411458,
      "completions/mean_terminated_length": 204.85944112141928,
      "completions/min_length": 9.666666666666666,
      "completions/min_terminated_length": 9.666666666666666,
      "epoch": 0.22684310018903592,
      "grad_norm": 0.16435399651527405,
      "kl": 0.0005876521269480387,
      "learning_rate": 1e-06,
      "loss": 0.0529,
      "num_tokens": 4554894.0,
      "reward": 0.4522427221139272,
      "reward_std": 0.205996572971344,
      "rewards/get_embedding_sim/mean": 0.35502047340075177,
      "rewards/get_embedding_sim/std": 0.076506607234478,
      "rewards/reward_num_unique_chars/mean": 0.09722222139437993,
      "rewards/reward_num_unique_chars/std": 0.27809616923332214,
      "step": 15
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.11718750000000004,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 927.0,
      "completions/mean_length": 283.77171834309894,
      "completions/mean_terminated_length": 184.45149739583334,
      "completions/min_length": 24.666666666666668,
      "completions/min_terminated_length": 24.666666666666668,
      "epoch": 0.2722117202268431,
      "grad_norm": 0.17904439568519592,
      "kl": 0.0004306634267171224,
      "learning_rate": 1e-06,
      "loss": 0.036,
      "num_tokens": 5464567.0,
      "reward": 0.47324784596761066,
      "reward_std": 0.2480545292297999,
      "rewards/get_embedding_sim/mean": 0.35345616936683655,
      "rewards/get_embedding_sim/std": 0.08570993691682816,
      "rewards/reward_num_unique_chars/mean": 0.11979166915019353,
      "rewards/reward_num_unique_chars/std": 0.32309961318969727,
      "step": 18
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.07204861111111112,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 1011.0,
      "completions/mean_length": 230.54688517252603,
      "completions/mean_terminated_length": 169.21332804361978,
      "completions/min_length": 12.333333333333334,
      "completions/min_terminated_length": 12.333333333333334,
      "epoch": 0.31758034026465026,
      "grad_norm": 0.11123450100421906,
      "kl": 0.0011239051818847656,
      "learning_rate": 1e-06,
      "loss": 0.0222,
      "num_tokens": 6313117.0,
      "reward": 0.4715224802494049,
      "reward_std": 0.2366275986035665,
      "rewards/get_embedding_sim/mean": 0.3491266171137492,
      "rewards/get_embedding_sim/std": 0.06465367351969083,
      "rewards/reward_num_unique_chars/mean": 0.1223958358168602,
      "rewards/reward_num_unique_chars/std": 0.3250391185283661,
      "step": 21
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09895833333333337,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 961.0,
      "completions/mean_length": 229.36719258626303,
      "completions/mean_terminated_length": 142.55723571777344,
      "completions/min_length": 15.0,
      "completions/min_terminated_length": 15.0,
      "epoch": 0.3629489603024575,
      "grad_norm": 0.118320994079113,
      "kl": 0.0019257068634033203,
      "learning_rate": 1e-06,
      "loss": 0.0158,
      "num_tokens": 7162132.0,
      "reward": 0.5189645787080129,
      "reward_std": 0.24159842729568481,
      "rewards/get_embedding_sim/mean": 0.3809437155723572,
      "rewards/get_embedding_sim/std": 0.0799456536769867,
      "rewards/reward_num_unique_chars/mean": 0.13802083084980646,
      "rewards/reward_num_unique_chars/std": 0.3419287900129954,
      "step": 24
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.052951388888888874,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 982.3333333333334,
      "completions/mean_length": 197.44445292154947,
      "completions/mean_terminated_length": 151.25631205240884,
      "completions/min_length": 14.666666666666666,
      "completions/min_terminated_length": 14.666666666666666,
      "epoch": 0.40831758034026466,
      "grad_norm": 0.11851406842470169,
      "kl": 0.002936681111653646,
      "learning_rate": 1e-06,
      "loss": 0.0317,
      "num_tokens": 7973172.0,
      "reward": 0.569815476735433,
      "reward_std": 0.25512967507044476,
      "rewards/get_embedding_sim/mean": 0.362350195646286,
      "rewards/get_embedding_sim/std": 0.07909337679545085,
      "rewards/reward_num_unique_chars/mean": 0.2074652761220932,
      "rewards/reward_num_unique_chars/std": 0.4044720729192098,
      "step": 27
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.056423611111111126,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 934.3333333333334,
      "completions/mean_length": 211.5963592529297,
      "completions/mean_terminated_length": 163.11248270670572,
      "completions/min_length": 16.333333333333332,
      "completions/min_terminated_length": 16.333333333333332,
      "epoch": 0.45368620037807184,
      "grad_norm": 0.21573348343372345,
      "kl": 0.008742332458496094,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 8794371.0,
      "reward": 0.43826034665107727,
      "reward_std": 0.20837691922982535,
      "rewards/get_embedding_sim/mean": 0.3427741924921672,
      "rewards/get_embedding_sim/std": 0.0719177375237147,
      "rewards/reward_num_unique_chars/mean": 0.09548610945542653,
      "rewards/reward_num_unique_chars/std": 0.2681623448928197,
      "step": 30
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.056423611111111126,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 892.0,
      "completions/mean_length": 197.90365091959634,
      "completions/mean_terminated_length": 148.50442504882812,
      "completions/min_length": 15.0,
      "completions/min_terminated_length": 15.0,
      "epoch": 0.499054820415879,
      "grad_norm": 0.08199404180049896,
      "kl": 0.005775133768717448,
      "learning_rate": 1e-06,
      "loss": 0.0113,
      "num_tokens": 9601812.0,
      "reward": 0.45480871200561523,
      "reward_std": 0.2194500764211019,
      "rewards/get_embedding_sim/mean": 0.36192673444747925,
      "rewards/get_embedding_sim/std": 0.0750991627573967,
      "rewards/reward_num_unique_chars/mean": 0.0928819440305233,
      "rewards/reward_num_unique_chars/std": 0.2857237259546916,
      "step": 33
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.053819444444444454,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 873.3333333333334,
      "completions/mean_length": 201.65104166666666,
      "completions/mean_terminated_length": 154.5564727783203,
      "completions/min_length": 10.666666666666666,
      "completions/min_terminated_length": 10.666666666666666,
      "epoch": 0.5444234404536862,
      "grad_norm": 0.13542793691158295,
      "kl": 0.011366526285807291,
      "learning_rate": 1e-06,
      "loss": -0.0008,
      "num_tokens": 10414722.0,
      "reward": 0.4134095311164856,
      "reward_std": 0.16343241184949875,
      "rewards/get_embedding_sim/mean": 0.3708748022715251,
      "rewards/get_embedding_sim/std": 0.08833041042089462,
      "rewards/reward_num_unique_chars/mean": 0.042534722636143364,
      "rewards/reward_num_unique_chars/std": 0.1979833443959554,
      "step": 36
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.04340277777777779,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 891.6666666666666,
      "completions/mean_length": 176.1076456705729,
      "completions/mean_terminated_length": 137.35225423177084,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 0.5897920604914934,
      "grad_norm": 1.7642544507980347,
      "kl": 0.151151974995931,
      "learning_rate": 1e-06,
      "loss": 0.0179,
      "num_tokens": 11207422.0,
      "reward": 0.5713514387607574,
      "reward_std": 0.26335498690605164,
      "rewards/get_embedding_sim/mean": 0.36909447113672894,
      "rewards/get_embedding_sim/std": 0.09187572946151097,
      "rewards/reward_num_unique_chars/mean": 0.202256940305233,
      "rewards/reward_num_unique_chars/std": 0.390445997317632,
      "step": 39
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.032986111111111126,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 826.3333333333334,
      "completions/mean_length": 201.41754150390625,
      "completions/mean_terminated_length": 173.10018412272134,
      "completions/min_length": 18.333333333333332,
      "completions/min_terminated_length": 18.333333333333332,
      "epoch": 0.6351606805293005,
      "grad_norm": 0.10251538455486298,
      "kl": 0.014621734619140625,
      "learning_rate": 1e-06,
      "loss": 0.0102,
      "num_tokens": 12029279.0,
      "reward": 0.5142592787742615,
      "reward_std": 0.2620675365130107,
      "rewards/get_embedding_sim/mean": 0.3701620002587636,
      "rewards/get_embedding_sim/std": 0.10092929750680923,
      "rewards/reward_num_unique_chars/mean": 0.1440972238779068,
      "rewards/reward_num_unique_chars/std": 0.34582529465357464,
      "step": 42
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.026041666666666668,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 927.6666666666666,
      "completions/mean_length": 183.04254150390625,
      "completions/mean_terminated_length": 160.71800740559897,
      "completions/min_length": 14.666666666666666,
      "completions/min_terminated_length": 14.666666666666666,
      "epoch": 0.6805293005671077,
      "grad_norm": 0.09084329754114151,
      "kl": 0.015349706013997396,
      "learning_rate": 1e-06,
      "loss": -0.0004,
      "num_tokens": 12816000.0,
      "reward": 0.5384640991687775,
      "reward_std": 0.22944432497024536,
      "rewards/get_embedding_sim/mean": 0.39697099725405377,
      "rewards/get_embedding_sim/std": 0.10396929830312729,
      "rewards/reward_num_unique_chars/mean": 0.14149305721124014,
      "rewards/reward_num_unique_chars/std": 0.3254843403895696,
      "step": 45
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.02777777777777779,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 802.3333333333334,
      "completions/mean_length": 165.0295155843099,
      "completions/mean_terminated_length": 140.43072509765625,
      "completions/min_length": 9.666666666666666,
      "completions/min_terminated_length": 9.666666666666666,
      "epoch": 0.725897920604915,
      "grad_norm": 0.21910759806632996,
      "kl": 0.027149200439453125,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 13587394.0,
      "reward": 0.5553397635618845,
      "reward_std": 0.23784717917442322,
      "rewards/get_embedding_sim/mean": 0.4086383481820424,
      "rewards/get_embedding_sim/std": 0.10949051380157471,
      "rewards/reward_num_unique_chars/mean": 0.14670138930281004,
      "rewards/reward_num_unique_chars/std": 0.33698558807373047,
      "step": 48
    },
    {
      "epoch": 0.7712665406427222,
      "grad_norm": 0.09893961995840073,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "step": 51
    },
    {
      "epoch": 0.7712665406427222,
      "eval_clip_ratio/high_max": 0.0,
      "eval_clip_ratio/high_mean": 0.0,
      "eval_clip_ratio/low_mean": 0.0,
      "eval_clip_ratio/low_min": 0.0,
      "eval_clip_ratio/region_mean": 0.0,
      "eval_completions/clipped_ratio": 0.12797619047619044,
      "eval_completions/max_length": 880.7678571428571,
      "eval_completions/max_terminated_length": 701.3214285714286,
      "eval_completions/mean_length": 258.24070589882984,
      "eval_completions/mean_terminated_length": 153.6624070576259,
      "eval_completions/min_length": 24.446428571428573,
      "eval_completions/min_terminated_length": 24.446428571428573,
      "eval_kl": 0.0542449951171875,
      "eval_loss": 0.026244351640343666,
      "eval_num_tokens": 14351398.0,
      "eval_reward": 0.524820977555854,
      "eval_reward_std": 0.22432494928528154,
      "eval_rewards/get_embedding_sim/mean": 0.43479119294456076,
      "eval_rewards/get_embedding_sim/std": 0.09110667330345937,
      "eval_rewards/reward_num_unique_chars/mean": 0.09002976235933602,
      "eval_rewards/reward_num_unique_chars/std": 0.18600706889161042,
      "eval_runtime": 2254.2404,
      "eval_samples_per_second": 0.025,
      "eval_steps_per_second": 0.001,
      "step": 51
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.044270833333333315,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 921.3333333333334,
      "completions/mean_length": 197.77517954508463,
      "completions/mean_terminated_length": 159.97277196248373,
      "completions/min_length": 9.833333333333334,
      "completions/min_terminated_length": 9.833333333333334,
      "epoch": 0.8166351606805293,
      "grad_norm": 0.08635270595550537,
      "kl": 0.030397415161132812,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 15200636.0,
      "reward": 0.5215439548095068,
      "reward_std": 0.23126975446939468,
      "rewards/get_embedding_sim/mean": 0.42692585786183673,
      "rewards/get_embedding_sim/std": 0.11467409133911133,
      "rewards/reward_num_unique_chars/mean": 0.09461805845300357,
      "rewards/reward_num_unique_chars/std": 0.28477593511343,
      "step": 54
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.04253472222222221,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 836.6666666666666,
      "completions/mean_length": 198.4244842529297,
      "completions/mean_terminated_length": 161.79749043782553,
      "completions/min_length": 10.333333333333334,
      "completions/min_terminated_length": 10.333333333333334,
      "epoch": 0.8620037807183365,
      "grad_norm": 14.726771354675293,
      "kl": 0.21588261922200522,
      "learning_rate": 1e-06,
      "loss": 0.0104,
      "num_tokens": 16019045.0,
      "reward": 0.5494122306505839,
      "reward_std": 0.24494746327400208,
      "rewards/get_embedding_sim/mean": 0.44264134764671326,
      "rewards/get_embedding_sim/std": 0.11085022240877151,
      "rewards/reward_num_unique_chars/mean": 0.10677083333333333,
      "rewards/reward_num_unique_chars/std": 0.30227985978126526,
      "step": 57
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.04253472222222221,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 964.6666666666666,
      "completions/mean_length": 200.72309366861978,
      "completions/mean_terminated_length": 164.248779296875,
      "completions/min_length": 11.333333333333334,
      "completions/min_terminated_length": 11.333333333333334,
      "epoch": 0.9073724007561437,
      "grad_norm": 0.09581304341554642,
      "kl": 0.33023325602213544,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 16832758.0,
      "reward": 0.599389910697937,
      "reward_std": 0.26327316959698993,
      "rewards/get_embedding_sim/mean": 0.45268850525220233,
      "rewards/get_embedding_sim/std": 0.11441038797299068,
      "rewards/reward_num_unique_chars/mean": 0.14670139302810034,
      "rewards/reward_num_unique_chars/std": 0.31440146267414093,
      "step": 60
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021701388888888878,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 917.3333333333334,
      "completions/mean_length": 186.21094258626303,
      "completions/mean_terminated_length": 167.5730946858724,
      "completions/min_length": 13.0,
      "completions/min_terminated_length": 13.0,
      "epoch": 0.9527410207939508,
      "grad_norm": 0.08248484879732132,
      "kl": 0.04541015625,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 17637097.0,
      "reward": 0.5855847001075745,
      "reward_std": 0.2750825683275859,
      "rewards/get_embedding_sim/mean": 0.46405691901842755,
      "rewards/get_embedding_sim/std": 0.11442819982767105,
      "rewards/reward_num_unique_chars/mean": 0.12152778108914693,
      "rewards/reward_num_unique_chars/std": 0.3193853000799815,
      "step": 63
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.031507423371647504,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 930.0,
      "completions/mean_length": 193.8086140950521,
      "completions/mean_terminated_length": 167.16290283203125,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 0.998109640831758,
      "grad_norm": 0.06374574452638626,
      "kl": 0.03699493408203125,
      "learning_rate": 1e-06,
      "loss": 0.0187,
      "num_tokens": 18440914.0,
      "reward": 0.6297420461972555,
      "reward_std": 0.2834969659646352,
      "rewards/get_embedding_sim/mean": 0.47088783979415894,
      "rewards/get_embedding_sim/std": 0.11324869592984517,
      "rewards/reward_num_unique_chars/mean": 0.1588541641831398,
      "rewards/reward_num_unique_chars/std": 0.36384791135787964,
      "step": 66
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.032118055555555546,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 833.3333333333334,
      "completions/mean_length": 200.6024373372396,
      "completions/mean_terminated_length": 173.2165069580078,
      "completions/min_length": 9.0,
      "completions/min_terminated_length": 9.0,
      "epoch": 1.0453686200378072,
      "grad_norm": 0.11849670857191086,
      "kl": 0.05316925048828125,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 19261832.0,
      "reward": 0.5802033940951029,
      "reward_std": 0.25838569800059,
      "rewards/get_embedding_sim/mean": 0.4734325309594472,
      "rewards/get_embedding_sim/std": 0.11253533015648524,
      "rewards/reward_num_unique_chars/mean": 0.10677083084980647,
      "rewards/reward_num_unique_chars/std": 0.30244183043638867,
      "step": 69
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.039930555555555546,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 973.0,
      "completions/mean_length": 199.55555725097656,
      "completions/mean_terminated_length": 165.2902577718099,
      "completions/min_length": 9.333333333333334,
      "completions/min_terminated_length": 9.333333333333334,
      "epoch": 1.0907372400756143,
      "grad_norm": 0.10332732647657394,
      "kl": 0.0515289306640625,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 20067096.0,
      "reward": 0.625789741675059,
      "reward_std": 0.2765499949455261,
      "rewards/get_embedding_sim/mean": 0.49471331636110943,
      "rewards/get_embedding_sim/std": 0.11266019940376282,
      "rewards/reward_num_unique_chars/mean": 0.1310763880610466,
      "rewards/reward_num_unique_chars/std": 0.336679349342982,
      "step": 72
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.036458333333333294,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 1008.6666666666666,
      "completions/mean_length": 209.8359375,
      "completions/mean_terminated_length": 178.97360229492188,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 1.1361058601134215,
      "grad_norm": 0.11925654858350754,
      "kl": 0.14461263020833334,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 20893467.0,
      "reward": 0.5831413467725118,
      "reward_std": 0.2582869480053584,
      "rewards/get_embedding_sim/mean": 0.4919954836368561,
      "rewards/get_embedding_sim/std": 0.1114387462536494,
      "rewards/reward_num_unique_chars/mean": 0.09114583333333333,
      "rewards/reward_num_unique_chars/std": 0.2839343051115672,
      "step": 75
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.047743055555555546,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 897.3333333333334,
      "completions/mean_length": 221.0104217529297,
      "completions/mean_terminated_length": 180.71256510416666,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 1.1814744801512287,
      "grad_norm": 0.09554021060466766,
      "kl": 0.16336822509765625,
      "learning_rate": 1e-06,
      "loss": 0.0197,
      "num_tokens": 21730887.0,
      "reward": 0.6385945876439413,
      "reward_std": 0.2661168724298477,
      "rewards/get_embedding_sim/mean": 0.5127264857292175,
      "rewards/get_embedding_sim/std": 0.11183823893467586,
      "rewards/reward_num_unique_chars/mean": 0.12586805472771326,
      "rewards/reward_num_unique_chars/std": 0.3207412262757619,
      "step": 78
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.049479166666666664,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 939.0,
      "completions/mean_length": 228.04601033528647,
      "completions/mean_terminated_length": 186.1550038655599,
      "completions/min_length": 7.333333333333333,
      "completions/min_terminated_length": 7.333333333333333,
      "epoch": 1.2268431001890359,
      "grad_norm": 0.07755686342716217,
      "kl": 0.05751800537109375,
      "learning_rate": 1e-06,
      "loss": 0.0156,
      "num_tokens": 22583420.0,
      "reward": 0.6019672354062399,
      "reward_std": 0.26383428772290546,
      "rewards/get_embedding_sim/mean": 0.5021408100922903,
      "rewards/get_embedding_sim/std": 0.10627821832895279,
      "rewards/reward_num_unique_chars/mean": 0.09982638930281003,
      "rewards/reward_num_unique_chars/std": 0.2837299009164174,
      "step": 81
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029513888888888878,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 931.3333333333334,
      "completions/mean_length": 191.74740091959634,
      "completions/mean_terminated_length": 166.4415028889974,
      "completions/min_length": 8.666666666666666,
      "completions/min_terminated_length": 8.666666666666666,
      "epoch": 1.272211720226843,
      "grad_norm": 0.08697984367609024,
      "kl": 0.057329813639322914,
      "learning_rate": 1e-06,
      "loss": 0.0141,
      "num_tokens": 23394137.0,
      "reward": 0.6638144056002299,
      "reward_std": 0.26522762576738995,
      "rewards/get_embedding_sim/mean": 0.5231893658638,
      "rewards/get_embedding_sim/std": 0.10482257604598999,
      "rewards/reward_num_unique_chars/mean": 0.140625,
      "rewards/reward_num_unique_chars/std": 0.3480878472328186,
      "step": 84
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.026041666666666668,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 872.3333333333334,
      "completions/mean_length": 185.42535400390625,
      "completions/mean_terminated_length": 163.15696716308594,
      "completions/min_length": 7.333333333333333,
      "completions/min_terminated_length": 7.333333333333333,
      "epoch": 1.3175803402646502,
      "grad_norm": 0.14970338344573975,
      "kl": 0.12465922037760417,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 24197571.0,
      "reward": 0.6190575559933981,
      "reward_std": 0.2601381540298462,
      "rewards/get_embedding_sim/mean": 0.5174950361251831,
      "rewards/get_embedding_sim/std": 0.0997606838742892,
      "rewards/reward_num_unique_chars/mean": 0.10156250124176343,
      "rewards/reward_num_unique_chars/std": 0.297150323788325,
      "step": 87
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029513888888888912,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 965.6666666666666,
      "completions/mean_length": 216.75694783528647,
      "completions/mean_terminated_length": 192.3217315673828,
      "completions/min_length": 7.333333333333333,
      "completions/min_terminated_length": 7.333333333333333,
      "epoch": 1.3629489603024574,
      "grad_norm": 0.11725780367851257,
      "kl": 0.08345540364583333,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 25034555.0,
      "reward": 0.5995156168937683,
      "reward_std": 0.22840352356433868,
      "rewards/get_embedding_sim/mean": 0.5118419329325358,
      "rewards/get_embedding_sim/std": 0.0987908939520518,
      "rewards/reward_num_unique_chars/mean": 0.0876736119389534,
      "rewards/reward_num_unique_chars/std": 0.27722589671611786,
      "step": 90
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029513888888888878,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 884.3333333333334,
      "completions/mean_length": 186.4375,
      "completions/mean_terminated_length": 161.33899434407553,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 1.4083175803402646,
      "grad_norm": 0.10394510626792908,
      "kl": 0.07037099202473958,
      "learning_rate": 1e-06,
      "loss": 0.0084,
      "num_tokens": 25831283.0,
      "reward": 0.6795124411582947,
      "reward_std": 0.29141750435034436,
      "rewards/get_embedding_sim/mean": 0.5137137969334921,
      "rewards/get_embedding_sim/std": 0.09767910589774449,
      "rewards/reward_num_unique_chars/mean": 0.16579860697189966,
      "rewards/reward_num_unique_chars/std": 0.3542452355225881,
      "step": 93
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.05034722222222221,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 971.3333333333334,
      "completions/mean_length": 212.23785400390625,
      "completions/mean_terminated_length": 169.16080729166666,
      "completions/min_length": 6.666666666666667,
      "completions/min_terminated_length": 6.666666666666667,
      "epoch": 1.4536862003780717,
      "grad_norm": 0.10010381788015366,
      "kl": 0.06960042317708333,
      "learning_rate": 1e-06,
      "loss": 0.0203,
      "num_tokens": 26656485.0,
      "reward": 0.6300086975097656,
      "reward_std": 0.24619843065738678,
      "rewards/get_embedding_sim/mean": 0.5267100731531779,
      "rewards/get_embedding_sim/std": 0.1071697548031807,
      "rewards/reward_num_unique_chars/mean": 0.1032986119389534,
      "rewards/reward_num_unique_chars/std": 0.29826584458351135,
      "step": 96
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.037326388888888874,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 919.0,
      "completions/mean_length": 211.8107655843099,
      "completions/mean_terminated_length": 180.1980183919271,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 1.499054820415879,
      "grad_norm": 0.07485458254814148,
      "kl": 0.061063130696614586,
      "learning_rate": 1e-06,
      "loss": 0.0248,
      "num_tokens": 27490315.0,
      "reward": 0.620047926902771,
      "reward_std": 0.2632503807544708,
      "rewards/get_embedding_sim/mean": 0.5132770538330078,
      "rewards/get_embedding_sim/std": 0.10026986648639043,
      "rewards/reward_num_unique_chars/mean": 0.10677083333333333,
      "rewards/reward_num_unique_chars/std": 0.3043619990348816,
      "step": 99
    },
    {
      "epoch": 1.544423440453686,
      "grad_norm": 0.11106861382722855,
      "learning_rate": 1e-06,
      "loss": 0.0115,
      "step": 102
    },
    {
      "epoch": 1.544423440453686,
      "eval_clip_ratio/high_max": 0.0,
      "eval_clip_ratio/high_mean": 0.0,
      "eval_clip_ratio/low_mean": 0.0,
      "eval_clip_ratio/low_min": 0.0,
      "eval_clip_ratio/region_mean": 0.0,
      "eval_completions/clipped_ratio": 0.07068452380952381,
      "eval_completions/max_length": 887.5892857142857,
      "eval_completions/max_terminated_length": 675.0892857142857,
      "eval_completions/mean_length": 206.3244113922119,
      "eval_completions/mean_terminated_length": 145.37539066587175,
      "eval_completions/min_length": 18.160714285714285,
      "eval_completions/min_terminated_length": 18.160714285714285,
      "eval_kl": 0.06965419224330358,
      "eval_loss": 0.03773626312613487,
      "eval_num_tokens": 28307736.0,
      "eval_reward": 0.6229457370936871,
      "eval_reward_std": 0.2839882879384926,
      "eval_rewards/get_embedding_sim/mean": 0.5206391582531589,
      "eval_rewards/get_embedding_sim/std": 0.09148550758670483,
      "eval_rewards/reward_num_unique_chars/mean": 0.10230654794057566,
      "eval_rewards/reward_num_unique_chars/std": 0.24572753932859218,
      "eval_runtime": 1726.6979,
      "eval_samples_per_second": 0.032,
      "eval_steps_per_second": 0.001,
      "step": 102
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03602430555555556,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 990.3333333333334,
      "completions/mean_length": 214.1545181274414,
      "completions/mean_terminated_length": 183.94319661458334,
      "completions/min_length": 6.5,
      "completions/min_terminated_length": 6.5,
      "epoch": 1.5897920604914932,
      "grad_norm": 1.0838171243667603,
      "kl": 0.0672899881998698,
      "learning_rate": 1e-06,
      "loss": 0.0253,
      "num_tokens": 29138511.0,
      "reward": 0.6624543964862823,
      "reward_std": 0.26948046932617825,
      "rewards/get_embedding_sim/mean": 0.5296418766180674,
      "rewards/get_embedding_sim/std": 0.10213356713453929,
      "rewards/reward_num_unique_chars/mean": 0.1328124993791183,
      "rewards/reward_num_unique_chars/std": 0.32840434461832047,
      "step": 105
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.026041666666666668,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 974.0,
      "completions/mean_length": 201.72048950195312,
      "completions/mean_terminated_length": 179.72496032714844,
      "completions/min_length": 7.333333333333333,
      "completions/min_terminated_length": 7.333333333333333,
      "epoch": 1.6351606805293004,
      "grad_norm": 0.0918864831328392,
      "kl": 0.061335245768229164,
      "learning_rate": 1e-06,
      "loss": 0.0208,
      "num_tokens": 29960717.0,
      "reward": 0.6120087305704752,
      "reward_std": 0.250284880399704,
      "rewards/get_embedding_sim/mean": 0.5364878376324972,
      "rewards/get_embedding_sim/std": 0.0979540745417277,
      "rewards/reward_num_unique_chars/mean": 0.07552083457509677,
      "rewards/reward_num_unique_chars/std": 0.26320414741834003,
      "step": 108
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03559027777777779,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 923.0,
      "completions/mean_length": 206.97309366861978,
      "completions/mean_terminated_length": 177.1915079752604,
      "completions/min_length": 7.666666666666667,
      "completions/min_terminated_length": 7.666666666666667,
      "epoch": 1.6805293005671076,
      "grad_norm": 0.07678642123937607,
      "kl": 0.06285349527994792,
      "learning_rate": 1e-06,
      "loss": 0.0214,
      "num_tokens": 30781870.0,
      "reward": 0.6274827718734741,
      "reward_std": 0.26556732257207233,
      "rewards/get_embedding_sim/mean": 0.5155035257339478,
      "rewards/get_embedding_sim/std": 0.09278701990842819,
      "rewards/reward_num_unique_chars/mean": 0.11197916666666667,
      "rewards/reward_num_unique_chars/std": 0.30655037860075635,
      "step": 111
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.032118055555555546,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 881.3333333333334,
      "completions/mean_length": 185.73351542154947,
      "completions/mean_terminated_length": 157.97284952799478,
      "completions/min_length": 8.333333333333334,
      "completions/min_terminated_length": 8.333333333333334,
      "epoch": 1.725897920604915,
      "grad_norm": 0.07077532261610031,
      "kl": 0.07100423177083333,
      "learning_rate": 1e-06,
      "loss": 0.0255,
      "num_tokens": 31578315.0,
      "reward": 0.6285200913747152,
      "reward_std": 0.2933768729368846,
      "rewards/get_embedding_sim/mean": 0.5260895093282064,
      "rewards/get_embedding_sim/std": 0.10419273873170216,
      "rewards/reward_num_unique_chars/mean": 0.10243055472771327,
      "rewards/reward_num_unique_chars/std": 0.30191460251808167,
      "step": 114
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.035590277777777755,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 998.3333333333334,
      "completions/mean_length": 197.5555623372396,
      "completions/mean_terminated_length": 167.07290649414062,
      "completions/min_length": 7.333333333333333,
      "completions/min_terminated_length": 7.333333333333333,
      "epoch": 1.7712665406427222,
      "grad_norm": 0.07132314145565033,
      "kl": 0.07155863444010417,
      "learning_rate": 1e-06,
      "loss": 0.0289,
      "num_tokens": 32370091.0,
      "reward": 0.6605067054430643,
      "reward_std": 0.3198150396347046,
      "rewards/get_embedding_sim/mean": 0.5276941855748495,
      "rewards/get_embedding_sim/std": 0.09764280170202255,
      "rewards/reward_num_unique_chars/mean": 0.13281250248352686,
      "rewards/reward_num_unique_chars/std": 0.3356940845648448,
      "step": 117
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03819444444444442,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 925.3333333333334,
      "completions/mean_length": 205.5260467529297,
      "completions/mean_terminated_length": 172.99127197265625,
      "completions/min_length": 7.333333333333333,
      "completions/min_terminated_length": 7.333333333333333,
      "epoch": 1.8166351606805293,
      "grad_norm": 0.07695771753787994,
      "kl": 0.079559326171875,
      "learning_rate": 1e-06,
      "loss": 0.0285,
      "num_tokens": 33183529.0,
      "reward": 0.6505021651585897,
      "reward_std": 0.28806476791699726,
      "rewards/get_embedding_sim/mean": 0.5255021254221598,
      "rewards/get_embedding_sim/std": 0.10448584208885829,
      "rewards/reward_num_unique_chars/mean": 0.12499999751647313,
      "rewards/reward_num_unique_chars/std": 0.3297826250394185,
      "step": 120
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 956.0,
      "completions/mean_length": 187.78907267252603,
      "completions/mean_terminated_length": 160.96214803059897,
      "completions/min_length": 7.333333333333333,
      "completions/min_terminated_length": 7.333333333333333,
      "epoch": 1.8620037807183365,
      "grad_norm": 0.10003960132598877,
      "kl": 0.10397847493489583,
      "learning_rate": 1e-06,
      "loss": 0.0332,
      "num_tokens": 33979270.0,
      "reward": 0.702047864596049,
      "reward_std": 0.2998199959595998,
      "rewards/get_embedding_sim/mean": 0.5353811780611674,
      "rewards/get_embedding_sim/std": 0.1009945347905159,
      "rewards/reward_num_unique_chars/mean": 0.16666666915019354,
      "rewards/reward_num_unique_chars/std": 0.3635033369064331,
      "step": 123
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 961.0,
      "completions/mean_length": 201.6701456705729,
      "completions/mean_terminated_length": 168.43896484375,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 1.9073724007561437,
      "grad_norm": 0.1418294459581375,
      "kl": 0.08981831868489583,
      "learning_rate": 1e-06,
      "loss": 0.0311,
      "num_tokens": 34790042.0,
      "reward": 0.6395866274833679,
      "reward_std": 0.278631071249644,
      "rewards/get_embedding_sim/mean": 0.5371560255686442,
      "rewards/get_embedding_sim/std": 0.10253078490495682,
      "rewards/reward_num_unique_chars/mean": 0.10243055721124013,
      "rewards/reward_num_unique_chars/std": 0.3033109207948049,
      "step": 126
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.020833333333333297,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 933.6666666666666,
      "completions/mean_length": 165.38021341959634,
      "completions/mean_terminated_length": 147.11800384521484,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 1.9527410207939508,
      "grad_norm": 0.085059255361557,
      "kl": 0.09361775716145833,
      "learning_rate": 1e-06,
      "loss": 0.0241,
      "num_tokens": 35555504.0,
      "reward": 0.7251607775688171,
      "reward_std": 0.3248043159643809,
      "rewards/get_embedding_sim/mean": 0.5359246134757996,
      "rewards/get_embedding_sim/std": 0.10831368962923686,
      "rewards/reward_num_unique_chars/mean": 0.18923610945542654,
      "rewards/reward_num_unique_chars/std": 0.38598161935806274,
      "step": 129
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.051843869731800774,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 949.6666666666666,
      "completions/mean_length": 218.2733408610026,
      "completions/mean_terminated_length": 174.35783894856772,
      "completions/min_length": 5.333333333333333,
      "completions/min_terminated_length": 5.333333333333333,
      "epoch": 1.998109640831758,
      "grad_norm": 0.15158401429653168,
      "kl": 0.09020487467447917,
      "learning_rate": 1e-06,
      "loss": 0.0421,
      "num_tokens": 36367765.0,
      "reward": 0.6982676188151041,
      "reward_std": 0.3466052810351054,
      "rewards/get_embedding_sim/mean": 0.5420175790786743,
      "rewards/get_embedding_sim/std": 0.09902476519346237,
      "rewards/reward_num_unique_chars/mean": 0.15625,
      "rewards/reward_num_unique_chars/std": 0.3612334032853444,
      "step": 132
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03472222222222221,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 948.0,
      "completions/mean_length": 176.51909891764322,
      "completions/mean_terminated_length": 146.023562113444,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 2.045368620037807,
      "grad_norm": 0.08893448859453201,
      "kl": 0.096466064453125,
      "learning_rate": 1e-06,
      "loss": 0.0402,
      "num_tokens": 37147067.0,
      "reward": 0.7173450986544291,
      "reward_std": 0.35685937603314716,
      "rewards/get_embedding_sim/mean": 0.5341853896776835,
      "rewards/get_embedding_sim/std": 0.1000617394844691,
      "rewards/reward_num_unique_chars/mean": 0.1831597238779068,
      "rewards/reward_num_unique_chars/std": 0.3842338224252065,
      "step": 135
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.041666666666666664,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 879.0,
      "completions/mean_length": 208.60938008626303,
      "completions/mean_terminated_length": 173.26571655273438,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 2.0907372400756143,
      "grad_norm": 0.12921324372291565,
      "kl": 0.09910074869791667,
      "learning_rate": 1e-06,
      "loss": 0.0475,
      "num_tokens": 37974473.0,
      "reward": 0.672684927781423,
      "reward_std": 0.34854390223821,
      "rewards/get_embedding_sim/mean": 0.5364001393318176,
      "rewards/get_embedding_sim/std": 0.10567483057578404,
      "rewards/reward_num_unique_chars/mean": 0.13628472139437994,
      "rewards/reward_num_unique_chars/std": 0.34236905972162884,
      "step": 138
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021701388888888878,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 956.0,
      "completions/mean_length": 167.10590616861978,
      "completions/mean_terminated_length": 148.0250244140625,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 2.1361058601134215,
      "grad_norm": 0.12040314823389053,
      "kl": 0.24815877278645834,
      "learning_rate": 1e-06,
      "loss": 0.0403,
      "num_tokens": 38741491.0,
      "reward": 0.6958853205045065,
      "reward_std": 0.3416078786055247,
      "rewards/get_embedding_sim/mean": 0.5396353205045065,
      "rewards/get_embedding_sim/std": 0.11144034812847774,
      "rewards/reward_num_unique_chars/mean": 0.15625000248352686,
      "rewards/reward_num_unique_chars/std": 0.3600207368532817,
      "step": 141
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.032986111111111084,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 909.0,
      "completions/mean_length": 182.28211975097656,
      "completions/mean_terminated_length": 153.65155029296875,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 2.1814744801512287,
      "grad_norm": 0.08720903098583221,
      "kl": 0.11935933430989583,
      "learning_rate": 1e-06,
      "loss": 0.042,
      "num_tokens": 39531464.0,
      "reward": 0.7134884198506674,
      "reward_std": 0.36159368356068927,
      "rewards/get_embedding_sim/mean": 0.5424814422925314,
      "rewards/get_embedding_sim/std": 0.11029936373233795,
      "rewards/reward_num_unique_chars/mean": 0.17100694278875986,
      "rewards/reward_num_unique_chars/std": 0.37471526861190796,
      "step": 144
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 917.0,
      "completions/mean_length": 171.84375508626303,
      "completions/mean_terminated_length": 144.29528299967447,
      "completions/min_length": 8.0,
      "completions/min_terminated_length": 8.0,
      "epoch": 2.226843100189036,
      "grad_norm": 0.08887135237455368,
      "kl": 0.118194580078125,
      "learning_rate": 1e-06,
      "loss": 0.0381,
      "num_tokens": 40311428.0,
      "reward": 0.6788019339243571,
      "reward_std": 0.33359630902608234,
      "rewards/get_embedding_sim/mean": 0.5451213518778483,
      "rewards/get_embedding_sim/std": 0.10193872700134914,
      "rewards/reward_num_unique_chars/mean": 0.133680559694767,
      "rewards/reward_num_unique_chars/std": 0.34027015169461566,
      "step": 147
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.01909722222222221,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 922.6666666666666,
      "completions/mean_length": 165.07205200195312,
      "completions/mean_terminated_length": 148.45321146647134,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 2.272211720226843,
      "grad_norm": 0.08689926564693451,
      "kl": 0.149566650390625,
      "learning_rate": 1e-06,
      "loss": 0.0324,
      "num_tokens": 41091415.0,
      "reward": 0.6884604295094808,
      "reward_std": 0.3447088996569316,
      "rewards/get_embedding_sim/mean": 0.5495714743932089,
      "rewards/get_embedding_sim/std": 0.10353380193312962,
      "rewards/reward_num_unique_chars/mean": 0.13888888557751974,
      "rewards/reward_num_unique_chars/std": 0.3459552029768626,
      "step": 150
    },
    {
      "epoch": 2.31758034026465,
      "grad_norm": 0.10476606339216232,
      "learning_rate": 1e-06,
      "loss": 0.0513,
      "step": 153
    },
    {
      "epoch": 2.31758034026465,
      "eval_clip_ratio/high_max": 0.0,
      "eval_clip_ratio/high_mean": 0.0,
      "eval_clip_ratio/low_mean": 0.0,
      "eval_clip_ratio/low_min": 0.0,
      "eval_clip_ratio/region_mean": 0.0,
      "eval_completions/clipped_ratio": 0.04687500000000001,
      "eval_completions/max_length": 856.4464285714286,
      "eval_completions/max_terminated_length": 614.625,
      "eval_completions/mean_length": 148.9296919277736,
      "eval_completions/mean_terminated_length": 106.54870585032872,
      "eval_completions/min_length": 12.107142857142858,
      "eval_completions/min_terminated_length": 12.107142857142858,
      "eval_kl": 0.15039280482700892,
      "eval_loss": 0.05131923779845238,
      "eval_num_tokens": 41858572.0,
      "eval_reward": 0.7319182710988181,
      "eval_reward_std": 0.39004063113991705,
      "eval_rewards/get_embedding_sim/mean": 0.5399539640971592,
      "eval_rewards/get_embedding_sim/std": 0.09657471527212433,
      "eval_rewards/reward_num_unique_chars/mean": 0.19196428627973156,
      "eval_rewards/reward_num_unique_chars/std": 0.34904111203338417,
      "eval_runtime": 1578.4274,
      "eval_samples_per_second": 0.035,
      "eval_steps_per_second": 0.001,
      "step": 153
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022135416666666668,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 918.8333333333334,
      "completions/mean_length": 154.00824991861978,
      "completions/mean_terminated_length": 134.3066151936849,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 2.3629489603024574,
      "grad_norm": 0.17861098051071167,
      "kl": 0.1991424560546875,
      "learning_rate": 1e-06,
      "loss": 0.0446,
      "num_tokens": 42603290.0,
      "reward": 0.7928757965564728,
      "reward_std": 0.39940689504146576,
      "rewards/get_embedding_sim/mean": 0.5420077045758566,
      "rewards/get_embedding_sim/std": 0.10503626987338066,
      "rewards/reward_num_unique_chars/mean": 0.2508680547277133,
      "rewards/reward_num_unique_chars/std": 0.43154530723889667,
      "step": 156
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.02083333333333337,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 943.6666666666666,
      "completions/mean_length": 149.3498331705729,
      "completions/mean_terminated_length": 130.67583719889322,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 2.4083175803402646,
      "grad_norm": 0.0997217446565628,
      "kl": 0.176727294921875,
      "learning_rate": 1e-06,
      "loss": 0.0479,
      "num_tokens": 43362573.0,
      "reward": 0.7896133859952291,
      "reward_std": 0.37762073675791424,
      "rewards/get_embedding_sim/mean": 0.5604466795921326,
      "rewards/get_embedding_sim/std": 0.10085596889257431,
      "rewards/reward_num_unique_chars/mean": 0.22916666666666666,
      "rewards/reward_num_unique_chars/std": 0.417032649119695,
      "step": 159
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.01996527777777779,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 966.0,
      "completions/mean_length": 133.52865091959634,
      "completions/mean_terminated_length": 115.48833719889323,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 2.4536862003780717,
      "grad_norm": 0.08974426239728928,
      "kl": 0.13869730631510416,
      "learning_rate": 1e-06,
      "loss": 0.0442,
      "num_tokens": 44099022.0,
      "reward": 0.8535909652709961,
      "reward_std": 0.42868249615033466,
      "rewards/get_embedding_sim/mean": 0.5332783659299215,
      "rewards/get_embedding_sim/std": 0.10329846044381459,
      "rewards/reward_num_unique_chars/mean": 0.3203125,
      "rewards/reward_num_unique_chars/std": 0.45679094394048053,
      "step": 162
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.026041666666666668,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 908.0,
      "completions/mean_length": 141.3715337117513,
      "completions/mean_terminated_length": 117.76270294189453,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 2.499054820415879,
      "grad_norm": 0.10181669145822525,
      "kl": 0.23414103190104166,
      "learning_rate": 1e-06,
      "loss": 0.0402,
      "num_tokens": 44851706.0,
      "reward": 0.8000141382217407,
      "reward_std": 0.3941415250301361,
      "rewards/get_embedding_sim/mean": 0.5456738670667013,
      "rewards/get_embedding_sim/std": 0.10725356390078862,
      "rewards/reward_num_unique_chars/mean": 0.2543402810891469,
      "rewards/reward_num_unique_chars/std": 0.43460813164711,
      "step": 165
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.018229166666666703,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 882.6666666666666,
      "completions/mean_length": 141.09028116861978,
      "completions/mean_terminated_length": 124.72643280029297,
      "completions/min_length": 6.666666666666667,
      "completions/min_terminated_length": 6.666666666666667,
      "epoch": 2.544423440453686,
      "grad_norm": 0.08525840193033218,
      "kl": 0.20921834309895834,
      "learning_rate": 1e-06,
      "loss": 0.0455,
      "num_tokens": 45604066.0,
      "reward": 0.7868956923484802,
      "reward_std": 0.40804105003674823,
      "rewards/get_embedding_sim/mean": 0.5299512147903442,
      "rewards/get_embedding_sim/std": 0.10723193486531575,
      "rewards/reward_num_unique_chars/mean": 0.2569444378217061,
      "rewards/reward_num_unique_chars/std": 0.42565350731213886,
      "step": 168
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.025173611111111122,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 923.3333333333334,
      "completions/mean_length": 138.74913533528647,
      "completions/mean_terminated_length": 116.1558354695638,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 2.5897920604914932,
      "grad_norm": 0.6628166437149048,
      "kl": 0.317626953125,
      "learning_rate": 1e-06,
      "loss": 0.0491,
      "num_tokens": 46345857.0,
      "reward": 0.8313470085461935,
      "reward_std": 0.4129582444826762,
      "rewards/get_embedding_sim/mean": 0.5509650309880575,
      "rewards/get_embedding_sim/std": 0.09154053280750911,
      "rewards/reward_num_unique_chars/mean": 0.2803819427887599,
      "rewards/reward_num_unique_chars/std": 0.4417712489763896,
      "step": 171
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.01128472222222221,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 772.6666666666666,
      "completions/mean_length": 111.97396087646484,
      "completions/mean_terminated_length": 101.59329223632812,
      "completions/min_length": 6.666666666666667,
      "completions/min_terminated_length": 6.666666666666667,
      "epoch": 2.6351606805293004,
      "grad_norm": 0.09945366531610489,
      "kl": 0.262237548828125,
      "learning_rate": 1e-06,
      "loss": 0.038,
      "num_tokens": 47054931.0,
      "reward": 0.8715664744377136,
      "reward_std": 0.4346109131971995,
      "rewards/get_embedding_sim/mean": 0.549517830212911,
      "rewards/get_embedding_sim/std": 0.11452717334032059,
      "rewards/reward_num_unique_chars/mean": 0.3220486094554265,
      "rewards/reward_num_unique_chars/std": 0.4634987811247508,
      "step": 174
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033854166666666685,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 918.0,
      "completions/mean_length": 155.64192962646484,
      "completions/mean_terminated_length": 125.11021041870117,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 2.6805293005671076,
      "grad_norm": 0.09938167780637741,
      "kl": 0.2503814697265625,
      "learning_rate": 1e-06,
      "loss": 0.054,
      "num_tokens": 47795091.0,
      "reward": 0.7676738500595093,
      "reward_std": 0.39470958709716797,
      "rewards/get_embedding_sim/mean": 0.5736633539199829,
      "rewards/get_embedding_sim/std": 0.09976038336753845,
      "rewards/reward_num_unique_chars/mean": 0.1940104141831398,
      "rewards/reward_num_unique_chars/std": 0.3958670049905777,
      "step": 177
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017361111111111122,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 878.6666666666666,
      "completions/mean_length": 116.89757283528645,
      "completions/mean_terminated_length": 100.84752400716145,
      "completions/min_length": 6.333333333333333,
      "completions/min_terminated_length": 6.333333333333333,
      "epoch": 2.7258979206049148,
      "grad_norm": 0.1425255984067917,
      "kl": 0.2775370279947917,
      "learning_rate": 1e-06,
      "loss": 0.0481,
      "num_tokens": 48511037.0,
      "reward": 0.863362193107605,
      "reward_std": 0.4587005575497945,
      "rewards/get_embedding_sim/mean": 0.5473899245262146,
      "rewards/get_embedding_sim/std": 0.09922760476668675,
      "rewards/reward_num_unique_chars/mean": 0.3159722288449605,
      "rewards/reward_num_unique_chars/std": 0.4650394419829051,
      "step": 180
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014756944444444456,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 891.3333333333334,
      "completions/mean_length": 102.27864837646484,
      "completions/mean_terminated_length": 88.46848042805989,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 2.7712665406427224,
      "grad_norm": 0.09843874722719193,
      "kl": 0.34393310546875,
      "learning_rate": 1e-06,
      "loss": 0.0436,
      "num_tokens": 49204478.0,
      "reward": 0.8661341269810995,
      "reward_std": 0.46143727501233417,
      "rewards/get_embedding_sim/mean": 0.5727312763532003,
      "rewards/get_embedding_sim/std": 0.11564485480388005,
      "rewards/reward_num_unique_chars/mean": 0.2934027711550395,
      "rewards/reward_num_unique_chars/std": 0.4531017243862152,
      "step": 183
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.01996527777777779,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 754.6666666666666,
      "completions/mean_length": 122.11806233723958,
      "completions/mean_terminated_length": 103.76323954264323,
      "completions/min_length": 5.666666666666667,
      "completions/min_terminated_length": 5.666666666666667,
      "epoch": 2.816635160680529,
      "grad_norm": 0.15066391229629517,
      "kl": 0.3179728190104167,
      "learning_rate": 1e-06,
      "loss": 0.0582,
      "num_tokens": 49927110.0,
      "reward": 0.892190178235372,
      "reward_std": 0.4511215090751648,
      "rewards/get_embedding_sim/mean": 0.5527804295221964,
      "rewards/get_embedding_sim/std": 0.1083058441678683,
      "rewards/reward_num_unique_chars/mean": 0.3394097238779068,
      "rewards/reward_num_unique_chars/std": 0.46636247634887695,
      "step": 186
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022569444444444458,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 940.3333333333334,
      "completions/mean_length": 126.84115091959636,
      "completions/mean_terminated_length": 106.15006764729817,
      "completions/min_length": 7.333333333333333,
      "completions/min_terminated_length": 7.333333333333333,
      "epoch": 2.8620037807183367,
      "grad_norm": 2.990046739578247,
      "kl": 0.47100830078125,
      "learning_rate": 1e-06,
      "loss": 0.0551,
      "num_tokens": 50663055.0,
      "reward": 0.8623983860015869,
      "reward_std": 0.4631191889444987,
      "rewards/get_embedding_sim/mean": 0.5533705353736877,
      "rewards/get_embedding_sim/std": 0.11140244205792744,
      "rewards/reward_num_unique_chars/mean": 0.3090277810891469,
      "rewards/reward_num_unique_chars/std": 0.4615551829338074,
      "step": 189
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.01649305555555558,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 891.0,
      "completions/mean_length": 111.4730936686198,
      "completions/mean_terminated_length": 96.25564575195312,
      "completions/min_length": 5.333333333333333,
      "completions/min_terminated_length": 5.333333333333333,
      "epoch": 2.9073724007561434,
      "grad_norm": 0.11676046997308731,
      "kl": 0.4471232096354167,
      "learning_rate": 1e-06,
      "loss": 0.0481,
      "num_tokens": 51373952.0,
      "reward": 0.9203431606292725,
      "reward_std": 0.47053369879722595,
      "rewards/get_embedding_sim/mean": 0.5444750587145487,
      "rewards/get_embedding_sim/std": 0.1073705404996872,
      "rewards/reward_num_unique_chars/mean": 0.3758680522441864,
      "rewards/reward_num_unique_chars/std": 0.4811862111091614,
      "step": 192
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016493055555555542,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 776.3333333333334,
      "completions/mean_length": 113.12673950195312,
      "completions/mean_terminated_length": 97.83474731445312,
      "completions/min_length": 8.666666666666666,
      "completions/min_terminated_length": 8.666666666666666,
      "epoch": 2.952741020793951,
      "grad_norm": 0.09854816645383835,
      "kl": 0.285675048828125,
      "learning_rate": 1e-06,
      "loss": 0.0465,
      "num_tokens": 52094098.0,
      "reward": 0.888769249121348,
      "reward_std": 0.46734312176704407,
      "rewards/get_embedding_sim/mean": 0.5519636472066244,
      "rewards/get_embedding_sim/std": 0.11934416989485423,
      "rewards/reward_num_unique_chars/mean": 0.3368055522441864,
      "rewards/reward_num_unique_chars/std": 0.4713793396949768,
      "step": 195
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013888888888888876,
      "completions/max_length": 1024.0,
      "completions/max_terminated_length": 927.3333333333334,
      "completions/mean_length": 112.50087229410808,
      "completions/mean_terminated_length": 99.5953369140625,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 2.998109640831758,
      "grad_norm": 0.08575434237718582,
      "kl": 0.42242431640625,
      "learning_rate": 1e-06,
      "loss": 0.0494,
      "num_tokens": 52800707.0,
      "reward": 0.9310129086176554,
      "reward_std": 0.4663335382938385,
      "rewards/get_embedding_sim/mean": 0.5603530804316202,
      "rewards/get_embedding_sim/std": 0.10822075108687083,
      "rewards/reward_num_unique_chars/mean": 0.3706597288449605,
      "rewards/reward_num_unique_chars/std": 0.4820249378681183,
      "step": 198
    }
  ],
  "logging_steps": 3,
  "max_steps": 198,
  "num_input_tokens_seen": 52800707,
  "num_train_epochs": 3,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}