File size: 75,124 Bytes
93b6a9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 326,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.75,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 37.0,
      "completions/mean_length": 393.25,
      "completions/mean_terminated_length": 37.0,
      "completions/min_length": 37.0,
      "completions/min_terminated_length": 37.0,
      "entropy": 1.4897738695144653,
      "epoch": 0.006134969325153374,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 2.2988293170928955,
      "learning_rate": 5e-07,
      "loss": -0.21252349019050598,
      "num_tokens": 3567.0,
      "reward": -0.3424999713897705,
      "reward_std": 0.01500000525265932,
      "rewards/format_reward/mean": 0.02500000037252903,
      "rewards/format_reward/std": 0.05000000074505806,
      "rewards/security_audit_reward/mean": -0.5,
      "rewards/security_audit_reward/std": 0.0,
      "step": 1,
      "step_time": 39.508622552999896
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.5,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 248.75,
      "completions/mean_length": 389.625,
      "completions/mean_terminated_length": 192.79166793823242,
      "completions/min_length": 272.75,
      "completions/min_terminated_length": 144.75,
      "entropy": 1.363443061709404,
      "epoch": 0.03067484662576687,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 4.688082218170166,
      "learning_rate": 4.938650306748465e-07,
      "loss": 0.04808004945516586,
      "num_tokens": 17675.0,
      "reward": -0.2981249839067459,
      "reward_std": 0.08178356755524874,
      "rewards/format_reward/mean": 0.10000000381842256,
      "rewards/format_reward/std": 0.12774468399584293,
      "rewards/security_audit_reward/mean": -0.46875,
      "rewards/security_audit_reward/std": 0.0625,
      "step": 5,
      "step_time": 38.500043476749966
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.65,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 345.6,
      "completions/mean_length": 463.0,
      "completions/mean_terminated_length": 305.6,
      "completions/min_length": 363.8,
      "completions/min_terminated_length": 261.4,
      "entropy": 1.4113845229148865,
      "epoch": 0.06134969325153374,
      "frac_reward_zero_std": 0.4,
      "grad_norm": 3.245452880859375,
      "learning_rate": 4.86196319018405e-07,
      "loss": -0.00041331946849823,
      "num_tokens": 37093.0,
      "reward": -0.29424998760223386,
      "reward_std": 0.08391451295465231,
      "rewards/format_reward/mean": 0.12750000804662703,
      "rewards/format_reward/std": 0.16304838731884957,
      "rewards/security_audit_reward/mean": -0.475,
      "rewards/security_audit_reward/std": 0.05,
      "step": 10,
      "step_time": 39.192330704800135
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.6,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 394.0,
      "completions/mean_length": 455.5,
      "completions/mean_terminated_length": 352.9,
      "completions/min_length": 311.8,
      "completions/min_terminated_length": 311.8,
      "entropy": 1.179759132862091,
      "epoch": 0.09202453987730061,
      "frac_reward_zero_std": 0.7,
      "grad_norm": 2.9624693393707275,
      "learning_rate": 4.785276073619632e-07,
      "loss": 0.03452911972999573,
      "num_tokens": 55311.0,
      "reward": -0.2887499898672104,
      "reward_std": 0.09658594038337469,
      "rewards/format_reward/mean": 0.0875,
      "rewards/format_reward/std": 0.08947573080658913,
      "rewards/security_audit_reward/mean": -0.45,
      "rewards/security_audit_reward/std": 0.1,
      "step": 15,
      "step_time": 38.30515608799997
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.45,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 395.2,
      "completions/mean_length": 416.9,
      "completions/mean_terminated_length": 328.76666870117185,
      "completions/min_length": 260.8,
      "completions/min_terminated_length": 260.8,
      "entropy": 1.298638153076172,
      "epoch": 0.12269938650306748,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 4.034470081329346,
      "learning_rate": 4.7085889570552147e-07,
      "loss": -0.008246073126792907,
      "num_tokens": 72771.0,
      "reward": -0.23124998807907104,
      "reward_std": 0.16768747363239528,
      "rewards/format_reward/mean": 0.19750000424683095,
      "rewards/format_reward/std": 0.2057904489338398,
      "rewards/security_audit_reward/mean": -0.4149999976158142,
      "rewards/security_audit_reward/std": 0.16999999880790712,
      "step": 20,
      "step_time": 37.87772348239996
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 423.4,
      "completions/mean_length": 382.1,
      "completions/mean_terminated_length": 334.1000091552734,
      "completions/min_length": 236.0,
      "completions/min_terminated_length": 236.0,
      "entropy": 1.317835807800293,
      "epoch": 0.15337423312883436,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 2.853423595428467,
      "learning_rate": 4.631901840490797e-07,
      "loss": -0.013739901781082153,
      "num_tokens": 89889.0,
      "reward": -0.2974999874830246,
      "reward_std": 0.15671177953481674,
      "rewards/format_reward/mean": 0.17500000596046447,
      "rewards/format_reward/std": 0.18444484770298003,
      "rewards/security_audit_reward/mean": -0.5,
      "rewards/security_audit_reward/std": 0.15773502588272095,
      "step": 25,
      "step_time": 38.74009619139997
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.65,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 346.0,
      "completions/mean_length": 463.35,
      "completions/mean_terminated_length": 295.3,
      "completions/min_length": 337.8,
      "completions/min_terminated_length": 235.4,
      "entropy": 1.1444598376750945,
      "epoch": 0.18404907975460122,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 3.64375901222229,
      "learning_rate": 4.55521472392638e-07,
      "loss": -0.03970654606819153,
      "num_tokens": 108664.0,
      "reward": -0.3184999763965607,
      "reward_std": 0.04019503518939018,
      "rewards/format_reward/mean": 0.10499999970197678,
      "rewards/format_reward/std": 0.13398344144225122,
      "rewards/security_audit_reward/mean": -0.5,
      "rewards/security_audit_reward/std": 0.0,
      "step": 30,
      "step_time": 38.56538706479987
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.4,
      "completions/max_length": 504.2,
      "completions/max_terminated_length": 454.6,
      "completions/mean_length": 421.25,
      "completions/mean_terminated_length": 386.0,
      "completions/min_length": 328.6,
      "completions/min_terminated_length": 328.6,
      "entropy": 1.3522289156913758,
      "epoch": 0.2147239263803681,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 3.4385552406311035,
      "learning_rate": 4.4785276073619634e-07,
      "loss": -0.06348788738250732,
      "num_tokens": 126953.0,
      "reward": -0.32824997901916503,
      "reward_std": 0.03220053892582655,
      "rewards/format_reward/mean": 0.07250000201165677,
      "rewards/format_reward/std": 0.10733511671423912,
      "rewards/security_audit_reward/mean": -0.5,
      "rewards/security_audit_reward/std": 0.0,
      "step": 35,
      "step_time": 37.87626404739986
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.55,
      "completions/max_length": 505.2,
      "completions/max_terminated_length": 334.4,
      "completions/mean_length": 414.9,
      "completions/mean_terminated_length": 240.3,
      "completions/min_length": 243.0,
      "completions/min_terminated_length": 140.6,
      "entropy": 1.230024951696396,
      "epoch": 0.24539877300613497,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 4.400479793548584,
      "learning_rate": 4.401840490797546e-07,
      "loss": 0.11927952766418456,
      "num_tokens": 144785.0,
      "reward": -0.2897499829530716,
      "reward_std": 0.12973095811903476,
      "rewards/format_reward/mean": 0.14250000044703484,
      "rewards/format_reward/std": 0.14365934804081917,
      "rewards/security_audit_reward/mean": -0.475,
      "rewards/security_audit_reward/std": 0.13164966106414794,
      "step": 40,
      "step_time": 37.8069536416001
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.6,
      "completions/max_length": 472.2,
      "completions/max_terminated_length": 190.6,
      "completions/mean_length": 412.7,
      "completions/mean_terminated_length": 160.85,
      "completions/min_length": 333.8,
      "completions/min_terminated_length": 129.0,
      "entropy": 1.2133947968482972,
      "epoch": 0.27607361963190186,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 4.325937271118164,
      "learning_rate": 4.3251533742331285e-07,
      "loss": 0.025146520137786864,
      "num_tokens": 162443.0,
      "reward": -0.1574999876320362,
      "reward_std": 0.2636621415615082,
      "rewards/format_reward/mean": 0.24500001072883607,
      "rewards/format_reward/std": 0.23762110471725464,
      "rewards/security_audit_reward/mean": -0.32999999523162843,
      "rewards/security_audit_reward/std": 0.28574271202087403,
      "step": 45,
      "step_time": 34.90182834920015
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.55,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 308.6,
      "completions/mean_length": 397.05,
      "completions/mean_terminated_length": 259.2,
      "completions/min_length": 203.0,
      "completions/min_terminated_length": 203.0,
      "entropy": 1.4294291973114013,
      "epoch": 0.3067484662576687,
      "frac_reward_zero_std": 0.4,
      "grad_norm": 3.9505743980407715,
      "learning_rate": 4.2484662576687116e-07,
      "loss": -0.08058007955551147,
      "num_tokens": 180200.0,
      "reward": -0.29249998927116394,
      "reward_std": 0.10127481501549482,
      "rewards/format_reward/mean": 0.0750000026077032,
      "rewards/format_reward/std": 0.1127780631184578,
      "rewards/security_audit_reward/mean": -0.45,
      "rewards/security_audit_reward/std": 0.1,
      "step": 50,
      "step_time": 38.750808009400046
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.5,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 389.8,
      "completions/mean_length": 404.65,
      "completions/mean_terminated_length": 297.43333740234374,
      "completions/min_length": 192.8,
      "completions/min_terminated_length": 192.8,
      "entropy": 1.2564165532588958,
      "epoch": 0.3374233128834356,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 3.3762269020080566,
      "learning_rate": 4.171779141104294e-07,
      "loss": -0.030467823147773743,
      "num_tokens": 198109.0,
      "reward": -0.2542499825358391,
      "reward_std": 0.07489922866225243,
      "rewards/format_reward/mean": 0.20250000841915608,
      "rewards/format_reward/std": 0.1368803471326828,
      "rewards/security_audit_reward/mean": -0.45,
      "rewards/security_audit_reward/std": 0.05773502588272095,
      "step": 55,
      "step_time": 38.411275500399825
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.35,
      "completions/max_length": 496.2,
      "completions/max_terminated_length": 448.8,
      "completions/mean_length": 394.3,
      "completions/mean_terminated_length": 358.6166687011719,
      "completions/min_length": 285.4,
      "completions/min_terminated_length": 285.4,
      "entropy": 1.2620218694210052,
      "epoch": 0.36809815950920244,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 2.8227944374084473,
      "learning_rate": 4.095092024539877e-07,
      "loss": 0.039707571268081665,
      "num_tokens": 215747.0,
      "reward": -0.2729999750852585,
      "reward_std": 0.13599938787519933,
      "rewards/format_reward/mean": 0.14000000432133675,
      "rewards/format_reward/std": 0.14343783408403396,
      "rewards/security_audit_reward/mean": -0.45,
      "rewards/security_audit_reward/std": 0.1393846869468689,
      "step": 60,
      "step_time": 37.50645367139987
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.35,
      "completions/max_length": 484.8,
      "completions/max_terminated_length": 334.8,
      "completions/mean_length": 381.35,
      "completions/mean_terminated_length": 255.98333740234375,
      "completions/min_length": 275.6,
      "completions/min_terminated_length": 173.2,
      "entropy": 1.2798833012580872,
      "epoch": 0.3987730061349693,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 3.4819753170013428,
      "learning_rate": 4.01840490797546e-07,
      "loss": -0.06275686025619506,
      "num_tokens": 233162.0,
      "reward": -0.201749986410141,
      "reward_std": 0.2016347900032997,
      "rewards/format_reward/mean": 0.20250000804662704,
      "rewards/format_reward/std": 0.22137173414230346,
      "rewards/security_audit_reward/mean": -0.375,
      "rewards/security_audit_reward/std": 0.20773502588272094,
      "step": 65,
      "step_time": 37.139902984000216
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.6,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 289.8,
      "completions/mean_length": 408.45,
      "completions/mean_terminated_length": 251.7,
      "completions/min_length": 207.0,
      "completions/min_terminated_length": 207.0,
      "entropy": 1.2134525895118713,
      "epoch": 0.4294478527607362,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 4.019806861877441,
      "learning_rate": 3.941717791411043e-07,
      "loss": 0.08099154829978943,
      "num_tokens": 251321.0,
      "reward": -0.27599998414516447,
      "reward_std": 0.08945702444761991,
      "rewards/format_reward/mean": 0.1300000037997961,
      "rewards/format_reward/std": 0.1645726040005684,
      "rewards/security_audit_reward/mean": -0.45,
      "rewards/security_audit_reward/std": 0.05773502588272095,
      "step": 70,
      "step_time": 38.02127088899997
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.5,
      "completions/max_length": 459.8,
      "completions/max_terminated_length": 206.2,
      "completions/mean_length": 348.9,
      "completions/mean_terminated_length": 176.9,
      "completions/min_length": 148.6,
      "completions/min_terminated_length": 148.6,
      "entropy": 1.3179432690143584,
      "epoch": 0.4601226993865031,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 6.28598690032959,
      "learning_rate": 3.8650306748466255e-07,
      "loss": -0.11171818971633911,
      "num_tokens": 267725.0,
      "reward": -0.19474998638033866,
      "reward_std": 0.17031802013516426,
      "rewards/format_reward/mean": 0.23750000447034836,
      "rewards/format_reward/std": 0.17114628925919534,
      "rewards/security_audit_reward/mean": -0.3800000011920929,
      "rewards/security_audit_reward/std": 0.180902099609375,
      "step": 75,
      "step_time": 34.450158203000136
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.5,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 332.8,
      "completions/mean_length": 400.75,
      "completions/mean_terminated_length": 289.06666870117186,
      "completions/min_length": 251.0,
      "completions/min_terminated_length": 251.0,
      "entropy": 1.1514661133289337,
      "epoch": 0.49079754601226994,
      "frac_reward_zero_std": 0.4,
      "grad_norm": 2.8479247093200684,
      "learning_rate": 3.788343558282208e-07,
      "loss": 0.03145935535430908,
      "num_tokens": 285726.0,
      "reward": -0.2569999933242798,
      "reward_std": 0.15333212018013,
      "rewards/format_reward/mean": 0.1350000023841858,
      "rewards/format_reward/std": 0.18636635541915894,
      "rewards/security_audit_reward/mean": -0.425,
      "rewards/security_audit_reward/std": 0.15,
      "step": 80,
      "step_time": 38.869779922999626
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.5,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 321.2,
      "completions/mean_length": 398.5,
      "completions/mean_terminated_length": 253.0,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "entropy": 1.244500571489334,
      "epoch": 0.5214723926380368,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 2.146970272064209,
      "learning_rate": 3.7116564417177916e-07,
      "loss": 0.06171210408210755,
      "num_tokens": 304148.0,
      "reward": -0.22524999380111693,
      "reward_std": 0.19191497713327407,
      "rewards/format_reward/mean": 0.18250000327825547,
      "rewards/format_reward/std": 0.19969657957553863,
      "rewards/security_audit_reward/mean": -0.4,
      "rewards/security_audit_reward/std": 0.2,
      "step": 85,
      "step_time": 39.297288996000134
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.55,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 360.0,
      "completions/mean_length": 419.5,
      "completions/mean_terminated_length": 325.6,
      "completions/min_length": 291.2,
      "completions/min_terminated_length": 291.2,
      "entropy": 1.206581747531891,
      "epoch": 0.5521472392638037,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 2.1158456802368164,
      "learning_rate": 3.634969325153374e-07,
      "loss": -0.06664568185806274,
      "num_tokens": 321680.0,
      "reward": -0.23324998915195466,
      "reward_std": 0.1919491995126009,
      "rewards/format_reward/mean": 0.1675000049173832,
      "rewards/format_reward/std": 0.19863576367497443,
      "rewards/security_audit_reward/mean": -0.40499999523162844,
      "rewards/security_audit_reward/std": 0.1899999976158142,
      "step": 90,
      "step_time": 38.49561594039933
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.4,
      "completions/max_length": 469.2,
      "completions/max_terminated_length": 364.0,
      "completions/mean_length": 383.2,
      "completions/mean_terminated_length": 291.8333374023438,
      "completions/min_length": 217.6,
      "completions/min_terminated_length": 217.6,
      "entropy": 1.217250692844391,
      "epoch": 0.5828220858895705,
      "frac_reward_zero_std": 0.4,
      "grad_norm": 4.098232269287109,
      "learning_rate": 3.558282208588957e-07,
      "loss": 0.05211906433105469,
      "num_tokens": 339350.0,
      "reward": -0.2119999945163727,
      "reward_std": 0.19894140996038914,
      "rewards/format_reward/mean": 0.1799999989569187,
      "rewards/format_reward/std": 0.23350853994488716,
      "rewards/security_audit_reward/mean": -0.37999999821186065,
      "rewards/security_audit_reward/std": 0.1911805212497711,
      "step": 95,
      "step_time": 35.83687614579994
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.5,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 345.4,
      "completions/mean_length": 377.45,
      "completions/mean_terminated_length": 290.6333343505859,
      "completions/min_length": 240.4,
      "completions/min_terminated_length": 240.4,
      "entropy": 1.2783292949199676,
      "epoch": 0.6134969325153374,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 2.361516237258911,
      "learning_rate": 3.48159509202454e-07,
      "loss": 0.06258203387260437,
      "num_tokens": 356239.0,
      "reward": -0.20649999231100083,
      "reward_std": 0.18195689767599105,
      "rewards/format_reward/mean": 0.24499999433755876,
      "rewards/format_reward/std": 0.19310407042503358,
      "rewards/security_audit_reward/mean": -0.4,
      "rewards/security_audit_reward/std": 0.2,
      "step": 100,
      "step_time": 38.47846096040011
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 463.2,
      "completions/max_terminated_length": 394.6,
      "completions/mean_length": 335.8,
      "completions/mean_terminated_length": 268.3,
      "completions/min_length": 139.6,
      "completions/min_terminated_length": 139.6,
      "entropy": 1.2529696226119995,
      "epoch": 0.6441717791411042,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 3.356074094772339,
      "learning_rate": 3.4049079754601224e-07,
      "loss": 0.003340443968772888,
      "num_tokens": 373237.0,
      "reward": -0.2567499876022339,
      "reward_std": 0.27417250275611876,
      "rewards/format_reward/mean": 0.14750000461935997,
      "rewards/format_reward/std": 0.19759280756115913,
      "rewards/security_audit_reward/mean": -0.4299999952316284,
      "rewards/security_audit_reward/std": 0.3186576545238495,
      "step": 105,
      "step_time": 35.43083410320014
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.45,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 372.8,
      "completions/mean_length": 419.15,
      "completions/mean_terminated_length": 319.2666748046875,
      "completions/min_length": 266.8,
      "completions/min_terminated_length": 266.8,
      "entropy": 1.1685741186141967,
      "epoch": 0.6748466257668712,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 4.318619728088379,
      "learning_rate": 3.3282208588957055e-07,
      "loss": -0.026089027523994446,
      "num_tokens": 391784.0,
      "reward": -0.2662499874830246,
      "reward_std": 0.07884115856140853,
      "rewards/format_reward/mean": 0.16250000558793545,
      "rewards/format_reward/std": 0.14070439487695693,
      "rewards/security_audit_reward/mean": -0.45,
      "rewards/security_audit_reward/std": 0.05773502588272095,
      "step": 110,
      "step_time": 38.89281254739999
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 468.0,
      "completions/max_terminated_length": 337.6,
      "completions/mean_length": 330.1,
      "completions/mean_terminated_length": 240.4166687011719,
      "completions/min_length": 160.8,
      "completions/min_terminated_length": 160.8,
      "entropy": 1.2954379856586455,
      "epoch": 0.7055214723926381,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 3.293928384780884,
      "learning_rate": 3.251533742331288e-07,
      "loss": 0.17276796102523803,
      "num_tokens": 408446.0,
      "reward": -0.22849999666213988,
      "reward_std": 0.1390242099761963,
      "rewards/format_reward/mean": 0.2300000011920929,
      "rewards/format_reward/std": 0.23302415013313293,
      "rewards/security_audit_reward/mean": -0.425,
      "rewards/security_audit_reward/std": 0.10773502588272095,
      "step": 115,
      "step_time": 35.73082293679981
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.55,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 281.2,
      "completions/mean_length": 400.5,
      "completions/mean_terminated_length": 248.73333740234375,
      "completions/min_length": 211.8,
      "completions/min_terminated_length": 211.8,
      "entropy": 1.2283548831939697,
      "epoch": 0.7361963190184049,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 2.632479190826416,
      "learning_rate": 3.174846625766871e-07,
      "loss": 0.05111231803894043,
      "num_tokens": 426822.0,
      "reward": -0.22074998915195465,
      "reward_std": 0.15957241374999284,
      "rewards/format_reward/mean": 0.1975000012665987,
      "rewards/format_reward/std": 0.1847505249083042,
      "rewards/security_audit_reward/mean": -0.4,
      "rewards/security_audit_reward/std": 0.15773502588272095,
      "step": 120,
      "step_time": 39.14503100519996
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.5,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 340.2,
      "completions/mean_length": 401.4,
      "completions/mean_terminated_length": 236.03333740234376,
      "completions/min_length": 245.2,
      "completions/min_terminated_length": 142.8,
      "entropy": 1.307636547088623,
      "epoch": 0.7668711656441718,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 5.566491603851318,
      "learning_rate": 3.0981595092024537e-07,
      "loss": 0.003215853124856949,
      "num_tokens": 444322.0,
      "reward": -0.11199999079108239,
      "reward_std": 0.2506739288568497,
      "rewards/format_reward/mean": 0.2449999988079071,
      "rewards/format_reward/std": 0.20622505843639374,
      "rewards/security_audit_reward/mean": -0.26500000059604645,
      "rewards/security_audit_reward/std": 0.278915548324585,
      "step": 125,
      "step_time": 38.70229864360026
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.35,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 390.6,
      "completions/mean_length": 367.0,
      "completions/mean_terminated_length": 264.0000061035156,
      "completions/min_length": 123.2,
      "completions/min_terminated_length": 123.2,
      "entropy": 1.248900693655014,
      "epoch": 0.7975460122699386,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 4.395384311676025,
      "learning_rate": 3.021472392638036e-07,
      "loss": 0.06482647061347961,
      "num_tokens": 461894.0,
      "reward": -0.2042499899864197,
      "reward_std": 0.17003463432192803,
      "rewards/format_reward/mean": 0.2525000125169754,
      "rewards/format_reward/std": 0.21560870110988617,
      "rewards/security_audit_reward/mean": -0.4,
      "rewards/security_audit_reward/std": 0.15773502588272095,
      "step": 130,
      "step_time": 39.174271353800215
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.35,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 370.0,
      "completions/mean_length": 359.95,
      "completions/mean_terminated_length": 276.9666687011719,
      "completions/min_length": 203.8,
      "completions/min_terminated_length": 203.8,
      "entropy": 1.3299469709396363,
      "epoch": 0.8282208588957055,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 4.3037519454956055,
      "learning_rate": 2.94478527607362e-07,
      "loss": 0.026153716444969177,
      "num_tokens": 478783.0,
      "reward": -0.19949999153614045,
      "reward_std": 0.15296672135591508,
      "rewards/format_reward/mean": 0.24500000178813935,
      "rewards/format_reward/std": 0.2273508906364441,
      "rewards/security_audit_reward/mean": -0.39000000059604645,
      "rewards/security_audit_reward/std": 0.12891554832458496,
      "step": 135,
      "step_time": 38.658669441000164
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.45,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 354.0,
      "completions/mean_length": 380.3,
      "completions/mean_terminated_length": 271.6666687011719,
      "completions/min_length": 206.4,
      "completions/min_terminated_length": 206.4,
      "entropy": 1.0997539341449738,
      "epoch": 0.8588957055214724,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 2.3693976402282715,
      "learning_rate": 2.8680981595092024e-07,
      "loss": -0.01876506209373474,
      "num_tokens": 496243.0,
      "reward": -0.17974998727440833,
      "reward_std": 0.18710523881018162,
      "rewards/format_reward/mean": 0.21750000193715097,
      "rewards/format_reward/std": 0.1843859799206257,
      "rewards/security_audit_reward/mean": -0.35,
      "rewards/security_audit_reward/std": 0.20347774028778076,
      "step": 140,
      "step_time": 39.171343391999834
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.35,
      "completions/max_length": 456.2,
      "completions/max_terminated_length": 355.2,
      "completions/mean_length": 348.35,
      "completions/mean_terminated_length": 270.3,
      "completions/min_length": 164.8,
      "completions/min_terminated_length": 164.8,
      "entropy": 1.2017314374446868,
      "epoch": 0.8895705521472392,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 4.230531215667725,
      "learning_rate": 2.791411042944785e-07,
      "loss": 0.0029310762882232668,
      "num_tokens": 513422.0,
      "reward": -0.1637499898672104,
      "reward_std": 0.2463478922843933,
      "rewards/format_reward/mean": 0.2475000023841858,
      "rewards/format_reward/std": 0.2085829883813858,
      "rewards/security_audit_reward/mean": -0.34000000059604646,
      "rewards/security_audit_reward/std": 0.26830023527145386,
      "step": 145,
      "step_time": 34.95672115479992
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.25,
      "completions/max_length": 482.4,
      "completions/max_terminated_length": 431.6,
      "completions/mean_length": 355.35,
      "completions/mean_terminated_length": 319.4166687011719,
      "completions/min_length": 212.0,
      "completions/min_terminated_length": 212.0,
      "entropy": 1.258862280845642,
      "epoch": 0.9202453987730062,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 6.071740627288818,
      "learning_rate": 2.714723926380368e-07,
      "loss": 0.0822126567363739,
      "num_tokens": 530643.0,
      "reward": -0.1912499874830246,
      "reward_std": 0.1670845106244087,
      "rewards/format_reward/mean": 0.27249999940395353,
      "rewards/format_reward/std": 0.1733592666685581,
      "rewards/security_audit_reward/mean": -0.39000000059604645,
      "rewards/security_audit_reward/std": 0.17118052244186402,
      "step": 150,
      "step_time": 37.038782767599876
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.2,
      "completions/max_length": 442.2,
      "completions/max_terminated_length": 316.0,
      "completions/mean_length": 283.75,
      "completions/mean_terminated_length": 237.85000305175782,
      "completions/min_length": 166.2,
      "completions/min_terminated_length": 166.2,
      "entropy": 1.4853489220142364,
      "epoch": 0.950920245398773,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 5.2570695877075195,
      "learning_rate": 2.6380368098159506e-07,
      "loss": 0.11004064083099366,
      "num_tokens": 545966.0,
      "reward": -0.15949999541044235,
      "reward_std": 0.19192611873149873,
      "rewards/format_reward/mean": 0.28500000238418577,
      "rewards/format_reward/std": 0.22434256076812745,
      "rewards/security_audit_reward/mean": -0.35,
      "rewards/security_audit_reward/std": 0.2,
      "step": 155,
      "step_time": 33.86747411140077
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.55,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 340.8,
      "completions/mean_length": 411.45,
      "completions/mean_terminated_length": 265.9666687011719,
      "completions/min_length": 192.0,
      "completions/min_terminated_length": 192.0,
      "entropy": 1.0828768193721772,
      "epoch": 0.9815950920245399,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 2.6537587642669678,
      "learning_rate": 2.5613496932515337e-07,
      "loss": 0.03556116819381714,
      "num_tokens": 563683.0,
      "reward": -0.20099999010562897,
      "reward_std": 0.1888158166781068,
      "rewards/format_reward/mean": 0.24000000059604645,
      "rewards/format_reward/std": 0.16870398968458175,
      "rewards/security_audit_reward/mean": -0.3899999976158142,
      "rewards/security_audit_reward/std": 0.2199999988079071,
      "step": 160,
      "step_time": 38.665304075799575
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 483.2,
      "completions/max_terminated_length": 393.2,
      "completions/mean_length": 359.2,
      "completions/mean_terminated_length": 303.3666687011719,
      "completions/min_length": 178.2,
      "completions/min_terminated_length": 178.2,
      "entropy": 1.1811485469341279,
      "epoch": 1.0122699386503067,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 4.203860282897949,
      "learning_rate": 2.4846625766871163e-07,
      "loss": -0.02532302737236023,
      "num_tokens": 580183.0,
      "reward": -0.1227499857544899,
      "reward_std": 0.2651766210794449,
      "rewards/format_reward/mean": 0.2675000011920929,
      "rewards/format_reward/std": 0.24115291833877564,
      "rewards/security_audit_reward/mean": -0.2899999976158142,
      "rewards/security_audit_reward/std": 0.2812127649784088,
      "step": 165,
      "step_time": 36.34888075860035
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.4,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 345.8,
      "completions/mean_length": 358.5,
      "completions/mean_terminated_length": 235.96666870117187,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "entropy": 1.2863860994577407,
      "epoch": 1.0429447852760736,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 2.71185302734375,
      "learning_rate": 2.4079754601226994e-07,
      "loss": 0.12254136800765991,
      "num_tokens": 597345.0,
      "reward": -0.20274999886751174,
      "reward_std": 0.1825057201087475,
      "rewards/format_reward/mean": 0.25750000327825545,
      "rewards/format_reward/std": 0.19183385372161865,
      "rewards/security_audit_reward/mean": -0.4,
      "rewards/security_audit_reward/std": 0.19711971282958984,
      "step": 170,
      "step_time": 38.96664929399922
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.5,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 323.4,
      "completions/mean_length": 407.1,
      "completions/mean_terminated_length": 234.40000610351564,
      "completions/min_length": 242.8,
      "completions/min_terminated_length": 140.4,
      "entropy": 1.179810070991516,
      "epoch": 1.0736196319018405,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 4.413055419921875,
      "learning_rate": 2.331288343558282e-07,
      "loss": 0.041037318110466,
      "num_tokens": 615063.0,
      "reward": -0.20374999046325684,
      "reward_std": 0.2052689865231514,
      "rewards/format_reward/mean": 0.31250000894069674,
      "rewards/format_reward/std": 0.22553626000881194,
      "rewards/security_audit_reward/mean": -0.425,
      "rewards/security_audit_reward/std": 0.23164966106414794,
      "step": 175,
      "step_time": 39.00492364500023
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 511.6,
      "completions/max_terminated_length": 424.0,
      "completions/mean_length": 399.45,
      "completions/mean_terminated_length": 347.9166748046875,
      "completions/min_length": 262.6,
      "completions/min_terminated_length": 262.6,
      "entropy": 1.1026120364665986,
      "epoch": 1.1042944785276074,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 4.408414840698242,
      "learning_rate": 2.254601226993865e-07,
      "loss": 0.058766734600067136,
      "num_tokens": 632696.0,
      "reward": -0.16599998623132706,
      "reward_std": 0.26377752125263215,
      "rewards/format_reward/mean": 0.24000000655651094,
      "rewards/format_reward/std": 0.21778101623058319,
      "rewards/security_audit_reward/mean": -0.34000000059604646,
      "rewards/security_audit_reward/std": 0.3105652093887329,
      "step": 180,
      "step_time": 39.09616019519963
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.2,
      "completions/max_length": 477.4,
      "completions/max_terminated_length": 421.6,
      "completions/mean_length": 322.45,
      "completions/mean_terminated_length": 273.1000030517578,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "entropy": 1.2888785600662231,
      "epoch": 1.1349693251533743,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 3.6877431869506836,
      "learning_rate": 2.1779141104294476e-07,
      "loss": -0.0771723210811615,
      "num_tokens": 649353.0,
      "reward": -0.1799999952316284,
      "reward_std": 0.3134476348757744,
      "rewards/format_reward/mean": 0.2750000089406967,
      "rewards/format_reward/std": 0.24135999679565429,
      "rewards/security_audit_reward/mean": -0.375,
      "rewards/security_audit_reward/std": 0.3593961834907532,
      "step": 185,
      "step_time": 36.71157897000012
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 495.0,
      "completions/max_terminated_length": 351.0,
      "completions/mean_length": 328.8,
      "completions/mean_terminated_length": 243.7,
      "completions/min_length": 164.8,
      "completions/min_terminated_length": 164.8,
      "entropy": 1.4585140287876128,
      "epoch": 1.165644171779141,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 4.987306118011475,
      "learning_rate": 2.1012269938650307e-07,
      "loss": -0.15080010890960693,
      "num_tokens": 665513.0,
      "reward": -0.050499990582466125,
      "reward_std": 0.2684710592031479,
      "rewards/format_reward/mean": 0.31000000387430193,
      "rewards/format_reward/std": 0.1994625985622406,
      "rewards/security_audit_reward/mean": -0.20500000119209288,
      "rewards/security_audit_reward/std": 0.31255176067352297,
      "step": 190,
      "step_time": 37.63543628939988
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.5,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 307.8,
      "completions/mean_length": 391.75,
      "completions/mean_terminated_length": 253.06666870117186,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "entropy": 1.1293343544006347,
      "epoch": 1.196319018404908,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 5.247244358062744,
      "learning_rate": 2.0245398773006135e-07,
      "loss": -0.04229157567024231,
      "num_tokens": 683268.0,
      "reward": -0.10224998965859414,
      "reward_std": 0.19266743455082178,
      "rewards/format_reward/mean": 0.3124999929219484,
      "rewards/format_reward/std": 0.1390557773411274,
      "rewards/security_audit_reward/mean": -0.2800000011920929,
      "rewards/security_audit_reward/std": 0.23863712549209595,
      "step": 195,
      "step_time": 38.93936442300037
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.4,
      "completions/max_length": 485.0,
      "completions/max_terminated_length": 346.2,
      "completions/mean_length": 364.6,
      "completions/mean_terminated_length": 265.8166748046875,
      "completions/min_length": 165.8,
      "completions/min_terminated_length": 165.8,
      "entropy": 0.8287177711725235,
      "epoch": 1.2269938650306749,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 2.4230945110321045,
      "learning_rate": 1.9478527607361963e-07,
      "loss": -0.05633368492126465,
      "num_tokens": 700760.0,
      "reward": -0.1807499848306179,
      "reward_std": 0.18529897555708885,
      "rewards/format_reward/mean": 0.3075000137090683,
      "rewards/format_reward/std": 0.15467575192451477,
      "rewards/security_audit_reward/mean": -0.39000000059604645,
      "rewards/security_audit_reward/std": 0.2105652093887329,
      "step": 200,
      "step_time": 37.065423558799736
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.25,
      "completions/max_length": 431.6,
      "completions/max_terminated_length": 338.8,
      "completions/mean_length": 284.1,
      "completions/mean_terminated_length": 225.65,
      "completions/min_length": 119.4,
      "completions/min_terminated_length": 119.4,
      "entropy": 1.2736368715763091,
      "epoch": 1.2576687116564418,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 4.797567367553711,
      "learning_rate": 1.8711656441717791e-07,
      "loss": 0.08297693133354186,
      "num_tokens": 716344.0,
      "reward": -0.07274999544024467,
      "reward_std": 0.24350565671920776,
      "rewards/format_reward/mean": 0.31749999821186065,
      "rewards/format_reward/std": 0.19669782146811485,
      "rewards/security_audit_reward/mean": -0.23999999985098838,
      "rewards/security_audit_reward/std": 0.2692204549908638,
      "step": 205,
      "step_time": 33.11877055760014
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.25,
      "completions/max_length": 484.0,
      "completions/max_terminated_length": 403.2,
      "completions/mean_length": 335.45,
      "completions/mean_terminated_length": 276.4500030517578,
      "completions/min_length": 173.8,
      "completions/min_terminated_length": 173.8,
      "entropy": 1.1084223449230195,
      "epoch": 1.2883435582822087,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 2.4603023529052734,
      "learning_rate": 1.7944785276073617e-07,
      "loss": 0.07945090532302856,
      "num_tokens": 733245.0,
      "reward": -0.13774999380111694,
      "reward_std": 0.2730386942625046,
      "rewards/format_reward/mean": 0.2174999989569187,
      "rewards/format_reward/std": 0.22229814901947975,
      "rewards/security_audit_reward/mean": -0.29000000059604647,
      "rewards/security_audit_reward/std": 0.3105652093887329,
      "step": 210,
      "step_time": 37.03591289120122
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.4,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 361.8,
      "completions/mean_length": 345.0,
      "completions/mean_terminated_length": 261.3000030517578,
      "completions/min_length": 184.8,
      "completions/min_terminated_length": 184.8,
      "entropy": 1.2274070978164673,
      "epoch": 1.3190184049079754,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 4.573819637298584,
      "learning_rate": 1.7177914110429448e-07,
      "loss": -0.07497722506523133,
      "num_tokens": 749917.0,
      "reward": -0.01174999624490738,
      "reward_std": 0.3002330154180527,
      "rewards/format_reward/mean": 0.3225000023841858,
      "rewards/format_reward/std": 0.1751384623348713,
      "rewards/security_audit_reward/mean": -0.15499999821186067,
      "rewards/security_audit_reward/std": 0.3648489773273468,
      "step": 215,
      "step_time": 38.89012140319937
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.2,
      "completions/max_length": 481.8,
      "completions/max_terminated_length": 344.6,
      "completions/mean_length": 292.3,
      "completions/mean_terminated_length": 234.26666870117188,
      "completions/min_length": 122.2,
      "completions/min_terminated_length": 122.2,
      "entropy": 1.1711494624614716,
      "epoch": 1.3496932515337423,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 3.879939556121826,
      "learning_rate": 1.6411042944785276e-07,
      "loss": 0.06901218891143798,
      "num_tokens": 765457.0,
      "reward": -0.2002499908208847,
      "reward_std": 0.19745510853827,
      "rewards/format_reward/mean": 0.20749999657273294,
      "rewards/format_reward/std": 0.2098293460905552,
      "rewards/security_audit_reward/mean": -0.375,
      "rewards/security_audit_reward/std": 0.20773502588272094,
      "step": 220,
      "step_time": 36.50884771559977
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.35,
      "completions/max_length": 451.4,
      "completions/max_terminated_length": 281.8,
      "completions/mean_length": 310.85,
      "completions/mean_terminated_length": 210.58333435058594,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "entropy": 1.4326449751853942,
      "epoch": 1.3803680981595092,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 5.483170986175537,
      "learning_rate": 1.5644171779141104e-07,
      "loss": -0.03490494191646576,
      "num_tokens": 782226.0,
      "reward": -0.14649999141693115,
      "reward_std": 0.19791007936000823,
      "rewards/format_reward/mean": 0.3049999952316284,
      "rewards/format_reward/std": 0.19433450996875762,
      "rewards/security_audit_reward/mean": -0.3399999998509884,
      "rewards/security_audit_reward/std": 0.22000000029802322,
      "step": 225,
      "step_time": 35.079213985799655
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.35,
      "completions/max_length": 511.2,
      "completions/max_terminated_length": 315.2,
      "completions/mean_length": 338.15,
      "completions/mean_terminated_length": 226.28333435058593,
      "completions/min_length": 134.8,
      "completions/min_terminated_length": 134.8,
      "entropy": 1.1364098012447357,
      "epoch": 1.4110429447852761,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 3.3578364849090576,
      "learning_rate": 1.4877300613496933e-07,
      "loss": 0.0896155834197998,
      "num_tokens": 798571.0,
      "reward": -0.11574998870491982,
      "reward_std": 0.19651760943233967,
      "rewards/format_reward/mean": 0.2674999989569187,
      "rewards/format_reward/std": 0.15046989992260934,
      "rewards/security_audit_reward/mean": -0.27999999821186067,
      "rewards/security_audit_reward/std": 0.2297215759754181,
      "step": 230,
      "step_time": 38.61798697480081
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.35,
      "completions/max_length": 505.4,
      "completions/max_terminated_length": 385.4,
      "completions/mean_length": 360.65,
      "completions/mean_terminated_length": 295.23333740234375,
      "completions/min_length": 210.2,
      "completions/min_terminated_length": 210.2,
      "entropy": 1.0565216183662414,
      "epoch": 1.441717791411043,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 4.266097068786621,
      "learning_rate": 1.4110429447852758e-07,
      "loss": 0.07759050726890564,
      "num_tokens": 815570.0,
      "reward": -0.06749999299645423,
      "reward_std": 0.27374918162822726,
      "rewards/format_reward/mean": 0.37000001072883604,
      "rewards/format_reward/std": 0.1865294199436903,
      "rewards/security_audit_reward/mean": -0.2550000011920929,
      "rewards/security_audit_reward/std": 0.32802181243896483,
      "step": 235,
      "step_time": 38.44030983600023
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 495.6,
      "completions/max_terminated_length": 352.6,
      "completions/mean_length": 336.05,
      "completions/mean_terminated_length": 243.60000915527343,
      "completions/min_length": 145.8,
      "completions/min_terminated_length": 145.8,
      "entropy": 1.417020809650421,
      "epoch": 1.4723926380368098,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 4.767548084259033,
      "learning_rate": 1.334355828220859e-07,
      "loss": 0.03671485185623169,
      "num_tokens": 831713.0,
      "reward": -0.12849999219179153,
      "reward_std": 0.19381159394979477,
      "rewards/format_reward/mean": 0.3300000041723251,
      "rewards/format_reward/std": 0.20168980173766612,
      "rewards/security_audit_reward/mean": -0.325,
      "rewards/security_audit_reward/std": 0.20773502588272094,
      "step": 240,
      "step_time": 37.49174958900003
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.6,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 343.0,
      "completions/mean_length": 417.3,
      "completions/mean_terminated_length": 293.7,
      "completions/min_length": 246.8,
      "completions/min_terminated_length": 246.8,
      "entropy": 1.0088598132133484,
      "epoch": 1.5030674846625767,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 3.1936607360839844,
      "learning_rate": 1.2576687116564417e-07,
      "loss": -0.03240810632705689,
      "num_tokens": 849855.0,
      "reward": -0.11324999332427979,
      "reward_std": 0.2602782666683197,
      "rewards/format_reward/mean": 0.3224999994039536,
      "rewards/format_reward/std": 0.20757876634597777,
      "rewards/security_audit_reward/mean": -0.3,
      "rewards/security_audit_reward/std": 0.3154700517654419,
      "step": 245,
      "step_time": 39.07302968719996
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.2,
      "completions/max_length": 459.6,
      "completions/max_terminated_length": 428.2,
      "completions/mean_length": 325.35,
      "completions/mean_terminated_length": 277.5833374023438,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "entropy": 1.1867628961801528,
      "epoch": 1.5337423312883436,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 3.9449942111968994,
      "learning_rate": 1.1809815950920244e-07,
      "loss": -0.005416367202997208,
      "num_tokens": 866330.0,
      "reward": -0.08199999034404755,
      "reward_std": 0.2747137784957886,
      "rewards/format_reward/mean": 0.3099999874830246,
      "rewards/format_reward/std": 0.17935641929507257,
      "rewards/security_audit_reward/mean": -0.25,
      "rewards/security_audit_reward/std": 0.33094010353088377,
      "step": 250,
      "step_time": 35.27377968320034
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.25,
      "completions/max_length": 450.0,
      "completions/max_terminated_length": 311.2,
      "completions/mean_length": 283.8,
      "completions/mean_terminated_length": 202.06666870117186,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "entropy": 1.322118791937828,
      "epoch": 1.5644171779141103,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 6.9948039054870605,
      "learning_rate": 1.1042944785276073e-07,
      "loss": 0.055895209312438965,
      "num_tokens": 881650.0,
      "reward": -0.11924999132752419,
      "reward_std": 0.14510822538286447,
      "rewards/format_reward/mean": 0.3024999976158142,
      "rewards/format_reward/std": 0.15741010159254074,
      "rewards/security_audit_reward/mean": -0.3,
      "rewards/security_audit_reward/std": 0.15773502588272095,
      "step": 255,
      "step_time": 34.332786842800125
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 485.0,
      "completions/max_terminated_length": 312.2,
      "completions/mean_length": 312.0,
      "completions/mean_terminated_length": 225.13333740234376,
      "completions/min_length": 141.8,
      "completions/min_terminated_length": 141.8,
      "entropy": 1.1261488378047944,
      "epoch": 1.5950920245398774,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 5.633542537689209,
      "learning_rate": 1.0276073619631902e-07,
      "loss": 0.12422184944152832,
      "num_tokens": 898170.0,
      "reward": -0.09424999356269836,
      "reward_std": 0.2452640563249588,
      "rewards/format_reward/mean": 0.3274999916553497,
      "rewards/format_reward/std": 0.19424656331539153,
      "rewards/security_audit_reward/mean": -0.275,
      "rewards/security_audit_reward/std": 0.29574271440505984,
      "step": 260,
      "step_time": 37.18088837539908
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 481.4,
      "completions/max_terminated_length": 333.8,
      "completions/mean_length": 323.4,
      "completions/mean_terminated_length": 233.95,
      "completions/min_length": 174.6,
      "completions/min_terminated_length": 174.6,
      "entropy": 1.3164357602596284,
      "epoch": 1.6257668711656441,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 4.177126884460449,
      "learning_rate": 9.50920245398773e-08,
      "loss": -0.0031075358390808107,
      "num_tokens": 914438.0,
      "reward": -0.109499990940094,
      "reward_std": 0.19355954378843307,
      "rewards/format_reward/mean": 0.3700000077486038,
      "rewards/format_reward/std": 0.19431518614292145,
      "rewards/security_audit_reward/mean": -0.31500000059604644,
      "rewards/security_audit_reward/std": 0.21745660305023193,
      "step": 265,
      "step_time": 36.75480746599969
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15,
      "completions/max_length": 460.2,
      "completions/max_terminated_length": 279.2,
      "completions/mean_length": 224.9,
      "completions/mean_terminated_length": 170.45000305175782,
      "completions/min_length": 84.4,
      "completions/min_terminated_length": 84.4,
      "entropy": 1.267154586315155,
      "epoch": 1.656441717791411,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 7.552680015563965,
      "learning_rate": 8.742331288343557e-08,
      "loss": -0.12154214382171631,
      "num_tokens": 928536.0,
      "reward": -0.07999998778104782,
      "reward_std": 0.16925212144851684,
      "rewards/format_reward/mean": 0.37499999403953554,
      "rewards/format_reward/std": 0.1521439790725708,
      "rewards/security_audit_reward/mean": -0.275,
      "rewards/security_audit_reward/std": 0.20773502588272094,
      "step": 270,
      "step_time": 35.04094967719975
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 469.4,
      "completions/max_terminated_length": 287.6,
      "completions/mean_length": 300.95,
      "completions/mean_terminated_length": 204.76667175292968,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "entropy": 1.1936017721891403,
      "epoch": 1.687116564417178,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 7.106525421142578,
      "learning_rate": 7.975460122699386e-08,
      "loss": -0.0458857923746109,
      "num_tokens": 944307.0,
      "reward": -0.05174999088048935,
      "reward_std": 0.23333178758621215,
      "rewards/format_reward/mean": 0.3874999940395355,
      "rewards/format_reward/std": 0.16559004038572311,
      "rewards/security_audit_reward/mean": -0.24000000059604645,
      "rewards/security_audit_reward/std": 0.2866505742073059,
      "step": 275,
      "step_time": 36.0470411268001
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.2,
      "completions/max_length": 498.6,
      "completions/max_terminated_length": 375.0,
      "completions/mean_length": 295.25,
      "completions/mean_terminated_length": 232.6666717529297,
      "completions/min_length": 97.8,
      "completions/min_terminated_length": 97.8,
      "entropy": 1.460896384716034,
      "epoch": 1.7177914110429446,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 5.004129409790039,
      "learning_rate": 7.208588957055214e-08,
      "loss": -0.10658804178237916,
      "num_tokens": 960078.0,
      "reward": -0.01824999153614044,
      "reward_std": 0.2521414369344711,
      "rewards/format_reward/mean": 0.3825000047683716,
      "rewards/format_reward/std": 0.15327396541833876,
      "rewards/security_audit_reward/mean": -0.1899999976158142,
      "rewards/security_audit_reward/std": 0.3234777390956879,
      "step": 280,
      "step_time": 37.26412723539943
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 501.0,
      "completions/max_terminated_length": 363.2,
      "completions/mean_length": 340.6,
      "completions/mean_terminated_length": 257.06666870117186,
      "completions/min_length": 146.2,
      "completions/min_terminated_length": 146.2,
      "entropy": 1.0431257128715514,
      "epoch": 1.7484662576687118,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 4.059199333190918,
      "learning_rate": 6.441717791411043e-08,
      "loss": -0.10386581420898437,
      "num_tokens": 976992.0,
      "reward": -0.06924999207258224,
      "reward_std": 0.2484972782433033,
      "rewards/format_reward/mean": 0.38750000596046447,
      "rewards/format_reward/std": 0.1250488668680191,
      "rewards/security_audit_reward/mean": -0.26500000059604645,
      "rewards/security_audit_reward/std": 0.3080150008201599,
      "step": 285,
      "step_time": 38.17630281240017
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.05,
      "completions/max_length": 435.0,
      "completions/max_terminated_length": 413.6,
      "completions/mean_length": 281.65,
      "completions/mean_terminated_length": 274.0,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "entropy": 1.1775987446308136,
      "epoch": 1.7791411042944785,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 4.12350606918335,
      "learning_rate": 5.674846625766871e-08,
      "loss": -0.03856886327266693,
      "num_tokens": 992819.0,
      "reward": -0.05999999046325684,
      "reward_std": 0.1456713281571865,
      "rewards/format_reward/mean": 0.360000005364418,
      "rewards/format_reward/std": 0.12044776938855647,
      "rewards/security_audit_reward/mean": -0.23999999985098838,
      "rewards/security_audit_reward/std": 0.17773502618074416,
      "step": 290,
      "step_time": 33.279480656001034
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15,
      "completions/max_length": 470.0,
      "completions/max_terminated_length": 383.0,
      "completions/mean_length": 306.15,
      "completions/mean_terminated_length": 264.66666717529296,
      "completions/min_length": 170.8,
      "completions/min_terminated_length": 170.8,
      "entropy": 1.3437508165836334,
      "epoch": 1.8098159509202454,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 5.014013290405273,
      "learning_rate": 4.907975460122699e-08,
      "loss": 0.1800641179084778,
      "num_tokens": 1008676.0,
      "reward": -0.14524998962879182,
      "reward_std": 0.19439554661512376,
      "rewards/format_reward/mean": 0.33249999284744264,
      "rewards/format_reward/std": 0.21220951080322265,
      "rewards/security_audit_reward/mean": -0.35,
      "rewards/security_audit_reward/std": 0.2154700517654419,
      "step": 295,
      "step_time": 35.92737517459973
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 471.6,
      "completions/max_terminated_length": 299.6,
      "completions/mean_length": 360.45,
      "completions/mean_terminated_length": 234.9166687011719,
      "completions/min_length": 229.0,
      "completions/min_terminated_length": 126.6,
      "entropy": 1.1840724140405654,
      "epoch": 1.8404907975460123,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 2.570652484893799,
      "learning_rate": 4.1411042944785274e-08,
      "loss": 0.019638296961784363,
      "num_tokens": 1025285.0,
      "reward": -0.06699999049305916,
      "reward_std": 0.2400740846991539,
      "rewards/format_reward/mean": 0.3600000023841858,
      "rewards/format_reward/std": 0.1531308189034462,
      "rewards/security_audit_reward/mean": -0.25,
      "rewards/security_audit_reward/std": 0.2868344783782959,
      "step": 300,
      "step_time": 35.41554451999982
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 491.0,
      "completions/max_terminated_length": 357.4,
      "completions/mean_length": 330.65,
      "completions/mean_terminated_length": 249.31666870117186,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "entropy": 1.2406673014163971,
      "epoch": 1.871165644171779,
      "frac_reward_zero_std": 0.0,
      "grad_norm": 4.481863975524902,
      "learning_rate": 3.3742331288343556e-08,
      "loss": 0.2060640573501587,
      "num_tokens": 1041362.0,
      "reward": -0.005999994277954101,
      "reward_std": 0.23084985613822936,
      "rewards/format_reward/mean": 0.4000000059604645,
      "rewards/format_reward/std": 0.12440616972744464,
      "rewards/security_audit_reward/mean": -0.1800000011920929,
      "rewards/security_audit_reward/std": 0.2963721513748169,
      "step": 305,
      "step_time": 37.082924159199685
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.25,
      "completions/max_length": 440.8,
      "completions/max_terminated_length": 313.2,
      "completions/mean_length": 296.75,
      "completions/mean_terminated_length": 235.5666748046875,
      "completions/min_length": 162.4,
      "completions/min_terminated_length": 162.4,
      "entropy": 1.4028007209300994,
      "epoch": 1.9018404907975461,
      "frac_reward_zero_std": 0.1,
      "grad_norm": 3.5625224113464355,
      "learning_rate": 2.607361963190184e-08,
      "loss": -0.09365988969802856,
      "num_tokens": 1056493.0,
      "reward": -0.07374998778104783,
      "reward_std": 0.17730526700615884,
      "rewards/format_reward/mean": 0.3725000023841858,
      "rewards/format_reward/std": 0.1375160299241543,
      "rewards/security_audit_reward/mean": -0.26500000059604645,
      "rewards/security_audit_reward/std": 0.21745660305023193,
      "step": 310,
      "step_time": 32.90120237959964
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.35,
      "completions/max_length": 508.8,
      "completions/max_terminated_length": 354.0,
      "completions/mean_length": 356.8,
      "completions/mean_terminated_length": 257.8333343505859,
      "completions/min_length": 172.4,
      "completions/min_terminated_length": 172.4,
      "entropy": 1.2189550220966339,
      "epoch": 1.9325153374233128,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 4.627664089202881,
      "learning_rate": 1.8404907975460124e-08,
      "loss": -0.043705222010612485,
      "num_tokens": 1073209.0,
      "reward": -0.10774998962879181,
      "reward_std": 0.1972955085337162,
      "rewards/format_reward/mean": 0.35250000059604647,
      "rewards/format_reward/std": 0.13575982302427292,
      "rewards/security_audit_reward/mean": -0.3050000011920929,
      "rewards/security_audit_reward/std": 0.25009607076644896,
      "step": 315,
      "step_time": 38.71515165839992
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.3,
      "completions/max_length": 512.0,
      "completions/max_terminated_length": 394.2,
      "completions/mean_length": 341.55,
      "completions/mean_terminated_length": 271.6666717529297,
      "completions/min_length": 180.4,
      "completions/min_terminated_length": 180.4,
      "entropy": 1.1277358770370483,
      "epoch": 1.9631901840490797,
      "frac_reward_zero_std": 0.3,
      "grad_norm": 3.979893207550049,
      "learning_rate": 1.0736196319018405e-08,
      "loss": -0.07816079258918762,
      "num_tokens": 1089918.0,
      "reward": -0.08449999019503593,
      "reward_std": 0.14596682507544756,
      "rewards/format_reward/mean": 0.3949999988079071,
      "rewards/format_reward/std": 0.13996364884078502,
      "rewards/security_audit_reward/mean": -0.29000000059604647,
      "rewards/security_audit_reward/std": 0.17118052244186402,
      "step": 320,
      "step_time": 39.41049809280048
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15,
      "completions/max_length": 488.4,
      "completions/max_terminated_length": 420.6,
      "completions/mean_length": 319.75,
      "completions/mean_terminated_length": 284.56667175292966,
      "completions/min_length": 199.2,
      "completions/min_terminated_length": 199.2,
      "entropy": 1.3403348803520203,
      "epoch": 1.9938650306748467,
      "frac_reward_zero_std": 0.2,
      "grad_norm": 3.1303930282592773,
      "learning_rate": 3.067484662576687e-09,
      "loss": -0.08682631254196167,
      "num_tokens": 1105841.0,
      "reward": -0.05374999940395355,
      "reward_std": 0.21490582572296263,
      "rewards/format_reward/mean": 0.2875,
      "rewards/format_reward/std": 0.19336618185043336,
      "rewards/security_audit_reward/mean": -0.2,
      "rewards/security_audit_reward/std": 0.22739237546920776,
      "step": 325,
      "step_time": 37.026589032400445
    }
  ],
  "logging_steps": 5,
  "max_steps": 326,
  "num_input_tokens_seen": 1108991,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}