File size: 34,762 Bytes
bb8822b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 10,
  "global_step": 750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 298.3437587738037,
      "epoch": 0.013333333333333334,
      "grad_norm": 0.16560879349708557,
      "kl": 0.00018159300088882446,
      "learning_rate": 4e-07,
      "loss": 0.0049,
      "reward": 0.15208333674818278,
      "reward_std": 0.2552751675248146,
      "rewards/accuracy_reward": 0.06666666846722365,
      "rewards/format_reward": 0.08541666883975267,
      "step": 10
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 314.44792423248293,
      "epoch": 0.02666666666666667,
      "grad_norm": 0.2575075328350067,
      "kl": 0.0009123936295509338,
      "learning_rate": 8e-07,
      "loss": 0.0058,
      "reward": 0.20625000558793544,
      "reward_std": 0.2727560464292765,
      "rewards/accuracy_reward": 0.06458333488553762,
      "rewards/format_reward": 0.14166667070239783,
      "step": 20
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 252.6500078201294,
      "epoch": 0.04,
      "grad_norm": 0.46988776326179504,
      "kl": 0.03377872705459595,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.0205,
      "reward": 0.5270833471789956,
      "reward_std": 0.4601708807051182,
      "rewards/accuracy_reward": 0.07916666902601718,
      "rewards/format_reward": 0.44791667852550743,
      "step": 30
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 242.21250705718995,
      "epoch": 0.05333333333333334,
      "grad_norm": 0.19061416387557983,
      "kl": 0.06111354827880859,
      "learning_rate": 1.6e-06,
      "loss": 0.048,
      "reward": 0.8041666891425848,
      "reward_std": 0.4267156172543764,
      "rewards/accuracy_reward": 0.06250000167638063,
      "rewards/format_reward": 0.7416666835546494,
      "step": 40
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 197.73125562667846,
      "epoch": 0.06666666666666667,
      "grad_norm": 0.2476302981376648,
      "kl": 0.0408905029296875,
      "learning_rate": 2e-06,
      "loss": 0.021,
      "reward": 0.8895833583548665,
      "reward_std": 0.4103152878582478,
      "rewards/accuracy_reward": 0.09375000242143869,
      "rewards/format_reward": 0.7958333509042859,
      "step": 50
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 198.86250495910645,
      "epoch": 0.08,
      "grad_norm": 0.13683611154556274,
      "kl": 0.0466217041015625,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.0316,
      "reward": 0.9916666936129331,
      "reward_std": 0.36507078595459463,
      "rewards/accuracy_reward": 0.11666666958481073,
      "rewards/format_reward": 0.8750000182539225,
      "step": 60
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 225.4541726589203,
      "epoch": 0.09333333333333334,
      "grad_norm": 0.17669692635536194,
      "kl": 0.06876106262207031,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.0388,
      "reward": 1.0520833637565374,
      "reward_std": 0.4207858666777611,
      "rewards/accuracy_reward": 0.19375000540167092,
      "rewards/format_reward": 0.8583333497866988,
      "step": 70
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 194.30625553131102,
      "epoch": 0.10666666666666667,
      "grad_norm": 0.15089309215545654,
      "kl": 0.08316802978515625,
      "learning_rate": 2.9995938617691924e-06,
      "loss": 0.03,
      "reward": 1.1625000346451997,
      "reward_std": 0.44051293805241587,
      "rewards/accuracy_reward": 0.2520833399146795,
      "rewards/format_reward": 0.91041667945683,
      "step": 80
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 202.31667232513428,
      "epoch": 0.12,
      "grad_norm": 0.2549769878387451,
      "kl": 0.06892852783203125,
      "learning_rate": 2.9963460753897363e-06,
      "loss": 0.0229,
      "reward": 1.1770833685994149,
      "reward_std": 0.4563848368823528,
      "rewards/accuracy_reward": 0.2687500063329935,
      "rewards/format_reward": 0.9083333484828472,
      "step": 90
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 215.91458954811097,
      "epoch": 0.13333333333333333,
      "grad_norm": 3.687699556350708,
      "kl": 0.1230712890625,
      "learning_rate": 2.989857536612915e-06,
      "loss": 0.0166,
      "reward": 1.235416704416275,
      "reward_std": 0.4576607421040535,
      "rewards/accuracy_reward": 0.32083334121853113,
      "rewards/format_reward": 0.914583346247673,
      "step": 100
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 231.20208988189697,
      "epoch": 0.14666666666666667,
      "grad_norm": 0.19754290580749512,
      "kl": 0.09471588134765625,
      "learning_rate": 2.980142298168869e-06,
      "loss": 0.0774,
      "reward": 1.204166702181101,
      "reward_std": 0.5094456784427166,
      "rewards/accuracy_reward": 0.329166673310101,
      "rewards/format_reward": 0.8750000186264515,
      "step": 110
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 210.604172039032,
      "epoch": 0.16,
      "grad_norm": 0.9343002438545227,
      "kl": 0.10347900390625,
      "learning_rate": 2.9672214011007086e-06,
      "loss": 0.0425,
      "reward": 1.2375000335276127,
      "reward_std": 0.5164696607738734,
      "rewards/accuracy_reward": 0.35208334103226663,
      "rewards/format_reward": 0.8854166835546493,
      "step": 120
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 254.06667470932007,
      "epoch": 0.17333333333333334,
      "grad_norm": 0.25169649720191956,
      "kl": 0.148199462890625,
      "learning_rate": 2.951122829194296e-06,
      "loss": 0.0796,
      "reward": 1.1187500342726708,
      "reward_std": 0.5015288021415472,
      "rewards/accuracy_reward": 0.26250000670552254,
      "rewards/format_reward": 0.8562500193715096,
      "step": 130
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 233.81875615119935,
      "epoch": 0.18666666666666668,
      "grad_norm": 0.7912704348564148,
      "kl": 2.357049560546875,
      "learning_rate": 2.9318814483715983e-06,
      "loss": 0.1295,
      "reward": 0.9895833631977439,
      "reward_std": 0.5210714556276799,
      "rewards/accuracy_reward": 0.26250000577419996,
      "rewards/format_reward": 0.7270833453163504,
      "step": 140
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 227.87708988189698,
      "epoch": 0.2,
      "grad_norm": 1.25613272190094,
      "kl": 0.46229248046875,
      "learning_rate": 2.9095389311788626e-06,
      "loss": 0.0646,
      "reward": 1.129166703671217,
      "reward_std": 0.49879880994558334,
      "rewards/accuracy_reward": 0.2687500067055225,
      "rewards/format_reward": 0.8604166861623526,
      "step": 150
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 194.6770890235901,
      "epoch": 0.21333333333333335,
      "grad_norm": 0.4988195300102234,
      "kl": 0.613861083984375,
      "learning_rate": 2.8841436665331635e-06,
      "loss": 0.0366,
      "reward": 1.2395833693444729,
      "reward_std": 0.48180873580276967,
      "rewards/accuracy_reward": 0.31666667591780423,
      "rewards/format_reward": 0.922916679084301,
      "step": 160
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 168.8062547683716,
      "epoch": 0.22666666666666666,
      "grad_norm": 0.9171387553215027,
      "kl": 1.17796630859375,
      "learning_rate": 2.855750654922781e-06,
      "loss": 0.0156,
      "reward": 1.1541667036712169,
      "reward_std": 0.5033410575240851,
      "rewards/accuracy_reward": 0.25416667219251393,
      "rewards/format_reward": 0.9000000156462192,
      "step": 170
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 241.8708402633667,
      "epoch": 0.24,
      "grad_norm": 25.64914894104004,
      "kl": 114.61384887695313,
      "learning_rate": 2.8244213892883906e-06,
      "loss": 2.7595,
      "reward": 1.181250037252903,
      "reward_std": 0.51867739520967,
      "rewards/accuracy_reward": 0.31041667331010103,
      "rewards/format_reward": 0.8708333529531955,
      "step": 180
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 314.0875092506409,
      "epoch": 0.25333333333333335,
      "grad_norm": 0.8180872797966003,
      "kl": 2.062310791015625,
      "learning_rate": 2.7902237218430485e-06,
      "loss": 0.1151,
      "reward": 1.1354166999459268,
      "reward_std": 0.5186546068638563,
      "rewards/accuracy_reward": 0.26458334010094403,
      "rewards/format_reward": 0.8708333499729634,
      "step": 190
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 271.76875801086425,
      "epoch": 0.26666666666666666,
      "grad_norm": 2.5058634281158447,
      "kl": 1.689434814453125,
      "learning_rate": 2.753231717119405e-06,
      "loss": 0.0826,
      "reward": 1.1770833715796472,
      "reward_std": 0.5468059632927179,
      "rewards/accuracy_reward": 0.3104166744276881,
      "rewards/format_reward": 0.8666666835546494,
      "step": 200
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 189.7958378791809,
      "epoch": 0.28,
      "grad_norm": 0.8333961367607117,
      "kl": 2.188385009765625,
      "learning_rate": 2.713525491562421e-06,
      "loss": 0.159,
      "reward": 1.1291666947305203,
      "reward_std": 0.48742703087627887,
      "rewards/accuracy_reward": 0.2375000048428774,
      "rewards/format_reward": 0.8916666850447654,
      "step": 210
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 169.8958384513855,
      "epoch": 0.29333333333333333,
      "grad_norm": 0.48866531252861023,
      "kl": 1.365594482421875,
      "learning_rate": 2.671191040014989e-06,
      "loss": 0.1571,
      "reward": 1.214583370089531,
      "reward_std": 0.45735101476311685,
      "rewards/accuracy_reward": 0.2812500067055225,
      "rewards/format_reward": 0.9333333462476731,
      "step": 220
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 210.16042280197144,
      "epoch": 0.30666666666666664,
      "grad_norm": 0.4208928346633911,
      "kl": 1.861163330078125,
      "learning_rate": 2.626320049472249e-06,
      "loss": 0.2113,
      "reward": 1.1687500353902578,
      "reward_std": 0.48322329856455326,
      "rewards/accuracy_reward": 0.3020833406597376,
      "rewards/format_reward": 0.866666679084301,
      "step": 230
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 213.62292337417603,
      "epoch": 0.32,
      "grad_norm": 0.9921014308929443,
      "kl": 5.52261962890625,
      "learning_rate": 2.5790097005079765e-06,
      "loss": 0.4186,
      "reward": 1.1104166995733977,
      "reward_std": 0.5192686680704355,
      "rewards/accuracy_reward": 0.26250000651925803,
      "rewards/format_reward": 0.8479166831821203,
      "step": 240
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 173.20625500679017,
      "epoch": 0.3333333333333333,
      "grad_norm": 0.5788355469703674,
      "kl": 0.586700439453125,
      "learning_rate": 2.529362456803101e-06,
      "loss": 0.0873,
      "reward": 1.2354166999459266,
      "reward_std": 0.5144322618842125,
      "rewards/accuracy_reward": 0.32708334140479567,
      "rewards/format_reward": 0.9083333492279053,
      "step": 250
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 170.66458702087402,
      "epoch": 0.3466666666666667,
      "grad_norm": 1.29031503200531,
      "kl": 3.8255126953125,
      "learning_rate": 2.477485843232183e-06,
      "loss": 0.2837,
      "reward": 1.2604167006909848,
      "reward_std": 0.5114245742559433,
      "rewards/accuracy_reward": 0.3416666738688946,
      "rewards/format_reward": 0.918750012665987,
      "step": 260
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 167.50208835601808,
      "epoch": 0.36,
      "grad_norm": 0.6127444505691528,
      "kl": 0.5264404296875,
      "learning_rate": 2.4234922129884873e-06,
      "loss": 0.0213,
      "reward": 1.3562500387430192,
      "reward_std": 0.42282451651990416,
      "rewards/accuracy_reward": 0.38958334233611824,
      "rewards/format_reward": 0.9666666738688946,
      "step": 270
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 177.09583835601808,
      "epoch": 0.37333333333333335,
      "grad_norm": 0.6640939712524414,
      "kl": 1.479010009765625,
      "learning_rate": 2.36749850425198e-06,
      "loss": 0.1688,
      "reward": 1.250000035762787,
      "reward_std": 0.4655508127063513,
      "rewards/accuracy_reward": 0.3479166755452752,
      "rewards/format_reward": 0.9020833499729634,
      "step": 280
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 198.85000591278077,
      "epoch": 0.38666666666666666,
      "grad_norm": 0.14313329756259918,
      "kl": 1.668316650390625,
      "learning_rate": 2.3096259869272697e-06,
      "loss": 0.1292,
      "reward": 1.289583370834589,
      "reward_std": 0.47122009098529816,
      "rewards/accuracy_reward": 0.3645833419635892,
      "rewards/format_reward": 0.9250000134110451,
      "step": 290
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 230.99375743865966,
      "epoch": 0.4,
      "grad_norm": 0.10921728610992432,
      "kl": 0.85283203125,
      "learning_rate": 2.25e-06,
      "loss": 0.0644,
      "reward": 1.3041667066514493,
      "reward_std": 0.4090763859450817,
      "rewards/accuracy_reward": 0.37500000894069674,
      "rewards/format_reward": 0.9291666768491268,
      "step": 300
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 270.9208402633667,
      "epoch": 0.41333333333333333,
      "grad_norm": 1.005725383758545,
      "kl": 0.2567535400390625,
      "learning_rate": 2.1887496800805174e-06,
      "loss": 0.035,
      "reward": 1.333333370089531,
      "reward_std": 0.46985283866524696,
      "rewards/accuracy_reward": 0.3937500087544322,
      "rewards/format_reward": 0.939583346992731,
      "step": 310
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 238.92500734329224,
      "epoch": 0.4266666666666667,
      "grad_norm": 1.076439380645752,
      "kl": 1.3035552978515625,
      "learning_rate": 2.126007681722727e-06,
      "loss": 0.1024,
      "reward": 1.2500000342726707,
      "reward_std": 0.48043784201145173,
      "rewards/accuracy_reward": 0.35625000931322576,
      "rewards/format_reward": 0.8937500141561031,
      "step": 320
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 414.82501125335693,
      "epoch": 0.44,
      "grad_norm": 0.6154415011405945,
      "kl": 4.590509033203125,
      "learning_rate": 2.061909890123868e-06,
      "loss": 0.2873,
      "reward": 0.777083358168602,
      "reward_std": 0.4854964125901461,
      "rewards/accuracy_reward": 0.22083333935588598,
      "rewards/format_reward": 0.5562500126659871,
      "step": 330
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 237.14375581741334,
      "epoch": 0.4533333333333333,
      "grad_norm": 0.4070974886417389,
      "kl": 0.4917999267578125,
      "learning_rate": 1.9965951268274372e-06,
      "loss": 0.0908,
      "reward": 1.347916703671217,
      "reward_std": 0.5186698414385319,
      "rewards/accuracy_reward": 0.4395833432674408,
      "rewards/format_reward": 0.9083333507180213,
      "step": 340
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 211.02292304039003,
      "epoch": 0.4666666666666667,
      "grad_norm": 0.7467241883277893,
      "kl": 1.289691162109375,
      "learning_rate": 1.9302048490666355e-06,
      "loss": 0.1216,
      "reward": 1.3833333637565375,
      "reward_std": 0.4452923540025949,
      "rewards/accuracy_reward": 0.4604166755452752,
      "rewards/format_reward": 0.9229166809469461,
      "step": 350
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 207.78333921432494,
      "epoch": 0.48,
      "grad_norm": 0.5213920474052429,
      "kl": 0.3299652099609375,
      "learning_rate": 1.8628828433995015e-06,
      "loss": 0.0802,
      "reward": 1.354166705906391,
      "reward_std": 0.43137194626033304,
      "rewards/accuracy_reward": 0.4145833427086473,
      "rewards/format_reward": 0.9395833432674408,
      "step": 360
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 214.07083945274354,
      "epoch": 0.49333333333333335,
      "grad_norm": 11.964993476867676,
      "kl": 1.5061492919921875,
      "learning_rate": 1.7947749142992453e-06,
      "loss": 0.1615,
      "reward": 1.2437500320374966,
      "reward_std": 0.4532905198633671,
      "rewards/accuracy_reward": 0.3145833395421505,
      "rewards/format_reward": 0.9291666775941849,
      "step": 370
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 231.10625724792482,
      "epoch": 0.5066666666666667,
      "grad_norm": 0.7543414235115051,
      "kl": 0.93128662109375,
      "learning_rate": 1.7260285683742248e-06,
      "loss": 0.1419,
      "reward": 1.3000000320374965,
      "reward_std": 0.5549088928848505,
      "rewards/accuracy_reward": 0.4020833412185311,
      "rewards/format_reward": 0.8979166842997074,
      "step": 380
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 234.94167280197144,
      "epoch": 0.52,
      "grad_norm": 1.8742121458053589,
      "kl": 37.191220092773435,
      "learning_rate": 1.6567926949014804e-06,
      "loss": 0.5318,
      "reward": 1.3187500394880771,
      "reward_std": 0.5167587421834469,
      "rewards/accuracy_reward": 0.40833334214985373,
      "rewards/format_reward": 0.9104166820645332,
      "step": 390
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 188.943754863739,
      "epoch": 0.5333333333333333,
      "grad_norm": 0.14478746056556702,
      "kl": 0.573187255859375,
      "learning_rate": 1.5872172433657137e-06,
      "loss": 0.0723,
      "reward": 1.3020833745598792,
      "reward_std": 0.4792703174054623,
      "rewards/accuracy_reward": 0.3645833421498537,
      "rewards/format_reward": 0.9375000111758709,
      "step": 400
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 178.75833768844603,
      "epoch": 0.5466666666666666,
      "grad_norm": 1.8087667226791382,
      "kl": 0.8369659423828125,
      "learning_rate": 1.5174528987020958e-06,
      "loss": 0.0923,
      "reward": 1.3125000409781933,
      "reward_std": 0.48596611246466637,
      "rewards/accuracy_reward": 0.39166667591780424,
      "rewards/format_reward": 0.9208333469927311,
      "step": 410
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 256.539590215683,
      "epoch": 0.56,
      "grad_norm": 0.5763727426528931,
      "kl": 2.3948883056640624,
      "learning_rate": 1.4476507549462489e-06,
      "loss": 0.2606,
      "reward": 1.1541667029261589,
      "reward_std": 0.6029179282486439,
      "rewards/accuracy_reward": 0.35000000819563865,
      "rewards/format_reward": 0.8041666833683848,
      "step": 420
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 163.55833940505983,
      "epoch": 0.5733333333333334,
      "grad_norm": 0.23491673171520233,
      "kl": 0.215203857421875,
      "learning_rate": 1.3779619879982127e-06,
      "loss": 0.023,
      "reward": 1.420833373069763,
      "reward_std": 0.41848115585744383,
      "rewards/accuracy_reward": 0.4520833443850279,
      "rewards/format_reward": 0.9687500074505806,
      "step": 430
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 183.7666721343994,
      "epoch": 0.5866666666666667,
      "grad_norm": 0.1774040013551712,
      "kl": 0.300738525390625,
      "learning_rate": 1.308537528209108e-06,
      "loss": 0.0643,
      "reward": 1.4270833656191826,
      "reward_std": 0.4158630233258009,
      "rewards/accuracy_reward": 0.46666667573153975,
      "rewards/format_reward": 0.9604166753590107,
      "step": 440
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 212.7937566280365,
      "epoch": 0.6,
      "grad_norm": 0.20609110593795776,
      "kl": 0.5560638427734375,
      "learning_rate": 1.2395277334996047e-06,
      "loss": 0.0963,
      "reward": 1.3229166999459268,
      "reward_std": 0.4623309187591076,
      "rewards/accuracy_reward": 0.3937500096857548,
      "rewards/format_reward": 0.9291666783392429,
      "step": 450
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 189.80208778381348,
      "epoch": 0.6133333333333333,
      "grad_norm": 0.49082836508750916,
      "kl": 0.7411895751953125,
      "learning_rate": 1.1710820637181448e-06,
      "loss": 0.0805,
      "reward": 1.320833370834589,
      "reward_std": 0.4459747776389122,
      "rewards/accuracy_reward": 0.37916667480021715,
      "rewards/format_reward": 0.9416666768491269,
      "step": 460
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 194.38542304039,
      "epoch": 0.6266666666666667,
      "grad_norm": 0.48382291197776794,
      "kl": 0.4151397705078125,
      "learning_rate": 1.103348756944197e-06,
      "loss": 0.0534,
      "reward": 1.3812500394880771,
      "reward_std": 0.480151966586709,
      "rewards/accuracy_reward": 0.4250000108033419,
      "rewards/format_reward": 0.9562500081956387,
      "step": 470
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 220.32292351722717,
      "epoch": 0.64,
      "grad_norm": 0.2328379899263382,
      "kl": 0.377593994140625,
      "learning_rate": 1.036474508437579e-06,
      "loss": 0.0734,
      "reward": 1.318750038743019,
      "reward_std": 0.47938378117978575,
      "rewards/accuracy_reward": 0.37708334289491174,
      "rewards/format_reward": 0.9416666783392429,
      "step": 480
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 213.46458911895752,
      "epoch": 0.6533333333333333,
      "grad_norm": 0.6423675417900085,
      "kl": 0.6955718994140625,
      "learning_rate": 9.70604152929197e-07,
      "loss": 0.1096,
      "reward": 1.3416667044162751,
      "reward_std": 0.46137820817530156,
      "rewards/accuracy_reward": 0.4145833419635892,
      "rewards/format_reward": 0.9270833462476731,
      "step": 490
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 214.08125677108765,
      "epoch": 0.6666666666666666,
      "grad_norm": 0.14841440320014954,
      "kl": 0.7181640625,
      "learning_rate": 9.058803509412648e-07,
      "loss": 0.0987,
      "reward": 1.397916704416275,
      "reward_std": 0.44944200329482553,
      "rewards/accuracy_reward": 0.46041667945683,
      "rewards/format_reward": 0.9375000111758709,
      "step": 500
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 226.89792413711547,
      "epoch": 0.68,
      "grad_norm": 0.20204541087150574,
      "kl": 0.37396240234375,
      "learning_rate": 8.424432798163837e-07,
      "loss": 0.075,
      "reward": 1.4020833641290664,
      "reward_std": 0.3638565935194492,
      "rewards/accuracy_reward": 0.45000000968575476,
      "rewards/format_reward": 0.9520833425223827,
      "step": 510
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 227.1395909309387,
      "epoch": 0.6933333333333334,
      "grad_norm": 0.170625239610672,
      "kl": 0.7568450927734375,
      "learning_rate": 7.804303301246311e-07,
      "loss": 0.0984,
      "reward": 1.331250037252903,
      "reward_std": 0.48546464554965496,
      "rewards/accuracy_reward": 0.40000001061707735,
      "rewards/format_reward": 0.9312500089406968,
      "step": 520
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 222.33542160987855,
      "epoch": 0.7066666666666667,
      "grad_norm": 0.35064828395843506,
      "kl": 0.613885498046875,
      "learning_rate": 7.19975808106177e-07,
      "loss": 0.0488,
      "reward": 1.3625000312924385,
      "reward_std": 0.44792898930609226,
      "rewards/accuracy_reward": 0.4104166755452752,
      "rewards/format_reward": 0.9520833417773247,
      "step": 530
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 237.87292375564576,
      "epoch": 0.72,
      "grad_norm": 0.3742374777793884,
      "kl": 0.584375,
      "learning_rate": 6.6121064479388e-07,
      "loss": 0.0612,
      "reward": 1.4020833656191827,
      "reward_std": 0.4303410712629557,
      "rewards/accuracy_reward": 0.46041667759418486,
      "rewards/format_reward": 0.9416666768491269,
      "step": 540
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 244.73542499542236,
      "epoch": 0.7333333333333333,
      "grad_norm": 0.346811980009079,
      "kl": 0.83583984375,
      "learning_rate": 6.04262112445821e-07,
      "loss": 0.1029,
      "reward": 1.293750037997961,
      "reward_std": 0.48419367931783197,
      "rewards/accuracy_reward": 0.3791666770353913,
      "rewards/format_reward": 0.9145833477377892,
      "step": 550
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 268.9645917892456,
      "epoch": 0.7466666666666667,
      "grad_norm": 0.14408645033836365,
      "kl": 0.513336181640625,
      "learning_rate": 5.492535489019345e-07,
      "loss": 0.084,
      "reward": 1.2875000312924385,
      "reward_std": 0.48581976890563966,
      "rewards/accuracy_reward": 0.3770833395421505,
      "rewards/format_reward": 0.9104166757315397,
      "step": 560
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 241.53333921432494,
      "epoch": 0.76,
      "grad_norm": 1.40211021900177,
      "kl": 0.8751678466796875,
      "learning_rate": 4.963040904617131e-07,
      "loss": 0.1203,
      "reward": 1.3145833693444728,
      "reward_std": 0.49484665393829347,
      "rewards/accuracy_reward": 0.4145833423361182,
      "rewards/format_reward": 0.9000000134110451,
      "step": 570
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 252.40000743865966,
      "epoch": 0.7733333333333333,
      "grad_norm": 0.4801377058029175,
      "kl": 1.2982696533203124,
      "learning_rate": 4.4552841386150737e-07,
      "loss": 0.1253,
      "reward": 1.2708333659917117,
      "reward_std": 0.5930769924074412,
      "rewards/accuracy_reward": 0.43541667610406876,
      "rewards/format_reward": 0.8354166846722364,
      "step": 580
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 259.3583409309387,
      "epoch": 0.7866666666666666,
      "grad_norm": 1.0408235788345337,
      "kl": 1.4053619384765625,
      "learning_rate": 3.9703648791025716e-07,
      "loss": 0.183,
      "reward": 1.1916666943579912,
      "reward_std": 0.6191505286842585,
      "rewards/accuracy_reward": 0.3854166746139526,
      "rewards/format_reward": 0.8062500186264515,
      "step": 590
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 243.28750715255737,
      "epoch": 0.8,
      "grad_norm": 0.2709506154060364,
      "kl": 0.96761474609375,
      "learning_rate": 3.5093333532153313e-07,
      "loss": 0.1034,
      "reward": 1.316666693240404,
      "reward_std": 0.46122562885284424,
      "rewards/accuracy_reward": 0.4354166748002172,
      "rewards/format_reward": 0.8812500096857547,
      "step": 600
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 218.83542261123657,
      "epoch": 0.8133333333333334,
      "grad_norm": 0.19969280064105988,
      "kl": 0.4908477783203125,
      "learning_rate": 3.073188052577282e-07,
      "loss": 0.0814,
      "reward": 1.3458333656191825,
      "reward_std": 0.4190520565956831,
      "rewards/accuracy_reward": 0.4041666744276881,
      "rewards/format_reward": 0.9416666775941849,
      "step": 610
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 216.3875057220459,
      "epoch": 0.8266666666666667,
      "grad_norm": 0.23758280277252197,
      "kl": 0.64254150390625,
      "learning_rate": 2.6628735707900655e-07,
      "loss": 0.0776,
      "reward": 1.3395833685994147,
      "reward_std": 0.4737738098949194,
      "rewards/accuracy_reward": 0.4250000070780516,
      "rewards/format_reward": 0.9145833484828472,
      "step": 620
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 245.11459074020385,
      "epoch": 0.84,
      "grad_norm": 0.4293162226676941,
      "kl": 1.1108795166015626,
      "learning_rate": 2.2792785576536108e-07,
      "loss": 0.1537,
      "reward": 1.24375003837049,
      "reward_std": 0.520675316080451,
      "rewards/accuracy_reward": 0.3729166738688946,
      "rewards/format_reward": 0.8708333484828472,
      "step": 630
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 249.4729232788086,
      "epoch": 0.8533333333333334,
      "grad_norm": 0.17477993667125702,
      "kl": 1.014031982421875,
      "learning_rate": 1.9232337945485655e-07,
      "loss": 0.1481,
      "reward": 1.293750035017729,
      "reward_std": 0.5492795780301094,
      "rewards/accuracy_reward": 0.4104166742414236,
      "rewards/format_reward": 0.8833333525806666,
      "step": 640
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 271.8937582015991,
      "epoch": 0.8666666666666667,
      "grad_norm": 0.683167040348053,
      "kl": 1.2961456298828125,
      "learning_rate": 1.5955103951488177e-07,
      "loss": 0.1557,
      "reward": 1.1854166965931654,
      "reward_std": 0.5265001837164164,
      "rewards/accuracy_reward": 0.33541667349636556,
      "rewards/format_reward": 0.8500000169500709,
      "step": 650
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 238.02084150314332,
      "epoch": 0.88,
      "grad_norm": 0.47596192359924316,
      "kl": 0.965118408203125,
      "learning_rate": 1.2968181353609853e-07,
      "loss": 0.1338,
      "reward": 1.3020833618938923,
      "reward_std": 0.49198094978928564,
      "rewards/accuracy_reward": 0.40833334252238274,
      "rewards/format_reward": 0.8937500178813934,
      "step": 660
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 221.03750729560852,
      "epoch": 0.8933333333333333,
      "grad_norm": 0.4685460031032562,
      "kl": 0.8098968505859375,
      "learning_rate": 1.0278039161078634e-07,
      "loss": 0.1018,
      "reward": 1.3375000290572643,
      "reward_std": 0.4632135137915611,
      "rewards/accuracy_reward": 0.43750000912696124,
      "rewards/format_reward": 0.9000000119209289,
      "step": 670
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 253.50209112167357,
      "epoch": 0.9066666666666666,
      "grad_norm": 0.2506866753101349,
      "kl": 0.906805419921875,
      "learning_rate": 7.89050362285062e-08,
      "loss": 0.1365,
      "reward": 1.2375000305473804,
      "reward_std": 0.517885773256421,
      "rewards/accuracy_reward": 0.3645833428949118,
      "rewards/format_reward": 0.8729166854172945,
      "step": 680
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 239.60625705718994,
      "epoch": 0.92,
      "grad_norm": 0.5413881540298462,
      "kl": 0.835858154296875,
      "learning_rate": 5.810745609252166e-08,
      "loss": 0.1281,
      "reward": 1.2187500409781933,
      "reward_std": 0.5353617053478956,
      "rewards/accuracy_reward": 0.3395833415910602,
      "rewards/format_reward": 0.8791666828095913,
      "step": 690
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 232.96667356491088,
      "epoch": 0.9333333333333333,
      "grad_norm": 0.41816839575767517,
      "kl": 1.4766021728515626,
      "learning_rate": 4.0432694130264294e-08,
      "loss": 0.1412,
      "reward": 1.2375000312924385,
      "reward_std": 0.5488180216401816,
      "rewards/accuracy_reward": 0.37708334121853115,
      "rewards/format_reward": 0.8604166824370623,
      "step": 700
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 278.4333411693573,
      "epoch": 0.9466666666666667,
      "grad_norm": 0.5456490516662598,
      "kl": 1.7320938110351562,
      "learning_rate": 2.5919029940380145e-08,
      "loss": 0.2249,
      "reward": 1.2062500290572644,
      "reward_std": 0.6055900201201438,
      "rewards/accuracy_reward": 0.3833333427086473,
      "rewards/format_reward": 0.8229166869074106,
      "step": 710
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 239.62500748634338,
      "epoch": 0.96,
      "grad_norm": 0.32890596985816956,
      "kl": 1.458221435546875,
      "learning_rate": 1.4597896887644457e-08,
      "loss": 0.1827,
      "reward": 1.264583370089531,
      "reward_std": 0.5755864661186934,
      "rewards/accuracy_reward": 0.4187500111758709,
      "rewards/format_reward": 0.8458333492279053,
      "step": 720
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 274.1791749954224,
      "epoch": 0.9733333333333334,
      "grad_norm": 0.3116457760334015,
      "kl": 1.3982940673828126,
      "learning_rate": 6.493814025293476e-09,
      "loss": 0.1776,
      "reward": 1.231250035017729,
      "reward_std": 0.5566708967089653,
      "rewards/accuracy_reward": 0.40000000949949027,
      "rewards/format_reward": 0.8312500137835741,
      "step": 730
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 255.23750734329224,
      "epoch": 0.9866666666666667,
      "grad_norm": 0.4142087697982788,
      "kl": 1.2847900390625,
      "learning_rate": 1.624332992213151e-09,
      "loss": 0.1855,
      "reward": 1.2229166984558106,
      "reward_std": 0.5469876442104578,
      "rewards/accuracy_reward": 0.38333334159106014,
      "rewards/format_reward": 0.8395833514630795,
      "step": 740
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 279.8500079154968,
      "epoch": 1.0,
      "grad_norm": 0.3987623155117035,
      "kl": 1.2904449462890626,
      "learning_rate": 0.0,
      "loss": 0.2137,
      "reward": 1.2583333723247052,
      "reward_std": 0.6197128046303988,
      "rewards/accuracy_reward": 0.4229166766628623,
      "rewards/format_reward": 0.8354166867211461,
      "step": 750
    },
    {
      "epoch": 1.0,
      "step": 750,
      "total_flos": 0.0,
      "train_loss": 0.1488674604743719,
      "train_runtime": 66920.1081,
      "train_samples_per_second": 0.09,
      "train_steps_per_second": 0.011
    }
  ],
  "logging_steps": 10,
  "max_steps": 750,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}