| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.054, |
| "eval_steps": 5.0, |
| "global_step": 54, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "/length/completion": 9061.760416666666, |
| "/length/completion/max": 19510, |
| "/length/completion/min": 2153, |
| "/length/completion/std": 333.8509541397672, |
| "/length/context": 42230.270833333336, |
| "/length/context/max": 101733, |
| "/length/context/min": 3126, |
| "/length/context/std": 2205.856462898938, |
| "/length/forward": 42235.0, |
| "/length/forward/max": 101736, |
| "/length/forward/min": 3128, |
| "/length/forward/std": 2205.8737442778643, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5446428571428571, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.49800302740658364, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.5446428571428571, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.49800302740658364, |
| "advantages": -0.10318419991938722, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.019792414635250263, |
| "entropy": 0.65704345703125, |
| "entropy/max": 0.87890625, |
| "entropy/min": 0.455078125, |
| "entropy/std": 0.008027744975431144, |
| "epoch": 0.001, |
| "grad_norm": 15232.0, |
| "learning_rate": 0.0, |
| "loss": -165.1345625, |
| "out_of_date_ratio": 0.0014260866205404454, |
| "out_of_date_ratio/max": 0.003501293947920203, |
| "out_of_date_ratio/min": 0.0001568135485285893, |
| "out_of_date_ratio/std": 8.450739925303703e-05, |
| "rewards": 0.5104166666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.051019960662424764, |
| "sampled_at_step": 0.9999999925494194, |
| "sampled_at_step/max": 1.0, |
| "sampled_at_step/min": 0.9999999403953552, |
| "sampled_at_step/std": 2.011886704218841e-09, |
| "scores": 0.4623135832325675, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.010009663488673342, |
| "step": 1, |
| "steps": 24.84375, |
| "steps/max": 80, |
| "steps/min": 1, |
| "steps/std": 1.7357010149325394 |
| }, |
| { |
| "/length/completion": 8887.78125, |
| "/length/completion/max": 20317, |
| "/length/completion/min": 3326, |
| "/length/completion/std": 433.71652733755445, |
| "/length/context": 40515.65625, |
| "/length/context/max": 121680, |
| "/length/context/min": 4305, |
| "/length/context/std": 2330.0265864167477, |
| "/length/forward": 40519.833333333336, |
| "/length/forward/max": 121688, |
| "/length/forward/min": 4312, |
| "/length/forward/std": 2330.095548169546, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.592375366568915, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4913927061437305, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.592375366568915, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4913927061437305, |
| "advantages": -0.028806329974655393, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.020580049780948472, |
| "entropy": 0.6922200520833334, |
| "entropy/max": 0.8984375, |
| "entropy/min": 0.4921875, |
| "entropy/std": 0.009487675328552674, |
| "epoch": 0.002, |
| "grad_norm": 15232.0, |
| "learning_rate": 2e-07, |
| "loss": -14.253539583333334, |
| "out_of_date_ratio": 0.0013957185919935, |
| "out_of_date_ratio/max": 0.003554652677848935, |
| "out_of_date_ratio/min": 0.0002505480661056936, |
| "out_of_date_ratio/std": 7.495178299694405e-05, |
| "rewards": 0.6145833333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04967295748634071, |
| "sampled_at_step": 1.330572656666239, |
| "sampled_at_step/max": 2.0, |
| "sampled_at_step/min": 0.9999999403953552, |
| "sampled_at_step/std": 0.04314657295355584, |
| "scores": 0.574643011683254, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.010284327028776, |
| "step": 2, |
| "steps": 23.072916666666668, |
| "steps/max": 84, |
| "steps/min": 1, |
| "steps/std": 1.808259214570864 |
| }, |
| { |
| "/length/completion": 9210.4375, |
| "/length/completion/max": 25411, |
| "/length/completion/min": 2206, |
| "/length/completion/std": 474.7124508411544, |
| "/length/context": 43727.708333333336, |
| "/length/context/max": 122274, |
| "/length/context/min": 3516, |
| "/length/context/std": 2767.7691200088607, |
| "/length/forward": 43732.0, |
| "/length/forward/max": 122280, |
| "/length/forward/min": 3520, |
| "/length/forward/std": 2767.7861615578454, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5821205821205822, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.49321010734997783, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.5821205821205822, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.49321010734997783, |
| "advantages": -0.09020842379505026, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.018848007565270594, |
| "entropy": 0.6811930338541666, |
| "entropy/max": 0.859375, |
| "entropy/min": 0.55078125, |
| "entropy/std": 0.007361056852565585, |
| "epoch": 0.003, |
| "grad_norm": 15232.0, |
| "learning_rate": 4e-07, |
| "loss": -92.92765208333333, |
| "out_of_date_ratio": 0.002393863861925638, |
| "out_of_date_ratio/max": 0.09005628526210785, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 0.0009420074751743802, |
| "rewards": 0.5625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05063078670631141, |
| "sampled_at_step": 2.32747404028972, |
| "sampled_at_step/max": 3.000000238418579, |
| "sampled_at_step/min": 0.9999999403953552, |
| "sampled_at_step/std": 0.07218538224091829, |
| "scores": 0.5509118541033434, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009695359383926561, |
| "step": 3, |
| "steps": 26.416666666666668, |
| "steps/max": 97, |
| "steps/min": 0, |
| "steps/std": 2.1152075570163933 |
| }, |
| { |
| "/length/completion": 10750.739583333334, |
| "/length/completion/max": 21108, |
| "/length/completion/min": 2193, |
| "/length/completion/std": 450.1033487719153, |
| "/length/context": 50107.604166666664, |
| "/length/context/max": 114001, |
| "/length/context/min": 6428, |
| "/length/context/std": 2518.8334550931213, |
| "/length/forward": 50112.166666666664, |
| "/length/forward/max": 114008, |
| "/length/forward/min": 6432, |
| "/length/forward/std": 2518.891348333757, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5820668693009119, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.49321904764835184, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.5820668693009119, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.49321904764835184, |
| "advantages": -0.052586616041765684, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01657736980411449, |
| "entropy": 0.6615804036458334, |
| "entropy/max": 0.87890625, |
| "entropy/min": 0.4921875, |
| "entropy/std": 0.008354287781568109, |
| "epoch": 0.004, |
| "grad_norm": 16384.0, |
| "learning_rate": 6e-07, |
| "loss": -242.6494145833333, |
| "out_of_date_ratio": 0.0013956707824339294, |
| "out_of_date_ratio/max": 0.0036721748765558004, |
| "out_of_date_ratio/min": 0.00016429803508799523, |
| "out_of_date_ratio/std": 7.971234370627556e-05, |
| "rewards": 0.59375, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05012598061177124, |
| "sampled_at_step": 3.343302513162295, |
| "sampled_at_step/max": 4.0, |
| "sampled_at_step/min": 2.2274067401885986, |
| "sampled_at_step/std": 0.05714829785258465, |
| "scores": 0.493687707641196, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00911280642020964, |
| "step": 4, |
| "steps": 30.354166666666668, |
| "steps/max": 76, |
| "steps/min": 1, |
| "steps/std": 1.7908779251434763 |
| }, |
| { |
| "/length/completion": 11387.947916666666, |
| "/length/completion/max": 22277, |
| "/length/completion/min": 4552, |
| "/length/completion/std": 385.16190915095103, |
| "/length/context": 53574.416666666664, |
| "/length/context/max": 105110, |
| "/length/context/min": 16843, |
| "/length/context/std": 2121.527686130431, |
| "/length/forward": 53579.0, |
| "/length/forward/max": 105112, |
| "/length/forward/min": 16848, |
| "/length/forward/std": 2121.4906782535404, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6083832335329341, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.488111744059642, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6083832335329341, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.488111744059642, |
| "advantages": -0.07026523964634883, |
| "advantages/max": 2.0, |
| "advantages/min": -1.7142857142857142, |
| "advantages/std": 0.018303452579415777, |
| "entropy": 0.6888427734375, |
| "entropy/max": 0.8359375, |
| "entropy/min": 0.56640625, |
| "entropy/std": 0.00563372915583649, |
| "epoch": 0.005, |
| "grad_norm": 19456.0, |
| "learning_rate": 8e-07, |
| "loss": -275.4627083333333, |
| "out_of_date_ratio": 0.0016567955062782858, |
| "out_of_date_ratio/max": 0.020350120961666107, |
| "out_of_date_ratio/min": 0.0002010050229728222, |
| "out_of_date_ratio/std": 0.0002122593014383166, |
| "rewards": 0.4895833333333333, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.051019960662424785, |
| "sampled_at_step": 4.356270944078763, |
| "sampled_at_step/max": 5.0, |
| "sampled_at_step/min": 3.0, |
| "sampled_at_step/std": 0.05214663289803722, |
| "scores": 0.40390879478827363, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008855820888513739, |
| "step": 5, |
| "steps": 30.979166666666668, |
| "steps/max": 77, |
| "steps/min": 7, |
| "steps/std": 1.7108073527787282 |
| }, |
| { |
| "/length/completion": 8645.0625, |
| "/length/completion/max": 22095, |
| "/length/completion/min": 4112, |
| "/length/completion/std": 328.20605970067106, |
| "/length/context": 49704.583333333336, |
| "/length/context/max": 128783, |
| "/length/context/min": 11158, |
| "/length/context/std": 2747.831565315202, |
| "/length/forward": 49709.25, |
| "/length/forward/max": 128784, |
| "/length/forward/min": 11160, |
| "/length/forward/std": 2747.838415181319, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6016597510373444, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.48955622253120745, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6016597510373444, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48955622253120745, |
| "advantages": -0.02579739867235099, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.018538305458941026, |
| "entropy": 0.6691487630208334, |
| "entropy/max": 0.9609375, |
| "entropy/min": 0.455078125, |
| "entropy/std": 0.010216517799197189, |
| "epoch": 0.006, |
| "grad_norm": 14656.0, |
| "learning_rate": 1e-06, |
| "loss": -45.5740875, |
| "out_of_date_ratio": 0.0014883852118146024, |
| "out_of_date_ratio/max": 0.006303992588073015, |
| "out_of_date_ratio/min": 0.0001845529186539352, |
| "out_of_date_ratio/std": 0.00010177097145203845, |
| "rewards": 0.53125, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05093126879064569, |
| "sampled_at_step": 5.492770120501518, |
| "sampled_at_step/max": 6.000000476837158, |
| "sampled_at_step/min": 4.611965656280518, |
| "sampled_at_step/std": 0.045385259602976326, |
| "scores": 0.5179448432187382, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009712100160934692, |
| "step": 6, |
| "steps": 26.572916666666668, |
| "steps/max": 84, |
| "steps/min": 4, |
| "steps/std": 1.638879348525962 |
| }, |
| { |
| "/length/completion": 10545.895833333334, |
| "/length/completion/max": 26415, |
| "/length/completion/min": 1407, |
| "/length/completion/std": 454.70999573960717, |
| "/length/context": 49447.052083333336, |
| "/length/context/max": 125477, |
| "/length/context/min": 4282, |
| "/length/context/std": 2438.5141942497844, |
| "/length/forward": 49451.416666666664, |
| "/length/forward/max": 125480, |
| "/length/forward/min": 4288, |
| "/length/forward/std": 2438.461217154489, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6220238095238095, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4848816246356396, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6030405405405406, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4892674595812734, |
| "advantages": -0.025122265122264592, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01904916215825559, |
| "entropy": 0.6802164713541666, |
| "entropy/max": 0.85546875, |
| "entropy/min": 0.54296875, |
| "entropy/std": 0.006917466056080718, |
| "epoch": 0.007, |
| "grad_norm": 18304.0, |
| "learning_rate": 1.2e-06, |
| "loss": 12.00070625, |
| "out_of_date_ratio": 0.0014764862108525751, |
| "out_of_date_ratio/max": 0.004187604878097773, |
| "out_of_date_ratio/min": 0.00011552680371096358, |
| "out_of_date_ratio/std": 8.891737377791334e-05, |
| "rewards": 0.5208333333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05098671929023751, |
| "sampled_at_step": 6.518794342875481, |
| "sampled_at_step/max": 7.000000476837158, |
| "sampled_at_step/min": 5.0, |
| "sampled_at_step/std": 0.05228243236909025, |
| "scores": 0.4976576576576577, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009491475804562591, |
| "step": 7, |
| "steps": 27.90625, |
| "steps/max": 89, |
| "steps/min": 1, |
| "steps/std": 1.8318577232140487 |
| }, |
| { |
| "/length/completion": 9839.854166666666, |
| "/length/completion/max": 21195, |
| "/length/completion/min": 2998, |
| "/length/completion/std": 366.70528619391644, |
| "/length/context": 49984.9375, |
| "/length/context/max": 113953, |
| "/length/context/min": 12001, |
| "/length/context/std": 2039.1666751173045, |
| "/length/forward": 49989.416666666664, |
| "/length/forward/max": 113960, |
| "/length/forward/min": 12008, |
| "/length/forward/std": 2039.1574836150392, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6066037735849057, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4885034651437574, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.5986646884272997, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.49016862328952066, |
| "advantages": 0.030626780626780342, |
| "advantages/max": 2.0, |
| "advantages/min": -1.7142857142857142, |
| "advantages/std": 0.019301630821304063, |
| "entropy": 0.67767333984375, |
| "entropy/max": 0.89453125, |
| "entropy/min": 0.458984375, |
| "entropy/std": 0.008873916231699142, |
| "epoch": 0.008, |
| "grad_norm": 17280.0, |
| "learning_rate": 1.4e-06, |
| "loss": 347.6390791666667, |
| "out_of_date_ratio": 0.0013641679650694034, |
| "out_of_date_ratio/max": 0.012770682573318481, |
| "out_of_date_ratio/min": 0.00014114326040726155, |
| "out_of_date_ratio/std": 0.00013914642752668266, |
| "rewards": 0.6041666666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04991130733147589, |
| "sampled_at_step": 7.504495506485303, |
| "sampled_at_step/max": 8.0, |
| "sampled_at_step/min": 6.0, |
| "sampled_at_step/std": 0.04903364618515806, |
| "scores": 0.6477920227920227, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009014026700098238, |
| "step": 8, |
| "steps": 28.25, |
| "steps/max": 68, |
| "steps/min": 4, |
| "steps/std": 1.4796067420470587 |
| }, |
| { |
| "/length/completion": 8776.010416666666, |
| "/length/completion/max": 26631, |
| "/length/completion/min": 982, |
| "/length/completion/std": 428.9860690236972, |
| "/length/context": 43058.010416666664, |
| "/length/context/max": 124668, |
| "/length/context/min": 2162, |
| "/length/context/std": 2756.5255490843815, |
| "/length/forward": 43062.666666666664, |
| "/length/forward/max": 124672, |
| "/length/forward/min": 2168, |
| "/length/forward/std": 2756.5407963672587, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.600375234521576, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4898212044151812, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.5981308411214953, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.49027577751790646, |
| "advantages": -0.19077901430842445, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.018712851888668346, |
| "entropy": 0.6672770182291666, |
| "entropy/max": 0.84375, |
| "entropy/min": 0.5234375, |
| "entropy/std": 0.007357147745400299, |
| "epoch": 0.009, |
| "grad_norm": 14848.0, |
| "learning_rate": 1.6e-06, |
| "loss": -744.38655625, |
| "out_of_date_ratio": 0.0014761180206429951, |
| "out_of_date_ratio/max": 0.0039397054351866245, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 9.058071731289764e-05, |
| "rewards": 0.4791666666666667, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.050986719290237494, |
| "sampled_at_step": 8.244493653376898, |
| "sampled_at_step/max": 9.0, |
| "sampled_at_step/min": 7.0, |
| "sampled_at_step/std": 0.05606692134224259, |
| "scores": 0.34379968203497613, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009469251146552133, |
| "step": 9, |
| "steps": 25.208333333333332, |
| "steps/max": 96, |
| "steps/min": 0, |
| "steps/std": 2.0891022557437693 |
| }, |
| { |
| "/length/completion": 9919.166666666666, |
| "/length/completion/max": 28914, |
| "/length/completion/min": 1503, |
| "/length/completion/std": 462.08533345737334, |
| "/length/context": 48338.229166666664, |
| "/length/context/max": 116558, |
| "/length/context/min": 2761, |
| "/length/context/std": 2398.330957210102, |
| "/length/forward": 48342.916666666664, |
| "/length/forward/max": 116560, |
| "/length/forward/min": 2768, |
| "/length/forward/std": 2398.321283904732, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5992402659069326, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.49005241517894965, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.5964912280701754, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4906011036529667, |
| "advantages": 0.14579998967422325, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.017462920262873362, |
| "entropy": 0.66534423828125, |
| "entropy/max": 0.890625, |
| "entropy/min": 0.310546875, |
| "entropy/std": 0.010153132071121932, |
| "epoch": 0.01, |
| "grad_norm": 15424.0, |
| "learning_rate": 1.8e-06, |
| "loss": 576.7980958333334, |
| "out_of_date_ratio": 0.0012154976905852284, |
| "out_of_date_ratio/max": 0.0027450979687273502, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 6.903620514673011e-05, |
| "rewards": 0.6041666666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04991130733147588, |
| "sampled_at_step": 9.399776776631674, |
| "sampled_at_step/max": 10.0, |
| "sampled_at_step/min": 8.0, |
| "sampled_at_step/std": 0.06032417629102527, |
| "scores": 0.5858330321647994, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009364186993252418, |
| "step": 10, |
| "steps": 27.822916666666668, |
| "steps/max": 88, |
| "steps/min": 1, |
| "steps/std": 1.7334281569407148 |
| }, |
| { |
| "/length/completion": 10443.125, |
| "/length/completion/max": 27506, |
| "/length/completion/min": 1803, |
| "/length/completion/std": 543.500688424718, |
| "/length/context": 47232.635416666664, |
| "/length/context/max": 120937, |
| "/length/context/min": 3018, |
| "/length/context/std": 2898.144162085004, |
| "/length/forward": 47237.166666666664, |
| "/length/forward/max": 120944, |
| "/length/forward/min": 3024, |
| "/length/forward/std": 2898.150111563425, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5850860420650096, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4927071802254477, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.5896174863387978, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4919031471156853, |
| "advantages": -0.07608915906788609, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.018877367468057998, |
| "entropy": 0.7043863932291666, |
| "entropy/max": 0.86328125, |
| "entropy/min": 0.5390625, |
| "entropy/std": 0.007723941035489615, |
| "epoch": 0.011, |
| "grad_norm": 30080.0, |
| "learning_rate": 2e-06, |
| "loss": -558.7412104166666, |
| "out_of_date_ratio": 0.0014792358707988267, |
| "out_of_date_ratio/max": 0.005837538279592991, |
| "out_of_date_ratio/min": 0.00012627856631297618, |
| "out_of_date_ratio/std": 9.652012977910382e-05, |
| "rewards": 0.5104166666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05101996066242478, |
| "sampled_at_step": 10.584631284077963, |
| "sampled_at_step/max": 11.0, |
| "sampled_at_step/min": 10.0, |
| "sampled_at_step/std": 0.04345596676676561, |
| "scores": 0.46418439716312054, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009391357929316552, |
| "step": 11, |
| "steps": 28.375, |
| "steps/max": 88, |
| "steps/min": 0, |
| "steps/std": 2.2854199901221657 |
| }, |
| { |
| "/length/completion": 11570.645833333334, |
| "/length/completion/max": 24767, |
| "/length/completion/min": 1517, |
| "/length/completion/std": 538.688173773276, |
| "/length/context": 57460.239583333336, |
| "/length/context/max": 116545, |
| "/length/context/min": 8185, |
| "/length/context/std": 2912.14489634308, |
| "/length/forward": 57465.083333333336, |
| "/length/forward/max": 116552, |
| "/length/forward/min": 8192, |
| "/length/forward/std": 2912.1244622454274, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.581547064305685, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4933052567154787, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.5925740090316106, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.49135532240102475, |
| "advantages": 0.015989072794471607, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.015835078943366814, |
| "entropy": 0.682373046875, |
| "entropy/max": 0.84375, |
| "entropy/min": 0.54296875, |
| "entropy/std": 0.006832136883896569, |
| "epoch": 0.012, |
| "grad_norm": 24576.0, |
| "learning_rate": 1.9999949650055508e-06, |
| "loss": 381.7514645833333, |
| "out_of_date_ratio": 0.001392214337556652, |
| "out_of_date_ratio/max": 0.0036032216157764196, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 6.81944290825032e-05, |
| "rewards": 0.4791666666666667, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.050986719290237514, |
| "sampled_at_step": 11.37171138326327, |
| "sampled_at_step/max": 12.000000953674316, |
| "sampled_at_step/min": 10.790054321289062, |
| "sampled_at_step/std": 0.04296443813268482, |
| "scores": 0.4578177727784027, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008354839024517575, |
| "step": 12, |
| "steps": 36.041666666666664, |
| "steps/max": 98, |
| "steps/min": 0, |
| "steps/std": 2.405592292268241 |
| }, |
| { |
| "/length/completion": 9533.010416666666, |
| "/length/completion/max": 20970, |
| "/length/completion/min": 2845, |
| "/length/completion/std": 475.7710193195782, |
| "/length/context": 42798.520833333336, |
| "/length/context/max": 127818, |
| "/length/context/min": 5819, |
| "/length/context/std": 2309.1928122194336, |
| "/length/forward": 42803.083333333336, |
| "/length/forward/max": 127824, |
| "/length/forward/min": 5824, |
| "/length/forward/std": 2309.192172870657, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5896927651139743, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.49188942648344175, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.5953379953379954, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.49082651379579373, |
| "advantages": 0.028693766334871362, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.019878183877504816, |
| "entropy": 0.6960856119791666, |
| "entropy/max": 0.85546875, |
| "entropy/min": 0.515625, |
| "entropy/std": 0.00751492793808001, |
| "epoch": 0.013, |
| "grad_norm": 16384.0, |
| "learning_rate": 1.9999798600729064e-06, |
| "loss": 202.53405833333332, |
| "out_of_date_ratio": 0.0016945011958947969, |
| "out_of_date_ratio/max": 0.016010673716664314, |
| "out_of_date_ratio/min": 0.00016920473717618734, |
| "out_of_date_ratio/std": 0.00017768531065253792, |
| "rewards": 0.5, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05103103630798287, |
| "sampled_at_step": 12.429851442575455, |
| "sampled_at_step/max": 13.000000953674316, |
| "sampled_at_step/min": 11.195213317871094, |
| "sampled_at_step/std": 0.04931637853899746, |
| "scores": 0.5165434021019852, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009859391293109745, |
| "step": 13, |
| "steps": 25.760416666666668, |
| "steps/max": 98, |
| "steps/min": 1, |
| "steps/std": 1.8281024328893596 |
| }, |
| { |
| "/length/completion": 9545.020833333334, |
| "/length/completion/max": 27580, |
| "/length/completion/min": 2641, |
| "/length/completion/std": 491.1694360666533, |
| "/length/context": 41898.489583333336, |
| "/length/context/max": 111089, |
| "/length/context/min": 4075, |
| "/length/context/std": 2197.090228116492, |
| "/length/forward": 41903.166666666664, |
| "/length/forward/max": 111096, |
| "/length/forward/min": 4080, |
| "/length/forward/std": 2197.118581736236, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5901981230448383, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.491797009546813, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.59849157054126, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.49020343790340376, |
| "advantages": 0.0003391938492836318, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.020971885229945038, |
| "entropy": 0.66033935546875, |
| "entropy/max": 0.93359375, |
| "entropy/min": 0.470703125, |
| "entropy/std": 0.008022561389017911, |
| "epoch": 0.014, |
| "grad_norm": 16640.0, |
| "learning_rate": 1.9999546853541726e-06, |
| "loss": -108.18811041666667, |
| "out_of_date_ratio": 0.001645550712358575, |
| "out_of_date_ratio/max": 0.02955367974936962, |
| "out_of_date_ratio/min": 0.0001886258542072028, |
| "out_of_date_ratio/std": 0.0003018806362539357, |
| "rewards": 0.5416666666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05085353651346114, |
| "sampled_at_step": 13.120210727055868, |
| "sampled_at_step/max": 14.0, |
| "sampled_at_step/min": 11.999999046325684, |
| "sampled_at_step/std": 0.04944200516546299, |
| "scores": 0.5041551246537396, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009946090042752999, |
| "step": 14, |
| "steps": 25.322916666666668, |
| "steps/max": 73, |
| "steps/min": 1, |
| "steps/std": 1.6704705804045474 |
| }, |
| { |
| "/length/completion": 8026.5, |
| "/length/completion/max": 21793, |
| "/length/completion/min": 1298, |
| "/length/completion/std": 418.3677279291144, |
| "/length/context": 37541.177083333336, |
| "/length/context/max": 120264, |
| "/length/context/min": 2177, |
| "/length/context/std": 2429.7662521622196, |
| "/length/forward": 37545.416666666664, |
| "/length/forward/max": 120272, |
| "/length/forward/min": 2184, |
| "/length/forward/std": 2429.7884767185656, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.610223642172524, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.48769944505424995, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6011730205278593, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48965704316109804, |
| "advantages": -0.028815848716794226, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.02292120980091652, |
| "entropy": 0.6660970052083334, |
| "entropy/max": 0.85546875, |
| "entropy/min": 0.474609375, |
| "entropy/std": 0.0068975977831584635, |
| "epoch": 0.015, |
| "grad_norm": 15488.0, |
| "learning_rate": 1.9999194411028592e-06, |
| "loss": -59.172912499999995, |
| "out_of_date_ratio": 0.0014122336497166543, |
| "out_of_date_ratio/max": 0.004552352242171764, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 8.94015080482922e-05, |
| "rewards": 0.5729166666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05048547230415048, |
| "sampled_at_step": 14.362340231736502, |
| "sampled_at_step/max": 15.000000953674316, |
| "sampled_at_step/min": 13.0, |
| "sampled_at_step/std": 0.050649083833140505, |
| "scores": 0.5875731652408824, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.010445522162230367, |
| "step": 15, |
| "steps": 22.135416666666668, |
| "steps/max": 94, |
| "steps/min": 1, |
| "steps/std": 2.065213902174111 |
| }, |
| { |
| "/length/completion": 8805.333333333334, |
| "/length/completion/max": 21973, |
| "/length/completion/min": 1796, |
| "/length/completion/std": 445.0948948510345, |
| "/length/context": 46869.760416666664, |
| "/length/context/max": 120639, |
| "/length/context/min": 2709, |
| "/length/context/std": 2724.167357688649, |
| "/length/forward": 46874.166666666664, |
| "/length/forward/max": 120640, |
| "/length/forward/min": 2712, |
| "/length/forward/std": 2724.1792405113283, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6228448275862069, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4846742703458842, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6072555205047319, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48836078192383503, |
| "advantages": -0.07705723798953784, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.019517642661302088, |
| "entropy": 0.6896565755208334, |
| "entropy/max": 0.87109375, |
| "entropy/min": 0.54296875, |
| "entropy/std": 0.007813531975228124, |
| "epoch": 0.016, |
| "grad_norm": 15744.0, |
| "learning_rate": 1.9998741276738752e-06, |
| "loss": -173.27766875, |
| "out_of_date_ratio": 0.0016480689225015037, |
| "out_of_date_ratio/max": 0.03180282190442085, |
| "out_of_date_ratio/min": 9.623712685424834e-05, |
| "out_of_date_ratio/std": 0.0003251372886183483, |
| "rewards": 0.5208333333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.050986719290237514, |
| "sampled_at_step": 15.466753671566645, |
| "sampled_at_step/max": 16.0, |
| "sampled_at_step/min": 13.999999046325684, |
| "sampled_at_step/std": 0.05769970522461167, |
| "scores": 0.4565297817240104, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009580747823909712, |
| "step": 16, |
| "steps": 27.15625, |
| "steps/max": 89, |
| "steps/min": 1, |
| "steps/std": 1.9363419548250318 |
| }, |
| { |
| "/length/completion": 10960.5, |
| "/length/completion/max": 25811, |
| "/length/completion/min": 5331, |
| "/length/completion/std": 404.2495592615958, |
| "/length/context": 58679.833333333336, |
| "/length/context/max": 128199, |
| "/length/context/min": 15882, |
| "/length/context/std": 2293.188648558825, |
| "/length/forward": 58684.416666666664, |
| "/length/forward/max": 128200, |
| "/length/forward/min": 15888, |
| "/length/forward/std": 2293.1736586545526, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.655958549222798, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.47505466098578625, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6157798165137615, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4864103556546057, |
| "advantages": 0.06352010768971889, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.014668813291095996, |
| "entropy": 0.6750081380208334, |
| "entropy/max": 0.85546875, |
| "entropy/min": 0.49609375, |
| "entropy/std": 0.00851281709713449, |
| "epoch": 0.017, |
| "grad_norm": 15808.0, |
| "learning_rate": 1.9998187455235257e-06, |
| "loss": 523.4619145833334, |
| "out_of_date_ratio": 0.0012592377715918701, |
| "out_of_date_ratio/max": 0.003246423788368702, |
| "out_of_date_ratio/min": 0.000278965977486223, |
| "out_of_date_ratio/std": 7.217478768110386e-05, |
| "rewards": 0.7083333333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04639024033294229, |
| "sampled_at_step": 16.395306944847107, |
| "sampled_at_step/max": 17.0, |
| "sampled_at_step/min": 15.0, |
| "sampled_at_step/std": 0.05894587906922644, |
| "scores": 0.6763839811542992, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00802837817144003, |
| "step": 17, |
| "steps": 34.375, |
| "steps/max": 87, |
| "steps/min": 4, |
| "steps/std": 1.7676595333079401 |
| }, |
| { |
| "/length/completion": 7872.354166666667, |
| "/length/completion/max": 21558, |
| "/length/completion/min": 1382, |
| "/length/completion/std": 426.2353171293944, |
| "/length/context": 44733.21875, |
| "/length/context/max": 127878, |
| "/length/context/min": 2521, |
| "/length/context/std": 3317.4042700867667, |
| "/length/forward": 44737.75, |
| "/length/forward/max": 127880, |
| "/length/forward/min": 2528, |
| "/length/forward/std": 3317.3828607032756, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6842684268426843, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4648065693048262, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6210896309314587, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48511576070138474, |
| "advantages": 0.11858076563958492, |
| "advantages/max": 1.4285714285714286, |
| "advantages/min": -2.0, |
| "advantages/std": 0.015536615058260218, |
| "entropy": 0.67474365234375, |
| "entropy/max": 0.92578125, |
| "entropy/min": 0.478515625, |
| "entropy/std": 0.009013613661301047, |
| "epoch": 0.018, |
| "grad_norm": 12096.0, |
| "learning_rate": 1.999753295209509e-06, |
| "loss": 204.49255416666665, |
| "out_of_date_ratio": 0.0012250137936765289, |
| "out_of_date_ratio/max": 0.003539822995662689, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 7.54957539457252e-05, |
| "rewards": 0.7395833333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.0447911619803609, |
| "sampled_at_step": 17.16051246722539, |
| "sampled_at_step/max": 18.0, |
| "sampled_at_step/min": 16.0, |
| "sampled_at_step/std": 0.04216236406924461, |
| "scores": 0.8149509803921569, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.007848803038037575, |
| "step": 18, |
| "steps": 24.5, |
| "steps/max": 90, |
| "steps/min": 1, |
| "steps/std": 2.258039264371735 |
| }, |
| { |
| "/length/completion": 10017.989583333334, |
| "/length/completion/max": 22287, |
| "/length/completion/min": 3320, |
| "/length/completion/std": 440.913435562424, |
| "/length/context": 51859.25, |
| "/length/context/max": 127071, |
| "/length/context/min": 7556, |
| "/length/context/std": 2528.4204074337463, |
| "/length/forward": 51863.833333333336, |
| "/length/forward/max": 127072, |
| "/length/forward/min": 7560, |
| "/length/forward/std": 2528.3746782467865, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.7072892938496583, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.45500675671390867, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6281186783546865, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4833069462118742, |
| "advantages": -0.01376936316695331, |
| "advantages/max": 1.1428571428571428, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01817736219182349, |
| "entropy": 0.66009521484375, |
| "entropy/max": 0.94140625, |
| "entropy/min": 0.44921875, |
| "entropy/std": 0.009324537253803052, |
| "epoch": 0.019, |
| "grad_norm": 17152.0, |
| "learning_rate": 1.999677777390909e-06, |
| "loss": 84.7324875, |
| "out_of_date_ratio": 0.0011100193546553783, |
| "out_of_date_ratio/max": 0.0025269172620028257, |
| "out_of_date_ratio/min": 6.997410673648119e-05, |
| "out_of_date_ratio/std": 6.765312539943454e-05, |
| "rewards": 0.6979166666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04686294212198703, |
| "sampled_at_step": 18.135198891162872, |
| "sampled_at_step/max": 19.0, |
| "sampled_at_step/min": 17.0, |
| "sampled_at_step/std": 0.04848014928364492, |
| "scores": 0.6662650602409639, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008183805815967112, |
| "step": 19, |
| "steps": 33.583333333333336, |
| "steps/max": 98, |
| "steps/min": 2, |
| "steps/std": 2.2649803520936715 |
| }, |
| { |
| "/length/completion": 11512.09375, |
| "/length/completion/max": 27309, |
| "/length/completion/min": 4980, |
| "/length/completion/std": 501.45766494340864, |
| "/length/context": 56020.770833333336, |
| "/length/context/max": 126836, |
| "/length/context/min": 11030, |
| "/length/context/std": 2774.8540662281084, |
| "/length/forward": 56025.083333333336, |
| "/length/forward/max": 126840, |
| "/length/forward/min": 11032, |
| "/length/forward/std": 2774.8575054000307, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.7183257918552036, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.44981534946108553, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.632258064516129, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4821906307368982, |
| "advantages": 0.07681134654818979, |
| "advantages/max": 1.1428571428571428, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01614613564061393, |
| "entropy": 0.6991780598958334, |
| "entropy/max": 0.87109375, |
| "entropy/min": 0.5390625, |
| "entropy/std": 0.007499462124219176, |
| "epoch": 0.02, |
| "grad_norm": 17280.0, |
| "learning_rate": 1.999592192828189e-06, |
| "loss": 827.3307291666666, |
| "out_of_date_ratio": 0.0011428044173650658, |
| "out_of_date_ratio/max": 0.003580619813874364, |
| "out_of_date_ratio/min": 0.00019168104336131364, |
| "out_of_date_ratio/std": 6.266466155041168e-05, |
| "rewards": 0.75, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.044194173824159216, |
| "sampled_at_step": 19.341610689957935, |
| "sampled_at_step/max": 20.0, |
| "sampled_at_step/min": 18.123659133911133, |
| "sampled_at_step/std": 0.05884703632870817, |
| "scores": 0.7580741626794258, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.007405661196030622, |
| "step": 20, |
| "steps": 33.833333333333336, |
| "steps/max": 99, |
| "steps/min": 2, |
| "steps/std": 2.107704671314623 |
| }, |
| { |
| "/length/completion": 11401.34375, |
| "/length/completion/max": 21933, |
| "/length/completion/min": 4242, |
| "/length/completion/std": 396.7083745875666, |
| "/length/context": 64273.041666666664, |
| "/length/context/max": 124766, |
| "/length/context/min": 13984, |
| "/length/context/std": 2506.1907459968925, |
| "/length/forward": 64277.916666666664, |
| "/length/forward/max": 124768, |
| "/length/forward/min": 13992, |
| "/length/forward/std": 2506.190649740026, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.7045951859956237, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.45622451695126653, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6270718232044199, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48358324179762907, |
| "advantages": 0.0018951597619684107, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.016091829848799057, |
| "entropy": 0.6525065104166666, |
| "entropy/max": 0.84375, |
| "entropy/min": 0.458984375, |
| "entropy/std": 0.009489987290893497, |
| "epoch": 0.021, |
| "grad_norm": 18432.0, |
| "learning_rate": 1.999496542383185e-06, |
| "loss": 121.03529791666666, |
| "out_of_date_ratio": 0.0013320353434664867, |
| "out_of_date_ratio/max": 0.003433594247326255, |
| "out_of_date_ratio/min": 0.00010756158008007333, |
| "out_of_date_ratio/std": 7.833091810820446e-05, |
| "rewards": 0.6145833333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04967295748634071, |
| "sampled_at_step": 20.23511741558711, |
| "sampled_at_step/max": 21.0, |
| "sampled_at_step/min": 19.0, |
| "sampled_at_step/std": 0.05663429886634042, |
| "scores": 0.5839745290527991, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008028674375231538, |
| "step": 21, |
| "steps": 38.260416666666664, |
| "steps/max": 86, |
| "steps/min": 7, |
| "steps/std": 2.0449720687775765 |
| }, |
| { |
| "/length/completion": 9401.072916666666, |
| "/length/completion/max": 24161, |
| "/length/completion/min": 1460, |
| "/length/completion/std": 499.54914422142616, |
| "/length/context": 47311.802083333336, |
| "/length/context/max": 122394, |
| "/length/context/min": 2396, |
| "/length/context/std": 2575.039869424853, |
| "/length/forward": 47316.333333333336, |
| "/length/forward/max": 122400, |
| "/length/forward/min": 2400, |
| "/length/forward/std": 2575.044503166312, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6977272727272728, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.45924277416180914, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6279691211401425, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4833465672109604, |
| "advantages": -0.012113324657122618, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.019199719746732787, |
| "entropy": 0.6626383463541666, |
| "entropy/max": 0.89453125, |
| "entropy/min": 0.494140625, |
| "entropy/std": 0.007544228380313726, |
| "epoch": 0.022, |
| "grad_norm": 232448.0, |
| "learning_rate": 1.9993908270190957e-06, |
| "loss": 111.84316041666666, |
| "out_of_date_ratio": 0.0016641596797247378, |
| "out_of_date_ratio/max": 0.020300446078181267, |
| "out_of_date_ratio/min": 0.00035029338323511183, |
| "out_of_date_ratio/std": 0.00023182703826180737, |
| "rewards": 0.5729166666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.050485472304150465, |
| "sampled_at_step": 21.164862950642902, |
| "sampled_at_step/max": 22.0, |
| "sampled_at_step/min": 20.999998092651367, |
| "sampled_at_step/std": 0.03211382577428944, |
| "scores": 0.49159074982480727, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009357968706678136, |
| "step": 22, |
| "steps": 28.729166666666668, |
| "steps/max": 89, |
| "steps/min": 0, |
| "steps/std": 1.9640032706013673 |
| }, |
| { |
| "/length/completion": 9214.354166666666, |
| "/length/completion/max": 23003, |
| "/length/completion/min": 1919, |
| "/length/completion/std": 387.5235336657432, |
| "/length/context": 48108.802083333336, |
| "/length/context/max": 114013, |
| "/length/context/min": 3056, |
| "/length/context/std": 2795.0509071366123, |
| "/length/forward": 48113.416666666664, |
| "/length/forward/max": 114016, |
| "/length/forward/min": 3064, |
| "/length/forward/std": 2795.03867741772, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.7045177045177046, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4562592558390556, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6330749354005168, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4819658302910577, |
| "advantages": 0.023602484472047516, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.018105340055766753, |
| "entropy": 0.6816813151041666, |
| "entropy/max": 0.96875, |
| "entropy/min": 0.52734375, |
| "entropy/std": 0.009203881162625499, |
| "epoch": 0.023, |
| "grad_norm": 15232.0, |
| "learning_rate": 1.9992750478004735e-06, |
| "loss": 9.265197916666667, |
| "out_of_date_ratio": 0.0013355615161951089, |
| "out_of_date_ratio/max": 0.00573215214535594, |
| "out_of_date_ratio/min": 0.00025896672741509974, |
| "out_of_date_ratio/std": 8.832910798917201e-05, |
| "rewards": 0.65625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.048475287679651785, |
| "sampled_at_step": 22.168105483055115, |
| "sampled_at_step/max": 23.0, |
| "sampled_at_step/min": 21.0, |
| "sampled_at_step/std": 0.050861245309274106, |
| "scores": 0.6652173913043479, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00898273734163259, |
| "step": 23, |
| "steps": 27.75, |
| "steps/max": 81, |
| "steps/min": 1, |
| "steps/std": 1.9785635045827228 |
| }, |
| { |
| "/length/completion": 9490.1875, |
| "/length/completion/max": 29269, |
| "/length/completion/min": 2814, |
| "/length/completion/std": 472.25405473316374, |
| "/length/context": 48921.354166666664, |
| "/length/context/max": 126739, |
| "/length/context/min": 11459, |
| "/length/context/std": 2698.3990532783187, |
| "/length/forward": 48925.833333333336, |
| "/length/forward/max": 126744, |
| "/length/forward/min": 11464, |
| "/length/forward/std": 2698.425859230934, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.7132701421800948, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.45223428270585836, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6399560922063666, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4800128042608059, |
| "advantages": -0.13765028874372895, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01947662744072505, |
| "entropy": 0.6754150390625, |
| "entropy/max": 0.85546875, |
| "entropy/min": 0.48046875, |
| "entropy/std": 0.007579306748481353, |
| "epoch": 0.024, |
| "grad_norm": 16000.0, |
| "learning_rate": 1.999149205893214e-06, |
| "loss": -90.01123125, |
| "out_of_date_ratio": 0.001372865597204509, |
| "out_of_date_ratio/max": 0.010023866780102253, |
| "out_of_date_ratio/min": 0.00016106950351968408, |
| "out_of_date_ratio/std": 0.00013162758332460245, |
| "rewards": 0.625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04941058844013093, |
| "sampled_at_step": 23.256375809510548, |
| "sampled_at_step/max": 24.0, |
| "sampled_at_step/min": 21.6400089263916, |
| "sampled_at_step/std": 0.06531165671866788, |
| "scores": 0.5569913850231941, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009042129178310435, |
| "step": 24, |
| "steps": 30.4375, |
| "steps/max": 99, |
| "steps/min": 3, |
| "steps/std": 2.303464541344009 |
| }, |
| { |
| "/length/completion": 9030.229166666666, |
| "/length/completion/max": 20017, |
| "/length/completion/min": 2032, |
| "/length/completion/std": 404.27926977346874, |
| "/length/context": 43733.875, |
| "/length/context/max": 115111, |
| "/length/context/min": 2969, |
| "/length/context/std": 2356.429606294356, |
| "/length/forward": 43737.833333333336, |
| "/length/forward/max": 115112, |
| "/length/forward/min": 2976, |
| "/length/forward/std": 2356.3995413175926, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.673055242390079, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4690968802724136, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6356711321250328, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4812414611272727, |
| "advantages": -0.14799107142857285, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.020134242058078504, |
| "entropy": 0.6791585286458334, |
| "entropy/max": 0.9296875, |
| "entropy/min": 0.4921875, |
| "entropy/std": 0.009211186287749336, |
| "epoch": 0.025, |
| "grad_norm": 16512.0, |
| "learning_rate": 1.9990133025645437e-06, |
| "loss": -228.33545, |
| "out_of_date_ratio": 0.0014518716141841044, |
| "out_of_date_ratio/max": 0.003765060333535075, |
| "out_of_date_ratio/min": 0.00016504373343195766, |
| "out_of_date_ratio/std": 8.420615578605796e-05, |
| "rewards": 0.5208333333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.050986719290237514, |
| "sampled_at_step": 24.447250723838806, |
| "sampled_at_step/max": 25.000001907348633, |
| "sampled_at_step/min": 23.619524002075195, |
| "sampled_at_step/std": 0.04587732273306496, |
| "scores": 0.4421875, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009815837705506364, |
| "step": 25, |
| "steps": 25.666666666666668, |
| "steps/max": 79, |
| "steps/min": 1, |
| "steps/std": 1.8250087201719623 |
| }, |
| { |
| "/length/completion": 10101.09375, |
| "/length/completion/max": 20788, |
| "/length/completion/min": 3039, |
| "/length/completion/std": 403.0559760239524, |
| "/length/context": 48021.208333333336, |
| "/length/context/max": 112424, |
| "/length/context/min": 7596, |
| "/length/context/std": 2270.2870376868336, |
| "/length/forward": 48025.833333333336, |
| "/length/forward/max": 112432, |
| "/length/forward/min": 7600, |
| "/length/forward/std": 2270.3245285615944, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6428571428571429, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4791574237499546, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6338742393509128, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48174442190669386, |
| "advantages": -0.05486008836524312, |
| "advantages/max": 2.0, |
| "advantages/min": -1.4285714285714286, |
| "advantages/std": 0.018416687139217066, |
| "entropy": 0.6940511067708334, |
| "entropy/max": 0.89453125, |
| "entropy/min": 0.51171875, |
| "entropy/std": 0.008278710396076156, |
| "epoch": 0.026, |
| "grad_norm": 19840.0, |
| "learning_rate": 1.998867339183008e-06, |
| "loss": -140.20670625, |
| "out_of_date_ratio": 0.001547872148269865, |
| "out_of_date_ratio/max": 0.004394118674099445, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 8.020284871100495e-05, |
| "rewards": 0.3229166666666667, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.047723322942373116, |
| "sampled_at_step": 25.289051393667858, |
| "sampled_at_step/max": 26.000001907348633, |
| "sampled_at_step/min": 24.999998092651367, |
| "sampled_at_step/std": 0.037862932441932125, |
| "scores": 0.3286082474226804, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00843075316638251, |
| "step": 26, |
| "steps": 31.333333333333332, |
| "steps/max": 89, |
| "steps/min": 1, |
| "steps/std": 1.9080232341449717 |
| }, |
| { |
| "/length/completion": 8978.75, |
| "/length/completion/max": 18034, |
| "/length/completion/min": 3624, |
| "/length/completion/std": 311.56472994576995, |
| "/length/context": 52919.604166666664, |
| "/length/context/max": 103118, |
| "/length/context/min": 11282, |
| "/length/context/std": 2021.9312112484567, |
| "/length/forward": 52924.333333333336, |
| "/length/forward/max": 103120, |
| "/length/forward/min": 11288, |
| "/length/forward/std": 2021.9383608195633, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6522222222222223, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4762650470711957, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6329268292682927, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4820066991865132, |
| "advantages": -0.07285397529300003, |
| "advantages/max": 2.0, |
| "advantages/min": -1.7142857142857142, |
| "advantages/std": 0.017689748039436834, |
| "entropy": 0.6405843098958334, |
| "entropy/max": 0.81640625, |
| "entropy/min": 0.478515625, |
| "entropy/std": 0.008094466537248381, |
| "epoch": 0.027, |
| "grad_norm": 15616.0, |
| "learning_rate": 1.998711317218456e-06, |
| "loss": -217.17899583333335, |
| "out_of_date_ratio": 0.0014654934244996791, |
| "out_of_date_ratio/max": 0.003957169596105814, |
| "out_of_date_ratio/min": 0.00015121730393730104, |
| "out_of_date_ratio/std": 8.927867169875872e-05, |
| "rewards": 0.4270833333333333, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.050485472304150465, |
| "sampled_at_step": 26.36866702636083, |
| "sampled_at_step/max": 27.000001907348633, |
| "sampled_at_step/min": 25.0, |
| "sampled_at_step/std": 0.050479192137442085, |
| "scores": 0.3801076971808679, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00863921262874711, |
| "step": 27, |
| "steps": 31.885416666666668, |
| "steps/max": 71, |
| "steps/min": 5, |
| "steps/std": 1.6309233516920185 |
| }, |
| { |
| "/length/completion": 10679.104166666666, |
| "/length/completion/max": 26624, |
| "/length/completion/min": 2763, |
| "/length/completion/std": 485.3186727086443, |
| "/length/context": 54714.947916666664, |
| "/length/context/max": 115639, |
| "/length/context/min": 3676, |
| "/length/context/std": 2660.0730577165136, |
| "/length/forward": 54719.416666666664, |
| "/length/forward/max": 115640, |
| "/length/forward/min": 3680, |
| "/length/forward/std": 2660.049047283798, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.677765843179377, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4673321142386053, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6375442739079102, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48070944729133946, |
| "advantages": -0.017792985457656552, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.018213078624609743, |
| "entropy": 0.6812744140625, |
| "entropy/max": 0.859375, |
| "entropy/min": 0.47265625, |
| "entropy/std": 0.007308055562462717, |
| "epoch": 0.028, |
| "grad_norm": 17920.0, |
| "learning_rate": 1.9985452382420274e-06, |
| "loss": -186.76735416666668, |
| "out_of_date_ratio": 0.001270884770140886, |
| "out_of_date_ratio/max": 0.0031341412104666233, |
| "out_of_date_ratio/min": 0.00020686801872216165, |
| "out_of_date_ratio/std": 7.350484083591433e-05, |
| "rewards": 0.6041666666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04991130733147589, |
| "sampled_at_step": 27.260556002457935, |
| "sampled_at_step/max": 28.000001907348633, |
| "sampled_at_step/min": 26.0, |
| "sampled_at_step/std": 0.05604123740619327, |
| "scores": 0.5955089820359282, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00849230074604125, |
| "step": 28, |
| "steps": 33.791666666666664, |
| "steps/max": 92, |
| "steps/min": 1, |
| "steps/std": 2.219846006034937 |
| }, |
| { |
| "/length/completion": 9437.427083333334, |
| "/length/completion/max": 28132, |
| "/length/completion/min": 2891, |
| "/length/completion/std": 436.3822586208799, |
| "/length/context": 51917.46875, |
| "/length/context/max": 118962, |
| "/length/context/min": 10854, |
| "/length/context/std": 2628.8098323681484, |
| "/length/forward": 51922.166666666664, |
| "/length/forward/max": 118968, |
| "/length/forward/min": 10856, |
| "/length/forward/std": 2628.812668929029, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6684901531728665, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.47075584784873803, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6378043178686266, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48063496541217493, |
| "advantages": 0.01068174677976811, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01657294113911273, |
| "entropy": 0.6856689453125, |
| "entropy/max": 0.87890625, |
| "entropy/min": 0.4765625, |
| "entropy/std": 0.006581478913246546, |
| "epoch": 0.029, |
| "grad_norm": 16768.0, |
| "learning_rate": 1.9983691039261353e-06, |
| "loss": 0.014404166666666668, |
| "out_of_date_ratio": 0.001233935588516033, |
| "out_of_date_ratio/max": 0.003753588069230318, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 7.756263237153342e-05, |
| "rewards": 0.6770833333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.047723322942373116, |
| "sampled_at_step": 28.41019606590271, |
| "sampled_at_step/max": 29.000001907348633, |
| "sampled_at_step/min": 27.322860717773438, |
| "sampled_at_step/std": 0.0516098577435189, |
| "scores": 0.6760917373546969, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008294601161650362, |
| "step": 29, |
| "steps": 32.15625, |
| "steps/max": 99, |
| "steps/min": 4, |
| "steps/std": 2.146709374423526 |
| }, |
| { |
| "/length/completion": 11369.114583333334, |
| "/length/completion/max": 23314, |
| "/length/completion/min": 3318, |
| "/length/completion/std": 507.260502736684, |
| "/length/context": 58615.427083333336, |
| "/length/context/max": 113119, |
| "/length/context/min": 16362, |
| "/length/context/std": 2548.0394785382373, |
| "/length/forward": 58619.833333333336, |
| "/length/forward/max": 113120, |
| "/length/forward/min": 16368, |
| "/length/forward/std": 2548.016881319358, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6605691056910569, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.47351616899275406, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6413898601398601, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.47959243889956243, |
| "advantages": -0.00934836403629328, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.016053681878628597, |
| "entropy": 0.6816813151041666, |
| "entropy/max": 0.86328125, |
| "entropy/min": 0.49609375, |
| "entropy/std": 0.007637391596894437, |
| "epoch": 0.03, |
| "grad_norm": 17408.0, |
| "learning_rate": 1.998182916044451e-06, |
| "loss": 372.35253958333334, |
| "out_of_date_ratio": 0.0014495693473387898, |
| "out_of_date_ratio/max": 0.010763758793473244, |
| "out_of_date_ratio/min": 0.00022534365416504443, |
| "out_of_date_ratio/std": 0.0001220856333759772, |
| "rewards": 0.5104166666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05101996066242478, |
| "sampled_at_step": 29.722339312235516, |
| "sampled_at_step/max": 30.000001907348633, |
| "sampled_at_step/min": 28.63516616821289, |
| "sampled_at_step/std": 0.04449124005690786, |
| "scores": 0.46989276876546604, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008275792529581869, |
| "step": 30, |
| "steps": 36.885416666666664, |
| "steps/max": 91, |
| "steps/min": 6, |
| "steps/std": 2.0705791778804565 |
| }, |
| { |
| "/length/completion": 9677.302083333334, |
| "/length/completion/max": 20464, |
| "/length/completion/min": 3520, |
| "/length/completion/std": 360.3757061020706, |
| "/length/context": 47705.333333333336, |
| "/length/context/max": 114634, |
| "/length/context/min": 10643, |
| "/length/context/std": 2297.2822418484816, |
| "/length/forward": 47709.666666666664, |
| "/length/forward/max": 114640, |
| "/length/forward/min": 10648, |
| "/length/forward/std": 2297.297518331373, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6553147574819401, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.47526553221154866, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6407540775259479, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.47977941771174426, |
| "advantages": 0.04816326530612251, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01873621505921223, |
| "entropy": 0.6687418619791666, |
| "entropy/max": 0.84375, |
| "entropy/min": 0.546875, |
| "entropy/std": 0.0069627937479380154, |
| "epoch": 0.031, |
| "grad_norm": 16512.0, |
| "learning_rate": 1.9979866764718843e-06, |
| "loss": 245.54707916666666, |
| "out_of_date_ratio": 0.0014398050516319927, |
| "out_of_date_ratio/max": 0.0034001213498413563, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 8.200511865324555e-05, |
| "rewards": 0.5625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05063078670631141, |
| "sampled_at_step": 30.396397809187572, |
| "sampled_at_step/max": 31.000001907348633, |
| "sampled_at_step/min": 29.999998092651367, |
| "sampled_at_step/std": 0.040759151687636004, |
| "scores": 0.5892857142857143, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00929723578782738, |
| "step": 31, |
| "steps": 28.166666666666668, |
| "steps/max": 81, |
| "steps/min": 3, |
| "steps/std": 1.9046080512020584 |
| }, |
| { |
| "/length/completion": 11456.354166666666, |
| "/length/completion/max": 23502, |
| "/length/completion/min": 3792, |
| "/length/completion/std": 482.2487134353623, |
| "/length/context": 56354.708333333336, |
| "/length/context/max": 97717, |
| "/length/context/min": 12262, |
| "/length/context/std": 2432.4953052653914, |
| "/length/forward": 56359.333333333336, |
| "/length/forward/max": 97720, |
| "/length/forward/min": 12264, |
| "/length/forward/std": 2432.503741765612, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6494845360824743, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.47713140063530446, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6368864243171083, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4808971894674339, |
| "advantages": 0.07540518464382524, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.016593084064504177, |
| "entropy": 0.6540120442708334, |
| "entropy/max": 0.82421875, |
| "entropy/min": 0.4921875, |
| "entropy/std": 0.0068052894061513365, |
| "epoch": 0.032, |
| "grad_norm": 18560.0, |
| "learning_rate": 1.997780387184565e-06, |
| "loss": 354.41247500000003, |
| "out_of_date_ratio": 0.0013360527542924199, |
| "out_of_date_ratio/max": 0.0034993954468518496, |
| "out_of_date_ratio/min": 0.0001190476177725941, |
| "out_of_date_ratio/std": 7.784003677692855e-05, |
| "rewards": 0.5625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05063078670631141, |
| "sampled_at_step": 31.47506093978882, |
| "sampled_at_step/max": 32.0, |
| "sampled_at_step/min": 30.20079231262207, |
| "sampled_at_step/std": 0.046957284692903785, |
| "scores": 0.5853144748721035, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008546511861260894, |
| "step": 32, |
| "steps": 33.614583333333336, |
| "steps/max": 84, |
| "steps/min": 4, |
| "steps/std": 2.050542451925803 |
| }, |
| { |
| "/length/completion": 10748.010416666666, |
| "/length/completion/max": 28022, |
| "/length/completion/min": 2958, |
| "/length/completion/std": 467.90537643482224, |
| "/length/context": 52605.020833333336, |
| "/length/context/max": 119246, |
| "/length/context/min": 6469, |
| "/length/context/std": 2694.21027027666, |
| "/length/forward": 52609.5, |
| "/length/forward/max": 119248, |
| "/length/forward/min": 6472, |
| "/length/forward/std": 2694.183390793798, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.675701839303001, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.46811202042411, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6410408042578356, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.47969520691195516, |
| "advantages": 0.035493036471189295, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01854459908350241, |
| "entropy": 0.6657511393229166, |
| "entropy/max": 0.94921875, |
| "entropy/min": 0.474609375, |
| "entropy/std": 0.00920301048279462, |
| "epoch": 0.033, |
| "grad_norm": 18944.0, |
| "learning_rate": 1.997564050259824e-06, |
| "loss": 32.570058333333336, |
| "out_of_date_ratio": 0.0016721839908010832, |
| "out_of_date_ratio/max": 0.03893996775150299, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 0.00039718575078297327, |
| "rewards": 0.6041666666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04991130733147588, |
| "sampled_at_step": 32.382101813952126, |
| "sampled_at_step/max": 33.0, |
| "sampled_at_step/min": 31.0, |
| "sampled_at_step/std": 0.05291823048647063, |
| "scores": 0.5575480925986306, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008968448426516548, |
| "step": 33, |
| "steps": 30.947916666666668, |
| "steps/max": 83, |
| "steps/min": 0, |
| "steps/std": 2.1503476882781585 |
| }, |
| { |
| "/length/completion": 9198.875, |
| "/length/completion/max": 21159, |
| "/length/completion/min": 1864, |
| "/length/completion/std": 383.6682451283641, |
| "/length/context": 47677.3125, |
| "/length/context/max": 127958, |
| "/length/context/min": 2721, |
| "/length/context/std": 2562.6787387829513, |
| "/length/forward": 47681.916666666664, |
| "/length/forward/max": 127960, |
| "/length/forward/min": 2728, |
| "/length/forward/std": 2562.679914401653, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6801579466929911, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4664151736847391, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6436405618626131, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.47892315561872006, |
| "advantages": -0.0682769007642093, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01756966769979579, |
| "entropy": 0.6900634765625, |
| "entropy/max": 0.8828125, |
| "entropy/min": 0.43359375, |
| "entropy/std": 0.008183432081512754, |
| "epoch": 0.034, |
| "grad_norm": 15040.0, |
| "learning_rate": 1.997337667876172e-06, |
| "loss": -356.5561125, |
| "out_of_date_ratio": 0.001257536003322457, |
| "out_of_date_ratio/max": 0.005396825261414051, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 9.732487820129926e-05, |
| "rewards": 0.6875, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04730703678277331, |
| "sampled_at_step": 33.30234805742899, |
| "sampled_at_step/max": 34.0, |
| "sampled_at_step/min": 33.0, |
| "sampled_at_step/std": 0.03829985984963102, |
| "scores": 0.5903943771964076, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009717381918579165, |
| "step": 34, |
| "steps": 25.677083333333332, |
| "steps/max": 92, |
| "steps/min": 1, |
| "steps/std": 1.7743528011009773 |
| }, |
| { |
| "/length/completion": 9827.28125, |
| "/length/completion/max": 22478, |
| "/length/completion/min": 1481, |
| "/length/completion/std": 426.35430911083745, |
| "/length/context": 52099.104166666664, |
| "/length/context/max": 121980, |
| "/length/context/min": 4481, |
| "/length/context/std": 2602.116649626552, |
| "/length/forward": 52103.75, |
| "/length/forward/max": 121984, |
| "/length/forward/min": 4488, |
| "/length/forward/std": 2602.108545255828, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6769078295341923, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.46765758825181203, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6439947536068953, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.478816782218094, |
| "advantages": -0.11816995990144273, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01787321571590539, |
| "entropy": 0.6913859049479166, |
| "entropy/max": 0.92578125, |
| "entropy/min": 0.462890625, |
| "entropy/std": 0.009919730611026047, |
| "epoch": 0.035, |
| "grad_norm": 16064.0, |
| "learning_rate": 1.9971012423132772e-06, |
| "loss": -428.37520416666666, |
| "out_of_date_ratio": 0.0012809614311966773, |
| "out_of_date_ratio/max": 0.004535594489425421, |
| "out_of_date_ratio/min": 0.00010249052138533443, |
| "out_of_date_ratio/std": 8.340348975318018e-05, |
| "rewards": 0.625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04941058844013093, |
| "sampled_at_step": 34.23181116580963, |
| "sampled_at_step/max": 35.0, |
| "sampled_at_step/min": 33.0, |
| "sampled_at_step/std": 0.05921732305282789, |
| "scores": 0.5302671626648631, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009177981231588788, |
| "step": 35, |
| "steps": 29.802083333333332, |
| "steps/max": 77, |
| "steps/min": 0, |
| "steps/std": 1.7894620515550024 |
| }, |
| { |
| "/length/completion": 9422.875, |
| "/length/completion/max": 23769, |
| "/length/completion/min": 2900, |
| "/length/completion/std": 459.94317547239297, |
| "/length/context": 49770.354166666664, |
| "/length/context/max": 126792, |
| "/length/context/min": 6995, |
| "/length/context/std": 2917.8340353953995, |
| "/length/forward": 49774.916666666664, |
| "/length/forward/max": 126800, |
| "/length/forward/min": 7000, |
| "/length/forward/std": 2917.8420979566768, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.678646934460888, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4669960094130649, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6463280964559737, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4781088664599017, |
| "advantages": -0.01185733257446363, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01525715744572812, |
| "entropy": 0.7012125651041666, |
| "entropy/max": 1.0078125, |
| "entropy/min": 0.5234375, |
| "entropy/std": 0.008509193891243078, |
| "epoch": 0.036, |
| "grad_norm": 13440.0, |
| "learning_rate": 1.9968547759519425e-06, |
| "loss": -72.75817916666666, |
| "out_of_date_ratio": 0.0014415098589779518, |
| "out_of_date_ratio/max": 0.004487856291234493, |
| "out_of_date_ratio/min": 0.00010764262697193772, |
| "out_of_date_ratio/std": 9.46737329751352e-05, |
| "rewards": 0.5833333333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05031728036871333, |
| "sampled_at_step": 35.245263735453285, |
| "sampled_at_step/max": 36.000003814697266, |
| "sampled_at_step/min": 34.0, |
| "sampled_at_step/std": 0.051642521621436184, |
| "scores": 0.5703851261620186, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009019786682503346, |
| "step": 36, |
| "steps": 30.375, |
| "steps/max": 96, |
| "steps/min": 2, |
| "steps/std": 2.3679193611046903 |
| }, |
| { |
| "/length/completion": 9100.052083333334, |
| "/length/completion/max": 24602, |
| "/length/completion/min": 2334, |
| "/length/completion/std": 462.026403863143, |
| "/length/context": 48322.8125, |
| "/length/context/max": 104864, |
| "/length/context/min": 3297, |
| "/length/context/std": 2566.345017526433, |
| "/length/forward": 48327.333333333336, |
| "/length/forward/max": 104872, |
| "/length/forward/min": 3304, |
| "/length/forward/std": 2566.355532188125, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6878363832077503, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4633761896595802, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6477414747366542, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.47767400666420845, |
| "advantages": 0.008175892166610221, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.017455894236115342, |
| "entropy": 0.7001139322916666, |
| "entropy/max": 0.9375, |
| "entropy/min": 0.52734375, |
| "entropy/std": 0.00808525356253917, |
| "epoch": 0.037, |
| "grad_norm": 30336.0, |
| "learning_rate": 1.9965982712740806e-06, |
| "loss": 11.875345833333334, |
| "out_of_date_ratio": 0.0015731908315501641, |
| "out_of_date_ratio/max": 0.015670742839574814, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 0.00018833875065268633, |
| "rewards": 0.6354166666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04912381533653095, |
| "sampled_at_step": 36.266119639078774, |
| "sampled_at_step/max": 37.0, |
| "sampled_at_step/min": 35.0, |
| "sampled_at_step/std": 0.062039810694395646, |
| "scores": 0.654292343387471, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009352466428458224, |
| "step": 37, |
| "steps": 25.9375, |
| "steps/max": 72, |
| "steps/min": 1, |
| "steps/std": 1.6376253994075451 |
| }, |
| { |
| "/length/completion": 10135.427083333334, |
| "/length/completion/max": 30915, |
| "/length/completion/min": 3327, |
| "/length/completion/std": 468.35185469585446, |
| "/length/context": 53150.572916666664, |
| "/length/context/max": 122558, |
| "/length/context/min": 15804, |
| "/length/context/std": 2433.6580449971475, |
| "/length/forward": 53155.25, |
| "/length/forward/max": 122560, |
| "/length/forward/min": 15808, |
| "/length/forward/std": 2433.6683033657837, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6797385620915033, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4665769489562024, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6460006985679357, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.47820894598248015, |
| "advantages": -0.1376781704019445, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01651928713975243, |
| "entropy": 0.6912027994791666, |
| "entropy/max": 0.83203125, |
| "entropy/min": 0.53125, |
| "entropy/std": 0.00708367145695894, |
| "epoch": 0.038, |
| "grad_norm": 15680.0, |
| "learning_rate": 1.996331730862691e-06, |
| "loss": -134.66802916666668, |
| "out_of_date_ratio": 0.0013084049907471733, |
| "out_of_date_ratio/max": 0.003387369913980365, |
| "out_of_date_ratio/min": 0.0002214839478256181, |
| "out_of_date_ratio/std": 7.499232801296063e-05, |
| "rewards": 0.53125, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05093126879064569, |
| "sampled_at_step": 37.08955097198486, |
| "sampled_at_step/max": 38.000003814697266, |
| "sampled_at_step/min": 36.19251251220703, |
| "sampled_at_step/std": 0.041960562771199195, |
| "scores": 0.4007058068655759, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008777370925423156, |
| "step": 38, |
| "steps": 31.46875, |
| "steps/max": 98, |
| "steps/min": 4, |
| "steps/std": 1.941804868393939 |
| }, |
| { |
| "/length/completion": 9828.083333333334, |
| "/length/completion/max": 23061, |
| "/length/completion/min": 1558, |
| "/length/completion/std": 470.9235785716673, |
| "/length/context": 51597.614583333336, |
| "/length/context/max": 109570, |
| "/length/context/min": 4229, |
| "/length/context/std": 2691.8637708957626, |
| "/length/forward": 51602.166666666664, |
| "/length/forward/max": 109576, |
| "/length/forward/min": 4232, |
| "/length/forward/std": 2691.8928374513284, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6431095406360424, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.47908210087514325, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6410693001872978, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.47968682757051556, |
| "advantages": -0.13596059113300493, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.018862637046954452, |
| "entropy": 0.6741943359375, |
| "entropy/max": 0.91015625, |
| "entropy/min": 0.4609375, |
| "entropy/std": 0.00861796389963598, |
| "epoch": 0.039, |
| "grad_norm": 16384.0, |
| "learning_rate": 1.996055157401834e-06, |
| "loss": -246.81274374999998, |
| "out_of_date_ratio": 0.0014630949255357943, |
| "out_of_date_ratio/max": 0.004159239586442709, |
| "out_of_date_ratio/min": 0.0001836041483329609, |
| "out_of_date_ratio/std": 9.1284843466261e-05, |
| "rewards": 0.5625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05063078670631141, |
| "sampled_at_step": 38.16589645544688, |
| "sampled_at_step/max": 39.0, |
| "sampled_at_step/min": 37.30094528198242, |
| "sampled_at_step/std": 0.036066542168659795, |
| "scores": 0.503448275862069, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009284546103208795, |
| "step": 39, |
| "steps": 29.208333333333332, |
| "steps/max": 89, |
| "steps/min": 1, |
| "steps/std": 1.9946715404793218 |
| }, |
| { |
| "/length/completion": 10498.177083333334, |
| "/length/completion/max": 20891, |
| "/length/completion/min": 2779, |
| "/length/completion/std": 468.1356710498424, |
| "/length/context": 56444.9375, |
| "/length/context/max": 124660, |
| "/length/context/min": 9841, |
| "/length/context/std": 2764.127233626301, |
| "/length/forward": 56449.5, |
| "/length/forward/max": 124664, |
| "/length/forward/min": 9848, |
| "/length/forward/std": 2764.099841701785, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6257744733581165, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4839222890624985, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6392202991093934, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4802267259492034, |
| "advantages": -0.047900650502660895, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.015531604552016694, |
| "entropy": 0.6410319010416666, |
| "entropy/max": 0.796875, |
| "entropy/min": 0.478515625, |
| "entropy/std": 0.00667534252894688, |
| "epoch": 0.04, |
| "grad_norm": 16512.0, |
| "learning_rate": 1.9957685536765995e-06, |
| "loss": -490.44510833333334, |
| "out_of_date_ratio": 0.0016274743311441853, |
| "out_of_date_ratio/max": 0.009337756782770157, |
| "out_of_date_ratio/min": 0.00029226948390714824, |
| "out_of_date_ratio/std": 0.00011572550944232628, |
| "rewards": 0.4166666666666667, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05031728036871333, |
| "sampled_at_step": 39.08427309989929, |
| "sampled_at_step/max": 39.60692596435547, |
| "sampled_at_step/min": 38.14115524291992, |
| "sampled_at_step/std": 0.02244150888420229, |
| "scores": 0.4544648137196925, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008561989957449728, |
| "step": 40, |
| "steps": 34.229166666666664, |
| "steps/max": 98, |
| "steps/min": 3, |
| "steps/std": 2.1958411985285036 |
| }, |
| { |
| "/length/completion": 8468.854166666666, |
| "/length/completion/max": 16865, |
| "/length/completion/min": 3451, |
| "/length/completion/std": 290.9413056124056, |
| "/length/context": 45148.833333333336, |
| "/length/context/max": 94495, |
| "/length/context/min": 17906, |
| "/length/context/std": 1885.4234569782568, |
| "/length/forward": 45153.0, |
| "/length/forward/max": 94496, |
| "/length/forward/min": 17912, |
| "/length/forward/std": 1885.4056712386564, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6025316455696202, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4893743573756043, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6380216891225764, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48057258903494593, |
| "advantages": -0.1638573108584894, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.020575259221953697, |
| "entropy": 0.6600748697916666, |
| "entropy/max": 0.8203125, |
| "entropy/min": 0.515625, |
| "entropy/std": 0.005811703397960358, |
| "epoch": 0.041, |
| "grad_norm": 14784.0, |
| "learning_rate": 1.9954719225730845e-06, |
| "loss": -400.2380375, |
| "out_of_date_ratio": 0.0013619511713235017, |
| "out_of_date_ratio/max": 0.01208761241286993, |
| "out_of_date_ratio/min": 0.00017220595327671617, |
| "out_of_date_ratio/std": 0.00013614273060853817, |
| "rewards": 0.625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.04941058844013093, |
| "sampled_at_step": 40.14164388179779, |
| "sampled_at_step/max": 41.0, |
| "sampled_at_step/min": 39.0, |
| "sampled_at_step/std": 0.059141493917955895, |
| "scores": 0.5049000392003136, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00989905914015969, |
| "step": 41, |
| "steps": 25.572916666666668, |
| "steps/max": 82, |
| "steps/min": 6, |
| "steps/std": 1.6592115026998668 |
| }, |
| { |
| "/length/completion": 9004.208333333334, |
| "/length/completion/max": 22447, |
| "/length/completion/min": 3307, |
| "/length/completion/std": 401.7670371646414, |
| "/length/context": 48468.375, |
| "/length/context/max": 118308, |
| "/length/context/min": 4344, |
| "/length/context/std": 2498.548039093242, |
| "/length/forward": 48472.75, |
| "/length/forward/max": 118312, |
| "/length/forward/min": 4352, |
| "/length/forward/std": 2498.565013379694, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5725190839694656, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.49471303041281367, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6363929146537842, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4810373923430952, |
| "advantages": -0.150188230632059, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.01863469859831098, |
| "entropy": 0.6724446614583334, |
| "entropy/max": 0.88671875, |
| "entropy/min": 0.5078125, |
| "entropy/std": 0.007728973447664365, |
| "epoch": 0.042, |
| "grad_norm": 15872.0, |
| "learning_rate": 1.995165267078361e-06, |
| "loss": -295.17856875, |
| "out_of_date_ratio": 0.0015124180954444455, |
| "out_of_date_ratio/max": 0.012281018309295177, |
| "out_of_date_ratio/min": 0.0002090737980324775, |
| "out_of_date_ratio/std": 0.0001382285172342395, |
| "rewards": 0.5833333333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05031728036871333, |
| "sampled_at_step": 41.327045361200966, |
| "sampled_at_step/max": 42.0, |
| "sampled_at_step/min": 40.344154357910156, |
| "sampled_at_step/std": 0.04198771784926251, |
| "scores": 0.478502080443828, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009301876674003574, |
| "step": 42, |
| "steps": 29.041666666666668, |
| "steps/max": 87, |
| "steps/min": 1, |
| "steps/std": 1.926881558505675 |
| }, |
| { |
| "/length/completion": 10251.0625, |
| "/length/completion/max": 23711, |
| "/length/completion/min": 3195, |
| "/length/completion/std": 406.6798169120462, |
| "/length/context": 57392.572916666664, |
| "/length/context/max": 128658, |
| "/length/context/min": 6731, |
| "/length/context/std": 2686.198047917464, |
| "/length/forward": 57397.416666666664, |
| "/length/forward/max": 128664, |
| "/length/forward/min": 6736, |
| "/length/forward/std": 2686.221671415675, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5697399527186762, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.49511245085818306, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6366197183098592, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48097302686214627, |
| "advantages": -0.040922190201730727, |
| "advantages/max": 2.0, |
| "advantages/min": -1.7142857142857142, |
| "advantages/std": 0.018565406605664785, |
| "entropy": 0.7135823567708334, |
| "entropy/max": 0.9140625, |
| "entropy/min": 0.53515625, |
| "entropy/std": 0.008849231390132668, |
| "epoch": 0.043, |
| "grad_norm": 19584.0, |
| "learning_rate": 1.994848590280447e-06, |
| "loss": -46.936462500000005, |
| "out_of_date_ratio": 0.0016592781415359543, |
| "out_of_date_ratio/max": 0.005419677589088678, |
| "out_of_date_ratio/min": 0.00015295197954401374, |
| "out_of_date_ratio/std": 9.289293381818487e-05, |
| "rewards": 0.4375, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05063078670631141, |
| "sampled_at_step": 42.43688189983368, |
| "sampled_at_step/max": 43.000003814697266, |
| "sampled_at_step/min": 41.0, |
| "sampled_at_step/std": 0.04539542797800991, |
| "scores": 0.39020172910662826, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00828081250988518, |
| "step": 43, |
| "steps": 35.145833333333336, |
| "steps/max": 97, |
| "steps/min": 1, |
| "steps/std": 2.2647557799388003 |
| }, |
| { |
| "/length/completion": 9909.958333333334, |
| "/length/completion/max": 25309, |
| "/length/completion/min": 2894, |
| "/length/completion/std": 413.7526306239452, |
| "/length/context": 50159.177083333336, |
| "/length/context/max": 119338, |
| "/length/context/min": 9507, |
| "/length/context/std": 2976.311486937006, |
| "/length/forward": 50163.583333333336, |
| "/length/forward/max": 119344, |
| "/length/forward/min": 9512, |
| "/length/forward/std": 2976.3120073100904, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5692137320044297, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.49518628747373306, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6355764848853106, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4812681339400569, |
| "advantages": -0.030305856023870745, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.016289078510691973, |
| "entropy": 0.6901448567708334, |
| "entropy/max": 0.9140625, |
| "entropy/min": 0.48046875, |
| "entropy/std": 0.008722974198219498, |
| "epoch": 0.044, |
| "grad_norm": 15232.0, |
| "learning_rate": 1.994521895368273e-06, |
| "loss": -118.76766041666667, |
| "out_of_date_ratio": 0.0013302226734595024, |
| "out_of_date_ratio/max": 0.005487493705004454, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 8.94447647063914e-05, |
| "rewards": 0.59375, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05012598061177124, |
| "sampled_at_step": 43.42779644330343, |
| "sampled_at_step/max": 44.000003814697266, |
| "sampled_at_step/min": 42.6025276184082, |
| "sampled_at_step/std": 0.046298369511212654, |
| "scores": 0.48204960835509136, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009027044213490274, |
| "step": 44, |
| "steps": 30.916666666666668, |
| "steps/max": 91, |
| "steps/min": 2, |
| "steps/std": 2.3740251410569284 |
| }, |
| { |
| "/length/completion": 9256.635416666666, |
| "/length/completion/max": 22972, |
| "/length/completion/min": 3043, |
| "/length/completion/std": 451.1958094288279, |
| "/length/context": 46203.208333333336, |
| "/length/context/max": 112819, |
| "/length/context/min": 12488, |
| "/length/context/std": 2378.974874873011, |
| "/length/forward": 46207.833333333336, |
| "/length/forward/max": 112824, |
| "/length/forward/min": 12496, |
| "/length/forward/std": 2378.9563187198955, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6008537886872999, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4897228944080695, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6373756865073474, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4807576528321059, |
| "advantages": -0.0014897579143391263, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.018824250703849507, |
| "entropy": 0.6787516276041666, |
| "entropy/max": 0.8515625, |
| "entropy/min": 0.53125, |
| "entropy/std": 0.007416265674992091, |
| "epoch": 0.045, |
| "grad_norm": 16512.0, |
| "learning_rate": 1.9941851856316543e-06, |
| "loss": -221.58431041666665, |
| "out_of_date_ratio": 0.001474499657585208, |
| "out_of_date_ratio/max": 0.003614853834733367, |
| "out_of_date_ratio/min": 0.000163612567121163, |
| "out_of_date_ratio/std": 8.391956347657486e-05, |
| "rewards": 0.5208333333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05098671929023751, |
| "sampled_at_step": 44.238667726516724, |
| "sampled_at_step/max": 45.0, |
| "sampled_at_step/min": 43.0, |
| "sampled_at_step/std": 0.04742509657311229, |
| "scores": 0.539292364990689, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009619504357267591, |
| "step": 45, |
| "steps": 26.96875, |
| "steps/max": 75, |
| "steps/min": 4, |
| "steps/std": 1.8648133659897332 |
| }, |
| { |
| "/length/completion": 10171.645833333334, |
| "/length/completion/max": 18897, |
| "/length/completion/min": 1419, |
| "/length/completion/std": 372.87753867676923, |
| "/length/context": 46847.260416666664, |
| "/length/context/max": 118579, |
| "/length/context/min": 12678, |
| "/length/context/std": 2387.7613746383, |
| "/length/forward": 46851.5, |
| "/length/forward/max": 118584, |
| "/length/forward/min": 12680, |
| "/length/forward/std": 2387.756631967953, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.602711157455683, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4893367124323641, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6342740762292697, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48163313055974066, |
| "advantages": 0.002143392991105286, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.020591038312953034, |
| "entropy": 0.71630859375, |
| "entropy/max": 0.9765625, |
| "entropy/min": 0.5703125, |
| "entropy/std": 0.008231126334650838, |
| "epoch": 0.046, |
| "grad_norm": 17664.0, |
| "learning_rate": 1.993838464461254e-06, |
| "loss": -98.02665208333333, |
| "out_of_date_ratio": 0.00150063132878131, |
| "out_of_date_ratio/max": 0.014094432815909386, |
| "out_of_date_ratio/min": 0.00014930944598745555, |
| "out_of_date_ratio/std": 0.00016047740447908465, |
| "rewards": 0.5520833333333334, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05075342008066314, |
| "sampled_at_step": 45.25676174958547, |
| "sampled_at_step/max": 46.000003814697266, |
| "sampled_at_step/min": 44.0, |
| "sampled_at_step/std": 0.05679606363792777, |
| "scores": 0.5375093773443361, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009656381511994647, |
| "step": 46, |
| "steps": 26.770833333333332, |
| "steps/max": 99, |
| "steps/min": 1, |
| "steps/std": 2.0040366797041256 |
| }, |
| { |
| "/length/completion": 11373.427083333334, |
| "/length/completion/max": 27384, |
| "/length/completion/min": 3119, |
| "/length/completion/std": 528.5598256687088, |
| "/length/context": 53539.96875, |
| "/length/context/max": 128783, |
| "/length/context/min": 4073, |
| "/length/context/std": 3093.0401978765053, |
| "/length/forward": 53544.416666666664, |
| "/length/forward/max": 128784, |
| "/length/forward/min": 4080, |
| "/length/forward/std": 3093.016143871078, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5946775844421699, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4909543308742636, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6324810741322668, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4821294068989783, |
| "advantages": -0.06990488503093467, |
| "advantages/max": 2.0, |
| "advantages/min": -1.7142857142857142, |
| "advantages/std": 0.017079460019094654, |
| "entropy": 0.7022705078125, |
| "entropy/max": 0.9921875, |
| "entropy/min": 0.51953125, |
| "entropy/std": 0.008319350894964734, |
| "epoch": 0.047, |
| "grad_norm": 18432.0, |
| "learning_rate": 1.9934817353485502e-06, |
| "loss": -573.1879875, |
| "out_of_date_ratio": 0.0016289489382567506, |
| "out_of_date_ratio/max": 0.00349434744566679, |
| "out_of_date_ratio/min": 0.00033151003299281, |
| "out_of_date_ratio/std": 7.278455981428102e-05, |
| "rewards": 0.40625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05012598061177124, |
| "sampled_at_step": 46.28057046731313, |
| "sampled_at_step/max": 47.000003814697266, |
| "sampled_at_step/min": 45.32149124145508, |
| "sampled_at_step/std": 0.041052582981975456, |
| "scores": 0.3610213316095669, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008634743818926866, |
| "step": 47, |
| "steps": 31.229166666666668, |
| "steps/max": 89, |
| "steps/min": 1, |
| "steps/std": 2.263054296677534 |
| }, |
| { |
| "/length/completion": 12594.364583333334, |
| "/length/completion/max": 27909, |
| "/length/completion/min": 4662, |
| "/length/completion/std": 465.0296403803164, |
| "/length/context": 62327.614583333336, |
| "/length/context/max": 128086, |
| "/length/context/min": 11295, |
| "/length/context/std": 2814.866780361495, |
| "/length/forward": 62332.166666666664, |
| "/length/forward/max": 128088, |
| "/length/forward/min": 11296, |
| "/length/forward/std": 2814.867033516383, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6015779092702169, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4895732104070766, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.631578947368421, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4823763889427196, |
| "advantages": 0.07370510176285243, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.017063208956500202, |
| "entropy": 0.6767171223958334, |
| "entropy/max": 0.875, |
| "entropy/min": 0.462890625, |
| "entropy/std": 0.008830032891094176, |
| "epoch": 0.048, |
| "grad_norm": 20352.0, |
| "learning_rate": 1.993115001885801e-06, |
| "loss": 524.9152833333334, |
| "out_of_date_ratio": 0.0015428930751113512, |
| "out_of_date_ratio/max": 0.010222065262496471, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 0.00012055963656145637, |
| "rewards": 0.4895833333333333, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05101996066242478, |
| "sampled_at_step": 47.23998463153839, |
| "sampled_at_step/max": 48.0, |
| "sampled_at_step/min": 46.0, |
| "sampled_at_step/std": 0.05458848832887572, |
| "scores": 0.4832470716426042, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.008247719765965627, |
| "step": 48, |
| "steps": 37.239583333333336, |
| "steps/max": 93, |
| "steps/min": 3, |
| "steps/std": 2.3124880251035043 |
| }, |
| { |
| "/length/completion": 9643.739583333334, |
| "/length/completion/max": 25244, |
| "/length/completion/min": 2278, |
| "/length/completion/std": 581.9111320393498, |
| "/length/context": 45923.697916666664, |
| "/length/context/max": 115489, |
| "/length/context/min": 10414, |
| "/length/context/std": 2683.867219109207, |
| "/length/forward": 45928.166666666664, |
| "/length/forward/max": 115496, |
| "/length/forward/min": 10416, |
| "/length/forward/std": 2683.864334052588, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6012861736334405, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.48963364981452884, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6317237584261934, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.482336865132739, |
| "advantages": -0.010672928117828497, |
| "advantages/max": 2.0, |
| "advantages/min": -1.7142857142857142, |
| "advantages/std": 0.01988726664862735, |
| "entropy": 0.688232421875, |
| "entropy/max": 0.8671875, |
| "entropy/min": 0.458984375, |
| "entropy/std": 0.008185300181255748, |
| "epoch": 0.049, |
| "grad_norm": 16512.0, |
| "learning_rate": 1.9927382677660083e-06, |
| "loss": 149.77001875, |
| "out_of_date_ratio": 0.001472773932164273, |
| "out_of_date_ratio/max": 0.003977461252361536, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 9.025339034445814e-05, |
| "rewards": 0.5104166666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05101996066242478, |
| "sampled_at_step": 48.107348243395485, |
| "sampled_at_step/max": 49.0, |
| "sampled_at_step/min": 47.46670913696289, |
| "sampled_at_step/std": 0.03662542993500882, |
| "scores": 0.5666791184161375, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009577435331342692, |
| "step": 49, |
| "steps": 26.885416666666668, |
| "steps/max": 90, |
| "steps/min": 4, |
| "steps/std": 2.237565456816576 |
| }, |
| { |
| "/length/completion": 9139.177083333334, |
| "/length/completion/max": 17365, |
| "/length/completion/min": 2460, |
| "/length/completion/std": 388.9433452061105, |
| "/length/context": 42846.822916666664, |
| "/length/context/max": 128316, |
| "/length/context/min": 9876, |
| "/length/context/std": 2074.138494759108, |
| "/length/forward": 42851.333333333336, |
| "/length/forward/max": 128320, |
| "/length/forward/min": 9880, |
| "/length/forward/std": 2074.1248980214746, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6177437020810514, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.48593870047594334, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6335445597950654, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48183591662426184, |
| "advantages": -0.1057565883496267, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.021120893835498165, |
| "entropy": 0.67254638671875, |
| "entropy/max": 0.8671875, |
| "entropy/min": 0.482421875, |
| "entropy/std": 0.00796678719028089, |
| "epoch": 0.05, |
| "grad_norm": 15488.0, |
| "learning_rate": 1.992351536782881e-06, |
| "loss": -225.88562083333332, |
| "out_of_date_ratio": 0.0011801550923943676, |
| "out_of_date_ratio/max": 0.0036585365887731314, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 7.948650594591365e-05, |
| "rewards": 0.65625, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.048475287679651785, |
| "sampled_at_step": 49.28301433722178, |
| "sampled_at_step/max": 50.000003814697266, |
| "sampled_at_step/min": 47.999996185302734, |
| "sampled_at_step/std": 0.07208558500765914, |
| "scores": 0.5746298519407763, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009889958472670886, |
| "step": 50, |
| "steps": 25.03125, |
| "steps/max": 96, |
| "steps/min": 4, |
| "steps/std": 1.8162947061031707 |
| }, |
| { |
| "/length/completion": 9401.333333333334, |
| "/length/completion/max": 21765, |
| "/length/completion/min": 3213, |
| "/length/completion/std": 379.3273187781392, |
| "/length/context": 52463.145833333336, |
| "/length/context/max": 125646, |
| "/length/context/min": 7832, |
| "/length/context/std": 2379.3423480716424, |
| "/length/forward": 52467.5, |
| "/length/forward/max": 125648, |
| "/length/forward/min": 7840, |
| "/length/forward/std": 2379.32203174122, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.5954022988505747, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4908140191294725, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6323256430654999, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4821720898053945, |
| "advantages": 0.021790943139257633, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.017091281548428414, |
| "entropy": 0.6608072916666666, |
| "entropy/max": 0.84375, |
| "entropy/min": 0.515625, |
| "entropy/std": 0.007203795776861115, |
| "epoch": 0.051, |
| "grad_norm": 14400.0, |
| "learning_rate": 1.991954812830795e-06, |
| "loss": 254.4839875, |
| "out_of_date_ratio": 0.0011251259766898631, |
| "out_of_date_ratio/max": 0.003933531232178211, |
| "out_of_date_ratio/min": 0.0, |
| "out_of_date_ratio/std": 7.5655512414866e-05, |
| "rewards": 0.75, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.044194173824159216, |
| "sampled_at_step": 50.112096428871155, |
| "sampled_at_step/max": 51.000003814697266, |
| "sampled_at_step/min": 48.0, |
| "sampled_at_step/std": 0.054218936651792234, |
| "scores": 0.7402110997616616, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.00809163192240093, |
| "step": 51, |
| "steps": 29.59375, |
| "steps/max": 89, |
| "steps/min": 2, |
| "steps/std": 1.7793043787575034 |
| }, |
| { |
| "/length/completion": 12228.010416666666, |
| "/length/completion/max": 22997, |
| "/length/completion/min": 3270, |
| "/length/completion/std": 500.05629641725665, |
| "/length/context": 51091.479166666664, |
| "/length/context/max": 115132, |
| "/length/context/min": 6674, |
| "/length/context/std": 2259.578293306198, |
| "/length/forward": 51096.0, |
| "/length/forward/max": 115136, |
| "/length/forward/min": 6680, |
| "/length/forward/std": 2259.560576749382, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6038374717832957, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4890989464859371, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6317769130998703, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.4823223457127706, |
| "advantages": -0.08217029951387875, |
| "advantages/max": 1.7142857142857144, |
| "advantages/min": -2.0, |
| "advantages/std": 0.020094432251289217, |
| "entropy": 0.7004801432291666, |
| "entropy/max": 0.89453125, |
| "entropy/min": 0.5546875, |
| "entropy/std": 0.007632295410241296, |
| "epoch": 0.052, |
| "grad_norm": 21760.0, |
| "learning_rate": 1.991548099904757e-06, |
| "loss": 52.64125625, |
| "out_of_date_ratio": 0.001651031008426192, |
| "out_of_date_ratio/max": 0.03822629898786545, |
| "out_of_date_ratio/min": 0.00011681560863507912, |
| "out_of_date_ratio/std": 0.0003895121769529702, |
| "rewards": 0.4583333333333333, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05085353651346116, |
| "sampled_at_step": 51.44491422176361, |
| "sampled_at_step/max": 52.000003814697266, |
| "sampled_at_step/min": 50.419376373291016, |
| "sampled_at_step/std": 0.04813901003363954, |
| "scores": 0.4376143432125869, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009489494447882852, |
| "step": 52, |
| "steps": 27.46875, |
| "steps/max": 77, |
| "steps/min": 2, |
| "steps/std": 1.643389081957216 |
| }, |
| { |
| "/length/completion": 8772.604166666666, |
| "/length/completion/max": 21070, |
| "/length/completion/min": 1752, |
| "/length/completion/std": 421.9746077057603, |
| "/length/context": 43895.510416666664, |
| "/length/context/max": 119424, |
| "/length/context/min": 3176, |
| "/length/context/std": 2951.941704876966, |
| "/length/forward": 43899.833333333336, |
| "/length/forward/max": 119432, |
| "/length/forward/min": 3184, |
| "/length/forward/std": 2951.9432746807433, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6261491317671093, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4838247581040109, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6313137674770122, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48244864438714447, |
| "advantages": 0.014319014319014235, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.02023509036850466, |
| "entropy": 0.6823933919270834, |
| "entropy/max": 0.90234375, |
| "entropy/min": 0.455078125, |
| "entropy/std": 0.009506664852205228, |
| "epoch": 0.053, |
| "grad_norm": 15296.0, |
| "learning_rate": 1.991131402100361e-06, |
| "loss": 316.06665, |
| "out_of_date_ratio": 0.0015899745600715203, |
| "out_of_date_ratio/max": 0.005576208233833313, |
| "out_of_date_ratio/min": 0.0003133813734166324, |
| "out_of_date_ratio/std": 0.00010189227208870029, |
| "rewards": 0.5104166666666666, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05101996066242478, |
| "sampled_at_step": 52.58958820501963, |
| "sampled_at_step/max": 53.000003814697266, |
| "sampled_at_step/min": 51.999996185302734, |
| "sampled_at_step/std": 0.0446586193431428, |
| "scores": 0.4829059829059829, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009849445426304237, |
| "step": 53, |
| "steps": 25.8125, |
| "steps/max": 92, |
| "steps/min": 1, |
| "steps/std": 2.317763927235868 |
| }, |
| { |
| "/length/completion": 10806.510416666666, |
| "/length/completion/max": 26634, |
| "/length/completion/min": 900, |
| "/length/completion/std": 557.6911313642544, |
| "/length/context": 48856.885416666664, |
| "/length/context/max": 118755, |
| "/length/context/min": 3450, |
| "/length/context/std": 3002.7807003203775, |
| "/length/forward": 48861.666666666664, |
| "/length/forward/max": 118760, |
| "/length/forward/min": 3456, |
| "/length/forward/std": 3002.77547056392, |
| "/record/score/last_5_max": 1.0, |
| "/record/score/last_5_mean": 0.6335952848722987, |
| "/record/score/last_5_min": 0.0, |
| "/record/score/last_5_std": 0.4818218549006356, |
| "/record/score/max": 1.0, |
| "/record/score/mean": 0.6321173280749323, |
| "/record/score/min": 0.0, |
| "/record/score/std": 0.48222921066889, |
| "advantages": -0.001383604289172752, |
| "advantages/max": 2.0, |
| "advantages/min": -2.0, |
| "advantages/std": 0.0180340178972355, |
| "entropy": 0.6672566731770834, |
| "entropy/max": 0.91015625, |
| "entropy/min": 0.45703125, |
| "entropy/std": 0.00915473630294931, |
| "epoch": 0.054, |
| "grad_norm": 15808.0, |
| "learning_rate": 1.9907047236137496e-06, |
| "loss": -10.589589583333334, |
| "out_of_date_ratio": 0.0016308887469070517, |
| "out_of_date_ratio/max": 0.027313625440001488, |
| "out_of_date_ratio/min": 0.0001468213158659637, |
| "out_of_date_ratio/std": 0.00028130080240651613, |
| "rewards": 0.53125, |
| "rewards/max": 1.0, |
| "rewards/min": 0.0, |
| "rewards/std": 0.05093126879064569, |
| "sampled_at_step": 53.38630406061808, |
| "sampled_at_step/max": 54.000003814697266, |
| "sampled_at_step/min": 52.30179977416992, |
| "sampled_at_step/std": 0.05020286577148773, |
| "scores": 0.45693531649948116, |
| "scores/max": 1.0, |
| "scores/min": 0.0, |
| "scores/std": 0.009264651713448353, |
| "step": 54, |
| "steps": 29.114583333333332, |
| "steps/max": 95, |
| "steps/min": 0, |
| "steps/std": 2.370375147567457 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 2062430400, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 3, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.977636301701382e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|