AgentCPM-Explore / trainer_state.json
SmartDazi's picture
Upload folder using huggingface_hub
cbead97 verified
raw
history blame
112 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.054,
"eval_steps": 5.0,
"global_step": 54,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"/length/completion": 9061.760416666666,
"/length/completion/max": 19510,
"/length/completion/min": 2153,
"/length/completion/std": 333.8509541397672,
"/length/context": 42230.270833333336,
"/length/context/max": 101733,
"/length/context/min": 3126,
"/length/context/std": 2205.856462898938,
"/length/forward": 42235.0,
"/length/forward/max": 101736,
"/length/forward/min": 3128,
"/length/forward/std": 2205.8737442778643,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5446428571428571,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.49800302740658364,
"/record/score/max": 1.0,
"/record/score/mean": 0.5446428571428571,
"/record/score/min": 0.0,
"/record/score/std": 0.49800302740658364,
"advantages": -0.10318419991938722,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.019792414635250263,
"entropy": 0.65704345703125,
"entropy/max": 0.87890625,
"entropy/min": 0.455078125,
"entropy/std": 0.008027744975431144,
"epoch": 0.001,
"grad_norm": 15232.0,
"learning_rate": 0.0,
"loss": -165.1345625,
"out_of_date_ratio": 0.0014260866205404454,
"out_of_date_ratio/max": 0.003501293947920203,
"out_of_date_ratio/min": 0.0001568135485285893,
"out_of_date_ratio/std": 8.450739925303703e-05,
"rewards": 0.5104166666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.051019960662424764,
"sampled_at_step": 0.9999999925494194,
"sampled_at_step/max": 1.0,
"sampled_at_step/min": 0.9999999403953552,
"sampled_at_step/std": 2.011886704218841e-09,
"scores": 0.4623135832325675,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.010009663488673342,
"step": 1,
"steps": 24.84375,
"steps/max": 80,
"steps/min": 1,
"steps/std": 1.7357010149325394
},
{
"/length/completion": 8887.78125,
"/length/completion/max": 20317,
"/length/completion/min": 3326,
"/length/completion/std": 433.71652733755445,
"/length/context": 40515.65625,
"/length/context/max": 121680,
"/length/context/min": 4305,
"/length/context/std": 2330.0265864167477,
"/length/forward": 40519.833333333336,
"/length/forward/max": 121688,
"/length/forward/min": 4312,
"/length/forward/std": 2330.095548169546,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.592375366568915,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4913927061437305,
"/record/score/max": 1.0,
"/record/score/mean": 0.592375366568915,
"/record/score/min": 0.0,
"/record/score/std": 0.4913927061437305,
"advantages": -0.028806329974655393,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.020580049780948472,
"entropy": 0.6922200520833334,
"entropy/max": 0.8984375,
"entropy/min": 0.4921875,
"entropy/std": 0.009487675328552674,
"epoch": 0.002,
"grad_norm": 15232.0,
"learning_rate": 2e-07,
"loss": -14.253539583333334,
"out_of_date_ratio": 0.0013957185919935,
"out_of_date_ratio/max": 0.003554652677848935,
"out_of_date_ratio/min": 0.0002505480661056936,
"out_of_date_ratio/std": 7.495178299694405e-05,
"rewards": 0.6145833333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04967295748634071,
"sampled_at_step": 1.330572656666239,
"sampled_at_step/max": 2.0,
"sampled_at_step/min": 0.9999999403953552,
"sampled_at_step/std": 0.04314657295355584,
"scores": 0.574643011683254,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.010284327028776,
"step": 2,
"steps": 23.072916666666668,
"steps/max": 84,
"steps/min": 1,
"steps/std": 1.808259214570864
},
{
"/length/completion": 9210.4375,
"/length/completion/max": 25411,
"/length/completion/min": 2206,
"/length/completion/std": 474.7124508411544,
"/length/context": 43727.708333333336,
"/length/context/max": 122274,
"/length/context/min": 3516,
"/length/context/std": 2767.7691200088607,
"/length/forward": 43732.0,
"/length/forward/max": 122280,
"/length/forward/min": 3520,
"/length/forward/std": 2767.7861615578454,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5821205821205822,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.49321010734997783,
"/record/score/max": 1.0,
"/record/score/mean": 0.5821205821205822,
"/record/score/min": 0.0,
"/record/score/std": 0.49321010734997783,
"advantages": -0.09020842379505026,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.018848007565270594,
"entropy": 0.6811930338541666,
"entropy/max": 0.859375,
"entropy/min": 0.55078125,
"entropy/std": 0.007361056852565585,
"epoch": 0.003,
"grad_norm": 15232.0,
"learning_rate": 4e-07,
"loss": -92.92765208333333,
"out_of_date_ratio": 0.002393863861925638,
"out_of_date_ratio/max": 0.09005628526210785,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 0.0009420074751743802,
"rewards": 0.5625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05063078670631141,
"sampled_at_step": 2.32747404028972,
"sampled_at_step/max": 3.000000238418579,
"sampled_at_step/min": 0.9999999403953552,
"sampled_at_step/std": 0.07218538224091829,
"scores": 0.5509118541033434,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009695359383926561,
"step": 3,
"steps": 26.416666666666668,
"steps/max": 97,
"steps/min": 0,
"steps/std": 2.1152075570163933
},
{
"/length/completion": 10750.739583333334,
"/length/completion/max": 21108,
"/length/completion/min": 2193,
"/length/completion/std": 450.1033487719153,
"/length/context": 50107.604166666664,
"/length/context/max": 114001,
"/length/context/min": 6428,
"/length/context/std": 2518.8334550931213,
"/length/forward": 50112.166666666664,
"/length/forward/max": 114008,
"/length/forward/min": 6432,
"/length/forward/std": 2518.891348333757,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5820668693009119,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.49321904764835184,
"/record/score/max": 1.0,
"/record/score/mean": 0.5820668693009119,
"/record/score/min": 0.0,
"/record/score/std": 0.49321904764835184,
"advantages": -0.052586616041765684,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.01657736980411449,
"entropy": 0.6615804036458334,
"entropy/max": 0.87890625,
"entropy/min": 0.4921875,
"entropy/std": 0.008354287781568109,
"epoch": 0.004,
"grad_norm": 16384.0,
"learning_rate": 6e-07,
"loss": -242.6494145833333,
"out_of_date_ratio": 0.0013956707824339294,
"out_of_date_ratio/max": 0.0036721748765558004,
"out_of_date_ratio/min": 0.00016429803508799523,
"out_of_date_ratio/std": 7.971234370627556e-05,
"rewards": 0.59375,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05012598061177124,
"sampled_at_step": 3.343302513162295,
"sampled_at_step/max": 4.0,
"sampled_at_step/min": 2.2274067401885986,
"sampled_at_step/std": 0.05714829785258465,
"scores": 0.493687707641196,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00911280642020964,
"step": 4,
"steps": 30.354166666666668,
"steps/max": 76,
"steps/min": 1,
"steps/std": 1.7908779251434763
},
{
"/length/completion": 11387.947916666666,
"/length/completion/max": 22277,
"/length/completion/min": 4552,
"/length/completion/std": 385.16190915095103,
"/length/context": 53574.416666666664,
"/length/context/max": 105110,
"/length/context/min": 16843,
"/length/context/std": 2121.527686130431,
"/length/forward": 53579.0,
"/length/forward/max": 105112,
"/length/forward/min": 16848,
"/length/forward/std": 2121.4906782535404,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6083832335329341,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.488111744059642,
"/record/score/max": 1.0,
"/record/score/mean": 0.6083832335329341,
"/record/score/min": 0.0,
"/record/score/std": 0.488111744059642,
"advantages": -0.07026523964634883,
"advantages/max": 2.0,
"advantages/min": -1.7142857142857142,
"advantages/std": 0.018303452579415777,
"entropy": 0.6888427734375,
"entropy/max": 0.8359375,
"entropy/min": 0.56640625,
"entropy/std": 0.00563372915583649,
"epoch": 0.005,
"grad_norm": 19456.0,
"learning_rate": 8e-07,
"loss": -275.4627083333333,
"out_of_date_ratio": 0.0016567955062782858,
"out_of_date_ratio/max": 0.020350120961666107,
"out_of_date_ratio/min": 0.0002010050229728222,
"out_of_date_ratio/std": 0.0002122593014383166,
"rewards": 0.4895833333333333,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.051019960662424785,
"sampled_at_step": 4.356270944078763,
"sampled_at_step/max": 5.0,
"sampled_at_step/min": 3.0,
"sampled_at_step/std": 0.05214663289803722,
"scores": 0.40390879478827363,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008855820888513739,
"step": 5,
"steps": 30.979166666666668,
"steps/max": 77,
"steps/min": 7,
"steps/std": 1.7108073527787282
},
{
"/length/completion": 8645.0625,
"/length/completion/max": 22095,
"/length/completion/min": 4112,
"/length/completion/std": 328.20605970067106,
"/length/context": 49704.583333333336,
"/length/context/max": 128783,
"/length/context/min": 11158,
"/length/context/std": 2747.831565315202,
"/length/forward": 49709.25,
"/length/forward/max": 128784,
"/length/forward/min": 11160,
"/length/forward/std": 2747.838415181319,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6016597510373444,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.48955622253120745,
"/record/score/max": 1.0,
"/record/score/mean": 0.6016597510373444,
"/record/score/min": 0.0,
"/record/score/std": 0.48955622253120745,
"advantages": -0.02579739867235099,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.018538305458941026,
"entropy": 0.6691487630208334,
"entropy/max": 0.9609375,
"entropy/min": 0.455078125,
"entropy/std": 0.010216517799197189,
"epoch": 0.006,
"grad_norm": 14656.0,
"learning_rate": 1e-06,
"loss": -45.5740875,
"out_of_date_ratio": 0.0014883852118146024,
"out_of_date_ratio/max": 0.006303992588073015,
"out_of_date_ratio/min": 0.0001845529186539352,
"out_of_date_ratio/std": 0.00010177097145203845,
"rewards": 0.53125,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05093126879064569,
"sampled_at_step": 5.492770120501518,
"sampled_at_step/max": 6.000000476837158,
"sampled_at_step/min": 4.611965656280518,
"sampled_at_step/std": 0.045385259602976326,
"scores": 0.5179448432187382,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009712100160934692,
"step": 6,
"steps": 26.572916666666668,
"steps/max": 84,
"steps/min": 4,
"steps/std": 1.638879348525962
},
{
"/length/completion": 10545.895833333334,
"/length/completion/max": 26415,
"/length/completion/min": 1407,
"/length/completion/std": 454.70999573960717,
"/length/context": 49447.052083333336,
"/length/context/max": 125477,
"/length/context/min": 4282,
"/length/context/std": 2438.5141942497844,
"/length/forward": 49451.416666666664,
"/length/forward/max": 125480,
"/length/forward/min": 4288,
"/length/forward/std": 2438.461217154489,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6220238095238095,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4848816246356396,
"/record/score/max": 1.0,
"/record/score/mean": 0.6030405405405406,
"/record/score/min": 0.0,
"/record/score/std": 0.4892674595812734,
"advantages": -0.025122265122264592,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.01904916215825559,
"entropy": 0.6802164713541666,
"entropy/max": 0.85546875,
"entropy/min": 0.54296875,
"entropy/std": 0.006917466056080718,
"epoch": 0.007,
"grad_norm": 18304.0,
"learning_rate": 1.2e-06,
"loss": 12.00070625,
"out_of_date_ratio": 0.0014764862108525751,
"out_of_date_ratio/max": 0.004187604878097773,
"out_of_date_ratio/min": 0.00011552680371096358,
"out_of_date_ratio/std": 8.891737377791334e-05,
"rewards": 0.5208333333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05098671929023751,
"sampled_at_step": 6.518794342875481,
"sampled_at_step/max": 7.000000476837158,
"sampled_at_step/min": 5.0,
"sampled_at_step/std": 0.05228243236909025,
"scores": 0.4976576576576577,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009491475804562591,
"step": 7,
"steps": 27.90625,
"steps/max": 89,
"steps/min": 1,
"steps/std": 1.8318577232140487
},
{
"/length/completion": 9839.854166666666,
"/length/completion/max": 21195,
"/length/completion/min": 2998,
"/length/completion/std": 366.70528619391644,
"/length/context": 49984.9375,
"/length/context/max": 113953,
"/length/context/min": 12001,
"/length/context/std": 2039.1666751173045,
"/length/forward": 49989.416666666664,
"/length/forward/max": 113960,
"/length/forward/min": 12008,
"/length/forward/std": 2039.1574836150392,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6066037735849057,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4885034651437574,
"/record/score/max": 1.0,
"/record/score/mean": 0.5986646884272997,
"/record/score/min": 0.0,
"/record/score/std": 0.49016862328952066,
"advantages": 0.030626780626780342,
"advantages/max": 2.0,
"advantages/min": -1.7142857142857142,
"advantages/std": 0.019301630821304063,
"entropy": 0.67767333984375,
"entropy/max": 0.89453125,
"entropy/min": 0.458984375,
"entropy/std": 0.008873916231699142,
"epoch": 0.008,
"grad_norm": 17280.0,
"learning_rate": 1.4e-06,
"loss": 347.6390791666667,
"out_of_date_ratio": 0.0013641679650694034,
"out_of_date_ratio/max": 0.012770682573318481,
"out_of_date_ratio/min": 0.00014114326040726155,
"out_of_date_ratio/std": 0.00013914642752668266,
"rewards": 0.6041666666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04991130733147589,
"sampled_at_step": 7.504495506485303,
"sampled_at_step/max": 8.0,
"sampled_at_step/min": 6.0,
"sampled_at_step/std": 0.04903364618515806,
"scores": 0.6477920227920227,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009014026700098238,
"step": 8,
"steps": 28.25,
"steps/max": 68,
"steps/min": 4,
"steps/std": 1.4796067420470587
},
{
"/length/completion": 8776.010416666666,
"/length/completion/max": 26631,
"/length/completion/min": 982,
"/length/completion/std": 428.9860690236972,
"/length/context": 43058.010416666664,
"/length/context/max": 124668,
"/length/context/min": 2162,
"/length/context/std": 2756.5255490843815,
"/length/forward": 43062.666666666664,
"/length/forward/max": 124672,
"/length/forward/min": 2168,
"/length/forward/std": 2756.5407963672587,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.600375234521576,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4898212044151812,
"/record/score/max": 1.0,
"/record/score/mean": 0.5981308411214953,
"/record/score/min": 0.0,
"/record/score/std": 0.49027577751790646,
"advantages": -0.19077901430842445,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.018712851888668346,
"entropy": 0.6672770182291666,
"entropy/max": 0.84375,
"entropy/min": 0.5234375,
"entropy/std": 0.007357147745400299,
"epoch": 0.009,
"grad_norm": 14848.0,
"learning_rate": 1.6e-06,
"loss": -744.38655625,
"out_of_date_ratio": 0.0014761180206429951,
"out_of_date_ratio/max": 0.0039397054351866245,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 9.058071731289764e-05,
"rewards": 0.4791666666666667,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.050986719290237494,
"sampled_at_step": 8.244493653376898,
"sampled_at_step/max": 9.0,
"sampled_at_step/min": 7.0,
"sampled_at_step/std": 0.05606692134224259,
"scores": 0.34379968203497613,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009469251146552133,
"step": 9,
"steps": 25.208333333333332,
"steps/max": 96,
"steps/min": 0,
"steps/std": 2.0891022557437693
},
{
"/length/completion": 9919.166666666666,
"/length/completion/max": 28914,
"/length/completion/min": 1503,
"/length/completion/std": 462.08533345737334,
"/length/context": 48338.229166666664,
"/length/context/max": 116558,
"/length/context/min": 2761,
"/length/context/std": 2398.330957210102,
"/length/forward": 48342.916666666664,
"/length/forward/max": 116560,
"/length/forward/min": 2768,
"/length/forward/std": 2398.321283904732,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5992402659069326,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.49005241517894965,
"/record/score/max": 1.0,
"/record/score/mean": 0.5964912280701754,
"/record/score/min": 0.0,
"/record/score/std": 0.4906011036529667,
"advantages": 0.14579998967422325,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.017462920262873362,
"entropy": 0.66534423828125,
"entropy/max": 0.890625,
"entropy/min": 0.310546875,
"entropy/std": 0.010153132071121932,
"epoch": 0.01,
"grad_norm": 15424.0,
"learning_rate": 1.8e-06,
"loss": 576.7980958333334,
"out_of_date_ratio": 0.0012154976905852284,
"out_of_date_ratio/max": 0.0027450979687273502,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 6.903620514673011e-05,
"rewards": 0.6041666666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04991130733147588,
"sampled_at_step": 9.399776776631674,
"sampled_at_step/max": 10.0,
"sampled_at_step/min": 8.0,
"sampled_at_step/std": 0.06032417629102527,
"scores": 0.5858330321647994,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009364186993252418,
"step": 10,
"steps": 27.822916666666668,
"steps/max": 88,
"steps/min": 1,
"steps/std": 1.7334281569407148
},
{
"/length/completion": 10443.125,
"/length/completion/max": 27506,
"/length/completion/min": 1803,
"/length/completion/std": 543.500688424718,
"/length/context": 47232.635416666664,
"/length/context/max": 120937,
"/length/context/min": 3018,
"/length/context/std": 2898.144162085004,
"/length/forward": 47237.166666666664,
"/length/forward/max": 120944,
"/length/forward/min": 3024,
"/length/forward/std": 2898.150111563425,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5850860420650096,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4927071802254477,
"/record/score/max": 1.0,
"/record/score/mean": 0.5896174863387978,
"/record/score/min": 0.0,
"/record/score/std": 0.4919031471156853,
"advantages": -0.07608915906788609,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.018877367468057998,
"entropy": 0.7043863932291666,
"entropy/max": 0.86328125,
"entropy/min": 0.5390625,
"entropy/std": 0.007723941035489615,
"epoch": 0.011,
"grad_norm": 30080.0,
"learning_rate": 2e-06,
"loss": -558.7412104166666,
"out_of_date_ratio": 0.0014792358707988267,
"out_of_date_ratio/max": 0.005837538279592991,
"out_of_date_ratio/min": 0.00012627856631297618,
"out_of_date_ratio/std": 9.652012977910382e-05,
"rewards": 0.5104166666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05101996066242478,
"sampled_at_step": 10.584631284077963,
"sampled_at_step/max": 11.0,
"sampled_at_step/min": 10.0,
"sampled_at_step/std": 0.04345596676676561,
"scores": 0.46418439716312054,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009391357929316552,
"step": 11,
"steps": 28.375,
"steps/max": 88,
"steps/min": 0,
"steps/std": 2.2854199901221657
},
{
"/length/completion": 11570.645833333334,
"/length/completion/max": 24767,
"/length/completion/min": 1517,
"/length/completion/std": 538.688173773276,
"/length/context": 57460.239583333336,
"/length/context/max": 116545,
"/length/context/min": 8185,
"/length/context/std": 2912.14489634308,
"/length/forward": 57465.083333333336,
"/length/forward/max": 116552,
"/length/forward/min": 8192,
"/length/forward/std": 2912.1244622454274,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.581547064305685,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4933052567154787,
"/record/score/max": 1.0,
"/record/score/mean": 0.5925740090316106,
"/record/score/min": 0.0,
"/record/score/std": 0.49135532240102475,
"advantages": 0.015989072794471607,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.015835078943366814,
"entropy": 0.682373046875,
"entropy/max": 0.84375,
"entropy/min": 0.54296875,
"entropy/std": 0.006832136883896569,
"epoch": 0.012,
"grad_norm": 24576.0,
"learning_rate": 1.9999949650055508e-06,
"loss": 381.7514645833333,
"out_of_date_ratio": 0.001392214337556652,
"out_of_date_ratio/max": 0.0036032216157764196,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 6.81944290825032e-05,
"rewards": 0.4791666666666667,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.050986719290237514,
"sampled_at_step": 11.37171138326327,
"sampled_at_step/max": 12.000000953674316,
"sampled_at_step/min": 10.790054321289062,
"sampled_at_step/std": 0.04296443813268482,
"scores": 0.4578177727784027,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008354839024517575,
"step": 12,
"steps": 36.041666666666664,
"steps/max": 98,
"steps/min": 0,
"steps/std": 2.405592292268241
},
{
"/length/completion": 9533.010416666666,
"/length/completion/max": 20970,
"/length/completion/min": 2845,
"/length/completion/std": 475.7710193195782,
"/length/context": 42798.520833333336,
"/length/context/max": 127818,
"/length/context/min": 5819,
"/length/context/std": 2309.1928122194336,
"/length/forward": 42803.083333333336,
"/length/forward/max": 127824,
"/length/forward/min": 5824,
"/length/forward/std": 2309.192172870657,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5896927651139743,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.49188942648344175,
"/record/score/max": 1.0,
"/record/score/mean": 0.5953379953379954,
"/record/score/min": 0.0,
"/record/score/std": 0.49082651379579373,
"advantages": 0.028693766334871362,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.019878183877504816,
"entropy": 0.6960856119791666,
"entropy/max": 0.85546875,
"entropy/min": 0.515625,
"entropy/std": 0.00751492793808001,
"epoch": 0.013,
"grad_norm": 16384.0,
"learning_rate": 1.9999798600729064e-06,
"loss": 202.53405833333332,
"out_of_date_ratio": 0.0016945011958947969,
"out_of_date_ratio/max": 0.016010673716664314,
"out_of_date_ratio/min": 0.00016920473717618734,
"out_of_date_ratio/std": 0.00017768531065253792,
"rewards": 0.5,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05103103630798287,
"sampled_at_step": 12.429851442575455,
"sampled_at_step/max": 13.000000953674316,
"sampled_at_step/min": 11.195213317871094,
"sampled_at_step/std": 0.04931637853899746,
"scores": 0.5165434021019852,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009859391293109745,
"step": 13,
"steps": 25.760416666666668,
"steps/max": 98,
"steps/min": 1,
"steps/std": 1.8281024328893596
},
{
"/length/completion": 9545.020833333334,
"/length/completion/max": 27580,
"/length/completion/min": 2641,
"/length/completion/std": 491.1694360666533,
"/length/context": 41898.489583333336,
"/length/context/max": 111089,
"/length/context/min": 4075,
"/length/context/std": 2197.090228116492,
"/length/forward": 41903.166666666664,
"/length/forward/max": 111096,
"/length/forward/min": 4080,
"/length/forward/std": 2197.118581736236,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5901981230448383,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.491797009546813,
"/record/score/max": 1.0,
"/record/score/mean": 0.59849157054126,
"/record/score/min": 0.0,
"/record/score/std": 0.49020343790340376,
"advantages": 0.0003391938492836318,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.020971885229945038,
"entropy": 0.66033935546875,
"entropy/max": 0.93359375,
"entropy/min": 0.470703125,
"entropy/std": 0.008022561389017911,
"epoch": 0.014,
"grad_norm": 16640.0,
"learning_rate": 1.9999546853541726e-06,
"loss": -108.18811041666667,
"out_of_date_ratio": 0.001645550712358575,
"out_of_date_ratio/max": 0.02955367974936962,
"out_of_date_ratio/min": 0.0001886258542072028,
"out_of_date_ratio/std": 0.0003018806362539357,
"rewards": 0.5416666666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05085353651346114,
"sampled_at_step": 13.120210727055868,
"sampled_at_step/max": 14.0,
"sampled_at_step/min": 11.999999046325684,
"sampled_at_step/std": 0.04944200516546299,
"scores": 0.5041551246537396,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009946090042752999,
"step": 14,
"steps": 25.322916666666668,
"steps/max": 73,
"steps/min": 1,
"steps/std": 1.6704705804045474
},
{
"/length/completion": 8026.5,
"/length/completion/max": 21793,
"/length/completion/min": 1298,
"/length/completion/std": 418.3677279291144,
"/length/context": 37541.177083333336,
"/length/context/max": 120264,
"/length/context/min": 2177,
"/length/context/std": 2429.7662521622196,
"/length/forward": 37545.416666666664,
"/length/forward/max": 120272,
"/length/forward/min": 2184,
"/length/forward/std": 2429.7884767185656,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.610223642172524,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.48769944505424995,
"/record/score/max": 1.0,
"/record/score/mean": 0.6011730205278593,
"/record/score/min": 0.0,
"/record/score/std": 0.48965704316109804,
"advantages": -0.028815848716794226,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.02292120980091652,
"entropy": 0.6660970052083334,
"entropy/max": 0.85546875,
"entropy/min": 0.474609375,
"entropy/std": 0.0068975977831584635,
"epoch": 0.015,
"grad_norm": 15488.0,
"learning_rate": 1.9999194411028592e-06,
"loss": -59.172912499999995,
"out_of_date_ratio": 0.0014122336497166543,
"out_of_date_ratio/max": 0.004552352242171764,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 8.94015080482922e-05,
"rewards": 0.5729166666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05048547230415048,
"sampled_at_step": 14.362340231736502,
"sampled_at_step/max": 15.000000953674316,
"sampled_at_step/min": 13.0,
"sampled_at_step/std": 0.050649083833140505,
"scores": 0.5875731652408824,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.010445522162230367,
"step": 15,
"steps": 22.135416666666668,
"steps/max": 94,
"steps/min": 1,
"steps/std": 2.065213902174111
},
{
"/length/completion": 8805.333333333334,
"/length/completion/max": 21973,
"/length/completion/min": 1796,
"/length/completion/std": 445.0948948510345,
"/length/context": 46869.760416666664,
"/length/context/max": 120639,
"/length/context/min": 2709,
"/length/context/std": 2724.167357688649,
"/length/forward": 46874.166666666664,
"/length/forward/max": 120640,
"/length/forward/min": 2712,
"/length/forward/std": 2724.1792405113283,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6228448275862069,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4846742703458842,
"/record/score/max": 1.0,
"/record/score/mean": 0.6072555205047319,
"/record/score/min": 0.0,
"/record/score/std": 0.48836078192383503,
"advantages": -0.07705723798953784,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.019517642661302088,
"entropy": 0.6896565755208334,
"entropy/max": 0.87109375,
"entropy/min": 0.54296875,
"entropy/std": 0.007813531975228124,
"epoch": 0.016,
"grad_norm": 15744.0,
"learning_rate": 1.9998741276738752e-06,
"loss": -173.27766875,
"out_of_date_ratio": 0.0016480689225015037,
"out_of_date_ratio/max": 0.03180282190442085,
"out_of_date_ratio/min": 9.623712685424834e-05,
"out_of_date_ratio/std": 0.0003251372886183483,
"rewards": 0.5208333333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.050986719290237514,
"sampled_at_step": 15.466753671566645,
"sampled_at_step/max": 16.0,
"sampled_at_step/min": 13.999999046325684,
"sampled_at_step/std": 0.05769970522461167,
"scores": 0.4565297817240104,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009580747823909712,
"step": 16,
"steps": 27.15625,
"steps/max": 89,
"steps/min": 1,
"steps/std": 1.9363419548250318
},
{
"/length/completion": 10960.5,
"/length/completion/max": 25811,
"/length/completion/min": 5331,
"/length/completion/std": 404.2495592615958,
"/length/context": 58679.833333333336,
"/length/context/max": 128199,
"/length/context/min": 15882,
"/length/context/std": 2293.188648558825,
"/length/forward": 58684.416666666664,
"/length/forward/max": 128200,
"/length/forward/min": 15888,
"/length/forward/std": 2293.1736586545526,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.655958549222798,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.47505466098578625,
"/record/score/max": 1.0,
"/record/score/mean": 0.6157798165137615,
"/record/score/min": 0.0,
"/record/score/std": 0.4864103556546057,
"advantages": 0.06352010768971889,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.014668813291095996,
"entropy": 0.6750081380208334,
"entropy/max": 0.85546875,
"entropy/min": 0.49609375,
"entropy/std": 0.00851281709713449,
"epoch": 0.017,
"grad_norm": 15808.0,
"learning_rate": 1.9998187455235257e-06,
"loss": 523.4619145833334,
"out_of_date_ratio": 0.0012592377715918701,
"out_of_date_ratio/max": 0.003246423788368702,
"out_of_date_ratio/min": 0.000278965977486223,
"out_of_date_ratio/std": 7.217478768110386e-05,
"rewards": 0.7083333333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04639024033294229,
"sampled_at_step": 16.395306944847107,
"sampled_at_step/max": 17.0,
"sampled_at_step/min": 15.0,
"sampled_at_step/std": 0.05894587906922644,
"scores": 0.6763839811542992,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00802837817144003,
"step": 17,
"steps": 34.375,
"steps/max": 87,
"steps/min": 4,
"steps/std": 1.7676595333079401
},
{
"/length/completion": 7872.354166666667,
"/length/completion/max": 21558,
"/length/completion/min": 1382,
"/length/completion/std": 426.2353171293944,
"/length/context": 44733.21875,
"/length/context/max": 127878,
"/length/context/min": 2521,
"/length/context/std": 3317.4042700867667,
"/length/forward": 44737.75,
"/length/forward/max": 127880,
"/length/forward/min": 2528,
"/length/forward/std": 3317.3828607032756,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6842684268426843,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4648065693048262,
"/record/score/max": 1.0,
"/record/score/mean": 0.6210896309314587,
"/record/score/min": 0.0,
"/record/score/std": 0.48511576070138474,
"advantages": 0.11858076563958492,
"advantages/max": 1.4285714285714286,
"advantages/min": -2.0,
"advantages/std": 0.015536615058260218,
"entropy": 0.67474365234375,
"entropy/max": 0.92578125,
"entropy/min": 0.478515625,
"entropy/std": 0.009013613661301047,
"epoch": 0.018,
"grad_norm": 12096.0,
"learning_rate": 1.999753295209509e-06,
"loss": 204.49255416666665,
"out_of_date_ratio": 0.0012250137936765289,
"out_of_date_ratio/max": 0.003539822995662689,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 7.54957539457252e-05,
"rewards": 0.7395833333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.0447911619803609,
"sampled_at_step": 17.16051246722539,
"sampled_at_step/max": 18.0,
"sampled_at_step/min": 16.0,
"sampled_at_step/std": 0.04216236406924461,
"scores": 0.8149509803921569,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.007848803038037575,
"step": 18,
"steps": 24.5,
"steps/max": 90,
"steps/min": 1,
"steps/std": 2.258039264371735
},
{
"/length/completion": 10017.989583333334,
"/length/completion/max": 22287,
"/length/completion/min": 3320,
"/length/completion/std": 440.913435562424,
"/length/context": 51859.25,
"/length/context/max": 127071,
"/length/context/min": 7556,
"/length/context/std": 2528.4204074337463,
"/length/forward": 51863.833333333336,
"/length/forward/max": 127072,
"/length/forward/min": 7560,
"/length/forward/std": 2528.3746782467865,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.7072892938496583,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.45500675671390867,
"/record/score/max": 1.0,
"/record/score/mean": 0.6281186783546865,
"/record/score/min": 0.0,
"/record/score/std": 0.4833069462118742,
"advantages": -0.01376936316695331,
"advantages/max": 1.1428571428571428,
"advantages/min": -2.0,
"advantages/std": 0.01817736219182349,
"entropy": 0.66009521484375,
"entropy/max": 0.94140625,
"entropy/min": 0.44921875,
"entropy/std": 0.009324537253803052,
"epoch": 0.019,
"grad_norm": 17152.0,
"learning_rate": 1.999677777390909e-06,
"loss": 84.7324875,
"out_of_date_ratio": 0.0011100193546553783,
"out_of_date_ratio/max": 0.0025269172620028257,
"out_of_date_ratio/min": 6.997410673648119e-05,
"out_of_date_ratio/std": 6.765312539943454e-05,
"rewards": 0.6979166666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04686294212198703,
"sampled_at_step": 18.135198891162872,
"sampled_at_step/max": 19.0,
"sampled_at_step/min": 17.0,
"sampled_at_step/std": 0.04848014928364492,
"scores": 0.6662650602409639,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008183805815967112,
"step": 19,
"steps": 33.583333333333336,
"steps/max": 98,
"steps/min": 2,
"steps/std": 2.2649803520936715
},
{
"/length/completion": 11512.09375,
"/length/completion/max": 27309,
"/length/completion/min": 4980,
"/length/completion/std": 501.45766494340864,
"/length/context": 56020.770833333336,
"/length/context/max": 126836,
"/length/context/min": 11030,
"/length/context/std": 2774.8540662281084,
"/length/forward": 56025.083333333336,
"/length/forward/max": 126840,
"/length/forward/min": 11032,
"/length/forward/std": 2774.8575054000307,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.7183257918552036,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.44981534946108553,
"/record/score/max": 1.0,
"/record/score/mean": 0.632258064516129,
"/record/score/min": 0.0,
"/record/score/std": 0.4821906307368982,
"advantages": 0.07681134654818979,
"advantages/max": 1.1428571428571428,
"advantages/min": -2.0,
"advantages/std": 0.01614613564061393,
"entropy": 0.6991780598958334,
"entropy/max": 0.87109375,
"entropy/min": 0.5390625,
"entropy/std": 0.007499462124219176,
"epoch": 0.02,
"grad_norm": 17280.0,
"learning_rate": 1.999592192828189e-06,
"loss": 827.3307291666666,
"out_of_date_ratio": 0.0011428044173650658,
"out_of_date_ratio/max": 0.003580619813874364,
"out_of_date_ratio/min": 0.00019168104336131364,
"out_of_date_ratio/std": 6.266466155041168e-05,
"rewards": 0.75,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.044194173824159216,
"sampled_at_step": 19.341610689957935,
"sampled_at_step/max": 20.0,
"sampled_at_step/min": 18.123659133911133,
"sampled_at_step/std": 0.05884703632870817,
"scores": 0.7580741626794258,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.007405661196030622,
"step": 20,
"steps": 33.833333333333336,
"steps/max": 99,
"steps/min": 2,
"steps/std": 2.107704671314623
},
{
"/length/completion": 11401.34375,
"/length/completion/max": 21933,
"/length/completion/min": 4242,
"/length/completion/std": 396.7083745875666,
"/length/context": 64273.041666666664,
"/length/context/max": 124766,
"/length/context/min": 13984,
"/length/context/std": 2506.1907459968925,
"/length/forward": 64277.916666666664,
"/length/forward/max": 124768,
"/length/forward/min": 13992,
"/length/forward/std": 2506.190649740026,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.7045951859956237,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.45622451695126653,
"/record/score/max": 1.0,
"/record/score/mean": 0.6270718232044199,
"/record/score/min": 0.0,
"/record/score/std": 0.48358324179762907,
"advantages": 0.0018951597619684107,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.016091829848799057,
"entropy": 0.6525065104166666,
"entropy/max": 0.84375,
"entropy/min": 0.458984375,
"entropy/std": 0.009489987290893497,
"epoch": 0.021,
"grad_norm": 18432.0,
"learning_rate": 1.999496542383185e-06,
"loss": 121.03529791666666,
"out_of_date_ratio": 0.0013320353434664867,
"out_of_date_ratio/max": 0.003433594247326255,
"out_of_date_ratio/min": 0.00010756158008007333,
"out_of_date_ratio/std": 7.833091810820446e-05,
"rewards": 0.6145833333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04967295748634071,
"sampled_at_step": 20.23511741558711,
"sampled_at_step/max": 21.0,
"sampled_at_step/min": 19.0,
"sampled_at_step/std": 0.05663429886634042,
"scores": 0.5839745290527991,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008028674375231538,
"step": 21,
"steps": 38.260416666666664,
"steps/max": 86,
"steps/min": 7,
"steps/std": 2.0449720687775765
},
{
"/length/completion": 9401.072916666666,
"/length/completion/max": 24161,
"/length/completion/min": 1460,
"/length/completion/std": 499.54914422142616,
"/length/context": 47311.802083333336,
"/length/context/max": 122394,
"/length/context/min": 2396,
"/length/context/std": 2575.039869424853,
"/length/forward": 47316.333333333336,
"/length/forward/max": 122400,
"/length/forward/min": 2400,
"/length/forward/std": 2575.044503166312,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6977272727272728,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.45924277416180914,
"/record/score/max": 1.0,
"/record/score/mean": 0.6279691211401425,
"/record/score/min": 0.0,
"/record/score/std": 0.4833465672109604,
"advantages": -0.012113324657122618,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.019199719746732787,
"entropy": 0.6626383463541666,
"entropy/max": 0.89453125,
"entropy/min": 0.494140625,
"entropy/std": 0.007544228380313726,
"epoch": 0.022,
"grad_norm": 232448.0,
"learning_rate": 1.9993908270190957e-06,
"loss": 111.84316041666666,
"out_of_date_ratio": 0.0016641596797247378,
"out_of_date_ratio/max": 0.020300446078181267,
"out_of_date_ratio/min": 0.00035029338323511183,
"out_of_date_ratio/std": 0.00023182703826180737,
"rewards": 0.5729166666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.050485472304150465,
"sampled_at_step": 21.164862950642902,
"sampled_at_step/max": 22.0,
"sampled_at_step/min": 20.999998092651367,
"sampled_at_step/std": 0.03211382577428944,
"scores": 0.49159074982480727,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009357968706678136,
"step": 22,
"steps": 28.729166666666668,
"steps/max": 89,
"steps/min": 0,
"steps/std": 1.9640032706013673
},
{
"/length/completion": 9214.354166666666,
"/length/completion/max": 23003,
"/length/completion/min": 1919,
"/length/completion/std": 387.5235336657432,
"/length/context": 48108.802083333336,
"/length/context/max": 114013,
"/length/context/min": 3056,
"/length/context/std": 2795.0509071366123,
"/length/forward": 48113.416666666664,
"/length/forward/max": 114016,
"/length/forward/min": 3064,
"/length/forward/std": 2795.03867741772,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.7045177045177046,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4562592558390556,
"/record/score/max": 1.0,
"/record/score/mean": 0.6330749354005168,
"/record/score/min": 0.0,
"/record/score/std": 0.4819658302910577,
"advantages": 0.023602484472047516,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.018105340055766753,
"entropy": 0.6816813151041666,
"entropy/max": 0.96875,
"entropy/min": 0.52734375,
"entropy/std": 0.009203881162625499,
"epoch": 0.023,
"grad_norm": 15232.0,
"learning_rate": 1.9992750478004735e-06,
"loss": 9.265197916666667,
"out_of_date_ratio": 0.0013355615161951089,
"out_of_date_ratio/max": 0.00573215214535594,
"out_of_date_ratio/min": 0.00025896672741509974,
"out_of_date_ratio/std": 8.832910798917201e-05,
"rewards": 0.65625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.048475287679651785,
"sampled_at_step": 22.168105483055115,
"sampled_at_step/max": 23.0,
"sampled_at_step/min": 21.0,
"sampled_at_step/std": 0.050861245309274106,
"scores": 0.6652173913043479,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00898273734163259,
"step": 23,
"steps": 27.75,
"steps/max": 81,
"steps/min": 1,
"steps/std": 1.9785635045827228
},
{
"/length/completion": 9490.1875,
"/length/completion/max": 29269,
"/length/completion/min": 2814,
"/length/completion/std": 472.25405473316374,
"/length/context": 48921.354166666664,
"/length/context/max": 126739,
"/length/context/min": 11459,
"/length/context/std": 2698.3990532783187,
"/length/forward": 48925.833333333336,
"/length/forward/max": 126744,
"/length/forward/min": 11464,
"/length/forward/std": 2698.425859230934,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.7132701421800948,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.45223428270585836,
"/record/score/max": 1.0,
"/record/score/mean": 0.6399560922063666,
"/record/score/min": 0.0,
"/record/score/std": 0.4800128042608059,
"advantages": -0.13765028874372895,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.01947662744072505,
"entropy": 0.6754150390625,
"entropy/max": 0.85546875,
"entropy/min": 0.48046875,
"entropy/std": 0.007579306748481353,
"epoch": 0.024,
"grad_norm": 16000.0,
"learning_rate": 1.999149205893214e-06,
"loss": -90.01123125,
"out_of_date_ratio": 0.001372865597204509,
"out_of_date_ratio/max": 0.010023866780102253,
"out_of_date_ratio/min": 0.00016106950351968408,
"out_of_date_ratio/std": 0.00013162758332460245,
"rewards": 0.625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04941058844013093,
"sampled_at_step": 23.256375809510548,
"sampled_at_step/max": 24.0,
"sampled_at_step/min": 21.6400089263916,
"sampled_at_step/std": 0.06531165671866788,
"scores": 0.5569913850231941,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009042129178310435,
"step": 24,
"steps": 30.4375,
"steps/max": 99,
"steps/min": 3,
"steps/std": 2.303464541344009
},
{
"/length/completion": 9030.229166666666,
"/length/completion/max": 20017,
"/length/completion/min": 2032,
"/length/completion/std": 404.27926977346874,
"/length/context": 43733.875,
"/length/context/max": 115111,
"/length/context/min": 2969,
"/length/context/std": 2356.429606294356,
"/length/forward": 43737.833333333336,
"/length/forward/max": 115112,
"/length/forward/min": 2976,
"/length/forward/std": 2356.3995413175926,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.673055242390079,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4690968802724136,
"/record/score/max": 1.0,
"/record/score/mean": 0.6356711321250328,
"/record/score/min": 0.0,
"/record/score/std": 0.4812414611272727,
"advantages": -0.14799107142857285,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.020134242058078504,
"entropy": 0.6791585286458334,
"entropy/max": 0.9296875,
"entropy/min": 0.4921875,
"entropy/std": 0.009211186287749336,
"epoch": 0.025,
"grad_norm": 16512.0,
"learning_rate": 1.9990133025645437e-06,
"loss": -228.33545,
"out_of_date_ratio": 0.0014518716141841044,
"out_of_date_ratio/max": 0.003765060333535075,
"out_of_date_ratio/min": 0.00016504373343195766,
"out_of_date_ratio/std": 8.420615578605796e-05,
"rewards": 0.5208333333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.050986719290237514,
"sampled_at_step": 24.447250723838806,
"sampled_at_step/max": 25.000001907348633,
"sampled_at_step/min": 23.619524002075195,
"sampled_at_step/std": 0.04587732273306496,
"scores": 0.4421875,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009815837705506364,
"step": 25,
"steps": 25.666666666666668,
"steps/max": 79,
"steps/min": 1,
"steps/std": 1.8250087201719623
},
{
"/length/completion": 10101.09375,
"/length/completion/max": 20788,
"/length/completion/min": 3039,
"/length/completion/std": 403.0559760239524,
"/length/context": 48021.208333333336,
"/length/context/max": 112424,
"/length/context/min": 7596,
"/length/context/std": 2270.2870376868336,
"/length/forward": 48025.833333333336,
"/length/forward/max": 112432,
"/length/forward/min": 7600,
"/length/forward/std": 2270.3245285615944,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6428571428571429,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4791574237499546,
"/record/score/max": 1.0,
"/record/score/mean": 0.6338742393509128,
"/record/score/min": 0.0,
"/record/score/std": 0.48174442190669386,
"advantages": -0.05486008836524312,
"advantages/max": 2.0,
"advantages/min": -1.4285714285714286,
"advantages/std": 0.018416687139217066,
"entropy": 0.6940511067708334,
"entropy/max": 0.89453125,
"entropy/min": 0.51171875,
"entropy/std": 0.008278710396076156,
"epoch": 0.026,
"grad_norm": 19840.0,
"learning_rate": 1.998867339183008e-06,
"loss": -140.20670625,
"out_of_date_ratio": 0.001547872148269865,
"out_of_date_ratio/max": 0.004394118674099445,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 8.020284871100495e-05,
"rewards": 0.3229166666666667,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.047723322942373116,
"sampled_at_step": 25.289051393667858,
"sampled_at_step/max": 26.000001907348633,
"sampled_at_step/min": 24.999998092651367,
"sampled_at_step/std": 0.037862932441932125,
"scores": 0.3286082474226804,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00843075316638251,
"step": 26,
"steps": 31.333333333333332,
"steps/max": 89,
"steps/min": 1,
"steps/std": 1.9080232341449717
},
{
"/length/completion": 8978.75,
"/length/completion/max": 18034,
"/length/completion/min": 3624,
"/length/completion/std": 311.56472994576995,
"/length/context": 52919.604166666664,
"/length/context/max": 103118,
"/length/context/min": 11282,
"/length/context/std": 2021.9312112484567,
"/length/forward": 52924.333333333336,
"/length/forward/max": 103120,
"/length/forward/min": 11288,
"/length/forward/std": 2021.9383608195633,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6522222222222223,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4762650470711957,
"/record/score/max": 1.0,
"/record/score/mean": 0.6329268292682927,
"/record/score/min": 0.0,
"/record/score/std": 0.4820066991865132,
"advantages": -0.07285397529300003,
"advantages/max": 2.0,
"advantages/min": -1.7142857142857142,
"advantages/std": 0.017689748039436834,
"entropy": 0.6405843098958334,
"entropy/max": 0.81640625,
"entropy/min": 0.478515625,
"entropy/std": 0.008094466537248381,
"epoch": 0.027,
"grad_norm": 15616.0,
"learning_rate": 1.998711317218456e-06,
"loss": -217.17899583333335,
"out_of_date_ratio": 0.0014654934244996791,
"out_of_date_ratio/max": 0.003957169596105814,
"out_of_date_ratio/min": 0.00015121730393730104,
"out_of_date_ratio/std": 8.927867169875872e-05,
"rewards": 0.4270833333333333,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.050485472304150465,
"sampled_at_step": 26.36866702636083,
"sampled_at_step/max": 27.000001907348633,
"sampled_at_step/min": 25.0,
"sampled_at_step/std": 0.050479192137442085,
"scores": 0.3801076971808679,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00863921262874711,
"step": 27,
"steps": 31.885416666666668,
"steps/max": 71,
"steps/min": 5,
"steps/std": 1.6309233516920185
},
{
"/length/completion": 10679.104166666666,
"/length/completion/max": 26624,
"/length/completion/min": 2763,
"/length/completion/std": 485.3186727086443,
"/length/context": 54714.947916666664,
"/length/context/max": 115639,
"/length/context/min": 3676,
"/length/context/std": 2660.0730577165136,
"/length/forward": 54719.416666666664,
"/length/forward/max": 115640,
"/length/forward/min": 3680,
"/length/forward/std": 2660.049047283798,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.677765843179377,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4673321142386053,
"/record/score/max": 1.0,
"/record/score/mean": 0.6375442739079102,
"/record/score/min": 0.0,
"/record/score/std": 0.48070944729133946,
"advantages": -0.017792985457656552,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.018213078624609743,
"entropy": 0.6812744140625,
"entropy/max": 0.859375,
"entropy/min": 0.47265625,
"entropy/std": 0.007308055562462717,
"epoch": 0.028,
"grad_norm": 17920.0,
"learning_rate": 1.9985452382420274e-06,
"loss": -186.76735416666668,
"out_of_date_ratio": 0.001270884770140886,
"out_of_date_ratio/max": 0.0031341412104666233,
"out_of_date_ratio/min": 0.00020686801872216165,
"out_of_date_ratio/std": 7.350484083591433e-05,
"rewards": 0.6041666666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04991130733147589,
"sampled_at_step": 27.260556002457935,
"sampled_at_step/max": 28.000001907348633,
"sampled_at_step/min": 26.0,
"sampled_at_step/std": 0.05604123740619327,
"scores": 0.5955089820359282,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00849230074604125,
"step": 28,
"steps": 33.791666666666664,
"steps/max": 92,
"steps/min": 1,
"steps/std": 2.219846006034937
},
{
"/length/completion": 9437.427083333334,
"/length/completion/max": 28132,
"/length/completion/min": 2891,
"/length/completion/std": 436.3822586208799,
"/length/context": 51917.46875,
"/length/context/max": 118962,
"/length/context/min": 10854,
"/length/context/std": 2628.8098323681484,
"/length/forward": 51922.166666666664,
"/length/forward/max": 118968,
"/length/forward/min": 10856,
"/length/forward/std": 2628.812668929029,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6684901531728665,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.47075584784873803,
"/record/score/max": 1.0,
"/record/score/mean": 0.6378043178686266,
"/record/score/min": 0.0,
"/record/score/std": 0.48063496541217493,
"advantages": 0.01068174677976811,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.01657294113911273,
"entropy": 0.6856689453125,
"entropy/max": 0.87890625,
"entropy/min": 0.4765625,
"entropy/std": 0.006581478913246546,
"epoch": 0.029,
"grad_norm": 16768.0,
"learning_rate": 1.9983691039261353e-06,
"loss": 0.014404166666666668,
"out_of_date_ratio": 0.001233935588516033,
"out_of_date_ratio/max": 0.003753588069230318,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 7.756263237153342e-05,
"rewards": 0.6770833333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.047723322942373116,
"sampled_at_step": 28.41019606590271,
"sampled_at_step/max": 29.000001907348633,
"sampled_at_step/min": 27.322860717773438,
"sampled_at_step/std": 0.0516098577435189,
"scores": 0.6760917373546969,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008294601161650362,
"step": 29,
"steps": 32.15625,
"steps/max": 99,
"steps/min": 4,
"steps/std": 2.146709374423526
},
{
"/length/completion": 11369.114583333334,
"/length/completion/max": 23314,
"/length/completion/min": 3318,
"/length/completion/std": 507.260502736684,
"/length/context": 58615.427083333336,
"/length/context/max": 113119,
"/length/context/min": 16362,
"/length/context/std": 2548.0394785382373,
"/length/forward": 58619.833333333336,
"/length/forward/max": 113120,
"/length/forward/min": 16368,
"/length/forward/std": 2548.016881319358,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6605691056910569,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.47351616899275406,
"/record/score/max": 1.0,
"/record/score/mean": 0.6413898601398601,
"/record/score/min": 0.0,
"/record/score/std": 0.47959243889956243,
"advantages": -0.00934836403629328,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.016053681878628597,
"entropy": 0.6816813151041666,
"entropy/max": 0.86328125,
"entropy/min": 0.49609375,
"entropy/std": 0.007637391596894437,
"epoch": 0.03,
"grad_norm": 17408.0,
"learning_rate": 1.998182916044451e-06,
"loss": 372.35253958333334,
"out_of_date_ratio": 0.0014495693473387898,
"out_of_date_ratio/max": 0.010763758793473244,
"out_of_date_ratio/min": 0.00022534365416504443,
"out_of_date_ratio/std": 0.0001220856333759772,
"rewards": 0.5104166666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05101996066242478,
"sampled_at_step": 29.722339312235516,
"sampled_at_step/max": 30.000001907348633,
"sampled_at_step/min": 28.63516616821289,
"sampled_at_step/std": 0.04449124005690786,
"scores": 0.46989276876546604,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008275792529581869,
"step": 30,
"steps": 36.885416666666664,
"steps/max": 91,
"steps/min": 6,
"steps/std": 2.0705791778804565
},
{
"/length/completion": 9677.302083333334,
"/length/completion/max": 20464,
"/length/completion/min": 3520,
"/length/completion/std": 360.3757061020706,
"/length/context": 47705.333333333336,
"/length/context/max": 114634,
"/length/context/min": 10643,
"/length/context/std": 2297.2822418484816,
"/length/forward": 47709.666666666664,
"/length/forward/max": 114640,
"/length/forward/min": 10648,
"/length/forward/std": 2297.297518331373,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6553147574819401,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.47526553221154866,
"/record/score/max": 1.0,
"/record/score/mean": 0.6407540775259479,
"/record/score/min": 0.0,
"/record/score/std": 0.47977941771174426,
"advantages": 0.04816326530612251,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.01873621505921223,
"entropy": 0.6687418619791666,
"entropy/max": 0.84375,
"entropy/min": 0.546875,
"entropy/std": 0.0069627937479380154,
"epoch": 0.031,
"grad_norm": 16512.0,
"learning_rate": 1.9979866764718843e-06,
"loss": 245.54707916666666,
"out_of_date_ratio": 0.0014398050516319927,
"out_of_date_ratio/max": 0.0034001213498413563,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 8.200511865324555e-05,
"rewards": 0.5625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05063078670631141,
"sampled_at_step": 30.396397809187572,
"sampled_at_step/max": 31.000001907348633,
"sampled_at_step/min": 29.999998092651367,
"sampled_at_step/std": 0.040759151687636004,
"scores": 0.5892857142857143,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00929723578782738,
"step": 31,
"steps": 28.166666666666668,
"steps/max": 81,
"steps/min": 3,
"steps/std": 1.9046080512020584
},
{
"/length/completion": 11456.354166666666,
"/length/completion/max": 23502,
"/length/completion/min": 3792,
"/length/completion/std": 482.2487134353623,
"/length/context": 56354.708333333336,
"/length/context/max": 97717,
"/length/context/min": 12262,
"/length/context/std": 2432.4953052653914,
"/length/forward": 56359.333333333336,
"/length/forward/max": 97720,
"/length/forward/min": 12264,
"/length/forward/std": 2432.503741765612,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6494845360824743,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.47713140063530446,
"/record/score/max": 1.0,
"/record/score/mean": 0.6368864243171083,
"/record/score/min": 0.0,
"/record/score/std": 0.4808971894674339,
"advantages": 0.07540518464382524,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.016593084064504177,
"entropy": 0.6540120442708334,
"entropy/max": 0.82421875,
"entropy/min": 0.4921875,
"entropy/std": 0.0068052894061513365,
"epoch": 0.032,
"grad_norm": 18560.0,
"learning_rate": 1.997780387184565e-06,
"loss": 354.41247500000003,
"out_of_date_ratio": 0.0013360527542924199,
"out_of_date_ratio/max": 0.0034993954468518496,
"out_of_date_ratio/min": 0.0001190476177725941,
"out_of_date_ratio/std": 7.784003677692855e-05,
"rewards": 0.5625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05063078670631141,
"sampled_at_step": 31.47506093978882,
"sampled_at_step/max": 32.0,
"sampled_at_step/min": 30.20079231262207,
"sampled_at_step/std": 0.046957284692903785,
"scores": 0.5853144748721035,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008546511861260894,
"step": 32,
"steps": 33.614583333333336,
"steps/max": 84,
"steps/min": 4,
"steps/std": 2.050542451925803
},
{
"/length/completion": 10748.010416666666,
"/length/completion/max": 28022,
"/length/completion/min": 2958,
"/length/completion/std": 467.90537643482224,
"/length/context": 52605.020833333336,
"/length/context/max": 119246,
"/length/context/min": 6469,
"/length/context/std": 2694.21027027666,
"/length/forward": 52609.5,
"/length/forward/max": 119248,
"/length/forward/min": 6472,
"/length/forward/std": 2694.183390793798,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.675701839303001,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.46811202042411,
"/record/score/max": 1.0,
"/record/score/mean": 0.6410408042578356,
"/record/score/min": 0.0,
"/record/score/std": 0.47969520691195516,
"advantages": 0.035493036471189295,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.01854459908350241,
"entropy": 0.6657511393229166,
"entropy/max": 0.94921875,
"entropy/min": 0.474609375,
"entropy/std": 0.00920301048279462,
"epoch": 0.033,
"grad_norm": 18944.0,
"learning_rate": 1.997564050259824e-06,
"loss": 32.570058333333336,
"out_of_date_ratio": 0.0016721839908010832,
"out_of_date_ratio/max": 0.03893996775150299,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 0.00039718575078297327,
"rewards": 0.6041666666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04991130733147588,
"sampled_at_step": 32.382101813952126,
"sampled_at_step/max": 33.0,
"sampled_at_step/min": 31.0,
"sampled_at_step/std": 0.05291823048647063,
"scores": 0.5575480925986306,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008968448426516548,
"step": 33,
"steps": 30.947916666666668,
"steps/max": 83,
"steps/min": 0,
"steps/std": 2.1503476882781585
},
{
"/length/completion": 9198.875,
"/length/completion/max": 21159,
"/length/completion/min": 1864,
"/length/completion/std": 383.6682451283641,
"/length/context": 47677.3125,
"/length/context/max": 127958,
"/length/context/min": 2721,
"/length/context/std": 2562.6787387829513,
"/length/forward": 47681.916666666664,
"/length/forward/max": 127960,
"/length/forward/min": 2728,
"/length/forward/std": 2562.679914401653,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6801579466929911,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4664151736847391,
"/record/score/max": 1.0,
"/record/score/mean": 0.6436405618626131,
"/record/score/min": 0.0,
"/record/score/std": 0.47892315561872006,
"advantages": -0.0682769007642093,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.01756966769979579,
"entropy": 0.6900634765625,
"entropy/max": 0.8828125,
"entropy/min": 0.43359375,
"entropy/std": 0.008183432081512754,
"epoch": 0.034,
"grad_norm": 15040.0,
"learning_rate": 1.997337667876172e-06,
"loss": -356.5561125,
"out_of_date_ratio": 0.001257536003322457,
"out_of_date_ratio/max": 0.005396825261414051,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 9.732487820129926e-05,
"rewards": 0.6875,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04730703678277331,
"sampled_at_step": 33.30234805742899,
"sampled_at_step/max": 34.0,
"sampled_at_step/min": 33.0,
"sampled_at_step/std": 0.03829985984963102,
"scores": 0.5903943771964076,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009717381918579165,
"step": 34,
"steps": 25.677083333333332,
"steps/max": 92,
"steps/min": 1,
"steps/std": 1.7743528011009773
},
{
"/length/completion": 9827.28125,
"/length/completion/max": 22478,
"/length/completion/min": 1481,
"/length/completion/std": 426.35430911083745,
"/length/context": 52099.104166666664,
"/length/context/max": 121980,
"/length/context/min": 4481,
"/length/context/std": 2602.116649626552,
"/length/forward": 52103.75,
"/length/forward/max": 121984,
"/length/forward/min": 4488,
"/length/forward/std": 2602.108545255828,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6769078295341923,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.46765758825181203,
"/record/score/max": 1.0,
"/record/score/mean": 0.6439947536068953,
"/record/score/min": 0.0,
"/record/score/std": 0.478816782218094,
"advantages": -0.11816995990144273,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.01787321571590539,
"entropy": 0.6913859049479166,
"entropy/max": 0.92578125,
"entropy/min": 0.462890625,
"entropy/std": 0.009919730611026047,
"epoch": 0.035,
"grad_norm": 16064.0,
"learning_rate": 1.9971012423132772e-06,
"loss": -428.37520416666666,
"out_of_date_ratio": 0.0012809614311966773,
"out_of_date_ratio/max": 0.004535594489425421,
"out_of_date_ratio/min": 0.00010249052138533443,
"out_of_date_ratio/std": 8.340348975318018e-05,
"rewards": 0.625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04941058844013093,
"sampled_at_step": 34.23181116580963,
"sampled_at_step/max": 35.0,
"sampled_at_step/min": 33.0,
"sampled_at_step/std": 0.05921732305282789,
"scores": 0.5302671626648631,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009177981231588788,
"step": 35,
"steps": 29.802083333333332,
"steps/max": 77,
"steps/min": 0,
"steps/std": 1.7894620515550024
},
{
"/length/completion": 9422.875,
"/length/completion/max": 23769,
"/length/completion/min": 2900,
"/length/completion/std": 459.94317547239297,
"/length/context": 49770.354166666664,
"/length/context/max": 126792,
"/length/context/min": 6995,
"/length/context/std": 2917.8340353953995,
"/length/forward": 49774.916666666664,
"/length/forward/max": 126800,
"/length/forward/min": 7000,
"/length/forward/std": 2917.8420979566768,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.678646934460888,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4669960094130649,
"/record/score/max": 1.0,
"/record/score/mean": 0.6463280964559737,
"/record/score/min": 0.0,
"/record/score/std": 0.4781088664599017,
"advantages": -0.01185733257446363,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.01525715744572812,
"entropy": 0.7012125651041666,
"entropy/max": 1.0078125,
"entropy/min": 0.5234375,
"entropy/std": 0.008509193891243078,
"epoch": 0.036,
"grad_norm": 13440.0,
"learning_rate": 1.9968547759519425e-06,
"loss": -72.75817916666666,
"out_of_date_ratio": 0.0014415098589779518,
"out_of_date_ratio/max": 0.004487856291234493,
"out_of_date_ratio/min": 0.00010764262697193772,
"out_of_date_ratio/std": 9.46737329751352e-05,
"rewards": 0.5833333333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05031728036871333,
"sampled_at_step": 35.245263735453285,
"sampled_at_step/max": 36.000003814697266,
"sampled_at_step/min": 34.0,
"sampled_at_step/std": 0.051642521621436184,
"scores": 0.5703851261620186,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009019786682503346,
"step": 36,
"steps": 30.375,
"steps/max": 96,
"steps/min": 2,
"steps/std": 2.3679193611046903
},
{
"/length/completion": 9100.052083333334,
"/length/completion/max": 24602,
"/length/completion/min": 2334,
"/length/completion/std": 462.026403863143,
"/length/context": 48322.8125,
"/length/context/max": 104864,
"/length/context/min": 3297,
"/length/context/std": 2566.345017526433,
"/length/forward": 48327.333333333336,
"/length/forward/max": 104872,
"/length/forward/min": 3304,
"/length/forward/std": 2566.355532188125,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6878363832077503,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4633761896595802,
"/record/score/max": 1.0,
"/record/score/mean": 0.6477414747366542,
"/record/score/min": 0.0,
"/record/score/std": 0.47767400666420845,
"advantages": 0.008175892166610221,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.017455894236115342,
"entropy": 0.7001139322916666,
"entropy/max": 0.9375,
"entropy/min": 0.52734375,
"entropy/std": 0.00808525356253917,
"epoch": 0.037,
"grad_norm": 30336.0,
"learning_rate": 1.9965982712740806e-06,
"loss": 11.875345833333334,
"out_of_date_ratio": 0.0015731908315501641,
"out_of_date_ratio/max": 0.015670742839574814,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 0.00018833875065268633,
"rewards": 0.6354166666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04912381533653095,
"sampled_at_step": 36.266119639078774,
"sampled_at_step/max": 37.0,
"sampled_at_step/min": 35.0,
"sampled_at_step/std": 0.062039810694395646,
"scores": 0.654292343387471,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009352466428458224,
"step": 37,
"steps": 25.9375,
"steps/max": 72,
"steps/min": 1,
"steps/std": 1.6376253994075451
},
{
"/length/completion": 10135.427083333334,
"/length/completion/max": 30915,
"/length/completion/min": 3327,
"/length/completion/std": 468.35185469585446,
"/length/context": 53150.572916666664,
"/length/context/max": 122558,
"/length/context/min": 15804,
"/length/context/std": 2433.6580449971475,
"/length/forward": 53155.25,
"/length/forward/max": 122560,
"/length/forward/min": 15808,
"/length/forward/std": 2433.6683033657837,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6797385620915033,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4665769489562024,
"/record/score/max": 1.0,
"/record/score/mean": 0.6460006985679357,
"/record/score/min": 0.0,
"/record/score/std": 0.47820894598248015,
"advantages": -0.1376781704019445,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.01651928713975243,
"entropy": 0.6912027994791666,
"entropy/max": 0.83203125,
"entropy/min": 0.53125,
"entropy/std": 0.00708367145695894,
"epoch": 0.038,
"grad_norm": 15680.0,
"learning_rate": 1.996331730862691e-06,
"loss": -134.66802916666668,
"out_of_date_ratio": 0.0013084049907471733,
"out_of_date_ratio/max": 0.003387369913980365,
"out_of_date_ratio/min": 0.0002214839478256181,
"out_of_date_ratio/std": 7.499232801296063e-05,
"rewards": 0.53125,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05093126879064569,
"sampled_at_step": 37.08955097198486,
"sampled_at_step/max": 38.000003814697266,
"sampled_at_step/min": 36.19251251220703,
"sampled_at_step/std": 0.041960562771199195,
"scores": 0.4007058068655759,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008777370925423156,
"step": 38,
"steps": 31.46875,
"steps/max": 98,
"steps/min": 4,
"steps/std": 1.941804868393939
},
{
"/length/completion": 9828.083333333334,
"/length/completion/max": 23061,
"/length/completion/min": 1558,
"/length/completion/std": 470.9235785716673,
"/length/context": 51597.614583333336,
"/length/context/max": 109570,
"/length/context/min": 4229,
"/length/context/std": 2691.8637708957626,
"/length/forward": 51602.166666666664,
"/length/forward/max": 109576,
"/length/forward/min": 4232,
"/length/forward/std": 2691.8928374513284,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6431095406360424,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.47908210087514325,
"/record/score/max": 1.0,
"/record/score/mean": 0.6410693001872978,
"/record/score/min": 0.0,
"/record/score/std": 0.47968682757051556,
"advantages": -0.13596059113300493,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.018862637046954452,
"entropy": 0.6741943359375,
"entropy/max": 0.91015625,
"entropy/min": 0.4609375,
"entropy/std": 0.00861796389963598,
"epoch": 0.039,
"grad_norm": 16384.0,
"learning_rate": 1.996055157401834e-06,
"loss": -246.81274374999998,
"out_of_date_ratio": 0.0014630949255357943,
"out_of_date_ratio/max": 0.004159239586442709,
"out_of_date_ratio/min": 0.0001836041483329609,
"out_of_date_ratio/std": 9.1284843466261e-05,
"rewards": 0.5625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05063078670631141,
"sampled_at_step": 38.16589645544688,
"sampled_at_step/max": 39.0,
"sampled_at_step/min": 37.30094528198242,
"sampled_at_step/std": 0.036066542168659795,
"scores": 0.503448275862069,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009284546103208795,
"step": 39,
"steps": 29.208333333333332,
"steps/max": 89,
"steps/min": 1,
"steps/std": 1.9946715404793218
},
{
"/length/completion": 10498.177083333334,
"/length/completion/max": 20891,
"/length/completion/min": 2779,
"/length/completion/std": 468.1356710498424,
"/length/context": 56444.9375,
"/length/context/max": 124660,
"/length/context/min": 9841,
"/length/context/std": 2764.127233626301,
"/length/forward": 56449.5,
"/length/forward/max": 124664,
"/length/forward/min": 9848,
"/length/forward/std": 2764.099841701785,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6257744733581165,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4839222890624985,
"/record/score/max": 1.0,
"/record/score/mean": 0.6392202991093934,
"/record/score/min": 0.0,
"/record/score/std": 0.4802267259492034,
"advantages": -0.047900650502660895,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.015531604552016694,
"entropy": 0.6410319010416666,
"entropy/max": 0.796875,
"entropy/min": 0.478515625,
"entropy/std": 0.00667534252894688,
"epoch": 0.04,
"grad_norm": 16512.0,
"learning_rate": 1.9957685536765995e-06,
"loss": -490.44510833333334,
"out_of_date_ratio": 0.0016274743311441853,
"out_of_date_ratio/max": 0.009337756782770157,
"out_of_date_ratio/min": 0.00029226948390714824,
"out_of_date_ratio/std": 0.00011572550944232628,
"rewards": 0.4166666666666667,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05031728036871333,
"sampled_at_step": 39.08427309989929,
"sampled_at_step/max": 39.60692596435547,
"sampled_at_step/min": 38.14115524291992,
"sampled_at_step/std": 0.02244150888420229,
"scores": 0.4544648137196925,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008561989957449728,
"step": 40,
"steps": 34.229166666666664,
"steps/max": 98,
"steps/min": 3,
"steps/std": 2.1958411985285036
},
{
"/length/completion": 8468.854166666666,
"/length/completion/max": 16865,
"/length/completion/min": 3451,
"/length/completion/std": 290.9413056124056,
"/length/context": 45148.833333333336,
"/length/context/max": 94495,
"/length/context/min": 17906,
"/length/context/std": 1885.4234569782568,
"/length/forward": 45153.0,
"/length/forward/max": 94496,
"/length/forward/min": 17912,
"/length/forward/std": 1885.4056712386564,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6025316455696202,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4893743573756043,
"/record/score/max": 1.0,
"/record/score/mean": 0.6380216891225764,
"/record/score/min": 0.0,
"/record/score/std": 0.48057258903494593,
"advantages": -0.1638573108584894,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.020575259221953697,
"entropy": 0.6600748697916666,
"entropy/max": 0.8203125,
"entropy/min": 0.515625,
"entropy/std": 0.005811703397960358,
"epoch": 0.041,
"grad_norm": 14784.0,
"learning_rate": 1.9954719225730845e-06,
"loss": -400.2380375,
"out_of_date_ratio": 0.0013619511713235017,
"out_of_date_ratio/max": 0.01208761241286993,
"out_of_date_ratio/min": 0.00017220595327671617,
"out_of_date_ratio/std": 0.00013614273060853817,
"rewards": 0.625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.04941058844013093,
"sampled_at_step": 40.14164388179779,
"sampled_at_step/max": 41.0,
"sampled_at_step/min": 39.0,
"sampled_at_step/std": 0.059141493917955895,
"scores": 0.5049000392003136,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00989905914015969,
"step": 41,
"steps": 25.572916666666668,
"steps/max": 82,
"steps/min": 6,
"steps/std": 1.6592115026998668
},
{
"/length/completion": 9004.208333333334,
"/length/completion/max": 22447,
"/length/completion/min": 3307,
"/length/completion/std": 401.7670371646414,
"/length/context": 48468.375,
"/length/context/max": 118308,
"/length/context/min": 4344,
"/length/context/std": 2498.548039093242,
"/length/forward": 48472.75,
"/length/forward/max": 118312,
"/length/forward/min": 4352,
"/length/forward/std": 2498.565013379694,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5725190839694656,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.49471303041281367,
"/record/score/max": 1.0,
"/record/score/mean": 0.6363929146537842,
"/record/score/min": 0.0,
"/record/score/std": 0.4810373923430952,
"advantages": -0.150188230632059,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.01863469859831098,
"entropy": 0.6724446614583334,
"entropy/max": 0.88671875,
"entropy/min": 0.5078125,
"entropy/std": 0.007728973447664365,
"epoch": 0.042,
"grad_norm": 15872.0,
"learning_rate": 1.995165267078361e-06,
"loss": -295.17856875,
"out_of_date_ratio": 0.0015124180954444455,
"out_of_date_ratio/max": 0.012281018309295177,
"out_of_date_ratio/min": 0.0002090737980324775,
"out_of_date_ratio/std": 0.0001382285172342395,
"rewards": 0.5833333333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05031728036871333,
"sampled_at_step": 41.327045361200966,
"sampled_at_step/max": 42.0,
"sampled_at_step/min": 40.344154357910156,
"sampled_at_step/std": 0.04198771784926251,
"scores": 0.478502080443828,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009301876674003574,
"step": 42,
"steps": 29.041666666666668,
"steps/max": 87,
"steps/min": 1,
"steps/std": 1.926881558505675
},
{
"/length/completion": 10251.0625,
"/length/completion/max": 23711,
"/length/completion/min": 3195,
"/length/completion/std": 406.6798169120462,
"/length/context": 57392.572916666664,
"/length/context/max": 128658,
"/length/context/min": 6731,
"/length/context/std": 2686.198047917464,
"/length/forward": 57397.416666666664,
"/length/forward/max": 128664,
"/length/forward/min": 6736,
"/length/forward/std": 2686.221671415675,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5697399527186762,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.49511245085818306,
"/record/score/max": 1.0,
"/record/score/mean": 0.6366197183098592,
"/record/score/min": 0.0,
"/record/score/std": 0.48097302686214627,
"advantages": -0.040922190201730727,
"advantages/max": 2.0,
"advantages/min": -1.7142857142857142,
"advantages/std": 0.018565406605664785,
"entropy": 0.7135823567708334,
"entropy/max": 0.9140625,
"entropy/min": 0.53515625,
"entropy/std": 0.008849231390132668,
"epoch": 0.043,
"grad_norm": 19584.0,
"learning_rate": 1.994848590280447e-06,
"loss": -46.936462500000005,
"out_of_date_ratio": 0.0016592781415359543,
"out_of_date_ratio/max": 0.005419677589088678,
"out_of_date_ratio/min": 0.00015295197954401374,
"out_of_date_ratio/std": 9.289293381818487e-05,
"rewards": 0.4375,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05063078670631141,
"sampled_at_step": 42.43688189983368,
"sampled_at_step/max": 43.000003814697266,
"sampled_at_step/min": 41.0,
"sampled_at_step/std": 0.04539542797800991,
"scores": 0.39020172910662826,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00828081250988518,
"step": 43,
"steps": 35.145833333333336,
"steps/max": 97,
"steps/min": 1,
"steps/std": 2.2647557799388003
},
{
"/length/completion": 9909.958333333334,
"/length/completion/max": 25309,
"/length/completion/min": 2894,
"/length/completion/std": 413.7526306239452,
"/length/context": 50159.177083333336,
"/length/context/max": 119338,
"/length/context/min": 9507,
"/length/context/std": 2976.311486937006,
"/length/forward": 50163.583333333336,
"/length/forward/max": 119344,
"/length/forward/min": 9512,
"/length/forward/std": 2976.3120073100904,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5692137320044297,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.49518628747373306,
"/record/score/max": 1.0,
"/record/score/mean": 0.6355764848853106,
"/record/score/min": 0.0,
"/record/score/std": 0.4812681339400569,
"advantages": -0.030305856023870745,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.016289078510691973,
"entropy": 0.6901448567708334,
"entropy/max": 0.9140625,
"entropy/min": 0.48046875,
"entropy/std": 0.008722974198219498,
"epoch": 0.044,
"grad_norm": 15232.0,
"learning_rate": 1.994521895368273e-06,
"loss": -118.76766041666667,
"out_of_date_ratio": 0.0013302226734595024,
"out_of_date_ratio/max": 0.005487493705004454,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 8.94447647063914e-05,
"rewards": 0.59375,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05012598061177124,
"sampled_at_step": 43.42779644330343,
"sampled_at_step/max": 44.000003814697266,
"sampled_at_step/min": 42.6025276184082,
"sampled_at_step/std": 0.046298369511212654,
"scores": 0.48204960835509136,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009027044213490274,
"step": 44,
"steps": 30.916666666666668,
"steps/max": 91,
"steps/min": 2,
"steps/std": 2.3740251410569284
},
{
"/length/completion": 9256.635416666666,
"/length/completion/max": 22972,
"/length/completion/min": 3043,
"/length/completion/std": 451.1958094288279,
"/length/context": 46203.208333333336,
"/length/context/max": 112819,
"/length/context/min": 12488,
"/length/context/std": 2378.974874873011,
"/length/forward": 46207.833333333336,
"/length/forward/max": 112824,
"/length/forward/min": 12496,
"/length/forward/std": 2378.9563187198955,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6008537886872999,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4897228944080695,
"/record/score/max": 1.0,
"/record/score/mean": 0.6373756865073474,
"/record/score/min": 0.0,
"/record/score/std": 0.4807576528321059,
"advantages": -0.0014897579143391263,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.018824250703849507,
"entropy": 0.6787516276041666,
"entropy/max": 0.8515625,
"entropy/min": 0.53125,
"entropy/std": 0.007416265674992091,
"epoch": 0.045,
"grad_norm": 16512.0,
"learning_rate": 1.9941851856316543e-06,
"loss": -221.58431041666665,
"out_of_date_ratio": 0.001474499657585208,
"out_of_date_ratio/max": 0.003614853834733367,
"out_of_date_ratio/min": 0.000163612567121163,
"out_of_date_ratio/std": 8.391956347657486e-05,
"rewards": 0.5208333333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05098671929023751,
"sampled_at_step": 44.238667726516724,
"sampled_at_step/max": 45.0,
"sampled_at_step/min": 43.0,
"sampled_at_step/std": 0.04742509657311229,
"scores": 0.539292364990689,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009619504357267591,
"step": 45,
"steps": 26.96875,
"steps/max": 75,
"steps/min": 4,
"steps/std": 1.8648133659897332
},
{
"/length/completion": 10171.645833333334,
"/length/completion/max": 18897,
"/length/completion/min": 1419,
"/length/completion/std": 372.87753867676923,
"/length/context": 46847.260416666664,
"/length/context/max": 118579,
"/length/context/min": 12678,
"/length/context/std": 2387.7613746383,
"/length/forward": 46851.5,
"/length/forward/max": 118584,
"/length/forward/min": 12680,
"/length/forward/std": 2387.756631967953,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.602711157455683,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4893367124323641,
"/record/score/max": 1.0,
"/record/score/mean": 0.6342740762292697,
"/record/score/min": 0.0,
"/record/score/std": 0.48163313055974066,
"advantages": 0.002143392991105286,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.020591038312953034,
"entropy": 0.71630859375,
"entropy/max": 0.9765625,
"entropy/min": 0.5703125,
"entropy/std": 0.008231126334650838,
"epoch": 0.046,
"grad_norm": 17664.0,
"learning_rate": 1.993838464461254e-06,
"loss": -98.02665208333333,
"out_of_date_ratio": 0.00150063132878131,
"out_of_date_ratio/max": 0.014094432815909386,
"out_of_date_ratio/min": 0.00014930944598745555,
"out_of_date_ratio/std": 0.00016047740447908465,
"rewards": 0.5520833333333334,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05075342008066314,
"sampled_at_step": 45.25676174958547,
"sampled_at_step/max": 46.000003814697266,
"sampled_at_step/min": 44.0,
"sampled_at_step/std": 0.05679606363792777,
"scores": 0.5375093773443361,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009656381511994647,
"step": 46,
"steps": 26.770833333333332,
"steps/max": 99,
"steps/min": 1,
"steps/std": 2.0040366797041256
},
{
"/length/completion": 11373.427083333334,
"/length/completion/max": 27384,
"/length/completion/min": 3119,
"/length/completion/std": 528.5598256687088,
"/length/context": 53539.96875,
"/length/context/max": 128783,
"/length/context/min": 4073,
"/length/context/std": 3093.0401978765053,
"/length/forward": 53544.416666666664,
"/length/forward/max": 128784,
"/length/forward/min": 4080,
"/length/forward/std": 3093.016143871078,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5946775844421699,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4909543308742636,
"/record/score/max": 1.0,
"/record/score/mean": 0.6324810741322668,
"/record/score/min": 0.0,
"/record/score/std": 0.4821294068989783,
"advantages": -0.06990488503093467,
"advantages/max": 2.0,
"advantages/min": -1.7142857142857142,
"advantages/std": 0.017079460019094654,
"entropy": 0.7022705078125,
"entropy/max": 0.9921875,
"entropy/min": 0.51953125,
"entropy/std": 0.008319350894964734,
"epoch": 0.047,
"grad_norm": 18432.0,
"learning_rate": 1.9934817353485502e-06,
"loss": -573.1879875,
"out_of_date_ratio": 0.0016289489382567506,
"out_of_date_ratio/max": 0.00349434744566679,
"out_of_date_ratio/min": 0.00033151003299281,
"out_of_date_ratio/std": 7.278455981428102e-05,
"rewards": 0.40625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05012598061177124,
"sampled_at_step": 46.28057046731313,
"sampled_at_step/max": 47.000003814697266,
"sampled_at_step/min": 45.32149124145508,
"sampled_at_step/std": 0.041052582981975456,
"scores": 0.3610213316095669,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008634743818926866,
"step": 47,
"steps": 31.229166666666668,
"steps/max": 89,
"steps/min": 1,
"steps/std": 2.263054296677534
},
{
"/length/completion": 12594.364583333334,
"/length/completion/max": 27909,
"/length/completion/min": 4662,
"/length/completion/std": 465.0296403803164,
"/length/context": 62327.614583333336,
"/length/context/max": 128086,
"/length/context/min": 11295,
"/length/context/std": 2814.866780361495,
"/length/forward": 62332.166666666664,
"/length/forward/max": 128088,
"/length/forward/min": 11296,
"/length/forward/std": 2814.867033516383,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6015779092702169,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4895732104070766,
"/record/score/max": 1.0,
"/record/score/mean": 0.631578947368421,
"/record/score/min": 0.0,
"/record/score/std": 0.4823763889427196,
"advantages": 0.07370510176285243,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.017063208956500202,
"entropy": 0.6767171223958334,
"entropy/max": 0.875,
"entropy/min": 0.462890625,
"entropy/std": 0.008830032891094176,
"epoch": 0.048,
"grad_norm": 20352.0,
"learning_rate": 1.993115001885801e-06,
"loss": 524.9152833333334,
"out_of_date_ratio": 0.0015428930751113512,
"out_of_date_ratio/max": 0.010222065262496471,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 0.00012055963656145637,
"rewards": 0.4895833333333333,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05101996066242478,
"sampled_at_step": 47.23998463153839,
"sampled_at_step/max": 48.0,
"sampled_at_step/min": 46.0,
"sampled_at_step/std": 0.05458848832887572,
"scores": 0.4832470716426042,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.008247719765965627,
"step": 48,
"steps": 37.239583333333336,
"steps/max": 93,
"steps/min": 3,
"steps/std": 2.3124880251035043
},
{
"/length/completion": 9643.739583333334,
"/length/completion/max": 25244,
"/length/completion/min": 2278,
"/length/completion/std": 581.9111320393498,
"/length/context": 45923.697916666664,
"/length/context/max": 115489,
"/length/context/min": 10414,
"/length/context/std": 2683.867219109207,
"/length/forward": 45928.166666666664,
"/length/forward/max": 115496,
"/length/forward/min": 10416,
"/length/forward/std": 2683.864334052588,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6012861736334405,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.48963364981452884,
"/record/score/max": 1.0,
"/record/score/mean": 0.6317237584261934,
"/record/score/min": 0.0,
"/record/score/std": 0.482336865132739,
"advantages": -0.010672928117828497,
"advantages/max": 2.0,
"advantages/min": -1.7142857142857142,
"advantages/std": 0.01988726664862735,
"entropy": 0.688232421875,
"entropy/max": 0.8671875,
"entropy/min": 0.458984375,
"entropy/std": 0.008185300181255748,
"epoch": 0.049,
"grad_norm": 16512.0,
"learning_rate": 1.9927382677660083e-06,
"loss": 149.77001875,
"out_of_date_ratio": 0.001472773932164273,
"out_of_date_ratio/max": 0.003977461252361536,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 9.025339034445814e-05,
"rewards": 0.5104166666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05101996066242478,
"sampled_at_step": 48.107348243395485,
"sampled_at_step/max": 49.0,
"sampled_at_step/min": 47.46670913696289,
"sampled_at_step/std": 0.03662542993500882,
"scores": 0.5666791184161375,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009577435331342692,
"step": 49,
"steps": 26.885416666666668,
"steps/max": 90,
"steps/min": 4,
"steps/std": 2.237565456816576
},
{
"/length/completion": 9139.177083333334,
"/length/completion/max": 17365,
"/length/completion/min": 2460,
"/length/completion/std": 388.9433452061105,
"/length/context": 42846.822916666664,
"/length/context/max": 128316,
"/length/context/min": 9876,
"/length/context/std": 2074.138494759108,
"/length/forward": 42851.333333333336,
"/length/forward/max": 128320,
"/length/forward/min": 9880,
"/length/forward/std": 2074.1248980214746,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6177437020810514,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.48593870047594334,
"/record/score/max": 1.0,
"/record/score/mean": 0.6335445597950654,
"/record/score/min": 0.0,
"/record/score/std": 0.48183591662426184,
"advantages": -0.1057565883496267,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.021120893835498165,
"entropy": 0.67254638671875,
"entropy/max": 0.8671875,
"entropy/min": 0.482421875,
"entropy/std": 0.00796678719028089,
"epoch": 0.05,
"grad_norm": 15488.0,
"learning_rate": 1.992351536782881e-06,
"loss": -225.88562083333332,
"out_of_date_ratio": 0.0011801550923943676,
"out_of_date_ratio/max": 0.0036585365887731314,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 7.948650594591365e-05,
"rewards": 0.65625,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.048475287679651785,
"sampled_at_step": 49.28301433722178,
"sampled_at_step/max": 50.000003814697266,
"sampled_at_step/min": 47.999996185302734,
"sampled_at_step/std": 0.07208558500765914,
"scores": 0.5746298519407763,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009889958472670886,
"step": 50,
"steps": 25.03125,
"steps/max": 96,
"steps/min": 4,
"steps/std": 1.8162947061031707
},
{
"/length/completion": 9401.333333333334,
"/length/completion/max": 21765,
"/length/completion/min": 3213,
"/length/completion/std": 379.3273187781392,
"/length/context": 52463.145833333336,
"/length/context/max": 125646,
"/length/context/min": 7832,
"/length/context/std": 2379.3423480716424,
"/length/forward": 52467.5,
"/length/forward/max": 125648,
"/length/forward/min": 7840,
"/length/forward/std": 2379.32203174122,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.5954022988505747,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4908140191294725,
"/record/score/max": 1.0,
"/record/score/mean": 0.6323256430654999,
"/record/score/min": 0.0,
"/record/score/std": 0.4821720898053945,
"advantages": 0.021790943139257633,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.017091281548428414,
"entropy": 0.6608072916666666,
"entropy/max": 0.84375,
"entropy/min": 0.515625,
"entropy/std": 0.007203795776861115,
"epoch": 0.051,
"grad_norm": 14400.0,
"learning_rate": 1.991954812830795e-06,
"loss": 254.4839875,
"out_of_date_ratio": 0.0011251259766898631,
"out_of_date_ratio/max": 0.003933531232178211,
"out_of_date_ratio/min": 0.0,
"out_of_date_ratio/std": 7.5655512414866e-05,
"rewards": 0.75,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.044194173824159216,
"sampled_at_step": 50.112096428871155,
"sampled_at_step/max": 51.000003814697266,
"sampled_at_step/min": 48.0,
"sampled_at_step/std": 0.054218936651792234,
"scores": 0.7402110997616616,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.00809163192240093,
"step": 51,
"steps": 29.59375,
"steps/max": 89,
"steps/min": 2,
"steps/std": 1.7793043787575034
},
{
"/length/completion": 12228.010416666666,
"/length/completion/max": 22997,
"/length/completion/min": 3270,
"/length/completion/std": 500.05629641725665,
"/length/context": 51091.479166666664,
"/length/context/max": 115132,
"/length/context/min": 6674,
"/length/context/std": 2259.578293306198,
"/length/forward": 51096.0,
"/length/forward/max": 115136,
"/length/forward/min": 6680,
"/length/forward/std": 2259.560576749382,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6038374717832957,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4890989464859371,
"/record/score/max": 1.0,
"/record/score/mean": 0.6317769130998703,
"/record/score/min": 0.0,
"/record/score/std": 0.4823223457127706,
"advantages": -0.08217029951387875,
"advantages/max": 1.7142857142857144,
"advantages/min": -2.0,
"advantages/std": 0.020094432251289217,
"entropy": 0.7004801432291666,
"entropy/max": 0.89453125,
"entropy/min": 0.5546875,
"entropy/std": 0.007632295410241296,
"epoch": 0.052,
"grad_norm": 21760.0,
"learning_rate": 1.991548099904757e-06,
"loss": 52.64125625,
"out_of_date_ratio": 0.001651031008426192,
"out_of_date_ratio/max": 0.03822629898786545,
"out_of_date_ratio/min": 0.00011681560863507912,
"out_of_date_ratio/std": 0.0003895121769529702,
"rewards": 0.4583333333333333,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05085353651346116,
"sampled_at_step": 51.44491422176361,
"sampled_at_step/max": 52.000003814697266,
"sampled_at_step/min": 50.419376373291016,
"sampled_at_step/std": 0.04813901003363954,
"scores": 0.4376143432125869,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009489494447882852,
"step": 52,
"steps": 27.46875,
"steps/max": 77,
"steps/min": 2,
"steps/std": 1.643389081957216
},
{
"/length/completion": 8772.604166666666,
"/length/completion/max": 21070,
"/length/completion/min": 1752,
"/length/completion/std": 421.9746077057603,
"/length/context": 43895.510416666664,
"/length/context/max": 119424,
"/length/context/min": 3176,
"/length/context/std": 2951.941704876966,
"/length/forward": 43899.833333333336,
"/length/forward/max": 119432,
"/length/forward/min": 3184,
"/length/forward/std": 2951.9432746807433,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6261491317671093,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4838247581040109,
"/record/score/max": 1.0,
"/record/score/mean": 0.6313137674770122,
"/record/score/min": 0.0,
"/record/score/std": 0.48244864438714447,
"advantages": 0.014319014319014235,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.02023509036850466,
"entropy": 0.6823933919270834,
"entropy/max": 0.90234375,
"entropy/min": 0.455078125,
"entropy/std": 0.009506664852205228,
"epoch": 0.053,
"grad_norm": 15296.0,
"learning_rate": 1.991131402100361e-06,
"loss": 316.06665,
"out_of_date_ratio": 0.0015899745600715203,
"out_of_date_ratio/max": 0.005576208233833313,
"out_of_date_ratio/min": 0.0003133813734166324,
"out_of_date_ratio/std": 0.00010189227208870029,
"rewards": 0.5104166666666666,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05101996066242478,
"sampled_at_step": 52.58958820501963,
"sampled_at_step/max": 53.000003814697266,
"sampled_at_step/min": 51.999996185302734,
"sampled_at_step/std": 0.0446586193431428,
"scores": 0.4829059829059829,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009849445426304237,
"step": 53,
"steps": 25.8125,
"steps/max": 92,
"steps/min": 1,
"steps/std": 2.317763927235868
},
{
"/length/completion": 10806.510416666666,
"/length/completion/max": 26634,
"/length/completion/min": 900,
"/length/completion/std": 557.6911313642544,
"/length/context": 48856.885416666664,
"/length/context/max": 118755,
"/length/context/min": 3450,
"/length/context/std": 3002.7807003203775,
"/length/forward": 48861.666666666664,
"/length/forward/max": 118760,
"/length/forward/min": 3456,
"/length/forward/std": 3002.77547056392,
"/record/score/last_5_max": 1.0,
"/record/score/last_5_mean": 0.6335952848722987,
"/record/score/last_5_min": 0.0,
"/record/score/last_5_std": 0.4818218549006356,
"/record/score/max": 1.0,
"/record/score/mean": 0.6321173280749323,
"/record/score/min": 0.0,
"/record/score/std": 0.48222921066889,
"advantages": -0.001383604289172752,
"advantages/max": 2.0,
"advantages/min": -2.0,
"advantages/std": 0.0180340178972355,
"entropy": 0.6672566731770834,
"entropy/max": 0.91015625,
"entropy/min": 0.45703125,
"entropy/std": 0.00915473630294931,
"epoch": 0.054,
"grad_norm": 15808.0,
"learning_rate": 1.9907047236137496e-06,
"loss": -10.589589583333334,
"out_of_date_ratio": 0.0016308887469070517,
"out_of_date_ratio/max": 0.027313625440001488,
"out_of_date_ratio/min": 0.0001468213158659637,
"out_of_date_ratio/std": 0.00028130080240651613,
"rewards": 0.53125,
"rewards/max": 1.0,
"rewards/min": 0.0,
"rewards/std": 0.05093126879064569,
"sampled_at_step": 53.38630406061808,
"sampled_at_step/max": 54.000003814697266,
"sampled_at_step/min": 52.30179977416992,
"sampled_at_step/std": 0.05020286577148773,
"scores": 0.45693531649948116,
"scores/max": 1.0,
"scores/min": 0.0,
"scores/std": 0.009264651713448353,
"step": 54,
"steps": 29.114583333333332,
"steps/max": 95,
"steps/min": 0,
"steps/std": 2.370375147567457
}
],
"logging_steps": 1.0,
"max_steps": 1000,
"num_input_tokens_seen": 2062430400,
"num_train_epochs": 9223372036854775807,
"save_steps": 3,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.977636301701382e+19,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}